gitextract_v77f1c8q/

├── .dockerignore
├── .gitattributes
├── .github/
│   ├── CODE_OF_CONDUCT.md
│   ├── CONTRIBUTING.md
│   ├── ISSUE_TEMPLATE/
│   │   ├── 1-bug-report.yml
│   │   ├── 2-feature-request.yml
│   │   └── config.yml
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── SECURITY.md
│   ├── copilot-instructions.md
│   ├── instructions-v0.md
│   ├── instructions-v1.md
│   └── workflows/
│       ├── docker.yml
│       ├── docs.yml
│       ├── label_issue.yml
│       ├── publish.yml
│       ├── tests.yml
│       ├── tests_cuda.yml
│       └── tests_npu.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CITATION.cff
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── README_zh.md
├── data/
│   ├── README.md
│   ├── README_zh.md
│   ├── alpaca_en_demo.json
│   ├── alpaca_zh_demo.json
│   ├── c4_demo.jsonl
│   ├── dataset_info.json
│   ├── dpo_en_demo.json
│   ├── dpo_zh_demo.json
│   ├── glaive_toolcall_en_demo.json
│   ├── glaive_toolcall_zh_demo.json
│   ├── identity.json
│   ├── kto_en_demo.json
│   ├── mllm_audio_demo.json
│   ├── mllm_demo.json
│   ├── mllm_demo_data/
│   │   └── 3.flac
│   ├── mllm_video_audio_demo.json
│   ├── mllm_video_demo.json
│   ├── reason_tool_use_demo_50.jsonl
│   ├── v1_dpo_demo.jsonl
│   ├── v1_dpo_demo.yaml
│   ├── v1_sft_demo.jsonl
│   ├── v1_sft_demo.yaml
│   └── wiki_demo.txt
├── docker/
│   ├── docker-cuda/
│   │   ├── Dockerfile
│   │   ├── Dockerfile.base
│   │   ├── Dockerfile.megatron
│   │   ├── README.md
│   │   └── docker-compose.yml
│   ├── docker-npu/
│   │   ├── Dockerfile
│   │   └── docker-compose.yml
│   └── docker-rocm/
│       ├── Dockerfile
│       └── docker-compose.yml
├── docs/
│   ├── Makefile
│   ├── _static/
│   │   ├── css/
│   │   │   └── lang-switcher.css
│   │   └── js/
│   │       └── switcher.js
│   ├── conf.py
│   ├── en/
│   │   ├── advanced/
│   │   │   ├── custom-kernels/
│   │   │   │   ├── custom-kernels.md
│   │   │   │   ├── fused-operators.md
│   │   │   │   └── triton.md
│   │   │   ├── distributed/
│   │   │   │   ├── deepspeed.md
│   │   │   │   ├── fsdp.md
│   │   │   │   └── parallel-dp-tp-ep-sp-cp.md
│   │   │   └── lora-and-quantization/
│   │   │       ├── lora.md
│   │   │       └── quantization.md
│   │   ├── conf.py
│   │   ├── data-preparation/
│   │   │   └── data-processing.md
│   │   ├── dev-guide/
│   │   │   ├── core/
│   │   │   │   ├── data-engine.md
│   │   │   │   ├── model-engine.md
│   │   │   │   └── trainer.md
│   │   │   └── plugins/
│   │   │       ├── data-plugins.md
│   │   │       └── model-plugins/
│   │   │           ├── initialization.md
│   │   │           ├── kernels.md
│   │   │           └── rendering.md
│   │   ├── getting-started.md
│   │   ├── hyperparameters/
│   │   │   ├── data-argument.md
│   │   │   ├── model-argument.md
│   │   │   ├── sample-argument.md
│   │   │   └── training-argument.md
│   │   ├── index.rst
│   │   ├── inference/
│   │   │   └── deploy.md
│   │   ├── installation.md
│   │   ├── llamaboard-web-ui.md
│   │   └── training/
│   │       ├── dpo.md
│   │       └── sft.md
│   ├── make.bat
│   ├── requirements.txt
│   └── zh/
│       ├── advanced/
│       │   ├── custom-kernels/
│       │   │   ├── custom-kernels.md
│       │   │   ├── fused-operators.md
│       │   │   └── triton.md
│       │   ├── distributed/
│       │   │   ├── deepspeed.md
│       │   │   ├── fsdp.md
│       │   │   └── parallel-dp-tp-ep-sp-cp.md
│       │   └── lora-and-quantization/
│       │       ├── lora.md
│       │       └── quantization.md
│       ├── conf.py
│       ├── data-preparation/
│       │   └── data-processing.md
│       ├── dev-guide/
│       │   ├── core/
│       │   │   ├── data-engine.md
│       │   │   ├── model-engine.md
│       │   │   └── trainer.md
│       │   └── plugins/
│       │       ├── data-plugins.md
│       │       └── model-plugins/
│       │           ├── initialization.md
│       │           ├── kernels.md
│       │           └── rendering.md
│       ├── getting-started.md
│       ├── hyperparameters/
│       │   ├── data-argument.md
│       │   ├── model-argument.md
│       │   ├── sample-argument.md
│       │   └── training-argument.md
│       ├── index.rst
│       ├── inference/
│       │   └── deploy.md
│       ├── installation.md
│       ├── llamaboard-web-ui.md
│       └── training/
│           ├── dpo.md
│           └── sft.md
├── examples/
│   ├── README.md
│   ├── README_zh.md
│   ├── accelerate/
│   │   ├── fsdp2_config.yaml
│   │   ├── fsdp_config.yaml
│   │   ├── fsdp_config_multiple_nodes.yaml
│   │   └── fsdp_config_offload.yaml
│   ├── ascend/
│   │   ├── qwen3_full_sft_fsdp2.yaml
│   │   ├── qwen3moe_full_sft_fsdp.yaml
│   │   ├── qwen3vlmoe_full_sft_fsdp2.yaml
│   │   └── qwen3vlmoe_lora_sft_fsdp.yaml
│   ├── deepspeed/
│   │   ├── ds_z0_config.json
│   │   ├── ds_z2_autotp_config.json
│   │   ├── ds_z2_config.json
│   │   ├── ds_z2_offload_config.json
│   │   ├── ds_z3_config.json
│   │   ├── ds_z3_fp8_config.json
│   │   └── ds_z3_offload_config.json
│   ├── extras/
│   │   ├── adam_mini/
│   │   │   └── qwen2_full_sft.yaml
│   │   ├── apollo/
│   │   │   └── llama3_full_sft.yaml
│   │   ├── asft/
│   │   │   ├── llama2_full_asft.yaml
│   │   │   └── qwen2_full_asft.yaml
│   │   ├── badam/
│   │   │   └── llama3_full_sft.yaml
│   │   ├── dft/
│   │   │   └── qwen2_full_sft.yaml
│   │   ├── eaft/
│   │   │   └── qwen25_05b_eaft_full.yaml
│   │   ├── fp8/
│   │   │   ├── llama3_fp8_deepspeed_sft.yaml
│   │   │   └── llama3_fp8_fsdp_sft.yaml
│   │   ├── fsdp_qlora/
│   │   │   ├── llama3_lora_sft.yaml
│   │   │   └── train.sh
│   │   ├── galore/
│   │   │   └── llama3_full_sft.yaml
│   │   ├── llama_pro/
│   │   │   ├── expand.sh
│   │   │   └── llama3_freeze_sft.yaml
│   │   ├── loraplus/
│   │   │   └── llama3_lora_sft.yaml
│   │   ├── mod/
│   │   │   └── llama3_full_sft.yaml
│   │   ├── multi_tokens/
│   │   │   └── tokens_cfg.yaml
│   │   ├── muon/
│   │   │   └── qwen2_full_sft.yaml
│   │   ├── nlg_eval/
│   │   │   └── llama3_lora_predict.yaml
│   │   ├── oft/
│   │   │   ├── llama3_oft_sft.yaml
│   │   │   └── qwen2_5vl_oft_sft.yaml
│   │   ├── pissa/
│   │   │   ├── init.sh
│   │   │   └── llama3_lora_sft.yaml
│   │   └── qoft/
│   │       ├── llama3_oft_sft_awq.yaml
│   │       ├── llama3_oft_sft_bnb_npu.yaml
│   │       └── llama3_oft_sft_gptq.yaml
│   ├── inference/
│   │   ├── qwen3.yaml
│   │   ├── qwen3_full_sft.yaml
│   │   ├── qwen3_lora_sft.yaml
│   │   └── qwen3vl.yaml
│   ├── ktransformers/
│   │   ├── infer_lora/
│   │   │   ├── deepseek2_lora_sft_kt.yaml
│   │   │   ├── deepseek3_kt.yaml
│   │   │   ├── deepseek3_lora_sft_kt.yaml
│   │   │   └── qwen3moe_lora_sft_kt.yaml
│   │   ├── kt_optimize_rules/
│   │   │   ├── DeepSeek-V2-Chat-sft-amx.yaml
│   │   │   ├── DeepSeek-V2-Chat.yaml
│   │   │   ├── DeepSeek-V2-Lite-Chat-sft-amx-multi-gpu.yaml
│   │   │   ├── DeepSeek-V2-Lite-Chat-sft-amx.yaml
│   │   │   ├── DeepSeek-V2-Lite-Chat-sft.yaml
│   │   │   ├── DeepSeek-V2-Lite-Chat.yaml
│   │   │   ├── DeepSeek-V3-Chat-amx.yaml
│   │   │   ├── DeepSeek-V3-Chat-sft-amx-multi-gpu-4.yaml
│   │   │   ├── DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml
│   │   │   ├── DeepSeek-V3-Chat-sft-amx.yaml
│   │   │   └── Qwen3Moe-sft-amx.yaml
│   │   └── train_lora/
│   │       ├── deepseek2_lora_sft_kt.yaml
│   │       ├── deepseek3_lora_sft_kt.yaml
│   │       └── qwen3moe_lora_sft_kt.yaml
│   ├── megatron/
│   │   ├── qwen2_vl_full.yaml
│   │   └── qwen3_moe_full.yaml
│   ├── merge_lora/
│   │   ├── qwen3_full_sft.yaml
│   │   ├── qwen3_gptq.yaml
│   │   ├── qwen3_lora_sft.yaml
│   │   └── qwen3vl_lora_sft.yaml
│   ├── train_full/
│   │   ├── qwen3_full_sft.yaml
│   │   └── qwen3vl_full_sft.yaml
│   ├── train_lora/
│   │   ├── qwen3_lora_dpo.yaml
│   │   ├── qwen3_lora_kto.yaml
│   │   ├── qwen3_lora_pretrain.yaml
│   │   ├── qwen3_lora_reward.yaml
│   │   ├── qwen3_lora_sft.sh
│   │   ├── qwen3_lora_sft.yaml
│   │   ├── qwen3_lora_sft_ds3.yaml
│   │   ├── qwen3_lora_sft_ray.yaml
│   │   ├── qwen3_preprocess.yaml
│   │   ├── qwen3vl_lora_dpo.yaml
│   │   └── qwen3vl_lora_sft.yaml
│   ├── train_qlora/
│   │   ├── llama3_lora_sft_aqlm.yaml
│   │   ├── llama3_lora_sft_awq.yaml
│   │   ├── llama3_lora_sft_gptq.yaml
│   │   ├── qwen3_lora_sft_bnb_npu.yaml
│   │   └── qwen3_lora_sft_otfq.yaml
│   └── v1/
│       ├── train_freeze/
│       │   └── train_freeze_sft.yaml
│       ├── train_full/
│       │   ├── train_full_deepspeed.yaml
│       │   └── train_full_fsdp2.yaml
│       ├── train_lora/
│       │   ├── export_lora.yaml
│       │   └── train_lora_sft.yaml
│       └── train_qlora/
│           └── quantization.yaml
├── pyproject.toml
├── requirements/
│   ├── adam-mini.txt
│   ├── apollo.txt
│   ├── aqlm.txt
│   ├── badam.txt
│   ├── bitsandbytes.txt
│   ├── deepspeed.txt
│   ├── dev.txt
│   ├── eetq.txt
│   ├── fp8-te.txt
│   ├── fp8.txt
│   ├── galore.txt
│   ├── gptq.txt
│   ├── hqq.txt
│   ├── liger-kernel.txt
│   ├── metrics.txt
│   ├── minicpm-v.txt
│   ├── npu.txt
│   ├── openmind.txt
│   ├── sglang.txt
│   ├── swanlab.txt
│   └── vllm.txt
├── scripts/
│   ├── api_example/
│   │   ├── test_image.py
│   │   └── test_toolcall.py
│   ├── bench_qwen.py
│   ├── convert_ckpt/
│   │   ├── llamafy_baichuan2.py
│   │   ├── llamafy_qwen.py
│   │   ├── tiny_llama4.py
│   │   └── tiny_qwen3.py
│   ├── eval_bleu_rouge.py
│   ├── hf2dcp.py
│   ├── llama_pro.py
│   ├── loftq_init.py
│   ├── megatron_merge.py
│   ├── pissa_init.py
│   ├── qwen_omni_merge.py
│   ├── stat_utils/
│   │   ├── cal_flops.py
│   │   ├── cal_lr.py
│   │   ├── cal_mfu.py
│   │   ├── cal_ppl.py
│   │   └── length_cdf.py
│   └── vllm_infer.py
├── src/
│   ├── api.py
│   ├── llamafactory/
│   │   ├── __init__.py
│   │   ├── api/
│   │   │   ├── __init__.py
│   │   │   ├── app.py
│   │   │   ├── chat.py
│   │   │   ├── common.py
│   │   │   └── protocol.py
│   │   ├── chat/
│   │   │   ├── __init__.py
│   │   │   ├── base_engine.py
│   │   │   ├── chat_model.py
│   │   │   ├── hf_engine.py
│   │   │   ├── kt_engine.py
│   │   │   ├── sglang_engine.py
│   │   │   └── vllm_engine.py
│   │   ├── cli.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── collator.py
│   │   │   ├── converter.py
│   │   │   ├── data_utils.py
│   │   │   ├── formatter.py
│   │   │   ├── loader.py
│   │   │   ├── mm_plugin.py
│   │   │   ├── parser.py
│   │   │   ├── processor/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── feedback.py
│   │   │   │   ├── pairwise.py
│   │   │   │   ├── pretrain.py
│   │   │   │   ├── processor_utils.py
│   │   │   │   ├── supervised.py
│   │   │   │   └── unsupervised.py
│   │   │   ├── template.py
│   │   │   └── tool_utils.py
│   │   ├── eval/
│   │   │   ├── __init__.py
│   │   │   ├── evaluator.py
│   │   │   └── template.py
│   │   ├── extras/
│   │   │   ├── __init__.py
│   │   │   ├── constants.py
│   │   │   ├── env.py
│   │   │   ├── logging.py
│   │   │   ├── misc.py
│   │   │   ├── packages.py
│   │   │   └── ploting.py
│   │   ├── hparams/
│   │   │   ├── __init__.py
│   │   │   ├── data_args.py
│   │   │   ├── evaluation_args.py
│   │   │   ├── finetuning_args.py
│   │   │   ├── generating_args.py
│   │   │   ├── model_args.py
│   │   │   ├── parser.py
│   │   │   └── training_args.py
│   │   ├── launcher.py
│   │   ├── model/
│   │   │   ├── __init__.py
│   │   │   ├── adapter.py
│   │   │   ├── loader.py
│   │   │   ├── model_utils/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── attention.py
│   │   │   │   ├── checkpointing.py
│   │   │   │   ├── embedding.py
│   │   │   │   ├── ktransformers.py
│   │   │   │   ├── kv_cache.py
│   │   │   │   ├── liger_kernel.py
│   │   │   │   ├── longlora.py
│   │   │   │   ├── misc.py
│   │   │   │   ├── mod.py
│   │   │   │   ├── moe.py
│   │   │   │   ├── packing.py
│   │   │   │   ├── quantization.py
│   │   │   │   ├── rope.py
│   │   │   │   ├── unsloth.py
│   │   │   │   ├── valuehead.py
│   │   │   │   └── visual.py
│   │   │   └── patcher.py
│   │   ├── third_party/
│   │   │   ├── __init__.py
│   │   │   └── muon/
│   │   │       ├── __init__.py
│   │   │       └── muon.py
│   │   ├── train/
│   │   │   ├── __init__.py
│   │   │   ├── callbacks.py
│   │   │   ├── dpo/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── ktrainer.py
│   │   │   │   ├── trainer.py
│   │   │   │   └── workflow.py
│   │   │   ├── fp8_utils.py
│   │   │   ├── kto/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── trainer.py
│   │   │   │   └── workflow.py
│   │   │   ├── mca/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── trainer.py
│   │   │   │   └── workflow.py
│   │   │   ├── ppo/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── ppo_utils.py
│   │   │   │   ├── trainer.py
│   │   │   │   └── workflow.py
│   │   │   ├── pt/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── trainer.py
│   │   │   │   └── workflow.py
│   │   │   ├── rm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── metric.py
│   │   │   │   ├── trainer.py
│   │   │   │   └── workflow.py
│   │   │   ├── sft/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── metric.py
│   │   │   │   ├── trainer.py
│   │   │   │   └── workflow.py
│   │   │   ├── test_utils.py
│   │   │   ├── trainer_utils.py
│   │   │   └── tuner.py
│   │   ├── v1/
│   │   │   ├── __init__.py
│   │   │   ├── accelerator/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── helper.py
│   │   │   │   ├── interface.py
│   │   │   │   └── profiler.py
│   │   │   ├── config/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── arg_parser.py
│   │   │   │   ├── arg_utils.py
│   │   │   │   ├── data_args.py
│   │   │   │   ├── model_args.py
│   │   │   │   ├── sample_args.py
│   │   │   │   └── training_args.py
│   │   │   ├── core/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base_sampler.py
│   │   │   │   ├── base_trainer.py
│   │   │   │   ├── data_engine.py
│   │   │   │   ├── model_engine.py
│   │   │   │   └── utils/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── batching.py
│   │   │   │       ├── callback.py
│   │   │   │       ├── inference_engine.py
│   │   │   │       └── rendering.py
│   │   │   ├── launcher.py
│   │   │   ├── plugins/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── data_plugins/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── converter.py
│   │   │   │   │   └── loader.py
│   │   │   │   ├── model_plugins/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── add_token.py
│   │   │   │   │   ├── initialization.py
│   │   │   │   │   ├── kernels/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── base.py
│   │   │   │   │   │   ├── interface.py
│   │   │   │   │   │   ├── ops/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── mlp/
│   │   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   │   ├── npu_fused_moe.py
│   │   │   │   │   │   │   │   └── npu_swiglu.py
│   │   │   │   │   │   │   ├── rms_norm/
│   │   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   │   └── npu_rms_norm.py
│   │   │   │   │   │   │   └── rope/
│   │   │   │   │   │   │       ├── __init__.py
│   │   │   │   │   │   │       └── npu_rope.py
│   │   │   │   │   │   └── registry.py
│   │   │   │   │   ├── peft.py
│   │   │   │   │   ├── quantization.py
│   │   │   │   │   ├── rendering.py
│   │   │   │   │   └── templates/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── qwen3.py
│   │   │   │   │       └── qwen3_nothink.py
│   │   │   │   ├── sampler_plugins/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── vllm.py
│   │   │   │   └── trainer_plugins/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── batching.py
│   │   │   │       ├── distributed/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   ├── deepspeed.py
│   │   │   │       │   ├── fsdp2.py
│   │   │   │       │   └── hub.py
│   │   │   │       ├── lr_scheduler.py
│   │   │   │       └── optimizer.py
│   │   │   ├── samplers/
│   │   │   │   └── cli_sampler.py
│   │   │   ├── trainers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dpo_trainer.py
│   │   │   │   ├── rm_trainer.py
│   │   │   │   └── sft_trainer.py
│   │   │   └── utils/
│   │   │       ├── __init__.py
│   │   │       ├── constants.py
│   │   │       ├── dtype.py
│   │   │       ├── env.py
│   │   │       ├── helper.py
│   │   │       ├── logging.py
│   │   │       ├── objects.py
│   │   │       ├── packages.py
│   │   │       ├── plugin.py
│   │   │       ├── pytest.py
│   │   │       └── types.py
│   │   └── webui/
│   │       ├── __init__.py
│   │       ├── chatter.py
│   │       ├── common.py
│   │       ├── components/
│   │       │   ├── __init__.py
│   │       │   ├── chatbot.py
│   │       │   ├── data.py
│   │       │   ├── eval.py
│   │       │   ├── export.py
│   │       │   ├── footer.py
│   │       │   ├── infer.py
│   │       │   ├── top.py
│   │       │   └── train.py
│   │       ├── control.py
│   │       ├── css.py
│   │       ├── engine.py
│   │       ├── interface.py
│   │       ├── locales.py
│   │       ├── manager.py
│   │       └── runner.py
│   ├── train.py
│   └── webui.py
├── tests/
│   ├── check_license.py
│   ├── conftest.py
│   ├── data/
│   │   ├── processor/
│   │   │   ├── test_feedback.py
│   │   │   ├── test_pairwise.py
│   │   │   ├── test_processor_utils.py
│   │   │   ├── test_supervised.py
│   │   │   └── test_unsupervised.py
│   │   ├── test_collator.py
│   │   ├── test_converter.py
│   │   ├── test_formatter.py
│   │   ├── test_loader.py
│   │   ├── test_mm_plugin.py
│   │   └── test_template.py
│   ├── e2e/
│   │   ├── test_chat.py
│   │   ├── test_sglang.py
│   │   └── test_train.py
│   ├── eval/
│   │   └── test_eval_template.py
│   ├── model/
│   │   ├── model_utils/
│   │   │   ├── test_add_tokens.py
│   │   │   ├── test_attention.py
│   │   │   ├── test_checkpointing.py
│   │   │   ├── test_misc.py
│   │   │   ├── test_packing.py
│   │   │   └── test_visual.py
│   │   ├── test_base.py
│   │   ├── test_freeze.py
│   │   ├── test_full.py
│   │   ├── test_lora.py
│   │   └── test_pissa.py
│   ├── train/
│   │   └── test_sft_trainer.py
│   └── version.txt
└── tests_v1/
    ├── accelerator/
    │   └── test_interface.py
    ├── config/
    │   └── test_args_parser.py
    ├── conftest.py
    ├── core/
    │   ├── test_data_engine.py
    │   ├── test_model_loader.py
    │   └── utils/
    │       ├── test_batching.py
    │       └── test_rendering.py
    ├── plugins/
    │   ├── data_plugins/
    │   │   └── test_converter.py
    │   ├── model_plugins/
    │   │   ├── test_init_plugin.py
    │   │   ├── test_kernel_plugin.py
    │   │   ├── test_peft.py
    │   │   └── test_quantization_plugin.py
    │   └── trainer_plugins/
    │       └── distributed/
    │           └── test_fsdp2.py
    ├── sampler/
    │   └── test_cli_sampler.py
    └── trainers/
        └── test_fsdp2_sft_trainer.py