gitextract__9f9_ucr/

├── .copyright.hook
├── .flake8
├── .github/
│   ├── CODE_OF_CONDUCT.md
│   ├── CODE_OF_CONDUCT_en.md
│   ├── CONTRIBUTING_en.md
│   ├── ISSUE_TEMPLATE/
│   │   ├── ask-question.yml
│   │   ├── bug-report.yml
│   │   ├── docs-report.yml
│   │   ├── feature-request.yml
│   │   ├── new-model.yaml
│   │   └── others.yml
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── actions/
│   │   └── rerun-workflow/
│   │       ├── action.yml
│   │       └── rerun.sh
│   ├── codecov.yml
│   └── workflows/
│       ├── _clone_linux.yml
│       ├── _xpu_ci_test.yml
│       ├── ce-build-ci-workflow.yml
│       ├── ce-build-images.yml
│       ├── ce-build-whl.yml
│       ├── ce-deadlink.yml
│       ├── ce-unittest-gpu.yml
│       ├── check-release-pr.yaml
│       ├── cherry-pick.yml
│       ├── ci_iluvatar.yml
│       ├── ci_xpu.yml
│       ├── debug-unittest-gpu.yml
│       ├── fleet-model-test.yml
│       ├── lint.yml
│       ├── model-unittest-gpu.yml
│       ├── requirements-review.yml
│       ├── rerun.yml
│       ├── stale.yml
│       ├── unittest-gpu.yml
│       └── update-precision.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── docs/
│   ├── en/
│   │   ├── cli_usage.md
│   │   ├── datasets.md
│   │   ├── datasets_format.md
│   │   ├── image_processors.md
│   │   ├── processors.md
│   │   └── video_processors.md
│   └── zh/
│       ├── ILUVATAR-GPU_installation_guide.md
│       ├── ILUVATAR-GPU_usage_guide.md
│       ├── Metax-GPU_installation_guide.md
│       ├── Metax-GPU_usage_guide.md
│       ├── XPU_installation_guide.md
│       ├── XPU_usage_guide.md
│       ├── chat_template_guide.md
│       ├── cli_usage.md
│       ├── custom_datasets_format_zh.md
│       ├── data_processing_guide.md
│       ├── dataset_format.md
│       ├── deployment_guide.md
│       ├── dpo_and_lora_guide.md
│       ├── ernie4.5_pretraining.md
│       ├── how_to_download_model.md
│       ├── image_processors_zh.md
│       ├── model_capability.md
│       ├── processors_zh.md
│       ├── pt_and_cpt_guide.md
│       ├── sft_and_lora_guide.md
│       ├── template.md
│       ├── template_zh.md
│       ├── training_arguments.md
│       └── video_processors_zh.md
├── examples/
│   ├── FAQ.md
│   ├── README.md
│   ├── best_practices/
│   │   ├── DeepSeek-V3/
│   │   │   ├── README.md
│   │   │   ├── SFT-Practice.md
│   │   │   ├── dsv3_128k_config.yaml
│   │   │   ├── dsv3_32k_config.yaml
│   │   │   ├── dsv3_4k_config.yaml
│   │   │   ├── pretrain/
│   │   │   │   ├── config/
│   │   │   │   │   ├── config.json
│   │   │   │   │   ├── pretrain_argument.yaml
│   │   │   │   │   ├── tokenizer.json
│   │   │   │   │   └── tokenizer_config.json
│   │   │   │   ├── run.sh
│   │   │   │   └── train_gpu.sh
│   │   │   ├── run_dsv3_128k.sh
│   │   │   ├── run_dsv3_32k.sh
│   │   │   └── run_dsv3_4k.sh
│   │   ├── ERNIE-4.5/
│   │   │   └── README.md
│   │   ├── ERNIE-4.5-VL/
│   │   │   ├── README.md
│   │   │   ├── ernie45vl_32k_config.yaml
│   │   │   ├── ernie45vl_8k_config.yaml
│   │   │   └── ernie45vl_8k_lora_config.yaml
│   │   ├── PaddleOCR-VL/
│   │   │   ├── README.md
│   │   │   ├── paddleocr-vl_full_16k_config.yaml
│   │   │   ├── paddleocr-vl_lora_16k_config.yaml
│   │   │   ├── paddleocr-vl_lora_export.yaml
│   │   │   ├── run_paddleocr-vl_full_16k.sh
│   │   │   ├── run_paddleocr-vl_full_16k_4090D.sh
│   │   │   ├── run_paddleocr-vl_lora_16k.sh
│   │   │   ├── run_paddleocr-vl_lora_16k_4090D.sh
│   │   │   └── run_paddleocr-vl_lora_export.sh
│   │   ├── PaddleOCR-VL-1.5/
│   │   │   ├── README.md
│   │   │   ├── paddleocr-vl-v15_full_16k_region_config.yaml
│   │   │   ├── paddleocr-vl-v15_full_16k_table_config.yaml
│   │   │   ├── paddleocr-vl-v15_lora_16k_region_config.yaml
│   │   │   ├── paddleocr-vl-v15_lora_16k_table_config.yaml
│   │   │   ├── region_ocr.md
│   │   │   └── table_ocr.md
│   │   ├── function_call.md
│   │   └── tutorials/
│   │       ├── how_to_train_a_function_call_model.md
│   │       ├── how_to_train_a_reasoning_model.md
│   │       ├── how_to_train_a_visual_grounding_model.md
│   │       └── how_to_train_an_emoji_model.md
│   ├── config/
│   │   ├── dpo/
│   │   │   ├── full.yaml
│   │   │   ├── full_function_call.yaml
│   │   │   ├── full_tp_pp.yaml
│   │   │   ├── full_tp_pp_ep.yaml
│   │   │   ├── lora.yaml
│   │   │   ├── lora_tp_pp.yaml
│   │   │   └── lora_tp_pp_ep.yaml
│   │   ├── dpo-vl/
│   │   │   ├── full.yaml
│   │   │   ├── full_fsdp.yaml
│   │   │   ├── full_tp.yaml
│   │   │   ├── lora.yaml
│   │   │   ├── lora_fsdp.yaml
│   │   │   └── lora_tp.yaml
│   │   ├── iluvatar/
│   │   │   ├── ERNIE-4.5-0.3B-PT/
│   │   │   │   └── sft/
│   │   │   │       ├── full_8k.yaml
│   │   │   │       ├── lora_8k.yaml
│   │   │   │       ├── lora_export.yaml
│   │   │   │       ├── run_full_8k.sh
│   │   │   │       ├── run_lora_8k.sh
│   │   │   │       └── run_lora_export.sh
│   │   │   ├── ERNIE-4.5-21B-A3B-PT/
│   │   │   │   └── sft/
│   │   │   │       ├── full_8k.yaml
│   │   │   │       ├── lora_8k.yaml
│   │   │   │       ├── lora_export.yaml
│   │   │   │       ├── run_full_8k.sh
│   │   │   │       ├── run_lora_8k.sh
│   │   │   │       └── run_lora_export.sh
│   │   │   └── PaddleOCR-VL/
│   │   │       └── sft/
│   │   │           ├── paddleocr-vl_full_16k_config.yaml
│   │   │           ├── paddleocr-vl_lora_16k_config.yaml
│   │   │           ├── paddleocr-vl_lora_export.yaml
│   │   │           ├── run_paddleocr-vl_full_16k.sh
│   │   │           ├── run_paddleocr-vl_lora_16k.sh
│   │   │           └── run_paddleocr-vl_lora_export.sh
│   │   ├── metax/
│   │   │   ├── ERNIE-4.5-0.3B/
│   │   │   │   └── sft/
│   │   │   │       ├── lora.yaml
│   │   │   │       ├── run_lora.sh
│   │   │   │       ├── run_sft.sh
│   │   │   │       └── sft.yaml
│   │   │   └── ERNIE-4.5-21B-A3B/
│   │   │       └── sft/
│   │   │           ├── lora.yaml
│   │   │           ├── run_lora.sh
│   │   │           ├── run_sft.sh
│   │   │           └── sft.yaml
│   │   ├── pt/
│   │   │   ├── eb45_pretrain/
│   │   │   │   ├── 21b_8_gpus.yaml
│   │   │   │   ├── 300b_2016_gpus.yaml
│   │   │   │   ├── 300b_4_nodes_ce.yaml
│   │   │   │   ├── 300b_8_gpus_ci.yaml
│   │   │   │   ├── 300b_96gpus.yaml
│   │   │   │   └── 300b_96gpus_small_acc.yaml
│   │   │   ├── full.yaml
│   │   │   ├── full_offline_data.yaml
│   │   │   ├── full_tp_pp.yaml
│   │   │   ├── full_tp_pp_ep.yaml
│   │   │   ├── lora.yaml
│   │   │   ├── lora_tp_pp.yaml
│   │   │   └── lora_tp_pp_ep.yaml
│   │   ├── run_export.yaml
│   │   ├── sft/
│   │   │   ├── full.yaml
│   │   │   ├── full_function_call.yaml
│   │   │   ├── full_tp_pp.yaml
│   │   │   ├── full_tp_pp_ep.yaml
│   │   │   ├── lora.yaml
│   │   │   ├── lora_tp_pp.yaml
│   │   │   └── lora_tp_pp_ep.yaml
│   │   ├── sft-vl/
│   │   │   ├── full.yaml
│   │   │   ├── full_fsdp.yaml
│   │   │   ├── full_tp.yaml
│   │   │   ├── lora.yaml
│   │   │   ├── lora_fsdp.yaml
│   │   │   └── lora_tp.yaml
│   │   └── xpu/
│   │       ├── DeepseekV3/
│   │       │   └── sft/
│   │       │       ├── full_32k_config.yaml
│   │       │       ├── full_4k_config.yaml
│   │       │       ├── run_full_32k.sh
│   │       │       └── run_full_4k.sh
│   │       ├── ERNIE-4.5-0.3B/
│   │       │   └── sft/
│   │       │       ├── full_8k.yaml
│   │       │       ├── lora_8k.yaml
│   │       │       └── lora_8k_export.yaml
│   │       ├── ERNIE-4.5-21B-A3B/
│   │       │   └── sft/
│   │       │       ├── full_32k.yaml
│   │       │       ├── lora_32k.yaml
│   │       │       ├── lora_32k_export.yaml
│   │       │       └── run_lora_32k.sh
│   │       ├── ERNIE-4.5-21B-A3B-Thinking/
│   │       │   └── sft/
│   │       │       └── full_8k.yaml
│   │       ├── ERNIE-4.5-VL-28B-A3B-Thinking/
│   │       │   └── sft/
│   │       │       └── full_32k.yaml
│   │       └── PaddleOCR-VL/
│   │           └── sft/
│   │               ├── paddleocr-vl_full_16k_config.yaml
│   │               ├── paddleocr-vl_lora_16k_config.yaml
│   │               ├── paddleocr-vl_lora_export.yaml
│   │               ├── run_paddleocr-vl_full_16k.sh
│   │               ├── run_paddleocr-vl_lora_16k.sh
│   │               └── run_paddleocr-vl_lora_export.sh
│   ├── experiments/
│   │   ├── deepseek_v3_pretrain/
│   │   │   ├── README.md
│   │   │   ├── config/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config.json
│   │   │   │   ├── configuration.py
│   │   │   │   ├── pretrain_argument.json
│   │   │   │   └── pretrain_argument.yaml
│   │   │   ├── convert_ckpt_to_sft.py
│   │   │   ├── fp8_linear.py
│   │   │   ├── kernel.py
│   │   │   ├── load_hf_ckpt.py
│   │   │   ├── modeling.py
│   │   │   ├── modeling_pp.py
│   │   │   ├── moe_gate.py
│   │   │   ├── moe_layer.py
│   │   │   ├── moe_utils.py
│   │   │   ├── run.sh
│   │   │   ├── run_pretrain.py
│   │   │   ├── script/
│   │   │   │   └── train_gpu.sh
│   │   │   └── token_dispatcher.py
│   │   ├── ernie_pretrain/
│   │   │   ├── README.md
│   │   │   ├── README_zh.md
│   │   │   ├── demo_data/
│   │   │   │   ├── data-1-part0.idx
│   │   │   │   └── data-1-part1.idx
│   │   │   ├── ernie/
│   │   │   │   ├── config.py
│   │   │   │   ├── model_config.py
│   │   │   │   ├── pretrain.py
│   │   │   │   └── src/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── callbacks/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   ├── fp8_quant_weight_callback.py
│   │   │   │       │   ├── gc_callback.py
│   │   │   │       │   ├── logging_callback.py
│   │   │   │       │   ├── moe_correction_bias_adjust_callback.py
│   │   │   │       │   ├── moe_logging_callback.py
│   │   │   │       │   ├── ortho_loss_callback.py
│   │   │   │       │   ├── sp_grad_sync_callback.py
│   │   │   │       │   └── tensorboard_callback.py
│   │   │   │       ├── clip/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   └── moe_clip.py
│   │   │   │       ├── lr_schedulers/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   ├── cosine_lr.py
│   │   │   │       │   └── wsd_lr.py
│   │   │   │       ├── tokenizers/
│   │   │   │       │   ├── tokenization_eb_v2.py
│   │   │   │       │   └── tokenizer_model/
│   │   │   │       │       ├── added_tokens.json
│   │   │   │       │       ├── special_tokens_map.json
│   │   │   │       │       ├── tokenizer.model
│   │   │   │       │       └── tokenizer_config.json
│   │   │   │       ├── trainers/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   ├── data_parallel.py
│   │   │   │       │   ├── dygraph_optimizer/
│   │   │   │       │   │   └── hybrid_parallel_optimizer.py
│   │   │   │       │   └── pretraining_trainer.py
│   │   │   │       └── utils/
│   │   │   │           ├── __init__.py
│   │   │   │           ├── logging.py
│   │   │   │           ├── misc.py
│   │   │   │           ├── seed_utils.py
│   │   │   │           └── training_utils.py
│   │   │   ├── model_configs/
│   │   │   │   ├── ERNIE-4p5-21B-A3B/
│   │   │   │   │   └── model_config.json
│   │   │   │   └── ERNIE-4p5-300B-A47B/
│   │   │   │       └── model_config.json
│   │   │   ├── models/
│   │   │   │   ├── comm_utils.py
│   │   │   │   ├── ernie/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── configuration.py
│   │   │   │   │   ├── modeling.py
│   │   │   │   │   ├── modeling_moe.py
│   │   │   │   │   └── modeling_pp.py
│   │   │   │   ├── fp8_linear.py
│   │   │   │   ├── moe/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── moe_layer.py
│   │   │   │   │   ├── token_dispatcher/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── fp8_utils.py
│   │   │   │   │   │   └── moe_utils.py
│   │   │   │   │   └── top2_gate.py
│   │   │   │   ├── sequence_parallel_utils.py
│   │   │   │   └── utils.py
│   │   │   ├── requirements.txt
│   │   │   ├── scripts/
│   │   │   │   └── ERNIE-4p5-300B-A47B/
│   │   │   │       ├── ci_ce/
│   │   │   │       │   ├── train_4_nodes_ce.sh
│   │   │   │       │   └── train_8_gpus_ci.sh
│   │   │   │       ├── train_2016_gpus.sh
│   │   │   │       └── train_96_gpus.sh
│   │   │   ├── tools/
│   │   │   │   ├── sharded_to_uc/
│   │   │   │   │   ├── README_zh.md
│   │   │   │   │   ├── convert_multi_nodes_sharded_to_single_uc.sh
│   │   │   │   │   ├── convert_sharded_to_uc.py
│   │   │   │   │   ├── gather_all_ckpt.py
│   │   │   │   │   └── merge_sharding_ep.py
│   │   │   │   └── uc_to_sharded/
│   │   │   │       ├── README.md
│   │   │   │       ├── README_zh.md
│   │   │   │       └── convert_uc_to_sharded.py
│   │   │   └── yamls/
│   │   │       ├── ERNIE-4p5-21B-A3B/
│   │   │       │   └── pretrain_8_gpus.yaml
│   │   │       └── ERNIE-4p5-300B-A47B/
│   │   │           ├── ci_ce/
│   │   │           │   ├── pretrain_4_nodes_ce.yaml
│   │   │           │   └── pretrain_8_gpus_ci.yaml
│   │   │           ├── pretrain_2016_gpus.yaml
│   │   │           ├── pretrain_96_gpus.yaml
│   │   │           └── pretrain_96_gpus_small_acc.yaml
│   │   ├── glm_pretrain/
│   │   │   └── GLM4.5-Air.yaml
│   │   └── paddlefleet/
│   │       ├── glm45.json
│   │       ├── glm45_provider.py
│   │       ├── glm45_single_card.json
│   │       ├── qwen_provider.py
│   │       ├── qwen_single_card.json
│   │       ├── run_glm45.sh
│   │       └── run_pretrain.py
│   └── tools/
│       ├── create_pretraining_data.py
│       ├── gpt-oss_weight_change/
│       │   ├── README.md
│       │   └── change_weight_dtype.py
│       ├── merge.py
│       └── trans_paddlenlp2hf.py
├── paddleformers/
│   ├── __init__.py
│   ├── cli/
│   │   ├── __init__.py
│   │   ├── cli.py
│   │   ├── export/
│   │   │   ├── __init__.py
│   │   │   └── export.py
│   │   ├── hparams/
│   │   │   ├── __init__.py
│   │   │   ├── data_args.py
│   │   │   ├── export_args.py
│   │   │   ├── finetuning_args.py
│   │   │   ├── generating_args.py
│   │   │   ├── model_args.py
│   │   │   ├── parser.py
│   │   │   ├── preprocess_args.py
│   │   │   └── server_args.py
│   │   ├── launcher.py
│   │   ├── train/
│   │   │   ├── __init__.py
│   │   │   ├── auto_parallel/
│   │   │   │   ├── __init__.py
│   │   │   │   └── workflow.py
│   │   │   ├── deepseek_v3_pretrain/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configuration.py
│   │   │   │   ├── fp8_linear.py
│   │   │   │   ├── kernel.py
│   │   │   │   ├── modeling.py
│   │   │   │   ├── modeling_pp.py
│   │   │   │   ├── moe_gate.py
│   │   │   │   ├── moe_layer.py
│   │   │   │   ├── moe_utils.py
│   │   │   │   ├── token_dispatcher.py
│   │   │   │   ├── utils/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── convert_ckpt_to_sft.py
│   │   │   │   │   └── load_hf_ckpt.py
│   │   │   │   └── workflow.py
│   │   │   ├── dpo/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── data_config.py
│   │   │   │   ├── dpo_argument.py
│   │   │   │   ├── dpo_estimate_training.py
│   │   │   │   ├── dpo_trainer.py
│   │   │   │   └── workflow.py
│   │   │   ├── ernie_pretrain/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── model_config.py
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── comm_utils.py
│   │   │   │   │   ├── ernie/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── configuration.py
│   │   │   │   │   │   ├── modeling.py
│   │   │   │   │   │   ├── modeling_moe.py
│   │   │   │   │   │   └── modeling_pp.py
│   │   │   │   │   ├── fp8_linear.py
│   │   │   │   │   ├── moe/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── moe_layer.py
│   │   │   │   │   │   ├── token_dispatcher/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   ├── fp8_utils.py
│   │   │   │   │   │   │   └── moe_utils.py
│   │   │   │   │   │   └── top2_gate.py
│   │   │   │   │   ├── sequence_parallel_utils.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── src/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── callbacks/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── fp8_quant_weight_callback.py
│   │   │   │   │   │   ├── gc_callback.py
│   │   │   │   │   │   ├── logging_callback.py
│   │   │   │   │   │   ├── moe_correction_bias_adjust_callback.py
│   │   │   │   │   │   ├── moe_logging_callback.py
│   │   │   │   │   │   ├── ortho_loss_callback.py
│   │   │   │   │   │   ├── sp_grad_sync_callback.py
│   │   │   │   │   │   └── tensorboard_callback.py
│   │   │   │   │   ├── clip/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── moe_clip.py
│   │   │   │   │   ├── lr_schedulers/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── cosine_lr.py
│   │   │   │   │   │   └── wsd_lr.py
│   │   │   │   │   ├── tokenizers/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tokenization_eb_v2.py
│   │   │   │   │   ├── trainers/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── data_parallel.py
│   │   │   │   │   │   ├── dygraph_optimizer/
│   │   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   │   └── hybrid_parallel_optimizer.py
│   │   │   │   │   │   └── pretraining_trainer.py
│   │   │   │   │   └── utils/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       ├── logging.py
│   │   │   │   │       ├── misc.py
│   │   │   │   │       ├── seed_utils.py
│   │   │   │   │       └── training_utils.py
│   │   │   │   └── workflow.py
│   │   │   ├── sft/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dataset_formatting.py
│   │   │   │   ├── make_data_utils.py
│   │   │   │   ├── sft_config.py
│   │   │   │   ├── sft_trainer.py
│   │   │   │   └── workflow.py
│   │   │   └── tuner.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── llm_utils.py
│   │       ├── mllm_utils.py
│   │       └── process.py
│   ├── data/
│   │   ├── __init__.py
│   │   ├── blendable_dataset.py
│   │   ├── causal_dataset.py
│   │   ├── collate.py
│   │   ├── data_collator.py
│   │   ├── dist_dataloader.py
│   │   ├── indexed_dataset.py
│   │   ├── iterator.py
│   │   ├── sampler.py
│   │   ├── tokenizer.py
│   │   └── vocab.py
│   ├── datasets/
│   │   ├── DPODataset.py
│   │   ├── SFTDataset.py
│   │   ├── __init__.py
│   │   ├── collate.py
│   │   ├── data_utils.py
│   │   ├── dataset.py
│   │   ├── loader.py
│   │   ├── reader/
│   │   │   ├── __init__.py
│   │   │   ├── convertor.py
│   │   │   ├── data_info.json
│   │   │   ├── download_manager.py
│   │   │   ├── file_reader.py
│   │   │   ├── io.py
│   │   │   ├── mix_datasets.py
│   │   │   └── multi_source_datasets.py
│   │   ├── rlhf_datasets/
│   │   │   ├── __init__.py
│   │   │   ├── protocol.py
│   │   │   └── rl_dataset.py
│   │   ├── sampler/
│   │   │   └── __init__.py
│   │   └── template/
│   │       ├── __init__.py
│   │       ├── augment_utils.py
│   │       ├── formatter.py
│   │       ├── grounding_plugin.py
│   │       ├── mm_plugin.py
│   │       ├── template.py
│   │       └── tool_utils.py
│   ├── generation/
│   │   ├── __init__.py
│   │   ├── configuration_utils.py
│   │   ├── logits_process.py
│   │   ├── stopping_criteria.py
│   │   ├── streamers.py
│   │   └── utils.py
│   ├── mergekit/
│   │   ├── __init__.py
│   │   ├── merge_config.py
│   │   ├── merge_method.py
│   │   ├── merge_model.py
│   │   ├── merge_utils.py
│   │   └── sparsify_method.py
│   ├── nn/
│   │   ├── __init__.py
│   │   ├── activation.py
│   │   ├── attention/
│   │   │   ├── __init__.py
│   │   │   ├── eager_attention.py
│   │   │   ├── flashmask_attention.py
│   │   │   ├── interface.py
│   │   │   ├── sdpa_attention.py
│   │   │   ├── sink_impl.py
│   │   │   └── utils.py
│   │   ├── criterion/
│   │   │   ├── __init__.py
│   │   │   ├── dpo_loss.py
│   │   │   ├── interface.py
│   │   │   ├── kto_loss.py
│   │   │   ├── loss_utils.py
│   │   │   └── sft_loss.py
│   │   ├── embedding.py
│   │   ├── general.py
│   │   ├── linear.py
│   │   ├── lm_head.py
│   │   ├── mlp.py
│   │   ├── moe/
│   │   │   ├── __init__.py
│   │   │   ├── abstract.py
│   │   │   ├── all_gather.py
│   │   │   ├── all_to_all.py
│   │   │   ├── moe_allgather_layer.py
│   │   │   ├── moe_alltoall_layer.py
│   │   │   ├── moe_block.py
│   │   │   ├── topk_gate.py
│   │   │   └── utils.py
│   │   ├── moe_deepep/
│   │   │   ├── __init__.py
│   │   │   ├── modular_moe_layer.py
│   │   │   ├── moe_communication.py
│   │   │   ├── moe_expert.py
│   │   │   ├── moe_factory.py
│   │   │   ├── moe_gate.py
│   │   │   ├── moe_loss.py
│   │   │   └── moe_loss_instance.py
│   │   ├── norm.py
│   │   └── pp_model.py
│   ├── peft/
│   │   ├── __init__.py
│   │   └── lora/
│   │       ├── __init__.py
│   │       ├── auto_lora_model.py
│   │       ├── lora_config.py
│   │       ├── lora_layers.py
│   │       ├── lora_model.py
│   │       ├── lora_quant_layers.py
│   │       ├── lora_quantization_layers.py
│   │       ├── loraga_utils.py
│   │       └── utils.py
│   ├── quantization/
│   │   ├── __init__.py
│   │   ├── checkpoint_quantization_utils.py
│   │   ├── hadamard_utils.py
│   │   ├── qat_utils.py
│   │   ├── qlora.py
│   │   ├── quantization_config.py
│   │   ├── quantization_linear.py
│   │   ├── quantization_utils.py
│   │   └── unified_checkpoint_quantization.py
│   ├── trainer/
│   │   ├── __init__.py
│   │   ├── argparser.py
│   │   ├── integrations.py
│   │   ├── plugins/
│   │   │   ├── __init__.py
│   │   │   ├── npu_plugin.py
│   │   │   └── timer.py
│   │   ├── trainer.py
│   │   ├── trainer_callback.py
│   │   ├── trainer_utils.py
│   │   ├── training_args.py
│   │   ├── unified_checkpoint/
│   │   │   ├── __init__.py
│   │   │   ├── async_handler.py
│   │   │   ├── check_completion.py
│   │   │   ├── load_dynamic.py
│   │   │   ├── load_local.py
│   │   │   ├── load_save_single_card.py
│   │   │   ├── sharding_split_param_utils.py
│   │   │   ├── shared_memory_utils.py
│   │   │   ├── unified_checkpoint.py
│   │   │   └── utils.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── async_save.py
│   │       ├── ckpt_converter.py
│   │       ├── doc.py
│   │       ├── helper.py
│   │       ├── offload_optimizer.py
│   │       ├── reshard/
│   │       │   ├── __init__.py
│   │       │   ├── common.py
│   │       │   ├── pp_reshard.py
│   │       │   ├── sharding_v1.py
│   │       │   └── sharding_v2.py
│   │       ├── sharding_io.py
│   │       └── zero_cost_checkpoint.py
│   ├── transformers/
│   │   ├── __init__.py
│   │   ├── activations.py
│   │   ├── aistudio_utils.py
│   │   ├── attention_utils.py
│   │   ├── audio_processing_utils.py
│   │   ├── audio_utils.py
│   │   ├── auto/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── factory.py
│   │   │   ├── feature_extraction.py
│   │   │   ├── image_processing.py
│   │   │   ├── modeling.py
│   │   │   ├── processing.py
│   │   │   ├── tokenizer.py
│   │   │   └── video_processing.py
│   │   ├── auto_utils.py
│   │   ├── cache_utils.py
│   │   ├── configuration_utils.py
│   │   ├── context_parallel_utils.py
│   │   ├── contrastive_loss.py
│   │   ├── conversion_utils.py
│   │   ├── deepseek_v3/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── mfu_utils.py
│   │   │   └── modeling.py
│   │   ├── download_utils.py
│   │   ├── dpo_criterion.py
│   │   ├── embedding_utils.py
│   │   ├── ernie4_5/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── modeling.py
│   │   │   └── tokenizer.py
│   │   ├── ernie4_5_moe/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   └── modeling.py
│   │   ├── ernie4_5_moe_vl/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── image_processor.py
│   │   │   ├── model/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── comm_utils.py
│   │   │   │   ├── configuration.py
│   │   │   │   ├── dfnrope/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── activation.py
│   │   │   │   │   ├── configuration.py
│   │   │   │   │   ├── modeling.py
│   │   │   │   │   └── modeling_pp.py
│   │   │   │   ├── distributed/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── common_dist_utils.py
│   │   │   │   │   └── xpu_dist_utils.py
│   │   │   │   ├── fusion_ops/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── common_fusion_ops.py
│   │   │   │   │   └── npu_fusion_ops.py
│   │   │   │   ├── longcontext_ops.py
│   │   │   │   ├── loss/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── dpo.py
│   │   │   │   ├── modeling.py
│   │   │   │   ├── modeling_moe.py
│   │   │   │   ├── modeling_moe_pp.py
│   │   │   │   ├── modeling_moe_vl.py
│   │   │   │   ├── modeling_moe_vl_pp.py
│   │   │   │   ├── moe/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── moe_all_gather_layer.py
│   │   │   │   │   ├── moe_layer.py
│   │   │   │   │   └── topk_gate.py
│   │   │   │   ├── refined_recompute/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── sequence_parallel_utils.py
│   │   │   │   └── utils/
│   │   │   │       ├── __init__.py
│   │   │   │       └── misc.py
│   │   │   ├── modeling.py
│   │   │   ├── processor.py
│   │   │   ├── tokenizer.py
│   │   │   └── vision_process.py
│   │   ├── feature_extraction_utils.py
│   │   ├── fp8_utils.py
│   │   ├── fused_a2a.py
│   │   ├── gemma3_text/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   └── modeling.py
│   │   ├── glm4_moe/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   └── modeling.py
│   │   ├── glm4v_moe/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── image_processor.py
│   │   │   ├── image_processor_fast.py
│   │   │   ├── modeling.py
│   │   │   ├── processor.py
│   │   │   └── video_processor.py
│   │   ├── glm_ocr/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── image_processor.py
│   │   │   ├── modeling.py
│   │   │   └── processor.py
│   │   ├── gpt_oss/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   └── modeling.py
│   │   ├── gpt_provider.py
│   │   ├── image_processing_utils.py
│   │   ├── image_processing_utils_fast.py
│   │   ├── image_transforms.py
│   │   ├── image_utils.py
│   │   ├── kimi_k2/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── modeling.py
│   │   │   └── tokenizer.py
│   │   ├── kimi_k25/
│   │   │   ├── __init__.py
│   │   │   ├── media_utils.py
│   │   │   ├── processor.py
│   │   │   ├── tokenizer.py
│   │   │   ├── tool_declaration_ts.py
│   │   │   └── vision_processor.py
│   │   ├── kto_criterion.py
│   │   ├── legacy/
│   │   │   ├── __init__.py
│   │   │   ├── tokenizer_utils.py
│   │   │   └── tokenizer_utils_base.py
│   │   ├── linear_utils.py
│   │   ├── llama/
│   │   │   ├── __init__.py
│   │   │   ├── auto_dist_config.py
│   │   │   ├── configuration.py
│   │   │   ├── modeling.py
│   │   │   ├── tokenizer.py
│   │   │   └── tokenizer_fast.py
│   │   ├── masking_utils.py
│   │   ├── mc2_parallel_linear.py
│   │   ├── model_outputs.py
│   │   ├── model_provider.py
│   │   ├── model_utils.py
│   │   ├── modeling_rope_utils.py
│   │   ├── modelscope_utils.py
│   │   ├── moe_gate.py
│   │   ├── moe_gate_auto.py
│   │   ├── moe_layer.py
│   │   ├── moe_layer_auto.py
│   │   ├── moe_utils.py
│   │   ├── ofa_utils.py
│   │   ├── optimization.py
│   │   ├── paddle_vision_utils.py
│   │   ├── paddleocr_vl/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── image_processor.py
│   │   │   ├── modeling.py
│   │   │   └── processor.py
│   │   ├── phi3/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── modeling.py
│   │   │   └── tokenizer.py
│   │   ├── processing_utils.py
│   │   ├── qwen2/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── modeling.py
│   │   │   ├── tokenizer.py
│   │   │   └── tokenizer_fast.py
│   │   ├── qwen2_5_vl/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── modeling.py
│   │   │   └── processor.py
│   │   ├── qwen2_moe/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   └── modeling.py
│   │   ├── qwen2_vl/
│   │   │   ├── __init__.py
│   │   │   ├── image_processor.py
│   │   │   ├── image_processor_fast.py
│   │   │   ├── processor.py
│   │   │   ├── video_processor.py
│   │   │   └── vision_process.py
│   │   ├── qwen3/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   └── modeling.py
│   │   ├── qwen3_5/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   └── modeling.py
│   │   ├── qwen3_moe/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   └── modeling.py
│   │   ├── qwen3_next/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   └── modeling.py
│   │   ├── qwen3_omni_moe/
│   │   │   ├── __init__.py
│   │   │   └── processor.py
│   │   ├── qwen3_vl/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   ├── modeling.py
│   │   │   ├── modeling_fleet.py
│   │   │   ├── processor.py
│   │   │   └── video_processor.py
│   │   ├── qwen3_vl_moe/
│   │   │   ├── __init__.py
│   │   │   ├── configuration.py
│   │   │   └── modeling.py
│   │   ├── refined_recompute.py
│   │   ├── ring_flash_attention.py
│   │   ├── segment_parallel_utils.py
│   │   ├── sequence_parallel_utils.py
│   │   ├── tensor_parallel_utils.py
│   │   ├── token_dispatcher.py
│   │   ├── tokenizer_utils.py
│   │   ├── tokenizer_utils_base.py
│   │   ├── utils.py
│   │   ├── video_processing_utils.py
│   │   ├── video_utils.py
│   │   ├── vocab_utils.py
│   │   └── whisper/
│   │       ├── __init__.py
│   │       └── processor.py
│   ├── triton_kernels/
│   │   ├── __init__.py
│   │   └── rope_triton.py
│   ├── utils/
│   │   ├── __init__.py
│   │   ├── adamw_triton.py
│   │   ├── batch_sampler.py
│   │   ├── converter.py
│   │   ├── distributed.py
│   │   ├── doc_parser.py
│   │   ├── download/
│   │   │   ├── __init__.py
│   │   │   ├── aistudio_hub_download.py
│   │   │   ├── common.py
│   │   │   └── download.py
│   │   ├── downloader.py
│   │   ├── env.py
│   │   ├── fault_tolerance.py
│   │   ├── ie_utils.py
│   │   ├── image_utils.py
│   │   ├── import_utils.py
│   │   ├── infohub.py
│   │   ├── initializer.py
│   │   ├── lazy_import.py
│   │   ├── log.py
│   │   ├── masking_utils.py
│   │   ├── memory_utils.py
│   │   ├── moe_hybrid_parallel_optimizer.py
│   │   ├── nested.py
│   │   ├── optimizer.py
│   │   ├── paddle_patch.py
│   │   ├── pdc_sdk.py
│   │   ├── perf_utils.py
│   │   ├── profiler.py
│   │   ├── safetensors.py
│   │   ├── serialization.py
│   │   ├── tools.py
│   │   ├── type_validators.py
│   │   └── upcast_downcast_triton.py
│   └── version/
│       ├── __init__.py
│       └── git.py
├── pyproject.toml
├── requirements.txt
├── scripts/
│   ├── ci_utils/
│   │   ├── __init__.py
│   │   ├── log_analyzer.py
│   │   └── training_utils.py
│   ├── codestyle/
│   │   ├── check_dead_links.py
│   │   ├── check_spaces.py
│   │   └── get_modified_files.py
│   ├── dependence/
│   │   └── build.sh
│   ├── iluvatar_ci/
│   │   ├── base_value/
│   │   │   └── ERNIE-21B-SFT-LOSS.json
│   │   ├── config/
│   │   │   └── ERNIE-21B-SFT.yaml
│   │   ├── conftest.py
│   │   └── test_ernie_21b_sft.py
│   ├── regression/
│   │   ├── ci_model_unittest.sh
│   │   ├── test_dpo_tiny-random-glm4moe.py
│   │   ├── test_pt_tiny-random-glm4moe.py
│   │   └── test_sft_tiny-random-glm4moe.py
│   ├── unit_test/
│   │   ├── ci_unittest.sh
│   │   └── gen_allure_report.py
│   └── xpu_ci/
│       ├── README.md
│       ├── base_value/
│       │   ├── ernie_21b_sft_loss.json
│       │   └── ernie_28b_thinking_sft_loss.json
│       ├── config/
│       │   ├── ernie_21b_sft.yaml
│       │   └── ernie_vl_28b_sft.yaml
│       ├── conftest.py
│       ├── test_ernie_21b_sft.py
│       ├── test_ernie_28b_thinking_sft.py
│       └── test_example_template.py.template
├── setup.py
└── tests/
    ├── README.md
    ├── __init__.py
    ├── check_log_for_exitcode.py
    ├── common_test.py
    ├── config/
    │   ├── benchmark/
    │   │   └── config/
    │   │       ├── pt/
    │   │       │   ├── DeepSeek-V3.yaml
    │   │       │   ├── ERNIE45-21B.yaml
    │   │       │   ├── ERNIE45-300B.yaml
    │   │       │   ├── GLM4.5-Air.yaml
    │   │       │   ├── GLM4.5-Air_64k.yaml
    │   │       │   ├── GLM4.5-Air_FP8.yaml
    │   │       │   ├── Qwen3-30B-A3B-Base-64k.yaml
    │   │       │   └── Qwen3-30B-A3B-Base.yaml
    │   │       └── sft/
    │   │           ├── GLM4.5-Air.yaml
    │   │           ├── GLM4.5-Air_128k.yaml
    │   │           ├── GLM4.5-Air_64k.yaml
    │   │           ├── Qwen3-30B-A3B-Base-64k.yaml
    │   │           ├── Qwen3-30B-A3B-Base.yaml
    │   │           ├── Qwen3-VL-30B-A3B-Instruct.yaml
    │   │           └── Qwen3-VL-8B-Instruct.yaml
    │   └── ci/
    │       ├── glm45_dpo.yaml
    │       ├── glm45_dpo_lora.yaml
    │       ├── glm45_lora.yaml
    │       ├── glm45_pt.yaml
    │       ├── glm45_pt_fp8.yaml
    │       ├── glm45_pt_grouped_gemm.yaml
    │       ├── glm45_sft.yaml
    │       ├── glm45_single_pt-test.yaml
    │       ├── qwen3_multicard_lora.yaml
    │       ├── qwen3_multicard_pt.yaml
    │       ├── qwen3_multicard_sft.yaml
    │       ├── qwen3_pt.yaml
    │       ├── qwen3vl_lora.yaml
    │       ├── qwen3vl_sft.yaml
    │       ├── qwen3vl_sft_fsdp.yaml
    │       ├── qwen3vl_sft_moe.yaml
    │       ├── qwen3vl_sft_moe_a100.yaml
    │       └── qwen3vl_sft_single.yaml
    ├── conftest.py
    ├── data/
    │   ├── __init__.py
    │   ├── test_blendable_dataset.py
    │   ├── test_collate.py
    │   ├── test_data_collator.py
    │   ├── test_sampler.py
    │   └── test_vocab.py
    ├── dataset/
    │   ├── __init__.py
    │   ├── test_convertor.py
    │   ├── test_ernie_datasets.py
    │   ├── test_file_reader.py
    │   ├── test_io.py
    │   └── test_iter_datasets.py
    ├── fixtures/
    │   ├── chat_template.json
    │   ├── chat_template_with_context.json
    │   ├── dummy/
    │   │   ├── dpo/
    │   │   │   ├── eval.jsonl
    │   │   │   ├── function-call-eval.jsonl
    │   │   │   ├── function-call-train.jsonl
    │   │   │   └── train.jsonl
    │   │   ├── dpo-vl/
    │   │   │   ├── eval.jsonl
    │   │   │   └── train.jsonl
    │   │   ├── io/
    │   │   │   ├── train.jsonl
    │   │   │   └── train.parquet
    │   │   ├── pt/
    │   │   │   ├── eval.jsonl
    │   │   │   └── train.jsonl
    │   │   ├── sft/
    │   │   │   ├── eval.jsonl
    │   │   │   ├── function-call-eval.jsonl
    │   │   │   ├── function-call-train.jsonl
    │   │   │   └── train.jsonl
    │   │   ├── sft-vl/
    │   │   │   ├── thinking_safety_demo.jsonl
    │   │   │   └── train.jsonl
    │   │   └── tnews/
    │   │       ├── dev.json
    │   │       └── train.json
    │   └── sample_text.txt
    ├── generation/
    │   ├── __init__.py
    │   ├── test_logits_process.py
    │   ├── test_stopping_criteria.py
    │   ├── test_streamers.py
    │   └── test_synced_gpus.py
    ├── integration_test/
    │   ├── check_loss.py
    │   ├── check_pr_approval.py
    │   ├── check_precision_approval.sh
    │   ├── glm45_a100.sh
    │   ├── glm45_dpo.sh
    │   ├── glm45_dpo_lora.sh
    │   ├── glm45_lora.sh
    │   ├── glm45_pt.sh
    │   ├── glm45_pt_ep4.sh
    │   ├── glm45_pt_fp8.sh
    │   ├── glm45_pt_grouped_gemm.sh
    │   ├── glm45_pt_single_card.sh
    │   ├── glm45_sft.sh
    │   ├── preprocess.sh
    │   ├── qwen.sh
    │   ├── qwen3_a100.sh
    │   ├── qwen3_single_card.sh
    │   ├── qwen3vl_lora.sh
    │   ├── qwen3vl_sft.sh
    │   ├── qwen3vl_sft_single_card.sh
    │   └── update_precision.sh
    ├── mergekit/
    │   ├── __init__.py
    │   ├── test_merge_config.py
    │   ├── test_merge_method.py
    │   ├── test_merge_model.py
    │   └── test_sparsify_method.py
    ├── nn/
    │   ├── __init__.py
    │   ├── test_activation.py
    │   ├── test_attention.py
    │   ├── test_criterion.py
    │   ├── test_embedding.py
    │   ├── test_linear.py
    │   ├── test_lm_head.py
    │   ├── test_mlp.py
    │   └── test_norm.py
    ├── parallel_launch.py
    ├── peft/
    │   ├── __init__.py
    │   ├── test_lora.py
    │   └── test_quant_lora.py
    ├── quantization/
    │   ├── __init__.py
    │   └── test_quant.py
    ├── requirements.txt
    ├── testing_utils.py
    ├── trainer/
    │   ├── test_argparser.py
    │   ├── test_hf_format_saver_tp4_sharding2.py
    │   ├── test_lora_unified_checkpoint.py
    │   ├── test_moe_unified_checkpoint.py
    │   ├── test_trainer_callback.py
    │   ├── test_trainer_visualization.py
    │   ├── test_unified_checkpoint.py
    │   ├── trainer_utils.py
    │   └── unified-ckpt-llama-170m/
    │       └── config.json
    ├── transformers/
    │   ├── __init__.py
    │   ├── auto/
    │   │   ├── __init__.py
    │   │   ├── test_configuration.py
    │   │   ├── test_feature_extraction.py
    │   │   ├── test_image_processor.py
    │   │   ├── test_modeling.py
    │   │   ├── test_processor.py
    │   │   ├── test_tokenizer.py
    │   │   ├── test_tokenizer_without_paddle.py
    │   │   └── test_video_processor.py
    │   ├── deepseek_v3/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── ernie4_5/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── ernie4_5_moe/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── ernie4_5_moe_vl/
    │   │   ├── __init__.py
    │   │   ├── test_modeling.py
    │   │   ├── test_processor.py
    │   │   ├── test_tokenizer.py
    │   │   └── test_vision_process.py
    │   ├── gemma3_text/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── glm4_moe/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── glm4v_moe/
    │   │   ├── __init__.py
    │   │   ├── test_image_processor.py
    │   │   ├── test_modeling.py
    │   │   └── test_processor.py
    │   ├── glm_ocr/
    │   │   ├── __init__.py
    │   │   ├── test_image_processor.py
    │   │   ├── test_modeling.py
    │   │   └── test_processor.py
    │   ├── gpt_oss/
    │   │   ├── __init__.py
    │   │   ├── test_fp4_to_bf16.py
    │   │   └── test_modeling.py
    │   ├── kimi_k2/
    │   │   └── test_modeling.py
    │   ├── kimi_k25/
    │   │   ├── __init__.py
    │   │   └── test_processor.py
    │   ├── llama/
    │   │   ├── __init__.py
    │   │   ├── test_modeling.py
    │   │   └── test_tokenizer.py
    │   ├── paddleocr_vl/
    │   │   ├── __init__.py
    │   │   ├── test_modeling.py
    │   │   └── test_processor.py
    │   ├── phi3/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── qwen2/
    │   │   ├── __init__.py
    │   │   ├── test_modeling.py
    │   │   └── test_tokenizer.py
    │   ├── qwen2_5_vl/
    │   │   ├── __init__.py
    │   │   ├── test_modeling.py
    │   │   └── test_processor.py
    │   ├── qwen2_vl/
    │   │   ├── __init__.py
    │   │   ├── test_image_processor.py
    │   │   ├── test_processor.py
    │   │   ├── test_video_processor.py
    │   │   └── test_vision_process.py
    │   ├── qwen2moe/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── qwen3/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── qwen3_omni_moe/
    │   │   ├── __init__.py
    │   │   └── test_processor.py
    │   ├── qwen3_vl/
    │   │   ├── __init__.py
    │   │   ├── test_modeling.py
    │   │   ├── test_processor.py
    │   │   └── test_video_processor.py
    │   ├── qwen3_vl_moe/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── qwen3moe/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── qwen3next/
    │   │   ├── __init__.py
    │   │   └── test_modeling.py
    │   ├── test_cache_utils.py
    │   ├── test_configuration_common.py
    │   ├── test_configuration_utils.py
    │   ├── test_conversion_common.py
    │   ├── test_conversion_tp_split_merge.py
    │   ├── test_generation_utils.py
    │   ├── test_hf_feature_extractor.py
    │   ├── test_hf_image_processor.py
    │   ├── test_hf_processor.py
    │   ├── test_hf_tokenizer.py
    │   ├── test_hf_video_processor.py
    │   ├── test_image_processing_common.py
    │   ├── test_masking_utils.py
    │   ├── test_modeling_common.py
    │   ├── test_modeling_rope_utils.py
    │   ├── test_modeling_utils.py
    │   ├── test_processing_common.py
    │   ├── test_ring_flash_attention.py
    │   ├── test_safetensors.py
    │   ├── test_segment_parallel_utils.py
    │   ├── test_tensor_parallel.py
    │   ├── test_utils.py
    │   └── test_video_processing_common.py
    ├── triton/
    │   └── test_rope_triton.py
    └── utils/
        ├── __init__.py
        ├── test_aistudio_download.py
        ├── test_downloader.py
        ├── test_import_utils.py
        ├── test_module/
        │   ├── __init__.py
        │   ├── custom_configuration.py
        │   ├── custom_model.py
        │   ├── custom_tokenizer.py
        │   └── custom_tokenizer_fast.py
        ├── test_serialization.py
        └── test_set_nccl_config.py