Showing preview only (9,933K chars total). Download the full file or copy to clipboard to get everything.
Repository: hiyouga/LlamaFactory
Branch: main
Commit: e67ab9e2f2c9
Files: 504
Total size: 9.4 MB
Directory structure:
gitextract_v77f1c8q/
├── .dockerignore
├── .gitattributes
├── .github/
│ ├── CODE_OF_CONDUCT.md
│ ├── CONTRIBUTING.md
│ ├── ISSUE_TEMPLATE/
│ │ ├── 1-bug-report.yml
│ │ ├── 2-feature-request.yml
│ │ └── config.yml
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── SECURITY.md
│ ├── copilot-instructions.md
│ ├── instructions-v0.md
│ ├── instructions-v1.md
│ └── workflows/
│ ├── docker.yml
│ ├── docs.yml
│ ├── label_issue.yml
│ ├── publish.yml
│ ├── tests.yml
│ ├── tests_cuda.yml
│ └── tests_npu.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CITATION.cff
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── README_zh.md
├── data/
│ ├── README.md
│ ├── README_zh.md
│ ├── alpaca_en_demo.json
│ ├── alpaca_zh_demo.json
│ ├── c4_demo.jsonl
│ ├── dataset_info.json
│ ├── dpo_en_demo.json
│ ├── dpo_zh_demo.json
│ ├── glaive_toolcall_en_demo.json
│ ├── glaive_toolcall_zh_demo.json
│ ├── identity.json
│ ├── kto_en_demo.json
│ ├── mllm_audio_demo.json
│ ├── mllm_demo.json
│ ├── mllm_demo_data/
│ │ └── 3.flac
│ ├── mllm_video_audio_demo.json
│ ├── mllm_video_demo.json
│ ├── reason_tool_use_demo_50.jsonl
│ ├── v1_dpo_demo.jsonl
│ ├── v1_dpo_demo.yaml
│ ├── v1_sft_demo.jsonl
│ ├── v1_sft_demo.yaml
│ └── wiki_demo.txt
├── docker/
│ ├── docker-cuda/
│ │ ├── Dockerfile
│ │ ├── Dockerfile.base
│ │ ├── Dockerfile.megatron
│ │ ├── README.md
│ │ └── docker-compose.yml
│ ├── docker-npu/
│ │ ├── Dockerfile
│ │ └── docker-compose.yml
│ └── docker-rocm/
│ ├── Dockerfile
│ └── docker-compose.yml
├── docs/
│ ├── Makefile
│ ├── _static/
│ │ ├── css/
│ │ │ └── lang-switcher.css
│ │ └── js/
│ │ └── switcher.js
│ ├── conf.py
│ ├── en/
│ │ ├── advanced/
│ │ │ ├── custom-kernels/
│ │ │ │ ├── custom-kernels.md
│ │ │ │ ├── fused-operators.md
│ │ │ │ └── triton.md
│ │ │ ├── distributed/
│ │ │ │ ├── deepspeed.md
│ │ │ │ ├── fsdp.md
│ │ │ │ └── parallel-dp-tp-ep-sp-cp.md
│ │ │ └── lora-and-quantization/
│ │ │ ├── lora.md
│ │ │ └── quantization.md
│ │ ├── conf.py
│ │ ├── data-preparation/
│ │ │ └── data-processing.md
│ │ ├── dev-guide/
│ │ │ ├── core/
│ │ │ │ ├── data-engine.md
│ │ │ │ ├── model-engine.md
│ │ │ │ └── trainer.md
│ │ │ └── plugins/
│ │ │ ├── data-plugins.md
│ │ │ └── model-plugins/
│ │ │ ├── initialization.md
│ │ │ ├── kernels.md
│ │ │ └── rendering.md
│ │ ├── getting-started.md
│ │ ├── hyperparameters/
│ │ │ ├── data-argument.md
│ │ │ ├── model-argument.md
│ │ │ ├── sample-argument.md
│ │ │ └── training-argument.md
│ │ ├── index.rst
│ │ ├── inference/
│ │ │ └── deploy.md
│ │ ├── installation.md
│ │ ├── llamaboard-web-ui.md
│ │ └── training/
│ │ ├── dpo.md
│ │ └── sft.md
│ ├── make.bat
│ ├── requirements.txt
│ └── zh/
│ ├── advanced/
│ │ ├── custom-kernels/
│ │ │ ├── custom-kernels.md
│ │ │ ├── fused-operators.md
│ │ │ └── triton.md
│ │ ├── distributed/
│ │ │ ├── deepspeed.md
│ │ │ ├── fsdp.md
│ │ │ └── parallel-dp-tp-ep-sp-cp.md
│ │ └── lora-and-quantization/
│ │ ├── lora.md
│ │ └── quantization.md
│ ├── conf.py
│ ├── data-preparation/
│ │ └── data-processing.md
│ ├── dev-guide/
│ │ ├── core/
│ │ │ ├── data-engine.md
│ │ │ ├── model-engine.md
│ │ │ └── trainer.md
│ │ └── plugins/
│ │ ├── data-plugins.md
│ │ └── model-plugins/
│ │ ├── initialization.md
│ │ ├── kernels.md
│ │ └── rendering.md
│ ├── getting-started.md
│ ├── hyperparameters/
│ │ ├── data-argument.md
│ │ ├── model-argument.md
│ │ ├── sample-argument.md
│ │ └── training-argument.md
│ ├── index.rst
│ ├── inference/
│ │ └── deploy.md
│ ├── installation.md
│ ├── llamaboard-web-ui.md
│ └── training/
│ ├── dpo.md
│ └── sft.md
├── examples/
│ ├── README.md
│ ├── README_zh.md
│ ├── accelerate/
│ │ ├── fsdp2_config.yaml
│ │ ├── fsdp_config.yaml
│ │ ├── fsdp_config_multiple_nodes.yaml
│ │ └── fsdp_config_offload.yaml
│ ├── ascend/
│ │ ├── qwen3_full_sft_fsdp2.yaml
│ │ ├── qwen3moe_full_sft_fsdp.yaml
│ │ ├── qwen3vlmoe_full_sft_fsdp2.yaml
│ │ └── qwen3vlmoe_lora_sft_fsdp.yaml
│ ├── deepspeed/
│ │ ├── ds_z0_config.json
│ │ ├── ds_z2_autotp_config.json
│ │ ├── ds_z2_config.json
│ │ ├── ds_z2_offload_config.json
│ │ ├── ds_z3_config.json
│ │ ├── ds_z3_fp8_config.json
│ │ └── ds_z3_offload_config.json
│ ├── extras/
│ │ ├── adam_mini/
│ │ │ └── qwen2_full_sft.yaml
│ │ ├── apollo/
│ │ │ └── llama3_full_sft.yaml
│ │ ├── asft/
│ │ │ ├── llama2_full_asft.yaml
│ │ │ └── qwen2_full_asft.yaml
│ │ ├── badam/
│ │ │ └── llama3_full_sft.yaml
│ │ ├── dft/
│ │ │ └── qwen2_full_sft.yaml
│ │ ├── eaft/
│ │ │ └── qwen25_05b_eaft_full.yaml
│ │ ├── fp8/
│ │ │ ├── llama3_fp8_deepspeed_sft.yaml
│ │ │ └── llama3_fp8_fsdp_sft.yaml
│ │ ├── fsdp_qlora/
│ │ │ ├── llama3_lora_sft.yaml
│ │ │ └── train.sh
│ │ ├── galore/
│ │ │ └── llama3_full_sft.yaml
│ │ ├── llama_pro/
│ │ │ ├── expand.sh
│ │ │ └── llama3_freeze_sft.yaml
│ │ ├── loraplus/
│ │ │ └── llama3_lora_sft.yaml
│ │ ├── mod/
│ │ │ └── llama3_full_sft.yaml
│ │ ├── multi_tokens/
│ │ │ └── tokens_cfg.yaml
│ │ ├── muon/
│ │ │ └── qwen2_full_sft.yaml
│ │ ├── nlg_eval/
│ │ │ └── llama3_lora_predict.yaml
│ │ ├── oft/
│ │ │ ├── llama3_oft_sft.yaml
│ │ │ └── qwen2_5vl_oft_sft.yaml
│ │ ├── pissa/
│ │ │ ├── init.sh
│ │ │ └── llama3_lora_sft.yaml
│ │ └── qoft/
│ │ ├── llama3_oft_sft_awq.yaml
│ │ ├── llama3_oft_sft_bnb_npu.yaml
│ │ └── llama3_oft_sft_gptq.yaml
│ ├── inference/
│ │ ├── qwen3.yaml
│ │ ├── qwen3_full_sft.yaml
│ │ ├── qwen3_lora_sft.yaml
│ │ └── qwen3vl.yaml
│ ├── ktransformers/
│ │ ├── infer_lora/
│ │ │ ├── deepseek2_lora_sft_kt.yaml
│ │ │ ├── deepseek3_kt.yaml
│ │ │ ├── deepseek3_lora_sft_kt.yaml
│ │ │ └── qwen3moe_lora_sft_kt.yaml
│ │ ├── kt_optimize_rules/
│ │ │ ├── DeepSeek-V2-Chat-sft-amx.yaml
│ │ │ ├── DeepSeek-V2-Chat.yaml
│ │ │ ├── DeepSeek-V2-Lite-Chat-sft-amx-multi-gpu.yaml
│ │ │ ├── DeepSeek-V2-Lite-Chat-sft-amx.yaml
│ │ │ ├── DeepSeek-V2-Lite-Chat-sft.yaml
│ │ │ ├── DeepSeek-V2-Lite-Chat.yaml
│ │ │ ├── DeepSeek-V3-Chat-amx.yaml
│ │ │ ├── DeepSeek-V3-Chat-sft-amx-multi-gpu-4.yaml
│ │ │ ├── DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml
│ │ │ ├── DeepSeek-V3-Chat-sft-amx.yaml
│ │ │ └── Qwen3Moe-sft-amx.yaml
│ │ └── train_lora/
│ │ ├── deepseek2_lora_sft_kt.yaml
│ │ ├── deepseek3_lora_sft_kt.yaml
│ │ └── qwen3moe_lora_sft_kt.yaml
│ ├── megatron/
│ │ ├── qwen2_vl_full.yaml
│ │ └── qwen3_moe_full.yaml
│ ├── merge_lora/
│ │ ├── qwen3_full_sft.yaml
│ │ ├── qwen3_gptq.yaml
│ │ ├── qwen3_lora_sft.yaml
│ │ └── qwen3vl_lora_sft.yaml
│ ├── train_full/
│ │ ├── qwen3_full_sft.yaml
│ │ └── qwen3vl_full_sft.yaml
│ ├── train_lora/
│ │ ├── qwen3_lora_dpo.yaml
│ │ ├── qwen3_lora_kto.yaml
│ │ ├── qwen3_lora_pretrain.yaml
│ │ ├── qwen3_lora_reward.yaml
│ │ ├── qwen3_lora_sft.sh
│ │ ├── qwen3_lora_sft.yaml
│ │ ├── qwen3_lora_sft_ds3.yaml
│ │ ├── qwen3_lora_sft_ray.yaml
│ │ ├── qwen3_preprocess.yaml
│ │ ├── qwen3vl_lora_dpo.yaml
│ │ └── qwen3vl_lora_sft.yaml
│ ├── train_qlora/
│ │ ├── llama3_lora_sft_aqlm.yaml
│ │ ├── llama3_lora_sft_awq.yaml
│ │ ├── llama3_lora_sft_gptq.yaml
│ │ ├── qwen3_lora_sft_bnb_npu.yaml
│ │ └── qwen3_lora_sft_otfq.yaml
│ └── v1/
│ ├── train_freeze/
│ │ └── train_freeze_sft.yaml
│ ├── train_full/
│ │ ├── train_full_deepspeed.yaml
│ │ └── train_full_fsdp2.yaml
│ ├── train_lora/
│ │ ├── export_lora.yaml
│ │ └── train_lora_sft.yaml
│ └── train_qlora/
│ └── quantization.yaml
├── pyproject.toml
├── requirements/
│ ├── adam-mini.txt
│ ├── apollo.txt
│ ├── aqlm.txt
│ ├── badam.txt
│ ├── bitsandbytes.txt
│ ├── deepspeed.txt
│ ├── dev.txt
│ ├── eetq.txt
│ ├── fp8-te.txt
│ ├── fp8.txt
│ ├── galore.txt
│ ├── gptq.txt
│ ├── hqq.txt
│ ├── liger-kernel.txt
│ ├── metrics.txt
│ ├── minicpm-v.txt
│ ├── npu.txt
│ ├── openmind.txt
│ ├── sglang.txt
│ ├── swanlab.txt
│ └── vllm.txt
├── scripts/
│ ├── api_example/
│ │ ├── test_image.py
│ │ └── test_toolcall.py
│ ├── bench_qwen.py
│ ├── convert_ckpt/
│ │ ├── llamafy_baichuan2.py
│ │ ├── llamafy_qwen.py
│ │ ├── tiny_llama4.py
│ │ └── tiny_qwen3.py
│ ├── eval_bleu_rouge.py
│ ├── hf2dcp.py
│ ├── llama_pro.py
│ ├── loftq_init.py
│ ├── megatron_merge.py
│ ├── pissa_init.py
│ ├── qwen_omni_merge.py
│ ├── stat_utils/
│ │ ├── cal_flops.py
│ │ ├── cal_lr.py
│ │ ├── cal_mfu.py
│ │ ├── cal_ppl.py
│ │ └── length_cdf.py
│ └── vllm_infer.py
├── src/
│ ├── api.py
│ ├── llamafactory/
│ │ ├── __init__.py
│ │ ├── api/
│ │ │ ├── __init__.py
│ │ │ ├── app.py
│ │ │ ├── chat.py
│ │ │ ├── common.py
│ │ │ └── protocol.py
│ │ ├── chat/
│ │ │ ├── __init__.py
│ │ │ ├── base_engine.py
│ │ │ ├── chat_model.py
│ │ │ ├── hf_engine.py
│ │ │ ├── kt_engine.py
│ │ │ ├── sglang_engine.py
│ │ │ └── vllm_engine.py
│ │ ├── cli.py
│ │ ├── data/
│ │ │ ├── __init__.py
│ │ │ ├── collator.py
│ │ │ ├── converter.py
│ │ │ ├── data_utils.py
│ │ │ ├── formatter.py
│ │ │ ├── loader.py
│ │ │ ├── mm_plugin.py
│ │ │ ├── parser.py
│ │ │ ├── processor/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── feedback.py
│ │ │ │ ├── pairwise.py
│ │ │ │ ├── pretrain.py
│ │ │ │ ├── processor_utils.py
│ │ │ │ ├── supervised.py
│ │ │ │ └── unsupervised.py
│ │ │ ├── template.py
│ │ │ └── tool_utils.py
│ │ ├── eval/
│ │ │ ├── __init__.py
│ │ │ ├── evaluator.py
│ │ │ └── template.py
│ │ ├── extras/
│ │ │ ├── __init__.py
│ │ │ ├── constants.py
│ │ │ ├── env.py
│ │ │ ├── logging.py
│ │ │ ├── misc.py
│ │ │ ├── packages.py
│ │ │ └── ploting.py
│ │ ├── hparams/
│ │ │ ├── __init__.py
│ │ │ ├── data_args.py
│ │ │ ├── evaluation_args.py
│ │ │ ├── finetuning_args.py
│ │ │ ├── generating_args.py
│ │ │ ├── model_args.py
│ │ │ ├── parser.py
│ │ │ └── training_args.py
│ │ ├── launcher.py
│ │ ├── model/
│ │ │ ├── __init__.py
│ │ │ ├── adapter.py
│ │ │ ├── loader.py
│ │ │ ├── model_utils/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── attention.py
│ │ │ │ ├── checkpointing.py
│ │ │ │ ├── embedding.py
│ │ │ │ ├── ktransformers.py
│ │ │ │ ├── kv_cache.py
│ │ │ │ ├── liger_kernel.py
│ │ │ │ ├── longlora.py
│ │ │ │ ├── misc.py
│ │ │ │ ├── mod.py
│ │ │ │ ├── moe.py
│ │ │ │ ├── packing.py
│ │ │ │ ├── quantization.py
│ │ │ │ ├── rope.py
│ │ │ │ ├── unsloth.py
│ │ │ │ ├── valuehead.py
│ │ │ │ └── visual.py
│ │ │ └── patcher.py
│ │ ├── third_party/
│ │ │ ├── __init__.py
│ │ │ └── muon/
│ │ │ ├── __init__.py
│ │ │ └── muon.py
│ │ ├── train/
│ │ │ ├── __init__.py
│ │ │ ├── callbacks.py
│ │ │ ├── dpo/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ktrainer.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── fp8_utils.py
│ │ │ ├── kto/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── mca/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── ppo/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ppo_utils.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── pt/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── rm/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── metric.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── sft/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── metric.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── test_utils.py
│ │ │ ├── trainer_utils.py
│ │ │ └── tuner.py
│ │ ├── v1/
│ │ │ ├── __init__.py
│ │ │ ├── accelerator/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── helper.py
│ │ │ │ ├── interface.py
│ │ │ │ └── profiler.py
│ │ │ ├── config/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── arg_parser.py
│ │ │ │ ├── arg_utils.py
│ │ │ │ ├── data_args.py
│ │ │ │ ├── model_args.py
│ │ │ │ ├── sample_args.py
│ │ │ │ └── training_args.py
│ │ │ ├── core/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base_sampler.py
│ │ │ │ ├── base_trainer.py
│ │ │ │ ├── data_engine.py
│ │ │ │ ├── model_engine.py
│ │ │ │ └── utils/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── batching.py
│ │ │ │ ├── callback.py
│ │ │ │ ├── inference_engine.py
│ │ │ │ └── rendering.py
│ │ │ ├── launcher.py
│ │ │ ├── plugins/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── data_plugins/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── converter.py
│ │ │ │ │ └── loader.py
│ │ │ │ ├── model_plugins/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── add_token.py
│ │ │ │ │ ├── initialization.py
│ │ │ │ │ ├── kernels/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── base.py
│ │ │ │ │ │ ├── interface.py
│ │ │ │ │ │ ├── ops/
│ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ ├── mlp/
│ │ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ │ ├── npu_fused_moe.py
│ │ │ │ │ │ │ │ └── npu_swiglu.py
│ │ │ │ │ │ │ ├── rms_norm/
│ │ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ │ └── npu_rms_norm.py
│ │ │ │ │ │ │ └── rope/
│ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ └── npu_rope.py
│ │ │ │ │ │ └── registry.py
│ │ │ │ │ ├── peft.py
│ │ │ │ │ ├── quantization.py
│ │ │ │ │ ├── rendering.py
│ │ │ │ │ └── templates/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── qwen3.py
│ │ │ │ │ └── qwen3_nothink.py
│ │ │ │ ├── sampler_plugins/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── vllm.py
│ │ │ │ └── trainer_plugins/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── batching.py
│ │ │ │ ├── distributed/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── deepspeed.py
│ │ │ │ │ ├── fsdp2.py
│ │ │ │ │ └── hub.py
│ │ │ │ ├── lr_scheduler.py
│ │ │ │ └── optimizer.py
│ │ │ ├── samplers/
│ │ │ │ └── cli_sampler.py
│ │ │ ├── trainers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dpo_trainer.py
│ │ │ │ ├── rm_trainer.py
│ │ │ │ └── sft_trainer.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── constants.py
│ │ │ ├── dtype.py
│ │ │ ├── env.py
│ │ │ ├── helper.py
│ │ │ ├── logging.py
│ │ │ ├── objects.py
│ │ │ ├── packages.py
│ │ │ ├── plugin.py
│ │ │ ├── pytest.py
│ │ │ └── types.py
│ │ └── webui/
│ │ ├── __init__.py
│ │ ├── chatter.py
│ │ ├── common.py
│ │ ├── components/
│ │ │ ├── __init__.py
│ │ │ ├── chatbot.py
│ │ │ ├── data.py
│ │ │ ├── eval.py
│ │ │ ├── export.py
│ │ │ ├── footer.py
│ │ │ ├── infer.py
│ │ │ ├── top.py
│ │ │ └── train.py
│ │ ├── control.py
│ │ ├── css.py
│ │ ├── engine.py
│ │ ├── interface.py
│ │ ├── locales.py
│ │ ├── manager.py
│ │ └── runner.py
│ ├── train.py
│ └── webui.py
├── tests/
│ ├── check_license.py
│ ├── conftest.py
│ ├── data/
│ │ ├── processor/
│ │ │ ├── test_feedback.py
│ │ │ ├── test_pairwise.py
│ │ │ ├── test_processor_utils.py
│ │ │ ├── test_supervised.py
│ │ │ └── test_unsupervised.py
│ │ ├── test_collator.py
│ │ ├── test_converter.py
│ │ ├── test_formatter.py
│ │ ├── test_loader.py
│ │ ├── test_mm_plugin.py
│ │ └── test_template.py
│ ├── e2e/
│ │ ├── test_chat.py
│ │ ├── test_sglang.py
│ │ └── test_train.py
│ ├── eval/
│ │ └── test_eval_template.py
│ ├── model/
│ │ ├── model_utils/
│ │ │ ├── test_add_tokens.py
│ │ │ ├── test_attention.py
│ │ │ ├── test_checkpointing.py
│ │ │ ├── test_misc.py
│ │ │ ├── test_packing.py
│ │ │ └── test_visual.py
│ │ ├── test_base.py
│ │ ├── test_freeze.py
│ │ ├── test_full.py
│ │ ├── test_lora.py
│ │ └── test_pissa.py
│ ├── train/
│ │ └── test_sft_trainer.py
│ └── version.txt
└── tests_v1/
├── accelerator/
│ └── test_interface.py
├── config/
│ └── test_args_parser.py
├── conftest.py
├── core/
│ ├── test_data_engine.py
│ ├── test_model_loader.py
│ └── utils/
│ ├── test_batching.py
│ └── test_rendering.py
├── plugins/
│ ├── data_plugins/
│ │ └── test_converter.py
│ ├── model_plugins/
│ │ ├── test_init_plugin.py
│ │ ├── test_kernel_plugin.py
│ │ ├── test_peft.py
│ │ └── test_quantization_plugin.py
│ └── trainer_plugins/
│ └── distributed/
│ └── test_fsdp2.py
├── sampler/
│ └── test_cli_sampler.py
└── trainers/
└── test_fsdp2_sft_trainer.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .dockerignore
================================================
.vscode
.git
.github
.venv
cache
docker
saves
hf_cache
ms_cache
om_cache
shared_data
output
.dockerignore
.gitattributes
.gitignore
================================================
FILE: .gitattributes
================================================
# Auto detect text files and perform LF normalization
* text=auto
================================================
FILE: .github/CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
overall community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or
advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
`hoshihiyouga AT gmail DOT com`.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series
of actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
================================================
FILE: .github/CONTRIBUTING.md
================================================
# Contributing to LLaMA Factory
Everyone is welcome to contribute, and we value everybody's contribution. Code contributions are not the only way to help the community. Answering questions, helping others, and improving the documentation are also immensely valuable.
It also helps us if you spread the word! Reference the library in blog posts about the awesome projects it made possible, shout out on Twitter every time it has helped you, or simply ⭐️ the repository to say thank you.
However you choose to contribute, please be mindful and respect our [code of conduct](CODE_OF_CONDUCT.md).
**This guide was heavily inspired by [transformers guide to contributing](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md).**
## Ways to contribute
There are several ways you can contribute to LLaMA Factory:
* Fix outstanding issues with the existing code.
* Submit issues related to bugs or desired new features.
* Contribute to the examples or to the documentation.
### Style guide
LLaMA Factory follows the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html), check it for details.
### Create a Pull Request
1. Fork the [repository](https://github.com/hiyouga/LLaMA-Factory) by clicking on the [Fork](https://github.com/hiyouga/LLaMA-Factory/fork) button on the repository's page. This creates a copy of the code under your GitHub user account.
2. Clone your fork to your local disk, and add the base repository as a remote:
```bash
git clone git@github.com:[username]/LLaMA-Factory.git
cd LLaMA-Factory
git remote add upstream https://github.com/hiyouga/LLaMA-Factory.git
```
3. Create a new branch to hold your development changes:
```bash
git checkout -b dev_your_branch
```
4. Set up a development environment by running the following command in a virtual environment:
```bash
pip install -e ".[dev]"
```
If LLaMA Factory was already installed in the virtual environment, remove it with `pip uninstall llamafactory` before reinstalling it in editable mode with the `-e` flag.
5. Check code before commit:
```bash
make commit
make style && make quality
make test
```
6. Submit changes:
```bash
git add .
git commit -m "commit message"
git fetch upstream
git rebase upstream/main
git push -u origin dev_your_branch
```
7. Create a pull request from your branch `dev_your_branch` to the `main` branch of the [upstream repository](https://github.com/hiyouga/LLaMA-Factory).
================================================
FILE: .github/ISSUE_TEMPLATE/1-bug-report.yml
================================================
name: "\U0001F41B Bug / help"
description: Create a report to help us improve the LLaMA Factory
labels: ["bug", "pending"]
body:
- type: markdown
attributes:
value: |
Issues included in **[FAQs](https://github.com/hiyouga/LLaMA-Factory/issues/4614)** or those with **insufficient** information may be closed without a response.
已经包含在 **[常见问题](https://github.com/hiyouga/LLaMA-Factory/issues/4614)** 内或提供信息**不完整**的 issues 可能不会被回复。
- type: markdown
attributes:
value: |
Please do not create issues that are not related to framework bugs under this category, use **[Discussions](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)** instead.
请勿在此分类下创建和框架 bug 无关的 issues,训练问题求助请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。
- type: checkboxes
id: reminder
attributes:
label: Reminder
description: |
Please ensure you have read the above rules carefully and searched the existing issues (including FAQs).
请确保您已经认真阅读了上述规则并且搜索过现有的 issues(包括常见问题)。
options:
- label: I have read the above rules and searched the existing issues.
required: true
- type: textarea
id: system-info
validations:
required: true
attributes:
label: System Info
description: |
Please share your system info with us. You can run the command **llamafactory-cli env** and copy-paste its output below.
请提供您的系统信息。您可以在命令行运行 **llamafactory-cli env** 并将其输出复制到该文本框中。
placeholder: llamafactory version, platform, python version, ...
- type: textarea
id: reproduction
validations:
required: true
attributes:
label: Reproduction
description: |
Please provide entry arguments, error messages and stack traces that reproduces the problem.
请提供入口参数,错误日志以及异常堆栈以便于我们复现问题。
value: |
```text
Put your message here.
```
- type: textarea
id: others
validations:
required: false
attributes:
label: Others
================================================
FILE: .github/ISSUE_TEMPLATE/2-feature-request.yml
================================================
name: "\U0001F680 Feature request"
description: Submit a request for a new feature
labels: ["enhancement", "pending"]
body:
- type: markdown
attributes:
value: |
Please do not create issues that are not related to new features under this category.
请勿在此分类下创建和新特性无关的 issues。
- type: checkboxes
id: reminder
attributes:
label: Reminder
description: |
Please ensure you have read the above rules carefully and searched the existing issues.
请确保您已经认真阅读了上述规则并且搜索过现有的 issues。
options:
- label: I have read the above rules and searched the existing issues.
required: true
- type: textarea
id: description
validations:
required: true
attributes:
label: Description
description: |
A clear and concise description of the feature proposal.
请详细描述您希望加入的新功能特性。
- type: textarea
id: contribution
validations:
required: false
attributes:
label: Pull Request
description: |
Have you already created the relevant PR and submitted the code?
您是否已经创建了相关 PR 并提交了代码?
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links:
- name: 📚 FAQs | 常见问题
url: https://github.com/hiyouga/LLaMA-Factory/issues/4614
about: Reading in advance is recommended | 建议提前阅读
- name: Discussions | 讨论区
url: https://github.com/hiyouga/LLaMA-Factory/discussions
about: Please ask fine-tuning questions here | 请在这里讨论训练问题
================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
# What does this PR do?
Fixes # (issue)
## Before submitting
- [ ] Did you read the [contributor guideline](https://github.com/hiyouga/LLaMA-Factory/blob/main/.github/CONTRIBUTING.md)?
- [ ] Did you write any new necessary tests?
================================================
FILE: .github/SECURITY.md
================================================
# Reporting Security Issues
To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/hiyouga/LLaMA-Factory/security/advisories/new) tab.
We will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
Report security bugs in third-party modules to the person or team maintaining the module.
================================================
FILE: .github/copilot-instructions.md
================================================
# GitHub Copilot Instructions for LLaMA Factory
## Project Overview
LLaMA Factory is an efficient fine-tuning framework for 100+ large language models (LLMs). It provides:
- Support for various models: LLaMA, LLaVA, Mistral, Qwen, DeepSeek, Yi, Gemma, ChatGLM, Phi, etc.
- Multiple training methods: pre-training, supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO
- Scalable resources: 16-bit full-tuning, freeze-tuning, LoRA and QLoRA variants
- Advanced algorithms: GaLore, BAdam, APOLLO, Adam-mini, Muon, OFT, DoRA, etc.
- Web UI (LLaMA Board) and CLI interfaces
### Architecture Versions
LLaMA Factory has two parallel architectures that can be switched via the `USE_V1` environment variable:
**v0 (default)** - File hierarchy:
- `api`, `webui` → `chat`, `eval`, `train` → `data`, `model` → `hparams` → `extras`
**v1** - File hierarchy:
- `trainers` → `core` → `accelerator`, `plugins`, `config` → `utils`
Set `USE_V1=1` to enable v1 architecture.
## Code Structure
### v0 Architecture (Default)
- `src/llamafactory/` - Main package directory
- `api/` - OpenAI-style API implementation
- `chat/` - Chat interface implementation
- `cli.py` - Command-line interface
- `data/` - Data processing and dataset handling
- `eval/` - Model evaluation utilities
- `extras/` - Additional utilities and helpers
- `hparams/` - Hyperparameter definitions
- `model/` - Model loading, patching, and utilities
- `train/` - Training pipeline implementation
- `webui/` - Gradio-based web interface
- `src/train.py` - Training entry script (delegates to `llamafactory.train.tuner`)
- `src/webui.py` - Web UI entry script (delegates to `llamafactory.webui.interface`)
- `src/api.py` - API server entry script (delegates to `llamafactory.api.app`)
- `tests/` - Test suite
- `examples/` - Example configurations for various training scenarios
- `data/` - Dataset definitions and examples
### v1 Architecture (USE_V1=1)
- `src/llamafactory/v1/` - Version 1 package directory
- `trainers/` - Training implementations
- `core/` - Core training utilities
- `accelerator/` - Acceleration and distributed training
- `plugins/` - Pluggable components (model, data, sampler, trainer)
- `config/` - Configuration management
- `utils/` - Utility functions
## Development Practices
### Code Style
- Follow the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html)
- Use ruff for linting and formatting
- Line length: 119 characters
- Indentation: 4 spaces
- Quote style: double quotes
- Use Google-style docstrings for documentation
### Import Organization
- Known first-party: `llamafactory`
- Known third-party: `accelerate`, `datasets`, `gradio`, `numpy`, `peft`, `torch`, `transformers`, `trl`
- Use 2 blank lines after imports
### Quality Checks
Before committing code, run:
```bash
make style # Auto-fix style issues
make quality # Check code quality
make test # Run test suite
```
Or use the combined command:
```bash
make commit # Run pre-commit hooks
```
### Testing
- Use pytest for testing
- Tests are located in `tests/` and `tests_v1/` directories
- Run tests with: `make test` (which runs `WANDB_DISABLED=true pytest -vv --import-mode=importlib tests/ tests_v1/`)
- Disable wandb during testing to avoid external dependencies
- **Note**: Training configurations require GPU machines, so training is typically not tested end-to-end. Use `make test` to validate file-level functionality.
### Building
Build the package with:
```bash
pip3 install build && python3 -m build
```
### License
- All source files must include the Apache 2.0 license header
- Check license headers with: `make license`
## Common Patterns
### Configuration Files
- Training configurations are typically YAML or JSON files in `examples/` directory
- Hyperparameters are defined using dataclasses in `src/llamafactory/hparams/`
### Model Support
- New model support is added through model patches in `src/llamafactory/model/`
- Visual models use the visual utilities in `src/llamafactory/model/model_utils/visual.py`
- Quantization support is in `src/llamafactory/model/model_utils/quantization.py`
### Data Processing
- Dataset definitions are in `data/dataset_info.json`
- Data templates and processors are in `src/llamafactory/data/`
### Training
- Training pipelines are in `src/llamafactory/train/`
- Support for different training methods: SFT, DPO, PPO, RM, PT, KTO, ORPO
## Key Dependencies
- Python >= 3.9.0
- PyTorch and transformers for model handling
- datasets for data processing
- peft for parameter-efficient fine-tuning
- accelerate for distributed training
- gradio for web UI
- trl for reinforcement learning
- Optional: vllm/sglang for inference, flash-attention-2, unsloth, liger-kernel
## Entry Points
- **CLI Training**: `llamafactory-cli train --config examples/train_lora/llama3_lora_sft.yaml`
- **Web UI**: `llamafactory-cli webui` or `python src/webui.py`
- **API Server**: `llamafactory-cli api` or `python src/api.py`
- **Chat Interface**: `llamafactory-cli chat --model_name_or_path MODEL_PATH`
## Environment Setup
For development:
```bash
pip install -e ".[dev]"
```
## Important Notes
- The project supports multiple backends: default PyTorch, vLLM, SGLang
- Megatron-core training is supported via mcore_adapter
- SwanLab and W&B are supported for experiment tracking
- Docker support is available with pre-built images
- Day-0/Day-1 support for latest cutting-edge models
- Multi-modal support for vision and audio understanding tasks
## Contribution Guidelines
1. Fork the repository
2. Create a development branch
3. Set up development environment with `pip install -e ".[dev]"`
4. Make changes following the style guide
5. Run quality checks: `make style && make quality`
6. Run tests: `make test`
7. Submit a pull request
## Common Commands
- `make style` - Format code
- `make quality` - Run linters
- `make test` - Run tests
- `make commit` - Install and run pre-commit hooks
- `make license` - Check license headers
================================================
FILE: .github/instructions-v0.md
================================================
================================================
FILE: .github/instructions-v1.md
================================================
================================================
FILE: .github/workflows/docker.yml
================================================
# Builds and publishes the LlamaFactory Docker images (CUDA plus Ascend NPU A2/A3 variants).
# Pull requests only build (push is disabled); pushes to main publish the "latest" tag and
# releases publish a tag derived from the package VERSION.
name: docker

on:
  workflow_dispatch:
  push:
    branches:
      - "main"
    paths:
      - "**/*.py"
      - "pyproject.toml"
      - "docker/**"
      - ".github/workflows/*.yml"
  pull_request:
    branches:
      - "main"
    paths:
      - "**/*.py"
      - "pyproject.toml"
      - "docker/**"
      - ".github/workflows/*.yml"
  release:
    types:
      - published

jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        include:
          - device: "cuda"
          - device: "npu-a2"
          - device: "npu-a3"
    runs-on: ubuntu-latest
    concurrency:
      # One in-flight build per ref and device; superseded runs on non-main refs are cancelled.
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.device }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    environment:
      name: docker
      url: https://hub.docker.com/r/hiyouga/llamafactory
    steps:
      # Image builds are large; reclaim runner disk first (keep existing docker images).
      - name: Free up disk space
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          tool-cache: true
          docker-images: false
      - name: Checkout
        uses: actions/checkout@v6
      # Release builds are tagged with the VERSION string parsed out of extras/env.py;
      # every other trigger produces the moving "latest" tag.
      - name: Get llamafactory version
        id: version
        run: |
          if [ "${{ github.event_name }}" = "release" ]; then
            echo "tag=$(grep -oP 'VERSION = "\K[^"]+' src/llamafactory/extras/env.py)" >> "$GITHUB_OUTPUT"
          else
            echo "tag=latest" >> "$GITHUB_OUTPUT"
          fi
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      # Registry logins are skipped for pull requests (no push happens there anyway).
      - name: Login to Docker Hub
        if: ${{ github.event_name != 'pull_request' }}
        uses: docker/login-action@v3
        with:
          username: ${{ vars.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      # NPU images are additionally mirrored to quay.io/ascend.
      - name: Login to Quay
        if: ${{ github.event_name != 'pull_request' && startsWith(matrix.device, 'npu') }}
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ vars.QUAY_ASCEND_USERNAME }}
          password: ${{ secrets.QUAY_ASCEND_TOKEN }}
      - name: Build and push Docker image (CUDA)
        if: ${{ matrix.device == 'cuda' }}
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./docker/docker-cuda/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
            docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}
      - name: Build and push Docker image (NPU-A2)
        if: ${{ matrix.device == 'npu-a2' }}
        uses: docker/build-push-action@v6
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          file: ./docker/docker-npu/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
            docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
            quay.io/ascend/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
      # A3 reuses the NPU Dockerfile but swaps in the A3 CANN base image via build-args.
      - name: Build and push Docker image (NPU-A3)
        if: ${{ matrix.device == 'npu-a3' }}
        uses: docker/build-push-action@v6
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          file: ./docker/docker-npu/Dockerfile
          build-args: |
            BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
            docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}-npu-a3
            quay.io/ascend/llamafactory:${{ steps.version.outputs.tag }}-npu-a3
================================================
FILE: .github/workflows/docs.yml
================================================
# Builds the Sphinx documentation (Chinese and English trees) and deploys it to GitHub Pages.
name: Build and Deploy Sphinx Docs

on:
  push:
    branches: ["main"]
    paths:
      - "docs/**"
  pull_request:
    branches: ["main"]
    paths:
      - "docs/**"
  workflow_dispatch:

# id-token: write is required by actions/deploy-pages for OIDC-based deployment.
permissions:
  contents: read
  pages: write
  id-token: write

# Serialize Pages deployments; never cancel an in-flight deploy.
concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Install dependencies
        run: |
          pip install -r docs/requirements.txt
      - name: Build Sphinx
        run: |
          sphinx-build -b html docs/zh docs/_build/html/zh
          sphinx-build -b html docs/en docs/_build/html/en
          # Emit a root index.html that redirects to the Chinese docs, preserving
          # the query string and hash via the JS fallback.
          printf '%s\n' \
            '<!DOCTYPE html>' \
            '<html>' \
            ' <head>' \
            ' <meta charset="utf-8" />' \
            ' <meta http-equiv="refresh" content="0; url=zh/index.html" />' \
            ' <script>window.location.href="zh/index.html"+window.location.search+window.location.hash;</script>' \
            ' <title>Redirecting...</title>' \
            ' </head>' \
            ' <body>' \
            ' <a href="zh/index.html">Redirecting...</a>' \
            ' </body>' \
            '</html>' \
            > docs/_build/html/index.html
          # .nojekyll stops GitHub Pages from running the output through Jekyll.
          touch docs/_build/html/.nojekyll
      - name: Setup Pages
        uses: actions/configure-pages@v5
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: docs/_build/html
  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
================================================
FILE: .github/workflows/label_issue.yml
================================================
# Auto-labels newly opened issues: titles mentioning Ascend NPU keywords get the "npu" label.
name: label_issue

on:
  issues:
    types:
      - opened

jobs:
  label_issue:
    runs-on: ubuntu-latest
    permissions:
      issues: write
    steps:
      - env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Untrusted issue fields are passed via env vars rather than interpolated
          # into the script, which prevents shell injection through issue titles.
          ISSUE_URL: ${{ github.event.issue.html_url }}
          ISSUE_TITLE: ${{ github.event.issue.title }}
        run: |
          LABEL=""
          NPU_KEYWORDS=(npu huawei ascend 华为 昇腾 910)
          # Use printf with a quoted expansion: `echo $ISSUE_TITLE` word-splits and
          # glob-expands the title (a title containing `*` would match filenames),
          # and `echo` misinterprets titles beginning with -n/-e as options.
          ISSUE_TITLE_LOWER=$(printf '%s' "$ISSUE_TITLE" | tr '[:upper:]' '[:lower:]')
          for KEYWORD in "${NPU_KEYWORDS[@]}"; do
            # The *input* guard avoids a false positive: "input" contains "npu".
            if [[ $ISSUE_TITLE_LOWER == *"$KEYWORD"* ]] && [[ $ISSUE_TITLE_LOWER != *input* ]]; then
              LABEL="npu"
              break
            fi
          done
          if [ -n "$LABEL" ]; then
            gh issue edit "$ISSUE_URL" --add-label "$LABEL"
          fi
================================================
FILE: .github/workflows/publish.yml
================================================
# Builds and uploads the release package to PyPI using OIDC trusted publishing
# (id-token: write), so no PyPI API token is stored as a secret.
name: publish

on:
  workflow_dispatch:
  release:
    types:
      - published

jobs:
  publish:
    name: Upload release to PyPI
    runs-on: ubuntu-latest
    environment:
      name: release
      url: https://pypi.org/p/llamafactory
    permissions:
      id-token: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          python-version: "3.11"
          github-token: ${{ github.token }}
      # `make build` delegates to `uv build` when uv is available (see Makefile).
      - name: Build package
        run: |
          make build
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@release/v1
================================================
FILE: .github/workflows/tests.yml
================================================
# CPU test suite: runs lint/license/build checks and pytest across Python 3.11-3.13
# on Linux/Windows/macOS, plus pinned older transformers versions for backward compatibility.
name: tests

on:
  workflow_dispatch:
  push:
    branches:
      - "main"
    paths:
      - "**/*.py"
      - "pyproject.toml"
      - "Makefile"
      - ".github/workflows/*.yml"
  pull_request:
    branches:
      - "main"
    paths:
      - "**/*.py"
      - "pyproject.toml"
      - "Makefile"
      - ".github/workflows/*.yml"

jobs:
  tests:
    strategy:
      fail-fast: false
      matrix:
        python:
          - "3.11"
          - "3.12"
          - "3.13"
        os:
          - "ubuntu-latest"
          - "windows-latest"
          - "macos-latest"
        # Empty string means "latest transformers" (whatever the package resolves to).
        transformers:
          - ""
        include: # test backward compatibility
          - python: "3.11"
            os: "ubuntu-latest"
            transformers: "4.51.0"
          - python: "3.11"
            os: "ubuntu-latest"
            transformers: "4.53.0"
          - python: "3.11"
            os: "ubuntu-latest"
            transformers: "4.55.0"
    runs-on: ${{ matrix.os }}
    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.python }}-${{ matrix.transformers }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    env:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      OS_NAME: ${{ matrix.os }}
      UV_NO_SYNC: 1
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ matrix.python }}
          github-token: ${{ github.token }}
          enable-cache: false
      - name: Install dependencies
        run: |
          uv venv
          # CPU-only torch wheels keep the install small and GPU-free.
          uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
          uv pip install -e .
          uv pip install -r requirements/dev.txt
      # Only the backward-compatibility matrix entries pin transformers.
      - name: Install transformers
        if: ${{ matrix.transformers }}
        run: |
          uv pip install "transformers==${{ matrix.transformers }}"
      - name: Cache files
        id: hf-hub-cache
        uses: actions/cache@v5
        with:
          path: ${{ runner.temp }}/huggingface
          key: huggingface-${{ matrix.os }}-${{ matrix.python }}-${{ matrix.transformers }}-${{ hashFiles('tests/version.txt') }}
      - name: Check quality
        run: |
          make style && make quality
      - name: Check license
        run: |
          make license
      - name: Check build
        run: |
          make build
      - name: Test with pytest
        run: |
          make test
        env:
          HF_HOME: ${{ runner.temp }}/huggingface
          # When the HF hub cache was restored, run fully offline to avoid flaky downloads.
          HF_HUB_OFFLINE: "${{ steps.hf-hub-cache.outputs.cache-hit == 'true' && '1' || '0' }}"
================================================
FILE: .github/workflows/tests_cuda.yml
================================================
# GPU test suite: same checks as the CPU workflow, but on a self-hosted 2-GPU CUDA runner
# with bitsandbytes installed in addition to the dev requirements.
name: tests_cuda

on:
  workflow_dispatch:
  push:
    branches:
      - "main"
    paths:
      - "**/*.py"
      - "pyproject.toml"
      - "Makefile"
      - ".github/workflows/*.yml"
  pull_request:
    branches:
      - "main"
    paths:
      - "**/*.py"
      - "pyproject.toml"
      - "Makefile"
      - ".github/workflows/*.yml"

jobs:
  tests:
    strategy:
      fail-fast: false
      matrix:
        python:
          - "3.11"
        os:
          # Self-hosted runner label, not a GitHub-hosted image.
          - "linux-x86_64-gpu-2"
    runs-on: ${{ matrix.os }}
    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.python }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    env:
      # Caches live outside the workspace so they survive workspace cleanup between runs.
      HF_HOME: "${{ github.workspace }}/../.runner_cache/huggingface"
      UV_CACHE_DIR: "${{ github.workspace }}/../.runner_cache/uv"
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      OS_NAME: ${{ matrix.os }}
      UV_NO_SYNC: 1
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ matrix.python }}
          github-token: ${{ github.token }}
          enable-cache: false
      # Fails fast if the runner's GPUs/driver are unavailable.
      - name: Check GPU Status
        run: nvidia-smi
      - name: Install dependencies
        run: |
          uv venv
          uv pip install -e .
          uv pip install -r requirements/dev.txt
          uv pip install -r requirements/bitsandbytes.txt
      - name: Check quality
        run: |
          make style && make quality
      - name: Check license
        run: |
          make license
      - name: Check build
        run: |
          make build
      - name: Test with pytest
        run: |
          make test
================================================
FILE: .github/workflows/tests_npu.yml
================================================
# NPU test suite: runs the checks inside an Ascend CANN container on a self-hosted
# aarch64 runner, installing the NPU-specific requirements first.
name: tests_npu

on:
  workflow_dispatch:
  push:
    branches:
      - "main"
    paths:
      - "**/*.py"
      - "pyproject.toml"
      - "Makefile"
      - ".github/workflows/*.yml"
  pull_request:
    branches:
      - "main"
    paths:
      - "**/*.py"
      - "pyproject.toml"
      - "Makefile"
      - ".github/workflows/*.yml"

jobs:
  tests:
    strategy:
      fail-fast: false
      matrix:
        python:
          - "3.11"
        os:
          # Self-hosted Ascend A2 runner label.
          - "linux-aarch64-a2-4"
        pytorch_npu:
          - "2.7.1"
    runs-on: ${{ matrix.os }}
    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.python }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
    container:
      image: ascendai/cann:8.3.rc2-910b-ubuntu22.04-py3.11
    env:
      # HF mirror endpoint for runners without direct huggingface.co access.
      HF_ENDPOINT: https://hf-mirror.com
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      OS_NAME: ${{ matrix.os }}
      UV_NO_SYNC: 1
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ matrix.python }}
          github-token: ${{ github.token }}
          enable-cache: false
      - name: Install dependencies
        run: |
          uv venv
          uv pip install -r requirements/npu.txt
          uv pip install -e .
          uv pip install -r requirements/dev.txt
      # NOTE(review): the CANN container ships without Node.js; presumably some tooling
      # used by the checks needs it — confirm which step depends on node.
      - name: Install node
        run: |
          apt-get update || true
          apt-get install -y curl
          curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
          apt-get install -y nodejs
      - name: Check quality
        run: |
          make style && make quality
      - name: Check license
        run: |
          make license
      - name: Check build
        run: |
          make build
      - name: Test with pytest
        run: |
          make test
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
# vscode
.vscode/
# uv
uv.lock
# macOS
.DS_Store
# custom .gitignore
hf_cache/
ms_cache/
om_cache/
llamaboard_cache/
llamaboard_config/
saves/
output/
outputs/
wandb/
swanlog/
generated_predictions.jsonl
predictions_score.json
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: check-ast
- id: check-added-large-files
args: ['--maxkb=25000']
- id: check-merge-conflict
- id: check-yaml
- id: debug-statements
- id: end-of-file-fixer
- id: trailing-whitespace
args: [--markdown-linebreak-ext=md]
- id: no-commit-to-branch
args: ['--branch', 'main']
- repo: https://github.com/asottile/pyupgrade
rev: v3.20.0
hooks:
- id: pyupgrade
args: [--py39-plus]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.13.2
hooks:
- id: ruff
args: [--fix]
- id: ruff-format
================================================
FILE: CITATION.cff
================================================
cff-version: 1.2.0
date-released: 2024-03
message: "If you use this software, please cite it as below."
authors:
- family-names: "Zheng"
given-names: "Yaowei"
- family-names: "Zhang"
given-names: "Richong"
- family-names: "Zhang"
given-names: "Junhao"
- family-names: "Ye"
given-names: "Yanhan"
- family-names: "Luo"
given-names: "Zheyan"
- family-names: "Feng"
given-names: "Zhangchi"
- family-names: "Ma"
given-names: "Yongqiang"
title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
url: "https://arxiv.org/abs/2403.13372"
preferred-citation:
type: conference-paper
conference:
name: "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)"
authors:
- family-names: "Zheng"
given-names: "Yaowei"
- family-names: "Zhang"
given-names: "Richong"
- family-names: "Zhang"
given-names: "Junhao"
- family-names: "Ye"
given-names: "Yanhan"
- family-names: "Luo"
given-names: "Zheyan"
- family-names: "Feng"
given-names: "Zhangchi"
- family-names: "Ma"
given-names: "Yongqiang"
title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
url: "https://arxiv.org/abs/2403.13372"
year: 2024
publisher: "Association for Computational Linguistics"
address: "Bangkok, Thailand"
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: MANIFEST.in
================================================
include LICENSE
================================================
FILE: Makefile
================================================
.PHONY: build commit license quality style test

# Directories checked by lint, format, and license targets.
check_dirs := scripts src tests tests_v1

# Prefer uv-based tooling when uv is on PATH; otherwise fall back to plain python/build
# (RUN/TOOL expand to the empty string so commands run with whatever is on PATH).
RUN := $(shell command -v uv >/dev/null 2>&1 && echo "uv run" || echo "")
BUILD := $(shell command -v uv >/dev/null 2>&1 && echo "uv build" || echo "python -m build")
TOOL := $(shell command -v uv >/dev/null 2>&1 && echo "uvx" || echo "")

# Build the distribution artifacts (sdist + wheel).
build:
	$(BUILD)

# Install the pre-commit hooks and run them on the whole tree.
commit:
	$(TOOL) pre-commit install
	$(TOOL) pre-commit run --all-files

# Verify license headers in all checked directories.
license:
	$(RUN) python3 tests/check_license.py $(check_dirs)

# Lint and verify formatting without modifying files (CI mode).
quality:
	$(TOOL) ruff check $(check_dirs)
	$(TOOL) ruff format --check $(check_dirs)

# Auto-fix lint findings and reformat in place.
style:
	$(TOOL) ruff check $(check_dirs) --fix
	$(TOOL) ruff format $(check_dirs)

# Run both test suites; WANDB_DISABLED avoids external experiment-tracking calls.
test:
	WANDB_DISABLED=true $(RUN) pytest -vv --import-mode=importlib tests/ tests_v1/
================================================
FILE: README.md
================================================

[](https://github.com/hiyouga/LLaMA-Factory/stargazers)
[](https://github.com/hiyouga/LLaMA-Factory/commits/main)
[](https://github.com/hiyouga/LLaMA-Factory/graphs/contributors)
[](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml)
[](https://pypi.org/project/llamafactory/)
[](https://scholar.google.com/scholar?cites=12620864006390196564)
[](https://hub.docker.com/r/hiyouga/llamafactory/tags)
[](https://twitter.com/llamafactory_ai)
[](https://discord.gg/rKfvV9r9FK)
[](https://github.com/hiyouga/llamafactory-community)
[](https://blog.llamafactory.net/en/)
[](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)
[](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)
[](https://www.lab4ai.cn/course/detail?id=7c13e60f6137474eb40f6fd3983c0f46&utm_source=LLaMA-Factory)
[](https://www.llamafactory.com.cn/?utm_source=LLaMA-Factory)
[](https://huggingface.co/spaces/hiyouga/LLaMA-Board)
[](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
[](https://novita.ai/templates-library/105981?sharer=88115474-394e-4bda-968e-b88e123d0c47)
### Used by [Amazon](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/), [NVIDIA](https://developer.nvidia.com/rtx/ai-toolkit), [Aliyun](https://help.aliyun.com/zh/pai/use-cases/fine-tune-a-llama-3-model-with-llama-factory), etc.
<div align="center" markdown="1">
### Supporters ❤️
| <div style="text-align: center;"><a href="https://warp.dev/llama-factory"><img alt="Warp sponsorship" width="400" src="assets/sponsors/warp.jpg"></a><br><a href="https://warp.dev/llama-factory" style="font-size:larger;">Warp, the agentic terminal for developers</a><br><a href="https://warp.dev/llama-factory">Available for MacOS, Linux, & Windows</a> | <a href="https://serpapi.com"><img alt="SerpAPI sponsorship" width="250" src="assets/sponsors/serpapi.svg"> </a> |
| ---- | ---- |
----
### Easily fine-tune 100+ large language models with zero-code [CLI](#quickstart) and [Web UI](#fine-tuning-with-llama-board-gui-powered-by-gradio)

</div>
👋 Join our [WeChat](https://github.com/hiyouga/llamafactory-community/blob/main/wechat/main.jpg), [NPU](https://github.com/hiyouga/llamafactory-community/blob/main/wechat/npu.jpg), [Lab4AI](https://github.com/hiyouga/llamafactory-community/blob/main/wechat/lab4ai.jpg), [LLaMA Factory Online](https://github.com/hiyouga/llamafactory-community/blob/main/wechat/online.jpg) user group.
\[ English | [中文](README_zh.md) \]
**Fine-tuning a large language model can be as easy as...**
https://github.com/user-attachments/assets/3991a3a8-4276-4d30-9cab-4cb0c4b9b99e
Start local training:
- Please refer to [usage](#getting-started)
Start cloud training:
- **Colab (free)**: https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing
- **PAI-DSW (free trial)**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
- **LLaMA Factory Online**: https://www.llamafactory.com.cn/?utm_source=LLaMA-Factory
- **Alaya NeW (cloud GPU deal)**: https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory
Read technical notes:
- **Documentation (WIP)**: https://llamafactory.readthedocs.io/en/latest/
- **Documentation (AMD GPU)**: https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/fine_tune/llama_factory_llama3.html
- **Official Blog**: https://blog.llamafactory.net/en/
- **Official Course**: https://www.lab4ai.cn/course/detail?id=7c13e60f6137474eb40f6fd3983c0f46&utm_source=LLaMA-Factory
> [!NOTE]
> Apart from the links above, all other websites are unauthorized third-party websites. Please use them with caution.
## Table of Contents
- [Features](#features)
- [Blogs](#blogs)
- [Changelog](#changelog)
- [Supported Models](#supported-models)
- [Supported Training Approaches](#supported-training-approaches)
- [Provided Datasets](#provided-datasets)
- [Requirement](#requirement)
- [Getting Started](#getting-started)
- [Installation](#installation)
- [Data Preparation](#data-preparation)
- [Quickstart](#quickstart)
- [Fine-Tuning with LLaMA Board GUI](#fine-tuning-with-llama-board-gui-powered-by-gradio)
- [LLaMA Factory Online](#llama-factory-online)
- [Build Docker](#build-docker)
- [Deploy with OpenAI-style API and vLLM](#deploy-with-openai-style-api-and-vllm)
- [Download from ModelScope Hub](#download-from-modelscope-hub)
- [Download from Modelers Hub](#download-from-modelers-hub)
- [Use W&B Logger](#use-wb-logger)
- [Use SwanLab Logger](#use-swanlab-logger)
- [Projects using LLaMA Factory](#projects-using-llama-factory)
- [License](#license)
- [Citation](#citation)
- [Acknowledgement](#acknowledgement)
## Features
- **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen3, Qwen3-VL, DeepSeek, Gemma, GLM, Phi, etc.
- **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO, etc.
- **Scalable resources**: 16-bit full-tuning, freeze-tuning, LoRA and 2/3/4/5/6/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8/HQQ/EETQ.
- **Advanced algorithms**: [GaLore](https://github.com/jiaweizzhao/GaLore), [BAdam](https://github.com/Ledzy/BAdam), [APOLLO](https://github.com/zhuhanqing/APOLLO), [Adam-mini](https://github.com/zyushun/Adam-mini), [Muon](https://github.com/KellerJordan/Muon), [OFT](https://github.com/huggingface/peft/tree/main/src/peft/tuners/oft), DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and PiSSA.
- **Practical tricks**: [FlashAttention-2](https://github.com/Dao-AILab/flash-attention), [Unsloth](https://github.com/unslothai/unsloth), [Liger Kernel](https://github.com/linkedin/Liger-Kernel), [KTransformers](https://github.com/kvcache-ai/ktransformers/), RoPE scaling, NEFTune and rsLoRA.
- **Wide tasks**: Multi-turn dialogue, tool using, image understanding, visual grounding, video recognition, audio understanding, etc.
- **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, [SwanLab](https://github.com/SwanHubX/SwanLab), etc.
- **Faster inference**: OpenAI-style API, Gradio UI and CLI with [vLLM worker](https://github.com/vllm-project/vllm) or [SGLang worker](https://github.com/sgl-project/sglang).
### Day-N Support for Fine-Tuning Cutting-Edge Models
| Support Date | Model Name |
| ------------ | -------------------------------------------------------------------- |
| Day 0 | Qwen3 / Qwen2.5-VL / Gemma 3 / GLM-4.1V / InternLM 3 / MiniCPM-o-2.6 |
| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 / Llama 4 |
## Blogs
> [!TIP]
> Now we have a dedicated blog for LLaMA Factory!
>
> Website: https://blog.llamafactory.net/en/
- 💡 [KTransformers Fine-Tuning × LLaMA Factory: Fine-tuning 1000B-parameter models with 2 RTX 4090 GPUs + CPU](https://blog.llamafactory.net/en/posts/ktransformers/) (English)
- 💡 [Easy Dataset × LLaMA Factory: Enabling LLMs to Efficiently Learn Domain Knowledge](https://buaa-act.feishu.cn/wiki/GVzlwYcRFiR8OLkHbL6cQpYin7g) (English)
- [Fine-tune a mental health LLM using LLaMA-Factory](https://www.lab4ai.cn/project/detail?id=25cce32ec131497b9e06a93336a0817f&type=project&utm_source=LLaMA-Factory) (Chinese)
- [Fine-tune GPT-OSS for Role-Playing using LLaMA-Factory](https://docs.llamafactory.com.cn/docs/documents/best-practice/gptroleplay/?utm_source=LLaMA-Factory) (Chinese)
- [A One-Stop Code-Free Model Reinforcement Learning and Deployment Platform based on LLaMA-Factory and EasyR1](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/) (Chinese)
- [How Apoidea Group enhances visual information extraction from banking documents with multimodal models using LLaMA-Factory on Amazon SageMaker HyperPod](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/) (English)
<details><summary>All Blogs</summary>
- [Fine-tune Llama3.1-70B for Medical Diagnosis using LLaMA-Factory](https://docs.alayanew.com/docs/documents/bestPractice/bigModel/llama70B/?utm_source=LLaMA-Factory) (Chinese)
- [Fine-tune Qwen2.5-VL for Autonomous Driving using LLaMA-Factory](https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory) (Chinese)
- [LLaMA Factory: Fine-tuning the DeepSeek-R1-Distill-Qwen-7B Model for News Classifier](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_deepseek_r1_distill_7b) (Chinese)
- [A One-Stop Code-Free Model Fine-Tuning \& Deployment Platform based on SageMaker and LLaMA-Factory](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/) (Chinese)
- [LLaMA Factory Multi-Modal Fine-Tuning Practice: Fine-Tuning Qwen2-VL for Personal Tourist Guide](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl) (Chinese)
- [LLaMA Factory: Fine-tuning Llama3 for Role-Playing](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) (Chinese)
</details>
## Changelog
[25/10/26] We support Megatron-core training backend with [**mcore_adapter**](https://github.com/alibaba/ROLL/tree/main/mcore_adapter). See [PR #9237](https://github.com/hiyouga/LLaMA-Factory/pull/9237) to get started.
[25/08/22] We supported **[OFT](https://arxiv.org/abs/2306.07280)** and **[OFTv2](https://arxiv.org/abs/2506.19847)**. See [examples](examples/README.md) for usage.
[25/08/20] We supported fine-tuning the **[Intern-S1-mini](https://huggingface.co/internlm/Intern-S1-mini)** models. See [PR #8976](https://github.com/hiyouga/LLaMA-Factory/pull/8976) to get started.
[25/08/06] We supported fine-tuning the **[GPT-OSS](https://github.com/openai/gpt-oss)** models. See [PR #8826](https://github.com/hiyouga/LLaMA-Factory/pull/8826) to get started.
<details><summary>Full Changelog</summary>
[25/07/02] We supported fine-tuning the **[GLM-4.1V-9B-Thinking](https://github.com/THUDM/GLM-4.1V-Thinking)** model.
[25/04/28] We supported fine-tuning the **[Qwen3](https://qwenlm.github.io/blog/qwen3/)** model family.
[25/04/21] We supported the **[Muon](https://github.com/KellerJordan/Muon)** optimizer. See [examples](examples/README.md) for usage. Thank [@tianshijing](https://github.com/tianshijing)'s PR.
[25/04/16] We supported fine-tuning the **[InternVL3](https://huggingface.co/OpenGVLab/InternVL3-8B)** model. See [PR #7258](https://github.com/hiyouga/LLaMA-Factory/pull/7258) to get started.
[25/04/14] We supported fine-tuning the **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** and **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** models.
[25/04/06] We supported fine-tuning the **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** model. See [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) to get started.
[25/03/31] We supported fine-tuning the **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** model. See [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) to get started.
[25/03/15] We supported **[SGLang](https://github.com/sgl-project/sglang)** as inference backend. Try `infer_backend: sglang` to accelerate inference.
[25/03/12] We supported fine-tuning the **[Gemma 3](https://huggingface.co/blog/gemma3)** model.
[25/02/24] Announcing **[EasyR1](https://github.com/hiyouga/EasyR1)**, an efficient, scalable and multi-modality RL training framework for GRPO training.
[25/02/11] We supported saving the **[Ollama](https://github.com/ollama/ollama)** modelfile when exporting the model checkpoints. See [examples](examples/README.md) for usage.
[25/02/05] We supported fine-tuning the **[Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)** and **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** on audio understanding tasks.
[25/01/31] We supported fine-tuning the **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** and **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** models.
[25/01/15] We supported **[APOLLO](https://arxiv.org/abs/2412.05270)** optimizer. See [examples](examples/README.md) for usage.
[25/01/14] We supported fine-tuning the **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** and **[MiniCPM-V-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6)** models. Thank [@BUAADreamer](https://github.com/BUAADreamer)'s PR.
[25/01/14] We supported fine-tuning the **[InternLM 3](https://huggingface.co/collections/internlm/)** models. Thank [@hhaAndroid](https://github.com/hhaAndroid)'s PR.
[25/01/10] We supported fine-tuning the **[Phi-4](https://huggingface.co/microsoft/phi-4)** model.
[24/12/21] We supported using **[SwanLab](https://github.com/SwanHubX/SwanLab)** for experiment tracking and visualization. See [this section](#use-swanlab-logger) for details.
[24/11/27] We supported fine-tuning the **[Skywork-o1](https://huggingface.co/Skywork/Skywork-o1-Open-Llama-3.1-8B)** model and the **[OpenO1](https://huggingface.co/datasets/O1-OPEN/OpenO1-SFT)** dataset.
[24/10/09] We supported downloading pre-trained models and datasets from the **[Modelers Hub](https://modelers.cn/models)**. See [this tutorial](#download-from-modelers-hub) for usage.
[24/09/19] We supported fine-tuning the **[Qwen2.5](https://qwenlm.github.io/blog/qwen2.5/)** models.
[24/08/30] We supported fine-tuning the **[Qwen2-VL](https://qwenlm.github.io/blog/qwen2-vl/)** models. Thank [@simonJJJ](https://github.com/simonJJJ)'s PR.
[24/08/27] We supported **[Liger Kernel](https://github.com/linkedin/Liger-Kernel)**. Try `enable_liger_kernel: true` for efficient training.
[24/08/09] We supported **[Adam-mini](https://github.com/zyushun/Adam-mini)** optimizer. See [examples](examples/README.md) for usage. Thank [@relic-yuexi](https://github.com/relic-yuexi)'s PR.
[24/07/04] We supported [contamination-free packed training](https://github.com/MeetKai/functionary/tree/main/functionary/train/packing). Use `neat_packing: true` to activate it. Thank [@chuan298](https://github.com/chuan298)'s PR.
[24/06/16] We supported **[PiSSA](https://arxiv.org/abs/2404.02948)** algorithm. See [examples](examples/README.md) for usage.
[24/06/07] We supported fine-tuning the **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** and **[GLM-4](https://github.com/THUDM/GLM-4)** models.
[24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage.
[24/05/20] We supported fine-tuning the **PaliGemma** series models. Note that the PaliGemma models are pre-trained models, you need to fine-tune them with `paligemma` template for chat completion.
[24/05/18] We supported **[KTO](https://arxiv.org/abs/2402.01306)** algorithm for preference learning. See [examples](examples/README.md) for usage.
[24/05/14] We supported training and inference on the Ascend NPU devices. Check [installation](#installation) section for details.
[24/04/26] We supported fine-tuning the **LLaVA-1.5** multimodal LLMs. See [examples](examples/README.md) for usage.
[24/04/22] We provided a **[Colab notebook](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)** for fine-tuning the Llama-3 model on a free T4 GPU. Two Llama-3-derived models fine-tuned using LLaMA Factory are available at Hugging Face, check [Llama3-8B-Chinese-Chat](https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat) and [Llama3-Chinese](https://huggingface.co/zhichen/Llama3-Chinese) for details.
[24/04/21] We supported **[Mixture-of-Depths](https://arxiv.org/abs/2404.02258)** according to [AstraMindAI's implementation](https://github.com/astramind-ai/Mixture-of-depths). See [examples](examples/README.md) for usage.
[24/04/16] We supported **[BAdam](https://arxiv.org/abs/2404.02827)** optimizer. See [examples](examples/README.md) for usage.
[24/04/16] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s long-sequence training (Llama-2-7B-56k within 24GB). It achieves **117%** speed and **50%** memory compared with FlashAttention-2, more benchmarks can be found in [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison).
[24/03/31] We supported **[ORPO](https://arxiv.org/abs/2403.07691)**. See [examples](examples/README.md) for usage.
[24/03/21] Our paper "[LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models](https://arxiv.org/abs/2403.13372)" is available at arXiv!
[24/03/20] We supported **FSDP+QLoRA** that fine-tunes a 70B model on 2x24GB GPUs. See [examples](examples/README.md) for usage.
[24/03/13] We supported **[LoRA+](https://arxiv.org/abs/2402.12354)**. See [examples](examples/README.md) for usage.
[24/03/07] We supported **[GaLore](https://arxiv.org/abs/2403.03507)** optimizer. See [examples](examples/README.md) for usage.
[24/03/07] We integrated **[vLLM](https://github.com/vllm-project/vllm)** for faster and concurrent inference. Try `infer_backend: vllm` to enjoy **270%** inference speed.
[24/02/28] We supported weight-decomposed LoRA (**[DoRA](https://arxiv.org/abs/2402.09353)**). Try `use_dora: true` to activate DoRA training.
[24/02/15] We supported **block expansion** proposed by [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro). See [examples](examples/README.md) for usage.
[24/02/05] Qwen1.5 (Qwen2 beta version) series models are supported in LLaMA-Factory. Check this [blog post](https://qwenlm.github.io/blog/qwen1.5/) for details.
[24/01/18] We supported **agent tuning** for most models, equipping the model with tool-using abilities by fine-tuning with `dataset: glaive_toolcall_en`.
[23/12/23] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s implementation to boost LoRA tuning for the LLaMA, Mistral and Yi models. Try `use_unsloth: true` argument to activate unsloth patch. It achieves **170%** speed in our benchmark, check [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison) for details.
[23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework. See hardware requirement [here](#hardware-requirement).
[23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)**. See [this tutorial](#download-from-modelscope-hub) for usage.
[23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `neftune_noise_alpha: 5` argument to activate NEFTune.
[23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for the LLaMA models. Try `shift_attn: true` argument to enable shift short attention.
[23/09/23] We integrated MMLU, C-Eval and CMMLU benchmarks in this repo. See [examples](examples/README.md) for usage.
[23/09/10] We supported **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**. Try `flash_attn: fa2` argument to enable FlashAttention-2 if you are using RTX4090, A100 or H100 GPUs.
[23/08/12] We supported **RoPE scaling** to extend the context length of the LLaMA models. Try `rope_scaling: linear` argument in training and `rope_scaling: dynamic` argument at inference to extrapolate the position embeddings.
[23/08/11] We supported **[DPO training](https://arxiv.org/abs/2305.18290)** for instruction-tuned models. See [examples](examples/README.md) for usage.
[23/07/31] We supported **dataset streaming**. Try `streaming: true` and `max_steps: 10000` arguments to load your dataset in streaming mode.
[23/07/29] We released two instruction-tuned 13B models at Hugging Face. See these Hugging Face Repos ([LLaMA-2](https://huggingface.co/hiyouga/Llama-2-Chinese-13b-chat) / [Baichuan](https://huggingface.co/hiyouga/Baichuan-13B-sft)) for details.
[23/07/18] We developed an **all-in-one Web UI** for training, evaluation and inference. Try `train_web.py` to fine-tune models in your Web browser. Thank [@KanadeSiina](https://github.com/KanadeSiina) and [@codemayq](https://github.com/codemayq) for their efforts in the development.
[23/07/09] We released **[FastEdit](https://github.com/hiyouga/FastEdit)** ⚡🩹, an easy-to-use package for editing the factual knowledge of large language models efficiently. Please follow [FastEdit](https://github.com/hiyouga/FastEdit) if you are interested.
[23/06/29] We provided a **reproducible example** of training a chat model using instruction-following datasets, see [Baichuan-7B-sft](https://huggingface.co/hiyouga/Baichuan-7B-sft) for details.
[23/06/22] We aligned the [demo API](src/api_demo.py) with the [OpenAI's](https://platform.openai.com/docs/api-reference/chat) format where you can insert the fine-tuned model in **arbitrary ChatGPT-based applications**.
[23/06/03] We supported quantized training and inference (aka **[QLoRA](https://github.com/artidoro/qlora)**). See [examples](examples/README.md) for usage.
</details>
> [!TIP]
> If you cannot use the latest feature, please pull the latest code and install LLaMA-Factory again.
## Supported Models
| Model | Model size | Template |
| ----------------------------------------------------------------- | -------------------------------- | -------------------- |
| [BLOOM/BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
| [DeepSeek (LLM/Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [DeepSeek 3-3.2](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
| [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
| [ERNIE-4.5](https://huggingface.co/baidu) | 0.3B/21B/300B | ernie_nothink |
| [Falcon/Falcon H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/11B/34B/40B/180B | falcon/falcon_h1 |
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 |
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
| [GLM-4.5/GLM-4.5(6)V](https://huggingface.co/zai-org) | 9B/106B/355B | glm4_moe/glm4_5v |
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt_oss |
| [Granite 3-4](https://huggingface.co/ibm-granite) | 1B/2B/3B/7B/8B | granite3/granite4 |
| [Hunyuan/Hunyuan1.5 (MT)](https://huggingface.co/tencent/) | 0.5B/1.8B/4B/7B/13B | hunyuan/hunyuan_small|
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
| [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl |
| [Intern-S1-mini](https://huggingface.co/internlm/) | 8B | intern_s1 |
| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
| [Ling 2.0 (mini/flash)](https://huggingface.co/inclusionAI) | 16B/100B | bailing_v2 |
| [LFM 2.5 (VL)](https://huggingface.co/LiquidAI) | 1.2B/1.6B | lfm2/lfm2_vl |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
| [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
| [Llama 4](https://huggingface.co/meta-llama) | 109B/402B | llama4 |
| [Llama 3.2 Vision](https://huggingface.co/meta-llama) | 11B/90B | mllama |
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B/309B | mimo/mimo_v2 |
| [MiniCPM 4](https://huggingface.co/openbmb) | 0.5B/8B | cpm4 |
| [MiniCPM-o/MiniCPM-V 4.5](https://huggingface.co/openbmb) | 8B/9B | minicpm_o/minicpm_v |
| [MiniMax-M1/MiniMax-M2](https://huggingface.co/MiniMaxAI/models) | 229B/456B | minimax1/minimax2 |
| [Ministral 3](https://huggingface.co/mistralai) | 3B/8B/14B | ministral3 |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
| [Phi-3/Phi-3.5](https://huggingface.co/microsoft) | 4B/14B | phi |
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
| [Phi-4-mini/Phi-4](https://huggingface.co/microsoft) | 3.8B/14B | phi4_mini/phi4 |
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen2 (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink |
| [Qwen3.5](https://huggingface.co/Qwen) | 0.8B/2B/4B/9B/27B/35B/122B/397B | qwen3_5 |
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
| [Qwen3-Omni](https://huggingface.co/Qwen) | 30B | qwen3_omni |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
| [Qwen3-VL](https://huggingface.co/Qwen) | 2B/4B/8B/30B/32B/235B | qwen3_vl |
| [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_oss/seed_coder |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [TeleChat 2-2.5](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
| [Yuan 2](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan |
> [!NOTE]
> For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models.
>
> If the model has both reasoning and non-reasoning versions, please use the `_nothink` suffix to distinguish between them. For example, `qwen3` and `qwen3_nothink`.
>
> Remember to use the **SAME** template in training and inference.
>
> \*: You should install the `transformers` from main branch and use `DISABLE_VERSION_CHECK=1` to skip version check.
>
> \*\*: You need to install a specific version of `transformers` to use the corresponding model.
Please refer to [constants.py](src/llamafactory/extras/constants.py) for a full list of models we supported.
You also can add a custom chat template to [template.py](src/llamafactory/data/template.py).
## Supported Training Approaches
| Approach | Full-tuning | Freeze-tuning | LoRA | QLoRA | OFT | QOFT |
| ---------------------- | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ | ------------------ |
| Pre-Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| Supervised Fine-Tuning | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| Reward Modeling | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| PPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| DPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| KTO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| ORPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| SimPO Training | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
> [!TIP]
> The implementation details of PPO can be found in [this blog](https://newfacade.github.io/notes-on-reinforcement-learning/17-ppo-trl.html).
## Provided Datasets
<details><summary>Pre-training datasets</summary>
- [Wiki Demo (en)](data/wiki_demo.txt)
- [RefinedWeb (en)](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)
- [RedPajama V2 (en)](https://huggingface.co/datasets/togethercomputer/RedPajama-Data-V2)
- [Wikipedia (en)](https://huggingface.co/datasets/olm/olm-wikipedia-20221220)
- [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered)
- [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile)
- [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B)
- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb)
- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu)
- [CCI3-HQ (zh)](https://huggingface.co/datasets/BAAI/CCI3-HQ)
- [CCI3-Data (zh)](https://huggingface.co/datasets/BAAI/CCI3-Data)
- [CCI4.0-M2-Base-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-Base-v1)
- [CCI4.0-M2-CoT-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-CoT-v1)
- [CCI4.0-M2-Extra-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-Extra-v1)
- [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack)
- [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata)
</details>
<details><summary>Supervised fine-tuning datasets</summary>
- [Identity (en&zh)](data/identity.json)
- [Stanford Alpaca (en)](https://github.com/tatsu-lab/stanford_alpaca)
- [Stanford Alpaca (zh)](https://github.com/ymcui/Chinese-LLaMA-Alpaca-3)
- [Alpaca GPT4 (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
- [Glaive Function Calling V2 (en&zh)](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2)
- [LIMA (en)](https://huggingface.co/datasets/GAIR/lima)
- [Guanaco Dataset (multilingual)](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)
- [BELLE 2M (zh)](https://huggingface.co/datasets/BelleGroup/train_2M_CN)
- [BELLE 1M (zh)](https://huggingface.co/datasets/BelleGroup/train_1M_CN)
- [BELLE 0.5M (zh)](https://huggingface.co/datasets/BelleGroup/train_0.5M_CN)
- [BELLE Dialogue 0.4M (zh)](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M)
- [BELLE School Math 0.25M (zh)](https://huggingface.co/datasets/BelleGroup/school_math_0.25M)
- [BELLE Multiturn Chat 0.8M (zh)](https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M)
- [UltraChat (en)](https://github.com/thunlp/UltraChat)
- [OpenPlatypus (en)](https://huggingface.co/datasets/garage-bAInd/Open-Platypus)
- [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k)
- [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT)
- [OpenOrca (en)](https://huggingface.co/datasets/Open-Orca/OpenOrca)
- [SlimOrca (en)](https://huggingface.co/datasets/Open-Orca/SlimOrca)
- [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct)
- [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)
- [Wiki QA (en)](https://huggingface.co/datasets/wiki_qa)
- [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa)
- [WebNovel (zh)](https://huggingface.co/datasets/zxbsmk/webnovel_cn)
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
- [deepctrl (en&zh)](https://www.modelscope.cn/datasets/deepctrl/deepctrl-sft-data)
- [Advertise Generating (zh)](https://huggingface.co/datasets/HasturOfficial/adgen)
- [ShareGPT Hyperfiltered (en)](https://huggingface.co/datasets/totally-not-an-llm/sharegpt-hyperfiltered-3k)
- [ShareGPT4 (en&zh)](https://huggingface.co/datasets/shibing624/sharegpt_gpt4)
- [UltraChat 200k (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k)
- [Infinity Instruct (zh)](https://huggingface.co/datasets/BAAI/Infinity-Instruct)
- [AgentInstruct (en)](https://huggingface.co/datasets/THUDM/AgentInstruct)
- [LMSYS Chat 1M (en)](https://huggingface.co/datasets/lmsys/lmsys-chat-1m)
- [Evol Instruct V2 (en)](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k)
- [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)
- [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction)
- [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo)
- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2)
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
- [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1)
- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub)
- [OpenO1-SFT (en&zh)](https://huggingface.co/datasets/O1-OPEN/OpenO1-SFT)
- [Open-Thoughts (en)](https://huggingface.co/datasets/open-thoughts/OpenThoughts-114k)
- [Open-R1-Math (en)](https://huggingface.co/datasets/open-r1/OpenR1-Math-220k)
- [Chinese-DeepSeek-R1-Distill (zh)](https://huggingface.co/datasets/Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT)
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
- [Pokemon-gpt4o-captions (en&zh)](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions)
- [DLR-Web (en)](https://huggingface.co/datasets/Attention1115/DLR-Web)
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
- [OpenSchnabeltier (de)](https://huggingface.co/datasets/mayflowergmbh/openschnabeltier_de)
- [Evol Instruct (de)](https://huggingface.co/datasets/mayflowergmbh/evol-instruct_de)
- [Dolphin (de)](https://huggingface.co/datasets/mayflowergmbh/dolphin_de)
- [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de)
- [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de)
- [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de)
</details>
<details><summary>Preference datasets</summary>
- [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k)
- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
- [COIG-P (zh)](https://huggingface.co/datasets/m-a-p/COIG-P)
- [RLHF-V (en)](https://huggingface.co/datasets/openbmb/RLHF-V-Dataset)
- [VLFeedback (en)](https://huggingface.co/datasets/Zhihui/VLFeedback)
- [RLAIF-V (en)](https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset)
- [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs)
- [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
- [Orca DPO (de)](https://huggingface.co/datasets/mayflowergmbh/intel_orca_dpo_pairs_de)
- [KTO mixed (en)](https://huggingface.co/datasets/argilla/kto-mix-15k)
</details>
Some datasets require confirmation before using them, so we recommend logging in with your Hugging Face account using these commands.
```bash
pip install "huggingface_hub<1.0.0"
huggingface-cli login
```
## Requirement
| Mandatory | Minimum | Recommend |
| ------------ | ------- | --------- |
| python | 3.11 | >=3.11 |
| torch | 2.0.0 | 2.6.0 |
| torchvision | 0.15.0 | 0.21.0 |
| transformers | 4.49.0 | 4.50.0 |
| datasets | 2.16.0 | 3.2.0 |
| accelerate | 0.34.0 | 1.2.1 |
| peft | 0.14.0 | 0.15.1 |
| trl | 0.8.6 | 0.9.6 |
| Optional | Minimum | Recommend |
| ------------ | ------- | --------- |
| CUDA | 11.6 | 12.2 |
| deepspeed | 0.10.0 | 0.16.4 |
| bitsandbytes | 0.39.0 | 0.43.1 |
| vllm | 0.4.3 | 0.8.2 |
| flash-attn | 2.5.6 | 2.7.2 |
### Hardware Requirement
\* *estimated*
| Method | Bits | 7B | 14B | 30B | 70B | `x`B |
| ----------------------------------- | ---- | ----- | ----- | ----- | ------ | ------- |
| Full (`bf16` or `fp16`) | 32 | 120GB | 240GB | 600GB | 1200GB | `18x`GB |
| Full (`pure_bf16`) | 16 | 60GB | 120GB | 300GB | 600GB | `8x`GB |
| Freeze/LoRA/GaLore/APOLLO/BAdam/OFT | 16 | 16GB | 32GB | 64GB | 160GB | `2x`GB |
| QLoRA / QOFT | 8 | 10GB | 20GB | 40GB | 80GB | `x`GB |
| QLoRA / QOFT | 4 | 6GB | 12GB | 24GB | 48GB | `x/2`GB |
| QLoRA / QOFT | 2 | 4GB | 8GB | 16GB | 24GB | `x/4`GB |
## Getting Started
### Installation
> [!IMPORTANT]
> Installation is mandatory.
#### Install from Source
```bash
git clone --depth 1 https://github.com/hiyouga/LlamaFactory.git
cd LlamaFactory
pip install -e .
pip install -r requirements/metrics.txt
```
Optional dependencies available: `metrics`, `deepspeed`. Install with: `pip install -e . && pip install -r requirements/metrics.txt -r requirements/deepspeed.txt`
Additional dependencies for specific features are available in `examples/requirements/`.
#### Install from Docker Image
```bash
docker run -it --rm --gpus=all --ipc=host hiyouga/llamafactory:latest
```
This image is built on Ubuntu 22.04 (x86\_64), CUDA 12.4, Python 3.11, PyTorch 2.6.0, and Flash-attn 2.7.4.
Find the pre-built images: https://hub.docker.com/r/hiyouga/llamafactory/tags
Please refer to [build docker](#build-docker) to build the image yourself.
<details><summary>Setting up a virtual environment with <b>uv</b></summary>
Create an isolated Python environment with [uv](https://github.com/astral-sh/uv):
```bash
uv run llamafactory-cli webui
```
</details>
<details><summary>For Windows users</summary>
#### Install PyTorch
You need to manually install the GPU version of PyTorch on the Windows platform. Please refer to the [official website](https://pytorch.org/get-started/locally/) and the following command to install PyTorch with CUDA support:
```bash
pip uninstall torch torchvision torchaudio
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
python -c "import torch; print(torch.cuda.is_available())"
```
If you see `True` then you have successfully installed PyTorch with CUDA support.
Try `dataloader_num_workers: 0` if you encounter `Can't pickle local object` error.
#### Install BitsAndBytes
If you want to enable the quantized LoRA (QLoRA) on the Windows platform, you need to install a pre-built version of the `bitsandbytes` library that supports CUDA 11.1 to 12.2. Please select the appropriate [release version](https://github.com/jllllll/bitsandbytes-windows-webui/releases/tag/wheels) based on your CUDA version.
```bash
pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.2.post2-py3-none-win_amd64.whl
```
#### Install Flash Attention-2
To enable FlashAttention-2 on the Windows platform, please use the script from [flash-attention-windows-wheel](https://huggingface.co/lldacing/flash-attention-windows-wheel) to compile and install it by yourself.
</details>
<details><summary>For Ascend NPU users</summary>
To install LLaMA Factory on Ascend NPU devices, please upgrade Python to version 3.10 or higher: `pip install -r requirements/npu.txt`. Additionally, you need to install the **Ascend CANN Toolkit and Kernels**. Please follow the [installation tutorial](https://llamafactory.readthedocs.io/en/latest/advanced/npu_installation.html).
You can also download the pre-built Docker images:
```bash
# Docker Hub
docker pull hiyouga/llamafactory:latest-npu-a2
docker pull hiyouga/llamafactory:latest-npu-a3
# quay.io
docker pull quay.io/ascend/llamafactory:latest-npu-a2
docker pull quay.io/ascend/llamafactory:latest-npu-a3
```
#### Install BitsAndBytes
To use QLoRA based on bitsandbytes on Ascend NPU, please follow these 3 steps:
1. Manually compile bitsandbytes: Refer to [the installation documentation](https://huggingface.co/docs/bitsandbytes/installation?backend=Ascend+NPU&platform=Ascend+NPU) for the NPU version of bitsandbytes to complete the compilation and installation. The compilation requires a cmake version of at least 3.22.1 and a g++ version of at least 12.x.
```bash
# Install bitsandbytes from source
# Clone bitsandbytes repo, Ascend NPU backend is currently enabled on multi-backend-refactor branch
git clone -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git
cd bitsandbytes/
# Install dependencies
pip install -r requirements-dev.txt
# Install the dependencies for the compilation tools. Note that the commands for this step may vary depending on the operating system. The following are provided for reference
apt-get install -y build-essential cmake
# Compile & install
cmake -DCOMPUTE_BACKEND=npu -S .
make
pip install .
```
2. Install transformers from the main branch.
```bash
git clone -b main https://github.com/huggingface/transformers.git
cd transformers
pip install .
```
3. Set `double_quantization: false` in the configuration. You can refer to the [example](examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml).
</details>
### Data Preparation
Please refer to [data/README.md](data/README.md) for checking the details about the format of dataset files. You can use datasets on HuggingFace / ModelScope / Modelers hub, load the dataset in local disk, or specify a path to s3/gcs cloud storage.
> [!NOTE]
> Please update `data/dataset_info.json` to use your custom dataset.
You can also use **[Easy Dataset](https://github.com/ConardLi/easy-dataset)**, **[DataFlow](https://github.com/OpenDCAI/DataFlow)** and **[GraphGen](https://github.com/open-sciencelab/GraphGen)** to create synthetic data for fine-tuning.
### Quickstart
Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Qwen3-4B-Instruct model, respectively.
```bash
llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml
```
See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
> [!TIP]
> Use `llamafactory-cli help` to show help information.
>
> Read [FAQs](https://github.com/hiyouga/LLaMA-Factory/issues/4614) first if you encounter any problems.
### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))
```bash
llamafactory-cli webui
```
### LLaMA Factory Online
Read our [documentation](https://docs.llamafactory.com.cn/docs/documents/quickstart/getstarted/?utm_source=LLaMA-Factory).
### Build Docker
For CUDA users:
```bash
cd docker/docker-cuda/
docker compose up -d
docker compose exec llamafactory bash
```
For Ascend NPU users:
```bash
cd docker/docker-npu/
docker compose up -d
docker compose exec llamafactory bash
```
For AMD ROCm users:
```bash
cd docker/docker-rocm/
docker compose up -d
docker compose exec llamafactory bash
```
<details><summary>Build without Docker Compose</summary>
For CUDA users:
```bash
docker build -f ./docker/docker-cuda/Dockerfile \
--build-arg PIP_INDEX=https://pypi.org/simple \
-t llamafactory:latest .
docker run -dit --ipc=host --gpus=all \
-p 7860:7860 \
-p 8000:8000 \
--name llamafactory \
llamafactory:latest
docker exec -it llamafactory bash
```
For Ascend NPU users:
```bash
docker build -f ./docker/docker-npu/Dockerfile \
--build-arg PIP_INDEX=https://pypi.org/simple \
-t llamafactory:latest .
docker run -dit --ipc=host \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /etc/ascend_install.info:/etc/ascend_install.info \
-p 7860:7860 \
-p 8000:8000 \
--device /dev/davinci0 \
--device /dev/davinci_manager \
--device /dev/devmm_svm \
--device /dev/hisi_hdc \
--name llamafactory \
llamafactory:latest
docker exec -it llamafactory bash
```
For AMD ROCm users:
```bash
docker build -f ./docker/docker-rocm/Dockerfile \
--build-arg PIP_INDEX=https://pypi.org/simple \
-t llamafactory:latest .
docker run -dit --ipc=host \
-p 7860:7860 \
-p 8000:8000 \
--device /dev/kfd \
--device /dev/dri \
--name llamafactory \
llamafactory:latest
docker exec -it llamafactory bash
```
</details>
<details><summary>Use Docker volumes</summary>
You can uncomment `VOLUME [ "/root/.cache/huggingface", "/app/shared_data", "/app/output" ]` in the Dockerfile to use data volumes.
When running the Docker container, use the `-v ./hf_cache:/root/.cache/huggingface` argument to mount a local directory into the container. The following data volumes are available.
- `hf_cache`: Utilize Hugging Face cache on the host machine.
- `shared_data`: The directory to store datasets on the host machine.
- `output`: Set export dir to this location so that the merged result can be accessed directly on the host machine.
</details>
### Deploy with OpenAI-style API and vLLM
```bash
API_PORT=8000 llamafactory-cli api examples/inference/qwen3.yaml infer_backend=vllm vllm_enforce_eager=true
```
> [!TIP]
> Visit [this page](https://platform.openai.com/docs/api-reference/chat/create) for the API documentation.
>
> Examples: [Image understanding](scripts/api_example/test_image.py) | [Function calling](scripts/api_example/test_toolcall.py)
### Download from ModelScope Hub
If you have trouble with downloading models and datasets from Hugging Face, you can use ModelScope.
```bash
export USE_MODELSCOPE_HUB=1 # `set USE_MODELSCOPE_HUB=1` for Windows
```
Train the model by specifying a model ID of the ModelScope Hub as the `model_name_or_path`. You can find a full list of model IDs at [ModelScope Hub](https://modelscope.cn/models), e.g., `LLM-Research/Meta-Llama-3-8B-Instruct`.
### Download from Modelers Hub
You can also use Modelers Hub to download models and datasets.
```bash
export USE_OPENMIND_HUB=1 # `set USE_OPENMIND_HUB=1` for Windows
```
Train the model by specifying a model ID of the Modelers Hub as the `model_name_or_path`. You can find a full list of model IDs at [Modelers Hub](https://modelers.cn/models), e.g., `TeleAI/TeleChat-7B-pt`.
### Use W&B Logger
To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments to yaml files.
```yaml
report_to: wandb
run_name: test_run # optional
```
Set `WANDB_API_KEY` to [your key](https://wandb.ai/authorize) when launching training tasks to log in with your W&B account.
### Use SwanLab Logger
To use [SwanLab](https://github.com/SwanHubX/SwanLab) for logging experimental results, you need to add the following arguments to yaml files.
```yaml
use_swanlab: true
swanlab_run_name: test_run # optional
```
When launching training tasks, you can log in to SwanLab in three ways:
1. Add `swanlab_api_key=<your_api_key>` to the yaml file, and set it to your [API key](https://swanlab.cn/settings).
2. Set the environment variable `SWANLAB_API_KEY` to your [API key](https://swanlab.cn/settings).
3. Use the `swanlab login` command to complete the login.
## Projects using LLaMA Factory
If you have a project that should be incorporated, please contact via email or create a pull request.
<details><summary>Click to show</summary>
1. Wang et al. ESRL: Efficient Sampling-based Reinforcement Learning for Sequence Generation. 2023. [[arxiv]](https://arxiv.org/abs/2308.02223)
1. Yu et al. Open, Closed, or Small Language Models for Text Classification? 2023. [[arxiv]](https://arxiv.org/abs/2308.10092)
1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526)
1. Luceri et al. Leveraging Large Language Models to Detect Influence Campaigns in Social Media. 2023. [[arxiv]](https://arxiv.org/abs/2311.07816)
1. Zhang et al. Alleviating Hallucinations of Large Language Models through Induced Hallucinations. 2023. [[arxiv]](https://arxiv.org/abs/2312.15710)
1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. KDD 2024. [[arxiv]](https://arxiv.org/abs/2401.04319)
1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2401.07286)
1. Choi et al. FACT-GPT: Fact-Checking Augmentation via Claim Matching with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2402.05904)
1. Zhang et al. AutoMathText: Autonomous Data Selection with Language Models for Mathematical Texts. 2024. [[arxiv]](https://arxiv.org/abs/2402.07625)
1. Lyu et al. KnowTuning: Knowledge-aware Fine-tuning for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11176)
1. Yang et al. LaCo: Large Language Model Pruning via Layer Collapse. 2024. [[arxiv]](https://arxiv.org/abs/2402.11187)
1. Bhardwaj et al. Language Models are Homer Simpson! Safety Re-Alignment of Fine-tuned Language Models through Task Arithmetic. 2024. [[arxiv]](https://arxiv.org/abs/2402.11746)
1. Yang et al. Enhancing Empathetic Response Generation by Augmenting LLMs with Small-scale Empathetic Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11801)
1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2402.11809)
1. Cao et al. Head-wise Shareable Attention for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11819)
1. Zhang et al. Enhancing Multilingual Capabilities of Large Language Models through Self-Distillation from Resource-Rich Languages. 2024. [[arxiv]](https://arxiv.org/abs/2402.12204)
1. Kim et al. Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.14714)
1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. ACL 2024. [[arxiv]](https://arxiv.org/abs/2402.15043)
1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
1. Wu et al. Large Language Models are Parallel Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2403.09073)
1. Zhang et al. EDT: Improving Large Language Models' Generation by Entropy-based Dynamic Temperature Sampling. 2024. [[arxiv]](https://arxiv.org/abs/2403.14541)
1. Weller et al. FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2403.15246)
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. COLING 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
1. Zan et al. CodeS: Natural Language to Code Repository via Multi-Layer Sketch. 2024. [[arxiv]](https://arxiv.org/abs/2403.16443)
1. Liu et al. Extensive Self-Contrast Enables Feedback-Free Language Model Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2404.00604)
1. Luo et al. BAdam: A Memory Efficient Full Parameter Training Method for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.02827)
1. Du et al. Chinese Tiny LLM: Pretraining a Chinese-Centric Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2404.04167)
1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. ICML 2024. [[arxiv]](https://arxiv.org/abs/2404.04316)
1. Liu et al. Dynamic Generation of Personalities with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.07084)
1. Shang et al. How Far Have We Gone in Stripped Binary Code Understanding Using Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.09836)
1. Huang et al. LLMTune: Accelerate Database Knob Tuning with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.11581)
1. Deng et al. Text-Tuple-Table: Towards Information Integration in Text-to-Table Generation via Global Tuple Extraction. 2024. [[arxiv]](https://arxiv.org/abs/2404.14215)
1. Acikgoz et al. Hippocrates: An Open-Source Framework for Advancing Large Language Models in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2404.16621)
1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2404.17140)
1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. NAACL 2024. [[arxiv]](https://arxiv.org/abs/2404.18585)
1. Xu et al. Large Language Models for Cyber Security: A Systematic Literature Review. 2024. [[arxiv]](https://arxiv.org/abs/2405.04760)
1. Dammu et al. "They are uncultured": Unveiling Covert Harms and Social Threats in LLM Generated Conversations. 2024. [[arxiv]](https://arxiv.org/abs/2405.05378)
1. Yi et al. A safety realignment framework via subspace-oriented model fusion for large language models. 2024. [[arxiv]](https://arxiv.org/abs/2405.09055)
1. Lou et al. SPO: Multi-Dimensional Preference Sequential Alignment With Implicit Reward Modeling. 2024. [[arxiv]](https://arxiv.org/abs/2405.12739)
1. Zhang et al. Getting More from Less: Large Language Models are Good Spontaneous Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2405.13816)
1. Zhang et al. TS-Align: A Teacher-Student Collaborative Framework for Scalable Iterative Finetuning of Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2405.20215)
1. Zihong Chen. Sentence Segmentation and Sentence Punctuation Based on XunziALLM. 2024. [[paper]](https://aclanthology.org/2024.lt4hala-1.30)
1. Gao et al. The Best of Both Worlds: Toward an Honest and Helpful Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2406.00380)
1. Wang and Song. MARS: Benchmarking the Metaphysical Reasoning Abilities of Language Models with a Multi-task Evaluation Dataset. 2024. [[arxiv]](https://arxiv.org/abs/2406.02106)
1. Hu et al. Computational Limits of Low-Rank Adaptation (LoRA) for Transformer-Based Models. 2024. [[arxiv]](https://arxiv.org/abs/2406.03136)
1. Ge et al. Time Sensitive Knowledge Editing through Efficient Finetuning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2406.04496)
1. Tan et al. Peer Review as A Multi-Turn and Long-Context Dialogue with Role-Based Interactions. 2024. [[arxiv]](https://arxiv.org/abs/2406.05688)
1. Song et al. Turbo Sparse: Achieving LLM SOTA Performance with Minimal Activated Parameters. 2024. [[arxiv]](https://arxiv.org/abs/2406.05955)
1. Gu et al. RWKV-CLIP: A Robust Vision-Language Representation Learner. 2024. [[arxiv]](https://arxiv.org/abs/2406.06973)
1. Chen et al. Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees. 2024. [[arxiv]](https://arxiv.org/abs/2406.07115)
1. Zhu et al. Are Large Language Models Good Statisticians? 2024. [[arxiv]](https://arxiv.org/abs/2406.07815)
1. Li et al. Know the Unknown: An Uncertainty-Sensitive Method for LLM Instruction Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2406.10099)
1. Ding et al. IntentionQA: A Benchmark for Evaluating Purchase Intention Comprehension Abilities of Language Models in E-commerce. 2024. [[arxiv]](https://arxiv.org/abs/2406.10173)
1. He et al. COMMUNITY-CROSS-INSTRUCT: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities. 2024. [[arxiv]](https://arxiv.org/abs/2406.12074)
1. Lin et al. FVEL: Interactive Formal Verification Environment with Large Language Models via Theorem Proving. 2024. [[arxiv]](https://arxiv.org/abs/2406.14408)
1. Treutlein et al. Connecting the Dots: LLMs can Infer and Verbalize Latent Structure from Disparate Training Data. 2024. [[arxiv]](https://arxiv.org/abs/2406.14546)
1. Feng et al. SS-Bench: A Benchmark for Social Story Generation and Evaluation. 2024. [[arxiv]](https://arxiv.org/abs/2406.15695)
1. Feng et al. Self-Constructed Context Decompilation with Fined-grained Alignment Enhancement. 2024. [[arxiv]](https://arxiv.org/abs/2406.17233)
1. Liu et al. Large Language Models for Cuffless Blood Pressure Measurement From Wearable Biosignals. 2024. [[arxiv]](https://arxiv.org/abs/2406.18069)
1. Iyer et al. Exploring Very Low-Resource Translation with LLMs: The University of Edinburgh's Submission to AmericasNLP 2024 Translation Task. AmericasNLP 2024. [[paper]](https://aclanthology.org/2024.americasnlp-1.25)
1. Li et al. Calibrating LLMs with Preference Optimization on Thought Trees for Generating Rationale in Science Question Scoring. 2024. [[arxiv]](https://arxiv.org/abs/2406.19949)
1. Yang et al. Financial Knowledge Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2407.00365)
1. Lin et al. DogeRM: Equipping Reward Models with Domain Knowledge through Model Merging. 2024. [[arxiv]](https://arxiv.org/abs/2407.01470)
1. Bako et al. Evaluating the Semantic Profiling Abilities of LLMs for Natural Language Utterances in Data Visualization. 2024. [[arxiv]](https://arxiv.org/abs/2407.06129)
1. Huang et al. RoLoRA: Fine-tuning Rotated Outlier-free LLMs for Effective Weight-Activation Quantization. 2024. [[arxiv]](https://arxiv.org/abs/2407.08044)
1. Jiang et al. LLM-Collaboration on Automatic Science Journalism for the General Audience. 2024. [[arxiv]](https://arxiv.org/abs/2407.09756)
1. Inouye et al. Applied Auto-tuning on LoRA Hyperparameters. 2024. [[paper]](https://scholarcommons.scu.edu/cseng_senior/272/)
1. Qi et al. Research on Tibetan Tourism Viewpoints information generation system based on LLM. 2024. [[arxiv]](https://arxiv.org/abs/2407.13561)
1. Xu et al. Course-Correction: Safety Alignment Using Synthetic Preferences. 2024. [[arxiv]](https://arxiv.org/abs/2407.16637)
1. Sun et al. LAMBDA: A Large Model Based Data Agent. 2024. [[arxiv]](https://arxiv.org/abs/2407.17535)
1. Zhu et al. CollectiveSFT: Scaling Large Language Models for Chinese Medical Benchmark with Collective Instructions in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2407.19705)
1. Yu et al. Correcting Negative Bias in Large Language Models through Negative Attention Score Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2408.00137)
1. Xie et al. The Power of Personalized Datasets: Advancing Chinese Composition Writing for Elementary School through Targeted Model Fine-Tuning. IALP 2024. [[paper]](https://www.asianlp.sg/conferences/ialp2024/proceedings/papers/IALP2024_P055.pdf)
1. Liu et al. Instruct-Code-Llama: Improving Capabilities of Language Model in Competition Level Code Generation by Online Judge Feedback. ICIC 2024. [[paper]](https://link.springer.com/chapter/10.1007/978-981-97-5669-8_11)
1. Wang et al. Cybernetic Sentinels: Unveiling the Impact of Safety Data Selection on Model Security in Supervised Fine-Tuning. ICIC 2024. [[paper]](https://link.springer.com/chapter/10.1007/978-981-97-5669-8_23)
1. Xia et al. Understanding the Performance and Estimating the Cost of LLM Fine-Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2408.04693)
1. Zeng et al. Perceive, Reflect, and Plan: Designing LLM Agent for Goal-Directed City Navigation without Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2408.04168)
1. Xia et al. Using Pre-trained Language Model for Accurate ESG Prediction. FinNLP 2024. [[paper]](https://aclanthology.org/2024.finnlp-2.1/)
1. Liang et al. I-SHEEP: Self-Alignment of LLM from Scratch through an Iterative Self-Enhancement Paradigm. 2024. [[arxiv]](https://arxiv.org/abs/2408.08072)
1. Bai et al. Aligning Large Language Model with Direct Multi-Preference Optimization for Recommendation. CIKM 2024. [[paper]](https://dl.acm.org/doi/10.1145/3627673.3679611)
1. Zhang et al. CPsyCoun: A Report-based Multi-turn Dialogue Reconstruction and Evaluation Framework for Chinese Psychological Counseling. ACL 2024. [[paper]](https://aclanthology.org/2024.findings-acl.830.pdf)
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: A large language model for Astronomy, based on ChatGLM2-6B and Qwen-14B.
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: A large language model specialized in Chinese legal domain, based on Baichuan-13B, is capable of retrieving and reasoning on legal knowledge.
1. **[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: A large language model specialized in Chinese medical domain, based on Baichuan-7B and ChatGLM-6B.
1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: A series of large language models for Chinese medical domain, based on LLaMA2-7B and Baichuan-13B.
1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**: A series of MBTI Personality large language models, capable of giving any LLM 16 different personality types based on different datasets and training methods.
1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**: A large language model specialized in generating metadata for stable diffusion. [[demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt)
1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: A multimodal large language model specialized in Chinese medical domain, based on LLaVA-1.5-7B.
1. **[AutoRE](https://github.com/THUDM/AutoRE)**: A document-level relation extraction system based on large language models.
1. **[NVIDIA RTX AI Toolkit](https://github.com/NVIDIA/RTX-AI-Toolkit)**: SDKs for fine-tuning LLMs on Windows PC for NVIDIA RTX.
1. **[LazyLLM](https://github.com/LazyAGI/LazyLLM)**: An easy and lazy way for building multi-agent LLMs applications and supports model fine-tuning via LLaMA Factory.
1. **[RAG-Retrieval](https://github.com/NLPJCL/RAG-Retrieval)**: A full pipeline for RAG retrieval model fine-tuning, inference, and distillation. [[blog]](https://zhuanlan.zhihu.com/p/987727357)
1. **[360-LLaMA-Factory](https://github.com/Qihoo360/360-LLaMA-Factory)**: A modified library that supports long sequence SFT & DPO using ring attention.
1. **[Sky-T1](https://novasky-ai.github.io/posts/sky-t1/)**: An o1-like model fine-tuned by NovaSky AI with very small cost.
1. **[WeClone](https://github.com/xming521/WeClone)**: One-stop solution for creating your digital avatar from chat logs.
1. **[EmoLLM](https://github.com/SmartFlowAI/EmoLLM)**: A project about large language models (LLMs) and mental health.
</details>
## License
This repository is licensed under the [Apache-2.0 License](LICENSE).
Please follow the model licenses to use the corresponding model weights: [BLOOM](https://huggingface.co/spaces/bigscience/license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Llama 4](https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
## Citation
If this work is helpful, please kindly cite as:
```bibtex
@inproceedings{zheng2024llamafactory,
title={LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models},
author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Zhangchi Feng and Yongqiang Ma},
booktitle={Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)},
address={Bangkok, Thailand},
publisher={Association for Computational Linguistics},
year={2024},
url={http://arxiv.org/abs/2403.13372}
}
```
## Acknowledgement
This repo benefits from [PEFT](https://github.com/huggingface/peft), [TRL](https://github.com/huggingface/trl), [QLoRA](https://github.com/artidoro/qlora) and [FastChat](https://github.com/lm-sys/FastChat). Thanks for their wonderful works.
## Star History

================================================
FILE: README_zh.md
================================================

[](https://github.com/hiyouga/LLaMA-Factory/stargazers)
[](https://github.com/hiyouga/LLaMA-Factory/commits/main)
[](https://github.com/hiyouga/LLaMA-Factory/graphs/contributors)
[](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml)
[](https://pypi.org/project/llamafactory/)
[](https://scholar.google.com/scholar?cites=12620864006390196564)
[](https://hub.docker.com/r/hiyouga/llamafactory/tags)
[](https://twitter.com/llamafactory_ai)
[](https://discord.gg/rKfvV9r9FK)
[](https://github.com/hiyouga/llamafactory-community)
[](https://blog.llamafactory.net/)
[](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing)
[](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)
[](https://www.lab4ai.cn/course/detail?id=7c13e60f6137474eb40f6fd3983c0f46&utm_source=LLaMA-Factory)
[](https://www.llamafactory.com.cn/?utm_source=LLaMA-Factory)
[](https://huggingface.co/spaces/hiyouga/LLaMA-Board)
[](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
[](https://novita.ai/templates-library/105981?sharer=88115474-394e-4bda-968e-b88e123d0c47)
### 获得[亚马逊](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)、[英伟达](https://developer.nvidia.cn/rtx/ai-toolkit)、[阿里云](https://help.aliyun.com/zh/pai/use-cases/fine-tune-a-llama-3-model-with-llama-factory)等的应用。
<div align="center" markdown="1">
### 赞助商 ❤️
| <div style="text-align: center;"><a href="https://warp.dev/llama-factory"><img alt="Warp sponsorship" width="400" src="assets/sponsors/warp.jpg"></a><br><a href="https://warp.dev/llama-factory" style="font-size:larger;">Warp,面向开发者的智能终端</a><br><a href="https://warp.dev/llama-factory">适用于 MacOS、Linux 和 Windows</a> | <a href="https://serpapi.com"><img alt="SerpAPI sponsorship" width="250" src="assets/sponsors/serpapi.svg"> </a> |
| ---- | ---- |
----
### 使用零代码[命令行](#快速开始)与 [Web UI](#llama-board-可视化微调由-gradio-驱动) 轻松微调百余种大模型

</div>
👋 加入我们的[微信群](https://github.com/hiyouga/llamafactory-community/blob/main/wechat/main.jpg)、[NPU 用户群](https://github.com/hiyouga/llamafactory-community/blob/main/wechat/npu.jpg)、[大模型实验室群](https://github.com/hiyouga/llamafactory-community/blob/main/wechat/lab4ai.jpg) 或 [LLaMA Factory Online 用户群](https://github.com/hiyouga/llamafactory-community/blob/main/wechat/online.png)。
\[ [English](README.md) | 中文 \]
**微调大模型可以像这样轻松…**
https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
开始本地训练:
- 请见[如何使用](#如何使用)
开始云端训练:
- **Colab(免费)**:https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing
- **PAI-DSW(免费试用)**:https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
- **LLaMA Factory Online(在线微调)**:https://www.llamafactory.com.cn/?utm_source=LLaMA-Factory
- **九章智算云(算力优惠活动)**:https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory
阅读技术文档:
- **入门教程**:https://zhuanlan.zhihu.com/p/695287607
- **微调视频教程**:https://www.bilibili.com/video/BV1djgRzxEts/
- **框架文档**:https://llamafactory.readthedocs.io/zh-cn/latest/
- **框架文档(昇腾 NPU)**:https://ascend.github.io/docs/sources/llamafactory/
- **官方博客**:https://blog.llamafactory.net/
- **官方课程**:https://www.lab4ai.cn/course/detail?id=7c13e60f6137474eb40f6fd3983c0f46&utm_source=LLaMA-Factory
> [!NOTE]
> 除上述链接以外的其他网站均为未经许可的第三方网站,请小心甄别。
## 目录
- [项目特色](#项目特色)
- [官方博客](#官方博客)
- [更新日志](#更新日志)
- [模型](#模型)
- [训练方法](#训练方法)
- [数据集](#数据集)
- [软硬件依赖](#软硬件依赖)
- [如何使用](#如何使用)
- [安装 LLaMA Factory](#安装-llama-factory)
- [数据准备](#数据准备)
- [快速开始](#快速开始)
- [LLaMA Board 可视化微调](#llama-board-可视化微调由-gradio-驱动)
- [LLaMA Factory Online 在线微调](#llama-factory-online-在线微调)
- [构建 Docker](#构建-docker)
- [利用 vLLM 部署 OpenAI API](#利用-vllm-部署-openai-api)
- [从魔搭社区下载](#从魔搭社区下载)
- [从魔乐社区下载](#从魔乐社区下载)
- [使用 W&B 面板](#使用-wb-面板)
- [使用 SwanLab 面板](#使用-swanlab-面板)
- [使用了 LLaMA Factory 的项目](#使用了-llama-factory-的项目)
- [协议](#协议)
- [引用](#引用)
- [致谢](#致谢)
## 项目特色
- **多种模型**:LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen3、Qwen3-VL、DeepSeek、Gemma、GLM、Phi 等等。
- **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练、ORPO 训练等等。
- **多种精度**:16 比特全参数微调、冻结微调、LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8/HQQ/EETQ 的 2/3/4/5/6/8 比特 QLoRA 微调。
- **先进算法**:[GaLore](https://github.com/jiaweizzhao/GaLore)、[BAdam](https://github.com/Ledzy/BAdam)、[APOLLO](https://github.com/zhuhanqing/APOLLO)、[Adam-mini](https://github.com/zyushun/Adam-mini)、[Muon](https://github.com/KellerJordan/Muon)、[OFT](https://github.com/huggingface/peft/tree/main/src/peft/tuners/oft)、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 PiSSA。
- **实用技巧**:[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)、[Unsloth](https://github.com/unslothai/unsloth)、[Liger Kernel](https://github.com/linkedin/Liger-Kernel)、[KTransformers](https://github.com/kvcache-ai/ktransformers/)、RoPE scaling、NEFTune 和 rsLoRA。
- **广泛任务**:多轮对话、工具调用、图像理解、视觉定位、视频识别和语音理解等等。
- **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow、[SwanLab](https://github.com/SwanHubX/SwanLab) 等等。
- **极速推理**:基于 [vLLM](https://github.com/vllm-project/vllm) 或 [SGLang](https://github.com/sgl-project/sglang) 的 OpenAI 风格 API、浏览器界面和命令行接口。
### 最新模型的 Day-N 微调适配
| 适配时间 | 模型名称 |
| ------------ | -------------------------------------------------------------------- |
| Day 0 | Qwen3 / Qwen2.5-VL / Gemma 3 / GLM-4.1V / InternLM 3 / MiniCPM-o-2.6 |
| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 / Llama 4 |
## 官方博客
> [!TIP]
> 我们现在拥有了 LLaMA Factory 的专属博客!
>
> 网站地址:https://blog.llamafactory.net/
- 💡 [KTransformers Fine-Tuning × LLaMA Factory: 用2张4090级的GPU+CPU 微调 1000B规模的超大模型](https://swcil84qspu.feishu.cn/wiki/Z1sSwb2poijybxkyPEkcDG6enVc) (中文)
- 💡 [Easy Dataset × LLaMA Factory: 让大模型高效学习领域知识](https://buaa-act.feishu.cn/wiki/KY9xwTGs1iqHrRkjXBwcZP9WnL9)(中文)
- [使用 LLaMA-Factory 微调心理健康大模型](https://www.lab4ai.cn/project/detail?id=25cce32ec131497b9e06a93336a0817f&type=project&utm_source=LLaMA-Factory)(中文)
- [使用 LLaMA-Factory 构建 GPT-OSS 角色扮演模型](https://docs.llamafactory.com.cn/docs/documents/best-practice/gptroleplay/?utm_source=LLaMA-Factory)(中文)
- [基于 LLaMA-Factory 和 EasyR1 打造一站式无代码大模型强化学习和部署平台 LLM Model Hub](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/)(中文)
- [通过亚马逊 SageMaker HyperPod 上的 LLaMA-Factory 增强多模态模型银行文档的视觉信息提取](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/)(英文)
<details><summary>全部博客</summary>
- [使用 LLaMA-Factory 微调 Llama3.1-70B 医学诊断模型](https://docs.alayanew.com/docs/documents/bestPractice/bigModel/llama70B/?utm_source=LLaMA-Factory)(中文)
- [使用 LLaMA-Factory 微调 Qwen2.5-VL 实现自动驾驶场景微调](https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory)(中文)
- [LLaMA Factory:微调 DeepSeek-R1-Distill-Qwen-7B 模型实现新闻标题分类器](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_deepseek_r1_distill_7b)(中文)
- [基于 Amazon SageMaker 和 LLaMA-Factory 打造一站式无代码模型微调部署平台 Model Hub](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)(中文)
- [LLaMA Factory 多模态微调实践:微调 Qwen2-VL 构建文旅大模型](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)(中文)
- [LLaMA Factory:微调 Llama3 模型实现角色扮演](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)(中文)
</details>
## 更新日志
[25/10/26] 我们支持了 Megatron-core 作为训练后端,并适配了 [**mcore_adapter**](https://github.com/alibaba/ROLL/tree/main/mcore_adapter)。查看 [PR #9237](https://github.com/hiyouga/LLaMA-Factory/pull/9237) 以使用。
[25/08/22] 我们支持了 **[OFT](https://arxiv.org/abs/2306.07280)** 和 **[OFTv2](https://arxiv.org/abs/2506.19847)** 模型的微调。查看 [examples](examples/README.md) 以使用。
[25/08/20] 我们支持了 **[Intern-S1-mini](https://huggingface.co/internlm/Intern-S1-mini)** 模型的微调。查看 [PR #8976](https://github.com/hiyouga/LLaMA-Factory/pull/8976) 以使用。
[25/08/06] 我们支持了 **[GPT-OSS](https://github.com/openai/gpt-oss)** 模型的微调。查看 [PR #8826](https://github.com/hiyouga/LLaMA-Factory/pull/8826) 以使用。
<details><summary>展开日志</summary>
[25/07/02] 我们支持了 **[GLM-4.1V-9B-Thinking](https://github.com/THUDM/GLM-4.1V-Thinking)** 模型的微调。
[25/04/28] 我们支持了 **[Qwen3](https://qwenlm.github.io/blog/qwen3/)** 系列模型的微调。
[25/04/21] 我们支持了 **[Muon](https://github.com/KellerJordan/Muon)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。感谢 [@tianshijing](https://github.com/tianshijing) 的 PR。
[25/04/16] 我们支持了 **[InternVL3](https://huggingface.co/OpenGVLab/InternVL3-8B)** 模型的微调。查看 [PR #7258](https://github.com/hiyouga/LLaMA-Factory/pull/7258) 以使用。
[25/04/14] 我们支持了 **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** 和 **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** 模型的微调。
[25/04/06] 我们支持了 **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** 模型的微调。查看 [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) 以使用。
[25/03/31] 我们支持了 **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** 模型的微调。查看 [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) 以使用。
[25/03/15] 我们支持了 **[SGLang](https://github.com/sgl-project/sglang)** 推理后端,请使用 `infer_backend: sglang` 启用。
[25/03/12] 我们支持了 **[Gemma 3](https://huggingface.co/blog/gemma3)** 模型的微调。
[25/02/24] 我们宣布开源 **[EasyR1](https://github.com/hiyouga/EasyR1)**,一个高效可扩展的多模态强化学习框架,支持高效的 GRPO 训练。
[25/02/11] 我们支持了在导出模型时保存 **[Ollama](https://github.com/ollama/ollama)** 配置文件。详细用法请参照 [examples](examples/README_zh.md)。
[25/02/05] 我们支持了在语音理解任务上微调 **[Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)** 和 **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** 模型。
[25/01/31] 我们支持了 **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** 和 **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** 模型的微调。
[25/01/15] 我们支持了 **[APOLLO](https://arxiv.org/abs/2412.05270)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。
[25/01/14] 我们支持了 **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** 和 **[MiniCPM-V-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6)** 模型的微调。感谢 [@BUAADreamer](https://github.com/BUAADreamer) 的 PR。
[25/01/14] 我们支持了 **[InternLM 3](https://huggingface.co/collections/internlm/)** 模型的微调。感谢 [@hhaAndroid](https://github.com/hhaAndroid) 的 PR。
[25/01/10] 我们支持了 **[Phi-4](https://huggingface.co/microsoft/phi-4)** 模型的微调。
[24/12/21] 我们支持了使用 **[SwanLab](https://github.com/SwanHubX/SwanLab)** 跟踪与可视化实验。详细用法请参考 [此部分](#使用-swanlab-面板)。
[24/11/27] 我们支持了 **[Skywork-o1](https://huggingface.co/Skywork/Skywork-o1-Open-Llama-3.1-8B)** 模型的微调和 **[OpenO1](https://huggingface.co/datasets/O1-OPEN/OpenO1-SFT)** 数据集。
[24/10/09] 我们支持了从 **[魔乐社区](https://modelers.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#从魔乐社区下载)。
[24/09/19] 我们支持了 **[Qwen2.5](https://qwenlm.github.io/blog/qwen2.5/)** 模型的微调。
[24/08/30] 我们支持了 **[Qwen2-VL](https://qwenlm.github.io/blog/qwen2-vl/)** 模型的微调。感谢 [@simonJJJ](https://github.com/simonJJJ) 的 PR。
[24/08/27] 我们支持了 **[Liger Kernel](https://github.com/linkedin/Liger-Kernel)**。请使用 `enable_liger_kernel: true` 来加速训练。
[24/08/09] 我们支持了 **[Adam-mini](https://github.com/zyushun/Adam-mini)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。感谢 [@relic-yuexi](https://github.com/relic-yuexi) 的 PR。
[24/07/04] 我们支持了[无污染打包训练](https://github.com/MeetKai/functionary/tree/main/functionary/train/packing)。请使用 `neat_packing: true` 参数。感谢 [@chuan298](https://github.com/chuan298) 的 PR。
[24/06/16] 我们支持了 **[PiSSA](https://arxiv.org/abs/2404.02948)** 算法。详细用法请参照 [examples](examples/README_zh.md)。
[24/06/07] 我们支持了 **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** 和 **[GLM-4](https://github.com/THUDM/GLM-4)** 模型的微调。
[24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。
[24/05/20] 我们支持了 **PaliGemma** 系列模型的微调。注意 PaliGemma 是预训练模型,你需要使用 `paligemma` 模板进行微调使其获得对话能力。
[24/05/18] 我们支持了 **[KTO](https://arxiv.org/abs/2402.01306)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。
[24/05/14] 我们支持了昇腾 NPU 设备的训练和推理。详情请查阅[安装](#安装-llama-factory)部分。
[24/04/26] 我们支持了多模态模型 **LLaVA-1.5** 的微调。详细用法请参照 [examples](examples/README_zh.md)。
[24/04/22] 我们提供了在免费 T4 GPU 上微调 Llama-3 模型的 **[Colab 笔记本](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing)**。Hugging Face 社区公开了两个利用 LLaMA Factory 微调的 Llama-3 模型,详情请见 [Llama3-8B-Chinese-Chat](https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat) 和 [Llama3-Chinese](https://huggingface.co/zhichen/Llama3-Chinese)。
[24/04/21] 我们基于 [AstraMindAI 的仓库](https://github.com/astramind-ai/Mixture-of-depths)支持了 **[混合深度训练](https://arxiv.org/abs/2404.02258)**。详细用法请参照 [examples](examples/README_zh.md)。
[24/04/16] 我们支持了 **[BAdam](https://arxiv.org/abs/2404.02827)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。
[24/04/16] 我们支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的长序列训练(24GB 可训练 Llama-2-7B-56k)。该方法相比 FlashAttention-2 提供了 **117%** 的训练速度和 **50%** 的显存节约。更多数据请见[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。
[24/03/31] 我们支持了 **[ORPO](https://arxiv.org/abs/2403.07691)**。详细用法请参照 [examples](examples/README_zh.md)。
[24/03/21] 我们的论文 "[LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models](https://arxiv.org/abs/2403.13372)" 可在 arXiv 上查看!
[24/03/20] 我们支持了能在 2x24GB GPU 上微调 70B 模型的 **FSDP+QLoRA**。详细用法请参照 [examples](examples/README_zh.md)。
[24/03/13] 我们支持了 **[LoRA+](https://arxiv.org/abs/2402.12354)**。详细用法请参照 [examples](examples/README_zh.md)。
[24/03/07] 我们支持了 **[GaLore](https://arxiv.org/abs/2403.03507)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。
[24/03/07] 我们集成了 **[vLLM](https://github.com/vllm-project/vllm)** 以实现极速并发推理。请使用 `infer_backend: vllm` 来获得 **270%** 的推理速度。
[24/02/28] 我们支持了 **[DoRA](https://arxiv.org/abs/2402.09353)** 微调。请使用 `use_dora: true` 参数进行 DoRA 微调。
[24/02/15] 我们支持了 [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro) 提出的**块扩展**方法。详细用法请参照 [examples](examples/README_zh.md)。
[24/02/05] Qwen1.5(Qwen2 测试版)系列模型已在 LLaMA-Factory 中实现微调支持。详情请查阅该[博客页面](https://qwenlm.github.io/zh/blog/qwen1.5/)。
[24/01/18] 我们针对绝大多数模型实现了 **Agent 微调**,微调时指定 `dataset: glaive_toolcall_zh` 即可使模型获得工具调用能力。
[23/12/23] 我们针对 LLaMA, Mistral 和 Yi 模型支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的 LoRA 训练加速。请使用 `use_unsloth: true` 参数启用 unsloth 优化。该方法可提供 **170%** 的训练速度,详情请查阅[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。
[23/12/12] 我们支持了微调最新的混合专家模型 **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)**。硬件需求请查阅[此处](#硬件依赖)。
[23/12/01] 我们支持了从 **[魔搭社区](https://modelscope.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#从魔搭社区下载)。
[23/10/21] 我们支持了 **[NEFTune](https://arxiv.org/abs/2310.05914)** 训练技巧。请使用 `neftune_noise_alpha: 5` 参数启用 NEFTune。
[23/09/27] 我们针对 LLaMA 模型支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `shift_attn: true` 参数以启用该功能。
[23/09/23] 我们在项目中集成了 MMLU、C-Eval 和 CMMLU 评估集。详细用法请参照 [examples](examples/README_zh.md)。
[23/09/10] 我们支持了 **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**。如果您使用的是 RTX4090、A100 或 H100 GPU,请使用 `flash_attn: fa2` 参数以启用 FlashAttention-2。
[23/08/12] 我们支持了 **RoPE 插值**来扩展 LLaMA 模型的上下文长度。请使用 `rope_scaling: linear` 参数训练模型或使用 `rope_scaling: dynamic` 参数评估模型。
[23/08/11] 我们支持了指令模型的 **[DPO 训练](https://arxiv.org/abs/2305.18290)**。详细用法请参照 [examples](examples/README_zh.md)。
[23/07/31] 我们支持了**数据流式加载**。请使用 `streaming: true` 和 `max_steps: 10000` 参数来流式加载数据集。
[23/07/29] 我们在 Hugging Face 发布了两个 13B 指令微调模型。详细内容请查阅我们的 Hugging Face 项目([LLaMA-2](https://huggingface.co/hiyouga/Llama-2-Chinese-13b-chat) / [Baichuan](https://huggingface.co/hiyouga/Baichuan-13B-sft))。
[23/07/18] 我们开发了支持训练和测试的**浏览器一体化界面**。请使用 `train_web.py` 在您的浏览器中微调模型。感谢 [@KanadeSiina](https://github.com/KanadeSiina) 和 [@codemayq](https://github.com/codemayq) 在该功能开发中付出的努力。
[23/07/09] 我们开源了 **[FastEdit](https://github.com/hiyouga/FastEdit)** ⚡🩹,一个简单易用的、能迅速编辑大模型事实记忆的工具包。如果您感兴趣请关注我们的 [FastEdit](https://github.com/hiyouga/FastEdit) 项目。
[23/06/29] 我们提供了一个**可复现的**指令模型微调示例,详细内容请查阅 [Baichuan-7B-sft](https://huggingface.co/hiyouga/Baichuan-7B-sft)。
[23/06/22] 我们对齐了[示例 API](src/api_demo.py) 与 [OpenAI API](https://platform.openai.com/docs/api-reference/chat) 的格式,您可以将微调模型接入**任意基于 ChatGPT 的应用**中。
[23/06/03] 我们实现了 4 比特的 LoRA 训练(也称 **[QLoRA](https://github.com/artidoro/qlora)**)。详细用法请参照 [examples](examples/README_zh.md)。
</details>
> [!TIP]
> 如果您无法使用最新的功能,请尝试重新拉取代码并再次安装 LLaMA-Factory。
## 模型
| 模型名 | 参数量 | Template |
| ----------------------------------------------------------------- | -------------------------------- | -------------------- |
| [BLOOM/BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - |
| [DeepSeek (LLM/Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [DeepSeek 3-3.2](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
| [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
| [ERNIE-4.5](https://huggingface.co/baidu) | 0.3B/21B/300B | ernie_nothink |
| [Falcon/Falcon H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/11B/34B/40B/180B | falcon/falcon_h1 |
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 |
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 270M/1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
| [GLM-4.5/GLM-4.5(6)V](https://huggingface.co/zai-org) | 9B/106B/355B | glm4_moe/glm4_5v |
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt_oss |
| [Granite 3-4](https://huggingface.co/ibm-granite) | 1B/2B/3B/7B/8B | granite3/granite4 |
| [Hunyuan/Hunyuan1.5 (MT)](https://huggingface.co/tencent/)        | 0.5B/1.8B/4B/7B/13B              | hunyuan/hunyuan_small |
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
| [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl |
| [Intern-S1-mini](https://huggingface.co/internlm/) | 8B | intern_s1 |
| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
| [Ling 2.0 (mini/flash)](https://huggingface.co/inclusionAI) | 16B/100B | bailing_v2 |
| [LFM 2.5 (VL)](https://huggingface.co/LiquidAI) | 1.2B/1.6B | lfm2/lfm2_vl |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
| [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
| [Llama 4](https://huggingface.co/meta-llama) | 109B/402B | llama4 |
| [Llama 3.2 Vision](https://huggingface.co/meta-llama) | 11B/90B | mllama |
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B/309B | mimo/mimo_v2 |
| [MiniCPM 4](https://huggingface.co/openbmb) | 0.5B/8B | cpm4 |
| [MiniCPM-o/MiniCPM-V 4.5](https://huggingface.co/openbmb) | 8B/9B | minicpm_o/minicpm_v |
| [MiniMax-M1/MiniMax-M2](https://huggingface.co/MiniMaxAI/models) | 229B/456B | minimax1/minimax2 |
| [Ministral 3](https://huggingface.co/mistralai) | 3B/8B/14B | ministral3 |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
| [PaliGemma/PaliGemma2](https://huggingface.co/google) | 3B/10B/28B | paligemma |
| [Phi-3/Phi-3.5](https://huggingface.co/microsoft) | 4B/14B | phi |
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
| [Phi-4-mini/Phi-4](https://huggingface.co/microsoft) | 3.8B/14B | phi4_mini/phi4 |
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen2 (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink |
| [Qwen3.5](https://huggingface.co/Qwen) | 0.8B/2B/4B/9B/27B/35B/122B/397B | qwen3_5 |
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
| [Qwen3-Omni](https://huggingface.co/Qwen) | 30B | qwen3_omni |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
| [Qwen3-VL](https://huggingface.co/Qwen) | 2B/4B/8B/30B/32B/235B | qwen3_vl |
| [Seed (OSS/Coder)](https://huggingface.co/ByteDance-Seed) | 8B/36B | seed_oss/seed_coder |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [TeleChat 2-2.5](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
| [Yuan 2](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan |
> [!NOTE]
> 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。
>
> 如果模型有推理 / 非推理两个版本,请使用 `_nothink` 后缀来区分不同的模板。例如 `qwen3` 和 `qwen3_nothink`。
>
> 请务必在训练和推理时采用**完全一致**的模板。
>
> \*:您需要从 main 分支安装 `transformers` 并使用 `DISABLE_VERSION_CHECK=1` 来跳过版本检查。
>
> \*\*:您需要安装特定版本的 `transformers` 以使用该模型。
项目所支持模型的完整列表请参阅 [constants.py](src/llamafactory/extras/constants.py)。
您也可以在 [template.py](src/llamafactory/data/template.py) 中添加自己的对话模板。
## 训练方法
| 方法 | 全参数训练 | 部分参数训练 | LoRA | QLoRA |
| --------------------- | ------------------ | ------------------ | ------------------ | ------------------ |
| 预训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| 指令监督微调 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| 奖励模型训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| PPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| DPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| KTO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| ORPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| SimPO 训练 | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
> [!TIP]
> 有关 PPO 的实现细节,请参考[此博客](https://newfacade.github.io/notes-on-reinforcement-learning/17-ppo-trl.html)。
## 数据集
<details><summary>预训练数据集</summary>
- [Wiki Demo (en)](data/wiki_demo.txt)
- [RefinedWeb (en)](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)
- [RedPajama V2 (en)](https://huggingface.co/datasets/togethercomputer/RedPajama-Data-V2)
- [Wikipedia (en)](https://huggingface.co/datasets/olm/olm-wikipedia-20221220)
- [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered)
- [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile)
- [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B)
- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb)
- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu)
- [CCI3-HQ (zh)](https://huggingface.co/datasets/BAAI/CCI3-HQ)
- [CCI3-Data (zh)](https://huggingface.co/datasets/BAAI/CCI3-Data)
- [CCI4.0-M2-Base-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-Base-v1)
- [CCI4.0-M2-CoT-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-CoT-v1)
- [CCI4.0-M2-Extra-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-Extra-v1)
- [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack)
- [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata)
</details>
<details><summary>指令微调数据集</summary>
- [Identity (en&zh)](data/identity.json)
- [Stanford Alpaca (en)](https://github.com/tatsu-lab/stanford_alpaca)
- [Stanford Alpaca (zh)](https://github.com/ymcui/Chinese-LLaMA-Alpaca-3)
- [Alpaca GPT4 (en&zh)](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
- [Glaive Function Calling V2 (en&zh)](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2)
- [LIMA (en)](https://huggingface.co/datasets/GAIR/lima)
- [Guanaco Dataset (multilingual)](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)
- [BELLE 2M (zh)](https://huggingface.co/datasets/BelleGroup/train_2M_CN)
- [BELLE 1M (zh)](https://huggingface.co/datasets/BelleGroup/train_1M_CN)
- [BELLE 0.5M (zh)](https://huggingface.co/datasets/BelleGroup/train_0.5M_CN)
- [BELLE Dialogue 0.4M (zh)](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M)
- [BELLE School Math 0.25M (zh)](https://huggingface.co/datasets/BelleGroup/school_math_0.25M)
- [BELLE Multiturn Chat 0.8M (zh)](https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M)
- [UltraChat (en)](https://github.com/thunlp/UltraChat)
- [OpenPlatypus (en)](https://huggingface.co/datasets/garage-bAInd/Open-Platypus)
- [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k)
- [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT)
- [OpenOrca (en)](https://huggingface.co/datasets/Open-Orca/OpenOrca)
- [SlimOrca (en)](https://huggingface.co/datasets/Open-Orca/SlimOrca)
- [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct)
- [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)
- [Wiki QA (en)](https://huggingface.co/datasets/wiki_qa)
- [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa)
- [WebNovel (zh)](https://huggingface.co/datasets/zxbsmk/webnovel_cn)
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
- [deepctrl (en&zh)](https://www.modelscope.cn/datasets/deepctrl/deepctrl-sft-data)
- [Advertise Generating (zh)](https://huggingface.co/datasets/HasturOfficial/adgen)
- [ShareGPT Hyperfiltered (en)](https://huggingface.co/datasets/totally-not-an-llm/sharegpt-hyperfiltered-3k)
- [ShareGPT4 (en&zh)](https://huggingface.co/datasets/shibing624/sharegpt_gpt4)
- [UltraChat 200k (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k)
- [Infinity Instruct (zh)](https://huggingface.co/datasets/BAAI/Infinity-Instruct)
- [AgentInstruct (en)](https://huggingface.co/datasets/THUDM/AgentInstruct)
- [LMSYS Chat 1M (en)](https://huggingface.co/datasets/lmsys/lmsys-chat-1m)
- [Evol Instruct V2 (en)](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k)
- [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)
- [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction)
- [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo)
- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2)
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
- [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1)
- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub)
- [OpenO1-SFT (en&zh)](https://huggingface.co/datasets/O1-OPEN/OpenO1-SFT)
- [Open-Thoughts (en)](https://huggingface.co/datasets/open-thoughts/OpenThoughts-114k)
- [Open-R1-Math (en)](https://huggingface.co/datasets/open-r1/OpenR1-Math-220k)
- [Chinese-DeepSeek-R1-Distill (zh)](https://huggingface.co/datasets/Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT)
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
- [Pokemon-gpt4o-captions (en&zh)](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions)
- [DLR-Web (en)](https://huggingface.co/datasets/Attention1115/DLR-Web)
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
- [OpenSchnabeltier (de)](https://huggingface.co/datasets/mayflowergmbh/openschnabeltier_de)
- [Evol Instruct (de)](https://huggingface.co/datasets/mayflowergmbh/evol-instruct_de)
- [Dolphin (de)](https://huggingface.co/datasets/mayflowergmbh/dolphin_de)
- [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de)
- [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de)
- [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de)
</details>
<details><summary>偏好数据集</summary>
- [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k)
- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
- [COIG-P (zh)](https://huggingface.co/datasets/m-a-p/COIG-P)
- [RLHF-V (en)](https://huggingface.co/datasets/openbmb/RLHF-V-Dataset)
- [VLFeedback (en)](https://huggingface.co/datasets/Zhihui/VLFeedback)
- [RLAIF-V (en)](https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset)
- [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs)
- [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
- [Orca DPO (de)](https://huggingface.co/datasets/mayflowergmbh/intel_orca_dpo_pairs_de)
- [KTO mixed (en)](https://huggingface.co/datasets/argilla/kto-mix-15k)
</details>
部分数据集的使用需要确认,我们推荐使用下述命令登录您的 Hugging Face 账户。
```bash
pip install --upgrade huggingface_hub
huggingface-cli login
```
## 软硬件依赖
| 必需项 | 至少 | 推荐 |
| ------------ | ------- | --------- |
| python | 3.11 | >=3.11 |
| torch | 2.0.0 | 2.6.0 |
| torchvision | 0.15.0 | 0.21.0 |
| transformers | 4.49.0 | 4.50.0 |
| datasets | 2.16.0 | 3.2.0 |
| accelerate | 0.34.0 | 1.2.1 |
| peft | 0.14.0 | 0.15.1 |
| trl | 0.8.6 | 0.9.6 |
| 可选项 | 至少 | 推荐 |
| ------------ | ------- | --------- |
| CUDA | 11.6 | 12.2 |
| deepspeed | 0.10.0 | 0.16.4 |
| bitsandbytes | 0.39.0 | 0.43.1 |
| vllm | 0.4.3 | 0.8.2 |
| flash-attn | 2.5.6 | 2.7.2 |
### 硬件依赖
\* *估算值*
| 方法 | 精度 | 7B | 14B | 30B | 70B | `x`B |
| ------------------------------- | ---- | ----- | ----- | ----- | ------ | ------- |
| Full (`bf16` or `fp16`) | 32 | 120GB | 240GB | 600GB | 1200GB | `18x`GB |
| Full (`pure_bf16`) | 16 | 60GB | 120GB | 300GB | 600GB | `8x`GB |
| Freeze/LoRA/GaLore/APOLLO/BAdam | 16 | 16GB | 32GB | 64GB | 160GB | `2x`GB |
| QLoRA | 8 | 10GB | 20GB | 40GB | 80GB | `x`GB |
| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB | `x/2`GB |
| QLoRA | 2 | 4GB | 8GB | 16GB | 24GB | `x/4`GB |
## 如何使用
### 安装 LLaMA Factory
> [!IMPORTANT]
> 此步骤为必需。
#### 从源码安装
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e .
pip install -r requirements/metrics.txt
```
可选的额外依赖项:`metrics`、`deepspeed`。使用 `pip install -e . && pip install -r requirements/metrics.txt -r requirements/deepspeed.txt` 安装。
其他可选依赖项请参考 `examples/requirements/` 目录下的文件。
#### 从镜像安装
```bash
docker run -it --rm --gpus=all --ipc=host hiyouga/llamafactory:latest
```
该镜像基于 Ubuntu 22.04(x86\_64)、CUDA 12.4、Python 3.11、PyTorch 2.6.0 和 Flash-attn 2.7.4 构建。
查看全部镜像:https://hub.docker.com/r/hiyouga/llamafactory/tags
请参阅[构建 Docker](#构建-docker) 来重新构建镜像。
<details><summary>使用 <b>uv</b> 构建虚拟环境</summary>
使用 [uv](https://github.com/astral-sh/uv) 创建隔离的 Python 环境:
```bash
uv run llamafactory-cli webui
```
</details>
<details><summary>Windows 用户指南</summary>
#### 安装 PyTorch
Windows 平台需要额外手动安装 GPU 版本的 PyTorch 依赖包,您可以参考[官方网站](https://pytorch.org/get-started/locally/)和以下命令安装并测试 PyTorch 是否正确安装。
```bash
pip uninstall torch torchvision torchaudio
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
python -c "import torch; print(torch.cuda.is_available())"
```
如果看到 `True` 则说明安装成功。
若遇到类似 `Can't pickle local object` 的报错,请设置 `dataloader_num_workers: 0`。
#### 安装 BitsAndBytes
如果要在 Windows 平台上开启量化 LoRA(QLoRA),需要安装预编译的 `bitsandbytes` 库, 支持 CUDA 11.1 到 12.2, 请根据您的 CUDA 版本情况选择适合的[发布版本](https://github.com/jllllll/bitsandbytes-windows-webui/releases/tag/wheels)。
```bash
pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.2.post2-py3-none-win_amd64.whl
```
#### 安装 Flash Attention-2
如果要在 Windows 平台上开启 FlashAttention-2,请使用 [flash-attention-windows-wheel](https://huggingface.co/lldacing/flash-attention-windows-wheel) 中的脚本自行编译与安装。
</details>
<details><summary>昇腾 NPU 用户指南</summary>
在昇腾 NPU 设备上安装 LLaMA Factory 时,请升级 Python 到 3.10 及以上,并需要指定额外依赖项,使用 `pip install -r requirements/npu.txt` 命令安装。此外,还需要安装 **Ascend CANN Toolkit 与 Kernels**,安装方法请参考[安装教程](https://llamafactory.readthedocs.io/zh-cn/latest/advanced/npu_installation.html)。
您可以直接下载预安装的最新 Docker 镜像：
```bash
# Docker Hub
docker pull hiyouga/llamafactory:latest-npu-a2
docker pull hiyouga/llamafactory:latest-npu-a3
# quay.io
docker pull quay.io/ascend/llamafactory:latest-npu-a2
docker pull quay.io/ascend/llamafactory:latest-npu-a3
```
#### 安装 BitsAndBytes
如果要在 Ascend NPU 上进行基于 bitsandbytes 的 QLoRA 量化微调,请执行如下步骤:
1. 手动编译 bitsandbytes:请参考[安装文档](https://huggingface.co/docs/bitsandbytes/installation?backend=Ascend+NPU&platform=Ascend+NPU)完成 NPU 版的 bitsandbytes 安装,编译要求环境 cmake 版本不低于 3.22.1,g++ 版本不低于 12.x。
```bash
# 从源码安装 bitsandbytes
# 克隆 bitsandbytes 仓库, Ascend NPU 目前在 multi-backend-refactor 中支持
git clone -b multi-backend-refactor https://github.com/bitsandbytes-foundation/bitsandbytes.git
cd bitsandbytes/
# 安装依赖
pip install -r requirements-dev.txt
# 安装编译工具依赖,该步骤在不同系统上命令有所不同,供参考
apt-get install -y build-essential cmake
# 编译 & 安装
cmake -DCOMPUTE_BACKEND=npu -S .
make
pip install .
```
2. 安装 transformers 的 main 分支版本。
```bash
git clone -b main https://github.com/huggingface/transformers.git
cd transformers
pip install .
```
3. 在训练参数中设置 `double_quantization: false`,可参考[示例](examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml)。
</details>
### 数据准备
关于数据集文件的格式,请参考 [data/README_zh.md](data/README_zh.md) 的内容。你可以使用 HuggingFace / ModelScope / Modelers 上的数据集或加载本地数据集。
> [!NOTE]
> 使用自定义数据集时,请更新 `data/dataset_info.json` 文件。
您也可以使用 **[Easy Dataset](https://github.com/ConardLi/easy-dataset)**、**[DataFlow](https://github.com/OpenDCAI/DataFlow)** 和 **[GraphGen](https://github.com/open-sciencelab/GraphGen)** 构建用于微调的合成数据。
### 快速开始
下面三行命令分别对 Qwen3-4B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
```bash
llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml
```
高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。
> [!TIP]
> 使用 `llamafactory-cli help` 显示帮助信息。
>
> 遇到报错请先看[常见问题](https://github.com/hiyouga/LLaMA-Factory/issues/4614)。
### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动)
```bash
llamafactory-cli webui
```
### LLaMA Factory Online 在线微调
详情阅读该[文档](https://docs.llamafactory.com.cn/docs/documents/quickstart/getstarted/?utm_source=LLaMA-Factory)。
### 构建 Docker
CUDA 用户:
```bash
cd docker/docker-cuda/
docker compose up -d
docker compose exec llamafactory bash
```
昇腾 NPU 用户:
```bash
cd docker/docker-npu/
docker compose up -d
docker compose exec llamafactory bash
```
AMD ROCm 用户:
```bash
cd docker/docker-rocm/
docker compose up -d
docker compose exec llamafactory bash
```
<details><summary>不使用 Docker Compose 构建</summary>
CUDA 用户:
```bash
docker build -f ./docker/docker-cuda/Dockerfile \
--build-arg PIP_INDEX=https://pypi.org/simple \
--build-arg EXTRAS=metrics \
-t llamafactory:latest .
docker run -dit --ipc=host --gpus=all \
-p 7860:7860 \
-p 8000:8000 \
--name llamafactory \
llamafactory:latest
docker exec -it llamafactory bash
```
昇腾 NPU 用户:
```bash
docker build -f ./docker/docker-npu/Dockerfile \
--build-arg PIP_INDEX=https://pypi.org/simple \
--build-arg EXTRAS=torch-npu,metrics \
-t llamafactory:latest .
docker run -dit --ipc=host \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /etc/ascend_install.info:/etc/ascend_install.info \
-p 7860:7860 \
-p 8000:8000 \
--device /dev/davinci0 \
--device /dev/davinci_manager \
--device /dev/devmm_svm \
--device /dev/hisi_hdc \
--name llamafactory \
llamafactory:latest
docker exec -it llamafactory bash
```
AMD ROCm 用户:
```bash
docker build -f ./docker/docker-rocm/Dockerfile \
--build-arg PIP_INDEX=https://pypi.org/simple \
--build-arg EXTRAS=metrics \
-t llamafactory:latest .
docker run -dit --ipc=host \
-p 7860:7860 \
-p 8000:8000 \
--device /dev/kfd \
--device /dev/dri \
--name llamafactory \
llamafactory:latest
docker exec -it llamafactory bash
```
</details>
<details><summary>使用数据卷</summary>
您可以通过移除 Dockerfile 中 `VOLUME [ "/root/.cache/huggingface", "/app/shared_data", "/app/output" ]` 的注释来使用数据卷。
在构建 Docker 时使用参数 `-v ./hf_cache:/root/.cache/huggingface` 来挂载数据卷。各个数据卷的含义表示如下。
- `hf_cache`:使用宿主机的 Hugging Face 缓存文件夹。
- `shared_data`:宿主机中存放数据集的文件夹路径。
- `output`:将导出目录设置为该路径后,即可在宿主机中访问导出后的模型。
</details>
### 利用 vLLM 部署 OpenAI API
```bash
API_PORT=8000 llamafactory-cli api examples/inference/qwen3.yaml infer_backend=vllm vllm_enforce_eager=true
```
> [!TIP]
> API 文档请查阅[这里](https://platform.openai.com/docs/api-reference/chat/create)。
>
> 示例:[图像理解](scripts/api_example/test_image.py) | [工具调用](scripts/api_example/test_toolcall.py)
### 从魔搭社区下载
如果您在 Hugging Face 模型和数据集的下载中遇到了问题,可以通过下述方法使用魔搭社区。
```bash
export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1`
```
将 `model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔搭社区](https://modelscope.cn/models)查看所有可用的模型,例如 `LLM-Research/Meta-Llama-3-8B-Instruct`。
### 从魔乐社区下载
您也可以通过下述方法,使用魔乐社区下载数据集和模型。
```bash
export USE_OPENMIND_HUB=1 # Windows 使用 `set USE_OPENMIND_HUB=1`
```
将 `model_name_or_path` 设置为模型 ID 来加载对应的模型。在[魔乐社区](https://modelers.cn/models)查看所有可用的模型,例如 `TeleAI/TeleChat-7B-pt`。
### 使用 W&B 面板
若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请在 yaml 文件中添加下面的参数。
```yaml
report_to: wandb
run_name: test_run # 可选
```
在启动训练任务时,将 `WANDB_API_KEY` 设置为[密钥](https://wandb.ai/authorize)来登录 W&B 账户。
### 使用 SwanLab 面板
若要使用 [SwanLab](https://github.com/SwanHubX/SwanLab) 记录实验数据,请在 yaml 文件中添加下面的参数。
```yaml
use_swanlab: true
swanlab_run_name: test_run # 可选
```
在启动训练任务时，登录 SwanLab 账户有以下三种方式：
方式一:在 yaml 文件中添加 `swanlab_api_key=<your_api_key>` ,并设置为你的 [API 密钥](https://swanlab.cn/settings)。
方式二:将环境变量 `SWANLAB_API_KEY` 设置为你的 [API 密钥](https://swanlab.cn/settings)。
方式三:启动前使用 `swanlab login` 命令完成登录。
## 使用了 LLaMA Factory 的项目
如果您有项目希望添加至下述列表,请通过邮件联系或者创建一个 PR。
<details><summary>点击显示</summary>
1. Wang et al. ESRL: Efficient Sampling-based Reinforcement Learning for Sequence Generation. 2023. [[arxiv]](https://arxiv.org/abs/2308.02223)
1. Yu et al. Open, Closed, or Small Language Models for Text Classification? 2023. [[arxiv]](https://arxiv.org/abs/2308.10092)
1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526)
1. Luceri et al. Leveraging Large Language Models to Detect Influence Campaigns in Social Media. 2023. [[arxiv]](https://arxiv.org/abs/2311.07816)
1. Zhang et al. Alleviating Hallucinations of Large Language Models through Induced Hallucinations. 2023. [[arxiv]](https://arxiv.org/abs/2312.15710)
1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. KDD 2024. [[arxiv]](https://arxiv.org/abs/2401.04319)
1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2401.07286)
1. Choi et al. FACT-GPT: Fact-Checking Augmentation via Claim Matching with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2402.05904)
1. Zhang et al. AutoMathText: Autonomous Data Selection with Language Models for Mathematical Texts. 2024. [[arxiv]](https://arxiv.org/abs/2402.07625)
1. Lyu et al. KnowTuning: Knowledge-aware Fine-tuning for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11176)
1. Yang et al. LaCo: Large Language Model Pruning via Layer Collapse. 2024. [[arxiv]](https://arxiv.org/abs/2402.11187)
1. Bhardwaj et al. Language Models are Homer Simpson! Safety Re-Alignment of Fine-tuned Language Models through Task Arithmetic. 2024. [[arxiv]](https://arxiv.org/abs/2402.11746)
1. Yang et al. Enhancing Empathetic Response Generation by Augmenting LLMs with Small-scale Empathetic Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11801)
1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2402.11809)
1. Cao et al. Head-wise Shareable Attention for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11819)
1. Zhang et al. Enhancing Multilingual Capabilities of Large Language Models through Self-Distillation from Resource-Rich Languages. 2024. [[arxiv]](https://arxiv.org/abs/2402.12204)
1. Kim et al. Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.14714)
1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. ACL 2024. [[arxiv]](https://arxiv.org/abs/2402.15043)
1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333)
1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419)
1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228)
1. Wu et al. Large Language Models are Parallel Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2403.09073)
1. Zhang et al. EDT: Improving Large Language Models' Generation by Entropy-based Dynamic Temperature Sampling. 2024. [[arxiv]](https://arxiv.org/abs/2403.14541)
1. Weller et al. FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2403.15246)
1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. COLING 2024. [[arxiv]](https://arxiv.org/abs/2403.16008)
1. Zan et al. CodeS: Natural Language to Code Repository via Multi-Layer Sketch. 2024. [[arxiv]](https://arxiv.org/abs/2403.16443)
1. Liu et al. Extensive Self-Contrast Enables Feedback-Free Language Model Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2404.00604)
1. Luo et al. BAdam: A Memory Efficient Full Parameter Training Method for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.02827)
1. Du et al. Chinese Tiny LLM: Pretraining a Chinese-Centric Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2404.04167)
1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. ICML 2024. [[arxiv]](https://arxiv.org/abs/2404.04316)
1. Liu et al. Dynamic Generation of Personalities with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.07084)
1. Shang et al. How Far Have We Gone in Stripped Binary Code Understanding Using Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.09836)
1. Huang et al. LLMTune: Accelerate Database Knob Tuning with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.11581)
1. Deng et al. Text-Tuple-Table: Towards Information Integration in Text-to-Table Generation via Global Tuple Extraction. 2024. [[arxiv]](https://arxiv.org/abs/2404.14215)
1. Acikgoz et al. Hippocrates: An Open-Source Framework for Advancing Large Language Models in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2404.16621)
1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2404.17140)
1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. NAACL 2024. [[arxiv]](https://arxiv.org/abs/2404.18585)
1. Xu et al. Large Language Models for Cyber Security: A Systematic Literature Review. 2024. [[arxiv]](https://arxiv.org/abs/2405.04760)
1. Dammu et al. "They are uncultured": Unveiling Covert Harms and Social Threats in LLM Generated Conversations. 2024. [[arxiv]](https://arxiv.org/abs/2405.05378)
1. Yi et al. A safety realignment framework via subspace-oriented model fusion for large language models. 2024. [[arxiv]](https://arxiv.org/abs/2405.09055)
1. Lou et al. SPO: Multi-Dimensional Preference Sequential Alignment With Implicit Reward Modeling. 2024. [[arxiv]](https://arxiv.org/abs/2405.12739)
1. Zhang et al. Getting More from Less: Large Language Models are Good Spontaneous Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2405.13816)
1. Zhang et al. TS-Align: A Teacher-Student Collaborative Framework for Scalable Iterative Finetuning of Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2405.20215)
1. Zihong Chen. Sentence Segmentation and Sentence Punctuation Based on XunziALLM. 2024. [[paper]](https://aclanthology.org/2024.lt4hala-1.30)
1. Gao et al. The Best of Both Worlds: Toward an Honest and Helpful Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2406.00380)
1. Wang and Song. MARS: Benchmarking the Metaphysical Reasoning Abilities of Language Models with a Multi-task Evaluation Dataset. 2024. [[arxiv]](https://arxiv.org/abs/2406.02106)
1. Hu et al. Computational Limits of Low-Rank Adaptation (LoRA) for Transformer-Based Models. 2024. [[arxiv]](https://arxiv.org/abs/2406.03136)
1. Ge et al. Time Sensitive Knowledge Editing through Efficient Finetuning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2406.04496)
1. Tan et al. Peer Review as A Multi-Turn and Long-Context Dialogue with Role-Based Interactions. 2024. [[arxiv]](https://arxiv.org/abs/2406.05688)
1. Song et al. Turbo Sparse: Achieving LLM SOTA Performance with Minimal Activated Parameters. 2024. [[arxiv]](https://arxiv.org/abs/2406.05955)
1. Gu et al. RWKV-CLIP: A Robust Vision-Language Representation Learner. 2024. [[arxiv]](https://arxiv.org/abs/2406.06973)
1. Chen et al. Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees. 2024. [[arxiv]](https://arxiv.org/abs/2406.07115)
1. Zhu et al. Are Large Language Models Good Statisticians?. 2024. [[arxiv]](https://arxiv.org/abs/2406.07815)
1. Li et al. Know the Unknown: An Uncertainty-Sensitive Method for LLM Instruction Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2406.10099)
1. Ding et al. IntentionQA: A Benchmark for Evaluating Purchase Intention Comprehension Abilities of Language Models in E-commerce. 2024. [[arxiv]](https://arxiv.org/abs/2406.10173)
1. He et al. COMMUNITY-CROSS-INSTRUCT: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities. 2024. [[arxiv]](https://arxiv.org/abs/2406.12074)
1. Lin et al. FVEL: Interactive Formal Verification Environment with Large Language Models via Theorem Proving. 2024. [[arxiv]](https://arxiv.org/abs/2406.14408)
1. Treutlein et al. Connecting the Dots: LLMs can Infer and Verbalize Latent Structure from Disparate Training Data. 2024. [[arxiv]](https://arxiv.org/abs/2406.14546)
1. Feng et al. SS-Bench: A Benchmark for Social Story Generation and Evaluation. 2024. [[arxiv]](https://arxiv.org/abs/2406.15695)
1. Feng et al. Self-Constructed Context Decompilation with Fined-grained Alignment Enhancement. 2024. [[arxiv]](https://arxiv.org/abs/2406.17233)
1. Liu et al. Large Language Models for Cuffless Blood Pressure Measurement From Wearable Biosignals. 2024. [[arxiv]](https://arxiv.org/abs/2406.18069)
1. Iyer et al. Exploring Very Low-Resource Translation with LLMs: The University of Edinburgh's Submission to AmericasNLP 2024 Translation Task. AmericasNLP 2024. [[paper]](https://aclanthology.org/2024.americasnlp-1.25)
1. Li et al. Calibrating LLMs with Preference Optimization on Thought Trees for Generating Rationale in Science Question Scoring. 2024. [[arxiv]](https://arxiv.org/abs/2406.19949)
1. Yang et al. Financial Knowledge Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2407.00365)
1. Lin et al. DogeRM: Equipping Reward Models with Domain Knowledge through Model Merging. 2024. [[arxiv]](https://arxiv.org/abs/2407.01470)
1. Bako et al. Evaluating the Semantic Profiling Abilities of LLMs for Natural Language Utterances in Data Visualization. 2024. [[arxiv]](https://arxiv.org/abs/2407.06129)
1. Huang et al. RoLoRA: Fine-tuning Rotated Outlier-free LLMs for Effective Weight-Activation Quantization. 2024. [[arxiv]](https://arxiv.org/abs/2407.08044)
1. Jiang et al. LLM-Collaboration on Automatic Science Journalism for the General Audience. 2024. [[arxiv]](https://arxiv.org/abs/2407.09756)
1. Inouye et al. Applied Auto-tuning on LoRA Hyperparameters. 2024. [[paper]](https://scholarcommons.scu.edu/cseng_senior/272/)
1. Qi et al. Research on Tibetan Tourism Viewpoints information generation system based on LLM. 2024. [[arxiv]](https://arxiv.org/abs/2407.13561)
1. Xu et al. Course-Correction: Safety Alignment Using Synthetic Preferences. 2024. [[arxiv]](https://arxiv.org/abs/2407.16637)
1. Sun et al. LAMBDA: A Large Model Based Data Agent. 2024. [[arxiv]](https://arxiv.org/abs/2407.17535)
1. Zhu et al. CollectiveSFT: Scaling Large Language Models for Chinese Medical Benchmark with Collective Instructions in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2407.19705)
1. Yu et al. Correcting Negative Bias in Large Language Models through Negative Attention Score Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2408.00137)
1. Xie et al. The Power of Personalized Datasets: Advancing Chinese Composition Writing for Elementary School through Targeted Model Fine-Tuning. IALP 2024. [[paper]](https://www.asianlp.sg/conferences/ialp2024/proceedings/papers/IALP2024_P055.pdf)
1. Liu et al. Instruct-Code-Llama: Improving Capabilities of Language Model in Competition Level Code Generation by Online Judge Feedback. ICIC 2024. [[paper]](https://link.springer.com/chapter/10.1007/978-981-97-5669-8_11)
1. Wang et al. Cybernetic Sentinels: Unveiling the Impact of Safety Data Selection on Model Security in Supervised Fine-Tuning. ICIC 2024. [[paper]](https://link.springer.com/chapter/10.1007/978-981-97-5669-8_23)
1. Xia et al. Understanding the Performance and Estimating the Cost of LLM Fine-Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2408.04693)
1. Zeng et al. Perceive, Reflect, and Plan: Designing LLM Agent for Goal-Directed City Navigation without Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2408.04168)
1. Xia et al. Using Pre-trained Language Model for Accurate ESG Prediction. FinNLP 2024. [[paper]](https://aclanthology.org/2024.finnlp-2.1/)
1. Liang et al. I-SHEEP: Self-Alignment of LLM from Scratch through an Iterative Self-Enhancement Paradigm. 2024. [[arxiv]](https://arxiv.org/abs/2408.08072)
1. Bai et al. Aligning Large Language Model with Direct Multi-Preference Optimization for Recommendation. CIKM 2024. [[paper]](https://dl.acm.org/doi/10.1145/3627673.3679611)
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: 天文大模型 StarWhisper,基于 ChatGLM2-6B 和 Qwen-14B 在天文数据上微调而得。
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: 中文法律领域大模型 DISC-LawLLM,基于 Baichuan-13B 微调而得,具有法律推理和知识检索能力。
1. **[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: 孙思邈中文医疗大模型 Sunsimiao，基于 Baichuan-7B 和 ChatGLM-6B 在中文医疗数据上微调而得。
1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: 医疗大模型项目 CareGPT,基于 LLaMA2-7B 和 Baichuan-13B 在中文医疗数据上微调而得。
1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**:MBTI性格大模型项目,根据数据集与训练方式让任意 LLM 拥有 16 个不同的性格类型。
1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**:一个用于生成 Stable Diffusion 提示词的大型语言模型。[[demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt)
1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**:中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得。
1. **[AutoRE](https://github.com/THUDM/AutoRE)**:基于大语言模型的文档级关系抽取系统。
1. **[NVIDIA RTX AI Toolkit](https://github.com/NVIDIA/RTX-AI-Toolkit)**:在 Windows 主机上利用英伟达 RTX 设备进行大型语言模型微调的开发包。
1. **[LazyLLM](https://github.com/LazyAGI/LazyLLM)**：一个低代码构建多 Agent 大模型应用的开发工具，支持基于 LLaMA Factory 的模型微调。
1. **[RAG-Retrieval](https://github.com/NLPJCL/RAG-Retrieval)**:一个全链路 RAG 检索模型微调、推理和蒸馏代码库。[[blog]](https://zhuanlan.zhihu.com/p/987727357)
1. **[360-LLaMA-Factory](https://github.com/Qihoo360/360-LLaMA-Factory)**:一个魔改后的代码库,通过 Ring Attention 支持长序列的 SFT 和 DPO 训练。
1. **[Sky-T1](https://novasky-ai.github.io/posts/sky-t1/)**:由 NovaSky AI 微调的低成本类 o1 长推理模型。
1. **[WeClone](https://github.com/xming521/WeClone)**:从聊天记录创造数字分身的一站式解决方案。
</details>
## 协议
本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。
使用模型权重时,请遵循对应的模型协议:[BLOOM](https://huggingface.co/spaces/bigscience/license)/ [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Llama 4](https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
## 引用
如果您觉得此项目有帮助,请考虑以下列格式引用
```bibtex
@inproceedings{zheng2024llamafactory,
title={LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models},
author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Zhangchi Feng and Yongqiang Ma},
booktitle={Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)},
address={Bangkok, Thailand},
publisher={Association for Computational Linguistics},
year={2024},
url={http://arxiv.org/abs/2403.13372}
}
```
## 致谢
本项目受益于 [PEFT](https://github.com/huggingface/peft)、[TRL](https://github.com/huggingface/trl)、[QLoRA](https://github.com/artidoro/qlora) 和 [FastChat](https://github.com/lm-sys/FastChat),感谢以上诸位作者的付出。
## Star History

================================================
FILE: data/README.md
================================================
The [dataset_info.json](dataset_info.json) contains all available datasets. If you are using a custom dataset, please **make sure** to add a *dataset description* in `dataset_info.json` and specify `dataset: dataset_name` before training to use it.
The `dataset_info.json` file should be put in the `dataset_dir` directory. You can change `dataset_dir` to use another directory. The default value is `./data`.
Currently we support datasets in **alpaca** and **sharegpt** format. Allowed file types include json, jsonl, csv, parquet, arrow.
```json
"dataset_name": {
"hf_hub_url": "the name of the dataset repository on the Hugging Face hub. (if specified, ignore script_url, file_name and cloud_file_name)",
"ms_hub_url": "the name of the dataset repository on the Model Scope hub. (if specified, ignore script_url, file_name and cloud_file_name)",
"script_url": "the name of the directory containing a dataset loading script. (if specified, ignore file_name and cloud_file_name)",
"cloud_file_name": "the name of the dataset file in s3/gcs cloud storage. (if specified, ignore file_name)",
"file_name": "the name of the dataset folder or dataset file in this directory. (required if above are not specified)",
"formatting": "the format of the dataset. (optional, default: alpaca, can be chosen from {alpaca, sharegpt})",
"ranking": "whether the dataset is a preference dataset or not. (default: False)",
"subset": "the name of the subset. (optional, default: None)",
"split": "the name of dataset split to be used. (optional, default: train)",
"folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)",
"num_samples": "the number of samples in the dataset to be used. (optional, default: None)",
"columns (optional)": {
"prompt": "the column name in the dataset containing the prompts. (default: instruction)",
"query": "the column name in the dataset containing the queries. (default: input)",
"response": "the column name in the dataset containing the responses. (default: output)",
"history": "the column name in the dataset containing the histories. (default: None)",
"messages": "the column name in the dataset containing the messages. (default: conversations)",
"system": "the column name in the dataset containing the system prompts. (default: None)",
"tools": "the column name in the dataset containing the tool description. (default: None)",
"images": "the column name in the dataset containing the image inputs. (default: None)",
    "videos": "the column name in the dataset containing the video inputs. (default: None)",
    "audios": "the column name in the dataset containing the audio inputs. (default: None)",
"chosen": "the column name in the dataset containing the chosen answers. (default: None)",
"rejected": "the column name in the dataset containing the rejected answers. (default: None)",
"kto_tag": "the column name in the dataset containing the kto tags. (default: None)"
},
"tags (optional, used for the sharegpt format)": {
"role_tag": "the key in the message represents the identity. (default: from)",
"content_tag": "the key in the message represents the content. (default: value)",
"user_tag": "the value of the role_tag represents the user. (default: human)",
"assistant_tag": "the value of the role_tag represents the assistant. (default: gpt)",
"observation_tag": "the value of the role_tag represents the tool results. (default: observation)",
"function_tag": "the value of the role_tag represents the function call. (default: function_call)",
"system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)"
}
}
```
## Alpaca Format
### Supervised Fine-Tuning Dataset
* [Example dataset](alpaca_en_demo.json)
In supervised fine-tuning, the `instruction` column will be concatenated with the `input` column and used as the user prompt, then the user prompt would be `instruction\ninput`. The `output` column represents the model response.
For reasoning models, if the dataset contains chain-of-thought (CoT), the CoT needs to be placed in the model responses, such as `<think>cot</think>output`.
The `system` column will be used as the system prompt if specified.
The `history` column is a list consisting of string tuples representing prompt-response pairs in the history messages. Note that the responses in the history **will also be learned by the model** in supervised fine-tuning.
```json
[
{
"instruction": "user instruction (required)",
"input": "user input (optional)",
"output": "model response (required)",
"system": "system prompt (optional)",
"history": [
["user instruction in the first round (optional)", "model response in the first round (optional)"],
["user instruction in the second round (optional)", "model response in the second round (optional)"]
]
}
]
```
Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
```json
"dataset_name": {
"file_name": "data.json",
"columns": {
"prompt": "instruction",
"query": "input",
"response": "output",
"system": "system",
"history": "history"
}
}
```
> [!TIP]
> If the model has reasoning capabilities (e.g. Qwen3) but the dataset does not contain chain-of-thought (CoT), LLaMA-Factory will automatically add empty CoT to the data. When `enable_thinking` is `True` (slow thinking, by default), the empty CoT will be added to the model responses and loss computation will be considered; otherwise (fast thinking), it will be added to the user prompts and loss computation will be ignored. Please keep the `enable_thinking` parameter consistent during training and inference.
>
> If you want to train data containing CoT with slow thinking and data without CoT with fast thinking, you can set `enable_thinking` to `None`. However, this feature is relatively complicated and should be used with caution.
### Pre-training Dataset
- [Example dataset](c4_demo.jsonl)
In pre-training, only the `text` column will be used for model learning.
```json
[
{"text": "document"},
{"text": "document"}
]
```
Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
```json
"dataset_name": {
"file_name": "data.json",
"columns": {
"prompt": "text"
}
}
```
### Preference Dataset
Preference datasets are used for reward modeling, DPO training, ORPO and SimPO training.
It requires a better response in `chosen` column and a worse response in `rejected` column.
```json
[
{
"instruction": "user instruction (required)",
"input": "user input (optional)",
"chosen": "chosen answer (required)",
"rejected": "rejected answer (required)"
}
]
```
Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
```json
"dataset_name": {
"file_name": "data.json",
"ranking": true,
"columns": {
"prompt": "instruction",
"query": "input",
"chosen": "chosen",
"rejected": "rejected"
}
}
```
### KTO Dataset
An additional column `kto_tag` is required. Please refer to the [sharegpt](#sharegpt-format) format for details.
### Multimodal Image Dataset
An additional column `images` is required. Please refer to the [sharegpt](#sharegpt-format) format for details.
### Multimodal Video Dataset
An additional column `videos` is required. Please refer to the [sharegpt](#sharegpt-format) format for details.
### Multimodal Audio Dataset
An additional column `audios` is required. Please refer to the [sharegpt](#sharegpt-format) format for details.
## Sharegpt Format
### Supervised Fine-Tuning Dataset
- [Example dataset](glaive_toolcall_en_demo.json)
Compared to the alpaca format, the sharegpt format allows the datasets to have **more roles**, such as human, gpt, observation and function. They are presented in a list of objects in the `conversations` column.
Note that the human and observation should appear in odd positions, while gpt and function should appear in even positions. The gpt and function will be learned by the model.
```json
[
{
"conversations": [
{
"from": "human",
"value": "user instruction"
},
{
"from": "function_call",
"value": "tool arguments"
},
{
"from": "observation",
"value": "tool result"
},
{
"from": "gpt",
"value": "model response"
}
],
"system": "system prompt (optional)",
"tools": "tool description (optional)"
}
]
```
Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
```json
"dataset_name": {
"file_name": "data.json",
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"system": "system",
"tools": "tools"
}
}
```
### Pre-training Dataset
Not yet supported, please use the [alpaca](#alpaca-format) format.
### Preference Dataset
- [Example dataset](dpo_en_demo.json)
Preference datasets in sharegpt format also require a better message in the `chosen` column and a worse message in the `rejected` column.
```json
[
{
"conversations": [
{
"from": "human",
"value": "user instruction"
},
{
"from": "gpt",
"value": "model response"
},
{
"from": "human",
"value": "user instruction"
}
],
"chosen": {
"from": "gpt",
"value": "chosen answer (required)"
},
"rejected": {
"from": "gpt",
"value": "rejected answer (required)"
}
}
]
```
Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
```json
"dataset_name": {
"file_name": "data.json",
"formatting": "sharegpt",
"ranking": true,
"columns": {
"messages": "conversations",
"chosen": "chosen",
"rejected": "rejected"
}
}
```
### KTO Dataset
- [Example dataset](kto_en_demo.json)
KTO datasets require an extra `kto_tag` column containing the boolean human feedback.
```json
[
{
"conversations": [
{
"from": "human",
"value": "user instruction"
},
{
"from": "gpt",
"value": "model response"
}
],
"kto_tag": "human feedback [true/false] (required)"
}
]
```
Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
```json
"dataset_name": {
"file_name": "data.json",
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"kto_tag": "kto_tag"
}
}
```
### Multimodal Image Dataset
- [Example dataset](mllm_demo.json)
Multimodal image datasets require an `images` column containing the paths to the input images.
The number of images should be identical to the `<image>` tokens in the conversations.
```json
[
{
"conversations": [
{
"from": "human",
"value": "<image>user instruction"
},
{
"from": "gpt",
"value": "model response"
}
],
"images": [
"image path (required)"
]
}
]
```
Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
```json
"dataset_name": {
"file_name": "data.json",
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"images": "images"
}
}
```
### Multimodal Video Dataset
- [Example dataset](mllm_video_demo.json)
Multimodal video datasets require a `videos` column containing the paths to the input videos.
The number of videos should be identical to the `<video>` tokens in the conversations.
```json
[
{
"conversations": [
{
"from": "human",
"value": "<video>user instruction"
},
{
"from": "gpt",
"value": "model response"
}
],
"videos": [
"video path (required)"
]
}
]
```
Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
```json
"dataset_name": {
"file_name": "data.json",
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"videos": "videos"
}
}
```
### Multimodal Audio Dataset
- [Example dataset](mllm_audio_demo.json)
Multimodal audio datasets require an `audios` column containing the paths to the input audios.
The number of audios should be identical to the `<audio>` tokens in the conversations.
```json
[
{
"conversations": [
{
"from": "human",
"value": "<audio>user instruction"
},
{
"from": "gpt",
"value": "model response"
}
],
"audios": [
"audio path (required)"
]
}
]
```
Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
```json
"dataset_name": {
"file_name": "data.json",
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"audios": "audios"
}
}
```
### OpenAI Format
The openai format is simply a special case of the sharegpt format, where the first message may be a system prompt.
```json
[
{
"messages": [
{
"role": "system",
"content": "system prompt (optional)"
},
{
"role": "user",
"content": "user instruction"
},
{
"role": "assistant",
"content": "model response"
}
]
}
]
```
Regarding the above dataset, the *dataset description* in `dataset_info.json` should be:
```json
"dataset_name": {
"file_name": "data.json",
"formatting": "sharegpt",
"columns": {
"messages": "messages"
},
"tags": {
"role_tag": "role",
"content_tag": "content",
"user_tag": "user",
"assistant_tag": "assistant",
"system_tag": "system"
}
}
```
================================================
FILE: data/README_zh.md
================================================
[dataset_info.json](dataset_info.json) 包含了所有可用的数据集。如果您希望使用自定义数据集,请**务必**在 `dataset_info.json` 文件中添加*数据集描述*,并通过修改 `dataset: 数据集名称` 配置来使用数据集。
其中 `dataset_info.json` 文件应放置在 `dataset_dir` 目录下。您可以通过修改 `dataset_dir` 参数来使用其他目录。默认值为 `./data`。
目前我们支持 **alpaca** 格式和 **sharegpt** 格式的数据集。允许的文件类型包括 json、jsonl、csv、parquet 和 arrow。
```json
"数据集名称": {
"hf_hub_url": "Hugging Face 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)",
"ms_hub_url": "ModelScope 的数据集仓库地址(若指定,则忽略 script_url 和 file_name)",
"script_url": "包含数据加载脚本的本地文件夹名称(若指定,则忽略 file_name)",
"file_name": "该目录下数据集文件夹或文件的名称(若上述参数未指定,则此项必需)",
"formatting": "数据集格式(可选,默认:alpaca,可以为 alpaca 或 sharegpt)",
"ranking": "是否为偏好数据集(可选,默认:False)",
"subset": "数据集子集的名称(可选,默认:None)",
"split": "所使用的数据集切分(可选,默认:train)",
"folder": "Hugging Face 仓库的文件夹名称(可选,默认:None)",
"num_samples": "该数据集所使用的样本数量。(可选,默认:None)",
"columns(可选)": {
"prompt": "数据集代表提示词的表头名称(默认:instruction)",
"query": "数据集代表请求的表头名称(默认:input)",
"response": "数据集代表回答的表头名称(默认:output)",
"history": "数据集代表历史对话的表头名称(默认:None)",
"messages": "数据集代表消息列表的表头名称(默认:conversations)",
"system": "数据集代表系统提示的表头名称(默认:None)",
"tools": "数据集代表工具描述的表头名称(默认:None)",
"images": "数据集代表图像输入的表头名称(默认:None)",
"videos": "数据集代表视频输入的表头名称(默认:None)",
"audios": "数据集代表音频输入的表头名称(默认:None)",
"chosen": "数据集代表更优回答的表头名称(默认:None)",
"rejected": "数据集代表更差回答的表头名称(默认:None)",
"kto_tag": "数据集代表 KTO 标签的表头名称(默认:None)"
},
"tags(可选,用于 sharegpt 格式)": {
"role_tag": "消息中代表发送者身份的键名(默认:from)",
"content_tag": "消息中代表文本内容的键名(默认:value)",
"user_tag": "消息中代表用户的 role_tag(默认:human)",
"assistant_tag": "消息中代表助手的 role_tag(默认:gpt)",
"observation_tag": "消息中代表工具返回结果的 role_tag(默认:observation)",
"function_tag": "消息中代表工具调用的 role_tag(默认:function_call)",
"system_tag": "消息中代表系统提示的 role_tag(默认:system,会覆盖 system column)"
}
}
```
## Alpaca 格式
### 指令监督微调数据集
- [样例数据集](alpaca_zh_demo.json)
在指令监督微调时,`instruction` 列对应的内容会与 `input` 列对应的内容拼接后作为提示词,即提示词为 `instruction\ninput`。而 `output` 列对应的内容为模型回答。
对于推理类模型的微调,如果数据集包含思维链,则需要把思维链放在模型回答中,例如 `<think>cot</think>output`。
如果指定,`system` 列对应的内容将被作为系统提示词。
`history` 列是由多个字符串二元组构成的列表,分别代表历史消息中每轮对话的指令和回答。注意在指令监督微调时,历史消息中的回答内容**也会被用于模型学习**。
```json
[
{
"instruction": "用户指令(必填)",
"input": "用户输入(选填)",
"output": "模型回答(必填)",
"system": "系统提示词(选填)",
"history": [
["第一轮指令(选填)", "第一轮回答(选填)"],
["第二轮指令(选填)", "第二轮回答(选填)"]
]
}
]
```
对于上述格式的数据,`dataset_info.json` 中的*数据集描述*应为:
```json
"数据集名称": {
"file_name": "data.json",
"columns": {
"prompt": "instruction",
"query": "input",
"response": "output",
"system": "system",
"history": "history"
}
}
```
> [!TIP]
> 如果模型本身具备推理能力(如 Qwen3)而数据集不包含思维链,LLaMA-Factory 会自动为数据添加空思维链。当 `enable_thinking` 为 `True` 时(慢思考,默认),空思维链会添加到模型回答中并且计算损失,否则会添加到用户指令中并且不计算损失(快思考)。请在训练和推理时保持 `enable_thinking` 参数一致。
>
> 如果您希望训练包含思维链的数据时使用慢思考,训练不包含思维链的数据时使用快思考,可以设置 `enable_thinking` 为 `None`。但该功能较为复杂,请谨慎使用。
### 预训练数据集
- [样例数据集](c4_demo.jsonl)
在预训练时,只有 `text` 列中的内容会用于模型学习。
```json
[
{"text": "document"},
{"text": "document"}
]
```
对于上述格式的数据,`dataset_info.json` 中的*数据集描述*应为:
```json
"数据集名称": {
"file_name": "data.json",
"columns": {
"prompt": "text"
}
}
```
### 偏好数据集
偏好数据集用于奖励模型训练、DPO 训练、ORPO 训练和 SimPO 训练。
它需要在 `chosen` 列中提供更优的回答,并在 `rejected` 列中提供更差的回答。
```json
[
{
"instruction": "用户指令(必填)",
"input": "用户输入(选填)",
"chosen": "优质回答(必填)",
"rejected": "劣质回答(必填)"
}
]
```
对于上述格式的数据,`dataset_info.json` 中的*数据集描述*应为:
```json
"数据集名称": {
"file_name": "data.json",
"ranking": true,
"columns": {
"prompt": "instruction",
"query": "input",
"chosen": "chosen",
"rejected": "rejected"
}
}
```
### KTO 数据集
KTO 数据集需要提供额外的 `kto_tag` 列。详情请参阅 [sharegpt](#sharegpt-格式)。
### 多模态图像数据集
多模态图像数据集需要提供额外的 `images` 列。详情请参阅 [sharegpt](#sharegpt-格式)。
### 多模态视频数据集
多模态视频数据集需要提供额外的 `videos` 列。详情请参阅 [sharegpt](#sharegpt-格式)。
### 多模态音频数据集
多模态音频数据集需要提供额外的 `audios` 列。详情请参阅 [sharegpt](#sharegpt-格式)。
## Sharegpt 格式
### 指令监督微调数据集
- [样例数据集](glaive_toolcall_zh_demo.json)
相比 alpaca 格式的数据集,sharegpt 格式支持**更多的角色种类**,例如 human、gpt、observation、function 等等。它们构成一个对象列表呈现在 `conversations` 列中。
注意其中 human 和 observation 必须出现在奇数位置,gpt 和 function 必须出现在偶数位置。默认所有的 gpt 和 function 会被用于学习。
```json
[
{
"conversations": [
{
"from": "human",
"value": "用户指令"
},
{
"from": "function_call",
"value": "工具参数"
},
{
"from": "observation",
"value": "工具结果"
},
{
"from": "gpt",
"value": "模型回答"
}
],
"system": "系统提示词(选填)",
"tools": "工具描述(选填)"
}
]
```
对于上述格式的数据,`dataset_info.json` 中的*数据集描述*应为:
```json
"数据集名称": {
"file_name": "data.json",
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"system": "system",
"tools": "tools"
}
}
```
### 预训练数据集
尚不支持,请使用 [alpaca](#alpaca-格式) 格式。
### 偏好数据集
- [样例数据集](dpo_zh_demo.json)
gitextract_v77f1c8q/
├── .dockerignore
├── .gitattributes
├── .github/
│ ├── CODE_OF_CONDUCT.md
│ ├── CONTRIBUTING.md
│ ├── ISSUE_TEMPLATE/
│ │ ├── 1-bug-report.yml
│ │ ├── 2-feature-request.yml
│ │ └── config.yml
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── SECURITY.md
│ ├── copilot-instructions.md
│ ├── instructions-v0.md
│ ├── instructions-v1.md
│ └── workflows/
│ ├── docker.yml
│ ├── docs.yml
│ ├── label_issue.yml
│ ├── publish.yml
│ ├── tests.yml
│ ├── tests_cuda.yml
│ └── tests_npu.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CITATION.cff
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── README_zh.md
├── data/
│ ├── README.md
│ ├── README_zh.md
│ ├── alpaca_en_demo.json
│ ├── alpaca_zh_demo.json
│ ├── c4_demo.jsonl
│ ├── dataset_info.json
│ ├── dpo_en_demo.json
│ ├── dpo_zh_demo.json
│ ├── glaive_toolcall_en_demo.json
│ ├── glaive_toolcall_zh_demo.json
│ ├── identity.json
│ ├── kto_en_demo.json
│ ├── mllm_audio_demo.json
│ ├── mllm_demo.json
│ ├── mllm_demo_data/
│ │ └── 3.flac
│ ├── mllm_video_audio_demo.json
│ ├── mllm_video_demo.json
│ ├── reason_tool_use_demo_50.jsonl
│ ├── v1_dpo_demo.jsonl
│ ├── v1_dpo_demo.yaml
│ ├── v1_sft_demo.jsonl
│ ├── v1_sft_demo.yaml
│ └── wiki_demo.txt
├── docker/
│ ├── docker-cuda/
│ │ ├── Dockerfile
│ │ ├── Dockerfile.base
│ │ ├── Dockerfile.megatron
│ │ ├── README.md
│ │ └── docker-compose.yml
│ ├── docker-npu/
│ │ ├── Dockerfile
│ │ └── docker-compose.yml
│ └── docker-rocm/
│ ├── Dockerfile
│ └── docker-compose.yml
├── docs/
│ ├── Makefile
│ ├── _static/
│ │ ├── css/
│ │ │ └── lang-switcher.css
│ │ └── js/
│ │ └── switcher.js
│ ├── conf.py
│ ├── en/
│ │ ├── advanced/
│ │ │ ├── custom-kernels/
│ │ │ │ ├── custom-kernels.md
│ │ │ │ ├── fused-operators.md
│ │ │ │ └── triton.md
│ │ │ ├── distributed/
│ │ │ │ ├── deepspeed.md
│ │ │ │ ├── fsdp.md
│ │ │ │ └── parallel-dp-tp-ep-sp-cp.md
│ │ │ └── lora-and-quantization/
│ │ │ ├── lora.md
│ │ │ └── quantization.md
│ │ ├── conf.py
│ │ ├── data-preparation/
│ │ │ └── data-processing.md
│ │ ├── dev-guide/
│ │ │ ├── core/
│ │ │ │ ├── data-engine.md
│ │ │ │ ├── model-engine.md
│ │ │ │ └── trainer.md
│ │ │ └── plugins/
│ │ │ ├── data-plugins.md
│ │ │ └── model-plugins/
│ │ │ ├── initialization.md
│ │ │ ├── kernels.md
│ │ │ └── rendering.md
│ │ ├── getting-started.md
│ │ ├── hyperparameters/
│ │ │ ├── data-argument.md
│ │ │ ├── model-argument.md
│ │ │ ├── sample-argument.md
│ │ │ └── training-argument.md
│ │ ├── index.rst
│ │ ├── inference/
│ │ │ └── deploy.md
│ │ ├── installation.md
│ │ ├── llamaboard-web-ui.md
│ │ └── training/
│ │ ├── dpo.md
│ │ └── sft.md
│ ├── make.bat
│ ├── requirements.txt
│ └── zh/
│ ├── advanced/
│ │ ├── custom-kernels/
│ │ │ ├── custom-kernels.md
│ │ │ ├── fused-operators.md
│ │ │ └── triton.md
│ │ ├── distributed/
│ │ │ ├── deepspeed.md
│ │ │ ├── fsdp.md
│ │ │ └── parallel-dp-tp-ep-sp-cp.md
│ │ └── lora-and-quantization/
│ │ ├── lora.md
│ │ └── quantization.md
│ ├── conf.py
│ ├── data-preparation/
│ │ └── data-processing.md
│ ├── dev-guide/
│ │ ├── core/
│ │ │ ├── data-engine.md
│ │ │ ├── model-engine.md
│ │ │ └── trainer.md
│ │ └── plugins/
│ │ ├── data-plugins.md
│ │ └── model-plugins/
│ │ ├── initialization.md
│ │ ├── kernels.md
│ │ └── rendering.md
│ ├── getting-started.md
│ ├── hyperparameters/
│ │ ├── data-argument.md
│ │ ├── model-argument.md
│ │ ├── sample-argument.md
│ │ └── training-argument.md
│ ├── index.rst
│ ├── inference/
│ │ └── deploy.md
│ ├── installation.md
│ ├── llamaboard-web-ui.md
│ └── training/
│ ├── dpo.md
│ └── sft.md
├── examples/
│ ├── README.md
│ ├── README_zh.md
│ ├── accelerate/
│ │ ├── fsdp2_config.yaml
│ │ ├── fsdp_config.yaml
│ │ ├── fsdp_config_multiple_nodes.yaml
│ │ └── fsdp_config_offload.yaml
│ ├── ascend/
│ │ ├── qwen3_full_sft_fsdp2.yaml
│ │ ├── qwen3moe_full_sft_fsdp.yaml
│ │ ├── qwen3vlmoe_full_sft_fsdp2.yaml
│ │ └── qwen3vlmoe_lora_sft_fsdp.yaml
│ ├── deepspeed/
│ │ ├── ds_z0_config.json
│ │ ├── ds_z2_autotp_config.json
│ │ ├── ds_z2_config.json
│ │ ├── ds_z2_offload_config.json
│ │ ├── ds_z3_config.json
│ │ ├── ds_z3_fp8_config.json
│ │ └── ds_z3_offload_config.json
│ ├── extras/
│ │ ├── adam_mini/
│ │ │ └── qwen2_full_sft.yaml
│ │ ├── apollo/
│ │ │ └── llama3_full_sft.yaml
│ │ ├── asft/
│ │ │ ├── llama2_full_asft.yaml
│ │ │ └── qwen2_full_asft.yaml
│ │ ├── badam/
│ │ │ └── llama3_full_sft.yaml
│ │ ├── dft/
│ │ │ └── qwen2_full_sft.yaml
│ │ ├── eaft/
│ │ │ └── qwen25_05b_eaft_full.yaml
│ │ ├── fp8/
│ │ │ ├── llama3_fp8_deepspeed_sft.yaml
│ │ │ └── llama3_fp8_fsdp_sft.yaml
│ │ ├── fsdp_qlora/
│ │ │ ├── llama3_lora_sft.yaml
│ │ │ └── train.sh
│ │ ├── galore/
│ │ │ └── llama3_full_sft.yaml
│ │ ├── llama_pro/
│ │ │ ├── expand.sh
│ │ │ └── llama3_freeze_sft.yaml
│ │ ├── loraplus/
│ │ │ └── llama3_lora_sft.yaml
│ │ ├── mod/
│ │ │ └── llama3_full_sft.yaml
│ │ ├── multi_tokens/
│ │ │ └── tokens_cfg.yaml
│ │ ├── muon/
│ │ │ └── qwen2_full_sft.yaml
│ │ ├── nlg_eval/
│ │ │ └── llama3_lora_predict.yaml
│ │ ├── oft/
│ │ │ ├── llama3_oft_sft.yaml
│ │ │ └── qwen2_5vl_oft_sft.yaml
│ │ ├── pissa/
│ │ │ ├── init.sh
│ │ │ └── llama3_lora_sft.yaml
│ │ └── qoft/
│ │ ├── llama3_oft_sft_awq.yaml
│ │ ├── llama3_oft_sft_bnb_npu.yaml
│ │ └── llama3_oft_sft_gptq.yaml
│ ├── inference/
│ │ ├── qwen3.yaml
│ │ ├── qwen3_full_sft.yaml
│ │ ├── qwen3_lora_sft.yaml
│ │ └── qwen3vl.yaml
│ ├── ktransformers/
│ │ ├── infer_lora/
│ │ │ ├── deepseek2_lora_sft_kt.yaml
│ │ │ ├── deepseek3_kt.yaml
│ │ │ ├── deepseek3_lora_sft_kt.yaml
│ │ │ └── qwen3moe_lora_sft_kt.yaml
│ │ ├── kt_optimize_rules/
│ │ │ ├── DeepSeek-V2-Chat-sft-amx.yaml
│ │ │ ├── DeepSeek-V2-Chat.yaml
│ │ │ ├── DeepSeek-V2-Lite-Chat-sft-amx-multi-gpu.yaml
│ │ │ ├── DeepSeek-V2-Lite-Chat-sft-amx.yaml
│ │ │ ├── DeepSeek-V2-Lite-Chat-sft.yaml
│ │ │ ├── DeepSeek-V2-Lite-Chat.yaml
│ │ │ ├── DeepSeek-V3-Chat-amx.yaml
│ │ │ ├── DeepSeek-V3-Chat-sft-amx-multi-gpu-4.yaml
│ │ │ ├── DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml
│ │ │ ├── DeepSeek-V3-Chat-sft-amx.yaml
│ │ │ └── Qwen3Moe-sft-amx.yaml
│ │ └── train_lora/
│ │ ├── deepseek2_lora_sft_kt.yaml
│ │ ├── deepseek3_lora_sft_kt.yaml
│ │ └── qwen3moe_lora_sft_kt.yaml
│ ├── megatron/
│ │ ├── qwen2_vl_full.yaml
│ │ └── qwen3_moe_full.yaml
│ ├── merge_lora/
│ │ ├── qwen3_full_sft.yaml
│ │ ├── qwen3_gptq.yaml
│ │ ├── qwen3_lora_sft.yaml
│ │ └── qwen3vl_lora_sft.yaml
│ ├── train_full/
│ │ ├── qwen3_full_sft.yaml
│ │ └── qwen3vl_full_sft.yaml
│ ├── train_lora/
│ │ ├── qwen3_lora_dpo.yaml
│ │ ├── qwen3_lora_kto.yaml
│ │ ├── qwen3_lora_pretrain.yaml
│ │ ├── qwen3_lora_reward.yaml
│ │ ├── qwen3_lora_sft.sh
│ │ ├── qwen3_lora_sft.yaml
│ │ ├── qwen3_lora_sft_ds3.yaml
│ │ ├── qwen3_lora_sft_ray.yaml
│ │ ├── qwen3_preprocess.yaml
│ │ ├── qwen3vl_lora_dpo.yaml
│ │ └── qwen3vl_lora_sft.yaml
│ ├── train_qlora/
│ │ ├── llama3_lora_sft_aqlm.yaml
│ │ ├── llama3_lora_sft_awq.yaml
│ │ ├── llama3_lora_sft_gptq.yaml
│ │ ├── qwen3_lora_sft_bnb_npu.yaml
│ │ └── qwen3_lora_sft_otfq.yaml
│ └── v1/
│ ├── train_freeze/
│ │ └── train_freeze_sft.yaml
│ ├── train_full/
│ │ ├── train_full_deepspeed.yaml
│ │ └── train_full_fsdp2.yaml
│ ├── train_lora/
│ │ ├── export_lora.yaml
│ │ └── train_lora_sft.yaml
│ └── train_qlora/
│ └── quantization.yaml
├── pyproject.toml
├── requirements/
│ ├── adam-mini.txt
│ ├── apollo.txt
│ ├── aqlm.txt
│ ├── badam.txt
│ ├── bitsandbytes.txt
│ ├── deepspeed.txt
│ ├── dev.txt
│ ├── eetq.txt
│ ├── fp8-te.txt
│ ├── fp8.txt
│ ├── galore.txt
│ ├── gptq.txt
│ ├── hqq.txt
│ ├── liger-kernel.txt
│ ├── metrics.txt
│ ├── minicpm-v.txt
│ ├── npu.txt
│ ├── openmind.txt
│ ├── sglang.txt
│ ├── swanlab.txt
│ └── vllm.txt
├── scripts/
│ ├── api_example/
│ │ ├── test_image.py
│ │ └── test_toolcall.py
│ ├── bench_qwen.py
│ ├── convert_ckpt/
│ │ ├── llamafy_baichuan2.py
│ │ ├── llamafy_qwen.py
│ │ ├── tiny_llama4.py
│ │ └── tiny_qwen3.py
│ ├── eval_bleu_rouge.py
│ ├── hf2dcp.py
│ ├── llama_pro.py
│ ├── loftq_init.py
│ ├── megatron_merge.py
│ ├── pissa_init.py
│ ├── qwen_omni_merge.py
│ ├── stat_utils/
│ │ ├── cal_flops.py
│ │ ├── cal_lr.py
│ │ ├── cal_mfu.py
│ │ ├── cal_ppl.py
│ │ └── length_cdf.py
│ └── vllm_infer.py
├── src/
│ ├── api.py
│ ├── llamafactory/
│ │ ├── __init__.py
│ │ ├── api/
│ │ │ ├── __init__.py
│ │ │ ├── app.py
│ │ │ ├── chat.py
│ │ │ ├── common.py
│ │ │ └── protocol.py
│ │ ├── chat/
│ │ │ ├── __init__.py
│ │ │ ├── base_engine.py
│ │ │ ├── chat_model.py
│ │ │ ├── hf_engine.py
│ │ │ ├── kt_engine.py
│ │ │ ├── sglang_engine.py
│ │ │ └── vllm_engine.py
│ │ ├── cli.py
│ │ ├── data/
│ │ │ ├── __init__.py
│ │ │ ├── collator.py
│ │ │ ├── converter.py
│ │ │ ├── data_utils.py
│ │ │ ├── formatter.py
│ │ │ ├── loader.py
│ │ │ ├── mm_plugin.py
│ │ │ ├── parser.py
│ │ │ ├── processor/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── feedback.py
│ │ │ │ ├── pairwise.py
│ │ │ │ ├── pretrain.py
│ │ │ │ ├── processor_utils.py
│ │ │ │ ├── supervised.py
│ │ │ │ └── unsupervised.py
│ │ │ ├── template.py
│ │ │ └── tool_utils.py
│ │ ├── eval/
│ │ │ ├── __init__.py
│ │ │ ├── evaluator.py
│ │ │ └── template.py
│ │ ├── extras/
│ │ │ ├── __init__.py
│ │ │ ├── constants.py
│ │ │ ├── env.py
│ │ │ ├── logging.py
│ │ │ ├── misc.py
│ │ │ ├── packages.py
│ │ │ └── ploting.py
│ │ ├── hparams/
│ │ │ ├── __init__.py
│ │ │ ├── data_args.py
│ │ │ ├── evaluation_args.py
│ │ │ ├── finetuning_args.py
│ │ │ ├── generating_args.py
│ │ │ ├── model_args.py
│ │ │ ├── parser.py
│ │ │ └── training_args.py
│ │ ├── launcher.py
│ │ ├── model/
│ │ │ ├── __init__.py
│ │ │ ├── adapter.py
│ │ │ ├── loader.py
│ │ │ ├── model_utils/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── attention.py
│ │ │ │ ├── checkpointing.py
│ │ │ │ ├── embedding.py
│ │ │ │ ├── ktransformers.py
│ │ │ │ ├── kv_cache.py
│ │ │ │ ├── liger_kernel.py
│ │ │ │ ├── longlora.py
│ │ │ │ ├── misc.py
│ │ │ │ ├── mod.py
│ │ │ │ ├── moe.py
│ │ │ │ ├── packing.py
│ │ │ │ ├── quantization.py
│ │ │ │ ├── rope.py
│ │ │ │ ├── unsloth.py
│ │ │ │ ├── valuehead.py
│ │ │ │ └── visual.py
│ │ │ └── patcher.py
│ │ ├── third_party/
│ │ │ ├── __init__.py
│ │ │ └── muon/
│ │ │ ├── __init__.py
│ │ │ └── muon.py
│ │ ├── train/
│ │ │ ├── __init__.py
│ │ │ ├── callbacks.py
│ │ │ ├── dpo/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ktrainer.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── fp8_utils.py
│ │ │ ├── kto/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── mca/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── ppo/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ppo_utils.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── pt/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── rm/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── metric.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── sft/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── metric.py
│ │ │ │ ├── trainer.py
│ │ │ │ └── workflow.py
│ │ │ ├── test_utils.py
│ │ │ ├── trainer_utils.py
│ │ │ └── tuner.py
│ │ ├── v1/
│ │ │ ├── __init__.py
│ │ │ ├── accelerator/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── helper.py
│ │ │ │ ├── interface.py
│ │ │ │ └── profiler.py
│ │ │ ├── config/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── arg_parser.py
│ │ │ │ ├── arg_utils.py
│ │ │ │ ├── data_args.py
│ │ │ │ ├── model_args.py
│ │ │ │ ├── sample_args.py
│ │ │ │ └── training_args.py
│ │ │ ├── core/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base_sampler.py
│ │ │ │ ├── base_trainer.py
│ │ │ │ ├── data_engine.py
│ │ │ │ ├── model_engine.py
│ │ │ │ └── utils/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── batching.py
│ │ │ │ ├── callback.py
│ │ │ │ ├── inference_engine.py
│ │ │ │ └── rendering.py
│ │ │ ├── launcher.py
│ │ │ ├── plugins/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── data_plugins/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── converter.py
│ │ │ │ │ └── loader.py
│ │ │ │ ├── model_plugins/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── add_token.py
│ │ │ │ │ ├── initialization.py
│ │ │ │ │ ├── kernels/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── base.py
│ │ │ │ │ │ ├── interface.py
│ │ │ │ │ │ ├── ops/
│ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ ├── mlp/
│ │ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ │ ├── npu_fused_moe.py
│ │ │ │ │ │ │ │ └── npu_swiglu.py
│ │ │ │ │ │ │ ├── rms_norm/
│ │ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ │ └── npu_rms_norm.py
│ │ │ │ │ │ │ └── rope/
│ │ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ │ └── npu_rope.py
│ │ │ │ │ │ └── registry.py
│ │ │ │ │ ├── peft.py
│ │ │ │ │ ├── quantization.py
│ │ │ │ │ ├── rendering.py
│ │ │ │ │ └── templates/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── qwen3.py
│ │ │ │ │ └── qwen3_nothink.py
│ │ │ │ ├── sampler_plugins/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── vllm.py
│ │ │ │ └── trainer_plugins/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── batching.py
│ │ │ │ ├── distributed/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── deepspeed.py
│ │ │ │ │ ├── fsdp2.py
│ │ │ │ │ └── hub.py
│ │ │ │ ├── lr_scheduler.py
│ │ │ │ └── optimizer.py
│ │ │ ├── samplers/
│ │ │ │ └── cli_sampler.py
│ │ │ ├── trainers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dpo_trainer.py
│ │ │ │ ├── rm_trainer.py
│ │ │ │ └── sft_trainer.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── constants.py
│ │ │ ├── dtype.py
│ │ │ ├── env.py
│ │ │ ├── helper.py
│ │ │ ├── logging.py
│ │ │ ├── objects.py
│ │ │ ├── packages.py
│ │ │ ├── plugin.py
│ │ │ ├── pytest.py
│ │ │ └── types.py
│ │ └── webui/
│ │ ├── __init__.py
│ │ ├── chatter.py
│ │ ├── common.py
│ │ ├── components/
│ │ │ ├── __init__.py
│ │ │ ├── chatbot.py
│ │ │ ├── data.py
│ │ │ ├── eval.py
│ │ │ ├── export.py
│ │ │ ├── footer.py
│ │ │ ├── infer.py
│ │ │ ├── top.py
│ │ │ └── train.py
│ │ ├── control.py
│ │ ├── css.py
│ │ ├── engine.py
│ │ ├── interface.py
│ │ ├── locales.py
│ │ ├── manager.py
│ │ └── runner.py
│ ├── train.py
│ └── webui.py
├── tests/
│ ├── check_license.py
│ ├── conftest.py
│ ├── data/
│ │ ├── processor/
│ │ │ ├── test_feedback.py
│ │ │ ├── test_pairwise.py
│ │ │ ├── test_processor_utils.py
│ │ │ ├── test_supervised.py
│ │ │ └── test_unsupervised.py
│ │ ├── test_collator.py
│ │ ├── test_converter.py
│ │ ├── test_formatter.py
│ │ ├── test_loader.py
│ │ ├── test_mm_plugin.py
│ │ └── test_template.py
│ ├── e2e/
│ │ ├── test_chat.py
│ │ ├── test_sglang.py
│ │ └── test_train.py
│ ├── eval/
│ │ └── test_eval_template.py
│ ├── model/
│ │ ├── model_utils/
│ │ │ ├── test_add_tokens.py
│ │ │ ├── test_attention.py
│ │ │ ├── test_checkpointing.py
│ │ │ ├── test_misc.py
│ │ │ ├── test_packing.py
│ │ │ └── test_visual.py
│ │ ├── test_base.py
│ │ ├── test_freeze.py
│ │ ├── test_full.py
│ │ ├── test_lora.py
│ │ └── test_pissa.py
│ ├── train/
│ │ └── test_sft_trainer.py
│ └── version.txt
└── tests_v1/
├── accelerator/
│ └── test_interface.py
├── config/
│ └── test_args_parser.py
├── conftest.py
├── core/
│ ├── test_data_engine.py
│ ├── test_model_loader.py
│ └── utils/
│ ├── test_batching.py
│ └── test_rendering.py
├── plugins/
│ ├── data_plugins/
│ │ └── test_converter.py
│ ├── model_plugins/
│ │ ├── test_init_plugin.py
│ │ ├── test_kernel_plugin.py
│ │ ├── test_peft.py
│ │ └── test_quantization_plugin.py
│ └── trainer_plugins/
│ └── distributed/
│ └── test_fsdp2.py
├── sampler/
│ └── test_cli_sampler.py
└── trainers/
└── test_fsdp2_sft_trainer.py
SYMBOL INDEX (1376 symbols across 213 files)
FILE: docs/_static/js/switcher.js
function buildSwitcher (line 9) | function buildSwitcher() {
function hideOtherLanguageToc (line 41) | function hideOtherLanguageToc() {
FILE: scripts/api_example/test_image.py
function main (line 24) | def main():
FILE: scripts/api_example/test_toolcall.py
function calculate_gpa (line 25) | def calculate_gpa(grades: list[str], hours: list[int]) -> float:
function main (line 34) | def main():
FILE: scripts/bench_qwen.py
class DummyDataset (line 32) | class DummyDataset(Dataset):
method __init__ (line 33) | def __init__(self, size: int = 1000, seq_length: int = 1024, processor...
method __len__ (line 54) | def __len__(self):
method __getitem__ (line 57) | def __getitem__(self, index: int):
class MultiModalDataCollatorForSeq2Seq (line 79) | class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
method __post_init__ (line 80) | def __post_init__(self):
method __call__ (line 91) | def __call__(self, features: list[dict[str, Any]]) -> dict[str, "torch...
function bench_qwen (line 119) | def bench_qwen(
FILE: scripts/convert_ckpt/llamafy_baichuan2.py
function save_weight (line 31) | def save_weight(input_dir: str, output_dir: str, shard_size: str, save_s...
function save_config (line 76) | def save_config(input_dir: str, output_dir: str):
function llamafy_baichuan2 (line 91) | def llamafy_baichuan2(
FILE: scripts/convert_ckpt/llamafy_qwen.py
function save_weight (line 39) | def save_weight(input_dir: str, output_dir: str, shard_size: str, save_s...
function save_config (line 113) | def save_config(input_dir: str, output_dir: str, torch_dtype: str):
function llamafy_qwen (line 144) | def llamafy_qwen(
FILE: scripts/eval_bleu_rouge.py
function compute_metrics (line 35) | def compute_metrics(sample):
function main (line 61) | def main(filename: str):
FILE: scripts/hf2dcp.py
function convert (line 31) | def convert(hf_path: str, dcp_path: str) -> None:
function help (line 49) | def help() -> None:
FILE: scripts/llama_pro.py
function change_name (line 36) | def change_name(name: str, old_index: int, new_index: int) -> str:
function block_expansion (line 40) | def block_expansion(
FILE: scripts/loftq_init.py
function quantize_loftq (line 30) | def quantize_loftq(
FILE: scripts/megatron_merge.py
function convert_mca_to_hf (line 31) | def convert_mca_to_hf(
function convert (line 64) | def convert(
function main (line 125) | def main():
FILE: scripts/pissa_init.py
function quantize_pissa (line 30) | def quantize_pissa(
FILE: scripts/qwen_omni_merge.py
function merge_lora (line 36) | def merge_lora(
function save_full_model (line 90) | def save_full_model(
FILE: scripts/stat_utils/cal_flops.py
function calculate_flops (line 26) | def calculate_flops(
FILE: scripts/stat_utils/cal_lr.py
function calculate_lr (line 37) | def calculate_lr(
FILE: scripts/stat_utils/cal_mfu.py
function compute_model_flops (line 29) | def compute_model_flops(
function compute_device_flops (line 86) | def compute_device_flops(world_size: int) -> float:
function calculate_mfu (line 101) | def calculate_mfu(
FILE: scripts/stat_utils/cal_ppl.py
class PairwiseDataCollatorWithPadding (line 32) | class PairwiseDataCollatorWithPadding(MultiModalDataCollatorForSeq2Seq):
method __call__ (line 37) | def __call__(self, features: list[dict[str, Any]]) -> dict[str, torch....
function calculate_ppl (line 55) | def calculate_ppl(
FILE: scripts/stat_utils/length_cdf.py
function length_cdf (line 25) | def length_cdf(
FILE: scripts/vllm_infer.py
function _need_video_kwargs (line 39) | def _need_video_kwargs(template):
function vllm_infer (line 47) | def vllm_infer(
FILE: src/api.py
function main (line 23) | def main():
FILE: src/llamafactory/api/app.py
function sweeper (line 54) | async def sweeper() -> None:
function lifespan (line 61) | async def lifespan(app: "FastAPI", chat_model: "ChatModel"): # collects...
function create_app (line 69) | def create_app(chat_model: "ChatModel") -> "FastAPI":
function run_api (line 127) | def run_api() -> None:
FILE: src/llamafactory/api/chat.py
function _process_request (line 73) | def _process_request(
function _create_stream_chat_completion_chunk (line 181) | def _create_stream_chat_completion_chunk(
function create_chat_completion_response (line 193) | async def create_chat_completion_response(
function create_stream_chat_completion_response (line 247) | async def create_stream_chat_completion_response(
function create_score_evaluation_response (line 286) | async def create_score_evaluation_response(
FILE: src/llamafactory/api/common.py
function dictify (line 38) | def dictify(data: "BaseModel") -> dict[str, Any]:
function jsonify (line 45) | def jsonify(data: "BaseModel") -> str:
function check_lfi_path (line 52) | def check_lfi_path(path: str) -> None:
function check_ssrf_url (line 70) | def check_ssrf_url(url: str) -> None:
FILE: src/llamafactory/api/protocol.py
class Role (line 23) | class Role(StrEnum):
class Finish (line 32) | class Finish(StrEnum):
class ModelCard (line 38) | class ModelCard(BaseModel):
class ModelList (line 45) | class ModelList(BaseModel):
class Function (line 50) | class Function(BaseModel):
class FunctionDefinition (line 55) | class FunctionDefinition(BaseModel):
class FunctionAvailable (line 61) | class FunctionAvailable(BaseModel):
class FunctionCall (line 66) | class FunctionCall(BaseModel):
class URL (line 72) | class URL(BaseModel):
class MultimodalInputItem (line 77) | class MultimodalInputItem(BaseModel):
class ChatMessage (line 85) | class ChatMessage(BaseModel):
class ChatCompletionMessage (line 91) | class ChatCompletionMessage(BaseModel):
class ChatCompletionRequest (line 97) | class ChatCompletionRequest(BaseModel):
class ChatCompletionResponseChoice (line 111) | class ChatCompletionResponseChoice(BaseModel):
class ChatCompletionStreamResponseChoice (line 117) | class ChatCompletionStreamResponseChoice(BaseModel):
class ChatCompletionResponseUsage (line 123) | class ChatCompletionResponseUsage(BaseModel):
class ChatCompletionResponse (line 129) | class ChatCompletionResponse(BaseModel):
class ChatCompletionStreamResponse (line 138) | class ChatCompletionStreamResponse(BaseModel):
class ScoreEvaluationRequest (line 146) | class ScoreEvaluationRequest(BaseModel):
class ScoreEvaluationResponse (line 152) | class ScoreEvaluationResponse(BaseModel):
FILE: src/llamafactory/chat/base_engine.py
class Response (line 32) | class Response:
class BaseEngine (line 39) | class BaseEngine(ABC):
method __init__ (line 53) | def __init__(
method chat (line 64) | async def chat(
method stream_chat (line 78) | async def stream_chat(
method get_scores (line 92) | async def get_scores(
FILE: src/llamafactory/chat/chat_model.py
function _start_background_loop (line 34) | def _start_background_loop(loop: "asyncio.AbstractEventLoop") -> None:
class ChatModel (line 39) | class ChatModel:
method __init__ (line 47) | def __init__(self, args: Optional[dict[str, Any]] = None) -> None:
method chat (line 91) | def chat(
method achat (line 107) | async def achat(
method stream_chat (line 120) | def stream_chat(
method astream_chat (line 139) | async def astream_chat(
method get_scores (line 155) | def get_scores(
method aget_scores (line 164) | async def aget_scores(
function run_chat (line 173) | def run_chat() -> None:
FILE: src/llamafactory/chat/hf_engine.py
class HuggingfaceEngine (line 44) | class HuggingfaceEngine(BaseEngine):
method __init__ (line 45) | def __init__(
method _process_args (line 73) | def _process_args(
method _chat (line 212) | def _chat(
method _stream_chat (line 267) | def _stream_chat(
method _get_scores (line 314) | def _get_scores(
method chat (line 335) | async def chat(
method stream_chat (line 366) | async def stream_chat(
method get_scores (line 402) | async def get_scores(
FILE: src/llamafactory/chat/kt_engine.py
class KTransformersEngine (line 51) | class KTransformersEngine(BaseEngine):
method __init__ (line 52) | def __init__(
method _get_scores (line 88) | def _get_scores(
method _generate (line 108) | async def _generate(
method chat (line 220) | async def chat(
method stream_chat (line 254) | async def stream_chat(
method get_scores (line 275) | async def get_scores(
FILE: src/llamafactory/chat/sglang_engine.py
class SGLangEngine (line 46) | class SGLangEngine(BaseEngine):
method __init__ (line 58) | def __init__(
method _cleanup_server (line 130) | def _cleanup_server(self):
method _generate (line 140) | async def _generate(
method chat (line 232) | async def chat(
method stream_chat (line 258) | async def stream_chat(
method get_scores (line 276) | async def get_scores(
method __del__ (line 283) | def __del__(self):
FILE: src/llamafactory/chat/vllm_engine.py
class VllmEngine (line 46) | class VllmEngine(BaseEngine):
method __init__ (line 47) | def __init__(
method _generate (line 111) | async def _generate(
method chat (line 216) | async def chat(
method stream_chat (line 245) | async def stream_chat(
method get_scores (line 263) | async def get_scores(
FILE: src/llamafactory/cli.py
function main (line 16) | def main():
FILE: src/llamafactory/data/collator.py
function prepare_4d_attention_mask (line 42) | def prepare_4d_attention_mask(attention_mask_with_indices: "torch.Tensor...
class MultiModalDataCollatorForSeq2Seq (line 86) | class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
method __post_init__ (line 95) | def __post_init__(self):
method __call__ (line 109) | def __call__(self, features: list[dict[str, Any]]) -> dict[str, "torch...
class SFTDataCollatorWith4DAttentionMask (line 258) | class SFTDataCollatorWith4DAttentionMask(MultiModalDataCollatorForSeq2Seq):
method __call__ (line 265) | def __call__(self, features: list[dict[str, Any]]) -> dict[str, "torch...
class PairwiseDataCollatorWithPadding (line 278) | class PairwiseDataCollatorWithPadding(MultiModalDataCollatorForSeq2Seq):
method __call__ (line 281) | def __call__(self, features: list[dict[str, Any]]) -> dict[str, "torch...
class KTODataCollatorWithPadding (line 304) | class KTODataCollatorWithPadding(MultiModalDataCollatorForSeq2Seq):
method __call__ (line 307) | def __call__(self, features: list[dict[str, Any]]) -> dict[str, "torch...
FILE: src/llamafactory/data/converter.py
class DatasetConverter (line 39) | class DatasetConverter:
method _find_medias (line 43) | def _find_medias(self, medias: Union["MediaType", list["MediaType"], N...
method __call__ (line 79) | def __call__(self, example: dict[str, Any]) -> dict[str, Any]:
class AlpacaDatasetConverter (line 85) | class AlpacaDatasetConverter(DatasetConverter):
method __call__ (line 86) | def __call__(self, example: dict[str, Any]) -> dict[str, Any]:
class SharegptDatasetConverter (line 135) | class SharegptDatasetConverter(DatasetConverter):
method __call__ (line 136) | def __call__(self, example: dict[str, Any]) -> dict[str, Any]:
class OpenAIDatasetConverter (line 231) | class OpenAIDatasetConverter(DatasetConverter):
method __call__ (line 232) | def __call__(self, example: dict[str, Any]) -> dict[str, Any]:
function register_dataset_converter (line 377) | def register_dataset_converter(name: str, dataset_converter: type["Datas...
function get_dataset_converter (line 385) | def get_dataset_converter(name: str, dataset_attr: "DatasetAttr", data_a...
function align_dataset (line 393) | def align_dataset(
FILE: src/llamafactory/data/data_utils.py
class Role (line 38) | class Role(StrEnum):
class DatasetModule (line 46) | class DatasetModule(TypedDict):
function merge_dataset (line 51) | def merge_dataset(
function split_dataset (line 85) | def split_dataset(
function get_dataset_module (line 134) | def get_dataset_module(dataset: Union["Dataset", "DatasetDict"]) -> "Dat...
function setup_fs (line 158) | def setup_fs(path: str, anon: bool = False) -> "fsspec.AbstractFileSystem":
function _read_json_with_fs (line 174) | def _read_json_with_fs(fs: "fsspec.AbstractFileSystem", path: str) -> li...
function read_cloud_json (line 183) | def read_cloud_json(cloud_path: str) -> list[Any]:
FILE: src/llamafactory/data/formatter.py
class Formatter (line 27) | class Formatter(ABC):
method apply (line 32) | def apply(self, **kwargs) -> SLOTS:
method extract (line 36) | def extract(self, content: str) -> str | list["FunctionCall"]:
class EmptyFormatter (line 45) | class EmptyFormatter(Formatter):
method __post_init__ (line 46) | def __post_init__(self):
method apply (line 56) | def apply(self, **kwargs) -> SLOTS:
class StringFormatter (line 61) | class StringFormatter(Formatter):
method __post_init__ (line 62) | def __post_init__(self):
method apply (line 72) | def apply(self, **kwargs) -> SLOTS:
class FunctionFormatter (line 91) | class FunctionFormatter(StringFormatter):
method __post_init__ (line 92) | def __post_init__(self):
method apply (line 97) | def apply(self, **kwargs) -> SLOTS:
class ToolFormatter (line 144) | class ToolFormatter(Formatter):
method __post_init__ (line 145) | def __post_init__(self):
method apply (line 149) | def apply(self, **kwargs) -> SLOTS:
method extract (line 158) | def extract(self, content: str) -> str | list["FunctionCall"]:
FILE: src/llamafactory/data/loader.py
function _load_single_dataset (line 51) | def _load_single_dataset(
function _get_merged_dataset (line 164) | def _get_merged_dataset(
function _get_dataset_processor (line 189) | def _get_dataset_processor(
function _get_preprocessed_dataset (line 229) | def _get_preprocessed_dataset(
function get_dataset (line 276) | def get_dataset(
FILE: src/llamafactory/data/mm_plugin.py
class EncodedImage (line 65) | class EncodedImage(TypedDict):
class RegularizedImageOutput (line 73) | class RegularizedImageOutput(TypedDict):
class RegularizedVideoOutput (line 76) | class RegularizedVideoOutput(TypedDict):
class RegularizedAudioOutput (line 81) | class RegularizedAudioOutput(TypedDict):
class MMProcessor (line 85) | class MMProcessor(ProcessorMixin):
method _get_number_of_features (line 91) | def _get_number_of_features(self, orig_height: int, orig_width: int, h...
function _get_paligemma_token_type_ids (line 95) | def _get_paligemma_token_type_ids(imglens: list[int], seqlens: list[int]...
function _get_gemma3_token_type_ids (line 112) | def _get_gemma3_token_type_ids(batch_ids: list[list[int]], processor: "M...
function _make_batched_images (line 130) | def _make_batched_images(images: list["ImageObject"], imglens: list[int]...
function _check_video_is_nested_images (line 140) | def _check_video_is_nested_images(video: "VideoInput") -> bool:
class MMPluginMixin (line 146) | class MMPluginMixin:
method _validate_input (line 152) | def _validate_input(
method _validate_messages (line 194) | def _validate_messages(
method _preprocess_image (line 223) | def _preprocess_image(
method _get_video_sample_indices (line 242) | def _get_video_sample_indices(
method _regularize_images (line 254) | def _regularize_images(self, images: list["ImageInput"], **kwargs) -> ...
method _regularize_videos (line 275) | def _regularize_videos(self, videos: list["VideoInput"], **kwargs) -> ...
method _regularize_audios (line 306) | def _regularize_audios(
method _get_mm_inputs (line 327) | def _get_mm_inputs(
class BasePlugin (line 417) | class BasePlugin(MMPluginMixin):
method process_messages (line 418) | def process_messages(
method process_token_ids (line 430) | def process_token_ids(
method get_mm_inputs (line 444) | def get_mm_inputs(
class ErnieVLPlugin (line 473) | class ErnieVLPlugin(BasePlugin):
method process_messages (line 475) | def process_messages(
class Gemma3Plugin (line 524) | class Gemma3Plugin(BasePlugin):
method process_messages (line 526) | def process_messages(
method get_mm_inputs (line 565) | def get_mm_inputs(
class Gemma3nPlugin (line 583) | class Gemma3nPlugin(Gemma3Plugin):
method process_messages (line 585) | def process_messages(
class InternVLPlugin (line 617) | class InternVLPlugin(BasePlugin):
method _get_mm_inputs (line 619) | def _get_mm_inputs(
method process_messages (line 703) | def process_messages(
method get_mm_inputs (line 748) | def get_mm_inputs(
class KimiVLPlugin (line 767) | class KimiVLPlugin(BasePlugin):
method process_messages (line 769) | def process_messages(self, messages, images, videos, audios, processor):
class Llama4Plugin (line 799) | class Llama4Plugin(BasePlugin):
method process_messages (line 801) | def process_messages(
method get_mm_inputs (line 848) | def get_mm_inputs(
class LlavaPlugin (line 866) | class LlavaPlugin(BasePlugin):
method process_messages (line 868) | def process_messages(
class LlavaNextPlugin (line 902) | class LlavaNextPlugin(BasePlugin):
method process_messages (line 904) | def process_messages(
class LlavaNextVideoPlugin (line 942) | class LlavaNextVideoPlugin(BasePlugin):
method process_messages (line 944) | def process_messages(
class MiniCPMVPlugin (line 997) | class MiniCPMVPlugin(BasePlugin):
method _get_mm_inputs (line 999) | def _get_mm_inputs(
method process_messages (line 1071) | def process_messages(
method get_mm_inputs (line 1162) | def get_mm_inputs(
class MllamaPlugin (line 1231) | class MllamaPlugin(BasePlugin):
method process_messages (line 1233) | def process_messages(
method get_mm_inputs (line 1253) | def get_mm_inputs(
class PaliGemmaPlugin (line 1286) | class PaliGemmaPlugin(BasePlugin):
method process_messages (line 1288) | def process_messages(
method process_token_ids (line 1311) | def process_token_ids(
method get_mm_inputs (line 1332) | def get_mm_inputs(
class PixtralPlugin (line 1351) | class PixtralPlugin(BasePlugin):
method process_messages (line 1353) | def process_messages(
method get_mm_inputs (line 1398) | def get_mm_inputs(
class Qwen2AudioPlugin (line 1420) | class Qwen2AudioPlugin(BasePlugin):
method process_messages (line 1422) | def process_messages(
method get_mm_inputs (line 1459) | def get_mm_inputs(
class Qwen2VLPlugin (line 1475) | class Qwen2VLPlugin(BasePlugin):
method _preprocess_image (line 1480) | def _preprocess_image(self, image: "ImageObject", **kwargs) -> "ImageO...
method _regularize_videos (line 1497) | def _regularize_videos(self, videos: list["VideoInput"], **kwargs) -> ...
method _get_mm_inputs (line 1534) | def _get_mm_inputs(
method process_messages (line 1568) | def process_messages(
class Qwen3VLPlugin (line 1617) | class Qwen3VLPlugin(Qwen2VLPlugin):
method _get_mm_inputs (line 1619) | def _get_mm_inputs(
method process_messages (line 1664) | def process_messages(
class GLM4VPlugin (line 1740) | class GLM4VPlugin(Qwen2VLPlugin):
method _get_mm_inputs (line 1742) | def _get_mm_inputs(
method process_messages (line 1778) | def process_messages(
method get_mm_inputs (line 1853) | def get_mm_inputs(
class Qwen2OmniPlugin (line 1871) | class Qwen2OmniPlugin(Qwen2VLPlugin):
method _get_mm_inputs (line 1876) | def _get_mm_inputs(
method process_messages (line 1930) | def process_messages(
class VideoLlavaPlugin (line 2052) | class VideoLlavaPlugin(BasePlugin):
method process_messages (line 2054) | def process_messages(
class LFMVLPlugin (line 2105) | class LFMVLPlugin(BasePlugin):
method _get_mm_inputs (line 2114) | def _get_mm_inputs(
method process_messages (line 2133) | def process_messages(
class YoutuVLPlugin (line 2172) | class YoutuVLPlugin(BasePlugin):
method process_messages (line 2179) | def process_messages(
function register_mm_plugin (line 2231) | def register_mm_plugin(name: str, plugin_class: type["BasePlugin"]) -> N...
function get_mm_plugin (line 2239) | def get_mm_plugin(
FILE: src/llamafactory/data/parser.py
class DatasetAttr (line 27) | class DatasetAttr:
method __repr__ (line 66) | def __repr__(self) -> str:
method set_attr (line 69) | def set_attr(self, key: str, obj: dict[str, Any], default: Any | None ...
method join (line 72) | def join(self, attr: dict[str, Any]) -> None:
function get_dataset_list (line 93) | def get_dataset_list(dataset_names: list[str] | None, dataset_dir: str |...
FILE: src/llamafactory/data/processor/feedback.py
class FeedbackDatasetProcessor (line 30) | class FeedbackDatasetProcessor(DatasetProcessor):
method _encode_data_example (line 31) | def _encode_data_example(
method preprocess_dataset (line 85) | def preprocess_dataset(self, examples: dict[str, list[Any]]) -> dict[s...
method print_data_example (line 124) | def print_data_example(self, example: dict[str, list[int]]) -> None:
FILE: src/llamafactory/data/processor/pairwise.py
class PairwiseDatasetProcessor (line 30) | class PairwiseDatasetProcessor(DatasetProcessor):
method _encode_data_example (line 31) | def _encode_data_example(
method preprocess_dataset (line 71) | def preprocess_dataset(self, examples: dict[str, list[Any]]) -> dict[s...
method print_data_example (line 102) | def print_data_example(self, example: dict[str, list[int]]) -> None:
FILE: src/llamafactory/data/processor/pretrain.py
class PretrainDatasetProcessor (line 26) | class PretrainDatasetProcessor(DatasetProcessor):
method preprocess_dataset (line 27) | def preprocess_dataset(self, examples: dict[str, list[Any]]) -> dict[s...
method print_data_example (line 55) | def print_data_example(self, example: dict[str, list[int]]) -> None:
FILE: src/llamafactory/data/processor/processor_utils.py
class DatasetProcessor (line 29) | class DatasetProcessor(ABC):
method preprocess_dataset (line 38) | def preprocess_dataset(self, examples: dict[str, list[Any]]) -> dict[s...
method print_data_example (line 43) | def print_data_example(self, example: dict[str, list[int]]) -> None:
function search_for_fit (line 48) | def search_for_fit(numbers: list[int], capacity: int) -> int:
function greedy_knapsack (line 54) | def greedy_knapsack(numbers: list[int], capacity: int) -> list[list[int]]:
function infer_seqlen (line 76) | def infer_seqlen(source_len: int, target_len: int, cutoff_len: int) -> t...
FILE: src/llamafactory/data/processor/supervised.py
class SupervisedDatasetProcessor (line 32) | class SupervisedDatasetProcessor(DatasetProcessor):
method _encode_data_example (line 33) | def _encode_data_example(
method preprocess_dataset (line 88) | def preprocess_dataset(self, examples: dict[str, list[Any]]) -> dict[s...
method print_data_example (line 117) | def print_data_example(self, example: dict[str, list[int]]) -> None:
class PackedSupervisedDatasetProcessor (line 126) | class PackedSupervisedDatasetProcessor(SupervisedDatasetProcessor):
method preprocess_dataset (line 127) | def preprocess_dataset(self, examples: dict[str, list[Any]]) -> dict[s...
FILE: src/llamafactory/data/processor/unsupervised.py
class UnsupervisedDatasetProcessor (line 30) | class UnsupervisedDatasetProcessor(DatasetProcessor):
method _encode_data_example (line 31) | def _encode_data_example(
method preprocess_dataset (line 59) | def preprocess_dataset(self, examples: dict[str, list[Any]]) -> dict[s...
method print_data_example (line 87) | def print_data_example(self, example: dict[str, list[int]]) -> None:
FILE: src/llamafactory/data/template.py
class Template (line 41) | class Template:
method encode_oneturn (line 59) | def encode_oneturn(
method encode_multiturn (line 75) | def encode_multiturn(
method extract_tool (line 86) | def extract_tool(self, content: str) -> Union[str, list["FunctionCall"]]:
method get_stop_token_ids (line 90) | def get_stop_token_ids(self, tokenizer: "PreTrainedTokenizer") -> list...
method add_thought (line 98) | def add_thought(self, content: str = "") -> str:
method remove_thought (line 102) | def remove_thought(self, content: str) -> str:
method get_thought_word_ids (line 107) | def get_thought_word_ids(self, tokenizer: "PreTrainedTokenizer") -> li...
method _convert_elements_to_ids (line 111) | def _convert_elements_to_ids(self, tokenizer: "PreTrainedTokenizer", e...
method _encode (line 130) | def _encode(
method _add_or_replace_eos_token (line 171) | def _add_or_replace_eos_token(tokenizer: "PreTrainedTokenizer", eos_to...
method fix_special_tokens (line 187) | def fix_special_tokens(self, tokenizer: "PreTrainedTokenizer") -> None:
method _jinja_escape (line 216) | def _jinja_escape(content: str) -> str:
method _convert_slots_to_jinja (line 221) | def _convert_slots_to_jinja(slots: "SLOTS", tokenizer: "PreTrainedToke...
method _get_jinja_template (line 243) | def _get_jinja_template(self, tokenizer: "PreTrainedTokenizer") -> str:
method fix_jinja_template (line 271) | def fix_jinja_template(self, tokenizer: "PreTrainedTokenizer") -> None:
method _convert_slots_to_ollama (line 280) | def _convert_slots_to_ollama(
method _get_ollama_template (line 304) | def _get_ollama_template(self, tokenizer: "PreTrainedTokenizer") -> str:
method get_ollama_modelfile (line 316) | def get_ollama_modelfile(self, tokenizer: "PreTrainedTokenizer") -> str:
class Llama2Template (line 335) | class Llama2Template(Template):
method _encode (line 339) | def _encode(
method _get_jinja_template (line 373) | def _get_jinja_template(self, tokenizer: "PreTrainedTokenizer") -> str:
class ReasoningTemplate (line 405) | class ReasoningTemplate(Template):
method encode_oneturn (line 409) | def encode_oneturn(
method encode_multiturn (line 436) | def encode_multiturn(
class Glm47ReasoningTemplate (line 463) | class Glm47ReasoningTemplate(ReasoningTemplate):
method add_thought (line 467) | def add_thought(self, content: str = "") -> str:
function register_template (line 477) | def register_template(
function parse_template (line 550) | def parse_template(tokenizer: "PreTrainedTokenizer") -> "Template":
function get_template_and_fix_tokenizer (line 612) | def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", dat...
FILE: src/llamafactory/data/tool_utils.py
class FunctionCall (line 26) | class FunctionCall(NamedTuple):
class ToolUtils (line 124) | class ToolUtils(ABC):
method tool_formatter (line 129) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 135) | def function_formatter(functions: list["FunctionCall"]) -> str:
method tool_extractor (line 141) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
class DefaultToolUtils (line 149) | class DefaultToolUtils(ToolUtils):
method tool_formatter (line 154) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 189) | def function_formatter(functions: list["FunctionCall"]) -> str:
method tool_extractor (line 194) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
class GLM4ToolUtils (line 213) | class GLM4ToolUtils(ToolUtils):
method tool_formatter (line 218) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 230) | def function_formatter(functions: list["FunctionCall"]) -> str:
method tool_extractor (line 238) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
class Llama3ToolUtils (line 251) | class Llama3ToolUtils(ToolUtils):
method tool_formatter (line 259) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 270) | def function_formatter(functions: list["FunctionCall"]) -> str:
method tool_extractor (line 276) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
class MiniMaxM1ToolUtils (line 289) | class MiniMaxM1ToolUtils(ToolUtils):
method tool_formatter (line 294) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 304) | def function_formatter(functions: list["FunctionCall"]) -> str:
method tool_extractor (line 314) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
class MiniMaxM2ToolUtils (line 336) | class MiniMaxM2ToolUtils(ToolUtils):
method tool_formatter (line 341) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 351) | def function_formatter(functions: list["FunctionCall"]) -> str:
method tool_extractor (line 366) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
class MistralToolUtils (line 392) | class MistralToolUtils(ToolUtils):
method tool_formatter (line 397) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 406) | def function_formatter(functions: list["FunctionCall"]) -> str:
method tool_extractor (line 413) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
class QwenToolUtils (line 426) | class QwenToolUtils(ToolUtils):
method tool_formatter (line 431) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 441) | def function_formatter(functions: list["FunctionCall"]) -> str:
method tool_extractor (line 450) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
class Qwen35ToolUtils (line 471) | class Qwen35ToolUtils(ToolUtils):
method tool_formatter (line 476) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 486) | def function_formatter(functions: list["FunctionCall"]) -> str:
method tool_extractor (line 503) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
class GLM4MOEToolUtils (line 522) | class GLM4MOEToolUtils(QwenToolUtils):
method tool_formatter (line 527) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 537) | def function_formatter(functions: list["FunctionCall"]) -> str:
class SeedToolUtils (line 554) | class SeedToolUtils(ToolUtils):
method tool_formatter (line 559) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 564) | def function_formatter(functions: list["FunctionCall"]) -> str:
method tool_extractor (line 583) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
class LingToolUtils (line 604) | class LingToolUtils(QwenToolUtils):
method tool_formatter (line 609) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
class LFM2ToolUtils (line 618) | class LFM2ToolUtils(ToolUtils):
method tool_formatter (line 623) | def tool_formatter(tools: list[dict[str, Any]]) -> str:
method function_formatter (line 633) | def function_formatter(functions: list["FunctionCall"]) -> str:
method _ast_to_value (line 649) | def _ast_to_value(node: ast.AST) -> Any:
method tool_extractor (line 667) | def tool_extractor(content: str) -> Union[str, list["FunctionCall"]]:
function get_tool_utils (line 738) | def get_tool_utils(name: str) -> "ToolUtils":
FILE: src/llamafactory/eval/evaluator.py
class Evaluator (line 61) | class Evaluator:
method __init__ (line 62) | def __init__(self, args: Optional[dict[str, Any]] = None) -> None:
method batch_inference (line 72) | def batch_inference(self, batch_input: dict[str, "torch.Tensor"]) -> l...
method eval (line 79) | def eval(self) -> None:
method _save_results (line 139) | def _save_results(self, category_corrects: dict[str, "NDArray"], resul...
function run_eval (line 157) | def run_eval() -> None:
FILE: src/llamafactory/eval/template.py
class EvalTemplate (line 22) | class EvalTemplate:
method _parse_example (line 27) | def _parse_example(self, example: dict[str, str]) -> tuple[str, str]:
method format_example (line 36) | def format_example(
function _register_eval_template (line 56) | def _register_eval_template(name: str, system: str, choice: str, answer:...
function get_eval_template (line 60) | def get_eval_template(name: str) -> "EvalTemplate":
FILE: src/llamafactory/extras/constants.py
class AttentionFunction (line 116) | class AttentionFunction(StrEnum):
class EngineName (line 124) | class EngineName(StrEnum):
class DownloadSource (line 131) | class DownloadSource(StrEnum):
class QuantizationMethod (line 138) | class QuantizationMethod(StrEnum):
class RopeScaling (line 152) | class RopeScaling(StrEnum):
function register_model_group (line 159) | def register_model_group(
FILE: src/llamafactory/extras/env.py
function print_env (line 25) | def print_env() -> None:
FILE: src/llamafactory/extras/logging.py
class LoggerHandler (line 34) | class LoggerHandler(logging.Handler):
method __init__ (line 37) | def __init__(self, output_dir: str) -> None:
method _write_log (line 52) | def _write_log(self, log_entry: str) -> None:
method emit (line 56) | def emit(self, record) -> None:
method close (line 63) | def close(self) -> None:
class _Logger (line 68) | class _Logger(logging.Logger):
method info_rank0 (line 71) | def info_rank0(self, *args, **kwargs) -> None:
method warning_rank0 (line 74) | def warning_rank0(self, *args, **kwargs) -> None:
method warning_rank0_once (line 77) | def warning_rank0_once(self, *args, **kwargs) -> None:
function _get_default_logging_level (line 81) | def _get_default_logging_level() -> "logging._Level":
function _get_library_name (line 93) | def _get_library_name() -> str:
function _get_library_root_logger (line 97) | def _get_library_root_logger() -> "_Logger":
function _configure_library_root_logger (line 101) | def _configure_library_root_logger() -> None:
function get_logger (line 121) | def get_logger(name: str | None = None) -> "_Logger":
function add_handler (line 130) | def add_handler(handler: "logging.Handler") -> None:
function remove_handler (line 136) | def remove_handler(handler: logging.Handler) -> None:
function info_rank0 (line 142) | def info_rank0(self: "logging.Logger", *args, **kwargs) -> None:
function warning_rank0 (line 147) | def warning_rank0(self: "logging.Logger", *args, **kwargs) -> None:
function warning_rank0_once (line 153) | def warning_rank0_once(self: "logging.Logger", *args, **kwargs) -> None:
FILE: src/llamafactory/extras/misc.py
class AverageMeter (line 57) | class AverageMeter:
method __init__ (line 60) | def __init__(self):
method reset (line 63) | def reset(self):
method update (line 69) | def update(self, val, n=1):
function check_version (line 76) | def check_version(requirement: str, mandatory: bool = False) -> None:
function check_dependencies (line 95) | def check_dependencies() -> None:
function calculate_tps (line 104) | def calculate_tps(dataset: list[dict[str, Any]], metrics: dict[str, floa...
function count_parameters (line 117) | def count_parameters(model: "torch.nn.Module") -> tuple[int, int]:
function get_current_device (line 144) | def get_current_device() -> "torch.device":
function get_device_name (line 160) | def get_device_name() -> str:
function get_torch_device (line 176) | def get_torch_device():
function get_device_count (line 187) | def get_device_count() -> int:
function get_logits_processor (line 201) | def get_logits_processor() -> "LogitsProcessorList":
function get_current_memory (line 208) | def get_current_memory() -> tuple[int, int]:
function get_peak_memory (line 222) | def get_peak_memory() -> tuple[int, int]:
function has_tokenized_data (line 236) | def has_tokenized_data(path: "os.PathLike") -> bool:
function infer_optim_dtype (line 241) | def infer_optim_dtype(model_dtype: Optional["torch.dtype"]) -> "torch.dt...
function is_accelerator_available (line 251) | def is_accelerator_available() -> bool:
function is_env_enabled (line 258) | def is_env_enabled(env_var: str, default: str = "0") -> bool:
function numpify (line 263) | def numpify(inputs: Union["NDArray", "torch.Tensor"]) -> "NDArray":
function skip_check_imports (line 275) | def skip_check_imports() -> None:
function torch_gc (line 281) | def torch_gc() -> None:
function try_download_model_from_other_hub (line 294) | def try_download_model_from_other_hub(model_args: "ModelArguments") -> str:
function use_modelscope (line 331) | def use_modelscope() -> bool:
function use_openmind (line 335) | def use_openmind() -> bool:
function use_ray (line 339) | def use_ray() -> bool:
function use_kt (line 343) | def use_kt() -> bool:
function find_available_port (line 347) | def find_available_port() -> int:
function fix_proxy (line 356) | def fix_proxy(ipv6_enabled: bool = False) -> None:
FILE: src/llamafactory/extras/packages.py
function _is_package_available (line 30) | def _is_package_available(name: str) -> bool:
function _get_package_version (line 34) | def _get_package_version(name: str) -> "Version":
function is_pyav_available (line 41) | def is_pyav_available():
function is_librosa_available (line 45) | def is_librosa_available():
function is_fastapi_available (line 49) | def is_fastapi_available():
function is_galore_available (line 53) | def is_galore_available():
function is_apollo_available (line 57) | def is_apollo_available():
function is_jieba_available (line 61) | def is_jieba_available():
function is_gradio_available (line 65) | def is_gradio_available():
function is_matplotlib_available (line 69) | def is_matplotlib_available():
function is_mcore_adapter_available (line 73) | def is_mcore_adapter_available():
function is_pillow_available (line 77) | def is_pillow_available():
function is_ray_available (line 81) | def is_ray_available():
function is_kt_available (line 85) | def is_kt_available():
function is_requests_available (line 89) | def is_requests_available():
function is_rouge_available (line 93) | def is_rouge_available():
function is_safetensors_available (line 97) | def is_safetensors_available():
function is_sglang_available (line 101) | def is_sglang_available():
function is_starlette_available (line 105) | def is_starlette_available():
function is_transformers_version_greater_than (line 110) | def is_transformers_version_greater_than(content: str):
function is_torch_version_greater_than (line 115) | def is_torch_version_greater_than(content: str):
function is_uvicorn_available (line 119) | def is_uvicorn_available():
function is_vllm_available (line 123) | def is_vllm_available():
FILE: src/llamafactory/extras/ploting.py
function smooth (line 34) | def smooth(scalars: list[float]) -> list[float]:
function gen_loss_plot (line 49) | def gen_loss_plot(trainer_log: list[dict[str, Any]]) -> "matplotlib.figu...
function plot_loss (line 69) | def plot_loss(save_dictionary: str, keys: list[str] = ["loss"]) -> None:
FILE: src/llamafactory/hparams/data_args.py
class DataArguments (line 23) | class DataArguments:
method __post_init__ (line 143) | def __post_init__(self):
method to_dict (line 187) | def to_dict(self) -> dict[str, Any]:
FILE: src/llamafactory/hparams/evaluation_args.py
class EvaluationArguments (line 23) | class EvaluationArguments:
method __post_init__ (line 58) | def __post_init__(self):
FILE: src/llamafactory/hparams/finetuning_args.py
class FreezeArguments (line 20) | class FreezeArguments:
class LoraArguments (line 56) | class LoraArguments:
class OFTArguments (line 126) | class OFTArguments:
class RLHFArguments (line 168) | class RLHFArguments:
class GaloreArguments (line 263) | class GaloreArguments:
class ApolloArguments (line 302) | class ApolloArguments:
class BAdamArgument (line 353) | class BAdamArgument:
class SwanLabArguments (line 404) | class SwanLabArguments:
class FinetuningArguments (line 444) | class FinetuningArguments(
method __post_init__ (line 542) | def __post_init__(self):
method to_dict (line 599) | def to_dict(self) -> dict[str, Any]:
FILE: src/llamafactory/hparams/generating_args.py
class GeneratingArguments (line 22) | class GeneratingArguments:
method to_dict (line 70) | def to_dict(self, obey_generation_config: bool = False) -> dict[str, A...
FILE: src/llamafactory/hparams/model_args.py
class BaseModelArguments (line 34) | class BaseModelArguments:
method __post_init__ (line 205) | def __post_init__(self):
class QuantizationArguments (line 275) | class QuantizationArguments:
class ProcessorArguments (line 301) | class ProcessorArguments:
method __post_init__ (line 345) | def __post_init__(self):
class ExportArguments (line 354) | class ExportArguments:
method __post_init__ (line 394) | def __post_init__(self):
class VllmArguments (line 400) | class VllmArguments:
method __post_init__ (line 424) | def __post_init__(self):
class SGLangArguments (line 430) | class SGLangArguments:
method __post_init__ (line 456) | def __post_init__(self):
class KTransformersArguments (line 462) | class KTransformersArguments:
class ModelArguments (line 507) | class ModelArguments(
method __post_init__ (line 542) | def __post_init__(self):
method copyfrom (line 550) | def copyfrom(cls, source: "Self", **kwargs) -> "Self":
method to_dict (line 565) | def to_dict(self) -> dict[str, Any]:
FILE: src/llamafactory/hparams/parser.py
function read_args (line 69) | def read_args(args: dict[str, Any] | list[str] | None = None) -> dict[st...
function _parse_args (line 86) | def _parse_args(
function _verify_trackio_args (line 103) | def _verify_trackio_args(training_args: "TrainingArguments") -> None:
function _set_transformers_logging (line 149) | def _set_transformers_logging() -> None:
function _set_env_vars (line 156) | def _set_env_vars() -> None:
function _verify_model_args (line 164) | def _verify_model_args(
function _check_extra_dependencies (line 189) | def _check_extra_dependencies(
function _parse_train_args (line 241) | def _parse_train_args(args: dict[str, Any] | list[str] | None = None) ->...
function _parse_train_mca_args (line 247) | def _parse_train_mca_args(args: dict[str, Any] | list[str] | None = None...
function _configure_mca_training_args (line 259) | def _configure_mca_training_args(training_args, data_args, finetuning_ar...
function _parse_infer_args (line 268) | def _parse_infer_args(args: dict[str, Any] | list[str] | None = None) ->...
function _parse_eval_args (line 274) | def _parse_eval_args(args: dict[str, Any] | list[str] | None = None) -> ...
function get_ray_args (line 280) | def get_ray_args(args: dict[str, Any] | list[str] | None = None) -> RayA...
function get_train_args (line 286) | def get_train_args(args: dict[str, Any] | list[str] | None = None) -> _T...
function get_infer_args (line 519) | def get_infer_args(args: dict[str, Any] | list[str] | None = None) -> _I...
function get_eval_args (line 554) | def get_eval_args(args: dict[str, Any] | list[str] | None = None) -> _EV...
FILE: src/llamafactory/hparams/training_args.py
class RayArguments (line 39) | class RayArguments:
method __post_init__ (line 59) | def __post_init__(self):
class Fp8Arguments (line 67) | class Fp8Arguments:
class TrainingArguments (line 90) | class TrainingArguments(Fp8Arguments, RayArguments, BaseTrainingArguments):
method __post_init__ (line 98) | def __post_init__(self):
FILE: src/llamafactory/launcher.py
function launch (line 38) | def launch():
FILE: src/llamafactory/model/adapter.py
function _setup_full_tuning (line 40) | def _setup_full_tuning(
function _setup_freeze_tuning (line 59) | def _setup_freeze_tuning(
function _setup_lora_tuning (line 143) | def _setup_lora_tuning(
function init_adapter (line 321) | def init_adapter(
FILE: src/llamafactory/model/loader.py
class TokenizerModule (line 52) | class TokenizerModule(TypedDict):
function _get_init_kwargs (line 57) | def _get_init_kwargs(model_args: "ModelArguments") -> dict[str, Any]:
function load_tokenizer (line 72) | def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
function load_config (line 126) | def load_config(model_args: "ModelArguments") -> "PretrainedConfig":
function load_model (line 132) | def load_model(
FILE: src/llamafactory/model/model_utils/attention.py
function configure_attn_implementation (line 31) | def configure_attn_implementation(config: "PretrainedConfig", model_args...
function print_attn_implementation (line 104) | def print_attn_implementation(config: "PretrainedConfig") -> None:
FILE: src/llamafactory/model/model_utils/checkpointing.py
function get_unsloth_gradient_checkpointing_func (line 43) | def get_unsloth_gradient_checkpointing_func() -> Callable:
function get_custom_gradient_checkpointing_func (line 80) | def get_custom_gradient_checkpointing_func(gradient_checkpointing_func: ...
function _gradient_checkpointing_enable (line 106) | def _gradient_checkpointing_enable(
function _fp32_forward_post_hook (line 137) | def _fp32_forward_post_hook(
function prepare_model_for_training (line 143) | def prepare_model_for_training(model: "PreTrainedModel", model_args: "Mo...
FILE: src/llamafactory/model/model_utils/embedding.py
function _noisy_mean_initialization (line 32) | def _noisy_mean_initialization(embed_weight: "torch.Tensor", num_new_tok...
function _description_based_initialization (line 48) | def _description_based_initialization(
function _initialize_embeddings (line 114) | def _initialize_embeddings(
function resize_embedding_layer (line 153) | def resize_embedding_layer(
FILE: src/llamafactory/model/model_utils/ktransformers.py
function _get_kt_kwargs (line 48) | def _get_kt_kwargs(
function load_kt_pretrained_model (line 69) | def load_kt_pretrained_model(config: "PretrainedConfig", model_args: "Mo...
function get_kt_peft_model (line 117) | def get_kt_peft_model(model: "PreTrainedModel", peft_kwargs: dict[str, A...
function load_kt_peft_model (line 124) | def load_kt_peft_model(model_args: "ModelArguments", model: "PreTrainedM...
FILE: src/llamafactory/model/model_utils/kv_cache.py
function configure_kv_cache (line 29) | def configure_kv_cache(config: "PretrainedConfig", model_args: "ModelArg...
FILE: src/llamafactory/model/model_utils/liger_kernel.py
function apply_liger_kernel (line 30) | def apply_liger_kernel(
FILE: src/llamafactory/model/model_utils/longlora.py
function llama_attention_forward (line 56) | def llama_attention_forward(
function llama_flash_attention_2_forward (line 141) | def llama_flash_attention_2_forward(
function llama_sdpa_attention_forward (line 249) | def llama_sdpa_attention_forward(
function _apply_llama_patch (line 352) | def _apply_llama_patch() -> None:
function configure_longlora (line 359) | def configure_longlora(config: "PretrainedConfig", model_args: "ModelArg...
FILE: src/llamafactory/model/model_utils/misc.py
function find_all_linear_modules (line 28) | def find_all_linear_modules(model: "PreTrainedModel", freeze_vision_towe...
function find_expanded_modules (line 55) | def find_expanded_modules(model: "PreTrainedModel", target_modules: list...
function register_autoclass (line 80) | def register_autoclass(config: "PretrainedConfig", model: "PreTrainedMod...
FILE: src/llamafactory/model/model_utils/mod.py
function load_mod_pretrained_model (line 26) | def load_mod_pretrained_model(**init_kwargs) -> "PreTrainedModel":
function convert_pretrained_model_to_mod (line 32) | def convert_pretrained_model_to_mod(
FILE: src/llamafactory/model/model_utils/moe.py
function _set_z3_leaf_modules (line 36) | def _set_z3_leaf_modules(model: "PreTrainedModel", leaf_modules: list[Un...
function add_z3_leaf_module (line 43) | def add_z3_leaf_module(model: "PreTrainedModel") -> None:
function configure_moe (line 151) | def configure_moe(config: "PretrainedConfig", model_args: "ModelArgument...
class Qwen3OmniMoeThinkerTextSparseMoeBlock (line 202) | class Qwen3OmniMoeThinkerTextSparseMoeBlock(nn.Module):
method __init__ (line 203) | def __init__(self, config):
method forward (line 220) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
FILE: src/llamafactory/model/model_utils/packing.py
function get_seqlens_in_batch (line 55) | def get_seqlens_in_batch(attention_mask: "torch.Tensor") -> "torch.Tensor":
function get_unpad_data (line 81) | def get_unpad_data(attention_mask: "torch.Tensor") -> tuple["torch.Tenso...
function configure_packing (line 110) | def configure_packing(model_args: "ModelArguments", is_trainable: bool) ...
FILE: src/llamafactory/model/model_utils/quantization.py
function _get_quantization_dataset (line 43) | def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_ar...
function configure_quantization (line 82) | def configure_quantization(
FILE: src/llamafactory/model/model_utils/rope.py
function configure_rope (line 35) | def configure_rope(config: "PretrainedConfig", model_args: "ModelArgumen...
FILE: src/llamafactory/model/model_utils/unsloth.py
function _get_unsloth_kwargs (line 30) | def _get_unsloth_kwargs(
function load_unsloth_pretrained_model (line 51) | def load_unsloth_pretrained_model(
function get_unsloth_peft_model (line 68) | def get_unsloth_peft_model(
function load_unsloth_peft_model (line 82) | def load_unsloth_peft_model(
FILE: src/llamafactory/model/model_utils/valuehead.py
function load_valuehead_params (line 33) | def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArgume...
function prepare_valuehead_model (line 61) | def prepare_valuehead_model(model: "PreTrainedModel") -> None:
FILE: src/llamafactory/model/model_utils/visual.py
class CompositeModel (line 41) | class CompositeModel:
method get_projector (line 48) | def get_projector(self, module: "torch.nn.Module") -> "torch.nn.Module":
function _register_composite_model (line 58) | def _register_composite_model(
class LlavaMultiModalProjectorForYiVL (line 84) | class LlavaMultiModalProjectorForYiVL(torch.nn.Module):
method __init__ (line 85) | def __init__(self, config: "LlavaConfig") -> None:
method forward (line 98) | def forward(self, image_features: "torch.Tensor") -> "torch.Tensor":
class LlavaMultiModalProjectorForYiVLForVLLM (line 118) | class LlavaMultiModalProjectorForYiVLForVLLM(LlavaMultiModalProjectorFor...
method __init__ (line 119) | def __init__(self, vision_hidden_size: int, text_hidden_size: int, pro...
function autocast_projector_dtype (line 129) | def autocast_projector_dtype(model: "PreTrainedModel", model_args: "Mode...
function configure_visual_model (line 148) | def configure_visual_model(config: "PretrainedConfig") -> None:
function get_forbidden_modules (line 159) | def get_forbidden_modules(config: "PretrainedConfig", finetuning_args: "...
function patch_target_modules (line 182) | def patch_target_modules(
FILE: src/llamafactory/model/patcher.py
function patch_qwen3_omni_moe_thinker_text_sparse_moe_block (line 53) | def patch_qwen3_omni_moe_thinker_text_sparse_moe_block():
function patch_youtu_vl_model (line 64) | def patch_youtu_vl_model(model: "PreTrainedModel") -> None:
function patch_tokenizer (line 84) | def patch_tokenizer(tokenizer: "PreTrainedTokenizer", model_args: "Model...
function patch_processor (line 108) | def patch_processor(
function patch_config (line 126) | def patch_config(
function patch_model (line 195) | def patch_model(
function patch_valuehead_model (line 246) | def patch_valuehead_model(model: "AutoModelForCausalLMWithValueHead") ->...
FILE: src/llamafactory/third_party/muon/muon.py
function zeropower_via_newtonschulz5 (line 48) | def zeropower_via_newtonschulz5(G: "torch.Tensor", steps: int) -> "torch...
class Muon (line 76) | class Muon(torch.optim.Optimizer):
method __init__ (line 102) | def __init__(
method adjust_lr_for_muon (line 137) | def adjust_lr_for_muon(self, lr: float, param_shape: list[int]) -> float:
method step (line 145) | def step(self, closure=None):
FILE: src/llamafactory/train/callbacks.py
function fix_valuehead_checkpoint (line 53) | def fix_valuehead_checkpoint(
class FixValueHeadModelCallback (line 98) | class FixValueHeadModelCallback(TrainerCallback):
method on_save (line 102) | def on_save(self, args: "TrainingArguments", state: "TrainerState", co...
class SaveProcessorCallback (line 112) | class SaveProcessorCallback(TrainerCallback):
method __init__ (line 115) | def __init__(self, processor: "ProcessorMixin") -> None:
method on_save (line 119) | def on_save(self, args: "TrainingArguments", state: "TrainerState", co...
method on_train_end (line 125) | def on_train_end(self, args: "TrainingArguments", state: "TrainerState...
class PissaConvertCallback (line 130) | class PissaConvertCallback(TrainerCallback):
method on_train_begin (line 134) | def on_train_begin(self, args: "TrainingArguments", state: "TrainerSta...
method on_train_end (line 146) | def on_train_end(self, args: "TrainingArguments", state: "TrainerState...
class LogCallback (line 172) | class LogCallback(TrainerCallback):
method __init__ (line 175) | def __init__(self) -> None:
method _set_abort (line 194) | def _set_abort(self, signum, frame) -> None:
method _reset (line 197) | def _reset(self, max_steps: int = 0) -> None:
method _timing (line 204) | def _timing(self, cur_steps: int) -> None:
method _write_log (line 213) | def _write_log(self, output_dir: str, logs: dict[str, Any]) -> None:
method _create_thread_pool (line 217) | def _create_thread_pool(self, output_dir: str) -> None:
method _close_thread_pool (line 221) | def _close_thread_pool(self) -> None:
method on_init_end (line 227) | def on_init_end(self, args: "TrainingArguments", state: "TrainerState"...
method on_train_begin (line 237) | def on_train_begin(self, args: "TrainingArguments", state: "TrainerSta...
method on_train_end (line 244) | def on_train_end(self, args: "TrainingArguments", state: "TrainerState...
method on_substep_end (line 248) | def on_substep_end(self, args: "TrainingArguments", state: "TrainerSta...
method on_step_end (line 254) | def on_step_end(self, args: "TrainingArguments", state: "TrainerState"...
method on_evaluate (line 260) | def on_evaluate(self, args: "TrainingArguments", state: "TrainerState"...
method on_predict (line 265) | def on_predict(self, args: "TrainingArguments", state: "TrainerState",...
method on_log (line 270) | def on_log(self, args: "TrainingArguments", state: "TrainerState", con...
method on_prediction_step (line 311) | def on_prediction_step(
class ReporterCallback (line 341) | class ReporterCallback(TrainerCallback):
method __init__ (line 344) | def __init__(
method on_train_begin (line 358) | def on_train_begin(self, args: "TrainingArguments", state: "TrainerSta...
FILE: src/llamafactory/train/dpo/ktrainer.py
class KDPOTrainer (line 32) | class KDPOTrainer(KTrainer, CustomDPOTrainer):
method concatenated_forward (line 34) | def concatenated_forward(
FILE: src/llamafactory/train/dpo/trainer.py
class CustomDPOTrainer (line 44) | class CustomDPOTrainer(DPOTrainer):
method __init__ (line 45) | def __init__(
method create_optimizer (line 126) | def create_optimizer(self) -> "torch.optim.Optimizer":
method create_scheduler (line 132) | def create_scheduler(
method _get_train_sampler (line 139) | def _get_train_sampler(self, *args, **kwargs) -> Optional["torch.utils...
method get_batch_samples (line 146) | def get_batch_samples(self, *args, **kwargs):
method odds_ratio_loss (line 150) | def odds_ratio_loss(self, chosen_logps: "torch.Tensor", rejected_logps...
method simpo_loss (line 160) | def simpo_loss(self, chosen_logps: "torch.Tensor", rejected_logps: "to...
method bco_loss (line 168) | def bco_loss(
method compute_preference_loss (line 187) | def compute_preference_loss(
method concatenated_forward (line 219) | def concatenated_forward(
method compute_reference_log_probs (line 255) | def compute_reference_log_probs(
method get_batch_loss_metrics (line 277) | def get_batch_loss_metrics(
method compute_loss (line 320) | def compute_loss(
method log (line 327) | def log(self, logs: dict[str, float], *args, **kwargs) -> None:
FILE: src/llamafactory/train/dpo/workflow.py
function run_dpo (line 35) | def run_dpo(
FILE: src/llamafactory/train/fp8_utils.py
function create_fp8_kwargs (line 29) | def create_fp8_kwargs(training_args: "TrainingArguments") -> list[Any]:
function get_fp8_mixed_precision (line 108) | def get_fp8_mixed_precision(training_args: "TrainingArguments") -> Optio...
function configure_fp8_environment (line 120) | def configure_fp8_environment(training_args: "TrainingArguments") -> None:
function verify_fp8_status (line 155) | def verify_fp8_status(accelerator, training_args: "TrainingArguments") -...
function patch_accelerator_for_fp8 (line 185) | def patch_accelerator_for_fp8() -> None:
FILE: src/llamafactory/train/kto/trainer.py
class CustomKTOTrainer (line 43) | class CustomKTOTrainer(KTOTrainer):
method __init__ (line 44) | def __init__(
method create_optimizer (line 123) | def create_optimizer(self) -> "torch.optim.Optimizer":
method create_scheduler (line 129) | def create_scheduler(
method _get_train_sampler (line 136) | def _get_train_sampler(self, *args, **kwargs) -> Optional["torch.utils...
method get_batch_samples (line 144) | def get_batch_samples(self, *args, **kwargs):
method forward (line 149) | def forward(
method concatenated_forward (line 184) | def concatenated_forward(
method compute_reference_log_probs (line 202) | def compute_reference_log_probs(
method get_batch_loss_metrics (line 221) | def get_batch_loss_metrics(
method compute_loss (line 271) | def compute_loss(
method log (line 278) | def log(self, logs: dict[str, float], *args, **kwargs) -> None:
FILE: src/llamafactory/train/kto/workflow.py
function run_kto (line 35) | def run_kto(
FILE: src/llamafactory/train/mca/workflow.py
function _data_collator_wrapper (line 60) | def _data_collator_wrapper(data_collator: Any):
function _check_model_support (line 80) | def _check_model_support(model_args: "ModelArguments"):
function _freeze_model_parameters (line 98) | def _freeze_model_parameters(model: Any, finetuning_args: "FinetuningArg...
function run_pt (line 121) | def run_pt(
function run_sft (line 173) | def run_sft(
function run_dpo (line 237) | def run_dpo(
FILE: src/llamafactory/train/ppo/ppo_utils.py
function get_rewards_from_server (line 34) | def get_rewards_from_server(server_url: str, messages: list[str]) -> lis...
function replace_model (line 43) | def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Li...
function dump_layernorm (line 65) | def dump_layernorm(model: "PreTrainedModel") -> dict[str, "torch.Tensor"]:
function restore_layernorm (line 76) | def restore_layernorm(model: "PreTrainedModel", layernorm_params: Option...
FILE: src/llamafactory/train/ppo/trainer.py
class CustomPPOTrainer (line 64) | class CustomPPOTrainer(PPOTrainer, Trainer):
method __init__ (line 67) | def __init__(
method ppo_train (line 200) | def ppo_train(self, resume_from_checkpoint: Optional[str] = None) -> N...
method create_optimizer (line 311) | def create_optimizer(
method create_scheduler (line 338) | def create_scheduler(
method get_inputs (line 351) | def get_inputs(self, batch: dict[str, "torch.Tensor"]) -> tuple[list["...
method get_rewards (line 389) | def get_rewards(
method batched_forward_pass (line 422) | def batched_forward_pass(
method save_model (line 492) | def save_model(self, output_dir: Optional[str] = None) -> None:
FILE: src/llamafactory/train/ppo/workflow.py
function run_ppo (line 34) | def run_ppo(
FILE: src/llamafactory/train/pt/trainer.py
class CustomTrainer (line 33) | class CustomTrainer(Trainer):
method __init__ (line 36) | def __init__(
method create_optimizer (line 72) | def create_optimizer(self) -> "torch.optim.Optimizer":
method create_scheduler (line 78) | def create_scheduler(
method _get_train_sampler (line 85) | def _get_train_sampler(self, *args, **kwargs) -> Optional["torch.utils...
method compute_loss (line 92) | def compute_loss(self, model, inputs, *args, **kwargs):
FILE: src/llamafactory/train/pt/workflow.py
function run_pt (line 36) | def run_pt(
FILE: src/llamafactory/train/rm/metric.py
class ComputeAccuracy (line 28) | class ComputeAccuracy:
method _dump (line 31) | def _dump(self) -> dict[str, float] | None:
method __post_init__ (line 39) | def __post_init__(self):
method __call__ (line 42) | def __call__(self, eval_preds: "EvalPrediction", compute_result: bool ...
FILE: src/llamafactory/train/rm/trainer.py
class PairwiseTrainer (line 43) | class PairwiseTrainer(Trainer):
method __init__ (line 46) | def __init__(
method create_optimizer (line 68) | def create_optimizer(self) -> "torch.optim.Optimizer":
method create_scheduler (line 74) | def create_scheduler(
method _get_train_sampler (line 81) | def _get_train_sampler(self, *args, **kwargs) -> Optional["torch.utils...
method compute_loss (line 88) | def compute_loss(
method _save (line 113) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
method save_predictions (line 133) | def save_predictions(self, predict_results: "PredictionOutput") -> None:
FILE: src/llamafactory/train/rm/workflow.py
function run_rm (line 35) | def run_rm(
FILE: src/llamafactory/train/sft/metric.py
function eval_logit_processor (line 47) | def eval_logit_processor(logits: "torch.Tensor", labels: "torch.Tensor")...
class ComputeAccuracy (line 62) | class ComputeAccuracy:
method _dump (line 65) | def _dump(self) -> Optional[dict[str, float]]:
method __post_init__ (line 73) | def __post_init__(self):
method __call__ (line 76) | def __call__(self, eval_preds: "EvalPrediction", compute_result: bool ...
class ComputeSimilarity (line 88) | class ComputeSimilarity:
method _dump (line 96) | def _dump(self) -> Optional[dict[str, float]]:
method __post_init__ (line 104) | def __post_init__(self):
method __call__ (line 107) | def __call__(self, eval_preds: "EvalPrediction", compute_result: bool ...
FILE: src/llamafactory/train/sft/trainer.py
class CustomSeq2SeqTrainer (line 47) | class CustomSeq2SeqTrainer(Seq2SeqTrainer):
method __init__ (line 50) | def __init__(
method create_optimizer (line 131) | def create_optimizer(self) -> "torch.optim.Optimizer":
method create_scheduler (line 137) | def create_scheduler(
method _get_train_sampler (line 144) | def _get_train_sampler(self, *args, **kwargs) -> Optional["torch.utils...
method compute_loss (line 151) | def compute_loss(self, model, inputs, *args, **kwargs):
method prediction_step (line 165) | def prediction_step(
method save_predictions (line 191) | def save_predictions(
FILE: src/llamafactory/train/sft/workflow.py
function run_sft (line 41) | def run_sft(
FILE: src/llamafactory/train/test_utils.py
function compare_model (line 34) | def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"...
function check_lora_model (line 45) | def check_lora_model(model: "LoraModel") -> tuple[set[str], set[str]]:
function load_train_model (line 63) | def load_train_model(add_valuehead: bool = False, **kwargs) -> "PreTrain...
function load_infer_model (line 69) | def load_infer_model(add_valuehead: bool = False, **kwargs) -> "PreTrain...
function load_reference_model (line 75) | def load_reference_model(
function load_dataset_module (line 101) | def load_dataset_module(**kwargs) -> "DatasetModule":
function patch_valuehead_model (line 109) | def patch_valuehead_model() -> None:
FILE: src/llamafactory/train/trainer_utils.py
class DummyOptimizer (line 68) | class DummyOptimizer(torch.optim.Optimizer):
method __init__ (line 71) | def __init__(
method zero_grad (line 79) | def zero_grad(self, set_to_none: bool = True) -> None:
method step (line 83) | def step(self, closure: Optional[Callable[[], float]] = None) -> Optio...
function create_modelcard_and_push (line 87) | def create_modelcard_and_push(
function create_ref_model (line 116) | def create_ref_model(
function create_reward_model (line 151) | def create_reward_model(
function _get_decay_parameter_names (line 193) | def _get_decay_parameter_names(model: "PreTrainedModel") -> list[str]:
function _create_galore_optimizer (line 200) | def _create_galore_optimizer(
function _create_apollo_optimizer (line 288) | def _create_apollo_optimizer(
function _create_loraplus_optimizer (line 372) | def _create_loraplus_optimizer(
function _create_badam_optimizer (line 412) | def _create_badam_optimizer(
function _create_adam_mini_optimizer (line 473) | def _create_adam_mini_optimizer(
function _create_muon_optimizer (line 498) | def _create_muon_optimizer(
function create_custom_optimizer (line 527) | def create_custom_optimizer(
function create_custom_scheduler (line 551) | def create_custom_scheduler(
function get_batch_logps (line 592) | def get_batch_logps(
function dft_loss_func (line 639) | def dft_loss_func(
function _dft_cross_entropy (line 658) | def _dft_cross_entropy(
function asft_loss_func (line 686) | def asft_loss_func(
function _asft_cross_entropy (line 720) | def _asft_cross_entropy(
function _kl_divergence (line 743) | def _kl_divergence(
function eaft_loss_func (line 768) | def eaft_loss_func(
function _eaft_cross_entropy (line 790) | def _eaft_cross_entropy(
function nested_detach (line 829) | def nested_detach(
function get_swanlab_callback (line 848) | def get_swanlab_callback(finetuning_args: "FinetuningArguments") -> "Tra...
function get_placement_group (line 894) | def get_placement_group(num_workers: int) -> tuple["PlacementGroup", dic...
function get_ray_remote_config_for_worker (line 906) | def get_ray_remote_config_for_worker(
function get_ray_head_node_ip (line 943) | def get_ray_head_node_ip() -> str:
function sort_placement_group_by_node_ip (line 949) | def sort_placement_group_by_node_ip(placement_group: "PlacementGroup", m...
FILE: src/llamafactory/train/tuner.py
function _training_function (line 57) | def _training_function(config: dict[str, Any]) -> None:
function run_exp (line 115) | def run_exp(args: Optional[dict[str, Any]] = None, callbacks: Optional[l...
function export_model (line 128) | def export_model(args: Optional[dict[str, Any]] = None) -> None:
class Worker (line 227) | class Worker:
method __init__ (line 228) | def __init__(self):
method _setup_env_visible_devices (line 234) | def _setup_env_visible_devices(self) -> None:
method _training_function (line 247) | def _training_function(self, config: dict[str, Any]) -> None:
function _ray_training_function (line 251) | def _ray_training_function(ray_args: "RayArguments", config: dict[str, A...
FILE: src/llamafactory/v1/accelerator/helper.py
class DeviceType (line 42) | class DeviceType(StrEnum):
class ReduceOp (line 52) | class ReduceOp(StrEnum):
function requires_accelerator (line 59) | def requires_accelerator(fn):
function is_distributed (line 75) | def is_distributed() -> bool:
function get_rank (line 80) | def get_rank() -> int:
function get_world_size (line 85) | def get_world_size() -> int:
function get_local_rank (line 90) | def get_local_rank() -> int:
function get_local_world_size (line 95) | def get_local_world_size() -> int:
function get_current_accelerator (line 102) | def get_current_accelerator(check_available: bool = True) -> torch.device:
function get_device_count (line 110) | def get_device_count() -> int:
function synchronize (line 116) | def synchronize() -> None:
function set_device_index (line 122) | def set_device_index() -> None:
function get_current_device (line 129) | def get_current_device() -> torch.device:
function is_torch_cuda_available (line 137) | def is_torch_cuda_available():
function is_torch_mps_available (line 142) | def is_torch_mps_available():
function is_torch_npu_available (line 147) | def is_torch_npu_available():
function is_torch_xpu_available (line 152) | def is_torch_xpu_available():
function operate_tensorlike (line 157) | def operate_tensorlike(fn: Callable[[...], Tensor], data: TensorLike, **...
function get_process_group_backend (line 183) | def get_process_group_backend() -> str:
function all_gather (line 193) | def all_gather(tensor: Tensor, group: Optional[ProcessGroup] = None) -> ...
function all_reduce (line 201) | def all_reduce(tensor: Tensor, op: ReduceOp = ReduceOp.MEAN, group: Opti...
function broadcast (line 216) | def broadcast(tensor: Tensor, src: int = 0, group: Optional[ProcessGroup...
function main_process_first (line 223) | def main_process_first(local_only: bool = True) -> None:
FILE: src/llamafactory/v1/accelerator/interface.py
class Dim (line 45) | class Dim(StrEnum):
class DistributedStrategy (line 55) | class DistributedStrategy:
method __post_init__ (line 67) | def __post_init__(self) -> None:
method model_mesh_shape (line 89) | def model_mesh_shape(self) -> tuple[int, int]:
method model_mesh_dim_names (line 94) | def model_mesh_dim_names(self) -> tuple[str, str]:
method data_mesh_shape (line 99) | def data_mesh_shape(self) -> tuple[int, int]:
method data_mesh_dim_names (line 104) | def data_mesh_dim_names(self) -> tuple[str, str]:
class DistributedInterface (line 109) | class DistributedInterface:
method __new__ (line 115) | def __new__(cls, *args: Any, **kwargs: Any) -> "DistributedInterface":
method __init__ (line 122) | def __init__(self, config: DistributedConfig | None = None) -> None:
method __str__ (line 166) | def __str__(self) -> str:
method get_device_mesh (line 173) | def get_device_mesh(self, dim: Dim | None = None) -> DeviceMesh | None:
method get_group (line 184) | def get_group(self, dim: Dim | None = None) -> Optional[ProcessGroup]:
method get_rank (line 191) | def get_rank(self, dim: Dim | None = None) -> int:
method get_world_size (line 200) | def get_world_size(self, dim: Dim | None = None) -> int:
method get_local_rank (line 209) | def get_local_rank(self) -> int:
method get_local_world_size (line 213) | def get_local_world_size(self) -> int:
method all_gather (line 217) | def all_gather(self, data: TensorLike, dim: Dim | None = Dim.DP) -> Te...
method all_reduce (line 224) | def all_reduce(
method broadcast (line 233) | def broadcast(self, data: TensorLike, src: int = 0, dim: Dim | None = ...
method sync (line 240) | def sync(self) -> None:
method barrier (line 245) | def barrier(self) -> None:
method destroy (line 250) | def destroy(self) -> None:
FILE: src/llamafactory/v1/config/arg_parser.py
function get_args (line 34) | def get_args(args: InputArgument = None) -> tuple[ModelArguments, DataAr...
FILE: src/llamafactory/v1/config/arg_utils.py
class PluginConfig (line 23) | class PluginConfig(dict):
method name (line 27) | def name(self) -> str:
class ModelClass (line 39) | class ModelClass(StrEnum):
class SampleBackend (line 48) | class SampleBackend(StrEnum):
class BatchingStrategy (line 54) | class BatchingStrategy(StrEnum):
function _convert_str_dict (line 61) | def _convert_str_dict(data: dict) -> dict:
function get_plugin_config (line 84) | def get_plugin_config(config: PluginArgument) -> PluginConfig | None:
FILE: src/llamafactory/v1/config/data_args.py
class DataArguments (line 20) | class DataArguments:
FILE: src/llamafactory/v1/config/model_args.py
class ModelArguments (line 22) | class ModelArguments:
method __post_init__ (line 56) | def __post_init__(self) -> None:
FILE: src/llamafactory/v1/config/sample_args.py
class SampleArguments (line 22) | class SampleArguments:
FILE: src/llamafactory/v1/config/training_args.py
class TrainingArguments (line 23) | class TrainingArguments:
method __post_init__ (line 89) | def __post_init__(self) -> None:
FILE: src/llamafactory/v1/core/base_sampler.py
class BaseSampler (line 23) | class BaseSampler:
method __init__ (line 33) | def __init__(
method generate (line 45) | async def generate(self, messages: list[Message], tools: str | None = ...
method batch_infer (line 58) | async def batch_infer(self, dataset: TorchDataset) -> list[Sample]:
FILE: src/llamafactory/v1/core/base_trainer.py
class BaseTrainer (line 48) | class BaseTrainer:
method __init__ (line 49) | def __init__(
method _create_batch_generator (line 102) | def _create_batch_generator(self) -> None:
method _shard_model (line 114) | def _shard_model(self) -> None:
method _init_optimizer (line 132) | def _init_optimizer(self) -> None:
method _init_lr_scheduler (line 142) | def _init_lr_scheduler(self) -> None:
method compute_log_probs (line 153) | def compute_log_probs(self, model: HFModel, batch: BatchInput) -> Tensor:
method compute_loss (line 170) | def compute_loss(self, batch: BatchInput) -> Tensor:
method fit (line 174) | def fit(self) -> None:
method save_model (line 224) | def save_model(self) -> None:
FILE: src/llamafactory/v1/core/data_engine.py
class DataEngine (line 47) | class DataEngine(Dataset):
method __init__ (line 54) | def __init__(self, dataset_path: str) -> None:
method _get_dataset_info (line 69) | def _get_dataset_info(self) -> None:
method _load_dataset (line 82) | def _load_dataset(self) -> None:
method _build_data_index (line 100) | def _build_data_index(self) -> None:
method _convert_data_sample (line 117) | def _convert_data_sample(self, raw_sample: dict[str, Any], dataset_nam...
method __len__ (line 135) | def __len__(self) -> int:
method __getitem__ (line 146) | def __getitem__(self, index: int | Any) -> Sample | list[Sample]:
method __iter__ (line 174) | def __iter__(self) -> Iterable[Sample]:
FILE: src/llamafactory/v1/core/model_engine.py
class ModelEngine (line 47) | class ModelEngine:
method __init__ (line 55) | def __init__(self, model_args: ModelArguments, is_train: bool = False)...
method _init_processor (line 69) | def _init_processor(self) -> Processor:
method _init_model_config (line 80) | def _init_model_config(self) -> HFConfig:
method _init_model (line 87) | def _init_model(self) -> HFModel:
FILE: src/llamafactory/v1/core/utils/batching.py
function default_collate_fn (line 46) | def default_collate_fn(buffer: StatefulBuffer, batch_info: BatchInfo) ->...
class BatchGenerator (line 63) | class BatchGenerator(Iterator):
method __init__ (line 64) | def __init__(
method _init_data_provider (line 127) | def _init_data_provider(self) -> None:
method __len__ (line 162) | def __len__(self) -> int:
method __iter__ (line 165) | def __iter__(self):
method __next__ (line 174) | def __next__(self):
method _fill_buffer (line 182) | def _fill_buffer(self) -> None:
method _generate_batch (line 196) | def _generate_batch(self) -> list[BatchInput] | None:
method state_dict (line 204) | def state_dict(self) -> dict[str, Any]:
method load_state_dict (line 211) | def load_state_dict(self, state: dict[str, Any]) -> None:
method set_epoch (line 217) | def set_epoch(self, epoch: int) -> None:
FILE: src/llamafactory/v1/core/utils/inference_engine.py
class BaseEngine (line 31) | class BaseEngine(ABC):
method __init__ (line 33) | def __init__(
method generate (line 51) | async def generate(self, messages: list[Message], tools: str | None = ...
method batch_infer (line 64) | async def batch_infer(self, dataset: TorchDataset) -> list[Sample]:
class HuggingFaceEngine (line 76) | class HuggingFaceEngine(BaseEngine):
method __init__ (line 77) | def __init__(
method generate (line 91) | async def generate(self, messages: list[Message], tools: str | None = ...
method batch_infer (line 112) | async def batch_infer(self, dataset: TorchDataset) -> list[Sample]:
FILE: src/llamafactory/v1/core/utils/rendering.py
function render_chatml_messages (line 31) | def render_chatml_messages(
function parse_chatml_message (line 76) | def parse_chatml_message(generated_text: str) -> Message:
class Renderer (line 88) | class Renderer:
method __init__ (line 89) | def __init__(self, template: str, processor: Processor):
method render_messages (line 93) | def render_messages(
method parse_message (line 120) | def parse_message(self, generated_text: str) -> Message:
method process_samples (line 136) | def process_samples(self, samples: list[Sample]) -> list[ModelInput]:
FILE: src/llamafactory/v1/launcher.py
function launch (line 34) | def launch():
function main (line 157) | def main():
FILE: src/llamafactory/v1/plugins/data_plugins/converter.py
class AlpacaSample (line 27) | class AlpacaSample(TypedDict, total=False):
class SharegptSample (line 40) | class SharegptSample(TypedDict, total=False):
class OpenaiMessage (line 45) | class OpenaiMessage(TypedDict, total=False):
class OpenaiSample (line 50) | class OpenaiSample(TypedDict, total=False):
class PairSample (line 54) | class PairSample(TypedDict, total=False):
class DataConverterPlugin (line 59) | class DataConverterPlugin(BasePlugin):
method __call__ (line 62) | def __call__(self, raw_sample: dict[str, Any]) -> Sample:
function alpaca_converter (line 67) | def alpaca_converter(raw_sample: AlpacaSample) -> SFTSample:
function sharegpt_converter (line 104) | def sharegpt_converter(raw_sample: SharegptSample) -> SFTSample:
function pair_converter (line 168) | def pair_converter(raw_sample: PairSample) -> DPOSample:
FILE: src/llamafactory/v1/plugins/data_plugins/loader.py
class DataLoaderPlugin (line 26) | class DataLoaderPlugin(BasePlugin):
method load (line 29) | def load(self, dataset_info: DatasetInfo) -> HFDataset:
function _get_builder_name (line 36) | def _get_builder_name(path: str) -> Literal["arrow", "csv", "json", "par...
function load_data_from_file (line 53) | def load_data_from_file(filepath: str, split: str, streaming: bool) -> H...
function adjust_data_index (line 69) | def adjust_data_index(
function select_data_sample (line 91) | def select_data_sample(
FILE: src/llamafactory/v1/plugins/model_plugins/initialization.py
class InitPlugin (line 23) | class InitPlugin(BasePlugin):
method __call__ (line 24) | def __call__(self) -> torch.device:
function init_on_meta (line 29) | def init_on_meta() -> torch.device:
function init_on_rank0 (line 34) | def init_on_rank0() -> torch.device:
function init_on_default (line 42) | def init_on_default() -> torch.device:
FILE: src/llamafactory/v1/plugins/model_plugins/kernels/base.py
class BaseKernel (line 30) | class BaseKernel(ABC):
method get_kernel_id (line 40) | def get_kernel_id(cls) -> str:
method get_device (line 45) | def get_device(cls) -> str:
method check_deps (line 50) | def check_deps(cls) -> bool:
method apply (line 67) | def apply(cls, **kwargs) -> HFModel:
FILE: src/llamafactory/v1/plugins/model_plugins/kernels/interface.py
function scan_all_kernels (line 36) | def scan_all_kernels():
function get_default_kernels (line 80) | def get_default_kernels():
function apply_kernel (line 89) | def apply_kernel(kernel_id: str, **kwargs):
class KernelPlugin (line 107) | class KernelPlugin(BasePlugin):
function apply_default_kernels (line 114) | def apply_default_kernels(model: HFModel, include_kernels: str = None) -...
FILE: src/llamafactory/v1/plugins/model_plugins/kernels/ops/mlp/npu_fused_moe.py
class GmmFunction (line 42) | class GmmFunction(torch.autograd.Function):
method forward (line 46) | def forward(ctx, x, weight, group_list):
method backward (line 67) | def backward(ctx, grad_output):
class HybridGmmFunction (line 96) | class HybridGmmFunction(torch.autograd.Function):
method forward (line 100) | def forward(ctx, num_experts, *args):
method backward (line 126) | def backward(ctx, *grad_outputs):
class NpuMoeFused (line 178) | class NpuMoeFused:
method npu_moe_experts_forward (line 182) | def npu_moe_experts_forward(
method npu_moe_sparse_block_forward (line 210) | def npu_moe_sparse_block_forward(self, hidden_states: torch.Tensor) ->...
class Qwen3NpuMoeFused (line 232) | class Qwen3NpuMoeFused:
method qwen3moe_sparse_moe_block_forward (line 236) | def qwen3moe_sparse_moe_block_forward(self, hidden_states: torch.Tensor):
class NpuFusedMoEKernel (line 300) | class NpuFusedMoEKernel(BaseKernel):
method apply (line 307) | def apply(cls, **kwargs) -> HFModel:
FILE: src/llamafactory/v1/plugins/model_plugins/kernels/ops/mlp/npu_swiglu.py
function npu_swiglu_forward (line 40) | def npu_swiglu_forward(self, hidden_state):
function _npu_swiglu_glm4_forward (line 55) | def _npu_swiglu_glm4_forward(self, hidden_states):
function _npu_swiglu_gemma3ntext_forward (line 70) | def _npu_swiglu_gemma3ntext_forward(self, hidden_states):
class NpuSwiGluKernel (line 90) | class NpuSwiGluKernel(BaseKernel):
method apply (line 128) | def apply(cls, **kwargs) -> "HFModel":
FILE: src/llamafactory/v1/plugins/model_plugins/kernels/ops/rms_norm/npu_rms_norm.py
function npu_rms_norm_forward (line 32) | def npu_rms_norm_forward(self, hidden_states):
class NpuRMSNormKernel (line 48) | class NpuRMSNormKernel(BaseKernel):
method apply (line 55) | def apply(cls, **kwargs) -> "HFModel":
FILE: src/llamafactory/v1/plugins/model_plugins/kernels/ops/rope/npu_rope.py
function _apply_rotary_pos_emb (line 42) | def _apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_d...
function _apply_multimodal_rotary_pos_emb_qwen25_vl (line 63) | def _apply_multimodal_rotary_pos_emb_qwen25_vl(q, k, cos, sin, mrope_sec...
class NpuRoPEKernel (line 91) | class NpuRoPEKernel(BaseKernel):
method apply (line 98) | def apply(cls, **kwargs) -> "HFModel":
FILE: src/llamafactory/v1/plugins/model_plugins/kernels/registry.py
class Registry (line 30) | class Registry:
method register (line 39) | def register(cls, kernel_cls: type[BaseKernel]) -> type[BaseKernel] | ...
method get (line 74) | def get(cls, kernel_id: str) -> type[BaseKernel] | None:
method get_registered_kernels (line 86) | def get_registered_kernels(cls) -> dict[str, type[BaseKernel]]:
FILE: src/llamafactory/v1/plugins/model_plugins/peft.py
class LoraConfigDict (line 31) | class LoraConfigDict(TypedDict, total=False):
class FreezeConfigDict (line 62) | class FreezeConfigDict(TypedDict, total=False):
class PeftPlugin (line 75) | class PeftPlugin(BasePlugin):
method __call__ (line 76) | def __call__(self, model: HFModel, config: dict, is_train: bool) -> HF...
function _find_all_linear_modules (line 80) | def _find_all_linear_modules(model: HFModel) -> list[str]:
function merge_adapters (line 94) | def merge_adapters(model: HFModel, adapter_name_or_path: Union[list[str]...
function load_adapter (line 106) | def load_adapter(model: HFModel, adapter_name_or_path: Union[list[str], ...
function get_lora_model (line 152) | def get_lora_model(model: HFModel, config: LoraConfigDict, is_train: boo...
function get_freeze_model (line 194) | def get_freeze_model(model: HFModel, config: FreezeConfigDict, is_train:...
function merge_and_export_model (line 287) | def merge_and_export_model(args: InputArgument = None):
FILE: src/llamafactory/v1/plugins/model_plugins/quantization.py
class QuantizationPlugin (line 36) | class QuantizationPlugin(BasePlugin):
method __call__ (line 39) | def __call__(
function quantization_auto (line 53) | def quantization_auto(
function quantization_with_bnb (line 81) | def quantization_with_bnb(
FILE: src/llamafactory/v1/plugins/model_plugins/rendering.py
class RenderingPlugin (line 25) | class RenderingPlugin(BasePlugin):
method _ensure_template_imported (line 28) | def _ensure_template_imported(self) -> None:
method __getitem__ (line 39) | def __getitem__(self, method_name: str):
method render_messages (line 43) | def render_messages(
method parse_messages (line 54) | def parse_messages(self, generated_text: str) -> Message:
FILE: src/llamafactory/v1/plugins/model_plugins/templates/qwen3.py
function _update_model_input (line 24) | def _update_model_input(
function _concat_text_content (line 48) | def _concat_text_content(message: Message) -> str:
function _get_last_query_index (line 60) | def _get_last_query_index(messages: list[Message]) -> int:
function _split_assistant_content (line 86) | def _split_assistant_content(message: Message) -> tuple[str, str, list[T...
function render_qwen3_messages (line 111) | def render_qwen3_messages(
function parse_qwen3_message (line 220) | def parse_qwen3_message(generated_text: str) -> Message:
FILE: src/llamafactory/v1/plugins/model_plugins/templates/qwen3_nothink.py
function _update_model_input (line 24) | def _update_model_input(
function _concat_text_content (line 48) | def _concat_text_content(message: Message) -> str:
function render_qwen3_nothink_messages (line 61) | def render_qwen3_nothink_messages(
function parse_qwen3_nothink_message (line 170) | def parse_qwen3_nothink_message(generated_text: str) -> Message:
FILE: src/llamafactory/v1/plugins/trainer_plugins/batching.py
class BatchingPlugin (line 20) | class BatchingPlugin(BasePlugin):
method compute_length (line 21) | def compute_length(self, data_provider: DataLoader) -> int:
method fill_buffer (line 28) | def fill_buffer(self, buffer: StatefulBuffer, batch_info: BatchInfo) -...
method generate_batch (line 32) | def generate_batch(self, buffer: StatefulBuffer, batch_info: BatchInfo...
FILE: src/llamafactory/v1/plugins/trainer_plugins/distributed/deepspeed.py
class DeepSpeedEngine (line 35) | class DeepSpeedEngine:
method __init__ (line 47) | def __init__(self, dist_config: dict[str, Any], num_micro_batch: int =...
method shard_model (line 67) | def shard_model(self, model: HFModel) -> "DeepSpeedEngine":
method prepare (line 74) | def prepare(
method backward (line 94) | def backward(self, loss: torch.Tensor) -> None:
method get_grad_norm (line 104) | def get_grad_norm(self) -> float:
function save_model (line 112) | def save_model(model: HFModel, output_dir: str, processor: Processor) ->...
FILE: src/llamafactory/v1/plugins/trainer_plugins/distributed/fsdp2.py
function get_transformer_layer_cls (line 38) | def get_transformer_layer_cls(model: HFModel) -> type[nn.Module] | None:
function save_model (line 54) | def save_model(model: HFModel, output_dir: str, processor: Processor) ->...
class FSDP2Engine (line 68) | class FSDP2Engine:
method __init__ (line 69) | def __init__(self, dist_config: dict):
method get_mp_policy (line 96) | def get_mp_policy(self) -> MixedPrecisionPolicy:
method is_lora_module_wrap (line 113) | def is_lora_module_wrap(self, model) -> bool:
method prepare_model (line 116) | def prepare_model(self, model: HFModel) -> HFModel:
method materialize_and_load (line 196) | def materialize_and_load(self, model: HFModel, hf_model_path: str, dcp...
method _save_non_persistent_buffers (line 216) | def _save_non_persistent_buffers(self, model: HFModel) -> dict:
method _restore_non_persistent_buffers (line 229) | def _restore_non_persistent_buffers(self, model: HFModel, saved_buffer...
method shard_model (line 246) | def shard_model(self, model: HFModel) -> HFModel:
method _load_from_dcp (line 266) | def _load_from_dcp(self, model: HFModel, dcp_path: str):
method _load_weights_from_hf_checkpoint (line 285) | def _load_weights_from_hf_checkpoint(self, model: HFModel, hf_model_pa...
method _resolve_hf_checkpoint_dir (line 348) | def _resolve_hf_checkpoint_dir(self, hf_model_path: str) -> str:
method _copy_weights (line 433) | def _copy_weights(self, param, loaded_tensor):
FILE: src/llamafactory/v1/plugins/trainer_plugins/distributed/hub.py
class DistributedPlugin (line 27) | class DistributedPlugin(BasePlugin):
method __call__ (line 28) | def __call__(self, model: HFModel, dist_config: PluginConfig, **kwargs...
function shard_model_fsdp2 (line 33) | def shard_model_fsdp2(model: HFModel, dist_config: PluginConfig, **kwarg...
function save_model_fsdp2 (line 40) | def save_model_fsdp2(model: HFModel, output_dir: str, processor: Process...
function shard_model_deepspeed (line 47) | def shard_model_deepspeed(model: HFModel, dist_config: PluginConfig, **k...
function save_model_deepspeed (line 58) | def save_model_deepspeed(model: HFModel, output_dir: str, processor: Pro...
FILE: src/llamafactory/v1/plugins/trainer_plugins/lr_scheduler.py
class LRSchedulerPlugin (line 18) | class LRSchedulerPlugin(BasePlugin):
FILE: src/llamafactory/v1/plugins/trainer_plugins/optimizer.py
class OptimizerPlugin (line 18) | class OptimizerPlugin(BasePlugin):
FILE: src/llamafactory/v1/samplers/cli_sampler.py
class SyncSampler (line 28) | class SyncSampler(BaseSampler):
method __init__ (line 29) | def __init__(
method generate (line 45) | def generate(self, messages: list[Message], tools: str | None = None) ...
method batch_infer (line 63) | def batch_infer(self, dataset: TorchDataset) -> list[Sample]:
function run_chat (line 75) | def run_chat(args: InputArgument = None):
FILE: src/llamafactory/v1/trainers/sft_trainer.py
class SFTTrainer (line 24) | class SFTTrainer(BaseTrainer):
method compute_loss (line 25) | def compute_loss(self, batch: BatchInput) -> Tensor:
function run_sft (line 32) | def run_sft(args: InputArgument = None):
FILE: src/llamafactory/v1/utils/dtype.py
class DtypeRegistry (line 26) | class DtypeRegistry:
class DtypeInterface (line 32) | class DtypeInterface:
method is_available (line 40) | def is_available(precision: str | torch.dtype) -> bool:
method is_fp16 (line 51) | def is_fp16(precision: str | torch.dtype) -> bool:
method is_fp32 (line 55) | def is_fp32(precision: str | torch.dtype) -> bool:
method is_bf16 (line 59) | def is_bf16(precision: str | torch.dtype) -> bool:
method to_dtype (line 63) | def to_dtype(precision: str | torch.dtype) -> torch.dtype:
method to_str (line 74) | def to_str(precision: torch.dtype) -> str:
method set_dtype (line 85) | def set_dtype(self, precision: str | torch.dtype):
FILE: src/llamafactory/v1/utils/env.py
function find_available_port (line 19) | def find_available_port() -> int:
function is_env_enabled (line 28) | def is_env_enabled(env_var: str, default: str = "0") -> bool:
function use_ray (line 33) | def use_ray() -> bool:
function use_kt (line 37) | def use_kt() -> bool:
FILE: src/llamafactory/v1/utils/helper.py
function set_seed (line 25) | def set_seed(seed: int) -> None:
function is_tokenizer (line 34) | def is_tokenizer(processor: Processor) -> bool:
function get_tokenizer (line 46) | def get_tokenizer(processor: Processor) -> PreTrainedTokenizer:
function _pad_and_truncate (line 58) | def _pad_and_truncate(tensor: Tensor, max_seqlen: int, pad_value: int = ...
function pad_and_truncate (line 68) | def pad_and_truncate(samples: list[ModelInput], max_seqlen: int) -> list...
function compute_valid_tokens (line 89) | def compute_valid_tokens(batches: list[BatchInput]) -> int:
FILE: src/llamafactory/v1/utils/logging.py
class _Logger (line 31) | class _Logger(logging.Logger):
method info_rank0 (line 34) | def info_rank0(self, *args, **kwargs) -> None:
method warning_rank0 (line 37) | def warning_rank0(self, *args, **kwargs) -> None:
method warning_rank0_once (line 40) | def warning_rank0_once(self, *args, **kwargs) -> None:
function _get_default_logging_level (line 44) | def _get_default_logging_level() -> "logging._Level":
function _get_library_name (line 56) | def _get_library_name() -> str:
function _get_library_root_logger (line 60) | def _get_library_root_logger() -> "_Logger":
function _configure_library_root_logger (line 64) | def _configure_library_root_logger() -> None:
function get_logger (line 84) | def get_logger(name: str | None = None) -> "_Logger":
function add_handler (line 93) | def add_handler(handler: "logging.Handler") -> None:
function remove_handler (line 99) | def remove_handler(handler: logging.Handler) -> None:
function info_rank0 (line 105) | def info_rank0(self: "logging.Logger", *args, **kwargs) -> None:
function warning_rank0 (line 110) | def warning_rank0(self: "logging.Logger", *args, **kwargs) -> None:
function warning_rank0_once (line 116) | def warning_rank0_once(self: "logging.Logger", *args, **kwargs) -> None:
FILE: src/llamafactory/v1/utils/objects.py
class StatefulBuffer (line 21) | class StatefulBuffer:
method __init__ (line 24) | def __init__(self, max_buffer_size: int = 1_000_000_000) -> None:
method __len__ (line 29) | def __len__(self) -> int:
method size (line 33) | def size(self) -> int:
method put (line 36) | def put(self, samples: list[ModelInput]) -> None:
method get (line 45) | def get(self, value: int) -> list[ModelInput]:
method clear (line 52) | def clear(self) -> None:
method state_dict (line 57) | def state_dict(self) -> dict:
method load_state_dict (line 64) | def load_state_dict(self, state_dict: dict) -> None:
FILE: src/llamafactory/v1/utils/packages.py
function _is_package_available (line 37) | def _is_package_available(name: str) -> bool:
function _get_package_version (line 41) | def _get_package_version(name: str) -> "Version":
function is_transformers_version_greater_than (line 49) | def is_transformers_version_greater_than(content: str):
function check_version (line 53) | def check_version(requirement: str, mandatory: bool = False) -> None:
FILE: src/llamafactory/v1/utils/plugin.py
class BasePlugin (line 26) | class BasePlugin:
method __init__ (line 55) | def __init__(self, name: str | None = None) -> None:
method register (line 59) | def register(self, method_name: str = "__call__") -> Callable:
method __call__ (line 73) | def __call__(self, *args, **kwargs) -> Any:
method __getattr__ (line 77) | def __getattr__(self, method_name: str) -> Callable:
method __getitem__ (line 81) | def __getitem__(self, method_name: str) -> Callable:
class PrintPlugin (line 94) | class PrintPlugin(BasePlugin):
method again (line 95) | def again(self): # optional
function print_hello (line 99) | def print_hello():
function print_hello_again (line 103) | def print_hello_again():
FILE: src/llamafactory/v1/utils/pytest.py
function dist_env (line 20) | def dist_env(local_rank: int = 0, world_size: int = 1, master_port: int ...
FILE: src/llamafactory/v1/utils/types.py
class DatasetInfo (line 57) | class DatasetInfo(TypedDict, total=False):
class DistributedConfig (line 74) | class DistributedConfig(TypedDict, total=False):
class Content (line 87) | class Content(TypedDict):
class Message (line 94) | class Message(TypedDict):
class SFTSample (line 103) | class SFTSample(TypedDict):
class DPOSample (line 114) | class DPOSample(TypedDict):
class ToolCall (line 130) | class ToolCall(TypedDict):
class ModelInput (line 137) | class ModelInput(TypedDict, total=False):
class BatchInput (line 152) | class BatchInput(TypedDict, total=False):
class BatchInfo (line 167) | class BatchInfo(TypedDict):
class ModelOutput (line 178) | class ModelOutput(NamedTuple):
FILE: src/llamafactory/webui/chatter.py
function _escape_html (line 41) | def _escape_html(text: str) -> str:
function _format_response (line 46) | def _format_response(text: str, lang: str, escape_html: bool, thought_wo...
function update_attr (line 73) | def update_attr(obj: Any, name: str, value: Any):
class WebChatModel (line 80) | class WebChatModel(ChatModel):
method __init__ (line 81) | def __init__(self, manager: "Manager", demo_mode: bool = False, lazy_i...
method loaded (line 98) | def loaded(self) -> bool:
method load_model (line 101) | def load_model(self, data) -> Generator[str, None, None]:
method unload_model (line 161) | def unload_model(self, data) -> Generator[str, None, None]:
method append (line 175) | def append(
method stream (line 193) | def stream(
FILE: src/llamafactory/webui/common.py
function abort_process (line 46) | def abort_process(pid: int) -> None:
function get_save_dir (line 59) | def get_save_dir(*paths: str) -> os.PathLike:
function _get_config_path (line 69) | def _get_config_path() -> os.PathLike:
function load_config (line 74) | def load_config() -> dict[str, str | dict[str, Any]]:
function save_config (line 83) | def save_config(
function get_model_path (line 103) | def get_model_path(model_name: str) -> str:
function get_template (line 125) | def get_template(model_name: str) -> str:
function get_time (line 130) | def get_time() -> str:
function is_multimodal (line 135) | def is_multimodal(model_name: str) -> bool:
function load_dataset_info (line 140) | def load_dataset_info(dataset_dir: str) -> dict[str, dict[str, Any]]:
function load_args (line 154) | def load_args(config_path: str) -> dict[str, Any] | None:
function save_args (line 163) | def save_args(config_path: str, config_dict: dict[str, Any]) -> None:
function _clean_cmd (line 169) | def _clean_cmd(args: dict[str, Any]) -> dict[str, Any]:
function gen_cmd (line 182) | def gen_cmd(args: dict[str, Any]) -> str:
function save_cmd (line 202) | def save_cmd(args: dict[str, Any]) -> str:
function load_eval_results (line 212) | def load_eval_results(path: os.PathLike) -> str:
function calculate_pixels (line 220) | def calculate_pixels(pixels: str) -> int:
function create_ds_config (line 228) | def create_ds_config() -> None:
FILE: src/llamafactory/webui/components/chatbot.py
function check_json_schema (line 34) | def check_json_schema(text: str, lang: str) -> None:
function create_chat_box (line 49) | def create_chat_box(
FILE: src/llamafactory/webui/components/data.py
function prev_page (line 34) | def prev_page(page_index: int) -> int:
function next_page (line 38) | def next_page(page_index: int, total_num: int) -> int:
function can_preview (line 42) | def can_preview(dataset_dir: str, dataset: list) -> "gr.Button":
function _load_data_file (line 60) | def _load_data_file(file_path: str) -> list[Any]:
function get_preview (line 70) | def get_preview(dataset_dir: str, dataset: list, page_index: int) -> tup...
function create_preview_box (line 86) | def create_preview_box(dataset_dir: "gr.Textbox", dataset: "gr.Dropdown"...
FILE: src/llamafactory/webui/components/eval.py
function create_eval_tab (line 33) | def create_eval_tab(engine: "Engine") -> dict[str, "Component"]:
FILE: src/llamafactory/webui/components/export.py
function can_quantize (line 40) | def can_quantize(checkpoint_path: str | list[str]) -> "gr.Dropdown":
function save_model (line 47) | def save_model(
function create_export_tab (line 118) | def create_export_tab(engine: "Engine") -> dict[str, "Component"]:
FILE: src/llamafactory/webui/components/footer.py
function get_device_memory (line 29) | def get_device_memory() -> "gr.Slider":
function create_footer (line 39) | def create_footer() -> dict[str, "Component"]:
FILE: src/llamafactory/webui/components/infer.py
function create_infer_tab (line 32) | def create_infer_tab(engine: "Engine") -> dict[str, "Component"]:
FILE: src/llamafactory/webui/components/top.py
function create_top (line 33) | def create_top() -> dict[str, "Component"]:
FILE: src/llamafactory/webui/components/train.py
function create_train_tab (line 37) | def create_train_tab(engine: "Engine") -> dict[str, "Component"]:
FILE: src/llamafactory/webui/control.py
function switch_hub (line 41) | def switch_hub(hub_name: str) -> None:
function can_quantize (line 50) | def can_quantize(finetuning_type: str) -> "gr.Dropdown":
function can_quantize_to (line 62) | def can_quantize_to(quantization_method: str) -> "gr.Dropdown":
function change_stage (line 78) | def change_stage(training_stage: str = list(TRAINING_STAGES.keys())[0]) ...
function get_model_info (line 87) | def get_model_info(model_name: str) -> tuple[str, str]:
function check_template (line 96) | def check_template(lang: str, template: str) -> None:
function get_trainer_info (line 107) | def get_trainer_info(lang: str, output_path: os.PathLike, do_train: bool...
function list_checkpoints (line 160) | def list_checkpoints(model_name: str, finetuning_type: str) -> "gr.Dropd...
function list_config_paths (line 182) | def list_config_paths(current_time: str) -> "gr.Dropdown":
function list_datasets (line 197) | def list_datasets(dataset_dir: str = None, training_stage: str = list(TR...
function list_output_dirs (line 209) | def list_output_dirs(model_name: str | None, finetuning_type: str, curre...
FILE: src/llamafactory/webui/engine.py
class Engine (line 28) | class Engine:
method __init__ (line 31) | def __init__(self, demo_mode: bool = False, pure_chat: bool = False) -...
method _update_component (line 40) | def _update_component(self, input_dict: dict[str, dict[str, Any]]) -> ...
method resume (line 49) | def resume(self):
method change_lang (line 77) | def change_lang(self, lang: str):
FILE: src/llamafactory/webui/interface.py
function create_ui (line 38) | def create_ui(demo_mode: bool = False) -> "gr.Blocks":
function create_web_demo (line 73) | def create_web_demo() -> "gr.Blocks":
function run_web_ui (line 91) | def run_web_ui() -> None:
function run_web_demo (line 100) | def run_web_demo() -> None:
FILE: src/llamafactory/webui/manager.py
class Manager (line 23) | class Manager:
method __init__ (line 26) | def __init__(self) -> None:
method add_elems (line 30) | def add_elems(self, tab_name: str, elem_dict: dict[str, "Component"]) ...
method get_elem_list (line 37) | def get_elem_list(self) -> list["Component"]:
method get_elem_iter (line 41) | def get_elem_iter(self) -> Generator[tuple[str, "Component"], None, No...
method get_elem_by_id (line 46) | def get_elem_by_id(self, elem_id: str) -> "Component":
method get_id_by_elem (line 53) | def get_id_by_elem(self, elem: "Component") -> str:
method get_base_elems (line 57) | def get_base_elems(self) -> set["Component"]:
FILE: src/llamafactory/webui/runner.py
class Runner (line 54) | class Runner:
method __init__ (line 57) | def __init__(self, manager: "Manager", demo_mode: bool = False) -> None:
method set_abort (line 69) | def set_abort(self) -> None:
method _initialize (line 74) | def _initialize(self, data: dict["Component", Any], do_train: bool, fr...
method _finalize (line 116) | def _finalize(self, lang: str, finish_info: str) -> None:
method _parse_train_args (line 126) | def _parse_train_args(self, data: dict["Component", Any]) -> dict[str,...
method _parse_eval_args (line 292) | def _parse_eval_args(self, data: dict["Component", Any]) -> dict[str, ...
method _preview (line 346) | def _preview(self, data: dict["Component", Any], do_train: bool) -> Ge...
method _launch (line 357) | def _launch(self, data: dict["Component", Any], do_train: bool) -> Gen...
method _build_config_dict (line 381) | def _build_config_dict(self, data: dict["Component", Any]) -> dict[str...
method preview_train (line 392) | def preview_train(self, data):
method preview_eval (line 395) | def preview_eval(self, data):
method run_train (line 398) | def run_train(self, data):
method run_eval (line 401) | def run_eval(self, data):
method monitor (line 404) | def monitor(self):
method save_args (line 462) | def save_args(self, data):
method load_args (line 478) | def load_args(self, lang: str, config_path: str):
method check_output_dir (line 492) | def check_output_dir(self, lang: str, model_name: str, finetuning_type...
FILE: src/train.py
function main (line 18) | def main():
function _mp_fn (line 22) | def _mp_fn(index):
FILE: src/webui.py
function main (line 21) | def main():
FILE: tests/check_license.py
function main (line 22) | def main():
FILE: tests/conftest.py
function pytest_configure (line 35) | def pytest_configure(config: Config):
function _handle_runs_on (line 51) | def _handle_runs_on(items: list[Item]):
function _handle_slow_tests (line 66) | def _handle_slow_tests(items: list[Item]):
function _get_visible_devices_env (line 75) | def _get_visible_devices_env() -> str | None:
function _handle_device_visibility (line 85) | def _handle_device_visibility(items: list[Item]):
function pytest_collection_modifyitems (line 109) | def pytest_collection_modifyitems(config: Config, items: list[Item]):
function _cleanup_distributed_state (line 123) | def _cleanup_distributed_state():
function _manage_distributed_env (line 131) | def _manage_distributed_env(request: FixtureRequest, monkeypatch: Monkey...
function fix_valuehead_cpu_loading (line 166) | def fix_valuehead_cpu_loading():
function bypass_mistral_regex_check (line 172) | def bypass_mistral_regex_check():
FILE: tests/data/processor/test_feedback.py
function test_feedback_data (line 48) | def test_feedback_data(num_samples: int):
FILE: tests/data/processor/test_pairwise.py
function _convert_sharegpt_to_openai (line 46) | def _convert_sharegpt_to_openai(messages: list[dict[str, str]]) -> list[...
function test_pairwise_data (line 57) | def test_pairwise_data(num_samples: int):
FILE: tests/data/processor/test_processor_utils.py
function test_infer_seqlen (line 34) | def test_infer_seqlen(test_input: tuple[int, int, int], test_output: tup...
FILE: tests/data/processor/test_supervised.py
function test_supervised_single_turn (line 48) | def test_supervised_single_turn(num_samples: int):
function test_supervised_multi_turn (line 77) | def test_supervised_multi_turn(num_samples: int):
function test_supervised_train_on_prompt (line 95) | def test_supervised_train_on_prompt(num_samples: int):
function test_supervised_mask_history (line 113) | def test_supervised_mask_history(num_samples: int):
FILE: tests/data/processor/test_unsupervised.py
function test_unsupervised_data (line 52) | def test_unsupervised_data(num_samples: int):
FILE: tests/data/test_collator.py
function test_base_collator (line 34) | def test_base_collator():
function test_multimodal_collator (line 78) | def test_multimodal_collator():
function test_4d_attention_mask (line 136) | def test_4d_attention_mask():
FILE: tests/data/test_converter.py
function test_alpaca_converter (line 24) | def test_alpaca_converter():
function test_sharegpt_converter (line 45) | def test_sharegpt_converter():
FILE: tests/data/test_formatter.py
function test_empty_formatter (line 42) | def test_empty_formatter():
function test_string_formatter (line 48) | def test_string_formatter():
function test_function_formatter (line 54) | def test_function_formatter():
function test_multi_function_formatter (line 64) | def test_multi_function_formatter():
function test_default_tool_formatter (line 75) | def test_default_tool_formatter():
function test_default_tool_extractor (line 94) | def test_default_tool_extractor():
function test_default_multi_tool_extractor (line 101) | def test_default_multi_tool_extractor():
function test_glm4_function_formatter (line 114) | def test_glm4_function_formatter():
function test_glm4_tool_formatter (line 121) | def test_glm4_tool_formatter():
function test_glm4_tool_extractor (line 132) | def test_glm4_tool_extractor():
function test_llama3_function_formatter (line 139) | def test_llama3_function_formatter():
function test_llama3_multi_function_formatter (line 148) | def test_llama3_multi_function_formatter():
function test_llama3_tool_formatter (line 159) | def test_llama3_tool_formatter():
function test_llama3_tool_extractor (line 173) | def test_llama3_tool_extractor():
function test_llama3_multi_tool_extractor (line 180) | def test_llama3_multi_tool_extractor():
function test_mistral_function_formatter (line 193) | def test_mistral_function_formatter():
function test_mistral_multi_function_formatter (line 203) | def test_mistral_multi_function_formatter():
function test_mistral_tool_formatter (line 215) | def test_mistral_tool_formatter():
function test_mistral_tool_extractor (line 224) | def test_mistral_tool_extractor():
function test_mistral_multi_tool_extractor (line 231) | def test_mistral_multi_tool_extractor():
function test_qwen_function_formatter (line 244) | def test_qwen_function_formatter():
function test_qwen_multi_function_formatter (line 253) | def test_qwen_multi_function_formatter():
function test_qwen_tool_formatter (line 264) | def test_qwen_tool_formatter():
function test_qwen_tool_extractor (line 278) | def test_qwen_tool_extractor():
function test_qwen_multi_tool_extractor (line 285) | def test_qwen_multi_tool_extractor():
function test_lfm2_function_formatter (line 298) | def test_lfm2_function_formatter():
function test_lfm2_multi_function_formatter (line 307) | def test_lfm2_multi_function_formatter():
function test_lfm2_tool_formatter (line 317) | def test_lfm2_tool_formatter():
function test_lfm2_tool_extractor (line 325) | def test_lfm2_tool_extractor():
function test_lfm2_multi_tool_extractor (line 332) | def test_lfm2_multi_tool_extractor():
function test_lfm2_tool_extractor_with_nested_dict (line 342) | def test_lfm2_tool_extractor_with_nested_dict():
function test_lfm2_tool_extractor_with_list_arg (line 354) | def test_lfm2_tool_extractor_with_list_arg():
function test_lfm2_tool_extractor_no_match (line 366) | def test_lfm2_tool_extractor_no_match():
function test_lfm2_tool_round_trip (line 374) | def test_lfm2_tool_round_trip():
FILE: tests/data/test_loader.py
function test_load_train_only (line 44) | def test_load_train_only():
function test_load_val_size (line 51) | def test_load_val_size():
function test_load_eval_data (line 58) | def test_load_eval_data():
FILE: tests/data/test_mm_plugin.py
function _get_mm_inputs (line 93) | def _get_mm_inputs(processor: "ProcessorMixin") -> dict[str, "torch.Tens...
function _get_omni_inputs (line 98) | def _get_omni_inputs(processor: "ProcessorMixin") -> dict[str, "torch.Te...
function _is_close (line 117) | def _is_close(batch_a: dict[str, Any], batch_b: dict[str, Any]) -> None:
function _load_tokenizer_module (line 130) | def _load_tokenizer_module(model_name_or_path: str) -> "TokenizerModule":
function _check_plugin (line 135) | def _check_plugin(
function test_base_plugin (line 183) | def test_base_plugin():
function test_gemma3_plugin (line 193) | def test_gemma3_plugin():
function test_internvl_plugin (line 215) | def test_internvl_plugin():
function test_llama4_plugin (line 234) | def test_llama4_plugin():
function test_llava_plugin (line 255) | def test_llava_plugin():
function test_llava_next_plugin (line 269) | def test_llava_next_plugin():
function test_llava_next_video_plugin (line 283) | def test_llava_next_video_plugin():
function test_paligemma_plugin (line 298) | def test_paligemma_plugin():
function test_pixtral_plugin (line 318) | def test_pixtral_plugin():
function test_qwen2_omni_plugin (line 341) | def test_qwen2_omni_plugin():
function test_qwen2_vl_plugin (line 371) | def test_qwen2_vl_plugin():
function test_qwen3_vl_plugin (line 389) | def test_qwen3_vl_plugin():
function test_video_llava_plugin (line 411) | def test_video_llava_plugin():
function test_lfm2_vl_plugin (line 425) | def test_lfm2_vl_plugin():
FILE: tests/data/test_template.py
function _check_tokenization (line 51) | def _check_tokenization(
function _check_template (line 64) | def _check_template(
function test_encode_oneturn (line 95) | def test_encode_oneturn():
function test_encode_multiturn (line 110) | def test_encode_multiturn():
function test_reasoning_encode_oneturn (line 133) | def test_reasoning_encode_oneturn(cot_messages: bool, enable_thinking: b...
function test_reasoning_encode_multiturn (line 159) | def test_reasoning_encode_multiturn(cot_messages: bool, enable_thinking:...
function test_jinja_template (line 186) | def test_jinja_template():
function test_ollama_modelfile (line 196) | def test_ollama_modelfile():
function test_get_stop_token_ids (line 214) | def test_get_stop_token_ids():
function test_gemma_template (line 222) | def test_gemma_template():
function test_gemma2_template (line 235) | def test_gemma2_template():
function test_llama3_template (line 248) | def test_llama3_template():
function test_llama4_template (line 260) | def test_llama4_template():
function test_phi4_template (line 272) | def test_phi4_template():
function test_qwen2_5_template (line 285) | def test_qwen2_5_template():
function test_qwen3_template (line 299) | def test_qwen3_template(cot_messages: bool):
function test_parse_llama3_template (line 317) | def test_parse_llama3_template():
function test_parse_qwen_template (line 332) | def test_parse_qwen_template():
function test_parse_qwen3_template (line 345) | def test_parse_qwen3_template():
FILE: tests/e2e/test_chat.py
function test_chat (line 41) | def test_chat():
function test_stream_chat (line 47) | def test_stream_chat():
FILE: tests/e2e/test_sglang.py
function test_chat (line 44) | def test_chat():
function test_stream_chat (line 54) | def test_stream_chat():
FILE: tests/e2e/test_train.py
function test_run_exp (line 63) | def test_run_exp(stage: str, dataset: str):
function test_export (line 70) | def test_export():
FILE: tests/eval/test_eval_template.py
function test_eval_template_en (line 21) | def test_eval_template_en():
function test_eval_template_zh (line 60) | def test_eval_template_zh():
FILE: tests/model/model_utils/test_add_tokens.py
function test_add_tokens (line 29) | def test_add_tokens(special_tokens: bool):
FILE: tests/model/model_utils/test_attention.py
function is_torch_sdpa_available (line 26) | def is_torch_sdpa_available():
function test_attention (line 43) | def test_attention():
FILE: tests/model/model_utils/test_checkpointing.py
function test_vanilla_checkpointing (line 43) | def test_vanilla_checkpointing(disable_gradient_checkpointing: bool):
function test_unsloth_gradient_checkpointing (line 49) | def test_unsloth_gradient_checkpointing():
function test_upcast_layernorm (line 55) | def test_upcast_layernorm():
function test_upcast_lmhead_output (line 62) | def test_upcast_lmhead_output():
FILE: tests/model/model_utils/test_misc.py
function test_expanded_modules (line 28) | def test_expanded_modules():
FILE: tests/model/model_utils/test_packing.py
function test_get_seqlens_in_batch (line 37) | def test_get_seqlens_in_batch(attention_mask, golden_seq_lens):
function test_get_unpad_data (line 63) | def test_get_unpad_data(attention_mask, golden_indices, golden_cu_seqlen...
FILE: tests/model/model_utils/test_visual.py
function test_visual_full (line 30) | def test_visual_full(freeze_vision_tower: bool, freeze_multi_modal_proje...
function test_visual_lora (line 53) | def test_visual_lora(freeze_vision_tower: bool, freeze_language_model: b...
function test_visual_model_save_load (line 84) | def test_visual_model_save_load():
FILE: tests/model/test_base.py
function test_base (line 33) | def test_base():
function test_valuehead (line 40) | def test_valuehead():
FILE: tests/model/test_freeze.py
function test_freeze_train_all_modules (line 46) | def test_freeze_train_all_modules():
function test_freeze_train_extra_modules (line 57) | def test_freeze_train_extra_modules():
function test_freeze_inference (line 68) | def test_freeze_inference():
FILE: tests/model/test_full.py
function test_full_train (line 46) | def test_full_train():
function test_full_inference (line 53) | def test_full_inference():
FILE: tests/model/test_lora.py
function test_lora_train_qv_modules (line 58) | def test_lora_train_qv_modules():
function test_lora_train_all_modules (line 64) | def test_lora_train_all_modules():
function test_lora_train_extra_modules (line 70) | def test_lora_train_extra_modules():
function test_lora_train_old_adapters (line 76) | def test_lora_train_old_adapters():
function test_lora_train_new_adapters (line 82) | def test_lora_train_new_adapters():
function test_lora_train_valuehead (line 91) | def test_lora_train_valuehead():
function test_lora_inference (line 100) | def test_lora_inference():
FILE: tests/model/test_pissa.py
function test_pissa_train (line 53) | def test_pissa_train():
function test_pissa_inference (line 60) | def test_pissa_inference():
FILE: tests/train/test_sft_trainer.py
class DataCollatorWithVerbose (line 49) | class DataCollatorWithVerbose(DataCollatorWithPadding):
method __call__ (line 52) | def __call__(self, features: list[dict[str, Any]]) -> dict[str, Any]:
function test_shuffle (line 63) | def test_shuffle(disable_shuffling: bool):
FILE: tests_v1/accelerator/test_interface.py
function _all_reduce_tests (line 26) | def _all_reduce_tests(local_rank: int, world_size: int, master_port: int):
function test_all_device (line 48) | def test_all_device():
function test_multi_device (line 57) | def test_multi_device():
FILE: tests_v1/config/test_args_parser.py
function test_get_args_from_yaml (line 22) | def test_get_args_from_yaml(tmp_path: Path):
FILE: tests_v1/conftest.py
function pytest_configure (line 36) | def pytest_configure(config: Config):
function _handle_runs_on (line 52) | def _handle_runs_on(items: list[Item]):
function _handle_slow_tests (line 67) | def _handle_slow_tests(items: list[Item]):
function _get_visible_devices_env (line 76) | def _get_visible_devices_env() -> str | None:
function _handle_device_visibility (line 86) | def _handle_device_visibility(items: list[Item]):
function pytest_collection_modifyitems (line 110) | def pytest_collection_modifyitems(config: Config, items: list[Item]):
function _set_env (line 124) | def _set_env():
function _cleanup_distributed_state (line 135) | def _cleanup_distributed_state():
function _manage_distributed_env (line 143) | def _manage_distributed_env(request: FixtureRequest, monkeypatch: Monkey...
function bypass_mistral_regex_check (line 178) | def bypass_mistral_regex_check():
FILE: tests_v1/core/test_data_engine.py
function test_map_dataset (line 25) | def test_map_dataset(num_samples: int):
FILE: tests_v1/core/test_model_loader.py
function test_tiny_qwen (line 21) | def test_tiny_qwen():
function test_tiny_qwen_with_kernel_plugin (line 30) | def test_tiny_qwen_with_kernel_plugin():
FILE: tests_v1/core/utils/test_batching.py
function test_normal_batching (line 21) | def test_normal_batching():
FILE: tests_v1/core/utils/test_rendering.py
function _get_input_ids (line 26) | def _get_input_ids(inputs: list | dict) -> list:
function test_chatml_rendering (line 87) | def test_chatml_rendering():
function test_chatml_parse (line 109) | def test_chatml_parse():
function test_chatml_rendering_remote (line 118) | def test_chatml_rendering_remote(num_samples: int):
function test_qwen3_nothink_rendering (line 130) | def test_qwen3_nothink_rendering():
function test_qwen3_nothink_parse (line 158) | def test_qwen3_nothink_parse():
function test_qwen3_nothink_rendering_remote (line 178) | def test_qwen3_nothink_rendering_remote(num_samples: int):
function test_process_sft_samples (line 198) | def test_process_sft_samples():
function test_process_dpo_samples (line 211) | def test_process_dpo_samples():
FILE: tests_v1/plugins/data_plugins/test_converter.py
function test_alpaca_converter (line 26) | def test_alpaca_converter(num_samples: int):
function test_sharegpt_converter (line 52) | def test_sharegpt_converter():
function test_pair_converter (line 75) | def test_pair_converter(num_samples: int):
FILE: tests_v1/plugins/model_plugins/test_init_plugin.py
function test_init_on_meta (line 21) | def test_init_on_meta():
function test_init_on_rank0 (line 32) | def test_init_on_rank0():
function test_init_on_default (line 46) | def test_init_on_default():
FILE: tests_v1/plugins/model_plugins/test_kernel_plugin.py
function _apply_kernel (line 22) | def _apply_kernel(rank) -> None:
function _apply_all_kernels (line 45) | def _apply_all_kernels(rank) -> None:
function test_apply_kernel (line 68) | def test_apply_kernel():
function test_apply_all_kernels (line 72) | def test_apply_all_kernels():
FILE: tests_v1/plugins/model_plugins/test_peft.py
function model_path (line 27) | def model_path():
function model (line 32) | def model(model_path):
function tokenizer (line 37) | def tokenizer(model_path):
function adapter_path (line 42) | def adapter_path(tmp_path):
function test_find_all_linear_modules (line 60) | def test_find_all_linear_modules(model):
function test_get_lora_model (line 67) | def test_get_lora_model(model):
function test_get_freeze_model_layers (line 76) | def test_get_freeze_model_layers(model):
function test_get_freeze_model_modules (line 94) | def test_get_freeze_model_modules(model):
function test_load_adapter_single_for_inference (line 109) | def test_load_adapter_single_for_inference(model, adapter_path):
function test_load_adapter_resume_train (line 116) | def test_load_adapter_resume_train(model, adapter_path):
function test_load_adapter_train_multiple_disallowed (line 123) | def test_load_adapter_train_multiple_disallowed(model, adapter_path):
function test_load_adapter_infer_multiple_merges (line 129) | def test_load_adapter_infer_multiple_merges(model, adapter_path):
function test_merge_and_export_model (line 136) | def test_merge_and_export_model(tmp_path, adapter_path):
FILE: tests_v1/plugins/model_plugins/test_quantization_plugin.py
function check_quantization_status (line 24) | def check_quantization_status(model):
function test_quantization_plugin (line 39) | def test_quantization_plugin(name, quantization_bit):
FILE: tests_v1/plugins/trainer_plugins/distributed/test_fsdp2.py
function collect_non_persistent_buffers (line 33) | def collect_non_persistent_buffers(model):
function test_fsdp2_meta_loading_buffers_and_tied_weights (line 45) | def test_fsdp2_meta_loading_buffers_and_tied_weights():
FILE: tests_v1/sampler/test_cli_sampler.py
function test_sync_sampler (line 23) | def test_sync_sampler():
FILE: tests_v1/trainers/test_fsdp2_sft_trainer.py
function test_fsdp2_sft_trainer (line 24) | def test_fsdp2_sft_trainer(tmp_path: Path):
Copy disabled (too large)
Download .json
Condensed preview — 504 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (10,299K chars).
[
{
"path": ".dockerignore",
"chars": 132,
"preview": ".vscode\n.git\n.github\n.venv\ncache\ndocker\nsaves\nhf_cache\nms_cache\nom_cache\nshared_data\noutput\n.dockerignore\n.gitattributes"
},
{
"path": ".gitattributes",
"chars": 66,
"preview": "# Auto detect text files and perform LF normalization\n* text=auto\n"
},
{
"path": ".github/CODE_OF_CONDUCT.md",
"chars": 5233,
"preview": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make participa"
},
{
"path": ".github/CONTRIBUTING.md",
"chars": 2407,
"preview": "# Contributing to LLaMA Factory\n\nEveryone is welcome to contribute, and we value everybody's contribution. Code contribu"
},
{
"path": ".github/ISSUE_TEMPLATE/1-bug-report.yml",
"chars": 2084,
"preview": "name: \"\\U0001F41B Bug / help\"\ndescription: Create a report to help us improve the LLaMA Factory\nlabels: [\"bug\", \"pending"
},
{
"path": ".github/ISSUE_TEMPLATE/2-feature-request.yml",
"chars": 1132,
"preview": "name: \"\\U0001F680 Feature request\"\ndescription: Submit a request for a new feature\nlabels: [\"enhancement\", \"pending\"]\nbo"
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 335,
"preview": "blank_issues_enabled: false\ncontact_links:\n - name: 📚 FAQs | 常见问题\n url: https://github.com/hiyouga/LLaMA-Factory/iss"
},
{
"path": ".github/PULL_REQUEST_TEMPLATE.md",
"chars": 233,
"preview": "# What does this PR do?\n\nFixes # (issue)\n\n## Before submitting\n\n- [ ] Did you read the [contributor guideline](https://g"
},
{
"path": ".github/SECURITY.md",
"chars": 548,
"preview": "# Reporting Security Issues\n\nTo report a security issue, please use the GitHub Security Advisory [\"Report a Vulnerabilit"
},
{
"path": ".github/copilot-instructions.md",
"chars": 6045,
"preview": "# GitHub Copilot Instructions for LLaMA Factory\n\n## Project Overview\n\nLLaMA Factory is an efficient fine-tuning framewor"
},
{
"path": ".github/instructions-v0.md",
"chars": 0,
"preview": ""
},
{
"path": ".github/instructions-v1.md",
"chars": 0,
"preview": ""
},
{
"path": ".github/workflows/docker.yml",
"chars": 3478,
"preview": "name: docker\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n paths:\n - \"**/*.py\"\n - \"pyproj"
},
{
"path": ".github/workflows/docs.yml",
"chars": 1876,
"preview": "name: Build and Deploy Sphinx Docs\n\non:\n push:\n branches: [\"main\"]\n paths:\n - \"docs/**\"\n pull_request:\n "
},
{
"path": ".github/workflows/label_issue.yml",
"chars": 822,
"preview": "name: label_issue\n\non:\n issues:\n types:\n - opened\n\njobs:\n label_issue:\n runs-on: ubuntu-latest\n\n permiss"
},
{
"path": ".github/workflows/publish.yml",
"chars": 651,
"preview": "name: publish\n\non:\n workflow_dispatch:\n release:\n types:\n - published\n\njobs:\n publish:\n name: Upload relea"
},
{
"path": ".github/workflows/tests.yml",
"chars": 2658,
"preview": "name: tests\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n paths:\n - \"**/*.py\"\n - \"pyproje"
},
{
"path": ".github/workflows/tests_cuda.yml",
"chars": 1736,
"preview": "name: tests_cuda\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n paths:\n - \"**/*.py\"\n - \"py"
},
{
"path": ".github/workflows/tests_npu.yml",
"chars": 1914,
"preview": "name: tests_npu\n\non:\n workflow_dispatch:\n push:\n branches:\n - \"main\"\n paths:\n - \"**/*.py\"\n - \"pyp"
},
{
"path": ".gitignore",
"chars": 3306,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": ".pre-commit-config.yaml",
"chars": 713,
"preview": "repos:\n- repo: https://github.com/pre-commit/pre-commit-hooks\n rev: v6.0.0\n hooks:\n - id: check-ast\n - "
},
{
"path": "CITATION.cff",
"chars": 1378,
"preview": "cff-version: 1.2.0\ndate-released: 2024-03\nmessage: \"If you use this software, please cite it as below.\"\nauthors:\n- famil"
},
{
"path": "LICENSE",
"chars": 11324,
"preview": "Apache License\n Version 2.0, January 2004\n http://www.apache.org/licens"
},
{
"path": "MANIFEST.in",
"chars": 16,
"preview": "include LICENSE\n"
},
{
"path": "Makefile",
"chars": 741,
"preview": ".PHONY: build commit license quality style test\n\ncheck_dirs := scripts src tests tests_v1\n\nRUN := $(shell command -v uv "
},
{
"path": "README.md",
"chars": 69278,
"preview": "\n\n[\n\n[ contains all available datasets. If you are using a custom dataset, please **"
},
{
"path": "data/README_zh.md",
"chars": 8569,
"preview": "[dataset_info.json](dataset_info.json) 包含了所有可用的数据集。如果您希望使用自定义数据集,请**务必**在 `dataset_info.json` 文件中添加*数据集描述*,并通过修改 `datase"
},
{
"path": "data/alpaca_en_demo.json",
"chars": 859411,
"preview": "[\n {\n \"instruction\": \"Describe a process of making crepes.\",\n \"input\": \"\",\n \"output\": \"Making crepes is an eas"
},
{
"path": "data/alpaca_zh_demo.json",
"chars": 288584,
"preview": "[\n {\n \"instruction\": \"识别并解释给定列表中的两个科学理论:细胞理论和日心说。\",\n \"input\": \"\",\n \"output\": \"细胞理论是生物科学的一个理论,它认为所有生命体都是由微小的基本单"
},
{
"path": "data/c4_demo.jsonl",
"chars": 743720,
"preview": "{\"text\": \"Don’t think you need all the bells and whistles? No problem. McKinley Heating Service Experts Heating & Air Co"
},
{
"path": "data/dataset_info.json",
"chars": 17576,
"preview": "{\n \"identity\": {\n \"file_name\": \"identity.json\"\n },\n \"alpaca_en_demo\": {\n \"file_name\": \"alpaca_en_demo.json\"\n }"
},
{
"path": "data/dpo_en_demo.json",
"chars": 1571557,
"preview": "[\n {\n \"conversations\": [\n {\n \"from\": \"human\",\n \"value\": \"Hi! I'd like to create a new language ga"
},
{
"path": "data/dpo_zh_demo.json",
"chars": 387442,
"preview": "[\n {\n \"conversations\": [\n {\n \"from\": \"human\",\n \"value\": \"国会的转发\\n美国国会由众议院和参议院组成,每两年换届一次(参议员任期为6年,但"
},
{
"path": "data/glaive_toolcall_en_demo.json",
"chars": 738883,
"preview": "[\n {\n \"conversations\": [\n {\n \"from\": \"human\",\n \"value\": \"Hi, I have some ingredients and I want t"
},
{
"path": "data/glaive_toolcall_zh_demo.json",
"chars": 442330,
"preview": "[\n {\n \"conversations\": [\n {\n \"from\": \"human\",\n \"value\": \"我需要为John Doe生成一张发票。他购买了2个苹果,每个$1,以及3根香蕉,"
},
{
"path": "data/identity.json",
"chars": 12432,
"preview": "[\n {\n \"instruction\": \"hi\",\n \"input\": \"\",\n \"output\": \"Hello! I am {{name}}, an AI assistant developed by {{auth"
},
{
"path": "data/kto_en_demo.json",
"chars": 907341,
"preview": "[\n {\n \"messages\": [\n {\n \"content\": \"The Federal Trade Commission is going after spyware, bits of compute"
},
{
"path": "data/mllm_audio_demo.json",
"chars": 877,
"preview": "[\n {\n \"messages\": [\n {\n \"content\": \"<audio>What's that sound?\",\n \"role\": \"user\"\n },\n {\n"
},
{
"path": "data/mllm_demo.json",
"chars": 2958,
"preview": "[\n {\n \"messages\": [\n {\n \"content\": \"<image>Who are they?\",\n \"role\": \"user\"\n },\n {\n "
},
{
"path": "data/mllm_video_audio_demo.json",
"chars": 1071,
"preview": "[\n {\n \"messages\": [\n {\n \"content\": \"<video><audio>What is the video describing?\",\n \"role\": \"user\""
},
{
"path": "data/mllm_video_demo.json",
"chars": 828,
"preview": "[\n {\n \"messages\": [\n {\n \"content\": \"<video>Why is this video funny?\",\n \"role\": \"user\"\n },\n "
},
{
"path": "data/reason_tool_use_demo_50.jsonl",
"chars": 258699,
"preview": "{\"messages\": [{\"role\": \"system\", \"content\": [{\"type\": \"text\", \"value\": \"You are a methodical and expert assistant. Your "
},
{
"path": "data/v1_dpo_demo.jsonl",
"chars": 28956,
"preview": "{\"chosen_messages\": [{\"role\": \"system\", \"content\": [{\"type\": \"text\", \"value\": \"You are an AI assistant. You will be give"
},
{
"path": "data/v1_dpo_demo.yaml",
"chars": 89,
"preview": "dpo_zh_demo:\n path: HuggingFaceH4/orca_dpo_pairs\n split: train_prefs\n converter: pair\n"
},
{
"path": "data/v1_sft_demo.jsonl",
"chars": 440621,
"preview": "{\"messages\": [{\"role\": \"user\", \"content\": [{\"type\": \"text\", \"value\": \"hi\"}], \"loss_weight\": 0.0}, {\"role\": \"assistant\", "
},
{
"path": "data/v1_sft_demo.yaml",
"chars": 170,
"preview": "identity:\n path: data/identity.json\n source: local\n converter: alpaca\nalpaca_en_demo:\n path: data/alpaca_en_demo.jso"
},
{
"path": "data/wiki_demo.txt",
"chars": 1026454,
"preview": "Anarchism is a political philosophy and movement that is sceptical of authority and rejects all involuntary, coercive fo"
},
{
"path": "docker/docker-cuda/Dockerfile",
"chars": 1742,
"preview": "# https://hub.docker.com/r/hiyouga/pytorch/tags\nARG BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0-de"
},
{
"path": "docker/docker-cuda/Dockerfile.base",
"chars": 2269,
"preview": "# Start from the pytorch official image (ubuntu-22.04 + cuda-12.4.1 + python-3.11)\n# https://hub.docker.com/r/pytorch/py"
},
{
"path": "docker/docker-cuda/Dockerfile.megatron",
"chars": 2939,
"preview": "# NVIDIA official image (ubuntu-24.04 + cuda-12.9.1 + python-3.12)\n# https://docs.nvidia.com/deeplearning/frameworks/pyt"
},
{
"path": "docker/docker-cuda/README.md",
"chars": 3117,
"preview": "# Docker Setup for NVIDIA GPUs\n\nThis directory contains Docker configuration files for running LLaMA Factory with NVIDIA"
},
{
"path": "docker/docker-cuda/docker-compose.yml",
"chars": 541,
"preview": "services:\n llamafactory:\n build:\n dockerfile: ./docker/docker-cuda/Dockerfile\n context: ../..\n args:\n"
},
{
"path": "docker/docker-npu/Dockerfile",
"chars": 1737,
"preview": "# https://hub.docker.com/r/ascendai/cann/tags\n\nARG BASE_IMAGE=quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-py3.11\nFROM ${B"
},
{
"path": "docker/docker-npu/docker-compose.yml",
"chars": 1586,
"preview": "services:\n llamafactory-a2:\n build:\n dockerfile: ./docker/docker-npu/Dockerfile\n context: ../..\n args"
},
{
"path": "docker/docker-rocm/Dockerfile",
"chars": 2014,
"preview": "# https://hub.docker.com/r/rocm/pytorch/tags\n# ROCm 7.2 + PyTorch 2.7.1 (Python 3.12). Keep base image's PyTorch; do not"
},
{
"path": "docker/docker-rocm/docker-compose.yml",
"chars": 450,
"preview": "services:\n llamafactory:\n build:\n dockerfile: ./docker/docker-rocm/Dockerfile\n context: ../..\n args:\n"
},
{
"path": "docs/Makefile",
"chars": 632,
"preview": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the "
},
{
"path": "docs/_static/css/lang-switcher.css",
"chars": 1400,
"preview": ".lang-switcher {\n display: flex;\n align-items: center;\n justify-content: center;\n}\n\n.lang-switcher__select {\n appear"
},
{
"path": "docs/_static/js/switcher.js",
"chars": 3127,
"preview": "document.addEventListener('DOMContentLoaded', function () {\n var path = window.location.pathname || '';\n var isZh = pa"
},
{
"path": "docs/conf.py",
"chars": 637,
"preview": "# Configuration file for the Sphinx documentation builder.\n\n\n# Define common settings here\nproject = \"LlamaFactory\"\ncopy"
},
{
"path": "docs/en/advanced/custom-kernels/custom-kernels.md",
"chars": 115,
"preview": "# Custom Kernels\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/advanced/custom-kernels/fused-operators.md",
"chars": 116,
"preview": "# Fused Operators\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/advanced/custom-kernels/triton.md",
"chars": 107,
"preview": "# Triton\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/advanced/distributed/deepspeed.md",
"chars": 110,
"preview": "# DeepSpeed\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/advanced/distributed/fsdp.md",
"chars": 105,
"preview": "# FSDP\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/advanced/distributed/parallel-dp-tp-ep-sp-cp.md",
"chars": 130,
"preview": "# Parallel (DP, TP, EP, SP, CP)\n\nThis page is not yet available in English. Use the language switcher to view Simplified"
},
{
"path": "docs/en/advanced/lora-and-quantization/lora.md",
"chars": 105,
"preview": "# LoRA\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/advanced/lora-and-quantization/quantization.md",
"chars": 113,
"preview": "# Quantization\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/conf.py",
"chars": 386,
"preview": "import os\nimport sys\n\n\n# Add parent dir to path to allow importing conf.py\nsys.path.insert(0, os.path.abspath(\"..\"))\n\nfr"
},
{
"path": "docs/en/data-preparation/data-processing.md",
"chars": 116,
"preview": "# Data Processing\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/dev-guide/core/data-engine.md",
"chars": 111,
"preview": "# DataEngine\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/dev-guide/core/model-engine.md",
"chars": 112,
"preview": "# ModelEngine\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/dev-guide/core/trainer.md",
"chars": 108,
"preview": "# Trainer\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/dev-guide/plugins/data-plugins.md",
"chars": 113,
"preview": "# Data Plugins\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/dev-guide/plugins/model-plugins/initialization.md",
"chars": 115,
"preview": "# Initialization\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/dev-guide/plugins/model-plugins/kernels.md",
"chars": 108,
"preview": "# Kernels\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/dev-guide/plugins/model-plugins/rendering.md",
"chars": 110,
"preview": "# Rendering\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/getting-started.md",
"chars": 116,
"preview": "# Getting Started\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/hyperparameters/data-argument.md",
"chars": 114,
"preview": "# Data Argument\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/hyperparameters/model-argument.md",
"chars": 115,
"preview": "# Model Argument\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/hyperparameters/sample-argument.md",
"chars": 116,
"preview": "# Sample Argument\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/hyperparameters/training-argument.md",
"chars": 118,
"preview": "# Training Argument\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/index.rst",
"chars": 1247,
"preview": "LlamaFactory Docs\n=================\n\n.. toctree::\n :maxdepth: 1\n :caption: Getting Started\n\n getting-started\n in"
},
{
"path": "docs/en/inference/deploy.md",
"chars": 107,
"preview": "# Deploy\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/installation.md",
"chars": 113,
"preview": "# Installation\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/llamaboard-web-ui.md",
"chars": 118,
"preview": "# LlamaBoard Web UI\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/training/dpo.md",
"chars": 104,
"preview": "# DPO\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/en/training/sft.md",
"chars": 104,
"preview": "# SFT\n\nThis page is not yet available in English. Use the language switcher to view Simplified Chinese.\n"
},
{
"path": "docs/make.bat",
"chars": 765,
"preview": "@ECHO OFF\n\npushd %~dp0\n\nREM Command file for Sphinx documentation\n\nif \"%SPHINXBUILD%\" == \"\" (\n\tset SPHINXBUILD=sphinx-bu"
},
{
"path": "docs/requirements.txt",
"chars": 57,
"preview": "sphinx>=6.0.0\nsphinx-rtd-theme>=1.2.0\nmyst-parser>=2.0.0\n"
},
{
"path": "docs/zh/advanced/custom-kernels/custom-kernels.md",
"chars": 2913,
"preview": "# LLaMA-Factory Kernels 系统\n\n## 概述\n\nLLaMA-Factory Kernels 系统用于管理不同硬件设备提供的高性能计算内核(kernel)实现,该系统通过替换模型中的关键模块(如 RMSNorm、SwiG"
},
{
"path": "docs/zh/advanced/custom-kernels/fused-operators.md",
"chars": 4816,
"preview": "# Fused Operators\n\nLLaMA-Factory 提供了一系列针对特定硬件优化的融合算子。这些算子位于 `src/llamafactory/v1/plugins/model_plugins/kernels/ops` 目录下。"
},
{
"path": "docs/zh/advanced/custom-kernels/triton.md",
"chars": 9,
"preview": "# Triton\n"
},
{
"path": "docs/zh/advanced/distributed/deepspeed.md",
"chars": 12,
"preview": "# DeepSpeed\n"
},
{
"path": "docs/zh/advanced/distributed/fsdp.md",
"chars": 7,
"preview": "# FSDP\n"
},
{
"path": "docs/zh/advanced/distributed/parallel-dp-tp-ep-sp-cp.md",
"chars": 31,
"preview": "# Parallel(DP, TP, EP, SP, CP)\n"
},
{
"path": "docs/zh/advanced/lora-and-quantization/lora.md",
"chars": 21,
"preview": "# Lora\n\n参数管理(二级参数形式)\n"
},
{
"path": "docs/zh/advanced/lora-and-quantization/quantization.md",
"chars": 15,
"preview": "# Quantization\n"
},
{
"path": "docs/zh/conf.py",
"chars": 389,
"preview": "import os\nimport sys\n\n\n# Add parent dir to path to allow importing conf.py\nsys.path.insert(0, os.path.abspath(\"..\"))\n\nfr"
},
{
"path": "docs/zh/data-preparation/data-processing.md",
"chars": 9735,
"preview": "# LLaMA-Factory v1 数据预处理\n\n## 总览\n\nLLaMA-Factory `v1` 采用了全新的数据处理架构,主要包含以下核心组件:\n\n- **DataEngine**:数据引擎,负责数据集的加载、索引和转换等各种插件的"
},
{
"path": "docs/zh/dev-guide/core/data-engine.md",
"chars": 6223,
"preview": "# DataEngine\n\n## 1. DataEngine 简介\n\n\n`DataEngine` 是 LLaMA-Factory v1 数据处理的核心类,继承自 PyTorch 的 `Dataset`,负责各种插件的接入,其他功能(如数据格"
},
{
"path": "docs/zh/dev-guide/core/model-engine.md",
"chars": 14,
"preview": "# ModelEngine\n"
},
{
"path": "docs/zh/dev-guide/core/trainer.md",
"chars": 10,
"preview": "# Trainer\n"
},
{
"path": "docs/zh/dev-guide/plugins/data-plugins.md",
"chars": 11217,
"preview": "# Data Plugins\n\n## 1. Data Plugins 简介\n\n## DataConverterPlugin\n\n### 1. DataConverterPlugin 简介\n\nDataConverter 负责将非标准格式的数据集"
},
{
"path": "docs/zh/dev-guide/plugins/model-plugins/initialization.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/zh/dev-guide/plugins/model-plugins/kernels.md",
"chars": 4347,
"preview": "# Kernels plugins\n\n## 概览\nLLaMA-Factory 通过 Kernels plugins 系统,依据不同硬件设备提供高性能计算内核(kernel)实现。该系统通过注册表机制管理所有 kernel,通过 `@regi"
},
{
"path": "docs/zh/dev-guide/plugins/model-plugins/rendering.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/zh/getting-started.md",
"chars": 1928,
"preview": "# Getting Started\n\n\n## 训练方法\n\n| 方法 | 全参数训练 | 部分参数训练 | LoRA | QLoRA "
},
{
"path": "docs/zh/hyperparameters/data-argument.md",
"chars": 16,
"preview": "# Data Argument\n"
},
{
"path": "docs/zh/hyperparameters/model-argument.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/zh/hyperparameters/sample-argument.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/zh/hyperparameters/training-argument.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/zh/index.rst",
"chars": 1245,
"preview": "LlamaFactory 文档\n=================\n\n.. toctree::\n :maxdepth: 1\n :caption: Getting Started\n\n getting-started\n inst"
},
{
"path": "docs/zh/inference/deploy.md",
"chars": 9,
"preview": "# Deploy\n"
},
{
"path": "docs/zh/installation.md",
"chars": 15,
"preview": "# Installation\n"
},
{
"path": "docs/zh/llamaboard-web-ui.md",
"chars": 20,
"preview": "# LlamaBoard Web UI\n"
},
{
"path": "docs/zh/training/dpo.md",
"chars": 6,
"preview": "# DPO\n"
},
{
"path": "docs/zh/training/sft.md",
"chars": 6,
"preview": "# SFT\n"
},
{
"path": "examples/README.md",
"chars": 7036,
"preview": "We provide diverse examples about fine-tuning LLMs.\n\nMake sure to execute these commands in the `LLaMA-Factory` director"
},
{
"path": "examples/README_zh.md",
"chars": 5684,
"preview": "我们提供了多样化的大模型微调示例脚本。\n\n请确保在 `LLaMA-Factory` 目录下执行下述命令。\n\n## 目录\n\n- [LoRA 微调](#lora-微调)\n- [QLoRA 微调](#qlora-微调)\n- [全参数微调](#全参"
},
{
"path": "examples/accelerate/fsdp2_config.yaml",
"chars": 592,
"preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndistributed_type: FSDP\ndowncast_bf16: 'no'\nfsdp_config:\n fsdp_auto_wrap"
},
{
"path": "examples/accelerate/fsdp_config.yaml",
"chars": 707,
"preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndistributed_type: FSDP\ndowncast_bf16: 'no'\nfsdp_config:\n fsdp_auto_wrap"
},
{
"path": "examples/accelerate/fsdp_config_multiple_nodes.yaml",
"chars": 1323,
"preview": "# If you want to run this example on multiple nodes, you need to set the following parameters:\n# - num_machines: the num"
},
{
"path": "examples/accelerate/fsdp_config_offload.yaml",
"chars": 743,
"preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndistributed_type: FSDP\ndowncast_bf16: 'no'\nfsdp_config:\n fsdp_auto_wrap"
},
{
"path": "examples/ascend/qwen3_full_sft_fsdp2.yaml",
"chars": 990,
"preview": "# Start FSDP2 fine-tuning\n# accelerate launch \\\n# --config_file examples/accelerate/fsdp2_config.yaml \\\n# src/tr"
},
{
"path": "examples/ascend/qwen3moe_full_sft_fsdp.yaml",
"chars": 1057,
"preview": "# Start FSDP fine-tuning\n# accelerate launch \\\n# --config_file examples/accelerate/fsdp_config.yaml \\\n# src/trai"
},
{
"path": "examples/ascend/qwen3vlmoe_full_sft_fsdp2.yaml",
"chars": 1126,
"preview": "# Start FSDP2 fine-tuning\n# accelerate launch \\\n# --config_file examples/accelerate/fsdp2_config.yaml \\\n# src/tr"
},
{
"path": "examples/ascend/qwen3vlmoe_lora_sft_fsdp.yaml",
"chars": 972,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-VL-30B-A3B-Instruct\nimage_max_pixels: 262144\nvideo_max_pixels: 16384\ntrust_remo"
},
{
"path": "examples/deepspeed/ds_z0_config.json",
"chars": 665,
"preview": "{\n \"train_batch_size\": \"auto\",\n \"train_micro_batch_size_per_gpu\": \"auto\",\n \"gradient_accumulation_steps\": \"auto\",\n \""
},
{
"path": "examples/deepspeed/ds_z2_autotp_config.json",
"chars": 835,
"preview": "{\n \"_comment\": \"supported model list: https://www.deepspeed.ai/tutorials/automatic-tensor-parallelism/#supported-models"
},
{
"path": "examples/deepspeed/ds_z2_config.json",
"chars": 665,
"preview": "{\n \"train_batch_size\": \"auto\",\n \"train_micro_batch_size_per_gpu\": \"auto\",\n \"gradient_accumulation_steps\": \"auto\",\n \""
},
{
"path": "examples/deepspeed/ds_z2_offload_config.json",
"chars": 747,
"preview": "{\n \"train_batch_size\": \"auto\",\n \"train_micro_batch_size_per_gpu\": \"auto\",\n \"gradient_accumulation_steps\": \"auto\",\n \""
},
{
"path": "examples/deepspeed/ds_z3_config.json",
"chars": 789,
"preview": "{\n \"train_batch_size\": \"auto\",\n \"train_micro_batch_size_per_gpu\": \"auto\",\n \"gradient_accumulation_steps\": \"auto\",\n \""
},
{
"path": "examples/deepspeed/ds_z3_fp8_config.json",
"chars": 1110,
"preview": "{\n \"train_micro_batch_size_per_gpu\": \"auto\",\n \"gradient_clipping\": \"auto\",\n \"zero_allow_untested_optimizer\": true,\n "
},
{
"path": "examples/deepspeed/ds_z3_offload_config.json",
"chars": 949,
"preview": "{\n \"train_batch_size\": \"auto\",\n \"train_micro_batch_size_per_gpu\": \"auto\",\n \"gradient_accumulation_steps\": \"auto\",\n \""
},
{
"path": "examples/extras/adam_mini/qwen2_full_sft.yaml",
"chars": 849,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen2-1.5B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true\nfin"
},
{
"path": "examples/extras/apollo/llama3_full_sft.yaml",
"chars": 1052,
"preview": "### model\nmodel_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_trai"
},
{
"path": "examples/extras/asft/llama2_full_asft.yaml",
"chars": 889,
"preview": "### model\nmodel_name_or_path: models/Llama-2-7b\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true\nfinetuning"
},
{
"path": "examples/extras/asft/qwen2_full_asft.yaml",
"chars": 888,
"preview": "### model\nmodel_name_or_path: models/Qwen2.5-7B\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true\nfinetuning"
},
{
"path": "examples/extras/badam/llama3_full_sft.yaml",
"chars": 963,
"preview": "### model\nmodel_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_trai"
},
{
"path": "examples/extras/dft/qwen2_full_sft.yaml",
"chars": 848,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen2-1.5B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true\nfin"
},
{
"path": "examples/extras/eaft/qwen25_05b_eaft_full.yaml",
"chars": 753,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen2.5-0.5B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true\nf"
},
{
"path": "examples/extras/fp8/llama3_fp8_deepspeed_sft.yaml",
"chars": 1175,
"preview": "# FP8 training example with DeepSpeed ZeRO-3\n# This config demonstrates FP8 mixed precision training using HuggingFace A"
},
{
"path": "examples/extras/fp8/llama3_fp8_fsdp_sft.yaml",
"chars": 1268,
"preview": "# FP8 training example with FSDP\n# This config demonstrates FP8 mixed precision training using HuggingFace Accelerate\n# "
},
{
"path": "examples/extras/fsdp_qlora/llama3_lora_sft.yaml",
"chars": 891,
"preview": "### model\nmodel_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct\nquantization_bit: 4\ntrust_remote_code: true\n\n### metho"
},
{
"path": "examples/extras/fsdp_qlora/train.sh",
"chars": 222,
"preview": "#!/bin/bash\n# DO NOT use GPTQ/AWQ model in FSDP+QLoRA\n\nCUDA_VISIBLE_DEVICES=0,1 accelerate launch \\\n --config_file ex"
},
{
"path": "examples/extras/galore/llama3_full_sft.yaml",
"chars": 1024,
"preview": "### model\nmodel_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_trai"
},
{
"path": "examples/extras/llama_pro/expand.sh",
"chars": 165,
"preview": "#!/bin/bash\n\npython scripts/llama_pro.py \\\n --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \\\n --output_d"
},
{
"path": "examples/extras/llama_pro/llama3_freeze_sft.yaml",
"chars": 911,
"preview": "### model\nmodel_name_or_path: models/llama3-8b-pro\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true\nfinetun"
},
{
"path": "examples/extras/loraplus/llama3_lora_sft.yaml",
"chars": 895,
"preview": "### model\nmodel_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_trai"
},
{
"path": "examples/extras/mod/llama3_full_sft.yaml",
"chars": 901,
"preview": "### model\nmodel_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_trai"
},
{
"path": "examples/extras/multi_tokens/tokens_cfg.yaml",
"chars": 1083,
"preview": "# SVG Container Tags\n\"<|START_OF_SVG|>\": \"Marks the beginning of an SVG document\"\n\"<|END_OF_SVG|>\": \"Marks the end of an"
},
{
"path": "examples/extras/muon/qwen2_full_sft.yaml",
"chars": 844,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen2-1.5B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true\nfin"
},
{
"path": "examples/extras/nlg_eval/llama3_lora_predict.yaml",
"chars": 745,
"preview": "# The batch generation can be SLOW using this config.\n# For faster inference, we recommend to use `scripts/vllm_infer.py"
},
{
"path": "examples/extras/oft/llama3_oft_sft.yaml",
"chars": 934,
"preview": "### model\nmodel_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_trai"
},
{
"path": "examples/extras/oft/qwen2_5vl_oft_sft.yaml",
"chars": 985,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct\nimage_max_pixels: 262144\nvideo_max_pixels: 16384\ntrust_remote_"
},
{
"path": "examples/extras/pissa/init.sh",
"chars": 147,
"preview": "#!/bin/bash\n\npython scripts/pissa_init.py \\\n --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \\\n --output_"
},
{
"path": "examples/extras/pissa/llama3_lora_sft.yaml",
"chars": 923,
"preview": "### model\nmodel_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_trai"
},
{
"path": "examples/extras/qoft/llama3_oft_sft_awq.yaml",
"chars": 878,
"preview": "### model\nmodel_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-AWQ\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_"
},
{
"path": "examples/extras/qoft/llama3_oft_sft_bnb_npu.yaml",
"chars": 946,
"preview": "### model\nmodel_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct\nquantization_bit: 4\nquantization_method: bnb\ndouble_qu"
},
{
"path": "examples/extras/qoft/llama3_oft_sft_gptq.yaml",
"chars": 879,
"preview": "### model\nmodel_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ\ntrust_remote_code: true\n\n### method\nstage: sft\ndo"
},
{
"path": "examples/inference/qwen3.yaml",
"chars": 178,
"preview": "model_name_or_path: Qwen/Qwen3-4B-Instruct-2507\ntemplate: qwen3_nothink\ninfer_backend: huggingface # choices: [huggingf"
},
{
"path": "examples/inference/qwen3_full_sft.yaml",
"chars": 174,
"preview": "model_name_or_path: saves/qwen3-4b/full/sft\ntemplate: qwen3_nothink\ninfer_backend: huggingface # choices: [huggingface,"
},
{
"path": "examples/inference/qwen3_lora_sft.yaml",
"chars": 224,
"preview": "model_name_or_path: Qwen/Qwen3-4B-Instruct-2507\nadapter_name_or_path: saves/qwen3-4b/lora/sft\ntemplate: qwen3_nothink\nin"
},
{
"path": "examples/inference/qwen3vl.yaml",
"chars": 179,
"preview": "model_name_or_path: Qwen/Qwen3-VL-4B-Instruct\ntemplate: qwen3_vl_nothink\ninfer_backend: huggingface # choices: [hugging"
},
{
"path": "examples/ktransformers/infer_lora/deepseek2_lora_sft_kt.yaml",
"chars": 414,
"preview": "model_name_or_path: deepseek-ai/DeepSeek-V2-Lite\nadapter_name_or_path: saves/Kllama_deepseekV2\ntemplate: deepseek\ninfer_"
},
{
"path": "examples/ktransformers/infer_lora/deepseek3_kt.yaml",
"chars": 380,
"preview": "model_name_or_path: opensourcerelease/DeepSeek-V3-bf16\ntemplate: deepseek3\ninfer_backend: ktransformers # choices: [hug"
},
{
"path": "examples/ktransformers/infer_lora/deepseek3_lora_sft_kt.yaml",
"chars": 426,
"preview": "model_name_or_path: opensourcerelease/DeepSeek-V3-bf16\nadapter_name_or_path: saves/Kllama_deepseekV3\ntemplate: deepseek3"
},
{
"path": "examples/ktransformers/infer_lora/qwen3moe_lora_sft_kt.yaml",
"chars": 419,
"preview": "model_name_or_path: Qwen/Qwen3-235B-A22B-Instruct-2507\nadapter_name_or_path: saves/Kllama_Qwen3MoE_235bA22b\ntemplate: qw"
},
{
"path": "examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Chat-sft-amx.yaml",
"chars": 2283,
"preview": "- match:\n class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding\n replace:\n class: ktransfor"
},
{
"path": "examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Chat.yaml",
"chars": 2217,
"preview": "- match:\n class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding\n replace:\n class: ktransfor"
},
{
"path": "examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx-multi-gpu.yaml",
"chars": 4561,
"preview": "- match:\n name: \"^model.embed_tokens\"\n replace:\n class: \"default\"\n kwargs:\n generate_device: \"cpu\"\n "
},
{
"path": "examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx.yaml",
"chars": 2282,
"preview": "- match:\n class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding\n replace:\n class: ktransfor"
},
{
"path": "examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft.yaml",
"chars": 2217,
"preview": "- match:\n class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding\n replace:\n class: ktransfor"
},
{
"path": "examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat.yaml",
"chars": 2217,
"preview": "- match:\n class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding\n replace:\n class: ktransfor"
},
{
"path": "examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-amx.yaml",
"chars": 2717,
"preview": "- match:\n class: ktransformers.models.modeling_deepseek_v3.DeepseekV3RotaryEmbedding\n replace:\n class: ktransform"
},
{
"path": "examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu-4.yaml",
"chars": 11129,
"preview": "- match:\n name: \"^model.embed_tokens\"\n replace:\n class: \"default\"\n kwargs:\n generate_device: \"cpu\"\n "
},
{
"path": "examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml",
"chars": 5216,
"preview": "- match:\n name: \"^model.embed_tokens\"\n replace:\n class: \"default\"\n kwargs:\n generate_device: \"cpu\"\n "
},
{
"path": "examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx.yaml",
"chars": 2718,
"preview": "- match:\n class: ktransformers.models.modeling_deepseek_v3.DeepseekV3RotaryEmbedding\n replace:\n class: ktransform"
},
{
"path": "examples/ktransformers/kt_optimize_rules/Qwen3Moe-sft-amx.yaml",
"chars": 2732,
"preview": "- match:\n class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeRotaryEmbedding\n replace:\n class: ktransformers."
},
{
"path": "examples/ktransformers/train_lora/deepseek2_lora_sft_kt.yaml",
"chars": 1125,
"preview": "### model\nmodel_name_or_path: deepseek-ai/DeepSeek-V2-Lite\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true"
},
{
"path": "examples/ktransformers/train_lora/deepseek3_lora_sft_kt.yaml",
"chars": 1137,
"preview": "### model\nmodel_name_or_path: opensourcerelease/DeepSeek-V3-bf16\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train"
},
{
"path": "examples/ktransformers/train_lora/qwen3moe_lora_sft_kt.yaml",
"chars": 1128,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-235B-A22B-Instruct-2507\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train"
},
{
"path": "examples/megatron/qwen2_vl_full.yaml",
"chars": 656,
"preview": "model_name_or_path: Qwen/Qwen2-VL-7B-Instruct\nimage_max_pixels: 262144\nvideo_max_pixels: 16384\n\ndo_train: true\nstage: sf"
},
{
"path": "examples/megatron/qwen3_moe_full.yaml",
"chars": 862,
"preview": "model_name_or_path: Qwen/Qwen3-30B-A3B-Instruct-2507\n\n# GPU memory: 8 * 78GB\ndo_train: true\nstage: sft\nfinetuning_type: "
},
{
"path": "examples/merge_lora/qwen3_full_sft.yaml",
"chars": 235,
"preview": "### model\nmodel_name_or_path: saves/qwen3-4b/full/sft\ntemplate: qwen3_nothink\ntrust_remote_code: true\n\n### export\nexport"
},
{
"path": "examples/merge_lora/qwen3_gptq.yaml",
"chars": 308,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-4B-Instruct-2507\ntemplate: qwen3_nothink\ntrust_remote_code: true\n\n### export\nex"
},
{
"path": "examples/merge_lora/qwen3_lora_sft.yaml",
"chars": 370,
"preview": "### Note: DO NOT use quantized model or quantization_bit when merging lora adapters\n\n### model\nmodel_name_or_path: Qwen/"
},
{
"path": "examples/merge_lora/qwen3vl_lora_sft.yaml",
"chars": 377,
"preview": "### Note: DO NOT use quantized model or quantization_bit when merging lora adapters\n\n### model\nmodel_name_or_path: Qwen/"
},
{
"path": "examples/train_full/qwen3_full_sft.yaml",
"chars": 995,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-4B-Instruct-2507\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true\n"
},
{
"path": "examples/train_full/qwen3vl_full_sft.yaml",
"chars": 1047,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-VL-4B-Instruct\nimage_max_pixels: 262144\nvideo_max_pixels: 16384\ntrust_remote_co"
},
{
"path": "examples/train_lora/qwen3_lora_dpo.yaml",
"chars": 967,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-4B-Instruct-2507\ntrust_remote_code: true\n\n### method\nstage: dpo\ndo_train: true\n"
},
{
"path": "examples/train_lora/qwen3_lora_kto.yaml",
"chars": 827,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-4B-Instruct-2507\ntrust_remote_code: true\n\n### method\nstage: kto\ndo_train: true\n"
},
{
"path": "examples/train_lora/qwen3_lora_pretrain.yaml",
"chars": 864,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-4B-Instruct-2507\ntrust_remote_code: true\n\n### method\nstage: pt\ndo_train: true\nf"
},
{
"path": "examples/train_lora/qwen3_lora_reward.yaml",
"chars": 894,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-4B-Instruct-2507\ntrust_remote_code: true\n\n### method\nstage: rm\ndo_train: true\nf"
},
{
"path": "examples/train_lora/qwen3_lora_sft.sh",
"chars": 873,
"preview": "#!/bin/bash\n\nset -x\n\nMODEL_PATH=Qwen/Qwen3-4B-Instruct-2507\n\nllamafactory-cli train \\\n --model_name_or_path ${MODEL_P"
},
{
"path": "examples/train_lora/qwen3_lora_sft.yaml",
"chars": 907,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-4B-Instruct-2507\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true\n"
},
{
"path": "examples/train_lora/qwen3_lora_sft_ds3.yaml",
"chars": 1025,
"preview": "### model\nmodel_name_or_path: Qwen/Qwen3-4B-Instruct-2507\ntrust_remote_code: true\n\n### method\nstage: sft\ndo_train: true\n"
}
]
// ... and 304 more files (download for full content)
About this extraction
This page contains the full source code of the hiyouga/LlamaFactory GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 504 files (9.4 MB), approximately 2.5M tokens, and a symbol index with 1376 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.