gitextract_9etz2yip/

├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.yml
│   │   └── config.yml
│   └── workflows/
│       └── inactive.yml
├── .gitignore
├── .readthedocs.yaml
├── README.md
├── docker/
│   ├── Dockerfile-cu121
│   ├── docker_cli_demo.sh
│   └── docker_web_demo.sh
├── docs/
│   ├── Makefile
│   ├── README.md
│   ├── locales/
│   │   └── zh_CN/
│   │       └── LC_MESSAGES/
│   │           ├── deployment/
│   │           │   ├── dstack.po
│   │           │   ├── openllm.po
│   │           │   ├── sglang.po
│   │           │   ├── skypilot.po
│   │           │   ├── tgi.po
│   │           │   └── vllm.po
│   │           ├── framework/
│   │           │   ├── Langchain.po
│   │           │   ├── LlamaIndex.po
│   │           │   ├── function_call.po
│   │           │   └── qwen_agent.po
│   │           ├── getting_started/
│   │           │   ├── concepts.po
│   │           │   ├── quantization_benchmark.po
│   │           │   ├── quickstart.po
│   │           │   ├── speed_benchmark.po
│   │           │   └── thinking_budget.po
│   │           ├── index.po
│   │           ├── inference/
│   │           │   └── transformers.po
│   │           ├── quantization/
│   │           │   ├── awq.po
│   │           │   ├── gptq.po
│   │           │   └── llama.cpp.po
│   │           ├── run_locally/
│   │           │   ├── llama.cpp.po
│   │           │   ├── mlx-lm.po
│   │           │   └── ollama.po
│   │           └── training/
│   │               ├── axolotl.po
│   │               ├── llama_factory.po
│   │               ├── ms_swift.po
│   │               ├── unsloth.po
│   │               └── verl.po
│   ├── make.bat
│   ├── requirements-docs.txt
│   └── source/
│       ├── _static/
│       │   ├── css/
│       │   │   └── custom.css
│       │   └── design-tabs.js
│       ├── assets/
│       │   └── qwen3_nonthinking.jinja
│       ├── conf.py
│       ├── deployment/
│       │   ├── dstack.rst
│       │   ├── openllm.rst
│       │   ├── sglang.md
│       │   ├── skypilot.rst
│       │   ├── tgi.rst
│       │   └── vllm.md
│       ├── framework/
│       │   ├── Langchain.rst
│       │   ├── LlamaIndex.rst
│       │   ├── function_call.md
│       │   └── qwen_agent.rst
│       ├── getting_started/
│       │   ├── concepts.md
│       │   ├── quantization_benchmark.rst
│       │   ├── quickstart.md
│       │   ├── speed_benchmark.md
│       │   └── thinking_budget.md
│       ├── index.rst
│       ├── inference/
│       │   └── transformers.md
│       ├── quantization/
│       │   ├── awq.md
│       │   ├── gptq.md
│       │   └── llama.cpp.md
│       ├── run_locally/
│       │   ├── llama.cpp.md
│       │   ├── lmstudio.md
│       │   ├── mlx-lm.md
│       │   └── ollama.md
│       └── training/
│           ├── axolotl.md
│           ├── llama_factory.md
│           ├── ms_swift.md
│           ├── unsloth.md
│           └── verl.md
├── eval/
│   ├── README.md
│   ├── configs/
│   │   └── ARCAGI-Qwen3-235B-A22B-Instruct-2507.yaml
│   ├── data/
│   │   └── arc_agi_1.jsonl
│   ├── eval/
│   │   ├── arc_agi_1.py
│   │   └── eval.py
│   ├── eval_res/
│   │   └── ARCAGI-Qwen3-235B-A22B-Instruct-2507_eval_result.txt
│   ├── generate_api_answers/
│   │   ├── infer_multithread.py
│   │   └── utils_vllm.py
│   ├── output/
│   │   ├── ARCAGI-Qwen3-235B-A22B-Instruct-2507.jsonl
│   │   └── ARCAGI-Qwen3-235B-A22B-Instruct-2507_details.jsonl
│   └── requirements.txt
└── examples/
    ├── README.md
    ├── demo/
    │   ├── cli_demo.py
    │   └── web_demo.py
    ├── gcu-support/
    │   ├── README.md
    │   └── gcu_demo.py
    ├── llama-factory/
    │   ├── finetune-zh.md
    │   ├── qwen2-7b-full-sft.yaml
    │   ├── qwen2-7b-lora-sft.yaml
    │   ├── qwen2-7b-merge-lora.yaml
    │   └── qwen2-7b-qlora-sft.yaml
    └── speed-benchmark/
        ├── README.md
        ├── README_zh.md
        ├── requirements-perf-transformers.txt
        ├── requirements-perf-vllm.txt
        ├── speed_benchmark_transformers.py
        └── speed_benchmark_vllm.py