Full Code of OptimalScale/LMFlow for AI

main 63a001770e82 cached
281 files
2.7 MB
709.5k tokens
850 symbols
1 requests
Download .txt
Showing preview only (2,833K chars total). Download the full file or copy to clipboard to get everything.
Repository: OptimalScale/LMFlow
Branch: main
Commit: 63a001770e82
Files: 281
Total size: 2.7 MB

Directory structure:
gitextract_35w5f4e3/

├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── api-feedback.md
│   │   ├── blank-template.md
│   │   ├── bug-report.md
│   │   └── feature-request.md
│   └── workflows/
│       └── documentation.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── configs/
│   ├── accelerate_dsz0_config.yaml
│   ├── accelerate_dsz2_config.yaml
│   ├── accelerate_dsz3_config.yaml
│   ├── accelerate_fsdp_config.yaml
│   ├── accelerate_singlegpu_config.yaml
│   ├── archive/
│   │   ├── accelerate_multigpu_config.yaml
│   │   ├── accelerate_singlegpu_config.yaml
│   │   ├── ds_config_chatbot.json
│   │   ├── ds_config_eval.json
│   │   ├── ds_config_multimodal.json
│   │   └── ds_config_vis_chatbot.json
│   ├── deepspeed/
│   │   ├── zero0_no_offload.json
│   │   ├── zero2.json
│   │   ├── zero2_no_offload.json
│   │   ├── zero3.json
│   │   ├── zero3_for_eval.json
│   │   └── zero3_no_offload.json
│   └── iterative_dpo.yaml
├── contrib/
│   ├── README.md
│   ├── langchain/
│   │   ├── README.md
│   │   └── retrieval_chatbot.py
│   ├── long-context/
│   │   ├── hf_sft_full_finetune.sh
│   │   ├── hf_sft_lora_flashattn.sh
│   │   └── sft_summarizer.py
│   ├── rlhflow/
│   │   ├── reward_modeling.py
│   │   └── run_reward_modeling.sh
│   ├── text2image/
│   │   ├── README.md
│   │   ├── accelerate_t2i_config.yaml
│   │   ├── diffuser_args.py
│   │   ├── diffuser_finetuner.py
│   │   ├── finetune_t2i.py
│   │   ├── finetune_t2i.sh
│   │   ├── requirements.txt
│   │   └── t2i_dataset.py
│   └── tool-finetune/
│       ├── README.md
│       ├── function_call_finetune.py
│       └── run_function_call_finetune.sh
├── docs/
│   ├── dev_notes/
│   │   └── finetuning.mmd
│   ├── readme/
│   │   ├── Position_Interpolation.md
│   │   ├── README_es.md
│   │   ├── README_hindi.md
│   │   ├── README_jp.md
│   │   ├── README_ko.md
│   │   ├── README_zh-hans.md
│   │   ├── flash_attn2.md
│   │   └── multi_node.md
│   ├── requirements.txt
│   └── source/
│       ├── _static/
│       │   └── check_before_after_lora_tuning.jsonl
│       ├── about/
│       │   ├── authors.md
│       │   ├── changelog.md
│       │   └── index.md
│       ├── blogs/
│       │   ├── benchmark.md
│       │   └── index.md
│       ├── conf.py
│       ├── examples/
│       │   ├── DATASETS.md
│       │   ├── TASK_GUIDE.md
│       │   ├── checkpoints.md
│       │   ├── customize_conversation_template.md
│       │   ├── finetuning.md
│       │   ├── index.md
│       │   ├── medical_finetune.md
│       │   ├── raft.md
│       │   ├── reward_modeling.md
│       │   └── supported_conversation_template.md
│       └── index.md
├── examples/
│   ├── benchmarking.py
│   ├── chatbot.py
│   ├── chatbot_gradio.py
│   ├── detail_memory.py
│   ├── dpo_train.py
│   ├── dpov2_train.py
│   ├── evaluation.py
│   ├── finetune.py
│   ├── finetune_multi_modal.py
│   ├── inference.py
│   ├── iterative_dpo_train.py
│   ├── merge_lora.py
│   ├── multistage_finetune.py
│   ├── raft_align.py
│   ├── reward_modeling.py
│   ├── rm_inference.py
│   ├── sglang_inference.py
│   ├── speculative_inference.py
│   ├── tool_inference.py
│   ├── vis_chatbot.py
│   ├── vis_chatbot_gradio.py
│   └── vllm_inference.py
├── experimental/
│   ├── Hymba/
│   │   ├── README.md
│   │   └── run_finetune_hymba.sh
│   ├── LISA-diffusion/
│   │   ├── README.md
│   │   ├── diffusion_dpo/
│   │   │   ├── train_diffusion_dpo.py
│   │   │   └── train_diffusion_dpo_lisa.py
│   │   ├── instruct_pix2pix/
│   │   │   ├── test_instruct_pix2pix.py
│   │   │   └── train_instruct_pix2pix_lisa.py
│   │   ├── latent_consistency_model/
│   │   │   ├── train_lcm_distill_sd_wds_lisa.py
│   │   │   └── train_lcm_distill_sd_wds_lora.py
│   │   ├── requirement.txt
│   │   └── single_lisa.py
│   └── RAFT-diffusion/
│       ├── README.md
│       ├── SD256-RAFT.ipynb
│       ├── requirements.txt
│       └── train_text_to_image_lora.py
├── pyproject.toml
├── requirements.txt
├── scripts/
│   ├── archive/
│   │   ├── bash.sh
│   │   ├── convert_llama_weights_to_hf.py
│   │   ├── download_model.sh
│   │   ├── export_llama_state_dict_checkpoint.py
│   │   ├── run_all_benchmark.sh
│   │   ├── run_app.sh
│   │   ├── run_benchmark.sh
│   │   ├── run_chatbot.sh
│   │   ├── run_chatbot_chatglm.sh
│   │   ├── run_chatbot_cpu.sh
│   │   ├── run_detail_gpu_memory.sh
│   │   ├── run_dpo_align.sh
│   │   ├── run_dpov2_align.sh
│   │   ├── run_evaluation.sh
│   │   ├── run_evaluation_accelerator.sh
│   │   ├── run_evaluation_with_lora.sh
│   │   ├── run_finetune.sh
│   │   ├── run_finetune_with_custom_optim.sh
│   │   ├── run_finetune_with_lisa.sh
│   │   ├── run_finetune_with_lora.sh
│   │   ├── run_finetune_with_qlora.sh
│   │   ├── run_inference.sh
│   │   ├── run_inference_multimodal_model.sh
│   │   ├── run_iterative_dpo.sh
│   │   ├── run_multistage_finetune.sh
│   │   ├── run_raft_align.sh
│   │   ├── run_reward_modeling.sh
│   │   ├── run_reward_modeling_with_lisa.sh
│   │   ├── run_reward_modeling_with_lora.sh
│   │   ├── run_rm_inference.sh
│   │   ├── run_tool.sh
│   │   └── run_vllm_inference.sh
│   ├── multimodal/
│   │   ├── README.md
│   │   ├── run_finetune_multi_modal_stage1.sh
│   │   ├── run_finetune_multi_modal_stage2.sh
│   │   ├── run_vis_chatbot_blip2.sh
│   │   ├── run_vis_chatbot_gradio_minigpt4.sh
│   │   ├── run_vis_chatbot_llava.sh
│   │   └── run_vis_chatbot_minigpt4.sh
│   ├── run_finetune.sh
│   ├── run_finetune_with_custom_optim.sh
│   ├── run_finetune_with_lisa.sh
│   ├── run_finetune_with_lora.sh
│   ├── run_finetune_with_qlora.sh
│   ├── run_merge_lora.sh
│   ├── run_sglang_inference.sh
│   └── run_unittest.sh
├── setup.py
├── src/
│   └── lmflow/
│       ├── __init__.py
│       ├── args.py
│       ├── datasets/
│       │   ├── __init__.py
│       │   ├── dataset.py
│       │   └── multi_modal_dataset.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── auto_model.py
│       │   ├── base_model.py
│       │   ├── decoder_model.py
│       │   ├── encoder_decoder_model.py
│       │   ├── hf_decoder_model.py
│       │   ├── hf_model_mixin.py
│       │   ├── hf_text_regression_model.py
│       │   ├── interfaces/
│       │   │   ├── __init__.py
│       │   │   └── tunable.py
│       │   ├── regression_model.py
│       │   ├── text_regression_model.py
│       │   ├── vision2seq_model.py
│       │   └── vision_encoder/
│       │       ├── __init__.py
│       │       └── clip_encoder.py
│       ├── optim/
│       │   ├── __init__.py
│       │   ├── adabelief.py
│       │   ├── adabound.py
│       │   ├── adadelta.py
│       │   ├── adagrad.py
│       │   ├── adam.py
│       │   ├── adamax.py
│       │   ├── adamp.py
│       │   ├── adamw_schedule_free.py
│       │   ├── adan.py
│       │   ├── dummy.py
│       │   ├── lamb.py
│       │   ├── lars.py
│       │   ├── muon.py
│       │   ├── nadam.py
│       │   ├── novograd.py
│       │   ├── optimizers.py
│       │   ├── radam.py
│       │   ├── sgd_schedule_free.py
│       │   ├── sgdp.py
│       │   ├── sophia.py
│       │   ├── utils.py
│       │   └── yogi.py
│       ├── pipeline/
│       │   ├── __init__.py
│       │   ├── auto_pipeline.py
│       │   ├── base_aligner.py
│       │   ├── base_pipeline.py
│       │   ├── base_tuner.py
│       │   ├── dpo_aligner.py
│       │   ├── dpov2_aligner.py
│       │   ├── evaluator.py
│       │   ├── finetuner.py
│       │   ├── inferencer.py
│       │   ├── iterative_dpo_aligner.py
│       │   ├── raft_aligner.py
│       │   ├── rm_inferencer.py
│       │   ├── rm_tuner.py
│       │   ├── sglang_inferencer.py
│       │   ├── utils/
│       │   │   ├── __init__.py
│       │   │   ├── dpov2_dataprocessor.py
│       │   │   ├── dpov2_trainer.py
│       │   │   ├── lisa_trainer.py
│       │   │   ├── memory_safe_dpov2_align.py
│       │   │   ├── memory_safe_vllm_inference.py
│       │   │   ├── raft_trainer.py
│       │   │   ├── rm_dataprocessor.py
│       │   │   └── rm_trainer.py
│       │   └── vllm_inferencer.py
│       ├── tokenization/
│       │   ├── __init__.py
│       │   ├── hf_decoder_model.py
│       │   └── hf_text_regression_model.py
│       ├── utils/
│       │   ├── __init__.py
│       │   ├── common.py
│       │   ├── constants.py
│       │   ├── conversation_template/
│       │   │   ├── __init__.py
│       │   │   ├── base.py
│       │   │   ├── chatglm.py
│       │   │   ├── chatml.py
│       │   │   ├── deepseek.py
│       │   │   ├── gemma.py
│       │   │   ├── hymba.py
│       │   │   ├── internlm.py
│       │   │   ├── llama.py
│       │   │   ├── phi.py
│       │   │   ├── qwen.py
│       │   │   ├── yi.py
│       │   │   └── zephyr.py
│       │   ├── data_utils.py
│       │   ├── debug/
│       │   │   └── profiler.py
│       │   ├── deprecated.py
│       │   ├── envs.py
│       │   ├── llava_conversation_lib.py
│       │   ├── model.py
│       │   ├── multimodal.py
│       │   ├── position_interpolation/
│       │   │   ├── __init__.py
│       │   │   └── llama_rope_scaled_monkey_patch.py
│       │   ├── protocol.py
│       │   ├── test_utils.py
│       │   └── versioning.py
│       └── version.py
└── tests/
    ├── __init__.py
    ├── conftest.py
    ├── datasets/
    │   ├── __init__.py
    │   ├── conftest.py
    │   └── test_dataset.py
    ├── models/
    │   ├── __init__.py
    │   ├── test_auto_model.py
    │   ├── test_hf_decoder_model.py
    │   └── test_tool_inferencer.py
    ├── pipeline/
    │   ├── test_auto_pipeline.py
    │   ├── test_finetuner_distributed_loss.py
    │   ├── test_memory_safe_vllm_inferencer.py
    │   └── test_sglang_infernecer.py
    └── utils/
        ├── __init__.py
        ├── test_conversation_formatter.py
        ├── test_conversation_template.py
        └── test_data_utils.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
*.html linguist-detectable=false
*.js linguist-detectable=false
*.ipynb linguist-detectable=false
*RAFT.pdf filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text
docs/figs/*.gif filter=lfs diff=lfs merge=lfs -text


================================================
FILE: .github/ISSUE_TEMPLATE/api-feedback.md
================================================
---
name: API Feedback
about: Provide feedback regarding the current design of the API.
title: "[API Design]"
labels: ''
assignees: ''

---




================================================
FILE: .github/ISSUE_TEMPLATE/blank-template.md
================================================
---
name: Blank Template
about: Other issues
title: ''
labels: ''
assignees: ''

---




================================================
FILE: .github/ISSUE_TEMPLATE/bug-report.md
================================================
---
name: Bug Report
about: Create a report to help us improve
title: "[BUG]"
labels: bug
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Desktop (please complete the following information):**
 - OS: [e.g. iOS]
 - Browser [e.g. chrome, safari]
 - Version [e.g. 22]

**Smartphone (please complete the following information):**
 - Device: [e.g. iPhone6]
 - OS: [e.g. iOS8.1]
 - Browser [e.g. stock browser, safari]
 - Version [e.g. 22]

**Additional context**
Add any other context about the problem here.


================================================
FILE: .github/ISSUE_TEMPLATE/feature-request.md
================================================
---
name: Feature Request
about: Suggest an idea for this project
title: "[New Feature]"
labels: ''
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.


================================================
FILE: .github/workflows/documentation.yaml
================================================
name: Docs
on: [push, pull_request, workflow_dispatch]
jobs:
  docs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: python environment setup
        uses: actions/setup-python@v5.1.0
        with: 
          python-version: "3.11"
      - name: Install dependencies
        run: |
          pip install -r ./docs/requirements.txt
      - name: Sphinx build
        run: |
          sphinx-build docs/source _build
      - name: Deploy
        uses: peaceiris/actions-gh-pages@v3
        with:
          publish_branch: gh-pages
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: _build/
          force_orphan: true


================================================
FILE: .gitignore
================================================
# Initially taken from GitHub's Python gitignore file

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
_build

# C extensions
*.so

# tests and logs
tests/fixtures/cached_*_text.txt
logs/
lightning_logs/
lang_code_data/
log/
regression_test/*/new_output_models
regression_test/*/new_log
output_dir/
tests_out

# data files
data/

# output models
output_models
adapter_model/

# output data
output_data/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# vscode
.vs
.vscode

# Pycharm
.idea

# TF code
tensorflow_code

# Models
proc_data

# examples
runs
/runs_old
/wandb
/examples/runs
/examples/**/*.args
/examples/rag/sweep

# data
# /data
serialization_dir

# emacs
*.*~
debug.env

# vim
.*.swp

#ctags
tags

# .lock
*.lock

# DS_Store (MacOS)
.DS_Store

# ruff
.ruff_cache

# lm_evaluation cache
lm_cache/


================================================
FILE: .pre-commit-config.yaml
================================================
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: "v0.11.4"
    hooks:
      - id: ruff
        args: ["--fix", "--show-fixes", "--output-format=full"]
        exclude: ^.*\.(ipynb)$
      - id: ruff-format

================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
  overall community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or
  advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
  address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
LMFlow.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series
of actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within
the community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.

Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.


================================================
FILE: CONTRIBUTING.md
================================================
# LMFlow

We welcome contributions from the open-source community with open arms! We value and appreciate all types of participation, not just code. Whether you're answering questions, offering help, improving the documentation, or simply reaching out, your contributions are immensely valuable to us. So, if you're interested, don't hesitate to get involved!

To start, we encourage everyone to say hello in our public Discord channel. Here, we discuss the latest trends in Large Foundation models, showcase personal projects, help each other with contributions, or just hang out over a cup of coffee. Join us on Discord!

No matter how you choose to contribute, we strive to maintain an open, welcoming, and kind community. We ask that you read our code of conduct and be respectful during your interactions. It's also essential that you become familiar with the ethical guidelines that govern our project and adhere to the same principles of transparency and responsibility.

We highly value feedback from the community, so please don't hesitate to speak up if you have any suggestions that can help improve the library. We read and consider every message, comment, issue, and pull request (PR).


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

================================================
FILE: README.md
================================================
<p align="center" width="50%">
<img src="docs/assets/logo.png" alt="LMFlow" style="width: 50%; min-width: 200px; display: block; margin: auto; background-color: transparent;">
</p>

# LMFlow

<h4 align="center">
    <p>
        <b>English</b> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_zh-hans.md">简体中文</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_es.md">Español</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_jp.md">日本語</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_ko.md">한국어</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_hindi.md">हिंदी</a>
    </p>
</h4>

[![Website](https://img.shields.io/badge/Website-Demo-20B2AA.svg)](https://lmflow.com)
[![Code License](https://img.shields.io/badge/Code%20License-Apache_2.0-green.svg)](https://github.com/OptimalScale/LMFlow/blob/main/LICENSE)
[![Python 3.9+](https://img.shields.io/badge/Python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/)
[![Doc](https://img.shields.io/badge/Website-Doc-ff69b4.svg)](https://optimalscale.github.io/LMFlow/)
[![Embark](https://img.shields.io/badge/Discord-LMFlow-%237289da.svg?logo=discord)](https://discord.gg/u9VJNpzhvA)
[![slack badge](https://img.shields.io/badge/Slack-Join-blueviolet?logo=slack&amp)](https://join.slack.com/t/lmflow/shared_invite/zt-1wju9nicy-woXbNtS~5MavHSAtiMxmxQ)
[![WeChat badge](https://img.shields.io/badge/WeChat-Join-brightgreen?logo=wechat&amp)](https://ibb.co/ZhM4hhn)

An extensible, convenient, and efficient toolbox for finetuning large machine learning models, designed to be user-friendly, speedy and reliable, and accessible to the entire community.

<p align="center" width="100%">
<img src="docs/assets/features.png" alt="LMFlow-features" style="width: 100%; min-width: 300px; display: block; margin: auto;">
</p>

## Latest News
> [!IMPORTANT]
> * :exclamation: [2025-07-09] We have a major update to LMFlow with full Accelerate support and extensive streamlining. If you're looking for the previous version, please use `git checkout v0.0.10`, or check out the [v0.0.10 branch](https://github.com/OptimalScale/LMFlow/tree/v0.0.10). View all releases [here](https://github.com/OptimalScale/LMFlow/tags).

* [2024-12-02] Support [Hymba](https://github.com/NVlabs/hymba), a new family of small language models featuring a hybrid-head parallel architecture. Check out [Post-training Hymba](https://github.com/OptimalScale/LMFlow/tree/main/experimental/Hymba) for more details.
* [2024-07-01] 🏆 LMFlow receives the [**Best Demo Paper Award**](https://docs.google.com/presentation/d/1TVDooAZqkNObz5ysVhDFtqnnVHR-u8wqYvgix-gzPMs/edit#slide=id.g2e55907bbcc_0_70) at **NAACL 2024**! 🎉
* [2024-06-30] Expanding Optimization Options! We now support custom optimizer training with a variety of optimizers. Dive into the details and try out the new features with our updated script at [custom_optimizers](https://github.com/OptimalScale/LMFlow/blob/main/scripts/run_finetune_with_custom_optim.sh).
* [2024-04-25] :rocket: Support conversation templates! We've preset the latest [Llama-3](https://huggingface.co/meta-llama/Meta-Llama-3-70B) and [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) conversation templates as well as some frequently used templates such as `chatml` (see all templates [here](https://optimalscale.github.io/LMFlow/examples/DATASETS.html#conversation-template)), and we are working on adding more preset templates. Add the corresponding `--conversation_template` to the shell script and you are all set! :rocket:

<details> <summary>More news...</summary>

* [2024-03-27] Support [LISA](https://arxiv.org/abs/2403.17919), enabling 7B training in 24G memory without offloading! 
* [2023-09-11] Support [speculative decoding](https://arxiv.org/abs/2211.17192). Check out [speculative_decoding](https://github.com/OptimalScale/LMFlow/blob/main/scripts/speculative_decoding/README.md) for the usage and acceleration details.
* [2023-08-14] Support long context inference with position interpolation (Linear & NTK scaling) for LLaMA models. Check out [position_interpolation](https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) for more details.
* [2023-08-07] Support [Flash Attention-2](https://crfm.stanford.edu/2023/07/17/flash2.html). Check out [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) for more details.
* [2023-08-02] Support [Llama2](https://ai.meta.com/llama/), [ChatGLM2](https://huggingface.co/THUDM/chatglm2-6b), and [Baichuan](https://huggingface.co/baichuan-inc/Baichuan-7B) models.
* [2023-07-23] [LMFlow multimodal chatbot](https://github.com/OptimalScale/LMFlow/blob/main/scripts/run_vis_chatbot_gradio_minigpt4.sh) is now available! Support multimodal inputs of images and texts. [Online Demo](http://multimodal.lmflow.online) is also provided (We hold the service on a single GPU, hence one may experience "queuing" or "application busy" sometimes when multiple users are accessing at the same time, please wait and attempt again later when such event happens)![image](https://github.com/OptimalScale/LMFlow/blob/rpan-vision-encoder/docs/assets/multimodal-chatbot-demo.gif)
* [2023-06-22]  [LMFlow paper](https://arxiv.org/abs/2306.12420) is out! Check out our implementation details at https://arxiv.org/abs/2306.12420
* [2023-06-16] Our finetuned Robin-33B-V2 scored an impressive 64.1 on the Huggingface LLM leaderboard in our offline evaluation, outperforming major open-source LLMs! All checkpoints (7B, 13B, 33B, and 65B) are [released](https://huggingface.co/OptimalScale)! Check out the performance [here](https://medium.com/@hkust.ml/robin-v2-launches-achieves-unparalleled-performance-on-openllm-4f6886e822c1).
* [2023-06-07] LMFlow is now officially available on PyPI! Install it with `pip install lmflow-finetune`!
* [2023-05-30] Release [Robin-13B-v2](https://huggingface.co/OptimalScale/robin-13b-v2-delta) and [Robin-33B-v2](https://huggingface.co/OptimalScale/robin-33b-v2-delta)!

* [2023-05-15] Release [LMFlow-data](http://lmflow.org:5000/lmflow_data.tar.gz), the training dataset of Robin-7B-v2. A new [test data](http://lmflow.org:5000/lmflow_chat_en_dialog_multiturn_single_nll_text2text.tar.gz) is also released.
* [2023-05-09] Release [Robin-7B-v2](http://lmflow.org:5000/robin-7b-v2-delta.tar.gz), achieving competitive performance on chitchat, commonsense reasoning and instruction-following tasks. Refer to our [comprehensive study](https://medium.com/@hkust.ml/lmflow-benchmark-an-automatic-evaluation-framework-for-open-source-llms-ef5c6f142418).
* [2023-05-08] Release [LMFlow Benchmark](https://medium.com/@hkust.ml/lmflow-benchmark-an-automatic-evaluation-framework-for-open-source-llms-ef5c6f142418), an automatic evaluation framework for open-source chat-style LLMs. [Benchmark results](https://docs.google.com/spreadsheets/d/1JYh4_pxNzmNA9I0YM2epgRA7VXBIeIGS64gPJBg5NHA/edit#gid=0) on 31 popular models are reported. [Participate in LMFlow Benchmark](https://github.com/OptimalScale/LMFlow#33-lmflow-benchmark).
* [2023-04-21] Release [Robin-7B](http://lmflow.org:5000/robin-7b.tar.gz) (based on LLaMA-7B), and two models for commercial use: Parakeets-2.7B (based on GPT-NEO-2.7B) and Cokatoo-7B (based on StableLM-7B) [Download here](https://github.com/OptimalScale/LMFlow/tree/main#model-zoo)
* [2023-04-15] Inference: Support streaming output and ChatGLM.
* [2023-04-10] We propose a new alignment algorithm: [Reward rAnked FineTuning (RAFT)](https://optimalscale.github.io/LMFlow/examples/raft.html), which is more efficient than conventional (PPO-based) RLHF. [[Paper](https://arxiv.org/abs/2304.06767)]
* [2023-04-02] [Web service](https://lmflow.com/) is online!
* [2023-04-01] Release three instruction-tuned checkpoints and three medical checkpoints in [model zoo](https://github.com/OptimalScale/LMFlow#model-zoo): LLaMA-7B-tuned, LLaMA-13B-tuned, LLaMA-33B-tuned, LLaMA-7B-medical, LLaMA-13B-medical, and LLaMA-33B-medical.
* [2023-03-27] Support full tuning and lora tuning for all decoder models.
* [2023-03-27] [Task-tuned model beats ChatGPT on medical domain](https://github.com/OptimalScale/LMFlow#model-performance).
* [2023-03-27] Release code and checkpoints - [version 0.0.1](https://optimalscale.github.io/LMFlow/)! [Our task-tuned model beats ChatGPT on medical domain](https://github.com/OptimalScale/LMFlow#model-performance).

</details>

## Table of Contents

- [LMFlow](#lmflow)
  - [Latest News](#latest-news)
  - [Table of Contents](#table-of-contents)
  - [Quick Start](#quick-start)
    - [Setup](#setup)
    - [Prepare Dataset](#prepare-dataset)
    - [Finetuning](#finetuning)
      - [Estimated Hardware Requirement](#estimated-hardware-requirement)
      - [Full Finetuning](#full-finetuning)
      - [LISA](#lisa)
      - [LoRA](#lora)
    - [Inference](#inference)
    - [Deployment](#deployment)
    - [Evaluation](#evaluation)
  - [Supported Features](#supported-features)
  - [Support](#support)
  - [License](#license)
  - [Citation](#citation)


## Quick Start

### Setup

Our package has been tested on Linux (Ubuntu 20.04). Other platforms (macOS, Windows) are not fully tested, so you may encounter unexpected errors there. If you are using LMFlow for the first time, we recommend trying it on a Linux machine or Google Colab.

```bash
git clone -b v1.0.0 https://github.com/OptimalScale/LMFlow.git
cd LMFlow
conda create -n lmflow python=3.9 -y
conda activate lmflow
conda install mpi4py
pip install -e .
```

<details><summary> Looking for a previous version? </summary>

```bash
git clone -b v0.0.10 https://github.com/OptimalScale/LMFlow.git
cd LMFlow
conda create -n lmflow python=3.9 -y
conda activate lmflow
conda install mpi4py
pip install -e .
```

</details>

<details><summary> For CUDA versions 10.3-11.7 </summary>

```bash
git clone -b v0.0.5 https://github.com/OptimalScale/LMFlow.git
cd LMFlow
conda create -n lmflow python=3.9 -y
conda activate lmflow
conda install mpi4py
pip install -e .
```

</details>

> [!TIP]
> We use WandB to track and visualize the training process by default. Before running the training scripts, users may need to log in to WandB using the command: 
>
>```bash
>wandb login
>```
>
> For detailed instructions, refer to the [WandB Quickstart Guide](https://docs.wandb.ai/quickstart/). Step 1 (registration) and Step 2 (login using your WandB API key) should be sufficient to set up your environment.
>
> <details><summary>Disabling wandb</summary>  
>
> One can disable wandb by either:  
>
> 1. Adding environment variable before running the training command.
>
>```bash
>export WANDB_MODE=disabled
>```
>
> 2. OR, specifying the integrations to report the results and logs to. In the training script, add:
>
>```bash
>--report_to none \
>```
>
> </details>

### Prepare Dataset

Please refer to our [doc](https://optimalscale.github.io/LMFlow/examples/DATASETS.html).

### Finetuning

#### Estimated Hardware Requirement

| Method                 | 0.5B |  3B  |  7B  |  14B  |  30B  |  70B  |  `x`B   |
| ---------------------- | ---- | ---- | ---- | ----- | ----- | ----- | ------- |
| Full `bf16`/`fp16`     |  9GB | 55GB |120GB | 240GB | 600GB | 1200GB| `18x`GB |
| LoRA                   |  1GB | 6GB  | 16GB |  32GB |  64GB | 160GB |  `2x`GB |
| QLoRA `quant_bit=8`    | 0.7GB| 3GB  | 10GB |  20GB |  40GB |   80GB|  `x`GB  |
| QLoRA `quant_bit=4`    | 0.4GB| 1.5GB|  6GB |  12GB |  24GB |   48GB| `x/2`GB |


#### Full Finetuning

Full training updates all the parameters to finetune a language model.
Here is an example to finetune a GPT-2 base model.

```sh
cd data && ./download.sh alpaca && cd -

bash ./scripts/run_finetune.sh \
  --model_name_or_path gpt2 \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_gpt2
```

> [!TIP]
> For conversation datasets, specify a conversation template for better performance by adding `--conversation_template` to the command.
>
> <details><summary>Llama-3-8B conversation dataset example</summary>  
>
>```bash
>cd data && ./download.sh alpaca && cd -
>
>bash ./scripts/run_finetune.sh \
>  --model_name_or_path meta-llama/Meta-Llama-3-8B \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama3 \
>  --output_model_path output_models/finetuned_llama3_8b
>```
>
> </details>

#### LISA

[LISA](https://arxiv.org/abs/2403.17919) is a memory-efficient finetuning algorithm that allows a tradeoff between memory and the number of randomly unfrozen layers. This script is currently only tested on a single GPU. Please stay tuned for our latest updates :smile:

```sh
cd data && ./download.sh alpaca && cd -

bash ./scripts/run_finetune_with_lisa.sh \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_llama2_7b \
  --lisa_activated_layers 1 \
  --lisa_interval_steps 20
```

> [!TIP]
> <details><summary>Llama-2-7B conversation dataset example</summary>  
>
>```bash
>cd data && ./download.sh alpaca && cd -
>
>bash ./scripts/run_finetune_with_lisa.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama2 \
>  --output_model_path output_models/finetuned_llama2_7b_lisa \
>  --lisa_activated_layers 1 \
>  --lisa_interval_steps 20
>```
>
> </details>

#### LoRA

LoRA is a parameter-efficient finetuning algorithm and is more efficient than full finetuning.

```sh
cd data && ./download.sh alpaca && cd -

bash ./scripts/run_finetune_with_lora.sh \
  --model_name_or_path facebook/galactica-1.3b \
  --dataset_path data/alpaca/train_conversation \
  --output_lora_path output_models/finetuned_galactica_lora
```

> [!TIP]
> <details><summary>Llama-2-7B conversation dataset example</summary>  
>
>```bash
>cd data && ./download.sh alpaca && cd -
>
>bash ./scripts/run_finetune_with_lora.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama2 \
>  --output_model_path output_models/finetuned_llama2_7b_lora
>```
>
> </details>
>
> <details><summary>Merge LoRA Weight</summary>
>
>Merge LoRA weight and the base model into one using:  
>
>```sh
>bash ./scripts/run_merge_lora.sh \
>  --model_name_or_path Qwen/Qwen1.5-1.8B \
>  --lora_model_path output_models/lora \
>  --output_model_path output_models/lora_merged
>```
>
></details>

### Inference

After finetuning, you can run the following command to chat with the model.
```sh
bash ./scripts/run_chatbot.sh output_models/finetuned_gpt2
```

> [!TIP]
> We recommend using SGLang for faster batch inference.
>
> <details><summary>Faster inference using SGLang</summary>  
>
>```bash
>bash ./scripts/run_sglang_inference.sh
>```
> Note: If you encounter the error `ModuleNotFoundError: No module named 'common_ops'` when using SGLang, please try `apt-get update` and then `apt install numactl`.
> </details>

### Deployment

If you want to deploy your own model locally, we provide a gradio-based UI for building chatbots. 
Running the following command will launch the demo for robin-7b:

```sh
pip install gradio
python ./examples/chatbot_gradio.py --deepspeed configs/archive/ds_config_chatbot.json --model_name_or_path YOUR-LLAMA  --lora_model_path ./robin-7b --prompt_structure "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: {input_text}###Assistant:"       --end_string "#" --max_new_tokens 200
```

### Evaluation

We recommend using [LM Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness) for most evaluation purposes.

## Supported Features

<details> <summary>Finetune Acceleration & Memory Optimization</summary>

* LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning
  
  LISA is a novel and memory-efficient training strategy for large language models that outperforms existing methods like LoRA by selectively freezing layers during optimization. Check out [LISA](https://arxiv.org/abs/2403.17919) for more details.  
  In LMFlow, activate LISA using `--use_lisa 1` in your training command. Control the number of activated layers with `--lisa_activated_layers 2`, and adjust the layer-freezing interval using `--lisa_interval_steps 20`.

* LoRA
  
  LoRA is a parameter-efficient finetuning algorithm and is more efficient than full finetuning. Check out the [LoRA](#lora) section for more details.

* FlashAttention

  LMFlow supports both FlashAttention-1 and the latest FlashAttention-2. Check out [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) for more details.

* Gradient Checkpointing
  
  [Gradient checkpointing](https://github.com/cybertronai/gradient-checkpointing) is a memory optimization technique that trades compute for memory.
  It is useful when the model is too large to fit into GPU memory. 
  Use it by just adding `--gradient_checkpointing` to your training command.

* Deepspeed Zero3
  
  LMFlow supports [Deepspeed Zero-3 Offload](https://www.deepspeed.ai/2021/03/07/zero3-offload.html). 
  We provide an example [deepspeed config](https://github.com/OptimalScale/LMFlow/blob/main/configs/deepspeed/zero3.json), and you can directly use it.

</details>

<details> <summary>Inference Acceleration</summary>

* LLaMA Inference on CPU

  Thanks to the great efforts of [llama.cpp](https://github.com/ggerganov/llama.cpp). It is possible for everyone to run their LLaMA models on CPU by 4-bit quantization. We provide a script to convert LLaMA LoRA weights to `.pt` files. You only need to use `convert-pth-to-ggml.py` in llama.cpp to perform quantization.

* FlashAttention

  LMFlow supports both FlashAttention-1 and the latest FlashAttention-2. Check out [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) for more details.

* vLLM

  Try vLLM for fast and easy-to-use LLM inference and serving. Thanks for the [great work](https://github.com/vllm-project/vllm)!

</details>

<details> <summary>Long Context</summary>

* Position Interpolation for LLaMA Models

  LMFlow now supports the latest Linear & NTK (Neural Tangent Kernel) scaling techniques for LLaMA models. Check out [position_interpolation](https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) for more details.

</details>

<details> <summary>Model Customization</summary>

* Vocabulary Extension

  Now you can train your own sentencepiece tokenizer and merge it with the model's original Hugging Face tokenizer. Check out [vocab_extension](https://github.com/OptimalScale/LMFlow/blob/main/scripts/vocab_extension) for more details.

</details>

<details> <summary>Multimodal</summary>

* Multimodal Chatbot

  LMFlow supports multimodal inputs of images and texts. Check out our [LMFlow multimodal chatbot](https://github.com/OptimalScale/LMFlow/blob/main/scripts/run_vis_chatbot_gradio_minigpt4.sh).

</details>

<details> <summary>Custom Optimization</summary>

* Custom Optimization

  LMFlow now supports custom optimizer training with a variety of optimizers. Elevate your model's performance with tailored optimization strategies. Dive into the details and try out the new features with our updated script at [custom_optimizers](https://github.com/OptimalScale/LMFlow/blob/main/scripts/run_finetune_with_custom_optim.sh).

  The following table evaluates the performance of custom optimizers in the fine-tuning process of GPT-2 on the Alpaca dataset, emphasizing their individual impacts on the training loss. The specific hyperparameter settings utilize default configurations, which can be customized and adjusted at [custom_optimizers](https://github.com/OptimalScale/LMFlow/blob/main/scripts/run_finetune_with_custom_optim.sh). It is important to note that the evaluations were conducted over a duration of 0.1 epochs to provide a preliminary insight into the optimizers' effectiveness.

  | Optimizer Name | Train Loss |
  |----------------|------------|
  | RMSprop        | 2.4016     |
  | LION-32bit     | 2.4041     |
  | Adam           | 2.4292     |
  | AdamP          | 2.4295     |
  | AdamW          | 2.4469     |
  | AdaFactor      | 2.4543     |
  | AdaBound       | 2.4547     |
  | AdamWScheduleFree       | 2.4677     |
  | Adan           | 2.5063     |
  | NAdam          | 2.5569     |
  | AdaBelief      | 2.5857     |
  | AdaMax         | 2.5924     |
  | RAdam          | 2.6104     |
  | AdaDelta       | 2.6298     |
  | AdaGrad        | 2.8657     |
  | Yogi           | 2.9314     |
  | NovoGrad       | 3.1071     |
  | Sophia         | 3.1517     |
  | LAMB           | 3.2350     |
  | LARS           | 3.3329     |
  | SGDScheduleFree        | 3.3541     |
  | SGDP           | 3.3567     |
  | SGD            | 3.3734     |

</details>

## Support

If you need any help, please submit a GitHub issue.

## License

The code included in this project is licensed under the [Apache 2.0 license](https://github.com/OptimalScale/LMFlow/blob/main/LICENSE).
If you wish to use the code and models included in this project for commercial purposes, please sign this [document](https://docs.google.com/forms/d/e/1FAIpQLSfJYcci6cbgpIvx_Fh1xDL6pNkzsjGDH1QIcm4cYk88K2tqkw/viewform?usp=pp_url) to obtain authorization.

## Citation

If you find this repository useful, please consider giving ⭐ and citing our [paper](https://arxiv.org/abs/2306.12420):

```citation
@article{diao2023lmflow,
  title={Lmflow: An extensible toolkit for finetuning and inference of large foundation models},
  author={Diao, Shizhe and Pan, Rui and Dong, Hanze and Shum, Ka Shun and Zhang, Jipeng and Xiong, Wei and Zhang, Tong},
  journal={arXiv preprint arXiv:2306.12420},
  year={2023}
}
```

```citation
@article{dong2023raft,
  title={Raft: Reward ranked finetuning for generative foundation model alignment},
  author={Dong, Hanze and Xiong, Wei and Goyal, Deepanshu and Pan, Rui and Diao, Shizhe and Zhang, Jipeng and Shum, Kashun and Zhang, Tong},
  journal={arXiv preprint arXiv:2304.06767},
  year={2023}
}
```

```citation
@article{pan2024lisa,
  title={LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning}, 
  author={Pan, Rui and Liu, Xiang and Diao, Shizhe and Pi, Renjie and Zhang, Jipeng and Han, Chi and Zhang, Tong},
  journal={arXiv preprint arXiv:2403.17919},
  year={2024}
}
```


================================================
FILE: configs/accelerate_dsz0_config.yaml
================================================
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
  gradient_accumulation_steps: 16
  zero3_init_flag: false
  zero_stage: 0
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
gpu_ids: 
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
main_process_port: 12580

================================================
FILE: configs/accelerate_dsz2_config.yaml
================================================
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: false
  zero_stage: 2
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
gpu_ids:
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
main_process_port: 12580

================================================
FILE: configs/accelerate_dsz3_config.yaml
================================================
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
  deepspeed_multinode_launcher: standard
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: true
  zero3_save_16bit_model: true
  zero_stage: 3
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
gpu_ids:
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
main_process_port: 12580

================================================
FILE: configs/accelerate_fsdp_config.yaml
================================================
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: FSDP

fsdp_config:
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
  fsdp_min_num_params: 1000000
  fsdp_backward_prefetch: BACKWARD_PRE
  fsdp_forward_prefetch: false
  fsdp_cpu_ram_efficient_loading: true
  fsdp_offload_params: false
  fsdp_sharding_strategy: FULL_SHARD
  fsdp_state_dict_type: FULL_STATE_DICT
  fsdp_sync_module_states: true
  fsdp_use_orig_params: true

downcast_bf16: true
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8 # NOTE: distributed_type should be `NO` if you're training on a single GPU
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
main_process_port: 1204

================================================
FILE: configs/accelerate_singlegpu_config.yaml
================================================
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: 'NO'

fsdp_config:
  fsdp_auto_wrap_policy: SIZE_BASED_WRAP
  fsdp_min_num_params: 1000000
  fsdp_backward_prefetch: BACKWARD_PRE
  fsdp_forward_prefetch: false
  fsdp_cpu_ram_efficient_loading: true
  fsdp_offload_params: false
  fsdp_sharding_strategy: 'NO_SHARD'
  fsdp_state_dict_type: FULL_STATE_DICT
  fsdp_sync_module_states: true
  fsdp_use_orig_params: true

downcast_bf16: true
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 1
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
main_process_port: 1204

================================================
FILE: configs/archive/accelerate_multigpu_config.yaml
================================================
compute_environment: LOCAL_MACHINE
distributed_type: MULTI_GPU
downcast_bf16: 'no'
dynamo_config:
  dynamo_backend: INDUCTOR
gpu_ids: 
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 1
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
main_process_port: 11002


================================================
FILE: configs/archive/accelerate_singlegpu_config.yaml
================================================
compute_environment: LOCAL_MACHINE
distributed_type: 'NO'
downcast_bf16: 'no'
dynamo_config:
  dynamo_backend: INDUCTOR
gpu_ids: 
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 1
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false


================================================
FILE: configs/archive/ds_config_chatbot.json
================================================
{
    "fp16": {
        "enabled": false
    },
    "bf16": {
        "enabled": true
    },
    "comms_logger": {
        "enabled": false,
        "verbose": false,
        "prof_all": false,
        "debug": false
    },
    "steps_per_print": 20000000000000000,
    "train_micro_batch_size_per_gpu": 1,
    "wall_clock_breakdown": false
}


================================================
FILE: configs/archive/ds_config_eval.json
================================================
{
    "fp16": {
        "enabled": false
    },
    "bf16": {
        "enabled": false
    },
    "steps_per_print": 2000,
    "train_micro_batch_size_per_gpu": 1,
    "wall_clock_breakdown": false
}


================================================
FILE: configs/archive/ds_config_multimodal.json
================================================
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },
    "bf16": {
        "enabled": "auto"
    },
    "train_micro_batch_size_per_gpu": "auto",
    "train_batch_size": "auto",
    "gradient_accumulation_steps": "auto",
    "zero_optimization": {
        "stage": 2,
        "overlap_comm": true,
        "contiguous_gradients": true,
        "sub_group_size": 1e9,
        "reduce_bucket_size": "auto"
    }
}

================================================
FILE: configs/archive/ds_config_vis_chatbot.json
================================================
{
    "fp16": {
        "enabled": false
    },
    "bf16": {
        "enabled": false
    },
    "comms_logger": {
        "enabled": false,
        "verbose": false,
        "prof_all": false,
        "debug": false
    },
    "steps_per_print": 20000000000000000,
    "train_micro_batch_size_per_gpu": 1,
    "wall_clock_breakdown": false
}

================================================
FILE: configs/deepspeed/zero0_no_offload.json
================================================
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },

    "bf16": {
        "enabled": "auto"
    },

    "zero_optimization": {
        "stage": 0,
        "allgather_partitions": true,
        "allgather_bucket_size": 2e8,
        "overlap_comm": true,
        "reduce_scatter": true,
        "reduce_bucket_size": 2e8,
        "contiguous_gradients": true
    },

    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 2000,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false
}


================================================
FILE: configs/deepspeed/zero2.json
================================================
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },

    "bf16": {
        "enabled": "auto"
    },

    "zero_optimization": {
        "stage": 2,
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": true
        },
        "allgather_partitions": true,
        "allgather_bucket_size": 2e8,
        "overlap_comm": true,
        "reduce_scatter": true,
        "reduce_bucket_size": 2e8,
        "contiguous_gradients": true
    },

    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 2000,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false
}

================================================
FILE: configs/deepspeed/zero2_no_offload.json
================================================
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },

    "bf16": {
        "enabled": "auto"
    },

    "zero_optimization": {
        "stage": 2,
        "allgather_partitions": true,
        "allgather_bucket_size": 2e8,
        "overlap_comm": true,
        "reduce_scatter": true,
        "reduce_bucket_size": 2e8,
        "contiguous_gradients": true
    },

    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 2000,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false
}


================================================
FILE: configs/deepspeed/zero3.json
================================================
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },

    "bf16": {
        "enabled": "auto"
    },

    "zero_optimization": {
        "stage": 3,
        "offload_optimizer": {
            "device": "cpu"
        },
        "overlap_comm": true,
        "contiguous_gradients": true,
        "sub_group_size": 1e9,
        "reduce_bucket_size": "auto",
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_max_live_parameters": 2e10,
        "stage3_max_reuse_distance": 2e10,
        "stage3_gather_16bit_weights_on_model_save": true
    },

    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 2000,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false
}


================================================
FILE: configs/deepspeed/zero3_for_eval.json
================================================
{
    "bf16": {
        "enabled": true
    },
    "zero_optimization": {
        "stage": 3,
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": true
        },
        "offload_param": {
            "device": "cpu",
            "pin_memory": true
        },
        "overlap_comm": true,
        "contiguous_gradients": true,
        "sub_group_size": 1e9,
        "reduce_bucket_size": "auto",
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_max_live_parameters": 1e9,
        "stage3_max_reuse_distance": 1e9,
        "stage3_gather_16bit_weights_on_model_save": true
    },

    "steps_per_print": 2000,
    "train_micro_batch_size_per_gpu": 1,
    "wall_clock_breakdown": false
}


================================================
FILE: configs/deepspeed/zero3_no_offload.json
================================================
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 1000,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1
    },

    "bf16": {
        "enabled": "auto"
    },

    "zero_optimization": {
        "stage": 3,
        "overlap_comm": true,
        "contiguous_gradients": true,
        "sub_group_size": 1e9,
        "reduce_bucket_size": "auto",
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_max_live_parameters": 2e10,
        "stage3_max_reuse_distance": 2e10,
        "stage3_gather_16bit_weights_on_model_save": true
    },

    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 2000,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false
}


================================================
FILE: configs/iterative_dpo.yaml
================================================
# general
## model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
reference_model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
reward_model_name_or_path: sfairXC/FsfairX-LLaMA3-RM-v0.1
trust_remote_code: True

## data
dataset_path_list:
  - data/iterative-prompt-3it/iter1
  - data/iterative-prompt-3it/iter2
  - data/iterative-prompt-3it/iter3
conversation_template: llama3
preprocessing_num_workers: 16

## pipeline
output_dir: ./output_models/iterative_dpo
run_name: iterative_dpo
random_seed: 42
enable_distributed_inference: True
distributed_inference_num_instances: 8
initial_iter_idx: 0 # 0 refers to the first dataset in dataset_path_list
do_response_generation: True
do_scoring: True
do_dpo_align: True


# inference phase
## general
apply_chat_template: True
num_output_sequences: 8
use_beam_search: False
temperature: 1.0
top_p: 1.0
max_new_tokens: 2048
enable_decode_inference_result: True

## vllm
use_vllm: True
vllm_gpu_memory_utilization: 0.95
vllm_tensor_parallel_size: 1
vllm_inference_batch_size: 16


# reward model scoring phase
reward_arch_type: text_regression
reward_torch_dtype: bf16
reward_use_flash_attention: True
reward_model_inference_block_size: 2048
overwrite_cache: True
reward_model_inference_batch_size: 10 # the actual batch size for rm forward will be reward_model_inference_batch_size * num_output_sequences


# dpo phase
## model
do_train: True
use_flash_attention: True

## data
sampling_paired_method: max_min
margin_scale: 1.0
length_penalty: 0
max_prompt_length: 1000
mask_prompt: False

## pipeline
### training
accelerate_config_file: configs/accelerate_dsz2_config.yaml
bf16: True
num_train_epochs: 2
max_steps: 1200
learning_rate: 5.0e-7
warmup_steps: 100
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
gradient_accumulation_steps: 16
gradient_checkpointing: True
loss_type: sigmoid
lr_scheduler_type: cosine
optim: paged_adamw_32bit

### logging
logging_steps: 2
save_strategy: steps
save_steps: 500
evaluation_strategy: steps
eval_steps: 500
report_to: wandb

================================================
FILE: contrib/README.md
================================================
# Contributing to LMFlow

Thanks for your interest in LMFlow! The LMFlow team appreciates contributions in any form:

  * issues
  * documentation improvements
  * new features
  * bug fixes
  * and particularly, runnable examples with SOTA models or techniques.

For details of the contribution guidelines, please kindly refer to the following sections.

## How to Contribute

### How to create Pull Requests (PR)

One may refer to the following guideline for general Pull Request instructions [GitHub Pull Request Examples](https://gist.github.com/Chaser324/ce0505fbed06b947d962).

In short, every PR has the following steps:

  1. Fork the repository under your own account.
  2. Clone and install the repository to your local machine.
  3. Add your own modifications.
  4. Run tests and make sure everything is working.
  5. Push to your own remote repository.
  6. Check the instructions in the [guidebook](https://gist.github.com/Chaser324/ce0505fbed06b947d962), and make sure your remote modification is up-to-date with LMFlow's main branch.
  7. If not, go back to Step 3 and resolve the conflict.
  8. If so, create your PR. We will be reviewing the code soon and merge the changes into main once the review is finished :smile:

Currently, we enthusiastically welcome contributions of documentation and runnable examples. Runnable examples are collected under `contrib/{YOUR_NAME}` and can be used by everyone! :rocket:

## Style Guidelines

### Code Style

LMFlow adopts the [Google coding style](https://google.github.io/styleguide/) in principle. We encourage every contribution to follow the same style.

### Git Commits

We would appreciate the commit to follow the principles below:

  * Describe concisely what the commit does
  * Write the message in imperative mood, starting with a capitalized verb, e.g., "Fix typo in README" or "Add LISA support for model parallelism".
  * Squash commits to make sure that each commit describes a whole fix/feature

Thank you for your interest in LMFlow! Any suggestions and contributions would be greatly appreciated.


================================================
FILE: contrib/langchain/README.md
================================================
## Langchain

### Setup

```
pip install langchain
pip install langchain-openai langchain-anthropic langchain-google-genai langchain-chroma langchain-community bs4
```
     
### Run Chatbot

To run the script, go to the root of this repo and use the following command:

```
python contrib/langchain/retrieval_chatbot.py [options]
```

### Command-Line Arguments
- `--model-name-or-path` - Specifies the name or path of the model used for generating responses.
- `--provider` - Supports the following providers: `openai`, `anthropic`, `google`, and `huggingface`.
- `--set-url` - Retrieve content from a specified URL if enabled.
- `--set-txt` - Retrieve content from a local txt file if enabled.
- `--session-id` - Session id of this chat, default: `demo`.
- `--save-history` - Saves the chat history if enabled.
- `--save-dir` - Directory to store chat history, default: `tmp/chat_history`

### Example Usage

- Inference with `gpt-4o`, specified url and txt file
```
cd data && ./download.sh example_doc_for_retrieval.txt && cd -
python contrib/langchain/retrieval_chatbot.py --provider "openai" --model-name-or-path "gpt-4o" --set-url --set-txt
```
- Then set the url and txt file as follows:
```
Please enter the url: https://optimalscale.github.io/LMFlow/index.html
Please enter the text file path: data/example_doc_for_retrieval.txt
```

================================================
FILE: contrib/langchain/retrieval_chatbot.py
================================================
import argparse
import logging
import os
import re
from pathlib import Path

from langchain_anthropic import ChatAnthropic
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory

# retrieval usage
from langchain_community.document_loaders import TextLoader, WebBaseLoader
from langchain_community.llms import HuggingFacePipeline
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain_core.runnables import Runnable
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

logging.getLogger().setLevel(logging.ERROR)  # hide warning log


class LangchainChatbot:
    """Chatbot that combines a chat model, per-session history and optional retrieval.

    The prompt consists of a fixed system message, the session history, optional
    retrieved passages and the current user input. Passages can come from a web
    page (``set_retriever_url``) and/or a local text file (``set_retriever_file``).

    Args:
        model_name_or_path: Model identifier handed to the provider's LangChain wrapper.
        provider: One of ``"openai"``, ``"anthropic"``, ``"google"`` or ``"huggingface"``.

    Raises:
        OSError: If the API key environment variable for the provider is not set.
        ValueError: If the provider / model name combination is not recognised.
    """

    def __init__(self, model_name_or_path: str, provider: str):
        self.prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessage(content="You are a helpful chatbot."),
                MessagesPlaceholder(variable_name="history"),
                MessagesPlaceholder(variable_name="retriever", optional=True),
                HumanMessagePromptTemplate.from_template("{input}"),
            ]
        )
        self.model_name_or_path = model_name_or_path
        self.provider = provider
        self.check_valid_provider()
        self.model = self.get_model()
        self.retriever_url = None
        self.retriever_file = None
        # session_id -> ChatMessageHistory, filled lazily by get_session_history().
        self.memory = {}
        self.runnable: Runnable = self.prompt | self.model
        self.llm_chain = RunnableWithMessageHistory(
            self.runnable,
            self.get_session_history,
            input_messages_key="input",
            history_messages_key="history",
        )

    def check_valid_provider(self):
        """Validate the provider/model pair and the presence of the matching API key.

        Raises:
            OSError: If the required API key environment variable is missing.
            ValueError: If the provider is unknown or the model name does not
                match the provider.
        """
        provider = self.provider
        # Treat a missing model name as empty so the substring checks below fall
        # through to the ValueError instead of raising a TypeError.
        model_name_or_path = self.model_name_or_path or ""
        if provider == "openai" and "gpt" in model_name_or_path:
            if os.getenv("OPENAI_API_KEY") is None:
                raise OSError("OPENAI_API_KEY environment variable is not set.")
        elif provider == "anthropic" and "claude" in model_name_or_path:
            if os.getenv("ANTHROPIC_API_KEY") is None:
                raise OSError("ANTHROPIC_API_KEY environment variable is not set.")
        elif provider == "google" and "gemini" in model_name_or_path:
            if os.getenv("GOOGLE_API_KEY") is None:
                raise OSError("GOOGLE_API_KEY environment variable is not set.")
        elif provider == "huggingface":
            if os.getenv("HUGGINGFACEHUB_API_TOKEN") is None:
                raise OSError("HUGGINGFACEHUB_API_TOKEN environment variable is not set.")
        else:
            raise ValueError("Invalid provider or model_name_or_path.")

    def _build_retriever(self, loader, chunk_size, chunk_overlap):
        """Load documents with ``loader``, split them and index them in a Chroma store.

        Embeddings are always computed with ``OpenAIEmbeddings``, regardless of the
        chat provider selected for this chatbot.
        """
        documents = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        chunks = splitter.split_documents(documents)
        vectorstore = Chroma.from_documents(documents=chunks, embedding=OpenAIEmbeddings())
        # The number of returned documents has to go through search_kwargs.
        return vectorstore.as_retriever(search_kwargs={"k": 4})

    def set_retriever_url(self, url, chunk_size, chunk_overlap):
        """Index the web page at ``url`` and use it as a retrieval source."""
        self.retriever_url = self._build_retriever(WebBaseLoader(url), chunk_size, chunk_overlap)

    def set_retriever_file(self, file, chunk_size, chunk_overlap):
        """Index the UTF-8 text file at ``file`` and use it as a retrieval source."""
        self.retriever_file = self._build_retriever(TextLoader(file, encoding="utf-8"), chunk_size, chunk_overlap)

    def get_model(self):
        """Instantiate the LangChain model wrapper for the configured provider.

        Raises:
            ValueError: If the provider is not one of the supported values.
        """
        provider = self.provider
        model_name_or_path = self.model_name_or_path
        if provider == "openai":
            model = ChatOpenAI(model=model_name_or_path)
        elif provider == "anthropic":
            model = ChatAnthropic(model=model_name_or_path)
        elif provider == "google":
            model = ChatGoogleGenerativeAI(model=model_name_or_path)
        elif provider == "huggingface":
            model = HuggingFacePipeline.from_model_id(model_id=model_name_or_path, task="text-generation")
            # model = HuggingFaceEndpoint(repo_id=model_name_or_path)
        else:
            raise ValueError("Invalid provider.")
        return model

    def chat_with_chatbot(self, human_input, session_id):
        """Send one user message and return the model's reply as text.

        Passages retrieved from the configured URL and/or file sources are added
        to the prompt. The exchange is recorded in the history of ``session_id``.
        """
        retriever_search = []
        if self.retriever_url:
            retriever_search.extend(self.retrieve_by_url(human_input))
        if self.retriever_file:
            retriever_search.extend(self.retrieve_by_file(human_input))

        response = self.llm_chain.invoke(
            {"input": human_input, "retriever": retriever_search}, config={"configurable": {"session_id": session_id}}
        )
        # The HuggingFace pipeline wrapper yields the reply directly; chat models
        # yield a message object whose text is in ``.content``.
        return response if self.provider == "huggingface" else response.content

    @staticmethod
    def _retrieve(retriever, query):
        """Run ``query`` against ``retriever`` and collapse runs of newlines in each hit."""
        return [re.sub("\n+", "\n", result.page_content) for result in retriever.invoke(query)]

    def retrieve_by_url(self, query):
        """Return the passages retrieved from the URL source for ``query``."""
        return self._retrieve(self.retriever_url, query)

    def retrieve_by_file(self, query):
        """Return the passages retrieved from the text-file source for ``query``."""
        return self._retrieve(self.retriever_file, query)

    def get_session_history(self, session_id: str) -> BaseChatMessageHistory:
        """Return the history for ``session_id``, creating an empty one on first use."""
        if session_id not in self.memory:
            self.memory[session_id] = ChatMessageHistory()
        return self.memory[session_id]


def get_cli() -> argparse.ArgumentParser:
    """Build the command-line parser for the retrieval chatbot.

    ``--model-name-or-path`` and ``--provider`` are mandatory: without them the
    chatbot cannot be constructed, so the parser rejects the command line up
    front instead of letting a ``None`` value fail later.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
    )
    parser.add_argument("--model-name-or-path", type=str, required=True, help="Model name or path")
    parser.add_argument(
        "--provider",
        type=str,
        required=True,
        choices=["openai", "anthropic", "google", "huggingface"],
        help="Provider of the model",
    )
    parser.add_argument("--set-url", action="store_true", help="Set a URL for retrieval if enabled")
    parser.add_argument("--set-txt", action="store_true", help="Set a single text file for retrieval if enabled")
    parser.add_argument("--chunk-size", type=int, default=400, help="Chunk size for splitting documents.")
    parser.add_argument("--chunk-overlap", type=int, default=20, help="Chunk overlap for splitting documents.")
    parser.add_argument("--session-id", type=str, default="demo", help="Session id of this chat")
    parser.add_argument("--save-history", action="store_true", help="Save chat history if enabled")
    parser.add_argument(
        "--save-dir", type=Path, default=Path("tmp", "chat_history"), help="Directory to store chat history"
    )
    return parser


def main(
    model_name_or_path: str,
    provider: str,
    set_url: bool,
    set_txt: bool,
    chunk_size: int,
    chunk_overlap: int,
    session_id: str,
    save_history: bool,
    save_dir: Path,
):
    """Run an interactive chat loop on stdin/stdout.

    Args:
        model_name_or_path: Model identifier passed to ``LangchainChatbot``.
        provider: Model provider passed to ``LangchainChatbot``.
        set_url: If true, prompt for a URL and use it as a retrieval source.
        set_txt: If true, prompt for a text file path and use it as a retrieval source.
        chunk_size: Chunk size used when splitting retrieval documents.
        chunk_overlap: Chunk overlap used when splitting retrieval documents.
        session_id: Key under which the chat history is stored.
        save_history: If true, write the chat history to ``save_dir`` on exit.
        save_dir: Directory that receives ``{model}_{session_id}.txt``.

    The loop ends when the user types ``exit`` or stdin is closed.
    """
    chatbot = LangchainChatbot(model_name_or_path=model_name_or_path, provider=provider)
    if set_url:
        url = input("Please enter the url: ")
        chatbot.set_retriever_url(url, chunk_size, chunk_overlap)
    if set_txt:
        file = input("Please enter the text file path: ")
        chatbot.set_retriever_file(file, chunk_size, chunk_overlap)
    while True:
        try:
            human_input = input("User: ")
        except EOFError:
            # stdin was closed (Ctrl-D or exhausted pipe): treat it like "exit"
            # so the history can still be saved below.
            break
        if human_input == "exit":
            break
        response = chatbot.chat_with_chatbot(human_input, session_id)
        print(f"Chatbot: {response}")
    if save_history:
        # Keep only the last path component so names such as "org/model" do not
        # introduce sub-directories into the output file name.
        if "/" in model_name_or_path:
            model_name_or_path = Path(model_name_or_path).parts[-1]
        os.makedirs(save_dir, exist_ok=True)
        save_path = Path(save_dir, f"{model_name_or_path}_{session_id}.txt")
        # get_session_history() creates an empty history when the user left
        # before sending any message; indexing chatbot.memory directly would
        # raise KeyError in that case.
        history = chatbot.get_session_history(session_id)
        with open(save_path, "w", encoding="utf-8") as file:
            file.write(str(history.messages))


if __name__ == "__main__":
    # Parse the command line and forward every option to main() by keyword.
    cli_namespace = get_cli().parse_args()
    main(**vars(cli_namespace))


================================================
FILE: contrib/long-context/hf_sft_full_finetune.sh
================================================
#!/bin/bash
# accelerate launch --main_process_port 0 ...

# Finetune: runs sft_summarizer.py on microsoft/Phi-3-vision-128k-instruct with
# PEFT disabled (--use_peft False). Arguments are collected in an array so each
# flag sits on its own line without backslash continuations.
train_args=(
    --model_name_or_path microsoft/Phi-3-vision-128k-instruct
    --learning_rate 1e-3
    --per_device_train_batch_size 1
    --per_device_eval_batch_size 1
    --gradient_accumulation_steps 1
    --trust_remote_code
    --output_dir output_models/finetuned_Phi3
    --logging_steps 1
    --num_train_epochs 1
    --save_strategy "steps"
    --save_total_limit 2
    --lr_scheduler_type "constant"
    --max_steps -1
    --torch_dtype 'bfloat16'
    --gradient_checkpointing
    --logging_strategy "epoch"
    --do_eval True
    --evaluation_strategy 'epoch'
    --bf16
    --bf16_full_eval True
    --max_seq_length 10000
    --eval_accumulation_steps 4
    --use_peft False
    --save_only_model True
    --overwrite_output_dir True
)

python sft_summarizer.py "${train_args[@]}"

================================================
FILE: contrib/long-context/hf_sft_lora_flashattn.sh
================================================
#!/bin/bash
# accelerate launch --main_process_port 0 ...

# Finetune with LoRA and FlashAttention-2.
# --use_peft must be True here: with it set to False the --lora_r / --lora_alpha
# values below are never applied and the run is a full-parameter fine-tune.
python sft_summarizer.py    \
    --model_name_or_path microsoft/Phi-3-vision-128k-instruct     \
    --learning_rate 1e-3 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --trust_remote_code \
    --output_dir output_models/finetuned_Phi3 \
    --logging_steps 1 \
    --num_train_epochs 1 \
    --save_strategy "steps" \
    --save_total_limit 2    \
    --lr_scheduler_type "constant" \
    --max_steps -1 \
    --torch_dtype 'bfloat16'    \
    --gradient_checkpointing \
    --logging_strategy  "epoch" \
    --do_eval True \
    --evaluation_strategy 'epoch' \
    --bf16 \
    --bf16_full_eval True \
    --max_seq_length 10000 \
    --attn_implementation 'flash_attention_2' \
    --eval_accumulation_steps 4 \
    --use_peft True \
    --lora_r 16 \
    --lora_alpha 16 \
    --save_only_model True  \
    --overwrite_output_dir True

================================================
FILE: contrib/long-context/sft_summarizer.py
================================================
#!/usr/bin/env python
import logging
import os
from dataclasses import dataclass, field
from typing import Optional

import torch
import wandb
from colorama import Fore, init
from datasets import load_dataset
from tqdm.rich import tqdm
from transformers import AutoTokenizer, TrainerCallback, TrainingArguments

# os.environ['CUDA_VISIBLE_DEVICES'] = "6"
from transformers.trainer_callback import TrainerControl, TrainerState

from lmflow.utils.versioning import is_trl_available

if is_trl_available():
    from trl import (
        DataCollatorForCompletionOnlyLM,
        ModelConfig,
        SFTConfig,
        SFTTrainer,
        get_kbit_device_map,
        get_peft_config,
        get_quantization_config,
    )
    from trl.commands.cli_utils import TrlParser
else:
    raise ImportError("Please install trl package to use sft_summarizer.py")


@dataclass
class UserArguments:
    """Script-specific options parsed alongside the TRL config dataclasses."""

    # Optional Weights & Biases API key; defaults to None.
    wandb_key: Optional[str] = field(
        default=None,
        metadata=dict(help="User's own wandb key if there are multiple wandb accounts in your server"),
    )
    # Project name passed to wandb.init().
    wandb_projectname: Optional[str] = field(
        default="huggingface_sft_summarizer",
        metadata=dict(help="The name of project saved in wandb"),
    )


if __name__ == "__main__":
    # Initialize logging, tqdm and init
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
    tqdm.pandas()
    init(autoreset=True)

    parser = TrlParser((UserArguments, SFTConfig, ModelConfig))
    user_args, sft_config, model_config = parser.parse_args_and_config()

    # Initialize wandb exactly once. With a key we log in and start a regular run;
    # without one we start an offline run. Both paths pass the project name, so
    # it is no longer dropped by a second wandb.init() call on an active run.
    if user_args.wandb_key:
        wandb.login(
            key=user_args.wandb_key
        )  # replace your own wandb key if there are multiple wandb accounts in your server
        wandb.init(project=user_args.wandb_projectname)
    else:
        wandb.init(project=user_args.wandb_projectname, mode="offline")

    # https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.TrainingArguments
    logging.debug(sft_config)
    logging.debug("-" * 50)
    logging.debug(model_config)
    logging.debug("-" * 50)
    # .get() so the script does not crash with KeyError when the variable is unset.
    logging.debug("cuda===> %s", os.environ.get("CUDA_VISIBLE_DEVICES"))

    ################
    # Model & Tokenizer
    ################
    # Resolved dtype is only logged; model_kwargs below passes the raw
    # model_config.torch_dtype value through unchanged.
    torch_dtype = (
        model_config.torch_dtype
        if model_config.torch_dtype in ["auto", None]
        else getattr(torch, model_config.torch_dtype)
    )
    logging.debug("torch_dtype===> %s", torch_dtype)
    # NOTE(review): quantization is skipped whenever PEFT is enabled — confirm
    # this is intended.
    if model_config.use_peft:
        quantization_config = None
    else:
        quantization_config = get_quantization_config(model_config)
    logging.debug("quantization_config===> %s", quantization_config)
    model_kwargs = dict(
        revision=model_config.model_revision,
        trust_remote_code=model_config.trust_remote_code,
        attn_implementation=model_config.attn_implementation,
        torch_dtype=model_config.torch_dtype,
        # The KV cache is disabled when gradient checkpointing is on.
        use_cache=False if sft_config.gradient_checkpointing else True,
        device_map=get_kbit_device_map() if quantization_config is not None else None,
        quantization_config=quantization_config,
        # Model files must already be available locally; nothing is downloaded.
        local_files_only=True,
    )
    logging.debug("model_kwargs: %s", model_kwargs)
    tokenizer = AutoTokenizer.from_pretrained(model_config.model_name_or_path, use_fast=True, local_files_only=True)
    # Reuse EOS as the padding token.
    tokenizer.pad_token = tokenizer.eos_token

    ################
    # Dataset
    ################

    train_dataset = load_dataset("LukaMagic077/downsampled_below10k_arxiv_dataset_on_hub", split="train")
    val_dataset = load_dataset("LukaMagic077/downsampled_below10k_arxiv_dataset_on_hub", split="validation")
    # test_dataset = load_dataset("LukaMagic077/downsampled_below10k_arxiv_dataset_on_hub", split='test')

    # Get the size of training dataset
    train_dataset_size = len(train_dataset)
    # Get the size of validation dataset
    val_dataset_size = len(val_dataset)

    # Print the size of dataset
    logging.debug(f"Training dataset size: {train_dataset_size}")
    logging.debug(f"Validation dataset size: {val_dataset_size}")

    ################
    # Training
    ################

    # Define datacollector
    # NOTE(review): this collator is constructed but never passed to SFTTrainer
    # below — confirm whether it should be wired in.
    data_collector = DataCollatorForCompletionOnlyLM(
        instruction_template="article", response_template="abstract", tokenizer=tokenizer, mlm=False
    )

    # NOTE(review): this callback class is defined but never registered with the
    # trainer in this script.
    class WandbCallback(TrainerCallback):
        """Callback that logs the current global step whenever a checkpoint is saved."""

        def __init__(self, trainer):
            # trainer.model.to("cuda:0")
            self.model, self.tokenizer = trainer.model, trainer.tokenizer
            self.tokenizer.pad_token = self.tokenizer.eos_token
            logging.debug(Fore.GREEN)
            logging.debug("entering callback=====>")
            logging.debug(self.tokenizer)

        def on_save(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
            logging.debug("current step %s", state.global_step)
            return super().on_save(args, state, control, **kwargs)

    # The model is passed by name; SFTTrainer receives model_kwargs as
    # model_init_kwargs for loading it.
    trainer = SFTTrainer(
        model=model_config.model_name_or_path,
        model_init_kwargs=model_kwargs,
        args=sft_config,
        train_dataset=train_dataset,
        dataset_text_field="article",
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        peft_config=get_peft_config(model_config),
    )

    trainer.train()


================================================
FILE: contrib/rlhflow/reward_modeling.py
================================================
import os
import sys

sys.path.remove(os.path.abspath(os.path.dirname(sys.argv[0])))
import numpy as np
import torch
import torch.nn as nn
from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    HfArgumentParser,
    Trainer,
)

from lmflow.args import (
    AutoArguments,
    DatasetArguments,
    ModelArguments,
)

## Prepare training_args
pipeline_name = "finetuner"
PipelineArguments = AutoArguments.get_pipeline_args_class(pipeline_name)
parser = HfArgumentParser((ModelArguments, DatasetArguments, PipelineArguments))

# A single ".json" argument is read as a config file; anything else is parsed
# as regular command-line flags.
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
    model_args, data_args, pipeline_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else:
    model_args, data_args, pipeline_args = parser.parse_args_into_dataclasses()

# The dataset carries custom chosen_*/rejected_* columns, so the Trainer must not
# drop columns, and there is no label column.
pipeline_args.remove_unused_columns = False
pipeline_args.label_names = []

## Get model, by default we use lora to accelerate training
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
)
# trust_remote_code=True if you want to use chatglm
# num_labels=1: the classification head outputs one scalar per sequence, used as the reward.
model = AutoModelForSequenceClassification.from_pretrained(
    model_args.model_name_or_path, num_labels=1, torch_dtype=torch.bfloat16
)
model_lora = get_peft_model(model, peft_config)
model_lora.print_trainable_parameters()

## Get tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path)

if "llama" in model_args.model_name_or_path:
    # NOTE(review): here EOS ("[PAD]") and PAD ("</s>") are different tokens — confirm the mapping is intended.
    tokenizer.add_special_tokens(
        {
            "eos_token": "[PAD]",
            "bos_token": "</s>",
            "unk_token": "</s>",
            "pad_token": "</s>",
        }
    )
else:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

# We also need to add a pad_token for the model. Otherwise, the reward model cannot handle a batch of inputs
# Use the tokenizer's PAD id (not its EOS id): the two differ in the llama
# branch above, which made the assertion below fail.
model_lora.config.pad_token_id = tokenizer.pad_token_id
assert model_lora.config.pad_token_id == tokenizer.pad_token_id


## Get the dataset
def build_dataset(tokenizer, config):
    """
    Load a preference dataset from JSON, tokenize both responses and split it.

    Each record under the "instances" field is expected to look like
    {"positive": prompt + answer_positive, "negative": prompt + answer_negative},
    where the positive response is the preferred one.

    Pairs in which either tokenized side exceeds 512 tokens are dropped. When
    config.validation_split_percentage is positive, the trailing share of the
    (unshuffled) data becomes the eval set; otherwise the eval set is None.

    Returns a (train_dataset, eval_dataset) tuple.
    """

    def tokenize(sample):
        # Encode the preferred and the rejected text separately.
        encoded_chosen = tokenizer(sample["positive"], truncation=True)
        encoded_rejected = tokenizer(sample["negative"], truncation=True)
        sample["chosen_input_ids"] = encoded_chosen["input_ids"]
        sample["chosen_attention_mask"] = encoded_chosen["attention_mask"]
        sample["rejected_input_ids"] = encoded_rejected["input_ids"]
        sample["rejected_attention_mask"] = encoded_rejected["attention_mask"]
        return sample

    def within_length_limit(row):
        # Both sides of the pair must fit into 512 tokens.
        return len(row["chosen_input_ids"]) <= 512 and len(row["rejected_input_ids"]) <= 512

    ds = load_dataset("json", data_files=config.dataset_path, split="train", field="instances")
    ds = ds.map(tokenize, batched=False)
    ds = ds.filter(within_length_limit)

    # No validation share requested: everything is training data.
    if not config.validation_split_percentage > 0:
        return ds, None

    total = len(ds)
    idx_gap = int((1 - config.validation_split_percentage / 100) * total)
    train_dataset = ds.select(range(idx_gap))
    eval_dataset = ds.select(range(idx_gap, total))
    return train_dataset, eval_dataset


train_dataset, eval_dataset = build_dataset(tokenizer, data_args)
# eval_steps may be None when it was not configured; treat that as "no periodic
# evaluation" instead of comparing None with an int.
if not eval_dataset and (pipeline_args.eval_steps or 0) > 0:
    raise ValueError("Cannot evaluate on an empty eval set")
# build_dataset returns None for the eval set when no validation split is requested.
eval_dataset_size = len(eval_dataset) if eval_dataset is not None else 0
print("Training set: ", len(train_dataset), " Eval set: ", eval_dataset_size)


## Define the trainer
def compute_metrics(eval_pred):
    """Compute pairwise accuracy from the chosen/rejected reward predictions.

    eval_pred.predictions[0] holds the scores of the preferred responses and
    eval_pred.predictions[1] those of the rejected ones. A pair counts as
    correct when the preferred score is at least as high as the rejected one.
    """
    chosen_scores, rejected_scores = eval_pred.predictions[0], eval_pred.predictions[1]
    # The first element of every pair is the ground-truth preferred response.
    num_correct = np.sum(chosen_scores >= rejected_scores)
    return {"accuracy": num_correct / len(chosen_scores)}


class DataCollatorReward:
    """Collate tokenized preference pairs into one padded batch.

    The chosen and the rejected sequences are padded independently with the
    tokenizer's ``pad`` method and returned under separate keys.
    """

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def __call__(self, data):
        # One pass over the samples, producing (chosen, rejected) feature dicts.
        feature_pairs = [
            (
                {"input_ids": item["chosen_input_ids"], "attention_mask": item["chosen_attention_mask"]},
                {"input_ids": item["rejected_input_ids"], "attention_mask": item["rejected_attention_mask"]},
            )
            for item in data
        ]
        chosen_features = [pair[0] for pair in feature_pairs]
        rejected_features = [pair[1] for pair in feature_pairs]

        padded_chosen = self.tokenizer.pad(chosen_features, padding=True, return_tensors="pt")
        padded_rejected = self.tokenizer.pad(rejected_features, padding=True, return_tensors="pt")

        return {
            "chosen_input_ids": padded_chosen["input_ids"],
            "rejected_input_ids": padded_rejected["input_ids"],
            "chosen_attention_mask": padded_chosen["attention_mask"],
            "rejected_attention_mask": padded_rejected["attention_mask"],
            "return_loss": True,
        }


class RMTrainer(Trainer):
    """Trainer that optimizes a pairwise preference loss for reward modeling.

    Every batch is expected to carry the ``chosen_*`` and ``rejected_*`` keys
    read in :meth:`compute_loss`.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute ``-logsigmoid(chosen_reward - rejected_reward)`` averaged over the batch.

        Parameters
        ----------
        model :
            Model called once on the chosen and once on the rejected inputs;
            the first element of each output is used as the reward.
        inputs : dict
            Batch with ``chosen_input_ids``, ``chosen_attention_mask``,
            ``rejected_input_ids`` and ``rejected_attention_mask``.
        return_outputs : bool
            When True, also return both reward tensors.
        num_items_in_batch : optional
            Accepted because newer ``transformers`` versions pass it to
            ``compute_loss``; it is not used by this loss.

        Returns
        -------
        The loss, or ``(loss, {"chosen_rewards": ..., "rejected_rewards": ...})``
        when ``return_outputs`` is True.
        """
        chosen_rewards = model(input_ids=inputs["chosen_input_ids"], attention_mask=inputs["chosen_attention_mask"])[0]
        rejected_rewards = model(
            input_ids=inputs["rejected_input_ids"], attention_mask=inputs["rejected_attention_mask"]
        )[0]
        loss = -nn.functional.logsigmoid(chosen_rewards - rejected_rewards).mean()
        if return_outputs:
            return loss, {"chosen_rewards": chosen_rewards, "rejected_rewards": rejected_rewards}
        return loss


# Collator that pads the chosen / rejected sides of every pair separately.
data_collator = DataCollatorReward(tokenizer=tokenizer)

# Wire the LoRA-wrapped model, both dataset splits and the accuracy metric into the trainer.
trainer = RMTrainer(
    args=pipeline_args,
    model=model_lora,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

## Save model
model_lora.save_pretrained(pipeline_args.output_dir)


================================================
FILE: contrib/rlhflow/run_reward_modeling.sh
================================================
#!/bin/bash
# Please run this script under ${project_id} in project directory of
#   https://github.com/shizhediao/llm-ft
#     COMMIT: d5fecf30ba8011067b10cf51fede53a5ab6574e4
#
# Trains a reward model with contrib/rlhflow/reward_modeling.py through the
# deepspeed launcher.
# Usage: run_reward_modeling.sh ["<deepspeed launcher arguments>"]

deepspeed_args="--master_port=11000"      # Default argument
if [ $# -ge 1 ]; then
  deepspeed_args="$1"
fi

exp_id=rm
# This script lives in <project>/contrib/rlhflow, so the project root is two
# directory levels up (a single ".." would resolve to <project>/contrib).
project_dir=$(cd "$(dirname "$0")"/../..; pwd)
output_dir=${project_dir}/output_models/${exp_id}
log_dir=${project_dir}/log/${exp_id}

dataset_path=${project_dir}/data/hh_rlhf/rm/hh_rlhf_rm_training.json
if [ ! -d "${project_dir}/data/hh_rlhf" ]; then
  # Run the download in a subshell so the caller's working directory is untouched.
  (cd "${project_dir}/data" && ./download.sh hh_rlhf)
fi

mkdir -p "${output_dir}" "${log_dir}"

# ${deepspeed_args} is intentionally left unquoted so it can carry several launcher flags.
# stderr is duplicated into train.err *before* the pipe; a "2>" placed after
# "| tee" would only capture tee's own stderr, not the trainer's.
# NOTE(review): configs/archive/ds_config_zero2.json is not part of the repository
# tree, so the ZeRO-2 config under configs/deepspeed is used instead — confirm.
deepspeed ${deepspeed_args} \
  "${project_dir}/contrib/rlhflow/reward_modeling.py" \
    --model_name_or_path gpt2 \
    --dataset_path "${dataset_path}" \
    --output_dir "${output_dir}" --overwrite_output_dir \
    --num_train_epochs 1 \
    --learning_rate 3e-5 \
    --block_size 512 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --deepspeed "${project_dir}/configs/deepspeed/zero2.json" \
    --bf16 \
    --run_name rm_test \
    --validation_split_percentage 10 \
    --logging_steps 10 \
    --do_train \
    --ddp_timeout 72000 \
    --save_steps 999999 \
    --evaluation_strategy steps \
    --eval_steps 100 \
    --weight_decay 0.001 \
    --dataloader_num_workers 1 \
    2> >(tee "${log_dir}/train.err" >&2) \
    | tee "${log_dir}/train.log"


================================================
FILE: contrib/text2image/README.md
================================================
# Fine-tuning Text2Img

This is an extension for fine-tuning text-to-image diffusion models, built on `diffusers` and integrated into the LMFlow framework.

## Environment Preparation

After installing `lmflow`, run `pip install -r requirements.txt` to install the additional packages required for t2i fine-tuning.

## Data Preparation

The tree below shows the required data layout. Under a `dataset_path` (here, *example*), an `img` directory holds the image files by default, and `train.json`, `valid.json` and `test.json` describe the training, validation and testing data. `valid.json` and `test.json` are optional. If only one of them is provided, it is used for both validation and testing.

```bash
data
└── example
    ├── img
    │   ├── 00.jpg
    │   ├── 01.jpg
    │   ├── 02.jpg
    │   ├── 03.jpg
    │   └── 04.jpg
    ├── train.json
    ├── [valid.json]
    └── [test.json]
```

The `train.json` file should have the following format:

```json
{
    "type": "text-image",
    "instances": [
        {
            "image": "00.jpg",
            "text": "A photo of a <SKS> dog"
        },
        ...
    ]
}
```

The `valid.json` and `test.json` files should have the following format:

```json
{
    "type": "text-only",
    "instances": [
        {
            "text": "A photo of a <SKS> dog in front of Eiffel Tower."
        },
        ...
    ]
}
```

Here is a specific example of the data [dog_t2i_data_example](https://drive.google.com/drive/folders/106ahvIrXbiuZMBw0NuOTjY0vnM_xXARW?usp=sharing)

## Fine-tuning

For convenience, we provide a script `finetune_t2i.sh` for fine-tuning. It can be used as follows:

```bash
bash finetune_t2i.sh \
    --model_name_or_path stabilityai/stable-diffusion-2-1 \
    --dataset_path data/example
```

The `model_name_or_path` is the model name on [huggingface](https://huggingface.co/) or the path of the pre-trained model. The `dataset_path` is the path of the dataset, which should be organized as in the tree structure above.

There are also some optional arguments for the script:

- `model_type`: The type of the model, which can be `unet` or `transformer`. Default is `unet`. (The `transformer` is not supported yet.)
- `output_dir`: The output directory of the fine-tuned model. Default is `output`.
- `main_port`: The main port of the server. Default is `29500`.
- `img_size`: The size of the image for fine-tuning, validation and testing. Default is `768`.

For more customization, you can refer to the `finetune_t2i.sh` and `finetune_t2i.py`.


================================================
FILE: contrib/text2image/accelerate_t2i_config.yaml
================================================
# Hugging Face Accelerate launch configuration for text-to-image fine-tuning.
# finetune_t2i.sh passes this file to `accelerate launch --config_file`.
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: MULTI_GPU
downcast_bf16: 'no'
enable_cpu_affinity: false
gpu_ids: all
machine_rank: 0
main_training_function: main
# finetune_t2i.sh also passes --mixed_precision="fp16" to the training script.
mixed_precision: fp16
num_machines: 1
# NOTE(review): presumably one process per GPU; adjust to the number of GPUs actually available.
num_processes: 4
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false


================================================
FILE: contrib/text2image/diffuser_args.py
================================================
import os
from dataclasses import dataclass, field
from typing import Optional

from lmflow.args import DatasetArguments


@dataclass
class T2IDatasetArguments(DatasetArguments):
    """Dataset options for text-to-image fine-tuning.

    ``__post_init__`` validates the paths: ``dataset_path`` and ``image_folder``
    are mandatory, ``train_file`` falls back to ``train.json`` when that file
    exists under ``dataset_path``, and a validation / test file that is missing
    on disk is reset to ``None``.
    """

    image_folder: Optional[str] = field(default=None, metadata={"help": "The folder of the image file."})

    image_size: Optional[int] = field(default=512, metadata={"help": "The size of the image."})

    image_crop_type: Optional[str] = field(default="center", metadata={"help": "The type of image crop."})

    text_embedding_type: Optional[str] = field(default="raw", metadata={"help": "How to get text embedding."})

    is_t2i: Optional[bool] = field(default=True, metadata={"help": "Flag for the modality type."})

    @staticmethod
    def _require_extension(file_path: str, extension: str):
        """Assert that the text after the last dot of ``file_path`` equals ``extension``."""
        assert file_path.split(".")[-1] == extension, f"The file must be a {extension} file."

    def __post_init__(self):
        if self.dataset_path is None or self.image_folder is None:
            raise ValueError("The dataset_path, image_folder must be provided.")

        if self.train_file is None:
            if not os.path.exists(os.path.join(self.dataset_path, "train.json")):
                raise ValueError("The train_file must be provided.")
            self.train_file = "train.json"
        self._require_extension(self.train_file, "json")

        # Exactly one of the two files was given: reuse it for both roles.
        if (self.validation_file is None) != (self.test_file is None):
            shared_file = self.test_file if self.validation_file is None else self.validation_file
            self.validation_file = shared_file
            self.test_file = shared_file

        # Validate each optional file in turn and drop it when it does not exist on disk.
        for attr_name in ("validation_file", "test_file"):
            file_name = getattr(self, attr_name)
            if file_name is None:
                continue
            self._require_extension(file_name, "json")
            if not os.path.exists(os.path.join(self.dataset_path, file_name)):
                setattr(self, attr_name, None)


@dataclass
class DiffuserModelArguments:
    """Model-side options for T2I fine-tuning: which model to load and how LoRA is configured."""

    # ---- base model ----
    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={"help": "The model name or path."},
    )
    model_type: Optional[str] = field(
        default=None,
        metadata={"help": "The model type."},
    )

    # Disabled option, kept for reference:
    # torch_dtype: Optional[str] = field(
    #     default=None,
    #     metadata={
    #         "help": (
    #             "Override the default `torch.dtype` and load the model under this dtype. If `auto` is passed, the "
    #             "dtype will be automatically derived from the model's weights."
    #         ),
    #         "choices": ["auto", "bfloat16", "float16", "float32"],
    #     },
    # )

    # ---- LoRA ----
    use_lora: bool = field(default=False, metadata={"help": "Whether to lora."})
    lora_r: int = field(
        default=8,
        metadata={
            "help": "the rank of the lora parameters. The smaller lora_r is , the fewer parameters lora has.",
        },
    )
    lora_alpha: int = field(
        default=8,
        metadata={
            "help": "Merging ratio between the fine-tuned model and the original. "
            "This is controlled by a parameter called alpha in the paper.",
        },
    )
    lora_target_modules: list[str] = field(
        default=None,
        metadata={"help": "Modules to apply lora."},
    )
    lora_dropout: float = field(default=0.1, metadata={"help": "The dropout rate in lora.linear."})


@dataclass
class DiffuserTunerArguments:
    """Training-loop options for the T2I finetuner (output locations, optimisation, validation/test, checkpoints)."""

    # ---- output locations ----
    output_dir: Optional[str] = field(
        default="output",
        metadata={"help": "The output directory."},
    )
    logging_dir: Optional[str] = field(
        default="logs",
        metadata={"help": "The logging directory."},
    )
    overwrite_output_dir: bool = field(
        default=False,
        metadata={"help": "Overwrite the content of the output directory."},
    )

    # ---- optimisation ----
    mixed_precision: str = field(
        default="no",
        metadata={"help": "Whether to use mixed precision."},
    )
    do_train: bool = field(
        default=True,
        metadata={"help": "Whether to run training."},
    )
    num_train_epochs: Optional[int] = field(
        default=50,
        metadata={"help": "The number of training epochs."},
    )
    train_batch_size: Optional[int] = field(
        default=1,
        metadata={"help": "The number of batch size in training."},
    )
    learning_rate: Optional[float] = field(
        default=1e-4,
        metadata={"help": "The learning rate."},
    )
    weight_decay: Optional[float] = field(
        default=0.0,
        metadata={"help": "The weight decay."},
    )

    # ---- validation / testing ----
    do_valid: bool = field(
        default=True,
        metadata={"help": "Whether to run evaluation."},
    )
    do_test: bool = field(
        default=True,
        metadata={"help": "Whether to run testing."},
    )
    valid_steps: Optional[int] = field(
        default=50,
        metadata={"help": "The evaluation steps."},
    )
    valid_seed: Optional[int] = field(
        default=42,
        metadata={"help": "The seed for validation."},
    )
    test_seed: Optional[int] = field(
        default=42,
        metadata={"help": "The seed for testing."},
    )

    # ---- checkpointing ----
    save_steps: Optional[int] = field(
        default=500,
        metadata={"help": "The saving steps."},
    )
    save_total_limit: Optional[int] = field(
        default=None,
        metadata={"help": "The total number of checkpoints to save."},
    )


================================================
FILE: contrib/text2image/diffuser_finetuner.py
================================================
import copy
import gc
import json
import logging
import os

import torch
import torch.nn.functional as F
import wandb
from accelerate import Accelerator
from diffuser_args import DiffuserModelArguments, DiffuserTunerArguments, T2IDatasetArguments
from diffusers import (
    DDPMScheduler,
    DiffusionPipeline,
)
from diffusers.loaders import LoraLoaderMixin
from diffusers.optimization import get_scheduler
from diffusers.utils import (
    convert_state_dict_to_diffusers,
)
from diffusers.utils.torch_utils import is_compiled_module
from peft.utils import get_peft_model_state_dict
from torch.utils.data import DataLoader
from tqdm import tqdm

from lmflow.pipeline.finetuner import BaseTuner

logger = logging.getLogger(__name__)


def log_validation(
    pipeline,
    accelerator: Accelerator,
    pipeline_args: list,
    save_dir,
    global_step,
):
    """Generate one image per prompt, log them to wandb (if tracked) and save them as PNG files.

    Parameters
    ----------
    pipeline :
        Generation pipeline; it is moved to ``accelerator.device`` and its VAE is cast to float32.
    accelerator : Accelerator
        Supplies the target device and the active trackers.
    pipeline_args : list of dict
        One keyword-argument dict per generation call. Each dict must contain a
        ``"prompt"`` entry, which is also used for the caption and the file name.
    save_dir :
        Directory the PNG files are written to; created when missing.
    global_step :
        Current training step. Accepted for interface compatibility; not read here.
    """
    pipeline.to(accelerator.device)
    pipeline.vae.to(torch.float32)

    with torch.no_grad():
        prompt_images = [(call_kwargs["prompt"], pipeline(**call_kwargs).images[0]) for call_kwargs in pipeline_args]

    for tracker in accelerator.trackers:
        if tracker.name == "wandb":
            tracker.log(
                {
                    "validation": [
                        wandb.Image(image, caption=f"{i}: {prompt}") for i, (prompt, image) in enumerate(prompt_images)
                    ]
                }
            )

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(save_dir, exist_ok=True)
    for prompt, image in prompt_images:
        # A path separator inside the prompt would otherwise point into a non-existent sub-directory.
        file_stem = prompt.replace(" ", "_").replace(os.sep, "_")
        image.save(os.path.join(save_dir, f"{file_stem}.png"))

    # Drop the local reference and collect garbage first so that the CUDA cache
    # release below can actually return memory that has just been freed.
    del pipeline
    gc.collect()
    torch.cuda.empty_cache()

    return


class DiffuserModelTuner(BaseTuner):
    """Initializes the `DiffuserModelTuner` class.

    Fine-tunes the denoising network of a diffusers text-to-image model with a
    noise-prediction MSE objective, with optional periodic validation sampling,
    periodic checkpoints and a final test-sampling pass.

    Parameters
    ----------
    model_args : DiffuserModelArguments object.
        Contains the arguments required to load the model.

    data_args : T2IDatasetArguments object.
        Contains the arguments required to load the dataset.

    finetuner_args : DiffuserTunerArguments object.
        Contains the arguments required to perform finetuning.

    args : Optional.
        Positional arguments.

    kwargs : Optional.
        Keyword arguments.
    """

    def __init__(
        self,
        model_args: DiffuserModelArguments,
        data_args: T2IDatasetArguments,
        finetuner_args: DiffuserTunerArguments,
        *args,
        **kwargs,
    ):
        self.model_args = model_args
        self.data_args = data_args
        self.finetuner_args = finetuner_args

    @staticmethod
    def _unwrap_model(accelerator, model):
        """Strip the accelerate wrapper and, when present, the torch.compile wrapper from `model`."""
        model = accelerator.unwrap_model(model)
        return model._orig_mod if is_compiled_module(model) else model

    def _sample_and_log(self, accelerator, model, weight_dtype, prompt_file, seed, save_dir, global_step):
        """Render every prompt of `prompt_file` with the current weights and hand the images to `log_validation`.

        A fresh `DiffusionPipeline` is loaded from `model_name_or_path` and its
        denoiser is replaced by the unwrapped model being trained.
        """
        pipeline = DiffusionPipeline.from_pretrained(
            self.model_args.model_name_or_path,
            torch_dtype=weight_dtype,
        )
        if self.model_args.model_type == "unet":
            pipeline.unet = self._unwrap_model(accelerator, model)
        elif self.model_args.model_type == "transformer":
            pipeline.transformer = self._unwrap_model(accelerator, model)
        else:
            raise ValueError(f"Unknown model type {self.model_args.model_type}")

        with open(os.path.join(self.data_args.dataset_path, prompt_file)) as f:
            prompt_data = json.load(f)
        generator = torch.Generator(device=accelerator.device).manual_seed(seed)
        pipeline_args = [
            {
                "prompt": item["text"],
                "generator": generator,
                "width": self.data_args.image_size,
                "height": self.data_args.image_size,
            }
            for item in prompt_data["instances"]
        ]
        log_validation(pipeline, accelerator, pipeline_args, save_dir, global_step)

    def _prune_checkpoints(self, checkpoint_dir):
        """Delete the oldest periodic (`step_*`) checkpoints so the next save respects `save_total_limit`.

        Nothing is deleted when the limit is unset or non-positive. The
        `final` checkpoint is never touched.
        """
        import shutil  # local import: only needed for directory-style (LoRA) checkpoints

        limit = getattr(self.finetuner_args, "save_total_limit", None)
        if limit is None or limit <= 0 or not os.path.isdir(checkpoint_dir):
            return

        def step_of(entry):
            # "step_120" / "step_120.pt" -> 120; anything unparsable sorts first.
            digits = entry[len("step_"):].split(".", 1)[0]
            return int(digits) if digits.isdigit() else -1

        periodic = sorted((e for e in os.listdir(checkpoint_dir) if e.startswith("step_")), key=step_of)
        # Keep `limit - 1` old checkpoints: the one about to be written brings the total to `limit`.
        num_stale = len(periodic) - (limit - 1)
        for entry in periodic[: max(num_stale, 0)]:
            stale_path = os.path.join(checkpoint_dir, entry)
            if os.path.isdir(stale_path):
                shutil.rmtree(stale_path)
            else:
                os.remove(stale_path)

    def _save_weights(self, accelerator, model, name, copy_before_cast):
        """Write the current weights to `<output_dir>/checkpoints/<name>`.

        With LoRA enabled the adapter weights are saved into a directory called
        `name`; otherwise the full state dict is saved as `<name>.pt`.
        `copy_before_cast` deep-copies the model before the float32 cast so that
        the model still being trained is left untouched.
        """
        checkpoint_dir = os.path.join(self.finetuner_args.output_dir, "checkpoints")
        os.makedirs(checkpoint_dir, exist_ok=True)

        if not self.model_args.use_lora:
            accelerator.save(
                accelerator.get_state_dict(model),
                os.path.join(checkpoint_dir, f"{name}.pt"),
            )
            return

        export_model = self._unwrap_model(accelerator, model)
        if copy_before_cast:
            export_model = copy.deepcopy(export_model)
        export_model = export_model.to(torch.float32)
        model_lora_state_dict = convert_state_dict_to_diffusers(get_peft_model_state_dict(export_model))
        LoraLoaderMixin.save_lora_weights(
            save_directory=os.path.join(checkpoint_dir, name),
            unet_lora_layers=model_lora_state_dict if self.model_args.model_type == "unet" else None,
            transformer_lora_layers=model_lora_state_dict if self.model_args.model_type == "transformer" else None,
        )
        if copy_before_cast:
            del export_model

    def tune(
        self,
        accelerator: Accelerator,
        model,
        dataset,
    ):
        """Run the fine-tuning loop.

        Parameters
        ----------
        accelerator : Accelerator
            Prepares model / dataloader / optimizer and handles backward, logging and saving.
        model :
            Denoising network to train; only parameters with `requires_grad` are optimized.
        dataset :
            Dataset whose items provide an `"image"` and a `"text"` tensor (read from each batch below).

        Notes
        -----
        Periodic checkpoints are written to `checkpoints/step_<global_step>` every
        `save_steps` steps and limited by `save_total_limit`; the weights at the
        end of training are written to `checkpoints/final`.
        """
        dataloader = DataLoader(dataset=dataset, batch_size=self.finetuner_args.train_batch_size, shuffle=True)

        noise_scheduler = DDPMScheduler.from_pretrained(self.model_args.model_name_or_path, subfolder="scheduler")

        # filter trainable parameters
        params_to_optimize = [p for p in model.parameters() if p.requires_grad]
        accelerator.print(len(params_to_optimize))

        optimizer = torch.optim.AdamW(
            params_to_optimize, lr=self.finetuner_args.learning_rate, weight_decay=self.finetuner_args.weight_decay
        )

        lr_scheduler = get_scheduler(
            "constant",
            optimizer=optimizer,
        )

        model, dataloader, optimizer, lr_scheduler = accelerator.prepare(model, dataloader, optimizer, lr_scheduler)
        weight_dtype = torch.float32
        if accelerator.mixed_precision == "fp16":
            weight_dtype = torch.float16

        progress_bar = tqdm(
            range(self.finetuner_args.num_train_epochs * len(dataloader)),
            desc="Training",
            disable=not accelerator.is_main_process,
        )

        # global_step is only advanced on the main process; every consumer below is main-process-only.
        global_step = 0
        for _ in range(self.finetuner_args.num_train_epochs):
            model.train()
            for batch in dataloader:
                clean_latents = batch["image"].to(dtype=weight_dtype)
                text_embedding = batch["text"].to(dtype=weight_dtype)

                bsz = clean_latents.shape[0]
                noise = torch.randn_like(clean_latents).to(dtype=weight_dtype)
                timesteps = torch.randint(
                    0, noise_scheduler.config.num_train_timesteps, (bsz,), device=clean_latents.device
                )
                timesteps = timesteps.long()

                noisy_latents = noise_scheduler.add_noise(clean_latents, noise, timesteps)
                model_pred = model(
                    noisy_latents,
                    timesteps,
                    text_embedding,
                )[0]

                if noise_scheduler.config.prediction_type == "epsilon":
                    target = noise
                elif noise_scheduler.config.prediction_type == "v_prediction":
                    target = noise_scheduler.get_velocity(clean_latents, noise, timesteps)
                else:
                    raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}")

                loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean")

                accelerator.backward(loss)
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()

                progress_bar.update(1)

                if accelerator.is_main_process:
                    logs = {"loss": loss.item(), "lr": lr_scheduler.get_last_lr()[0]}
                    progress_bar.set_postfix(**logs)
                    accelerator.log(logs, step=global_step)
                    global_step += 1

                # validation
                if (
                    accelerator.is_main_process
                    and self.finetuner_args.do_valid
                    and self.data_args.validation_file is not None
                ):
                    if global_step % self.finetuner_args.valid_steps == 0:
                        with torch.no_grad():
                            self._sample_and_log(
                                accelerator,
                                model,
                                weight_dtype,
                                self.data_args.validation_file,
                                self.finetuner_args.valid_seed,
                                os.path.join(self.finetuner_args.output_dir, f"step_{global_step}_validation"),
                                global_step,
                            )

                # periodic checkpoint
                if accelerator.is_main_process and global_step % self.finetuner_args.save_steps == 0:
                    self._prune_checkpoints(os.path.join(self.finetuner_args.output_dir, "checkpoints"))
                    self._save_weights(accelerator, model, f"step_{global_step}", copy_before_cast=True)

        accelerator.wait_for_everyone()
        progress_bar.close()
        if accelerator.is_main_process:
            if self.finetuner_args.do_test and self.data_args.test_file is not None:
                self._sample_and_log(
                    accelerator,
                    model,
                    weight_dtype,
                    self.data_args.test_file,
                    self.finetuner_args.test_seed,
                    os.path.join(self.finetuner_args.output_dir, "test_final"),
                    global_step,
                )

            # Training is over, so the model itself may be cast to float32 in place.
            self._save_weights(accelerator, model, "final", copy_before_cast=False)

        return


================================================
FILE: contrib/text2image/finetune_t2i.py
================================================
import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB_MODE"] = "offline"
import gc
import shutil
from pathlib import Path

import torch
from accelerate import Accelerator
from accelerate.utils import ProjectConfiguration
from diffuser_args import DiffuserModelArguments, DiffuserTunerArguments, T2IDatasetArguments
from diffuser_finetuner import DiffuserModelTuner
from diffusers import AutoencoderKL, UNet2DConditionModel
from peft import LoraConfig
from t2i_dataset import build_t2i_dataset
from transformers import AutoTokenizer, CLIPTextModel, HfArgumentParser


def main():
    """Entry point: parse arguments, pre-encode the dataset, build the denoiser and fine-tune it.

    Raises
    ------
    NotImplementedError
        If ``model_type`` is ``"transformer"``.
    ValueError
        If ``model_type`` is anything other than ``"unet"`` or ``"transformer"``.
    """
    parser = HfArgumentParser((DiffuserModelArguments, T2IDatasetArguments, DiffuserTunerArguments))
    model_args, data_args, tuner_args = parser.parse_args_into_dataclasses()

    # Reject an unusable model type up front instead of after the whole dataset has been encoded.
    if model_args.model_type == "transformer":
        raise NotImplementedError("Transformer model is not implemented.")
    if model_args.model_type != "unet":
        raise ValueError("The model type is not supported.")

    logging_dir = Path(tuner_args.output_dir, tuner_args.logging_dir)
    accelerator_project_config = ProjectConfiguration(project_dir=tuner_args.output_dir, logging_dir=logging_dir)
    accelerator = Accelerator(
        mixed_precision=tuner_args.mixed_precision,
        log_with="wandb",
        project_config=accelerator_project_config,
    )

    if accelerator.is_main_process and tuner_args.overwrite_output_dir and os.path.exists(tuner_args.output_dir):
        shutil.rmtree(tuner_args.output_dir)

    # The tokenizer, text encoder and VAE are only needed to pre-encode the dataset.
    # They are placed on this process's accelerator device rather than a hard-coded "cuda".
    tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, subfolder="tokenizer")
    text_encoder = CLIPTextModel.from_pretrained(model_args.model_name_or_path, subfolder="text_encoder").to(
        accelerator.device
    )
    vae = AutoencoderKL.from_pretrained(model_args.model_name_or_path, subfolder="vae").to(accelerator.device)

    dataset = build_t2i_dataset(data_args, tokenizer, text_encoder, vae)

    # Free the encoders before the denoiser is loaded.
    del tokenizer, text_encoder, vae
    torch.cuda.empty_cache()
    gc.collect()

    model = UNet2DConditionModel.from_pretrained(model_args.model_name_or_path, subfolder=model_args.model_type)
    if model_args.use_lora:
        accelerator.print(f"Using LoRA of {model_args.lora_target_modules} for training")
        # Freeze the base weights; only the adapter added below is trainable.
        model.requires_grad_(False)
        lora_config = LoraConfig(
            r=model_args.lora_r,
            lora_alpha=model_args.lora_alpha,
            lora_dropout=model_args.lora_dropout,
            init_lora_weights="gaussian",
            target_modules=model_args.lora_target_modules,
        )
        model.add_adapter(lora_config)
    else:
        model.requires_grad_(True)

    finetuner = DiffuserModelTuner(model_args, data_args, tuner_args)
    accelerator.init_trackers(
        "text2image-finetune",
        config={
            "data_args": data_args,
            "model_args": model_args,
            "tuner_args": tuner_args,
        },
    )

    accelerator.wait_for_everyone()
    finetuner.tune(accelerator=accelerator, model=model, dataset=dataset)


if __name__ == "__main__":
    main()


================================================
FILE: contrib/text2image/finetune_t2i.sh
================================================
#!/bin/bash
# Launches LoRA fine-tuning of a text-to-image diffusion model through `accelerate launch`.
#
# Usage:
#   bash finetune_t2i.sh [-m|--model_name_or_path MODEL] [-t|--model_type TYPE]
#                        [-d|--dataset_path PATH] [-o|--output_dir DIR]
#                        [-p|--main_port PORT] [-i|--img_size SIZE]

# Parses arguments
model_name_or_path=stabilityai/stable-diffusion-2-1
model_type="unet"
dataset_path=data/example
output_dir=output
main_port=29500
img_size=768

# Resolve the config file and the training script relative to this script so
# that it can be started from any working directory.
script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# Aborts when an option that needs a value is the last word on the command line.
require_value() {
    if [[ $# -lt 2 ]]; then
        echo "error: option \"$1\" requires a value" 1>&2
        exit 1
    fi
}

while [[ $# -ge 1 ]]; do
    key="$1"
    case ${key} in
        -m|--model_name_or_path)
            require_value "$@"
            model_name_or_path="$2"
            shift
            ;;
        -t|--model_type)
            require_value "$@"
            model_type="$2"
            shift
            ;;
        -d|--dataset_path)
            require_value "$@"
            dataset_path="$2"
            shift
            ;;
        -o|--output_dir)
            require_value "$@"
            output_dir="$2"
            shift
            ;;
        -p|--main_port)
            require_value "$@"
            main_port="$2"
            shift
            ;;
        -i|--img_size)
            require_value "$@"
            img_size="$2"
            shift
            ;;
        *)
            echo "error: unknown option \"${key}\"" 1>&2
            exit 1
            ;;
    esac
    shift
done

echo "model_name_or_path: ${model_name_or_path}"
echo "model_type: ${model_type}"
echo "dataset_path: ${dataset_path}"
echo "output_dir: ${output_dir}"
echo "main_port: ${main_port}"
echo "img_size: ${img_size}"


# --lora_target_modules is spelled out in full (the dataclass field name)
# instead of relying on argparse prefix matching.
accelerate launch \
    --config_file="${script_dir}/accelerate_t2i_config.yaml" \
    --main_port="${main_port}" \
    "${script_dir}/finetune_t2i.py" \
        --model_name_or_path="${model_name_or_path}" \
        --model_type="${model_type}" \
        --use_lora=True \
        --lora_target_modules "to_k" "to_q" "to_v" "to_out.0" "add_k_proj" "add_v_proj" \
        --dataset_path="${dataset_path}" \
        --image_folder="img" \
        --image_size="${img_size}" \
        --train_file="train.json" \
        --validation_file="valid.json" \
        --test_file="test.json" \
        --output_dir="${output_dir}" \
        --logging_dir="logs" \
        --overwrite_output_dir=True \
        --mixed_precision="fp16" \
        --num_train_epochs=100 \
        --train_batch_size=1 \
        --learning_rate=1e-4 \
        --valid_steps=50


================================================
FILE: contrib/text2image/requirements.txt
================================================
diffusers>=0.29.2

================================================
FILE: contrib/text2image/t2i_dataset.py
================================================
#!/usr/bin/env python

"""This Python code defines a class T2I Dataset."""

import json
import logging
import os.path as osp

from diffuser_args import T2IDatasetArguments
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from tqdm import tqdm

logger = logging.getLogger(__name__)


class CustomT2IDataset(Dataset):
    """Dataset for T2I data.

    Reads the ``train_file`` JSON under ``dataset_path`` (which must have
    ``"type": "text-image"``) and yields ``{"image": <RGB PIL image>, "text": <str>}``
    items; image paths are resolved relative to ``image_folder``.
    """

    def __init__(self, data_args: T2IDatasetArguments):
        self.data_args = data_args
        self.image_folder = osp.join(data_args.dataset_path, data_args.image_folder)
        self.data_file = osp.join(data_args.dataset_path, data_args.train_file)

        # Close the annotation file deterministically instead of leaking the handle.
        with open(self.data_file) as f:
            self.data_dict = json.load(f)
        assert self.data_dict["type"] == "text-image", "The dataset type must be text-image."

        self.data_instances = self.data_dict["instances"]

    def __len__(self):
        return len(self.data_instances)

    def __getitem__(self, idx):
        instance = self.data_instances[idx]
        image_path = osp.join(self.image_folder, instance["image"])
        # convert() returns a new, fully loaded image, so the source file can be closed right away.
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        return {
            "image": image,
            "text": instance["text"],
        }


class EncodePreprocessor:
    """Turns a raw ``{"image", "text"}`` item into encoded tensors.

    Parameters
    ----------
    data_args : T2IDatasetArguments
        Supplies ``image_size`` and ``image_crop_type`` for the image transform.
    kind : str
        Only ``"simple"`` registers a preprocessing function; for any other
        value ``pre_func`` stays ``None``.
    kwargs :
        Forwarded to ``register_simple_func`` (``tokenizer``, ``text_encoder``, ``vae``).
    """

    def __init__(self, data_args: T2IDatasetArguments, kind: str = "simple", **kwargs):
        self.transform = transforms.Compose(
            [
                transforms.Resize(data_args.image_size, interpolation=transforms.InterpolationMode.BILINEAR),
                transforms.CenterCrop(data_args.image_size)
                if data_args.image_crop_type == "center"
                else transforms.RandomCrop(data_args.image_size),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5], std=[0.5]),
            ]
        )

        self.pre_func = None
        if kind == "simple":
            self.register_simple_func(**kwargs)

    def register_simple_func(self, tokenizer, text_encoder, vae):
        """Register a function that encodes the image with `vae` and the text with `text_encoder`."""
        import torch  # local import: this module has no top-level `import torch`

        self.tokenizer = tokenizer
        self.text_encoder = text_encoder
        self.vae = vae

        def simple_func(data_item):
            # The encoders are only used for inference here, so no autograd graph
            # is recorded; the results therefore need no separate detach().
            with torch.no_grad():
                image = self.transform(data_item["image"])
                latents = self.vae.encode(
                    image.to(self.vae.device, dtype=self.vae.dtype).unsqueeze(0)
                ).latent_dist.sample()
                encoded_image = latents * self.vae.config.scaling_factor
                encoded_image = encoded_image.squeeze(0).cpu()

                max_length = self.tokenizer.model_max_length
                tokens = self.tokenizer(
                    [data_item["text"]],
                    max_length=max_length,
                    padding="max_length",
                    truncation=True,
                    return_tensors="pt",
                ).input_ids
                encoded_text = self.text_encoder(tokens.to(self.text_encoder.device))[0]
                encoded_text = encoded_text.squeeze(0).cpu()

            return {
                "image": encoded_image,
                "text": encoded_text,
            }

        self.pre_func = simple_func

    def __call__(self, data_item):
        return self.pre_func(data_item)


class PreprocessedT2IDataset(Dataset):
    """Dataset that holds the preprocessor's output for every raw item in memory."""

    def __init__(self, raw_dataset: Dataset, data_args: T2IDatasetArguments, preprocessor: EncodePreprocessor):
        logger.info("Preprocessing data ...")
        # Every item is preprocessed eagerly, once, at construction time;
        # __getitem__ afterwards is a plain list lookup.
        self.data_dict = [preprocessor(raw_item) for raw_item in tqdm(raw_dataset)]

    def __len__(self):
        # Number of preprocessed items.
        return len(self.data_dict)

    def __getitem__(self, idx):
        # Preprocessed item stored at position ``idx``.
        return self.data_dict[idx]


def build_t2i_dataset(data_args: T2IDatasetArguments, tokenizer, text_encoder, vae):
    """Build the preprocessed text-to-image dataset.

    Loads the raw dataset described by ``data_args``, creates a "simple"
    ``EncodePreprocessor`` from the given tokenizer / text encoder / VAE, and
    returns a ``PreprocessedT2IDataset`` wrapping both.
    """
    raw_dataset = CustomT2IDataset(data_args)
    encoders = {"tokenizer": tokenizer, "text_encoder": text_encoder, "vae": vae}
    preprocessor = EncodePreprocessor(data_args=data_args, kind="simple", **encoders)
    return PreprocessedT2IDataset(raw_dataset, data_args, preprocessor)


================================================
FILE: contrib/tool-finetune/README.md
================================================
## Function-call Finetune

### Pip dependency

```
bitsandbytes==0.40.0
deepspeed==0.12.0
flash-attn==2.5.7
peft==0.10.0
torch==2.1.2+cu118
transformers==4.40.1
vllm==0.5.2
xformers==0.0.27
```

### Conversation Template
```
{
    "type": "conversation",
    "instances": [
        {
            "system": "You are a helpful assistant with access to the following functions. Use them if required - ",
            "tools": ["{\"name\": \"\", \"description\": \"\", \"parameters\": {\"type\": \"object\", \"properties\": {\"property_1\": {\"type\": \"xxx\", \"description\": \"\"}, \"property_2\": {\"type\": \"xxx\", \"description\": \"\"}}, \"required\": [\"required_1\", \"property_n\"]}}"],
            "messages": [
                {
                    "role": "user",
                    "content": ""
                },
                {
                    "role": "function",
                    "content": ""
                },
                {
                    "role": "observation",
                    "content": ""
                },
                {
                    "role": "assistant",
                    "content": ""
                }
            ]
        },
        {
            "system": "You are a helpful assistant, with no access to external functions.",
            "tools": [],
            "messages": [
                {
                    "role": "user",
                    "content": ""
                },
                {
                    "role": "assistant",
                    "content": ""
                }
            ]
        }
    ]
}
```

### Run Function-call Finetune Example
```
./contrib/tool-finetune/run_function_call_finetune.sh \
    --model_name_or_path meta-llama/Meta-Llama-3-8B \
    --trust_remote_code True \
    --conversation_template llama3_for_tool \
    --dataset_path /home/wenhesun/LMFlow/data/glaive-function-calling-v2 \
    --output_model_path /home/wenhesun/LMFlow/output_models/function-call-finetuned-llama
```

### Command-Line Arguments
- `--model_name_or_path` - Specifies the name or path of the model used for finetuning
- `--conversation_template` - So far supports the following choices: llama3_for_tool, qwen2_for_tool
- `--dataset_path` - The path to the dataset that has been converted to the specified format
- `--output_model_path` - Directory to store the finetuned model and logs

================================================
FILE: contrib/tool-finetune/function_call_finetune.py
================================================
import os
import sys

sys.path.remove(os.path.abspath(os.path.dirname(sys.argv[0])))
import hashlib
import logging
from typing import Union

import transformers
from transformers import HfArgumentParser, PreTrainedTokenizer, PreTrainedTokenizerFast
from transformers.testing_utils import CaptureLogger

from lmflow.args import (
    AutoArguments,
    DatasetArguments,
    ModelArguments,
)
from lmflow.datasets.dataset import Dataset
from lmflow.models.hf_decoder_model import HFDecoderModel
from lmflow.pipeline.auto_pipeline import AutoPipeline
from lmflow.tokenization.hf_decoder_model import blocking
from lmflow.utils.constants import (
    CONVERSATION_DATASET_DESCRIPTION,
    CONVERSATION_ROLE_NAMES,
    TEXT2TEXT_DATASET_DESCRIPTION,
    TEXT_ONLY_DATASET_DESCRIPTION,
)
from lmflow.utils.conversation_template import PRESET_TEMPLATES, ConversationTemplateForTool

tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
logger = logging.getLogger(__name__)


class HFDecoderModelForTool(HFDecoderModel):
    """``HFDecoderModel`` whose conversation tokenization goes through
    ``conversation_tokenize_function`` (tool-aware conversation templates)."""

    def tokenize(self, dataset, add_special_tokens=True, *args, **kwargs) -> Dataset:
        """
        Tokenize the full dataset.

        "conversation" datasets are tokenized here with
        ``conversation_tokenize_function``. "text_only" and "text2text"
        datasets are delegated to the parent class implementation.

        Parameters
        ------------
        dataset : lmflow.datasets.Dataset.

        add_special_tokens : bool.
            Only forwarded to the parent implementation for non-conversation
            datasets; the conversation path does not use it.

        args : Optional.
            Positional arguments, forwarded to the parent implementation.

        kwargs : Optional.
            Keyword arguments, forwarded to the parent implementation.

        Returns
        ------------
        tokenized_datasets :
            The tokenized dataset.

        Raises
        ------------
        NotImplementedError
            If the dataset backend is not "huggingface", the dataset type is
            not supported, or the requested conversation template is not one
            of ``PRESET_TEMPLATES``.
        """
        if dataset.get_backend() != "huggingface":
            raise NotImplementedError("tokenization of datasets with non-huggingface backend arenot supported yet")

        dataset_type = dataset.get_type()

        if dataset_type in ("text_only", "text2text"):
            # conversation_tokenize_function accepts neither label_columns,
            # tokenized_column_order, add_special_tokens nor use_truncation and
            # requires a conversation_template, so routing these dataset types
            # through it always failed. Defer to the parent tokenizer instead.
            # NOTE(review): assumes HFDecoderModel.tokenize supports these
            # types with the same signature - confirm.
            return super().tokenize(dataset, add_special_tokens, *args, **kwargs)

        if dataset_type != "conversation":
            raise NotImplementedError(
                f'dataset type "{dataset_type}" is not supported, currently'
                " only support following data types:\n"
                f"    1) {TEXT_ONLY_DATASET_DESCRIPTION}\n"
                f"    2) {TEXT2TEXT_DATASET_DESCRIPTION}\n"
                f"    3) {CONVERSATION_DATASET_DESCRIPTION}\n"
            )

        raw_datasets = dataset
        column_names = list(dataset.get_backend_dataset().features)
        data_args = raw_datasets.get_data_args()

        # Resolve the conversation template by name, falling back to "empty".
        if data_args.conversation_template:
            if data_args.conversation_template in PRESET_TEMPLATES.keys():
                conversation_template = PRESET_TEMPLATES[data_args.conversation_template]
            else:
                raise NotImplementedError(
                    f"Conversation template {data_args.conversation_template} is not supported yet."
                )
        else:
            logger.warning("No conversation template provided. Using default template.")
            conversation_template = PRESET_TEMPLATES["empty"]

        logger.warning(f"Conversation template: {conversation_template}")

        tokenize_fn_kwargs = {
            "data_args": data_args,
            "tokenizer": self.tokenizer,
            "column_names": column_names,
            "conversation_template": conversation_template,
        }

        tokenize_kwargs = {}
        if not data_args.streaming:
            # Cache key covers everything that influences the tokenized output.
            # The string layout is kept identical to the previous implementation.
            fingerprint = hashlib.md5(
                (
                    raw_datasets.get_fingerprint()
                    + str(self.tokenizer)
                    + f"###padding_side={self.tokenizer.padding_side}"
                    + "###conversation_template="
                    + str(conversation_template)
                    + f"###disable_group_texts={data_args.disable_group_texts}"
                    + f"###block_size={data_args.block_size}"
                ).encode("utf-8")
            ).hexdigest()
            tokenize_kwargs = {
                "num_proc": data_args.preprocessing_num_workers,
                "load_from_cache_file": not data_args.overwrite_cache,
                "desc": "Running tokenizer on dataset",
                "new_fingerprint": fingerprint,
            }

        tokenized_datasets = raw_datasets.map(
            conversation_tokenize_function,
            batched=True,
            remove_columns=column_names,
            fn_kwargs=tokenize_fn_kwargs,
            **tokenize_kwargs,
        )

        return tokenized_datasets


def conversation_tokenize_function(
    examples,
    data_args: DatasetArguments,
    tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
    column_names,
    conversation_template: ConversationTemplateForTool,
) -> dict:
    """Tokenize a batch of conversations, optionally containing tool calls.

    Each conversation is encoded by ``conversation_template`` into a sequence
    of rounds. A round is either ``(user, assistant)`` or
    ``(user, function, observation, assistant)`` token lists. All segments are
    concatenated into ``input_ids``; in ``labels`` the user and observation
    segments are replaced by ``-100`` unless ``data_args.train_on_prompt`` is
    set. Invalid conversations are skipped and leave empty lists at their
    position. When ``data_args.disable_group_texts`` is set the result is
    passed through ``blocking``.

    Returns a dict with "input_ids", "attention_mask" and "labels", each a
    list with one entry per example.
    """
    batch_size = len(examples[column_names[0]])
    token_dict = {
        field: [[] for _ in range(batch_size)] for field in ("input_ids", "attention_mask", "labels")
    }
    with CaptureLogger(tok_logger) as cl:
        for i, messages in enumerate(examples["messages"]):
            system = examples.get("system", [None] * batch_size)[i]
            tools = examples.get("tools", [None] * batch_size)[i]

            is_valid = len(messages) >= 2 and messages[0]["role"] == CONVERSATION_ROLE_NAMES["user"]
            if not is_valid:
                tok_logger.warning(
                    "Invalid instance encountered. Either the conversation has less than "
                    "one round or the first message is not from the user."
                )
                continue

            if len(messages) % 2:
                logger.warning("The number of messages is not even, the last message will be ignored.")
                messages = messages[:-1]

            encoded_conversation = conversation_template.encode_conversation(
                tokenizer=tokenizer,
                messages=messages,
                system=system,
                tools=tools,
            )

            input_ids, labels = [], []
            for segments in encoded_conversation:
                # One flag per segment: True when the segment's tokens are
                # kept in the labels, False when they are masked with -100.
                if len(segments) == 2:
                    # (user, assistant)
                    in_loss = (data_args.train_on_prompt, True)
                elif len(segments) == 4:
                    # (user, function, observation, assistant)
                    in_loss = (data_args.train_on_prompt, True, data_args.train_on_prompt, True)
                else:
                    logger.warning("The number of roles in conversation is not appropriate")
                    continue

                for segment, keep in zip(segments, in_loss):
                    input_ids += segment
                    labels += segment if keep else [-100] * len(segment)

            token_dict["input_ids"][i] += input_ids
            token_dict["attention_mask"][i] += [1] * len(input_ids)
            token_dict["labels"][i] += labels

    if data_args.disable_group_texts:
        token_dict = blocking(
            token_dict=token_dict,
            block_size=data_args.block_size,
            model_max_length=tokenizer.model_max_length,
            pad_token_id=tokenizer.pad_token_id,
            padding_side=tokenizer.padding_side,
        )

    # clm input could be much much longer than block_size
    length_warning = "Token indices sequence length is longer than the"
    if length_warning in cl.out:
        tok_logger.warning(
            "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits"
            " before being passed to the model."
        )
    return token_dict


def train():
    """Parse arguments, build the tool-aware model and the dataset, and run the finetuner.

    Arguments come from a single ``.json`` file when that is the only
    command-line argument, otherwise from the regular command-line flags.
    """
    pipeline_name = "finetuner"
    pipeline_args_cls = AutoArguments.get_pipeline_args_class(pipeline_name)
    parser = HfArgumentParser((ModelArguments, DatasetArguments, pipeline_args_cls))

    cli_args = sys.argv[1:]
    if len(cli_args) == 1 and cli_args[0].endswith(".json"):
        parsed = parser.parse_json_file(json_file=os.path.abspath(cli_args[0]))
    else:
        parsed = parser.parse_args_into_dataclasses()
    model_args, data_args, pipeline_args = parsed

    print("Model args", model_args)
    print("data_args", data_args)
    print("training_args", pipeline_args)

    # Model first, then dataset, then the pipeline that ties them together.
    model = HFDecoderModelForTool(model_args)
    dataset = Dataset(data_args)
    finetuner = AutoPipeline.get_pipeline(
        pipeline_name=pipeline_name,
        model_args=model_args,
        data_args=data_args,
        pipeline_args=pipeline_args,
    )
    finetuner.tune(model=model, dataset=dataset)


# Run the finetuning entry point only when this file is executed as a script.
if __name__ == "__main__":
    train()


================================================
FILE: contrib/tool-finetune/run_function_call_finetune.sh
================================================
#!/bin/bash
# Finetunes a model on function-call (tool) conversation data with DeepSpeed.
#
# Run from the project root: the training script and the DeepSpeed config
# below are referenced relative to the current working directory.
#
# Please run this script under ${project_id} in project directory of
#   https://github.com/shizhediao/llm-ft
#     COMMIT: d5fecf30ba8011067b10cf51fede53a5ab6574e4

# Parses arguments
model_name_or_path=gpt2
dataset_path=data/alpaca/train_conversation
output_dir=output_models/finetune
deepspeed_args="--master_port=12000"
conversation_template=llama2

# Safety related arguments
trust_remote_code=0

while [[ $# -ge 1 ]]; do
  key="$1"
  case ${key} in
    -m|--model_name_or_path)
      model_name_or_path="$2"
      shift
      ;;
    -d|--dataset_path)
      dataset_path="$2"
      shift
      ;;
    -o|--output_model_path)
      output_dir="$2"
      shift
      ;;
    --conversation_template)
      conversation_template="$2"
      shift
      ;;
    --deepspeed_args)
      deepspeed_args="$2"
      shift
      ;;
    --trust_remote_code)
      trust_remote_code="$2"
      shift
      ;;
    *)
      echo "error: unknown option \"${key}\"" 1>&2
      exit 1
  esac
  shift
done

# Finetune
exp_id=finetune
# This script lives in contrib/tool-finetune/, so the project root is two
# levels up from its directory.
project_dir=$(cd "$(dirname "$0")"/../..; pwd)
log_dir="${project_dir}/log/${exp_id}"
mkdir -p "${output_dir}" "${log_dir}"

# ${deepspeed_args} is intentionally left unquoted so that it can carry
# several launcher options separated by spaces.
deepspeed ${deepspeed_args} \
  contrib/tool-finetune/function_call_finetune.py \
    --model_name_or_path "${model_name_or_path}" \
    --trust_remote_code "${trust_remote_code}" \
    --dataset_path "${dataset_path}" \
    --output_dir "${output_dir}" --overwrite_output_dir \
    --conversation_template "${conversation_template}" \
    --num_train_epochs 0.01 \
    --learning_rate 2e-5 \
    --disable_group_texts 1 \
    --block_size 1024 \
    --per_device_train_batch_size 1 \
    --deepspeed configs/deepspeed/zero3.json \
    --fp16 \
    --run_name finetune \
    --validation_split_percentage 0 \
    --logging_steps 20 \
    --do_train \
    --ddp_timeout 72000 \
    --save_steps 5000 \
    --dataloader_num_workers 1 \
    > >(tee "${log_dir}/train.log") \
    2> >(tee "${log_dir}/train.err" >&2)


================================================
FILE: docs/dev_notes/finetuning.mmd
================================================
sequenceDiagram
    %% Simplified call sequence of a finetuning run: the user calls
    %% tune(model, dataset) on the Finetuner, which tokenizes the dataset,
    %% sets up a trainer, runs the training loop and saves the model.
    participant User
    participant Finetuner as LMFlow Finetuner
    participant Model as LMFlow Model
    participant Dataset as LMFlow Dataset
    participant Trainer as Trainer
    
    User->>Finetuner: tune(model, dataset)
        
    %% Tokenization
    Finetuner->>Model: tokenize(dataset)
    Model->>Dataset: Apply tokenization to dataset
    
    alt if not disable_group_texts
        Finetuner->>Finetuner: group_text(tokenized_dataset, model_max_length)
    end
    
    %% Prepare for training
    Finetuner->>Finetuner: Prepare dataset for trainer
    
    %% Create appropriate trainer based on configuration
    alt if model_args.use_lora
        Finetuner->>Finetuner: Initialize PeftTrainer
    else
        Finetuner->>Finetuner: Initialize standard Trainer
    end
    
    alt if training_args.use_customized_optim
        Finetuner->>Finetuner: create_customized_optimizer()
    end
    
    alt if training_args.use_lisa
        Finetuner->>Finetuner: Create DynamicLayerActivationCallback
    end
    
    %% Start training
    Finetuner->>Trainer: train(resume_from_checkpoint)
    
    %% Training loop (simplified)
    loop Training iterations (Trainer._inner_training_loop simplified)
        Trainer->>Model: Forward pass
        Model-->>Trainer: Return predictions
        Trainer->>Trainer: Compute loss
        Trainer->>Model: Backward pass
        Model->>Model: Compute Gradient  
        Trainer->>Trainer: Optimizer step
    end
    
    %% Save the model
    alt if not model_args.use_lora
        Trainer->>Trainer: save_model()
    else
        alt if model_args.save_aggregated_lora
            Finetuner->>Model: merge_lora_weights()
        end
        Finetuner->>Model: save(output_dir, save_aggregated_lora)
    end
    
    %% Finish and return
    Trainer-->>Finetuner: Return train result
    Finetuner->>Finetuner: Log metrics
    Finetuner-->>User: Return fine-tuned model

================================================
FILE: docs/readme/Position_Interpolation.md
================================================
# Position Interpolation 
Now LMFlow supports the latest Linear & NTK (Neural Tangent Kernel) scaling techniques for LLaMA models. \
For more details on these techniques, you can check out the links below:
* Linear scaling: \
https://arxiv.org/abs/2306.15595
* NTK scaling: \
https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/
## Usage
To use the Position Interpolation Techniques, you need to set the following options:
```
--truncate_to_model_max_length False
--do_rope_scaling True
```
For linear scaling, set the extending ratio by:
```
--rope_pi_ratio 4
```
For NTK scaling, set the extending ratio by:
```
--rope_ntk_ratio 4
```
Here is an example of evaluation bash code:
```
#!/bin/bash

CUDA_VISIBLE_DEVICES=0 \
    deepspeed examples/evaluation.py \
    --answer_type text \
    --model_name_or_path pinkmanlove/llama-7b-hf \
    --dataset_path data/wiki_en_eval \
    --deepspeed examples/ds_config.json \
    --inference_batch_size_per_device 1 \
    --truncate_to_model_max_length False \
    --block_size 4096 \
    --use_flash_attention True \
    --do_rope_scaling True \
    --rope_pi_ratio 2 \
    --rope_ntk_ratio 4 \
    --metric ppl
```

================================================
FILE: docs/readme/README_es.md
================================================
<p align="center" width="100%">
<img src="../docs/assets/logo.png" alt="LMFlow" style="width: 100%; min-width: 300px; display: block; margin: auto; background-color: transparent;">
</p>

# LMFlow

<h4 align="center">
    <p>
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/README.md">English</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_zh-hans.md">简体中文</a> |
        <b>Español</b> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_jp.md">日本語</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_ko.md">한국어</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_hindi.md">हिंदी</a>
    </p>
</h4>

> [!NOTE]
> This README file was translated by LLM. Spanish speakers are welcome to submit PRs to polish the document!  

> [!NOTE]  
> La versión en español fue traducida por ChatGPT; si hay algún error, las correcciones de los colaboradores son bienvenidas, gracias. Asimismo, si hay alguna diferencia o inconsistencia en el contenido con respecto a la versión en inglés, se debe considerar la versión en inglés como la correcta.

[![Website](https://img.shields.io/badge/Website-Demo-20B2AA.svg)](https://lmflow.com)
[![Code License](https://img.shields.io/badge/Code%20License-Apache_2.0-green.svg)](https://github.com/OptimalScale/LMFlow/blob/main/LICENSE)
[![Python 3.9+](https://img.shields.io/badge/Python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/)
[![Doc](https://img.shields.io/badge/Website-Doc-ff69b4.svg)](https://optimalscale.github.io/LMFlow/)
[![Embark](https://img.shields.io/badge/Discord-LMFlow-%237289da.svg?logo=discord)](https://discord.gg/u9VJNpzhvA)
[![slack badge](https://img.shields.io/badge/Slack-Join-blueviolet?logo=slack&amp)](https://join.slack.com/t/lmflow/shared_invite/zt-1wju9nicy-woXbNtS~5MavHSAtiMxmxQ)
[![WeChat badge](https://img.shields.io/badge/WeChat-Join-brightgreen?logo=wechat&amp)](https://ibb.co/ZhM4hhn)

Una caja de herramientas extensible, conveniente y eficiente para ajustar modelos de aprendizaje automático grandes, diseñada para ser fácil de usar, rápida, confiable y accesible para toda la comunidad.


<p align="center" width="100%">
<img src="../docs/assets/features.png" alt="LMFlow-features" style="width: 100%; min-width: 300px; display: block; margin: auto;">
</p>


## Latest News
* [2024-04-25] :rocket: ¡Soporte para plantilla de conversación! Hemos preconfigurado las últimas plantillas de conversación [Llama-3](https://huggingface.co/meta-llama/Meta-Llama-3-70B) y [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct), así como algunas plantillas de conversación frecuentemente utilizadas como `chatml` (ver todas las plantillas [aquí](https://optimalscale.github.io/LMFlow/examples/DATASETS.html#conversation-template)), y estamos trabajando en agregar más plantillas preconfiguradas. ¡Agrega el correspondiente `--conversation_template` en el script de la terminal y estarás listo! :rocket:  
* [2024-03-27] Soporte para [LISA](https://arxiv.org/abs/2403.17919) — ¡Entrenamiento de modelos de 7B en GPU con 24G de memoria sin necesidad de offloading!  
* [2023-09-11] Soporte para [decodificación especulativa](https://arxiv.org/abs/2211.17192), consulta la [guía de uso](https://github.com/OptimalScale/LMFlow/blob/main/scripts/speculative_decoding/README.md) para ver cómo utilizarlo y estadísticas de rendimiento básicas.
* [2023-08-14] Soporte para ampliar la ventana de contexto de LLaMA a través de interpolación de posición (Lineal y Escalado NTK), más información en: [Interpolación de Posición](https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/Position_Interpolation.md).
* [2023-08-07] Soporte para [Flash Attention-2](https://crfm.stanford.edu/2023/07/17/flash2.html), consulta la [guía de uso de Flash Attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) para más detalles.


## Quick Start
### Setup
Nuestro repositorio ha sido probado en Linux (Ubuntu 20.04). Las otras plataformas de sistemas operativos (macOS, Windows) aún no han sido completamente probadas, por lo que pueden surgir algunos errores inesperados. Se recomienda probar primero en Linux/Windows WSL o utilizar Google Colab para experimentar.

Para CUDA 10.3-11.7, se recomienda utilizar `v0.0.5` o versiones anteriores. Para CUDA superior a 11.7, por favor, utilice nuestra rama estable `>= v0.0.6` para una mejor experiencia.
```bash
git clone https://github.com/OptimalScale/LMFlow.git
cd LMFlow
conda create -n lmflow python=3.9 -y
conda activate lmflow
conda install mpi4py
bash install.sh
```

### Prepare Dataset
Por favor, consulta nuestra [documentación oficial (en inglés)](https://optimalscale.github.io/LMFlow/examples/DATASETS.html). La documentación oficial se encuentra actualmente en proceso de traducción, te pedimos paciencia mientras tanto.

### Fine-Tuning (Full)
El ajuste fino completo actualizará todos los parámetros del modelo. A continuación se muestra un ejemplo de ajuste fino completo de GPT-2:

```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune.sh \
  --model_name_or_path gpt2 \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_gpt2
```

> [!TIP]
> Puedes especificar una plantilla de conversación para el conjunto de datos de diálogo agregando el parámetro `--conversation_template`.
>
><details><summary>Ejemplo: Especificar una plantilla de conversación para Llama-3-8B</summary>  
>
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune.sh \
>  --model_name_or_path meta-llama/Meta-Llama-3-8B \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama3 \
>  --output_model_path output_models/finetuned_llama3_8b
>```
></details>

### Fine-Tuning (LISA)
[LISA](https://arxiv.org/abs/2403.17919) es un algoritmo de ajuste fino que es **eficiente en memoria**, permitiendo un equilibrio entre la memoria y el número de capas descongeladas aleatoriamente. El script siguiente ha sido probado únicamente en **una sola GPU**. ¡Estén atentos a nuestras últimas actualizaciones! :smile:

```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune_with_lisa.sh \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_llama2_7b \
  --lisa_activated_layers 1 \
  --lisa_interval_steps 20
```

> [!TIP]
> <details><summary>Ejemplo: Especificando el conjunto de datos de conversación para Llama-2-7B</summary>  
> 
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune_with_lisa.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama2 \
>  --output_model_path output_models/finetuned_llama2_7b_lisa \
>  --lisa_activated_layers 1 \
>  --lisa_interval_steps 20
>```
> </details>

### Fine-Tuning (LoRA)
LoRA es un algoritmo de ajuste fino de parámetros que es más eficiente que el ajuste fino completo de parámetros.
```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune_with_lora.sh \
  --model_name_or_path facebook/galactica-1.3b \
  --dataset_path data/alpaca/train_conversation \
  --output_lora_path output_models/finetuned_galactica_lora
```

> [!TIP]
> <details><summary>Ejemplo: Especificando el conjunto de datos de diálogo para Llama-2-7B</summary>  
>
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune_with_lora.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama2 \
>  --output_model_path output_models/finetuned_llama2_7b_lora
>```
> </details>
>
> <details><summary>Combinando pesos de LoRA</summary>
>
>Puede combinar los pesos de LoRA con el modelo original utilizando el siguiente comando:  
>```sh
>./scripts/run_merge_lora.sh \
>  --model_name_or_path Qwen/Qwen1.5-1.8B \
>  --lora_model_path output_models/lora \
>  --output_model_path output_models/lora_merged
>```
></details>

### Inference
Después de haber terminado el ajuste fino, puedes entablar una conversación con el modelo usando el siguiente comando.
```sh
./scripts/run_chatbot.sh output_models/finetuned_gpt2
```

### Deployment
Si deseas implementar tu propio modelo localmente, ofrecemos una interfaz de chatbot basada en Gradio.
Para iniciar la demostración de Robin-7b con esta interfaz, utilice los siguientes comandos:
```sh
pip install gradio
python ./examples/chatbot_gradio.py --deepspeed configs/archive/ds_config_chatbot.json --model_name_or_path YOUR-LLAMA  --lora_model_path ./robin-7b --prompt_structure "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: {input_text}###Assistant:"       --end_string "#" --max_new_tokens 200
```

### Evaluation
[LMFlow Benchmark](https://blog.gopenai.com/lmflow-benchmark-an-automatic-evaluation-framework-for-open-source-llms-ef5c6f142418) es un marco de evaluación automática para LLM de código abierto. Utilizamos la Log-Verosimilitud Negativa (NLL, Negative Log Likelihood) como métrica para evaluar diversos aspectos de los LLM, como el chat casual, el razonamiento de sentido común y la capacidad de seguir instrucciones. Le invitamos a utilizar LMFlow Benchmark para evaluar los modelos que tenga disponibles y a participar en nuestra [Comparación de Modelos (LLM comparison)](https://docs.google.com/spreadsheets/d/1JYh4_pxNzmNA9I0YM2epgRA7VXBIeIGS64gPJBg5NHA/edit?usp=sharing).

Tomando como ejemplo el GPT-2 XL, puede comenzar la evaluación con el siguiente comando:
```sh
./scripts/run_benchmark.sh --model_name_or_path gpt2-xl
```
`--model_name_or_path` es un parámetro obligatorio, donde puede ingresar el nombre del modelo de Hugging Face o la ruta local del modelo.
Puede revisar los resultados de la evaluación en `benchmark.log` dentro de `./output_dir/gpt2-xl_lmflow_chat_nll_eval`, `./output_dir/gpt2-xl_all_nll_eval` y `./output_dir/gpt2-xl_commonsense_qa_eval`.


## Supported Features
<details> <summary>Optimización de Ajuste Fino y Memoria</summary>

* LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning
  
  LISA es un algoritmo de ajuste fino de LLM eficiente en memoria. Al seleccionar selectivamente capas para congelar durante el ajuste fino, LISA supera los métodos de ajuste fino existentes (como LoRA). Consulta el [documento](https://arxiv.org/abs/2403.17919) para obtener más información. Puedes utilizar LISA especificando el parámetro `--use_lisa 1` en el comando de entrenamiento. Controla el número de capas activadas con `--lisa_activated_layers 2` y ajusta el intervalo de congelación de capas con `--lisa_interval_steps 20`.

* LoRA
  
  LoRA es un algoritmo de ajuste fino eficiente en parámetros que es más eficiente que el ajuste fino de todos los parámetros. Consulta [Ajuste Fino (LoRA)](#fine-tuning-lora) para más detalles.

* FlashAttention
  
  Soportamos FlashAttention-1 y FlashAttention-2. Para más detalles, consulta: [FlashAttention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md).

* Punto de Control de Gradientes
  
  [Punto de control de gradientes](https://github.com/cybertronai/gradient-checkpointing) es una técnica de optimización de memoria que intercambia cálculos por memoria para reducir el uso de la memoria de la GPU. Puedes utilizarlo agregando `--gradient_checkpointing` al comando de entrenamiento.

* Deepspeed Zero3
  
  LMFlow es compatible con [Deepspeed Zero-3 Offload](https://www.deepspeed.ai/2021/03/07/zero3-offload.html). Proporcionamos un archivo de configuración de deepspeed listo para usar [aquí](https://github.com/OptimalScale/LMFlow/blob/main/configs/deepspeed/zero3.json).

</details>


<details> <summary>Aceleración de inferencia</summary>

* Inferencia de CPU LLaMA
  
  ¡Gracias a [llama.cpp](https://github.com/ggerganov/llama.cpp), ahora todos pueden ejecutar su propio LLaMA (cuantificación de 4 bits) en la CPU! Proporcionamos un script para convertir los pesos de LLaMA LoRA en archivos `.pt`, solo necesita usar `convert-pth-to-ggml.py` de llama.cpp para realizar la cuantificación del modelo y así realizar la inferencia de LLaMA en la CPU.

* FlashAttention
  
  Apoyamos FlashAttention-1 y FlashAttention-2. Para más detalles, consulta: [FlashAttention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md).

</details>


<details> <summary>Texto largo</summary>

* Interpolación de posición del modelo LLaMA (Position Interpolation)
  
  Se admite la extensión de la ventana de contexto de LLaMA mediante interpolación de posición (Position Interpolation) (escalamiento lineal y NTK), consulte más detalles en: [Interpolación de posición](https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/Position_Interpolation.md).

</details>


<details> <summary>Personalización del modelo</summary>

* Ampliación del vocabulario
  
  Entrena tu propio tokenizador de SentencePiece y luego combínalo con el tokenizador de Hugging Face que viene con el modelo. Consulta: [Ampliación del vocabulario](https://github.com/OptimalScale/LMFlow/blob/main/scripts/vocab_extension).

</details>


<details> <summary>Multi-modal</summary>

* Chatbot multi-modal
  
  LMFlow admite entradas multi-modales (imágenes, texto). Consulta: [Chatbot multi-modal de LMFlow](https://github.com/OptimalScale/LMFlow/blob/main/scripts/run_vis_chatbot_gradio_minigpt4.sh).

</details>


## Support
Si necesitas ayuda, no dudes en presentar un [problema en Github](https://github.com/OptimalScale/LMFlow/issues).


## License
El código incluido en este proyecto está bajo la licencia Apache 2.0. Si desea utilizar los modelos incluidos en este proyecto para fines comerciales, por favor, póngase en contacto con el desarrollador para obtener autorización.


## Citation
Si encuentras este repositorio útil, por favor considera darle ⭐ y citarlo:

```
@article{diao2023lmflow,
  title={Lmflow: An extensible toolkit for finetuning and inference of large foundation models},
  author={Diao, Shizhe and Pan, Rui and Dong, Hanze and Shum, Ka Shun and Zhang, Jipeng and Xiong, Wei and Zhang, Tong},
  journal={arXiv preprint arXiv:2306.12420},
  year={2023}
}
```
```
@article{dong2023raft,
  title={Raft: Reward ranked finetuning for generative foundation model alignment},
  author={Dong, Hanze and Xiong, Wei and Goyal, Deepanshu and Pan, Rui and Diao, Shizhe and Zhang, Jipeng and Shum, Kashun and Zhang, Tong},
  journal={arXiv preprint arXiv:2304.06767},
  year={2023}
}
```
```
@article{pan2024lisa,
  title={LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning}, 
  author={Pan, Rui and Liu, Xiang and Diao, Shizhe and Pi, Renjie and Zhang, Jipeng and Han, Chi and Zhang, Tong},
  journal={arXiv preprint arXiv:2403.17919},
  year={2024}
}
```


================================================
FILE: docs/readme/README_hindi.md
================================================
<p align="center" width="100%">
<img src="../docs/assets/logo.png" alt="LMFlow" style="width: 100%; min-width: 300px; display: block; margin: auto; background-color: transparent;">
</p>

# LMFlow

<h4 align="center">
    <p>
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/README.md">English</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_zh-hans.md">简体中文</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_es.md">Español</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_jp.md">日本語</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_ko.md">한국어</a> |
        <b>हिंदी</b>
    </p>
</h4>

> [!NOTE]
> This README file was translated by LLM for reference only. Hindi speakers are welcome to submit PRs to polish the document!  

> [!NOTE]
> यह चैटजीपीटी द्वारा अनुवादित हिंदी संस्करण है, यदि कोई त्रुटि हो, तो संबंधित योगदानकर्ताओं द्वारा संशोधित किया जा सकता है। इसके साथ ही यदि कोई सामग्री अंग्रेजी संस्करण से भिन्न हो या मेल नहीं खाती हो, तो कृपया अंग्रेजी संस्करण को ही मान्य रखें। धन्यवाद।

[![Website](https://img.shields.io/badge/Website-Demo-20B2AA.svg)](https://lmflow.com)
[![Code License](https://img.shields.io/badge/Code%20License-Apache_2.0-green.svg)](https://github.com/OptimalScale/LMFlow/blob/main/LICENSE)
[![Python 3.9+](https://img.shields.io/badge/Python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/)
[![Doc](https://img.shields.io/badge/Website-Doc-ff69b4.svg)](https://optimalscale.github.io/LMFlow/)
[![Embark](https://img.shields.io/badge/Discord-LMFlow-%237289da.svg?logo=discord)](https://discord.gg/u9VJNpzhvA)
[![slack badge](https://img.shields.io/badge/Slack-Join-blueviolet?logo=slack&amp)](https://join.slack.com/t/lmflow/shared_invite/zt-1wju9nicy-woXbNtS~5MavHSAtiMxmxQ)
[![WeChat badge](https://img.shields.io/badge/WeChat-Join-brightgreen?logo=wechat&amp)](https://ibb.co/ZhM4hhn)

एक विस्तारयोग्य, सुविधाजनक और दक्ष टूलबॉक्स जो बड़े मशीन लर्निंग मॉडल को finetune करने के लिए बनाया गया है, जो सभी समुदाय के उपयोगकर्ताओं के लिए उपलब्ध होने के साथ-साथ उपयोगकर्ता मित्रता, गति और विश्वसनीयता के साथ डिजाइन किया गया है।

<p align="center" width="100%">
<img src="../docs/assets/features.png" alt="LMFlow-features" style="width: 100%; min-width: 300px; display: block; margin: auto;">
</p>


## Latest News
* [2024-04-25] :rocket: बातचीत टेम्पलेट का समर्थन! हमने नवीनतम [Llama-3](https://huggingface.co/meta-llama/Meta-Llama-3-70B) और [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) बातचीत टेम्पलेट को पूर्वनिर्धारित किया है, साथ ही कुछ अक्सर प्रयुक्त टेम्पलेट जैसे `chatml` भी (सभी टेम्पलेट [यहाँ](https://optimalscale.github.io/LMFlow/examples/DATASETS.html#conversation-template) देखें), और हम अधिक पूर्वनिर्धारित टेम्पलेट जोड़ने पर काम कर रहे हैं। शेल स्क्रिप्ट में संबंधित `--conversation_template` जोड़ें और आप तैयार हैं! :rocket:  
* [2024-03-27] [LISA](https://arxiv.org/abs/2403.17919) का समर्थन —— 24जीबी जीपीयू पर 7B मॉडल का प्रशिक्षण बिना ऑफलोडिंग के!  
* [2023-09-11] [स्पेक्युलेटिव डिकोडिंग](https://arxiv.org/abs/2211.17192) का समर्थन, इस्तेमाल के तरीके और साधारण प्रदर्शन आँकड़े देखने के लिए [उपयोग गाइड](https://github.com/OptimalScale/LMFlow/blob/main/scripts/speculative_decoding/README.md) पर क्लिक करें।
* [2023-08-14] [पोजीशन इंटरपोलेशन](https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) के माध्यम से LLaMA की संदर्भ विंडो को विस्तारित करने का समर्थन (लीनियर और NTK स्केलिंग)।
* [2023-08-07] [फ्लैश एटेंशन-2](https://crfm.stanford.edu/2023/07/17/flash2.html) का समर्थन, अधिक जानकारी के लिए [फ्लैश एटेंशन उपयोग गाइड](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) देखें।


## Table of Contents
- [LMFlow](#lmflow)
  - [Latest News](#latest-news)
  - [Table of Contents](#table-of-contents)
  - [Quick Start](#quick-start)
    - [Setup](#setup)
    - [Prepare Dataset](#prepare-dataset)
    - [Fine-Tuning (Full)](#fine-tuning-full)
    - [Fine-Tuning (LISA)](#fine-tuning-lisa)
    - [Fine-Tuning (LoRA)](#fine-tuning-lora)
    - [Inference](#inference)
    - [Deployment](#deployment)
    - [Evaluation](#evaluation)
  - [Supported Features](#supported-features)
  - [Support](#support)
  - [License](#license)
  - [Citation](#citation)


## Quick Start
### Setup
हमारे रेपो का Linux (Ubuntu 20.04) पर परीक्षण किया गया है। अन्य ऑपरेटिंग सिस्टम प्लेटफॉर्म (MacOS, Windows) का पूरी तरह से परीक्षण नहीं किया गया है, इसलिए आपको कुछ अनपेक्षित त्रुटियों का सामना करना पड़ सकता है। Linux/Windows WSL पर प्रयोग करने या Google Colab का उपयोग करके अनुभव करने की सिफारिश की जाती है।

CUDA 10.3-11.7 के लिए, `v0.0.5` या इससे पुराने संस्करणों का उपयोग करने की सिफारिश की जाती है। 11.7 से अधिक CUDA के लिए, बेहतर अनुभव के लिए हमारी स्थिर शाखा `>= v0.0.6` का उपयोग करें।
```bash
git clone https://github.com/OptimalScale/LMFlow.git
cd LMFlow
conda create -n lmflow python=3.9 -y
conda activate lmflow
conda install mpi4py
bash install.sh
```

### Prepare Dataset
कृपया हमारा [आधिकारिक दस्तावेज़ीकरण (अंग्रेजी में)](https://optimalscale.github.io/LMFlow/examples/DATASETS.html) देखें। आधिकारिक दस्तावेज़ीकरण का अनुवाद अभी प्रक्रिया में है, कृपया धैर्य रखें।

### Fine-Tuning (Full)
मॉडल को पूर्ण पैरामीटर फ़ाइन ट्यूनिंग करने से सभी पैरामीटर अपडेट होते हैं। GPT-2 का एक पूर्ण पैरामीटर फ़ाइन ट्यूनिंग का उदाहरण निम्नलिखित है:

```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune.sh \
  --model_name_or_path gpt2 \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_gpt2
```

>[!TIP]
>आप `--conversation_template` पैरामीटर जोड़कर बातचीत डेटासेट के लिए बातचीत टेम्पलेट निर्दिष्ट कर सकते हैं।
>
><details><summary>उदाहरण: Llama-3-8B के लिए बातचीत डेटासेट टेम्पलेट का निर्दिष्ट करें</summary>
>
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune.sh \
>  --model_name_or_path meta-llama/Meta-Llama-3-8B \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama3 \
>  --output_model_path output_models/finetuned_llama3_8b
>```
></details>

### Fine-Tuning (LISA)
[LISA](https://arxiv.org/abs/2403.17919) एक **मेमोरी-कुशल (memory-efficient)** फ़ाइन ट्यूनिंग एल्गोरिदम है, जो मेमोरी उपयोग और रैंडम रूप से अनफ्रीज़ की जाने वाली लेयरों की संख्या के बीच संतुलन बनाने की सुविधा देता है। निम्नलिखित स्क्रिप्ट का परीक्षण अभी तक केवल **एकल GPU** पर किया गया है। हमारे नवीनतम अपडेट पर ध्यान दें! :smile:
```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune_with_lisa.sh \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_llama2_7b \
  --lisa_activated_layers 1 \
  --lisa_interval_steps 20
```

> [!TIP]
> <details><summary>उदाहरण: Llama-2-7B के लिए बातचीत डेटा सेट टेम्पलेट का निर्दिष्ट करें</summary>  
> 
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune_with_lisa.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama2 \
>  --output_model_path output_models/finetuned_llama2_7b_lisa \
>  --lisa_activated_layers 1 \
>  --lisa_interval_steps 20
>```
> </details>

### Fine-Tuning (LoRA)
LoRA एक पैरामीटर-कुशल (parameter-efficient) फाइन-ट्यूनिंग एल्गोरिथ्म है जो पूर्ण-पैरामीटर फाइन-ट्यूनिंग से अधिक दक्ष है।
```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune_with_lora.sh \
  --model_name_or_path facebook/galactica-1.3b \
  --dataset_path data/alpaca/train_conversation \
  --output_lora_path output_models/finetuned_galactica_lora
```

> [!TIP]
> <details><summary>उदाहरण: Llama-2-7B के लिए बातचीत डेटा सेट टेम्पलेट निर्दिष्ट करें</summary>  
> 
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune_with_lora.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama2 \
>  --output_model_path output_models/finetuned_llama2_7b_lora
>```
> </details>
>
> <details><summary>LoRA वज़न को मिलाना</summary>
>
>निम्नलिखित आदेश का उपयोग करके LoRA वज़न और मूल मॉडल को मिलाया जा सकता है:  
>```sh
>./scripts/run_merge_lora.sh \
>  --model_name_or_path Qwen/Qwen1.5-1.8B \
>  --lora_model_path output_models/lora \
>  --output_model_path output_models/lora_merged
>```
></details>

### Inference
एक बार फ़ाइन-ट्यूनिंग समाप्त हो जाने पर, आप निम्न आदेशों का उपयोग करके मॉडल के साथ इंटरैक्ट कर सकते हैं।
```sh
./scripts/run_chatbot.sh output_models/finetuned_gpt2
```

### Deployment
यदि आप अपने मॉडल को स्थानीय रूप से डिप्लॉय करना चाहते हैं, तो हम ग्राडियो पर आधारित चैट रोबोट UI प्रदान करते हैं।
निम्नलिखित कमांड robin-7b के डेमो को शुरू कर सकते हैं, कृपया संदर्भ के लिए:
```sh
pip install gradio
python ./examples/chatbot_gradio.py --deepspeed configs/ds_config_chatbot.json --model_name_or_path YOUR-LLAMA  --lora_model_path ./robin-7b --prompt_structure "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: {input_text}###Assistant:"       --end_string "#" --max_new_tokens 200
```

### Evaluation
[LMFlow Benchmark](https://blog.gopenai.com/lmflow-benchmark-an-automatic-evaluation-framework-for-open-source-llms-ef5c6f142418) ओपन सोर्स एलएलएम के लिए एक स्वचालित मूल्यांकन फ्रेमवर्क है। हम भाषा मॉडल के विभिन्न पहलुओं का मूल्यांकन करने के लिए नेगेटिव लॉग लाइकलीहुड (एनएलएल) का उपयोग करते हैं, जैसे: चिटचैट, सामान्य ज्ञान आधारित तर्क और निर्देशों का पालन। अपने मॉडल का मूल्यांकन करने के लिए LMFlow Benchmark का उपयोग करने और हमारी [मॉडल तुलना (LLM comparison)](https://docs.google.com/spreadsheets/d/1JYh4_pxNzmNA9I0YM2epgRA7VXBIeIGS64gPJBg5NHA/edit?usp=sharing) में शामिल होने के लिए आपका स्वागत है।

GPT-2 XL को उदाहरण के रूप में, निम्नलिखित आदेश का पालन करके मूल्यांकन शुरू करें:
```sh
./scripts/run_benchmark.sh --model_name_or_path gpt2-xl
```
`--model_name_or_path` एक आवश्यक पैरामीटर है, जिसमें हगिंगफेस मॉडल नाम या मॉडल का स्थानीय पथ पास किया जा सकता है। मूल्यांकन परिणाम `./output_dir/gpt2-xl_lmflow_chat_nll_eval`, `./output_dir/gpt2-xl_all_nll_eval`, और `./output_dir/gpt2-xl_commonsense_qa_eval` के अंतर्गत `benchmark.log` में देखे जा सकते हैं।


## Supported Features
<details> <summary>तेज़ प्रदर्शन और मेमोरी अनुकूलन के लिए फ़ाइन-ट्यूनिंग</summary>

* LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning
  
  LISA एक मेमोरी-कुशल LLM फ़ाइन-ट्यूनिंग एल्गोरिदम है। फ़ाइन-ट्यूनिंग प्रक्रिया में लेयरों को चयनात्मक रूप से फ्रीज़ करके, LISA मौजूदा फ़ाइन-ट्यूनिंग विधियों (जैसे LoRA) से आगे निकलता है। अधिक जानकारी के लिए [पेपर](https://arxiv.org/abs/2403.17919) पर जाएं।
  LISA का उपयोग करने के लिए प्रशिक्षण कमांड में पैरामीटर `--use_lisa 1` निर्दिष्ट किया जा सकता है। सक्रिय की गई परतों की संख्या को `--lisa_activated_layers 2` द्वारा नियंत्रित किया जा सकता है, और फ्रीज़ की गई परतों के अंतराल को `--lisa_interval_steps 20` द्वारा समायोजित किया जा सकता है।

* LoRA
  
  LoRA एक पैरामीटर-कुशल (parameter-efficient) फ़ाइन-ट्यूनिंग एल्गोरिदम है, जो पूर्ण-पैरामीटर फ़ाइन-ट्यूनिंग से अधिक कुशल है। कृपया देखें: [फ़ाइन-ट्यूनिंग (LoRA)](#fine-tuning-lora)।

* FlashAttention

  LMFlow में FlashAttention-1 और नवीनतम FlashAttention-2 दोनों का समर्थन है। अधिक जानकारी के लिए [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) देखें।

* Gradient Checkpointing

  [ग्रेडिएंट चेकपॉइंटिंग](https://github.com/cybertronai/gradient-checkpointing) एक मेमोरी अनुकूलन तकनीक है जो कंप्यूट को मेमोरी के लिए विनिमय करती है। 
  यह उपयोगी होता है जब मॉडल GPU मेमोरी में फिट करने के लिए बहुत बड़ा हो। 
  इसे आप अपने प्रशिक्षण कमांड में `--gradient_checkpointing` जोड़कर उपयोग करें।

* Deepspeed Zero3

  LMFlow [Deepspeed Zero-3 Offload](https://www.deepspeed.ai/2021/03/07/zero3-offload.html) का समर्थन करता है। 
  हम एक उदाहरण [deepspeed कॉन्फ़िग](https://github.com/OptimalScale/LMFlow/blob/main/configs/deepspeed/zero3.json) प्रदान करते हैं, और आप इसे सीधे उपयोग कर सकते हैं।

</details>

<details> <summary>अनुमान त्वरण</summary>

* LLaMA Inference on CPU

  [llama.cpp](https://github.com/ggerganov/llama.cpp) के महान प्रयासों के धन्यवाद। यह सभी के लिए संभव है कि उनके LLaMA मॉडलों को CPU पर 4-बिट क्वांटाइजेशन के साथ चलाया जाए। हम LLaMA LoRA वेट्स को `.pt` फ़ाइलों में रूपांतरित करने के लिए एक स्क्रिप्ट प्रदान करते हैं। आपको केवल llama.cpp में `convert-pth-to-ggml.py` का उपयोग करना होगा ताकि क्वांटाइजेशन किया जा सके।

* FlashAttention

  LMFlow दोनों FlashAttention-1 और नवीनतम FlashAttention-2 का समर्थन करता है। अधिक विवरण के लिए [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) देखें।

</details>

<details> <summary>लंबा संदर्भ</summary>

* LLaMA मॉडल के लिए स्थिति अंतर्पोलेशन

  अब LMFlow LLaMA मॉडलों के लिए नवीनतम लीनियर और NTK (न्यूरल टैन्जेंट कर्नेल) स्केलिंग तकनीकों का समर्थन करता है। अधिक विवरण के लिए [पोज़िशन इंटरपोलेशन](https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) देखें।

</details>

<details> <summary>मॉडल कस्टमाइज़ेशन</summary>


* शब्दावली विस्तार

  अब आप अपने खुद के सेंटेंसपीस टोकनाइज़र को प्रशिक्षित कर सकते हैं और इसे मॉडल के मूल hf टोकनाइज़र के साथ मर्ज कर सकते हैं। अधिक विवरण के लिए [vocab_extension](https://github.com/OptimalScale/LMFlow/blob/main/scripts/vocab_extension) देखें।

</details>

<details> <summary>बहुविध</summary>

* Multimodal Chatbot

  एलएमफ्लो चित्रों और पाठ के मल्टीमॉडल इनपुट का समर्थन करता है। हमारे [एलएमफ्लो मल्टीमॉडल चैटबॉट](https://github.com/OptimalScale/LMFlow/blob/main/scripts/run_vis_chatbot_gradio_minigpt4.sh) को देखें।
  
</details>


## Support
यदि आपको किसी भी मदद की आवश्यकता हो, तो कृपया एक [Github इशू](https://github.com/OptimalScale/LMFlow/issues) प्रस्तुत करें।


## License
इस परियोजना में शामिल कोड [Apache 2.0 लाइसेंस](https://github.com/OptimalScale/LMFlow/blob/main/LICENSE) के तहत लाइसेंस प्राप्त है। इस परियोजना में शामिल कोड और मॉडल का व्यापारिक उद्देश्यों के लिए उपयोग करने की इच्छा हो तो, कृपया योगदानकर्ताओं से संपर्क करें।


## Citation
यदि आपको यह रेपो उपयोगी लगता है, तो कृपया ⭐ देने और उद्धरण करने का विचार करें:

```
@article{diao2023lmflow,
  title={Lmflow: An extensible toolkit for finetuning and inference of large foundation models},
  author={Diao, Shizhe and Pan, Rui and Dong, Hanze and Shum, Ka Shun and Zhang, Jipeng and Xiong, Wei and Zhang, Tong},
  journal={arXiv preprint arXiv:2306.12420},
  year={2023}
}
```
```
@article{dong2023raft,
  title={Raft: Reward ranked finetuning for generative foundation model alignment},
  author={Dong, Hanze and Xiong, Wei and Goyal, Deepanshu and Pan, Rui and Diao, Shizhe and Zhang, Jipeng and Shum, Kashun and Zhang, Tong},
  journal={arXiv preprint arXiv:2304.06767},
  year={2023}
}
```
```
@article{pan2024lisa,
  title={LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning}, 
  author={Pan, Rui and Liu, Xiang and Diao, Shizhe and Pi, Renjie and Zhang, Jipeng and Han, Chi and Zhang, Tong},
  journal={arXiv preprint arXiv:2403.17919},
  year={2024}
}
```


================================================
FILE: docs/readme/README_jp.md
================================================
<p align="center" width="100%">
<img src="../docs/assets/logo.png" alt="LMFlow" style="width: 100%; min-width: 300px; display: block; margin: auto; background-color: transparent;">
</p>

# LMFlow

<h4 align="center">
    <p>
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/README.md">English</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_zh-hans.md">简体中文</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_es.md">Español</a> |
        <b>日本語</b> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_ko.md">한국어</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_hindi.md">हिंदी</a>
    </p>
</h4>

> [!NOTE]
> This README file was translated by LLM for reference only. Japanese speakers are welcome to submit PRs to polish the document!  

> [!NOTE]  
> 日本語版はChatGPTによって翻訳されました。もし間違いがあれば、contributorに修正していただけると幸いです。また、英語版と内容に差異がある場合は、英語版を優先してください。

[![Website](https://img.shields.io/badge/Website-Demo-20B2AA.svg)](https://lmflow.com)
[![Code License](https://img.shields.io/badge/Code%20License-Apache_2.0-green.svg)](https://github.com/OptimalScale/LMFlow/blob/main/LICENSE)
[![Python 3.9+](https://img.shields.io/badge/Python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/)
[![Doc](https://img.shields.io/badge/Website-Doc-ff69b4.svg)](https://optimalscale.github.io/LMFlow/)
[![Embark](https://img.shields.io/badge/Discord-LMFlow-%237289da.svg?logo=discord)](https://discord.gg/u9VJNpzhvA)
[![slack badge](https://img.shields.io/badge/Slack-Join-blueviolet?logo=slack&amp)](https://join.slack.com/t/lmflow/shared_invite/zt-1wju9nicy-woXbNtS~5MavHSAtiMxmxQ)
[![WeChat badge](https://img.shields.io/badge/WeChat-Join-brightgreen?logo=wechat&amp)](https://ibb.co/ZhM4hhn)

拡張性、利便性、効率性に優れた、大規模な機械学習モデルのファインチューニングに最適なツールボックスで、ユーザーフレンドリーで高速かつ信頼性があり、コミュニティ全体で利用可能な設計です。


<p align="center" width="100%">
<img src="../docs/assets/features.png" alt="LMFlow-features" style="width: 100%; min-width: 300px; display: block; margin: auto;">
</p>


## Latest News
* [2024-04-25] :rocket: 会話テンプレートのサポート!最新の[Llama-3](https://huggingface.co/meta-llama/Meta-Llama-3-70B)と[Phi-3](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct)の会話テンプレートを事前設定しました。また、`chatml`などのよく使用されるテンプレートも用意しています(すべてのテンプレートは[こちら](https://optimalscale.github.io/LMFlow/examples/DATASETS.html#conversation-template)を参照してください)。さらに、追加の事前設定済みテンプレートを追加しています。シェルスクリプトに対応する`--conversation_template`を追加するだけで、準備完了です! :rocket:  
* [2024-03-27] [LISA](https://arxiv.org/abs/2403.17919) に対応 —— オフロード不要、24GBのGPUで7Bモデルをトレーニング!  
* [2023-09-11] [スペキュラティブ・デコーディング](https://arxiv.org/abs/2211.17192) をサポート、使用方法や簡単な性能統計については [使用ガイド](https://github.com/OptimalScale/LMFlow/blob/main/scripts/speculative_decoding/README.md) を参照してください。
* [2023-08-14] [位置補間(Linear & NTK scaling)](https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) を使用したLLaMAのコンテキストウィンドウを拡張する機能をサポートしています。
* [2023-08-07] [Flash Attention-2](https://crfm.stanford.edu/2023/07/17/flash2.html) をサポートしています。詳細は[Flash Attentionの使用ガイド](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md)を参照してください。


## Table of Contents
- [LMFlow](#lmflow)
  - [Latest News](#latest-news)
  - [Table of Contents](#table-of-contents)
  - [Quick Start](#quick-start)
    - [Setup](#setup)
    - [Prepare Dataset](#prepare-dataset)
    - [Fine-Tuning (Full)](#fine-tuning-full)
    - [Fine-Tuning (LISA)](#fine-tuning-lisa)
    - [Fine-Tuning (LoRA)](#fine-tuning-lora)
    - [Inference](#inference)
    - [Deployment](#deployment)
    - [Evaluation](#evaluation)
  - [Supported Features](#supported-features)
  - [Support](#support)
  - [License](#license)
  - [Citation](#citation)


## Quick Start
### Setup
私たちのリポジトリはすでにLinux(Ubuntu 20.04)で包括的なテストを完了しています。他のオペレーティングシステムプラットフォーム(MacOS、Windows)は完全にテストされていませんので、予期しないエラーが発生する可能性があります。まずLinux/Windows WSLで試してみるか、またはGoogle Colabをご利用ください。
CUDA 10.3-11.7については、`v0.0.5`またはそれ以前のバージョンを使用することをお勧めします。11.7よりも新しいCUDAの場合は、より良い体験を得るために、安定したブランチ`>= v0.0.6`を使用してください。
```bash
git clone https://github.com/OptimalScale/LMFlow.git
cd LMFlow
conda create -n lmflow python=3.9 -y
conda activate lmflow
conda install mpi4py
bash install.sh
```

### Prepare Dataset
当社の[公式ドキュメント(英語版)](https://optimalscale.github.io/LMFlow/examples/DATASETS.html)を参照してください。公式ドキュメントは現在翻訳中ですので、しばらくお待ちください。

### Fine-Tuning (Full)
全パラメーターファインチューニングは、モデルのすべてのパラメーターを更新します。GPT-2の全パラメーターファインチューニングの例を以下に示します:

```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune.sh \
  --model_name_or_path gpt2 \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_gpt2
```

> [!TIP]
> 対話データセットに対話テンプレートを指定するには、`--conversation_template`パラメータを追加します。
> 
> <details><summary>Llama-3-8Bに対話データセットテンプレートを指定する例</summary>  
> 
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune.sh \
>  --model_name_or_path meta-llama/Meta-Llama-3-8B \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama3 \
>  --output_model_path output_models/finetuned_llama3_8b
>```
> </details>

### Fine-Tuning (LISA)
[LISA](https://arxiv.org/abs/2403.17919) は、**メモリ効率** の高いファインチューニングアルゴリズムであり、メモリ使用量とランダムに凍結解除される層の数との間でバランスを取ることができます。以下のスクリプトは現在、**単一のGPU** 上でのみテストされています。最新情報にご注意ください! :smile:
```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune_with_lisa.sh \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_llama2_7b \
  --lisa_activated_layers 1 \
  --lisa_interval_steps 20
```

> [!TIP]
> <details><summary>例: Llama-2-7Bの対話データセットテンプレートの指定</summary>  
> 
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune_with_lisa.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama2 \
>  --output_model_path output_models/finetuned_llama2_7b_lisa \
>  --lisa_activated_layers 1 \
>  --lisa_interval_steps 20
>```
> </details> 

### Fine-Tuning (LoRA)
LoRAは、全パラメータ微調整よりも効率的なパラメータ効率微調整アルゴリズムです。
```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune_with_lora.sh \
  --model_name_or_path facebook/galactica-1.3b \
  --dataset_path data/alpaca/train_conversation \
  --output_lora_path output_models/finetuned_galactica_lora
```

> [!TIP]
> <details><summary>例:Llama-2-7Bに対する対話データセットのテンプレートを指定する</summary>  
>
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune_with_lora.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama2 \
>  --output_model_path output_models/finetuned_llama2_7b_lora
>```
></details>
>
><details><summary>LoRA重みの結合</summary>
>
>以下のコマンドを使用して、LoRAの重みと元のモデルを結合できます:  
>```sh
>./scripts/run_merge_lora.sh \
>  --model_name_or_path Qwen/Qwen1.5-1.8B \
>  --lora_model_path output_models/lora \
>  --output_model_path output_models/lora_merged
>```
></details>

### Inference
ファインチューニングが完了したら、以下のコマンドを使用してモデルと対話できます。
```sh
./scripts/run_chatbot.sh output_models/finetuned_gpt2
```

### Deployment
ローカルでモデルを展開したい場合、GradioをベースにしたチャットボットUIが提供されています。
以下のコマンドでrobin-7bのデモを起動できます。参考にしてください:
```sh
pip install gradio
python ./examples/chatbot_gradio.py --deepspeed configs/ds_config_chatbot.json --model_name_or_path YOUR-LLAMA  --lora_model_path ./robin-7b --prompt_structure "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: {input_text}###Assistant:"       --end_string "#" --max_new_tokens 200
```

### Evaluation
[LMFlow Benchmark](https://blog.gopenai.com/lmflow-benchmark-an-automatic-evaluation-framework-for-open-source-llms-ef5c6f142418) はオープンソースLLMの自動評価フレームワークです。我々はNegative Log Likelihood (NLL) を使用して、LLMのチャット、一般的な推論、および命令に従う能力など、さまざまな側面を評価します。お手持ちのモデルを評価するために、LMFlow Benchmarkをご利用ください。そして、[モデルの比較](https://docs.google.com/spreadsheets/d/1JYh4_pxNzmNA9I0YM2epgRA7VXBIeIGS64gPJBg5NHA/edit?usp=sharing)にご参加ください。

GPT-2 XLを例に挙げますと、次のコマンドを使用して評価を開始します:
```sh
./scripts/run_benchmark.sh --model_name_or_path gpt2-xl
```
`--model_name_or_path`は必須のパラメータであり、Hugging Faceのモデル名またはモデルのローカルパスを渡すことができます。
評価結果は、`./output_dir/gpt2-xl_lmflow_chat_nll_eval`、`./output_dir/gpt2-xl_all_nll_eval`、および `./output_dir/gpt2-xl_commonsense_qa_eval`の`benchmark.log`で確認できます。


## Supported Features
<details> <summary>ファインチューニングの高速化&メモリ最適化</summary>

* LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning

  LISAはメモリ効率の高いLLMファインチューニングアルゴリズムです。微調整プロセス中に層を選択的に凍結することにより、LISAは既存のファインチューニング方法(LoRAなど)を超えています。詳細については[論文](https://arxiv.org/abs/2403.17919)をご覧ください。
  LISAを使用するには、トレーニングコマンドでパラメータ `--use_lisa 1` を指定します。アクティブ化される層の数を `--lisa_activated_layers 2` で制御し、フリーズされる層の間隔を `--lisa_interval_steps 20` で調整できます。

* LoRA

  LoRAは、全パラメータのファインチューニングよりも効率的な、パラメータ効率の高い(parameter-efficient)ファインチューニングアルゴリズムです。詳細はこちらを参照してください:[ファインチューニング(LoRA)](#fine-tuning-lora)。

* FlashAttention

  FlashAttention-1とFlashAttention-2をサポートしています。詳細については[FlashAttention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md)をご覧ください。

* Gradient Checkpointing

  [Gradient checkpointing](https://github.com/cybertronai/gradient-checkpointing)は、メモリ最適化技術の一種であり、計算量と引き換えにGPUメモリの使用量を削減します。トレーニングコマンドに `--gradient_checkpointing` を追加すると使用できます。

* Deepspeed Zero3

  LMFlowは[Deepspeed Zero-3 Offload](https://www.deepspeed.ai/2021/03/07/zero3-offload.html)をサポートしています。すぐに使える [deepspeed設定ファイル](https://github.com/OptimalScale/LMFlow/blob/main/configs/deepspeed/zero3.json) を提供しています。

</details>


<details> <summary>推論の高速化</summary>

* LLaMA CPU推論
  
  [llama.cpp](https://github.com/ggerganov/llama.cpp)に感謝します。これにより、誰もがCPU上で自分のLLaMA(4ビット量子化)を実行できるようになりました!LLaMA LoRA重みを`.pt`ファイルに変換するスクリプトを提供しており、`convert-pth-to-ggml.py`を使用してモデルを量子化するだけで、LLaMA CPU推論を行うことができます。

* FlashAttention
  
  FlashAttention-1とFlashAttention-2をサポートしています。詳細はこちらをご覧ください:[FlashAttention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md)。

</details>


<details> <summary>長文</summary>

* LLaMAモデルの位置補間(Position Interpolation)

  位置補間(Linear & NTK scaling)を使用してLLaMAのコンテキストウィンドウを拡張することができます。詳細はこちら:[位置補間](https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md)。

</details>


<details> <summary>モデルのカスタマイズ</summary>

* 語彙の拡張

  独自のsentencepiece tokenizerをトレーニングし、それをモデルに含まれるhuggingface tokenizerとマージします。詳細はこちら:[語彙の拡張](https://github.com/OptimalScale/LMFlow/blob/main/scripts/vocab_extension)。

</details>


<details> <summary>マルチモーダル</summary>

* マルチモーダルチャットボット

  LMFlowはマルチモーダル(画像、テキスト)入力をサポートしています。詳細はこちら:[LMFlowマルチモーダルチャットボット](https://github.com/OptimalScale/LMFlow/blob/main/scripts/run_vis_chatbot_gradio_minigpt4.sh)。

</details>


## Support
何かお困りのことがございましたら、[GitHubのissue](https://github.com/OptimalScale/LMFlow/issues)にご投稿ください。


## License
このプロジェクトに含まれるコードはApache 2.0ライセンスで提供されています。このプロジェクトに含まれるモデルを商業目的で使用したい場合は、プロジェクトの開発者に連絡して許可を取得してください。


## Citation
もしこのリポジトリが役立った場合は、ぜひ⭐をつけて引用してください。

```
@article{diao2023lmflow,
  title={Lmflow: An extensible toolkit for finetuning and inference of large foundation models},
  author={Diao, Shizhe and Pan, Rui and Dong, Hanze and Shum, Ka Shun and Zhang, Jipeng and Xiong, Wei and Zhang, Tong},
  journal={arXiv preprint arXiv:2306.12420},
  year={2023}
}
```
```
@article{dong2023raft,
  title={Raft: Reward ranked finetuning for generative foundation model alignment},
  author={Dong, Hanze and Xiong, Wei and Goyal, Deepanshu and Pan, Rui and Diao, Shizhe and Zhang, Jipeng and Shum, Kashun and Zhang, Tong},
  journal={arXiv preprint arXiv:2304.06767},
  year={2023}
}
```
```
@article{pan2024lisa,
  title={LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning}, 
  author={Pan, Rui and Liu, Xiang and Diao, Shizhe and Pi, Renjie and Zhang, Jipeng and Han, Chi and Zhang, Tong},
  journal={arXiv preprint arXiv:2403.17919},
  year={2024}
}
```


================================================
FILE: docs/readme/README_ko.md
================================================
<p align="center" width="100%">
<img src="../docs/assets/logo.png" alt="LMFlow" style="width: 100%; min-width: 300px; display: block; margin: auto; background-color: transparent;">
</p>

# LMFlow

<h4 align="center">
    <p>
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/README.md">English</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_zh-hans.md">简体中文</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_es.md">Español</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_jp.md">日本語</a> |
        <b>한국어</b> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_hindi.md">हिंदी</a>
    </p>
</h4>

> [!NOTE]
> The Korean README file was translated by an LLM for reference only. Korean speakers are welcome to submit a PR to polish the document!  

> [!NOTE]  
> 한국어 README 파일은 참고용으로 LLM에 의해 번역되었습니다. 한국어 사용자들은 문서를 개선하기 위해 PR을 제출할 것을 환영합니다!  

[![Website](https://img.shields.io/badge/Website-Demo-20B2AA.svg)](https://lmflow.com)
[![Code License](https://img.shields.io/badge/Code%20License-Apache_2.0-green.svg)](https://github.com/OptimalScale/LMFlow/blob/main/LICENSE)
[![Python 3.9+](https://img.shields.io/badge/Python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/)
[![Doc](https://img.shields.io/badge/Website-Doc-ff69b4.svg)](https://optimalscale.github.io/LMFlow/)
[![Embark](https://img.shields.io/badge/Discord-LMFlow-%237289da.svg?logo=discord)](https://discord.gg/u9VJNpzhvA)
[![slack badge](https://img.shields.io/badge/Slack-Join-blueviolet?logo=slack&amp)](https://join.slack.com/t/lmflow/shared_invite/zt-1wju9nicy-woXbNtS~5MavHSAtiMxmxQ)
[![WeChat badge](https://img.shields.io/badge/WeChat-Join-brightgreen?logo=wechat&amp)](https://ibb.co/ZhM4hhn)

다음은 사용자 친화적이고 빠르며 신뢰할 수 있으며 커뮤니티 전체에 액세스할 수 있도록 설계된 대규모 기계 학습 모델을 미세 조정하는 데 유용한 확장 가능하고 편리하며 효율적인 도구 상자입니다.

<p align="center" width="100%">
<img src="../docs/assets/features.png" alt="LMFlow-features" style="width: 100%; min-width: 300px; display: block; margin: auto;">
</p>


## Latest News
* [2024-04-25] :rocket: 대화 템플릿을 지원합니다! 최신 [Llama-3](https://huggingface.co/meta-llama/Meta-Llama-3-70B) 및 [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) 대화 템플릿과 `chatml`과 같은 자주 사용되는 템플릿을 미리 설정해 두었습니다 ([여기](https://optimalscale.github.io/LMFlow/examples/DATASETS.html#conversation-template)에서 모든 템플릿을 확인하세요). 더 많은 미리 설정된 템플릿을 추가하는 작업 중에 있습니다. 셸 스크립트에 해당하는 `--conversation_template`를 추가하면 됩니다! :rocket:
* [2024-03-27] [LISA](https://arxiv.org/abs/2403.17919)를 지원합니다. 오프로딩(offloading) 없이도 24G 메모리에서 7B 모델 훈련이 가능합니다!  
* [2023-09-11] [추론적 디코딩 (speculative decoding)](https://arxiv.org/abs/2211.17192)을 지원합니다. 사용법 및 가속화 세부 정보는 [speculative_decoding](https://github.com/OptimalScale/LMFlow/blob/main/scripts/speculative_decoding/README.md) 를 확인하세요.
* [2023-08-14] LLaMA 모델에 대한 위치 보간(선형 및 NTK 스케일링)을 사용하여 긴 문맥 추론을 지원합니다. 자세한 내용은 [Position Interpolation](https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) 를 확인하세요.
* [2023-08-07] [Flash Attention-2](https://crfm.stanford.edu/2023/07/17/flash2.html)를 지원합니다. 자세한 내용은 [Flash Attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) 를 확인하세요.


## Table of Contents
- [LMFlow](#lmflow)
  - [Latest News](#latest-news)
  - [Table of Contents](#table-of-contents)
  - [Quick Start](#quick-start)
    - [Setup](#setup)
    - [Prepare Dataset](#prepare-dataset)
    - [Fine-Tuning (Full)](#fine-tuning-full)
    - [Fine-Tuning (LISA)](#fine-tuning-lisa)
    - [Fine-Tuning (LoRA)](#fine-tuning-lora)
    - [Inference](#inference)
    - [Deployment](#deployment)
    - [Evaluation](#evaluation)
  - [Supported Features](#supported-features)
  - [Support](#support)
  - [License](#license)
  - [Citation](#citation)


## Quick Start
### Setup
저희의 Repo는 이미 리눅스 (우분투 20.04)에서 완전한 테스트가 이루어졌습니다. 다른 운영 체제 플랫폼 (맥OS, 윈도우)은 아직 완전히 테스트되지 않았으므로 예상치 못한 오류가 발생할 수 있습니다. 먼저 리눅스/윈도우 WSL에서 사용해보거나 Google Colab을 사용하는 것을 권장합니다.
CUDA 10.3-11.7에 대해서는 `v0.0.5` 및 그 이전 버전을 사용하는 것이 좋습니다. 11.7보다 큰 CUDA의 경우, 더 나은 경험을 위해 우리의 stable 브랜치인 `>= v0.0.6` 을 사용하십시오.
```bash
git clone https://github.com/OptimalScale/LMFlow.git
cd LMFlow
conda create -n lmflow python=3.9 -y
conda activate lmflow
conda install mpi4py
bash install.sh
```

### Prepare Dataset
저희의 [공식 문서(영문)](https://optimalscale.github.io/LMFlow/examples/DATASETS.html) 를 참고해 주세요. 공식 문서는 현재 번역 중이며, 조금만 기다려 주시기 바랍니다.

### Fine-Tuning (Full)
전체 매개변수 파인 튜닝은 모델의 모든 매개변수를 업데이트합니다. GPT-2의 전체 매개변수 파인 튜닝의 예시는 아래와 같습니다:

```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune.sh \
  --model_name_or_path gpt2 \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_gpt2
```

> [!TIP]
> 대화 데이터셋에 대화 템플릿을 지정하려면 `--conversation_template` 매개변수를 추가할 수 있습니다.
> 
> <details><summary>예시: Llama-3-8B에 대화 데이터셋 템플릿 지정</summary>  
> 
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune.sh \
>  --model_name_or_path meta-llama/Meta-Llama-3-8B \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama3 \
>  --output_model_path output_models/finetuned_llama3_8b
>```
> </details>

### Fine-Tuning (LISA)
[LISA](https://arxiv.org/abs/2403.17919) 는 **메모리 효율적인(memory-efficient)** 파인 튜닝 알고리즘이며, 메모리와 무작위로 해동하는 레이어 수 사이의 균형을 가능하게 합니다. 아래 스크립트는 현재 **단일 GPU** 에서만 테스트되었습니다. 최신 업데이트에 주목해 주세요! :smile:
```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune_with_lisa.sh \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_llama2_7b \
  --lisa_activated_layers 1 \
  --lisa_interval_steps 20
```

> [!TIP]
> <details><summary>예시: Llama-2-7B 대화 데이터셋 템플릿 지정</summary>  
>
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune_with_lisa.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama2 \
>  --output_model_path output_models/finetuned_llama2_7b_lisa \
>  --lisa_activated_layers 1 \
>  --lisa_interval_steps 20
>```
></details>

### Fine-Tuning (LoRA)
LoRA는 전체 매개변수 미세 조정보다 더 효율적인, 매개변수 효율적(parameter-efficient) 미세 조정 알고리즘입니다.
```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune_with_lora.sh \
  --model_name_or_path facebook/galactica-1.3b \
  --dataset_path data/alpaca/train_conversation \
  --output_lora_path output_models/finetuned_galactica_lora
```

> [!TIP]
> <details><summary>예시: Llama-2-7B 대화 데이터셋 템플릿 지정</summary>  
> 
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune_with_lora.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama2 \
>  --output_model_path output_models/finetuned_llama2_7b_lora
>```
> </details>
>
> <details><summary>LoRA 가중치 병합</summary>
>
>아래 명령어를 사용하여 LoRA 가중치를 원본 모델과 병합할 수 있습니다:  
>```sh
>./scripts/run_merge_lora.sh \
>  --model_name_or_path Qwen/Qwen1.5-1.8B \
>  --lora_model_path output_models/lora \
>  --output_model_path output_models/lora_merged
>```
></details>

### Inference
미세 조정이 완료된 후에는 다음 명령을 사용하여 모델과 대화할 수 있습니다.
```sh
./scripts/run_chatbot.sh output_models/finetuned_gpt2
```

### Deployment
로컬에 모델을 배포하려는 경우, Gradio 기반의 챗봇 UI를 제공합니다. Robin-7b의 데모를 시작하려면 다음 명령을 참고하세요:
```sh
pip install gradio
python ./examples/chatbot_gradio.py --deepspeed configs/ds_config_chatbot.json --model_name_or_path YOUR-LLAMA  --lora_model_path ./robin-7b --prompt_structure "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: {input_text}###Assistant:"       --end_string "#" --max_new_tokens 200
```

### Evaluation
[LMFlow Benchmark](https://blog.gopenai.com/lmflow-benchmark-an-automatic-evaluation-framework-for-open-source-llms-ef5c6f142418)는 오픈 소스 LLM을 자동으로 평가하기 위한 프레임워크입니다. 우리는 Negative Log Likelihood (NLL)을 평가 지표로 사용하여 대화, 상식 추론 및 지시 따름 능력과 같은 LLM의 여러 측면을 평가합니다. LMFlow Benchmark를 사용하여 손에 있는 모델을 평가하고 [모델 비교 (LLM Comparison)](https://docs.google.com/spreadsheets/d/1JYh4_pxNzmNA9I0YM2epgRA7VXBIeIGS64gPJBg5NHA/edit?usp=sharing)에 참여하십시오.

GPT-2 XL을 예로 들면 다음 명령으로 평가를 시작할 수 있습니다:
```sh
./scripts/run_benchmark.sh --model_name_or_path gpt2-xl
```
`--model_name_or_path`은 필수 입력 항목이며, huggingface 모델 이름 또는 모델의 로컬 경로를 전달할 수 있습니다. `./output_dir/gpt2-xl_lmflow_chat_nll_eval`, `./output_dir/gpt2-xl_all_nll_eval`, 그리고 `./output_dir/gpt2-xl_commonsense_qa_eval` 폴더 내의 `benchmark.log`를 통해 평가 결과를 확인할 수 있습니다.


## Supported Features
<details> <summary>미세 조정 가속 & 메모리 최적화</summary>

* LISA: 메모리 효율적인 대규모 언어 모델 미세 조정을 위한 레이어별 중요도 샘플링

  LISA는 메모리 효율적인 LLM 미세 조정 알고리즘입니다. 미세 조정 과정에서 층을 선택적으로 고정함으로써, LISA는 LoRA와 같은 기존의 미세 조정 방법을 뛰어넘습니다. 자세한 내용은 [논문](https://arxiv.org/abs/2403.17919)을 참조하십시오.
  훈련 명령어에 `--use_lisa 1` 매개변수를 지정하여 LISA를 사용할 수 있습니다. 활성화된 층의 수는 `--lisa_activated_layers 2`로 제어되며, 고정된 층의 간격은 `--lisa_interval_steps 20`으로 조정할 수 있습니다.

* LoRA

  LoRA는 전체 파라미터 튜닝보다 효율적인, 파라미터 효율적(parameter-efficient) 튜닝 알고리즘입니다. 자세한 내용은 [Fine-Tuning (LoRA)](#fine-tuning-lora)를 참조하십시오.

* FlashAttention

  FlashAttention-1 및 FlashAttention-2를 지원합니다. 자세한 내용은 [FlashAttention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md)를 참조하십시오.

* Gradient Checkpointing

  [Gradient checkpointing](https://github.com/cybertronai/gradient-checkpointing)은 메모리 최적화 기술로, 핵심 아이디어는 메모리 점유를 줄이기 위해 계산을 메모리와 교환하는 것입니다. 훈련 명령에 `--gradient_checkpointing`을 추가하여 사용할 수 있습니다.

* Deepspeed Zero3

  LMFlow는 [Deepspeed Zero-3 Offload](https://www.deepspeed.ai/2021/03/07/zero3-offload.html)를 지원합니다. 사용 가능한 [deepspeed 설정 파일](https://github.com/OptimalScale/LMFlow/blob/main/configs/ds_config_zero3.json)을 제공합니다.

</details>


<details> <summary>추론 가속화</summary>

* LLaMA CPU 추론
  
  [llama.cpp](https://github.com/ggerganov/llama.cpp)에 감사드립니다. 이제 모든 사람이 CPU에서 자신의 LLaMA(4-bit 양자화)를 실행할 수 있습니다! 우리는 LLaMA LoRA 가중치를 `.pt` 파일로 변환하는 스크립트를 제공하며, llama.cpp의 `convert-pth-to-ggml.py`를 사용하여 모델 양자화를 수행하여 LLaMA CPU 추론을 진행할 수 있습니다.

* FlashAttention

  FlashAttention-1 및 FlashAttention-2를 지원합니다. 자세한 내용은 [FlashAttention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md)를 참조하십시오.

</details>


<details> <summary>긴 텍스트</summary>

* LLaMA 모델의 위치 보간 (Position Interpolation)
  
  위치 보간 (Linear & NTK scaling을 통한)을 지원하여 LLaMA의 컨텍스트 창을 확장합니다. 자세한 내용은 여기를 참조하세요: [위치 보간](https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md).

</details>


<details> <summary>모델 커스터마이징</summary>

* 어휘 확장
  
  자체 sentencepiece tokenizer를 학습한 다음 모델에 내장된 huggingface tokenizer와 결합하세요! 자세한 내용은 여기를 참조하세요: [어휘 확장](https://github.com/OptimalScale/LMFlow/blob/main/scripts/vocab_extension).

</details>


<details> <summary>다중 모달</summary>

* 다중 모달 챗봇
  
  LMFlow는 다중 모달 (이미지, 텍스트) 입력을 지원합니다. 자세한 내용은 여기를 참조하세요: [LMFlow 다중 모달 챗봇](https://github.com/OptimalScale/LMFlow/blob/main/scripts/run_vis_chatbot_gradio_minigpt4.sh).

</details>


## Support
도움이 필요하면 공식 [깃 허브 레포지토리](https://github.com/OptimalScale/LMFlow)에 이슈를 생성해주세요.


## License
이 프로젝트에 포함된 코드는 Apache 2.0 라이센스를 사용합니다. 이 프로젝트에 포함된 모델을 상업적 용도로 사용하려는 경우, 프로젝트 개발자에게 허가를 요청하십시오. 


## Citation
이 repository를 유용하게 사용하셨다면 ⭐을 눌러주시고 다음을 통해 인용해주시면 감사하겠습니다. [arXiv](https://arxiv.org/abs/2306.12420)

```
@article{diao2023lmflow,
  title={Lmflow: An extensible toolkit for finetuning and inference of large foundation models},
  author={Diao, Shizhe and Pan, Rui and Dong, Hanze and Shum, Ka Shun and Zhang, Jipeng and Xiong, Wei and Zhang, Tong},
  journal={arXiv preprint arXiv:2306.12420},
  year={2023}
}
```
```
@article{dong2023raft,
  title={Raft: Reward ranked finetuning for generative foundation model alignment},
  author={Dong, Hanze and Xiong, Wei and Goyal, Deepanshu and Pan, Rui and Diao, Shizhe and Zhang, Jipeng and Shum, Kashun and Zhang, Tong},
  journal={arXiv preprint arXiv:2304.06767},
  year={2023}
}
```
```
@article{pan2024lisa,
  title={LISA: Layerwise Importance Sampling for Memory-Efficient Large Language Model Fine-Tuning}, 
  author={Pan, Rui and Liu, Xiang and Diao, Shizhe and Pi, Renjie and Zhang, Jipeng and Han, Chi and Zhang, Tong},
  journal={arXiv preprint arXiv:2403.17919},
  year={2024}
}
```


================================================
FILE: docs/readme/README_zh-hans.md
================================================
<p align="center" width="100%">
<img src="../docs/assets/logo.png" alt="LMFlow" style="width: 100%; min-width: 300px; display: block; margin: auto; background-color: transparent;">
</p>

# LMFlow

<h4 align="center">
    <p>
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/README.md">English</a> |
        <b>简体中文</b> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_es.md">Español</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_jp.md">日本語</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_ko.md">한국어</a> |
        <a href="https://github.com/OptimalScale/LMFlow/blob/main/docs/readme/README_hindi.md">हिंदी</a>
    </p>
</h4>

[![Website](https://img.shields.io/badge/Website-Demo-20B2AA.svg)](https://lmflow.com)
[![Code License](https://img.shields.io/badge/Code%20License-Apache_2.0-green.svg)](https://github.com/OptimalScale/LMFlow/blob/main/LICENSE)
[![Python 3.9+](https://img.shields.io/badge/Python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/)
[![Doc](https://img.shields.io/badge/Website-Doc-ff69b4.svg)](https://optimalscale.github.io/LMFlow/)
[![Embark](https://img.shields.io/badge/Discord-LMFlow-%237289da.svg?logo=discord)](https://discord.gg/u9VJNpzhvA)
[![slack badge](https://img.shields.io/badge/Slack-Join-blueviolet?logo=slack&amp)](https://join.slack.com/t/lmflow/shared_invite/zt-1wju9nicy-woXbNtS~5MavHSAtiMxmxQ)
[![WeChat badge](https://img.shields.io/badge/WeChat-Join-brightgreen?logo=wechat&amp)](https://ibb.co/ZhM4hhn)

一个可扩展、方便和高效的工具箱,用于微调大型机器学习模型。我们的目标是开发一套用户友好、快速可靠,并对整个社区开放的全流程微调代码库。

<p align="center" width="100%">
<img src="../docs/assets/features.png" alt="LMFlow-features" style="width: 100%; min-width: 300px; display: block; margin: auto;">
</p>


## 新闻
* [2024-04-25] :rocket: 支持多轮对话数据格式以及对话模板!我们已经添加了近期热门模型 [Llama-3](https://huggingface.co/meta-llama/Meta-Llama-3-70B) 和 [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct)的对应模板,也提供了一些如`chatml`等常用的模板([这里](https://optimalscale.github.io/LMFlow/examples/DATASETS.html#conversation-template)查看所有已经预设的模板),更多模板正在添加中。在微调shell脚本里指定对应的`--conversation_template`试试吧! :rocket:  
* [2024-03-27] 支持 [LISA](https://arxiv.org/abs/2403.17919) —— 无需offloading,在24G显存的GPU上训练7B模型!  
* [2023-09-11] 支持 [投机解码(speculative decoding)](https://arxiv.org/abs/2211.17192), 点击 [使用指南](https://github.com/OptimalScale/LMFlow/blob/main/scripts/speculative_decoding/README.md) 查看使用方法和简单的性能统计。
* [2023-08-14] 支持通过位置插值(Position Interpolation)(Linear & NTK scaling)扩展LLaMA的上下文窗口,查看详情:[位置插值](https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md)。
* [2023-08-07] 支持 [Flash Attention-2](https://crfm.stanford.edu/2023/07/17/flash2.html),查看详情:[Flash Attention使用指南](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md)。
* [2023-08-02] 支持 [Llama2](https://ai.meta.com/llama/),[ChatGLM2](https://huggingface.co/THUDM/chatglm2-6b),[Baichuan](https://huggingface.co/baichuan-inc/Baichuan-7B)。


## 目录
- [LMFlow](#lmflow)
  - [新闻](#新闻)
  - [目录](#目录)
  - [快速上手](#快速上手)
    - [安装](#安装)
    - [准备数据集](#准备数据集)
    - [微调(全参数)](#微调全参数)
    - [微调(LISA)](#微调lisa)
    - [微调(LoRA)](#微调lora)
    - [推理](#推理)
    - [部署](#部署)
    - [评测](#评测)
  - [支持功能](#支持功能)
  - [需要帮助?](#需要帮助)
  - [协议](#协议)
  - [引用](#引用)


## 快速上手
### 安装
我们的Repo已经在Linux(Ubuntu 20.04)上进行了测试。其他操作系统平台(MacOS、Windows)尚未完全测试,因此可能会遇到一些预期外的错误。建议先在Linux/Windows WSL上尝试使用,或者使用Google Colab来体验。

对于CUDA 10.3-11.7,建议使用`v0.0.5`及更早版本。对于大于11.7的CUDA,请使用我们的稳定分支`>= v0.0.6`以获得更好的体验。
```bash
git clone https://github.com/OptimalScale/LMFlow.git
cd LMFlow
conda create -n lmflow python=3.9 -y
conda activate lmflow
conda install mpi4py
bash install.sh
```

### 准备数据集
请参考我们的 [官方文档(英文版)](https://optimalscale.github.io/LMFlow/examples/DATASETS.html)。官方文档正在汉化中,请耐心等待。

### 微调(全参数)
全参数微调将更新模型的所有参数。全参数微调GPT-2的示例如下:

```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune.sh \
  --model_name_or_path gpt2 \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_gpt2
```

> [!TIP]
> 可以通过添加`--conversation_template`参数为对话数据集指定对话模板。
> 
> <details><summary>示例:为 Llama-3-8B 指定对话数据集模板</summary>  
> 
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune.sh \
>  --model_name_or_path meta-llama/Meta-Llama-3-8B \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template llama3 \
>  --output_model_path output_models/finetuned_llama3_8b
>```
> </details>

### 微调(LISA)
[LISA](https://arxiv.org/abs/2403.17919) 是一种 **内存高效(memory-efficient)** 的微调算法,它允许在内存和随机解冻的层数之间进行权衡。下面的脚本目前仅在 **单个GPU** 上进行了测试。请关注我们的最新更新! :smile:
```sh
cd data && ./download.sh alpaca && cd -

./scripts/run_finetune_with_lisa.sh \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --dataset_path data/alpaca/train_conversation \
  --output_model_path output_models/finetuned_llama2_7b \
  --lisa_activated_layers 1 \
  --lisa_interval_steps 20
```

> [!TIP]
> <details><summary>示例:为 Llama-2-7B 指定对话数据集模板</summary>  
> 
>```bash
>cd data && ./download.sh alpaca && cd -
>
>./scripts/run_finetune_with_lisa.sh \
>  --model_name_or_path meta-llama/Llama-2-7b-hf \
>  --dataset_path data/alpaca/train_conversation \
>  --conversation_template ll
Download .txt
gitextract_35w5f4e3/

├── .gitattributes
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── api-feedback.md
│   │   ├── blank-template.md
│   │   ├── bug-report.md
│   │   └── feature-request.md
│   └── workflows/
│       └── documentation.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── configs/
│   ├── accelerate_dsz0_config.yaml
│   ├── accelerate_dsz2_config.yaml
│   ├── accelerate_dsz3_config.yaml
│   ├── accelerate_fsdp_config.yaml
│   ├── accelerate_singlegpu_config.yaml
│   ├── archive/
│   │   ├── accelerate_multigpu_config.yaml
│   │   ├── accelerate_singlegpu_config.yaml
│   │   ├── ds_config_chatbot.json
│   │   ├── ds_config_eval.json
│   │   ├── ds_config_multimodal.json
│   │   └── ds_config_vis_chatbot.json
│   ├── deepspeed/
│   │   ├── zero0_no_offload.json
│   │   ├── zero2.json
│   │   ├── zero2_no_offload.json
│   │   ├── zero3.json
│   │   ├── zero3_for_eval.json
│   │   └── zero3_no_offload.json
│   └── iterative_dpo.yaml
├── contrib/
│   ├── README.md
│   ├── langchain/
│   │   ├── README.md
│   │   └── retrieval_chatbot.py
│   ├── long-context/
│   │   ├── hf_sft_full_finetune.sh
│   │   ├── hf_sft_lora_flashattn.sh
│   │   └── sft_summarizer.py
│   ├── rlhflow/
│   │   ├── reward_modeling.py
│   │   └── run_reward_modeling.sh
│   ├── text2image/
│   │   ├── README.md
│   │   ├── accelerate_t2i_config.yaml
│   │   ├── diffuser_args.py
│   │   ├── diffuser_finetuner.py
│   │   ├── finetune_t2i.py
│   │   ├── finetune_t2i.sh
│   │   ├── requirements.txt
│   │   └── t2i_dataset.py
│   └── tool-finetune/
│       ├── README.md
│       ├── function_call_finetune.py
│       └── run_function_call_finetune.sh
├── docs/
│   ├── dev_notes/
│   │   └── finetuning.mmd
│   ├── readme/
│   │   ├── Position_Interpolation.md
│   │   ├── README_es.md
│   │   ├── README_hindi.md
│   │   ├── README_jp.md
│   │   ├── README_ko.md
│   │   ├── README_zh-hans.md
│   │   ├── flash_attn2.md
│   │   └── multi_node.md
│   ├── requirements.txt
│   └── source/
│       ├── _static/
│       │   └── check_before_after_lora_tuning.jsonl
│       ├── about/
│       │   ├── authors.md
│       │   ├── changelog.md
│       │   └── index.md
│       ├── blogs/
│       │   ├── benchmark.md
│       │   └── index.md
│       ├── conf.py
│       ├── examples/
│       │   ├── DATASETS.md
│       │   ├── TASK_GUIDE.md
│       │   ├── checkpoints.md
│       │   ├── customize_conversation_template.md
│       │   ├── finetuning.md
│       │   ├── index.md
│       │   ├── medical_finetune.md
│       │   ├── raft.md
│       │   ├── reward_modeling.md
│       │   └── supported_conversation_template.md
│       └── index.md
├── examples/
│   ├── benchmarking.py
│   ├── chatbot.py
│   ├── chatbot_gradio.py
│   ├── detail_memory.py
│   ├── dpo_train.py
│   ├── dpov2_train.py
│   ├── evaluation.py
│   ├── finetune.py
│   ├── finetune_multi_modal.py
│   ├── inference.py
│   ├── iterative_dpo_train.py
│   ├── merge_lora.py
│   ├── multistage_finetune.py
│   ├── raft_align.py
│   ├── reward_modeling.py
│   ├── rm_inference.py
│   ├── sglang_inference.py
│   ├── speculative_inference.py
│   ├── tool_inference.py
│   ├── vis_chatbot.py
│   ├── vis_chatbot_gradio.py
│   └── vllm_inference.py
├── experimental/
│   ├── Hymba/
│   │   ├── README.md
│   │   └── run_finetune_hymba.sh
│   ├── LISA-diffusion/
│   │   ├── README.md
│   │   ├── diffusion_dpo/
│   │   │   ├── train_diffusion_dpo.py
│   │   │   └── train_diffusion_dpo_lisa.py
│   │   ├── instruct_pix2pix/
│   │   │   ├── test_instruct_pix2pix.py
│   │   │   └── train_instruct_pix2pix_lisa.py
│   │   ├── latent_consistency_model/
│   │   │   ├── train_lcm_distill_sd_wds_lisa.py
│   │   │   └── train_lcm_distill_sd_wds_lora.py
│   │   ├── requirement.txt
│   │   └── single_lisa.py
│   └── RAFT-diffusion/
│       ├── README.md
│       ├── SD256-RAFT.ipynb
│       ├── requirements.txt
│       └── train_text_to_image_lora.py
├── pyproject.toml
├── requirements.txt
├── scripts/
│   ├── archive/
│   │   ├── bash.sh
│   │   ├── convert_llama_weights_to_hf.py
│   │   ├── download_model.sh
│   │   ├── export_llama_state_dict_checkpoint.py
│   │   ├── run_all_benchmark.sh
│   │   ├── run_app.sh
│   │   ├── run_benchmark.sh
│   │   ├── run_chatbot.sh
│   │   ├── run_chatbot_chatglm.sh
│   │   ├── run_chatbot_cpu.sh
│   │   ├── run_detail_gpu_memory.sh
│   │   ├── run_dpo_align.sh
│   │   ├── run_dpov2_align.sh
│   │   ├── run_evaluation.sh
│   │   ├── run_evaluation_accelerator.sh
│   │   ├── run_evaluation_with_lora.sh
│   │   ├── run_finetune.sh
│   │   ├── run_finetune_with_custom_optim.sh
│   │   ├── run_finetune_with_lisa.sh
│   │   ├── run_finetune_with_lora.sh
│   │   ├── run_finetune_with_qlora.sh
│   │   ├── run_inference.sh
│   │   ├── run_inference_multimodal_model.sh
│   │   ├── run_iterative_dpo.sh
│   │   ├── run_multistage_finetune.sh
│   │   ├── run_raft_align.sh
│   │   ├── run_reward_modeling.sh
│   │   ├── run_reward_modeling_with_lisa.sh
│   │   ├── run_reward_modeling_with_lora.sh
│   │   ├── run_rm_inference.sh
│   │   ├── run_tool.sh
│   │   └── run_vllm_inference.sh
│   ├── multimodal/
│   │   ├── README.md
│   │   ├── run_finetune_multi_modal_stage1.sh
│   │   ├── run_finetune_multi_modal_stage2.sh
│   │   ├── run_vis_chatbot_blip2.sh
│   │   ├── run_vis_chatbot_gradio_minigpt4.sh
│   │   ├── run_vis_chatbot_llava.sh
│   │   └── run_vis_chatbot_minigpt4.sh
│   ├── run_finetune.sh
│   ├── run_finetune_with_custom_optim.sh
│   ├── run_finetune_with_lisa.sh
│   ├── run_finetune_with_lora.sh
│   ├── run_finetune_with_qlora.sh
│   ├── run_merge_lora.sh
│   ├── run_sglang_inference.sh
│   └── run_unittest.sh
├── setup.py
├── src/
│   └── lmflow/
│       ├── __init__.py
│       ├── args.py
│       ├── datasets/
│       │   ├── __init__.py
│       │   ├── dataset.py
│       │   └── multi_modal_dataset.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── auto_model.py
│       │   ├── base_model.py
│       │   ├── decoder_model.py
│       │   ├── encoder_decoder_model.py
│       │   ├── hf_decoder_model.py
│       │   ├── hf_model_mixin.py
│       │   ├── hf_text_regression_model.py
│       │   ├── interfaces/
│       │   │   ├── __init__.py
│       │   │   └── tunable.py
│       │   ├── regression_model.py
│       │   ├── text_regression_model.py
│       │   ├── vision2seq_model.py
│       │   └── vision_encoder/
│       │       ├── __init__.py
│       │       └── clip_encoder.py
│       ├── optim/
│       │   ├── __init__.py
│       │   ├── adabelief.py
│       │   ├── adabound.py
│       │   ├── adadelta.py
│       │   ├── adagrad.py
│       │   ├── adam.py
│       │   ├── adamax.py
│       │   ├── adamp.py
│       │   ├── adamw_schedule_free.py
│       │   ├── adan.py
│       │   ├── dummy.py
│       │   ├── lamb.py
│       │   ├── lars.py
│       │   ├── muon.py
│       │   ├── nadam.py
│       │   ├── novograd.py
│       │   ├── optimizers.py
│       │   ├── radam.py
│       │   ├── sgd_schedule_free.py
│       │   ├── sgdp.py
│       │   ├── sophia.py
│       │   ├── utils.py
│       │   └── yogi.py
│       ├── pipeline/
│       │   ├── __init__.py
│       │   ├── auto_pipeline.py
│       │   ├── base_aligner.py
│       │   ├── base_pipeline.py
│       │   ├── base_tuner.py
│       │   ├── dpo_aligner.py
│       │   ├── dpov2_aligner.py
│       │   ├── evaluator.py
│       │   ├── finetuner.py
│       │   ├── inferencer.py
│       │   ├── iterative_dpo_aligner.py
│       │   ├── raft_aligner.py
│       │   ├── rm_inferencer.py
│       │   ├── rm_tuner.py
│       │   ├── sglang_inferencer.py
│       │   ├── utils/
│       │   │   ├── __init__.py
│       │   │   ├── dpov2_dataprocessor.py
│       │   │   ├── dpov2_trainer.py
│       │   │   ├── lisa_trainer.py
│       │   │   ├── memory_safe_dpov2_align.py
│       │   │   ├── memory_safe_vllm_inference.py
│       │   │   ├── raft_trainer.py
│       │   │   ├── rm_dataprocessor.py
│       │   │   └── rm_trainer.py
│       │   └── vllm_inferencer.py
│       ├── tokenization/
│       │   ├── __init__.py
│       │   ├── hf_decoder_model.py
│       │   └── hf_text_regression_model.py
│       ├── utils/
│       │   ├── __init__.py
│       │   ├── common.py
│       │   ├── constants.py
│       │   ├── conversation_template/
│       │   │   ├── __init__.py
│       │   │   ├── base.py
│       │   │   ├── chatglm.py
│       │   │   ├── chatml.py
│       │   │   ├── deepseek.py
│       │   │   ├── gemma.py
│       │   │   ├── hymba.py
│       │   │   ├── internlm.py
│       │   │   ├── llama.py
│       │   │   ├── phi.py
│       │   │   ├── qwen.py
│       │   │   ├── yi.py
│       │   │   └── zephyr.py
│       │   ├── data_utils.py
│       │   ├── debug/
│       │   │   └── profiler.py
│       │   ├── deprecated.py
│       │   ├── envs.py
│       │   ├── llava_conversation_lib.py
│       │   ├── model.py
│       │   ├── multimodal.py
│       │   ├── position_interpolation/
│       │   │   ├── __init__.py
│       │   │   └── llama_rope_scaled_monkey_patch.py
│       │   ├── protocol.py
│       │   ├── test_utils.py
│       │   └── versioning.py
│       └── version.py
└── tests/
    ├── __init__.py
    ├── conftest.py
    ├── datasets/
    │   ├── __init__.py
    │   ├── conftest.py
    │   └── test_dataset.py
    ├── models/
    │   ├── __init__.py
    │   ├── test_auto_model.py
    │   ├── test_hf_decoder_model.py
    │   └── test_tool_inferencer.py
    ├── pipeline/
    │   ├── test_auto_pipeline.py
    │   ├── test_finetuner_distributed_loss.py
    │   ├── test_memory_safe_vllm_inferencer.py
    │   └── test_sglang_infernecer.py
    └── utils/
        ├── __init__.py
        ├── test_conversation_formatter.py
        ├── test_conversation_template.py
        └── test_data_utils.py
Download .txt
SYMBOL INDEX (850 symbols across 127 files)

FILE: contrib/langchain/retrieval_chatbot.py
  class LangchainChatbot (line 26) | class LangchainChatbot:
    method __init__ (line 27) | def __init__(self, model_name_or_path: str, provider: str):
    method check_valid_provider (line 51) | def check_valid_provider(self):
    method set_retriever_url (line 69) | def set_retriever_url(self, url, chunk_size, chunk_overlap):
    method set_retriever_file (line 77) | def set_retriever_file(self, file, chunk_size, chunk_overlap):
    method get_model (line 85) | def get_model(self):
    method chat_with_chatbot (line 101) | def chat_with_chatbot(self, human_input, session_id):
    method retrieve_by_url (line 113) | def retrieve_by_url(self, query):
    method retrieve_by_file (line 116) | def retrieve_by_file(self, query):
    method get_session_history (line 119) | def get_session_history(self, session_id: str) -> BaseChatMessageHistory:
  function get_cli (line 125) | def get_cli() -> argparse.ArgumentParser:
  function main (line 143) | def main(

FILE: contrib/long-context/sft_summarizer.py
  class UserArguments (line 35) | class UserArguments:
  class WandbCallback (line 128) | class WandbCallback(TrainerCallback):
    method __init__ (line 129) | def __init__(self, trainer):
    method on_save (line 137) | def on_save(self, args: TrainingArguments, state: TrainerState, contro...

FILE: contrib/rlhflow/reward_modeling.py
  function build_dataset (line 72) | def build_dataset(tokenizer, config):
  function compute_metrics (line 109) | def compute_metrics(eval_pred):
  class DataCollatorReward (line 118) | class DataCollatorReward:
    method __init__ (line 119) | def __init__(self, tokenizer):
    method __call__ (line 122) | def __call__(self, data):
  class RMTrainer (line 143) | class RMTrainer(Trainer):
    method compute_loss (line 144) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: contrib/text2image/diffuser_args.py
  class T2IDatasetArguments (line 9) | class T2IDatasetArguments(DatasetArguments):
    method __post_init__ (line 22) | def __post_init__(self):
  class DiffuserModelArguments (line 54) | class DiffuserModelArguments:
  class DiffuserTunerArguments (line 98) | class DiffuserTunerArguments:

FILE: contrib/text2image/diffuser_finetuner.py
  function log_validation (line 31) | def log_validation(
  class DiffuserModelTuner (line 66) | class DiffuserModelTuner(BaseTuner):
    method __init__ (line 87) | def __init__(
    method tune (line 99) | def tune(

FILE: contrib/text2image/finetune_t2i.py
  function main (line 20) | def main():

FILE: contrib/text2image/t2i_dataset.py
  class CustomT2IDataset (line 18) | class CustomT2IDataset(Dataset):
    method __init__ (line 21) | def __init__(self, data_args: T2IDatasetArguments):
    method __len__ (line 31) | def __len__(self):
    method __getitem__ (line 34) | def __getitem__(self, idx):
  class EncodePreprocessor (line 46) | class EncodePreprocessor:
    method __init__ (line 47) | def __init__(self, data_args: T2IDatasetArguments, kind: str = "simple...
    method register_simple_func (line 63) | def register_simple_func(self, tokenizer, text_encoder, vae):
    method __call__ (line 90) | def __call__(self, data_item):
  class PreprocessedT2IDataset (line 94) | class PreprocessedT2IDataset(Dataset):
    method __init__ (line 97) | def __init__(self, raw_dataset: Dataset, data_args: T2IDatasetArgument...
    method __len__ (line 104) | def __len__(self):
    method __getitem__ (line 107) | def __getitem__(self, idx):
  function build_t2i_dataset (line 111) | def build_t2i_dataset(data_args: T2IDatasetArguments, tokenizer, text_en...

FILE: contrib/tool-finetune/function_call_finetune.py
  class HFDecoderModelForTool (line 34) | class HFDecoderModelForTool(HFDecoderModel):
    method tokenize (line 35) | def tokenize(self, dataset, add_special_tokens=True, *args, **kwargs) ...
  function conversation_tokenize_function (line 158) | def conversation_tokenize_function(
  function train (line 245) | def train():

FILE: examples/benchmarking.py
  function is_lmflow_local_benchmarking (line 115) | def is_lmflow_local_benchmarking(dataset_name):
  function is_lm_evaluation_benchmarking (line 142) | def is_lm_evaluation_benchmarking(dataset_name):
  function run_lmflow_local_benchmarking (line 149) | def run_lmflow_local_benchmarking(
  function run_lm_evaluation_benchmarking (line 191) | def run_lm_evaluation_benchmarking(dataset_name, model_name):
  function main (line 214) | def main():

FILE: examples/chatbot.py
  class ChatbotArguments (line 27) | class ChatbotArguments:
  function main (line 42) | def main():

FILE: examples/chatbot_gradio.py
  class ChatbotArguments (line 80) | class ChatbotArguments:
  function hist2context (line 141) | def hist2context(hist):
  function chat_stream (line 150) | def chat_stream(query: str, history=None, **kwargs):
  function predict (line 178) | def predict(input, history=None):

FILE: examples/dpov2_train.py
  function main (line 28) | def main():

FILE: examples/finetune.py
  function main (line 32) | def main():

FILE: examples/finetune_multi_modal.py
  function main (line 34) | def main():

FILE: examples/inference.py
  function main (line 24) | def main():

FILE: examples/iterative_dpo_train.py
  function main (line 35) | def main():

FILE: examples/merge_lora.py
  class MergeLoraArguments (line 24) | class MergeLoraArguments:
  function main (line 43) | def main():

FILE: examples/multistage_finetune.py
  class MultistageFinetuneArgs (line 37) | class MultistageFinetuneArgs:
  function setup_logger (line 45) | def setup_logger():
  function generate_new_seed (line 56) | def generate_new_seed(seed):
  function shuffle_and_split_data (line 60) | def shuffle_and_split_data(dataset, num_split=None, seed=None):
  function main (line 75) | def main():

FILE: examples/raft_align.py
  class RewardArguments (line 25) | class RewardArguments:
  function get_reward_function (line 53) | def get_reward_function(reward_args, pipeline_args):
  function main (line 92) | def main():

FILE: examples/reward_modeling.py
  function main (line 23) | def main():

FILE: examples/rm_inference.py
  function main (line 21) | def main():

FILE: examples/sglang_inference.py
  function main (line 21) | def main():

FILE: examples/tool_inference.py
  function main (line 9) | def main():

FILE: examples/vis_chatbot.py
  class ChatbotArguments (line 33) | class ChatbotArguments:
  function main (line 63) | def main():

FILE: examples/vis_chatbot_gradio.py
  class ChatbotArguments (line 69) | class ChatbotArguments:
  function gradio_reset (line 89) | def gradio_reset(chat_state, img_list):
  function upload_image (line 103) | def upload_image(image_file, history, text_input, chat_state, image_list):
  function read_img (line 133) | def read_img(image):
  function gradio_ask (line 143) | def gradio_ask(user_message, chatbot, chat_state):
  function gradio_answer (line 155) | def gradio_answer(chatbot, chat_state, image_list, num_beams=1, temperat...
  function start_inferencer (line 214) | def start_inferencer(

FILE: examples/vllm_inference.py
  function main (line 21) | def main():

FILE: experimental/LISA-diffusion/diffusion_dpo/train_diffusion_dpo.py
  function import_model_class_from_model_name_or_path (line 69) | def import_model_class_from_model_name_or_path(pretrained_model_name_or_...
  function log_validation (line 85) | def log_validation(args, unet, accelerator, weight_dtype, epoch, is_fina...
  function parse_args (line 150) | def parse_args(input_args=None):
  function tokenize_captions (line 443) | def tokenize_captions(tokenizer, examples):
  function encode_prompt (line 455) | def encode_prompt(text_encoder, input_ids):
  function main (line 465) | def main(args):

FILE: experimental/LISA-diffusion/diffusion_dpo/train_diffusion_dpo_lisa.py
  function freeze_all_layers (line 67) | def freeze_all_layers(model):
  function random_activate_layers (line 72) | def random_activate_layers(model, p):
  function lisa (line 85) | def lisa(model, p=0.25):
  function import_model_class_from_model_name_or_path (line 90) | def import_model_class_from_model_name_or_path(pretrained_model_name_or_...
  function update_ema (line 107) | def update_ema(target_params, source_params, rate=0.99):
  function log_validation (line 121) | def log_validation(args, unet, accelerator, weight_dtype, epoch, is_fina...
  function parse_args (line 185) | def parse_args(input_args=None):
  function tokenize_captions (line 472) | def tokenize_captions(tokenizer, examples):
  function encode_prompt (line 484) | def encode_prompt(text_encoder, input_ids):
  function main (line 494) | def main(args):

FILE: experimental/LISA-diffusion/instruct_pix2pix/test_instruct_pix2pix.py
  class InstructPix2Pix (line 30) | class InstructPix2Pix(ExamplesTestsAccelerate):
    method test_instruct_pix2pix_checkpointing_checkpoints_total_limit (line 31) | def test_instruct_pix2pix_checkpointing_checkpoints_total_limit(self):
    method test_instruct_pix2pix_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints (line 54) | def test_instruct_pix2pix_checkpointing_checkpoints_total_limit_remove...

FILE: experimental/LISA-diffusion/instruct_pix2pix/train_instruct_pix2pix_lisa.py
  function parse_args (line 63) | def parse_args():
  function convert_to_np (line 369) | def convert_to_np(image, resolution):
  function download_image (line 374) | def download_image(url):
  function freeze_all_layers (line 381) | def freeze_all_layers(model):
  function random_activate_layers (line 386) | def random_activate_layers(model, p):
  function lisa (line 399) | def lisa(model, p=0.25):
  function main (line 404) | def main():

FILE: experimental/LISA-diffusion/latent_consistency_model/train_lcm_distill_sd_wds_lisa.py
  function freeze_all_layers (line 76) | def freeze_all_layers(model):
  function random_activate_layers (line 81) | def random_activate_layers(model, p):
  function lisa (line 94) | def lisa(model, p=0.25):
  function filter_keys (line 99) | def filter_keys(key_set):
  function group_by_keys_nothrow (line 106) | def group_by_keys_nothrow(data, keys=base_plus_ext, lcase=True, suffixes...
  function tarfile_to_samples_nothrow (line 134) | def tarfile_to_samples_nothrow(src, handler=wds.warn_and_continue):
  class WebdatasetFilter (line 142) | class WebdatasetFilter:
    method __init__ (line 143) | def __init__(self, min_size=1024, max_pwatermark=0.5):
    method __call__ (line 147) | def __call__(self, x):
  class SDText2ImageDataset (line 162) | class SDText2ImageDataset:
    method __init__ (line 163) | def __init__(
    method train_dataset (line 233) | def train_dataset(self):
    method train_dataloader (line 237) | def train_dataloader(self):
  function log_validation (line 241) | def log_validation(vae, unet, args, accelerator, weight_dtype, step, nam...
  function guidance_scale_embedding (line 337) | def guidance_scale_embedding(w, embedding_dim=512, dtype=torch.float32):
  function append_dims (line 366) | def append_dims(x, target_dims):
  function scalings_for_boundary_conditions (line 375) | def scalings_for_boundary_conditions(timestep, sigma_data=0.5, timestep_...
  function get_predicted_original_sample (line 383) | def get_predicted_original_sample(model_output, timesteps, sample, predi...
  function get_predicted_noise (line 402) | def get_predicted_noise(model_output, timesteps, sample, prediction_type...
  function extract_into_tensor (line 420) | def extract_into_tensor(a, t, x_shape):
  class DDIMSolver (line 426) | class DDIMSolver:
    method __init__ (line 427) | def __init__(self, alpha_cumprods, timesteps=1000, ddim_timesteps=50):
    method to (line 440) | def to(self, device):
    method ddim_step (line 446) | def ddim_step(self, pred_x0, pred_noise, timestep_index):
  function update_ema (line 454) | def update_ema(target_params, source_params, rate=0.99):
  function import_model_class_from_model_name_or_path (line 468) | def import_model_class_from_model_name_or_path(
  function parse_args (line 488) | def parse_args():
  function encode_prompt (line 849) | def encode_prompt(prompt_batch, text_encoder, tokenizer, proportion_empt...
  function main (line 874) | def main(args):

FILE: experimental/LISA-diffusion/latent_consistency_model/train_lcm_distill_sd_wds_lora.py
  function get_module_kohya_state_dict (line 77) | def get_module_kohya_state_dict(module, prefix: str, dtype: torch.dtype,...
  function filter_keys (line 94) | def filter_keys(key_set):
  function group_by_keys_nothrow (line 101) | def group_by_keys_nothrow(data, keys=base_plus_ext, lcase=True, suffixes...
  function tarfile_to_samples_nothrow (line 129) | def tarfile_to_samples_nothrow(src, handler=wds.warn_and_continue):
  class WebdatasetFilter (line 137) | class WebdatasetFilter:
    method __init__ (line 138) | def __init__(self, min_size=1024, max_pwatermark=0.5):
    method __call__ (line 142) | def __call__(self, x):
  class SDText2ImageDataset (line 157) | class SDText2ImageDataset:
    method __init__ (line 158) | def __init__(
    method train_dataset (line 228) | def train_dataset(self):
    method train_dataloader (line 232) | def train_dataloader(self):
  function log_validation (line 236) | def log_validation(vae, unet, args, accelerator, weight_dtype, step):
  function guidance_scale_embedding (line 318) | def guidance_scale_embedding(w, embedding_dim=512, dtype=torch.float32):
  function append_dims (line 347) | def append_dims(x, target_dims):
  function scalings_for_boundary_conditions (line 356) | def scalings_for_boundary_conditions(timestep, sigma_data=0.5, timestep_...
  function get_predicted_original_sample (line 364) | def get_predicted_original_sample(model_output, timesteps, sample, predi...
  function get_predicted_noise (line 383) | def get_predicted_noise(model_output, timesteps, sample, prediction_type...
  function extract_into_tensor (line 401) | def extract_into_tensor(a, t, x_shape):
  class DDIMSolver (line 407) | class DDIMSolver:
    method __init__ (line 408) | def __init__(self, alpha_cumprods, timesteps=1000, ddim_timesteps=50):
    method to (line 421) | def to(self, device):
    method ddim_step (line 427) | def ddim_step(self, pred_x0, pred_noise, timestep_index):
  function update_ema (line 435) | def update_ema(target_params, source_params, rate=0.99):
  function import_model_class_from_model_name_or_path (line 448) | def import_model_class_from_model_name_or_path(
  function parse_args (line 468) | def parse_args():
  function encode_prompt (line 842) | def encode_prompt(prompt_batch, text_encoder, tokenizer, proportion_empt...
  function main (line 867) | def main(args):

FILE: experimental/LISA-diffusion/single_lisa.py
  class LISADiffusion (line 5) | class LISADiffusion:
    method __init__ (line 6) | def __init__(self, model, rate=None):
    method freeze_all_layers (line 11) | def freeze_all_layers(self, model):
    method random_activate_layers (line 15) | def random_activate_layers(self, model, p):
    method lisa (line 26) | def lisa(self, model, p=0.25):
    method lisa_recall (line 30) | def lisa_recall(self):
    method initialize (line 35) | def initialize(self):
    method register (line 40) | def register(
    method insert_hook (line 65) | def insert_hook(

FILE: experimental/RAFT-diffusion/train_text_to_image_lora.py
  function save_model_card (line 53) | def save_model_card(repo_name, images=None, base_model=str, dataset_name...
  function parse_args (line 82) | def parse_args():
  function get_full_repo_name (line 346) | def get_full_repo_name(model_id: str, organization: Optional[str] = None...
  function main (line 361) | def main():

FILE: scripts/archive/convert_llama_weights_to_hf.py
  function compute_intermediate_size (line 59) | def compute_intermediate_size(n):
  function read_json (line 63) | def read_json(path):
  function write_json (line 68) | def write_json(text, path):
  function write_model (line 73) | def write_model(model_path, input_base_path, model_size):
  function write_tokenizer (line 232) | def write_tokenizer(tokenizer_path, input_tokenizer_path):
  function main (line 249) | def main():

FILE: scripts/archive/export_llama_state_dict_checkpoint.py
  function permute (line 11) | def permute(w):
  function unpermute (line 15) | def unpermute(w):
  function translate_state_dict_key (line 19) | def translate_state_dict_key(k):  # noqa: C901

FILE: src/lmflow/args.py
  class OptimizerNames (line 34) | class OptimizerNames:
  class ModelArguments (line 58) | class ModelArguments:
    method __post_init__ (line 331) | def __post_init__(self):
  class VisModelArguments (line 368) | class VisModelArguments(ModelArguments):
  class DatasetArguments (line 418) | class DatasetArguments:
    method __post_init__ (line 601) | def __post_init__(self):
  class MultiModalDatasetArguments (line 617) | class MultiModalDatasetArguments(DatasetArguments):
  class FinetunerArguments (line 626) | class FinetunerArguments(TrainingArguments):
  class RewardModelTunerArguments (line 684) | class RewardModelTunerArguments(FinetunerArguments):
  class EvaluatorArguments (line 693) | class EvaluatorArguments:
    method __post_init__ (line 843) | def __post_init__(self):
  class InferencerArguments (line 853) | class InferencerArguments:
    method __post_init__ (line 1070) | def __post_init__(self):
  class RaftAlignerArguments (line 1135) | class RaftAlignerArguments(TrainingArguments):
  class BenchmarkingArguments (line 1195) | class BenchmarkingArguments:
  class DPOAlignerArguments (line 1227) | class DPOAlignerArguments:
  class DPOv2AlignerArguments (line 1309) | class DPOv2AlignerArguments(FinetunerArguments):
  class IterativeAlignerArguments (line 1333) | class IterativeAlignerArguments(InferencerArguments):
  class IterativeDPOAlignerArguments (line 1347) | class IterativeDPOAlignerArguments(IterativeAlignerArguments, DPOv2Align...
  class AutoArguments (line 1384) | class AutoArguments:
    method get_pipeline_args_class (line 1389) | def get_pipeline_args_class(pipeline_name: str):
  function split_args (line 1393) | def split_args(args):

FILE: src/lmflow/datasets/dataset.py
  class Dataset (line 53) | class Dataset:
    method __init__ (line 72) | def __init__(self, data_args: DatasetArguments = None, backend: str = ...
    method __len__ (line 112) | def __len__(self):
    method _check_instance_format (line 115) | def _check_instance_format(self):
    method _check_hf_json_format (line 125) | def _check_hf_json_format(self, data_files: list[str]):
    method from_dict (line 150) | def from_dict(self, dict_obj: dict, *args, **kwargs):
    method create_from_dict (line 238) | def create_from_dict(cls, dict_obj, *args, **kwargs):
    method to_dict (line 249) | def to_dict(self):
    method to_list (line 298) | def to_list(self):
    method map (line 310) | def map(self, *args, **kwargs):
    method get_backend (line 336) | def get_backend(self) -> Optional[str]:
    method get_backend_dataset (line 345) | def get_backend_dataset(self):
    method get_fingerprint (line 354) | def get_fingerprint(self):
    method get_data_args (line 363) | def get_data_args(self):
    method get_type (line 372) | def get_type(self) -> str:
    method save (line 381) | def save(self, file_path: str, format: str = "json"):
    method sample (line 398) | def sample(self, n: int, seed: int = 42):
    method train_test_split (line 428) | def train_test_split(self, test_size: float = 0.2, shuffle: bool = Tru...
    method drop_instances (line 470) | def drop_instances(self, indices: list):
    method sanity_check (line 484) | def sanity_check(
    method hf_dataset_sanity_check (line 496) | def hf_dataset_sanity_check(

FILE: src/lmflow/datasets/multi_modal_dataset.py
  class CustomMultiModalDataset (line 26) | class CustomMultiModalDataset(Dataset):
    method __init__ (line 29) | def __init__(self, dataset_path: str, data_args: DatasetArguments):
    method __len__ (line 37) | def __len__(self):
    method register_tokenizer (line 40) | def register_tokenizer(self, tokenizer, image_processor=None):
    method __getitem__ (line 44) | def __getitem__(self, i):
  function preprocess_multimodal_llava (line 94) | def preprocess_multimodal_llava(sources, data_args):
  function tokenizer_image_token (line 116) | def tokenizer_image_token(prompt, tokenizer, image_token_index=IMAGE_TOK...
  function preprocess_llama_from_llava_plain (line 138) | def preprocess_llama_from_llava_plain(sources, tokenizer: transformers.P...
  function preprocess_llama_from_llava_v1 (line 166) | def preprocess_llama_from_llava_v1(sources, tokenizer: transformers.PreT...
  class DataCollatorForSupervisedDataset (line 254) | class DataCollatorForSupervisedDataset:
    method __call__ (line 259) | def __call__(self, instances):

FILE: src/lmflow/models/auto_model.py
  class AutoModel (line 10) | class AutoModel:
    method get_model (line 12) | def get_model(self, model_args, *args, **kwargs):

FILE: src/lmflow/models/base_model.py
  class BaseModel (line 5) | class BaseModel:

FILE: src/lmflow/models/decoder_model.py
  class DecoderModel (line 18) | class DecoderModel(BaseModel):
    method __init__ (line 19) | def __init__(self, *args, **kwargs):

FILE: src/lmflow/models/encoder_decoder_model.py
  class EncoderDecoderModel (line 18) | class EncoderDecoderModel(BaseModel):
    method __init__ (line 19) | def __init__(self, *args, **kwargs):

FILE: src/lmflow/models/hf_decoder_model.py
  class HFDecoderModel (line 62) | class HFDecoderModel(DecoderModel, HFModelMixin, Tunable):
    method __init__ (line 83) | def __init__(self, model_args, do_train=True, device="gpu", **kwargs):
    method tokenize (line 86) | def tokenize(self, dataset: Dataset, add_special_tokens=True, *args, *...
    method encode (line 239) | def encode(self, input: Union[str, list[str]], *args, **kwargs) -> Uni...
    method decode (line 273) | def decode(self, input, **kwargs) -> Union[str, list[str]]:
    method inference (line 315) | def inference(
    method __inference (line 387) | def __inference(self, inputs, *args, **kwargs):
    method __vllm_inference (line 425) | def __vllm_inference(
    method __sglang_inference (line 468) | def __sglang_inference(
    method prepare_inputs_for_inference (line 493) | def prepare_inputs_for_inference(
    method merge_lora_weights (line 597) | def merge_lora_weights(self):
    method get_peft_without_qlora (line 610) | def get_peft_without_qlora(self):
    method save (line 649) | def save(self, dir, save_full_model=False, *args, **kwargs):

FILE: src/lmflow/models/hf_model_mixin.py
  class HFModelMixin (line 43) | class HFModelMixin(BaseModel):
    method __init__ (line 44) | def __init__(
    method __prepare_tokenizer (line 94) | def __prepare_tokenizer(
    method __prepare_dtype (line 133) | def __prepare_dtype(
    method __prepare_model_config (line 159) | def __prepare_model_config(
    method __prepare_quant_config (line 203) | def __prepare_quant_config(
    method __prepare_peft_config (line 248) | def __prepare_peft_config(
    method __model_module_inject (line 304) | def __model_module_inject(
    method __prepare_model_for_training (line 323) | def __prepare_model_for_training(
    method __prepare_model_for_inference (line 386) | def __prepare_model_for_inference(
    method __prepare_model_for_vllm_inference (line 446) | def __prepare_model_for_vllm_inference(
    method __prepare_model_for_sglang_inference (line 466) | def __prepare_model_for_sglang_inference(
    method __fix_special_tokens (line 489) | def __fix_special_tokens(self):
    method activate_model_for_inference (line 511) | def activate_model_for_inference(
    method deactivate_model_for_inference (line 545) | def deactivate_model_for_inference(
    method get_max_length (line 575) | def get_max_length(self):
    method get_tokenizer (line 581) | def get_tokenizer(self):
    method get_backend_model (line 587) | def get_backend_model(self):

FILE: src/lmflow/models/hf_text_regression_model.py
  class HFTextRegressionModel (line 45) | class HFTextRegressionModel(TextRegressionModel, HFModelMixin, Tunable):
    method __init__ (line 66) | def __init__(self, model_args: ModelArguments, do_train: bool = False,...
    method tokenize (line 80) | def tokenize(self, dataset: Dataset, add_special_tokens=True, *args, *...
    method inference (line 222) | def inference(
    method __inference (line 266) | def __inference(self, inputs, **kwargs):
    method __vllm_inference (line 310) | def __vllm_inference(
    method prepare_inputs_for_inference (line 330) | def prepare_inputs_for_inference(
    method postprocess_inference_outputs (line 366) | def postprocess_inference_outputs(
    method postprocess_distributed_inference_outputs (line 398) | def postprocess_distributed_inference_outputs(
    method save (line 411) | def save(self, dir, *args, **kwargs):

FILE: src/lmflow/models/interfaces/tunable.py
  class Tunable (line 5) | class Tunable:

FILE: src/lmflow/models/regression_model.py
  class RegressionModel (line 7) | class RegressionModel(BaseModel):
    method __init__ (line 8) | def __init__(self, *args, **kwargs):

FILE: src/lmflow/models/text_regression_model.py
  class TextRegressionModel (line 10) | class TextRegressionModel(RegressionModel):
    method __init__ (line 27) | def __init__(self, model_args, *args, **kwargs):
    method register_inference_function (line 34) | def register_inference_function(self, inference_func):
    method inference (line 40) | def inference(self, inputs: Dataset):

FILE: src/lmflow/models/vision2seq_model.py
  class CustomAutoVision2SeqModel (line 31) | class CustomAutoVision2SeqModel(Blip2ForConditionalGeneration, BaseModel):
    method __init__ (line 32) | def __init__(
    method get_backend_model (line 111) | def get_backend_model(self):
    method vision_model_from_pretrained (line 114) | def vision_model_from_pretrained(self, pretrained_path):
    method qformer_from_pretrained (line 117) | def qformer_from_pretrained(self, pretrained_path):
    method language_model_from_pretrained (line 120) | def language_model_from_pretrained(self, pretrained_path, low_resource...
    method vision_feature_select (line 136) | def vision_feature_select(self, image_forward_outs):
    method register_prompt_cache (line 146) | def register_prompt_cache(self, prompt_ids, prompt_keys_values):
    method save_prompt_cache (line 161) | def save_prompt_cache(self, path):
    method load_prompt_cache (line 174) | def load_prompt_cache(self, path):
    method get_tokenizer (line 186) | def get_tokenizer(self):
    method forward (line 189) | def forward(
    method processor_image_token_in_minigpt4 (line 307) | def processor_image_token_in_minigpt4(
    method generate (line 352) | def generate(

FILE: src/lmflow/models/vision_encoder/clip_encoder.py
  function build_vision_tower (line 8) | def build_vision_tower(vision_tower_cfg, **kwargs):
  class CLIPVisionTower (line 17) | class CLIPVisionTower(nn.Module):
    method __init__ (line 18) | def __init__(self, vision_tower, args, delay_load=False):
    method load_model (line 31) | def load_model(self):
    method encode_images (line 37) | def encode_images(self, images, language_projection):
    method feature_select (line 45) | def feature_select(self, image_forward_outs):
    method forward (line 56) | def forward(self, images):
    method dummy_feature (line 74) | def dummy_feature(self):
    method dtype (line 78) | def dtype(self):
    method device (line 82) | def device(self):
    method config (line 86) | def config(self):
    method hidden_size (line 93) | def hidden_size(self):
    method num_patches (line 97) | def num_patches(self):
    method prepare_inputs_labels_for_multimodal (line 100) | def prepare_inputs_labels_for_multimodal(

FILE: src/lmflow/optim/adabelief.py
  class AdaBelief (line 9) | class AdaBelief(Optimizer):
    method __init__ (line 14) | def __init__(
    method __setstate__ (line 66) | def __setstate__(self, state):
    method reset (line 71) | def reset(self):
    method step (line 88) | def step(self, closure=None):

FILE: src/lmflow/optim/adabound.py
  class AdaBound (line 9) | class AdaBound(Optimizer):
    method __init__ (line 19) | def __init__(
    method __setstate__ (line 56) | def __setstate__(self, state) -> None:
    method step (line 61) | def step(self, closure=None):

FILE: src/lmflow/optim/adadelta.py
  class Adadelta (line 7) | class Adadelta(Optimizer):
    method __init__ (line 8) | def __init__(self, params, lr=1.0, rho=0.95, eps=1e-6):
    method step (line 12) | def step(self, closure=None):

FILE: src/lmflow/optim/adagrad.py
  class AdaGrad (line 6) | class AdaGrad(torch.optim.Optimizer):
    method __init__ (line 7) | def __init__(self, params, lr=0.001, eps=1e-8, weight_decay=0):
    method step (line 11) | def step(self, closure=None):

FILE: src/lmflow/optim/adam.py
  class Adam (line 7) | class Adam(Optimizer):
    method __init__ (line 8) | def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8):
    method step (line 12) | def step(self, closure=None):

FILE: src/lmflow/optim/adamax.py
  class Adamax (line 7) | class Adamax(Optimizer):
    method __init__ (line 8) | def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8, weig...
    method __setstate__ (line 22) | def __setstate__(self, state):
    method step (line 25) | def step(self, closure=None):

FILE: src/lmflow/optim/adamp.py
  class AdamP (line 9) | class AdamP(Optimizer):
    method __init__ (line 20) | def __init__(
    method _channel_view (line 58) | def _channel_view(x):
    method _layer_view (line 62) | def _layer_view(x):
    method _cosine_similarity (line 66) | def _cosine_similarity(x, y, eps, view_func):
    method _projection (line 76) | def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
    method step (line 91) | def step(self, closure=None):

FILE: src/lmflow/optim/adamw_schedule_free.py
  class AdamWScheduleFree (line 15) | class AdamWScheduleFree(torch.optim.Optimizer):
    method __init__ (line 27) | def __init__(
    method eval (line 58) | def eval(self):
    method train (line 70) | def train(self):
    method step (line 82) | def step(self, closure=None):

FILE: src/lmflow/optim/adan.py
  class Adan (line 10) | class Adan(Optimizer):
    method __init__ (line 19) | def __init__(
    method __setstate__ (line 53) | def __setstate__(self, state):
    method restart_opt (line 59) | def restart_opt(self):
    method step (line 75) | def step(self):
  function _single_tensor_adan (line 166) | def _single_tensor_adan(
  function _multi_tensor_adan (line 217) | def _multi_tensor_adan(

FILE: src/lmflow/optim/dummy.py
  class Dummy (line 12) | class Dummy(Optimizer):
    method __init__ (line 23) | def __init__(
    method step (line 40) | def step(self, closure: Callable = None):

FILE: src/lmflow/optim/lamb.py
  class Lamb (line 9) | class Lamb(Optimizer):
    method __init__ (line 20) | def __init__(
    method step (line 51) | def step(self, closure=None):

FILE: src/lmflow/optim/lars.py
  class LARS (line 7) | class LARS(Optimizer):
    method __init__ (line 39) | def __init__(
    method __setstate__ (line 77) | def __setstate__(self, state) -> None:
    method step (line 84) | def step(self, closure=None):

FILE: src/lmflow/optim/muon.py
  function zeropower_via_newtonschulz5 (line 8) | def zeropower_via_newtonschulz5(G: Tensor, steps: int) -> Tensor:
  class Muon (line 41) | class Muon(torch.optim.Optimizer):
    method __init__ (line 46) | def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8, wei...
    method step (line 51) | def step(self, closure=None):

FILE: src/lmflow/optim/nadam.py
  class NAdam (line 8) | class NAdam(torch.optim.Optimizer):
    method __init__ (line 9) | def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8, weig...
    method __setstate__ (line 25) | def __setstate__(self, state):
    method step (line 28) | def step(self, closure=None):

FILE: src/lmflow/optim/novograd.py
  class NovoGrad (line 7) | class NovoGrad(optim.Optimizer):
    method __init__ (line 8) | def __init__(
    method __setstate__ (line 26) | def __setstate__(self, state):
    method step (line 31) | def step(self, closure=None):

FILE: src/lmflow/optim/radam.py
  class RAdam (line 10) | class RAdam(Optimizer):
    method __init__ (line 24) | def __init__(
    method __setstate__ (line 62) | def __setstate__(self, state):
    method step (line 65) | def step(self, closure=None):

FILE: src/lmflow/optim/sgd_schedule_free.py
  class SGDScheduleFree (line 13) | class SGDScheduleFree(torch.optim.Optimizer):
    method __init__ (line 25) | def __init__(
    method eval (line 61) | def eval(self):
    method train (line 73) | def train(self):
    method step (line 85) | def step(self, closure=None):

FILE: src/lmflow/optim/sgdp.py
  class SGDP (line 9) | class SGDP(Optimizer):
    method __init__ (line 20) | def __init__(
    method _channel_view (line 60) | def _channel_view(x):
    method _layer_view (line 64) | def _layer_view(x):
    method _cosine_similarity (line 68) | def _cosine_similarity(x, y, eps, view_func):
    method _projection (line 78) | def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
    method step (line 93) | def step(self, closure=None):

FILE: src/lmflow/optim/sophia.py
  class SophiaG (line 7) | class SophiaG(Optimizer):
    method __init__ (line 13) | def __init__(
    method __setstate__ (line 39) | def __setstate__(self, state):
    method update_hessian (line 51) | def update_hessian(self):
    method step (line 74) | def step(self, closure=None, bs=5120):

FILE: src/lmflow/optim/utils.py
  function create_customized_optimizer (line 10) | def create_customized_optimizer(base_trainer_class, model_args):

FILE: src/lmflow/optim/yogi.py
  class Yogi (line 10) | class Yogi(Optimizer):
    method __init__ (line 20) | def __init__(
    method step (line 49) | def step(self, closure=None):

FILE: src/lmflow/pipeline/auto_pipeline.py
  class AutoPipeline (line 57) | class AutoPipeline:
    method get_pipeline (line 63) | def get_pipeline(self, pipeline_name, model_args, data_args, pipeline_...

FILE: src/lmflow/pipeline/base_aligner.py
  class BaseAligner (line 9) | class BaseAligner(BasePipeline):
    method __init__ (line 12) | def __init__(self, *args, **kwargs):
    method _check_if_alignable (line 15) | def _check_if_alignable(self, model, dataset, reward_model):
    method align (line 21) | def align(self, model, dataset, reward_model):

FILE: src/lmflow/pipeline/base_pipeline.py
  class BasePipeline (line 5) | class BasePipeline:

FILE: src/lmflow/pipeline/base_tuner.py
  class BaseTuner (line 9) | class BaseTuner(BasePipeline):
    method __init__ (line 12) | def __init__(self, *args, **kwargs):
    method _check_if_tunable (line 15) | def _check_if_tunable(self, model, dataset):
    method tune (line 20) | def tune(self, model, dataset):

FILE: src/lmflow/pipeline/dpo_aligner.py
  function get_paired_dataset (line 23) | def get_paired_dataset(
  class DPOAligner (line 70) | class DPOAligner(BaseAligner):
    method __init__ (line 71) | def __init__(self, model_args, data_args, aligner_args):
    method _initialize_trainer (line 78) | def _initialize_trainer(self, model, tokenizer):
    method _load_dataset (line 132) | def _load_dataset(self):
    method align (line 151) | def align(self, model, dataset, reward_model):

FILE: src/lmflow/pipeline/dpov2_aligner.py
  class DPOv2Aligner (line 30) | class DPOv2Aligner(BaseAligner):
    method __init__ (line 31) | def __init__(
    method align (line 43) | def align(
    method __prepare_training_args (line 114) | def __prepare_training_args(
    method convert_to_paired_dataset (line 143) | def convert_to_paired_dataset(
    method _calc_response_lengths (line 184) | def _calc_response_lengths(
    method _calc_reward_with_length_penalty (line 198) | def _calc_reward_with_length_penalty(
    method sampling_paired_idx_from_rewards (line 210) | def sampling_paired_idx_from_rewards(
    method _sampling_paired_idx_from_rewards (line 228) | def _sampling_paired_idx_from_rewards(
    method _sampling_paired_idx_from_rewards_fast (line 250) | def _sampling_paired_idx_from_rewards_fast(
  class MemorySafeDPOv2Aligner (line 273) | class MemorySafeDPOv2Aligner:
    method __init__ (line 274) | def __init__(
    method align (line 287) | def align(self):

FILE: src/lmflow/pipeline/evaluator.py
  class Evaluator (line 41) | class Evaluator(BasePipeline):
    method __init__ (line 59) | def __init__(
    method create_dataloader (line 103) | def create_dataloader(self, dataset: Dataset):
    method _match (line 122) | def _match(self, predicted_answer, groundtruth, answer_type=None):
    method evaluate (line 137) | def evaluate(
    method _evaluate_acc_with_accelerate (line 174) | def _evaluate_acc_with_accelerate(self, model, dataset, verbose=True):
    method _evaluate_acc_with_deepspeed (line 282) | def _evaluate_acc_with_deepspeed(self, model, dataset, verbose=True):
    method _evaluate_ppl (line 386) | def _evaluate_ppl(self, model, dataset: Dataset, verbose=True):
    method _evaluate_nll (line 435) | def _evaluate_nll(

FILE: src/lmflow/pipeline/finetuner.py
  class Finetuner (line 40) | class Finetuner(BaseTuner):
    method __init__ (line 63) | def __init__(
    method group_text (line 131) | def group_text(self, tokenized_datasets, model_max_length):
    method tune (line 222) | def tune(

FILE: src/lmflow/pipeline/inferencer.py
  function rstrip_partial_utf8 (line 32) | def rstrip_partial_utf8(string):
  class Inferencer (line 44) | class Inferencer(BasePipeline):
    method __init__ (line 62) | def __init__(
    method create_dataloader (line 98) | def create_dataloader(self, dataset: Dataset):
    method inference (line 127) | def inference(
    method stream_inference (line 269) | def stream_inference(
  class SpeculativeInferencer (line 317) | class SpeculativeInferencer(Inferencer):
    method __init__ (line 338) | def __init__(self, model_args, draft_model_args, data_args, inferencer...
    method score_to_prob (line 350) | def score_to_prob(
    method sample (line 399) | def sample(prob: torch.Tensor, num_samples: int = 1) -> dict:
    method predict_next_token (line 409) | def predict_next_token(model: HFDecoderModel, input_ids: torch.Tensor,...
    method autoregressive_sampling (line 421) | def autoregressive_sampling(
    method inference (line 437) | def inference(
    method stream_inference (line 560) | def stream_inference(self):
  class ToolInferencer (line 564) | class ToolInferencer(Inferencer):
    method __init__ (line 582) | def __init__(self, model_args, data_args, inferencer_args):
    method inference (line 587) | def inference(
    method code_exec (line 627) | def code_exec(self, code):

FILE: src/lmflow/pipeline/iterative_dpo_aligner.py
  class IterativeDPOAligner (line 27) | class IterativeDPOAligner:
    method __init__ (line 28) | def __init__(
    method align (line 44) | def align(self, dataset_list: list[Dataset]):
    method _align_single_iteration (line 64) | def _align_single_iteration(
    method _do_target_model_inference (line 118) | def _do_target_model_inference(
    method _do_reward_model_inference (line 146) | def _do_reward_model_inference(
    method _do_single_dpo_align (line 171) | def _do_single_dpo_align(
    method _parse_target_model_inference_args (line 191) | def _parse_target_model_inference_args(
    method _parse_reward_model_inference_args (line 205) | def _parse_reward_model_inference_args(
    method _parse_dpo_aligner_args (line 216) | def _parse_dpo_aligner_args(
    method __filter_args (line 231) | def __filter_args(

FILE: src/lmflow/pipeline/raft_aligner.py
  class RaftAligner (line 34) | class RaftAligner(BaseAligner):
    method __init__ (line 57) | def __init__(self, model_args, data_args, aligner_args, *args, **kwargs):
    method _initialize_trainer (line 80) | def _initialize_trainer(self, model, tokenizer, training_args):
    method _load_dataset (line 96) | def _load_dataset(
    method _load_input_dataset (line 218) | def _load_input_dataset(self, dataset, tokenizer):
    method _clean_text (line 244) | def _clean_text(self, text):
    method _discard_sample (line 250) | def _discard_sample(self, text):
    method _get_batch_dataset_top (line 257) | def _get_batch_dataset_top(
    method _get_batch_dataset_local (line 390) | def _get_batch_dataset_local(
    method align (line 545) | def align(self, model, dataset, reward_model):

FILE: src/lmflow/pipeline/rm_inferencer.py
  class RewardModelInferencer (line 41) | class RewardModelInferencer(BasePipeline):
    method __init__ (line 57) | def __init__(
    method inference (line 84) | def inference(
    method _inference (line 132) | def _inference(
    method __inference (line 157) | def __inference(
    method __distributed_inference (line 200) | def __distributed_inference(
    method __vllm_inference (line 300) | def __vllm_inference(
    method __post_process_model_output (line 308) | def __post_process_model_output(
    method flatten_list (line 316) | def flatten_list(self, list_of_list: list[list]) -> tuple[list, list[i...
    method compress_list (line 321) | def compress_list(self, list_to_compress: list, sublist_lengths: list[...

FILE: src/lmflow/pipeline/rm_tuner.py
  class RewardModelTuner (line 16) | class RewardModelTuner(Finetuner):
    method __init__ (line 37) | def __init__(self, model_args, data_args, finetuner_args, *args, **kwa...
    method tune (line 40) | def tune(

FILE: src/lmflow/pipeline/sglang_inferencer.py
  class SGLangInferencer (line 29) | class SGLangInferencer(BasePipeline):
    method __init__ (line 30) | def __init__(
    method _parse_args_to_sampling_params (line 43) | def _parse_args_to_sampling_params(
    method inference (line 62) | def inference(
    method save_inference_results (line 99) | def save_inference_results(
    method load_inference_results (line 110) | def load_inference_results(

FILE: src/lmflow/pipeline/utils/dpov2_dataprocessor.py
  class PreferenceDataCollatorWithPadding (line 16) | class PreferenceDataCollatorWithPadding:
    method tokenize_batch_element (line 29) | def tokenize_batch_element(
    method collate (line 133) | def collate(self, batch):
    method __call__ (line 174) | def __call__(self, features: list[dict[str, Any]]) -> dict[str, Any]:

FILE: src/lmflow/pipeline/utils/dpov2_trainer.py
  class DPOv2Trainer (line 29) | class DPOv2Trainer(DPOTrainer):
    method __init__ (line 30) | def __init__(
    method dpo_loss (line 102) | def dpo_loss(
    method get_batch_loss_metrics (line 193) | def get_batch_loss_metrics(
    method get_batch_metrics (line 201) | def get_batch_metrics(

FILE: src/lmflow/pipeline/utils/lisa_trainer.py
  class DynamicLayerActivationCallback (line 8) | class DynamicLayerActivationCallback(TrainerCallback):
    method __init__ (line 9) | def __init__(
    method freeze_all_layers (line 43) | def freeze_all_layers(self):
    method on_step_begin (line 49) | def on_step_begin(self, args, state, control, **kwargs):
    method switch_active_layers (line 54) | def switch_active_layers(self):

FILE: src/lmflow/pipeline/utils/memory_safe_dpov2_align.py
  function main (line 26) | def main():

FILE: src/lmflow/pipeline/utils/memory_safe_vllm_inference.py
  function main (line 27) | def main():

FILE: src/lmflow/pipeline/utils/raft_trainer.py
  class RaftTrainer (line 209) | class RaftTrainer:
    method __init__ (line 282) | def __init__(
    method add_callback (line 655) | def add_callback(self, callback):
    method pop_callback (line 665) | def pop_callback(self, callback):
    method remove_callback (line 678) | def remove_callback(self, callback):
    method _move_model_to_device (line 688) | def _move_model_to_device(self, model, device):
    method _set_signature_columns_if_needed (line 694) | def _set_signature_columns_if_needed(self):
    method _remove_unused_columns (line 702) | def _remove_unused_columns(self, dataset: "datasets.Dataset", descript...
    method _get_collator_with_removed_columns (line 728) | def _get_collator_with_removed_columns(
    method _get_train_sampler (line 746) | def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
    method get_train_dataloader (line 817) | def get_train_dataloader(self) -> DataLoader:
    method _get_eval_sampler (line 865) | def _get_eval_sampler(self, eval_dataset: Dataset) -> Optional[torch.u...
    method get_eval_dataloader (line 894) | def get_eval_dataloader(self, eval_dataset: Optional[Dataset] = None) ...
    method get_test_dataloader (line 942) | def get_test_dataloader(self, test_dataset: Dataset) -> DataLoader:
    method create_optimizer_and_scheduler (line 988) | def create_optimizer_and_scheduler(self, num_training_steps: int):
    method create_optimizer (line 1003) | def create_optimizer(self):
    method get_optimizer_cls_and_kwargs (line 1059) | def get_optimizer_cls_and_kwargs(args: TrainingArguments) -> tuple[Any...
    method create_scheduler (line 1147) | def create_scheduler(self, num_training_steps: int, optimizer: torch.o...
    method num_examples (line 1166) | def num_examples(self, dataloader: DataLoader) -> int:
    method _hp_search_setup (line 1180) | def _hp_search_setup(self, trial: Union["optuna.Trial", dict[str, Any]]):
    method _report_to_hp_search (line 1221) | def _report_to_hp_search(self, trial: Union["optuna.Trial", dict[str, ...
    method _tune_save_checkpoint (line 1239) | def _tune_save_checkpoint(self):
    method call_model_init (line 1252) | def call_model_init(self, trial=None):
    method torch_jit_model_eval (line 1266) | def torch_jit_model_eval(self, model, dataloader, training=False):
    method ipex_optimize_model (line 1304) | def ipex_optimize_model(self, model, training=False, dtype=torch.float...
    method _wrap_model (line 1327) | def _wrap_model(self, model, training=True, dataloader=None):
    method train (line 1517) | def train(
    method _one_train (line 1613) | def _one_train(
    method _inner_training_loop (line 1950) | def _inner_training_loop(
    method _get_output_dir (line 2053) | def _get_output_dir(self, trial):
    method _load_from_checkpoint (line 2073) | def _load_from_checkpoint(self, resume_from_checkpoint, model=None):
    method _load_best_model (line 2131) | def _load_best_model(self):
    method _issue_warnings_after_load (line 2191) | def _issue_warnings_after_load(self, load_result):
    method _maybe_log_save_evaluate (line 2202) | def _maybe_log_save_evaluate(self, tr_loss, model, trial, epoch, ignor...
    method _load_rng_state (line 2241) | def _load_rng_state(self, checkpoint):
    method _save_checkpoint (line 2282) | def _save_checkpoint(self, model, trial, metrics=None):
    method _load_optimizer_and_scheduler (line 2391) | def _load_optimizer_and_scheduler(self, checkpoint):
    method hyperparameter_search (line 2448) | def hyperparameter_search(
    method log (line 2546) | def log(self, logs: dict[str, float]) -> None:
    method _prepare_input (line 2561) | def _prepare_input(self, data: Union[torch.Tensor, Any]) -> Union[torc...
    method _prepare_inputs (line 2579) | def _prepare_inputs(self, inputs: dict[str, Union[torch.Tensor, Any]])...
    method compute_loss_context_manager (line 2595) | def compute_loss_context_manager(self):
    method autocast_smart_context_manager (line 2601) | def autocast_smart_context_manager(self, cache_enabled: Optional[bool]...
    method training_step (line 2620) | def training_step(self, model: nn.Module, inputs: dict[str, Union[torc...
    method compute_loss (line 2664) | def compute_loss(self, model, inputs, return_outputs=False):
    method is_local_process_zero (line 2695) | def is_local_process_zero(self) -> bool:
    method is_world_process_zero (line 2702) | def is_world_process_zero(self) -> bool:
    method save_model (line 2714) | def save_model(self, output_dir: Optional[str] = None, _internal_call:...
    method _save_tpu (line 2777) | def _save_tpu(self, output_dir: Optional[str] = None):
    method _save (line 2805) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method store_flos (line 2830) | def store_flos(self):
    method _sorted_checkpoints (line 2841) | def _sorted_checkpoints(
    method _rotate_checkpoints (line 2865) | def _rotate_checkpoints(self, use_mtime=False, output_dir=None) -> None:
    method evaluate (line 2890) | def evaluate(
    method predict (line 2957) | def predict(
    method evaluation_loop (line 3012) | def evaluation_loop(
    method _nested_gather (line 3220) | def _nested_gather(self, tensors, name=None):
    method _pad_across_processes (line 3238) | def _pad_across_processes(self, tensor, pad_index=-100):
    method prediction_step (line 3270) | def prediction_step(
    method floating_point_ops (line 3371) | def floating_point_ops(self, inputs: dict[str, Union[torch.Tensor, Any...
    method init_git_repo (line 3387) | def init_git_repo(self, at_init: bool = False):
    method create_model_card (line 3433) | def create_model_card(
    method _push_from_checkpoint (line 3488) | def _push_from_checkpoint(self, checkpoint_folder):
    method push_to_hub (line 3530) | def push_to_hub(self, commit_message: Optional[str] = "End of training...
    method prediction_loop (line 3588) | def prediction_loop(
    method _gather_and_numpify (line 3733) | def _gather_and_numpify(self, tensors, name):
    method _add_sm_patterns_to_gitignore (line 3749) | def _add_sm_patterns_to_gitignore(self) -> None:

FILE: src/lmflow/pipeline/utils/rm_dataprocessor.py
  class RewardDataCollatorWithPadding (line 12) | class RewardDataCollatorWithPadding:
    method __call__ (line 19) | def __call__(self, features: list[dict[str, Any]]) -> dict[str, Any]:

FILE: src/lmflow/pipeline/utils/rm_trainer.py
  function compute_metrics (line 7) | def compute_metrics(eval_pred):
  function rm_loss (line 16) | def rm_loss(model, inputs, return_outputs=False):
  class RewardTrainer (line 29) | class RewardTrainer(Trainer):
    method compute_loss (line 30) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: src/lmflow/pipeline/vllm_inferencer.py
  class InferencerWithOffloading (line 46) | class InferencerWithOffloading(BasePipeline):
    method __init__ (line 47) | def __init__(
    method inference (line 58) | def inference(self):
    method save_inference_results (line 61) | def save_inference_results(self):
    method load_inference_results (line 64) | def load_inference_results(self):
  class VLLMInferencer (line 68) | class VLLMInferencer(InferencerWithOffloading):
    method __init__ (line 69) | def __init__(
    method parse_to_sampling_params (line 79) | def parse_to_sampling_params(
    method inference (line 94) | def inference(
    method _inference (line 171) | def _inference(
    method _distributed_inference (line 189) | def _distributed_inference(
    method save_inference_results (line 277) | def save_inference_results(
    method load_inference_results (line 287) | def load_inference_results(
  class MemorySafeVLLMInferencer (line 297) | class MemorySafeVLLMInferencer(VLLMInferencer):
    method __init__ (line 298) | def __init__(
    method inference (line 308) | def inference(self) -> list[VLLMInferenceResultWithInput]:

FILE: src/lmflow/tokenization/hf_decoder_model.py
  function blocking (line 19) | def blocking(
  function tokenize_function (line 57) | def tokenize_function(
  function conversation_tokenize_function (line 111) | def conversation_tokenize_function(

FILE: src/lmflow/tokenization/hf_text_regression_model.py
  function blocking_paired (line 19) | def blocking_paired(
  function blocking (line 61) | def blocking(
  function blocking_text_to_textlist (line 99) | def blocking_text_to_textlist(
  function paired_conversation_tokenize_function (line 140) | def paired_conversation_tokenize_function(
  function conversation_tokenize_function (line 214) | def conversation_tokenize_function(
  function tokenize_function (line 283) | def tokenize_function(
  function text_to_textlist_tokenize_function (line 337) | def text_to_textlist_tokenize_function(

FILE: src/lmflow/utils/common.py
  function make_shell_args_from_dataclass (line 13) | def make_shell_args_from_dataclass(
  function create_copied_dataclass (line 76) | def create_copied_dataclass(original_dataclass, field_prefix: str, class...
  function remove_dataclass_attr_prefix (line 133) | def remove_dataclass_attr_prefix(data_instance, prefix: str) -> dict:
  function add_dataclass_attr_prefix (line 156) | def add_dataclass_attr_prefix(data_instance, prefix: str) -> dict:
  function print_banner (line 179) | def print_banner(message: str):

FILE: src/lmflow/utils/conversation_template/base.py
  class TemplateComponent (line 18) | class TemplateComponent:
    method __post_init__ (line 56) | def __post_init__(self):
    method __repr__ (line 74) | def __repr__(self) -> str:
    method __str__ (line 77) | def __str__(self) -> str:
  class Formatter (line 82) | class Formatter(ABC):
    method format (line 86) | def format(self, **kwargs) -> list[TemplateComponent]: ...
    method has_placeholder (line 88) | def has_placeholder(self):
  class EmptyFormatter (line 99) | class EmptyFormatter(Formatter):
    method __post_init__ (line 100) | def __post_init__(self):
    method format (line 104) | def format(self, **kwargs) -> list:
  class StringFormatter (line 117) | class StringFormatter(Formatter):
    method __post_init__ (line 118) | def __post_init__(self):
    method format (line 122) | def format(self, **kwargs) -> list:
  class ConversationTemplate (line 156) | class ConversationTemplate:
    method __post_init__ (line 171) | def __post_init__(self):
    method encode_conversation (line 182) | def encode_conversation(
    method _encode (line 231) | def _encode(
    method _encode_template (line 264) | def _encode_template(
    method post_process_pairs (line 302) | def post_process_pairs(self, encoded_pairs, tokenizer):
    method remove_last_separator (line 326) | def remove_last_separator(
    method add_special_starter (line 349) | def add_special_starter(
    method add_special_stopper (line 372) | def add_special_stopper(
    method _ensure_id_list (line 395) | def _ensure_id_list(self, obj: Union[int, list[int]]) -> list[int]:
  class ConversationTemplateForTool (line 406) | class ConversationTemplateForTool(ConversationTemplate):
    method encode_conversation (line 407) | def encode_conversation(
    method _encode (line 456) | def _encode(
    method _encode_template (line 508) | def _encode_template(
    method _handle_tools (line 546) | def _handle_tools(self, tools: Optional[list[str]]) -> str:

FILE: src/lmflow/utils/conversation_template/gemma.py
  class GemmaConversationTemplate (line 12) | class GemmaConversationTemplate(ConversationTemplate):
    method encode_conversation (line 13) | def encode_conversation(self, *args, **kwargs):

FILE: src/lmflow/utils/conversation_template/hymba.py
  class HymbaConversationTemplate (line 37) | class HymbaConversationTemplate(ConversationTemplateForTool):
    method _handle_tools (line 38) | def _handle_tools(self, tools: Optional[list[str]]) -> str:

FILE: src/lmflow/utils/conversation_template/llama.py
  class Llama2ConversationTemplate (line 16) | class Llama2ConversationTemplate(ConversationTemplate):
    method _encode (line 17) | def _encode(
  class Llama2ConversationTemplateForTool (line 54) | class Llama2ConversationTemplateForTool(Llama2ConversationTemplate):
    method _encode (line 55) | def _encode(

FILE: src/lmflow/utils/conversation_template/zephyr.py
  class ZephyrConversationTemplate (line 14) | class ZephyrConversationTemplate(ConversationTemplate):
    method _encode (line 15) | def _encode(

FILE: src/lmflow/utils/data_utils.py
  function set_random_seed (line 15) | def set_random_seed(seed: int):
  function load_data (line 32) | def load_data(file_name: str):
  function batchlize (line 66) | def batchlize(examples: list, batch_size: int, random_shuffle: bool):
  function preview_file (line 99) | def preview_file(file_path: str, chars: int = 100):
  function get_dataset_type_fast (line 129) | def get_dataset_type_fast(file_path: str, max_chars: int = 100) -> Union...
  function check_dataset_instances_key_fast (line 144) | def check_dataset_instances_key_fast(file_path: str, instances_key: str,...
  function answer_extraction (line 155) | def answer_extraction(response, answer_type=None):  # use this funtion t...
  function process_image_flag (line 276) | def process_image_flag(text, image_flag="<ImageHere>"):
  class VLLMInferenceResultWithInput (line 288) | class VLLMInferenceResultWithInput(TypedDict):
  class RewardModelInferenceResultWithInput (line 293) | class RewardModelInferenceResultWithInput(TypedDict):

FILE: src/lmflow/utils/debug/profiler.py
  class Timer (line 5) | class Timer:
    method __init__ (line 6) | def __init__(self, name):
    method start (line 11) | def start(self, tag):
    method end (line 14) | def end(self, tag):
    method get_runtime (line 18) | def get_runtime(self, tag):
    method show (line 21) | def show(self):
    method _to_readable (line 25) | def _to_readable(self):

FILE: src/lmflow/utils/deprecated.py
  function deprecated_args (line 13) | def deprecated_args(**deprecated_params: dict[str, Any]):

FILE: src/lmflow/utils/envs.py
  function is_accelerate_env (line 15) | def is_accelerate_env():
  function get_device_name (line 22) | def get_device_name() -> str:
  function get_torch_device (line 33) | def get_torch_device() -> any:

FILE: src/lmflow/utils/llava_conversation_lib.py
  class SeparatorStyle (line 6) | class SeparatorStyle(Enum):
  class Conversation (line 17) | class Conversation:
    method get_prompt (line 31) | def get_prompt(self):
    method append_message (line 109) | def append_message(self, role, message):
    method get_images (line 112) | def get_images(self, return_pil=False):
    method to_gradio_chatbot (line 165) | def to_gradio_chatbot(self):
    method copy (line 199) | def copy(self):
    method dict (line 211) | def dict(self):

FILE: src/lmflow/utils/model.py
  function check_homogeneity (line 12) | def check_homogeneity(model_args_list: list[ModelArguments]) -> bool:

FILE: src/lmflow/utils/multimodal.py
  function update_custom_config (line 8) | def update_custom_config(config, model_args):
  function load_llava_pretrain_model (line 23) | def load_llava_pretrain_model(model, checkpoint_path):
  function adapt_llava_model_to_lmflow_type (line 38) | def adapt_llava_model_to_lmflow_type(state_dict):

FILE: src/lmflow/utils/position_interpolation/llama_rope_scaled_monkey_patch.py
  class CondenseRotaryEmbedding (line 8) | class CondenseRotaryEmbedding(torch.nn.Module):
    method __init__ (line 9) | def __init__(self, dim, pi_ratio, ntk_ratio, max_position_embeddings=2...
    method forward (line 31) | def forward(self, x, seq_len=None):
  function replace_llama_with_condense (line 50) | def replace_llama_with_condense(pi_ratio, ntk_ratio):

FILE: src/lmflow/utils/protocol.py
  function union_python_dict (line 34) | def union_python_dict(dict1: dict, dict2: dict):
  function union_tensor_dict (line 52) | def union_tensor_dict(tensor_dict1: TensorDict, tensor_dict2: TensorDict...
  function _array_equal (line 68) | def _array_equal(array1: np.ndarray, array2: np.ndarray, visited: Set[in...
  function _deep_equal (line 95) | def _deep_equal(a: Any, b: Any, visited: Set[int]) -> bool:
  function union_numpy_dict (line 131) | def union_numpy_dict(tensor_dict1: Dict[str, np.ndarray], tensor_dict2: ...
  function list_of_dict_to_dict_of_list (line 145) | def list_of_dict_to_dict_of_list(list_of_dict: List[dict]):
  function collate_fn (line 157) | def collate_fn(x: List["DataProtoItem"]):
  function get_tensordict (line 170) | def get_tensordict(tensor_dict: Dict[str, Union[torch.Tensor, list]], no...
  class DataProtoItem (line 250) | class DataProtoItem:
  class DataProto (line 257) | class DataProto:
    method __post_init__ (line 269) | def __post_init__(self):
    method __len__ (line 273) | def __len__(self):
    method __getitem__ (line 282) | def __getitem__(self, item):
    method __getstate__ (line 316) | def __getstate__(self):
    method __setstate__ (line 326) | def __setstate__(self, data):
    method save_to_disk (line 336) | def save_to_disk(self, filepath):
    method load_from_disk (line 341) | def load_from_disk(filepath) -> "DataProto":
    method print_size (line 346) | def print_size(self, prefix=""):
    method check_consistency (line 364) | def check_consistency(self):
    method from_single_dict (line 390) | def from_single_dict(cls, data: Dict[str, Union[torch.Tensor, np.ndarr...
    method from_dict (line 406) | def from_dict(
    method from_tensordict (line 453) | def from_tensordict(
    method to (line 493) | def to(self, device) -> "DataProto":
    method select (line 507) | def select(self, batch_keys=None, non_tensor_batch_keys=None, meta_inf...
    method select_idxs (line 542) | def select_idxs(self, idxs):
    method slice (line 582) | def slice(self, start=None, end=None, step=None):
    method pop (line 628) | def pop(self, batch_keys=None, non_tensor_batch_keys=None, meta_info_k...
    method rename (line 661) | def rename(self, old_keys=None, new_keys=None) -> "DataProto":
    method union (line 688) | def union(self, other: "DataProto") -> "DataProto":
    method make_iterator (line 707) | def make_iterator(self, mini_batch_size, epochs, seed=None, dataloader...
    method padding (line 747) | def padding(self, padding_size, padding_candidate=""):
    method chunk (line 762) | def chunk(self, chunks: int) -> list["DataProto"]:
    method split (line 803) | def split(self, split_size: int) -> list["DataProto"]:
    method concat (line 815) | def concat(data: list["DataProto"]) -> "DataProto":
    method reorder (line 861) | def reorder(self, indices):
    method repeat (line 869) | def repeat(self, repeat_times=2, interleave=True):
    method unfold_column_chunks (line 913) | def unfold_column_chunks(self, n_split: int, split_keys: Optional[list...
    method sample_level_repeat (line 952) | def sample_level_repeat(self, repeat_times):
    method to_tensordict (line 1000) | def to_tensordict(self) -> TensorDict:
    method get_data_info (line 1024) | def get_data_info(self) -> str:
    method _get_type_info (line 1051) | def _get_type_info(self, value):

FILE: src/lmflow/utils/test_utils.py
  function compare_model (line 7) | def compare_model(

FILE: src/lmflow/utils/versioning.py
  function get_python_version (line 13) | def get_python_version():
  function _is_package_available (line 17) | def _is_package_available(package_name: str, skippable: bool = False):
  function _is_packages_available (line 33) | def _is_packages_available(packages: Union[List[str], List[Tuple[str, bo...
  function is_package_version_at_least (line 42) | def is_package_version_at_least(package_name, min_version):
  function is_gradio_available (line 52) | def is_gradio_available():
  function is_ray_available (line 56) | def is_ray_available():
  function is_vllm_available (line 60) | def is_vllm_available():
  function is_sglang_available (line 64) | def is_sglang_available():
  function is_flash_attn_available (line 68) | def is_flash_attn_available():
  function is_flask_available (line 72) | def is_flask_available():
  function is_trl_available (line 76) | def is_trl_available():
  function is_multimodal_available (line 80) | def is_multimodal_available():
  function is_deepspeed_available (line 84) | def is_deepspeed_available():
  function get_lmflow_dir (line 88) | def get_lmflow_dir(return_src_dir: bool = False) -> Path:

FILE: tests/conftest.py
  function pytest_configure (line 4) | def pytest_configure(config: pytest.Config):

FILE: tests/datasets/conftest.py
  function dataset_inference_conversation (line 8) | def dataset_inference_conversation() -> Dataset:
  function dataset_inference_conversation_batch (line 17) | def dataset_inference_conversation_batch() -> Dataset:

FILE: tests/datasets/test_dataset.py
  class DatasetTest (line 24) | class DatasetTest(unittest.TestCase):
    method test_init (line 25) | def test_init(self):
    method test_create_from_dict (line 36) | def test_create_from_dict(self):
    method test_create_from_dict_bad_type (line 47) | def test_create_from_dict_bad_type(self):

FILE: tests/models/test_auto_model.py
  class AutoModelTest (line 12) | class AutoModelTest(unittest.TestCase):
    method test_get_decoder_model (line 13) | def test_get_decoder_model(self):
    method test_get_unsupported_model (line 36) | def test_get_unsupported_model(self):

FILE: tests/models/test_hf_decoder_model.py
  function make_gt_from_conversation_ids (line 286) | def make_gt_from_conversation_ids(conversation_ids):
  function make_gt_from_conversation_ids_batch (line 299) | def make_gt_from_conversation_ids_batch(batched_conversation_ids):
  class HFDecoderModelTest (line 309) | class HFDecoderModelTest(unittest.TestCase):
    method _test_tokenize (line 311) | def _test_tokenize(
    method test_tokenize_text_only (line 341) | def test_tokenize_text_only(self):
    method test_tokenize_text_only_multiple (line 363) | def test_tokenize_text_only_multiple(self):
    method test_tokenize_text2text (line 384) | def test_tokenize_text2text(self):
    method test_tokenize_conversation (line 407) | def test_tokenize_conversation(self):
    method test_tokenize_conversation_multiple (line 512) | def test_tokenize_conversation_multiple(self):
    method test_encode (line 629) | def test_encode(self):
    method test_decode (line 640) | def test_decode(self):
    method test_inference (line 651) | def test_inference(self):

FILE: tests/models/test_tool_inferencer.py
  class ToolInferencerTest (line 15) | class ToolInferencerTest(unittest.TestCase):
    method set_up (line 16) | def set_up(self):
    method test_code_exec_1 (line 22) | def test_code_exec_1(self, code=CODE_1, expected_output=RES_1):
    method test_code_exec_2 (line 26) | def test_code_exec_2(self, code=CODE_2):

FILE: tests/pipeline/test_auto_pipeline.py
  class AutoPipelineTest (line 12) | class AutoPipelineTest(unittest.TestCase):
    method test_get_evaluator_pipeline (line 13) | def test_get_evaluator_pipeline(self):
    method test_get_finetuner_pipeline (line 21) | def test_get_finetuner_pipeline(self):
    method test_get_inferencer_pipeline (line 29) | def test_get_inferencer_pipeline(self):
    method test_get_unsupported_pipeline (line 37) | def test_get_unsupported_pipeline(self):

FILE: tests/pipeline/test_finetuner_distributed_loss.py
  class AccelerateBackend (line 123) | class AccelerateBackend(Enum):
  class DeepSpeedZeroStage (line 128) | class DeepSpeedZeroStage(Enum):
  class PeftMethod (line 132) | class PeftMethod(Enum):
  class TestDtype (line 139) | class TestDtype(Enum):
  class TestFinetunerBase (line 158) | class TestFinetunerBase(TestCasePlus):
    method setUp (line 161) | def setUp(self):
    method _make_cmd (line 167) | def _make_cmd(self, run_name: str, args: list[list[str]]) -> list[str]:
    method _log_std (line 185) | def _log_std(self, res: _RunOutput):
    method _load_trainer_state (line 194) | def _load_trainer_state(self, output_dir: str) -> dict:
    method _run_with_accelerate (line 199) | def _run_with_accelerate(
    method _run_with_deepspeed (line 234) | def _run_with_deepspeed(
    method _compare_loss (line 269) | def _compare_loss(self, trainer_state1: dict, trainer_state2: dict):
    method test_loss_accelerate_dsz3_vs_fsdp (line 285) | def test_loss_accelerate_dsz3_vs_fsdp(self):
    method test_loss_deepspeed_z3_vs_accelerate_dsz3 (line 298) | def test_loss_deepspeed_z3_vs_accelerate_dsz3(self):
  class TestFinetunerLora (line 308) | class TestFinetunerLora(TestFinetunerBase):
    method test_loss_accelerate_dsz3_vs_fsdp (line 313) | def test_loss_accelerate_dsz3_vs_fsdp(self):
    method test_loss_deepspeed_z3_vs_accelerate_dsz3 (line 330) | def test_loss_deepspeed_z3_vs_accelerate_dsz3(self):
  class TestFinetunerQlora (line 344) | class TestFinetunerQlora(TestFinetunerBase):
    method test_loss_accelerate_dsz3_vs_fsdp (line 351) | def test_loss_accelerate_dsz3_vs_fsdp(self):
    method test_loss_deepspeed_z3_vs_accelerate_dsz3 (line 370) | def test_loss_deepspeed_z3_vs_accelerate_dsz3(self):
  class TestFinetunerCustomOptim (line 386) | class TestFinetunerCustomOptim(TestFinetunerBase):
    method test_loss_accelerate_dsz3_vs_fsdp (line 391) | def test_loss_accelerate_dsz3_vs_fsdp(self):
    method test_loss_deepspeed_z3_vs_accelerate_dsz3 (line 404) | def test_loss_deepspeed_z3_vs_accelerate_dsz3(self):

FILE: tests/pipeline/test_memory_safe_vllm_inferencer.py
  class MemorySafeVLLMInferencerTest (line 40) | class MemorySafeVLLMInferencerTest:
    method test_init (line 41) | def test_init(self):
    method test_inference (line 51) | def test_inference(self):
    method test_inference_detokenize (line 64) | def test_inference_detokenize(self):
    method summary (line 82) | def summary(self):

FILE: tests/pipeline/test_sglang_infernecer.py
  function sglang_test_model_args (line 14) | def sglang_test_model_args() -> ModelArguments:
  function sglang_test_inferencer_args (line 19) | def sglang_test_inferencer_args() -> InferencerArguments:
  function test_sglang_inferencer (line 30) | def test_sglang_inferencer(

FILE: tests/utils/test_conversation_formatter.py
  class StringFormatterTest (line 6) | class StringFormatterTest(unittest.TestCase):
    method test_format_string_component (line 7) | def test_format_string_component(self):

FILE: tests/utils/test_conversation_template.py
  class EmptyConversationTemplateTest (line 103) | class EmptyConversationTemplateTest(unittest.TestCase):
    method setUp (line 104) | def setUp(self):
    method test_encode_conversation_singleturn_llama2 (line 109) | def test_encode_conversation_singleturn_llama2(self):
    method test_encode_conversation_multiturn_llama2 (line 115) | def test_encode_conversation_multiturn_llama2(self):
  class Llama2ConversationTemplateTest (line 122) | class Llama2ConversationTemplateTest(unittest.TestCase):
    method setUp (line 123) | def setUp(self):
    method test_encode_conversation_singleturn (line 128) | def test_encode_conversation_singleturn(self):
    method test_encode_conversation_multiturn (line 137) | def test_encode_conversation_multiturn(self):
  class Qwen2ConversationTemplateTest (line 147) | class Qwen2ConversationTemplateTest(unittest.TestCase):
    method setUp (line 148) | def setUp(self):
    method test_encode_conversation_singleturn (line 153) | def test_encode_conversation_singleturn(self):
    method test_encode_conversation_multiturn (line 162) | def test_encode_conversation_multiturn(self):

FILE: tests/utils/test_data_utils.py
  class DataUtilsTest (line 56) | class DataUtilsTest(unittest.TestCase):
    method test_load_data (line 57) | def test_load_data(self):
    method test_batchlize (line 70) | def test_batchlize(self):
    method test_answer_extraction (line 80) | def test_answer_extraction(self):
Condensed preview — 281 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (2,971K chars).
[
  {
    "path": ".gitattributes",
    "chars": 238,
    "preview": "*.html linguist-detectable=false\n*.js linguist-detectable=false\n*.ipynb linguist-detectable=false\n*RAFT.pdf filter=lfs d"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/api-feedback.md",
    "chars": 142,
    "preview": "---\nname: API Feedback\nabout: Provide feedback regarding the current design of the API.\ntitle: \"[API Design]\"\nlabels: ''"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/blank-template.md",
    "chars": 87,
    "preview": "---\nname: Blank Template\nabout: Other issues\ntitle: ''\nlabels: ''\nassignees: ''\n\n---\n\n\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug-report.md",
    "chars": 840,
    "preview": "---\nname: Bug Report\nabout: Create a report to help us improve\ntitle: \"[BUG]\"\nlabels: bug\nassignees: ''\n\n---\n\n**Describe"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature-request.md",
    "chars": 608,
    "preview": "---\nname: Feature Request\nabout: Suggest an idea for this project\ntitle: \"[New Feature]\"\nlabels: ''\nassignees: ''\n\n---\n\n"
  },
  {
    "path": ".github/workflows/documentation.yaml",
    "chars": 679,
    "preview": "name: Docs\non: [push, pull_request, workflow_dispatch]\njobs:\n  docs:\n    runs-on: ubuntu-latest\n    steps:\n      - uses:"
  },
  {
    "path": ".gitignore",
    "chars": 1999,
    "preview": "# Initially taken from Github's Python gitignore file\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$"
  },
  {
    "path": ".pre-commit-config.yaml",
    "chars": 227,
    "preview": "repos:\n  - repo: https://github.com/astral-sh/ruff-pre-commit\n    rev: \"v0.11.4\"\n    hooks:\n      - id: ruff\n        arg"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "chars": 5208,
    "preview": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make participa"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 1204,
    "preview": "# LMFlow\n\nWe welcome contributions from the open-source community with open arms! We value and appreciate all types of p"
  },
  {
    "path": "LICENSE",
    "chars": 11356,
    "preview": "                                 Apache License\n                           Version 2.0, January 2004\n                   "
  },
  {
    "path": "README.md",
    "chars": 22685,
    "preview": "<p align=\"center\" width=\"50%\">\n<img src=\"docs/assets/logo.png\" alt=\"LMFlow\" style=\"width: 50%; min-width: 200px; display"
  },
  {
    "path": "configs/accelerate_dsz0_config.yaml",
    "chars": 433,
    "preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndeepspeed_config:\n  gradient_accumulation_steps: 16\n  zero3_init_flag: f"
  },
  {
    "path": "configs/accelerate_dsz2_config.yaml",
    "chars": 460,
    "preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndeepspeed_config:\n  offload_optimizer_device: none\n  offload_param_devic"
  },
  {
    "path": "configs/accelerate_dsz3_config.yaml",
    "chars": 531,
    "preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndeepspeed_config:\n  deepspeed_multinode_launcher: standard\n  offload_opt"
  },
  {
    "path": "configs/accelerate_fsdp_config.yaml",
    "chars": 769,
    "preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndistributed_type: FSDP\n\nfsdp_config:\n  fsdp_auto_wrap_policy: TRANSFORME"
  },
  {
    "path": "configs/accelerate_singlegpu_config.yaml",
    "chars": 687,
    "preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndistributed_type: 'NO'\n\nfsdp_config:\n  fsdp_auto_wrap_policy: SIZE_BASED"
  },
  {
    "path": "configs/archive/accelerate_multigpu_config.yaml",
    "chars": 370,
    "preview": "compute_environment: LOCAL_MACHINE\ndistributed_type: MULTI_GPU\ndowncast_bf16: 'no'\ndynamo_config:\n  dynamo_backend: INDU"
  },
  {
    "path": "configs/archive/accelerate_singlegpu_config.yaml",
    "chars": 340,
    "preview": "compute_environment: LOCAL_MACHINE\ndistributed_type: 'NO'\ndowncast_bf16: 'no'\ndynamo_config:\n  dynamo_backend: INDUCTOR\n"
  },
  {
    "path": "configs/archive/ds_config_chatbot.json",
    "chars": 343,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": false\n    },\n    \"bf16\": {\n        \"enabled\": true\n    },\n    \"comms_logger\": {\n     "
  },
  {
    "path": "configs/archive/ds_config_eval.json",
    "chars": 200,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": false\n    },\n    \"bf16\": {\n        \"enabled\": false\n    },\n    \"steps_per_print\": 200"
  },
  {
    "path": "configs/archive/ds_config_multimodal.json",
    "chars": 556,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "configs/archive/ds_config_vis_chatbot.json",
    "chars": 343,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": false\n    },\n    \"bf16\": {\n        \"enabled\": false\n    },\n    \"comms_logger\": {\n    "
  },
  {
    "path": "configs/deepspeed/zero0_no_offload.json",
    "chars": 731,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "configs/deepspeed/zero2.json",
    "chars": 832,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "configs/deepspeed/zero2_no_offload.json",
    "chars": 731,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "configs/deepspeed/zero3.json",
    "chars": 974,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "configs/deepspeed/zero3_for_eval.json",
    "chars": 789,
    "preview": "{\n    \"bf16\": {\n        \"enabled\": true\n    },\n    \"zero_optimization\": {\n        \"stage\": 3,\n        \"offload_optimizer"
  },
  {
    "path": "configs/deepspeed/zero3_no_offload.json",
    "chars": 904,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "configs/iterative_dpo.yaml",
    "chars": 2035,
    "preview": "# general\n## model\nmodel_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct\nreference_model_name_or_path: meta-llama/Meta"
  },
  {
    "path": "contrib/README.md",
    "chars": 2097,
    "preview": "# Contributing to LMFlow\n\nThanks for your interest in LMFlow! Our LMFlow team appreciate contributions in any form:\n\n  *"
  },
  {
    "path": "contrib/langchain/README.md",
    "chars": 1342,
    "preview": "## Langchain\n\n### Setup\n\n```\npip install langchain\npip install langchain-openai langchain-anthropic langchain-google-gen"
  },
  {
    "path": "contrib/langchain/retrieval_chatbot.py",
    "chars": 7996,
    "preview": "import argparse\nimport logging\nimport os\nimport re\nfrom pathlib import Path\n\nfrom langchain_anthropic import ChatAnthrop"
  },
  {
    "path": "contrib/long-context/hf_sft_full_finetune.sh",
    "chars": 895,
    "preview": "#!/bin/bash\n# accelerate launch --main_process_port 0 ...\n\n# Finetune\npython sft_summarizer.py    \\\n    --model_name_or_"
  },
  {
    "path": "contrib/long-context/hf_sft_lora_flashattn.sh",
    "chars": 984,
    "preview": "#!/bin/bash\n# accelerate launch --main_process_port 0 ...\n\n# Finetunes\npython sft_summarizer.py    \\\n    --model_name_or"
  },
  {
    "path": "contrib/long-context/sft_summarizer.py",
    "chars": 5369,
    "preview": "#!/usr/bin/env python\nimport logging\nimport os\nfrom dataclasses import dataclass, field\nfrom typing import Optional\n\nimp"
  },
  {
    "path": "contrib/rlhflow/reward_modeling.py",
    "chars": 6048,
    "preview": "import os\nimport sys\n\nsys.path.remove(os.path.abspath(os.path.dirname(sys.argv[0])))\nimport numpy as np\nimport torch\nimp"
  },
  {
    "path": "contrib/rlhflow/run_reward_modeling.sh",
    "chars": 1355,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n#   https://github.com/shizhediao/llm-f"
  },
  {
    "path": "contrib/text2image/README.md",
    "chars": 2505,
    "preview": "# Fine-tuning Text2Img\n\nHere is a fork function for fine-tuning text2image diffusion model based on diffusers, under the"
  },
  {
    "path": "contrib/text2image/accelerate_t2i_config.yaml",
    "chars": 346,
    "preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndistributed_type: MULTI_GPU\ndowncast_bf16: 'no'\nenable_cpu_affinity: fal"
  },
  {
    "path": "contrib/text2image/diffuser_args.py",
    "chars": 5452,
    "preview": "import os\nfrom dataclasses import dataclass, field\nfrom typing import Optional\n\nfrom lmflow.args import DatasetArguments"
  },
  {
    "path": "contrib/text2image/diffuser_finetuner.py",
    "chars": 12713,
    "preview": "import copy\nimport gc\nimport json\nimport logging\nimport os\n\nimport torch\nimport torch.nn.functional as F\nimport wandb\nfr"
  },
  {
    "path": "contrib/text2image/finetune_t2i.py",
    "chars": 3075,
    "preview": "import os\n\nos.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\nos.environ[\"WANDB_MODE\"] = \"offline\"\nimport gc\nimport shutil\nf"
  },
  {
    "path": "contrib/text2image/finetune_t2i.sh",
    "chars": 1882,
    "preview": "# Parses arguments\nmodel_name_or_path=stabilityai/stable-diffusion-2-1\nmodel_type=\"unet\"\ndataset_path=data/example\noutpu"
  },
  {
    "path": "contrib/text2image/requirements.txt",
    "chars": 17,
    "preview": "diffusers>=0.29.2"
  },
  {
    "path": "contrib/text2image/t2i_dataset.py",
    "chars": 4108,
    "preview": "#!/usr/bin/env python\n\n\"\"\"This Python code defines a class T2I Dataset.\"\"\"\n\nimport json\nimport logging\nimport os.path as"
  },
  {
    "path": "contrib/tool-finetune/README.md",
    "chars": 2366,
    "preview": "## Function-call Finetune\n\n### Pip dependency\n\n```\nbitsandbytes==0.40.0\ndeepspeed==0.12.0\nflash-attn==2.5.7\npeft==0.10.0"
  },
  {
    "path": "contrib/tool-finetune/function_call_finetune.py",
    "chars": 11253,
    "preview": "import os\nimport sys\n\nsys.path.remove(os.path.abspath(os.path.dirname(sys.argv[0])))\nimport hashlib\nimport logging\nfrom "
  },
  {
    "path": "contrib/tool-finetune/run_function_call_finetune.sh",
    "chars": 1953,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n#   https://github.com/shizhediao/llm-f"
  },
  {
    "path": "docs/dev_notes/finetuning.mmd",
    "chars": 1944,
    "preview": "sequenceDiagram\n    participant User\n    participant Finetuner as LMFlow Finetuner\n    participant Model as LMFlow Model"
  },
  {
    "path": "docs/readme/Position_Interpolation.md",
    "chars": 1207,
    "preview": "# Position Interpolation \nNow LMFlow supports the latest Linear & NTK (Neural Kernel theory) scaling techniques for LLaM"
  },
  {
    "path": "docs/readme/README_es.md",
    "chars": 15022,
    "preview": "<p align=\"center\" width=\"100%\">\n<img src=\"../docs/assets/logo.png\" alt=\"LMFlow\" style=\"width: 100%; min-width: 300px; di"
  },
  {
    "path": "docs/readme/README_hindi.md",
    "chars": 15190,
    "preview": "<p align=\"center\" width=\"100%\">\n<img src=\"../docs/assets/logo.png\" alt=\"LMFlow\" style=\"width: 100%; min-width: 300px; di"
  },
  {
    "path": "docs/readme/README_jp.md",
    "chars": 12087,
    "preview": "<p align=\"center\" width=\"100%\">\n<img src=\"../docs/assets/logo.png\" alt=\"LMFlow\" style=\"width: 100%; min-width: 300px; di"
  },
  {
    "path": "docs/readme/README_ko.md",
    "chars": 12349,
    "preview": "<p align=\"center\" width=\"100%\">\n<img src=\"../docs/assets/logo.png\" alt=\"LMFlow\" style=\"width: 100%; min-width: 300px; di"
  },
  {
    "path": "docs/readme/README_zh-hans.md",
    "chars": 11133,
    "preview": "<p align=\"center\" width=\"100%\">\n<img src=\"../docs/assets/logo.png\" alt=\"LMFlow\" style=\"width: 100%; min-width: 300px; di"
  },
  {
    "path": "docs/readme/flash_attn2.md",
    "chars": 2053,
    "preview": "# FlashAttention-2\nWe're thrilled to announce that LMFlow now supports training and inference using **FlashAttention-2**"
  },
  {
    "path": "docs/readme/multi_node.md",
    "chars": 4318,
    "preview": "# Multi-node Training with LMFlow\n\n### Environment setup\n\nIf NFS is available, we only have to setup this once in the sh"
  },
  {
    "path": "docs/requirements.txt",
    "chars": 87,
    "preview": "sphinx\npydata-sphinx-theme\nsphinx_design\nmyst-parser\nsphinx-autoapi\nmatplotlib\nnumpydoc"
  },
  {
    "path": "docs/source/_static/check_before_after_lora_tuning.jsonl",
    "chars": 965957,
    "preview": "{\"id\": 0, \"instruction\": \"The sentence you are given might be too wordy, complicated, or unclear. Rewrite the sentence a"
  },
  {
    "path": "docs/source/about/authors.md",
    "chars": 101,
    "preview": "# Contributors\n\n\nShizhe Diao, Rui Pan, Hanze Dong, Ka Shun Shum, Jipeng Zhang, Wei Xiong, Tong Zhang\n"
  },
  {
    "path": "docs/source/about/changelog.md",
    "chars": 253,
    "preview": "# Changelog\n\n\n## Version 0.0.1 (Mar 28, 2023)\n\nThe first public version. \n\nTask tuning, instruction tuning, on user defi"
  },
  {
    "path": "docs/source/about/index.md",
    "chars": 92,
    "preview": "# About\n\n\n```{toctree}\n:maxdepth: 2\n\nchangelog\n```\n\n\n```{toctree}\n:maxdepth: 2\n\nauthors\n```\n"
  },
  {
    "path": "docs/source/blogs/benchmark.md",
    "chars": 25539,
    "preview": "# LMFlow Benchmark: An Automatic Evaluation Framework for Open-Source LLMs\n\nMay 9, 2023\n\n\n## Introduction\n\nEvaluation of"
  },
  {
    "path": "docs/source/blogs/index.md",
    "chars": 63,
    "preview": "# Blogs\n\n## 2023\n\n\n```{toctree}\n:maxdepth: 1\n\nbenchmark\n```\n\n\n\n"
  },
  {
    "path": "docs/source/conf.py",
    "chars": 2430,
    "preview": "# Configuration file for the Sphinx documentation builder.\n#\n# For the full list of built-in configuration values, see t"
  },
  {
    "path": "docs/source/examples/DATASETS.md",
    "chars": 18156,
    "preview": "# Dataset\n\n- [Dataset](#dataset)\n  - [Dataset Format in General](#dataset-format-in-general)\n  - [Supported Dataset and "
  },
  {
    "path": "docs/source/examples/TASK_GUIDE.md",
    "chars": 3749,
    "preview": "# LMFlow Benchmark Guide\n\nWe support two ways to add evaluation settings in our repo, `NLL Task Setting` and `LM-Evaluat"
  },
  {
    "path": "docs/source/examples/checkpoints.md",
    "chars": 1399,
    "preview": "# Checkpoints\n\nIn general, you can directly load from checkpoints by using `--model_name_or_path`. However, the LLaMA ca"
  },
  {
    "path": "docs/source/examples/customize_conversation_template.md",
    "chars": 4239,
    "preview": "# Customize Conversation Template\n\n> For beginners: Why template?   \n> Almost all LLMs today do a simple job - predict t"
  },
  {
    "path": "docs/source/examples/finetuning.md",
    "chars": 2973,
    "preview": "# Finetuning \n\n## Full Parameters\n\nFull training updates all the parameters to finetune a language model.\nHere is an exa"
  },
  {
    "path": "docs/source/examples/index.md",
    "chars": 529,
    "preview": "# Examples\n\nWe provide several examples to show how to use our package in your problem.\n\n## Data preparation\n\n```{toctre"
  },
  {
    "path": "docs/source/examples/medical_finetune.md",
    "chars": 1699,
    "preview": "# Finetune\n\n```python\nimport sys\n\nfrom transformers import HfArgumentParser\n\nfrom lmflow.args import (\n    ModelArgument"
  },
  {
    "path": "docs/source/examples/raft.md",
    "chars": 30959,
    "preview": "# RAFT\n## 1 Introduction\n\nWe remark that the example is built on LLaMA whose [licensed](https://docs.google.com/forms/d/"
  },
  {
    "path": "docs/source/examples/reward_modeling.md",
    "chars": 9921,
    "preview": "# Reward Modeling\n\n## Introduction\n\nReinforcement Learning from Human Feedback (RLHF) requires a reward function to guid"
  },
  {
    "path": "docs/source/examples/supported_conversation_template.md",
    "chars": 48336,
    "preview": "# Supported Conversation Template\n\n- [Supported Conversation Template](#supported-conversation-template)\n  - [ChatGLM-3]"
  },
  {
    "path": "docs/source/index.md",
    "chars": 10112,
    "preview": "<img src=\"_static/logo.png\" alt=\"LMFlow\" style=\"width: 100%; min-width: 300px; display: block; margin: auto; background-"
  },
  {
    "path": "examples/benchmarking.py",
    "chars": 10801,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/chatbot.py",
    "chars": 4495,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/chatbot_gradio.py",
    "chars": 6179,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/detail_memory.py",
    "chars": 12594,
    "preview": "import sys\nimport time\n\nimport torch\nfrom peft import LoraConfig, TaskType, get_peft_model\nfrom transformers import Auto"
  },
  {
    "path": "examples/dpo_train.py",
    "chars": 1235,
    "preview": "#!/usr/bin/env python\n# @Time    : 7/4/2024 20:31\n# @Author  : Yu Li\n# @Site    :\n# @File    : dpo_train.py\n# 0. imports"
  },
  {
    "path": "examples/dpov2_train.py",
    "chars": 2255,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\nimport copy\n"
  },
  {
    "path": "examples/evaluation.py",
    "chars": 1538,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/finetune.py",
    "chars": 1872,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/finetune_multi_modal.py",
    "chars": 3127,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n# F"
  },
  {
    "path": "examples/inference.py",
    "chars": 2309,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/iterative_dpo_train.py",
    "chars": 2884,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\nimport copy\n"
  },
  {
    "path": "examples/merge_lora.py",
    "chars": 1729,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/multistage_finetune.py",
    "chars": 5408,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/raft_align.py",
    "chars": 4626,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/reward_modeling.py",
    "chars": 1544,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\nimport loggi"
  },
  {
    "path": "examples/rm_inference.py",
    "chars": 1553,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\nimport loggi"
  },
  {
    "path": "examples/sglang_inference.py",
    "chars": 1481,
    "preview": "#!/usr/bin/env python\n# Copyright 2025 Statistics and Machine Learning Research Group. All rights reserved.\nimport loggi"
  },
  {
    "path": "examples/speculative_inference.py",
    "chars": 2304,
    "preview": "import argparse\nimport os\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n  "
  },
  {
    "path": "examples/tool_inference.py",
    "chars": 1631,
    "preview": "import argparse\nimport os\n\nfrom lmflow.args import DatasetArguments, InferencerArguments, ModelArguments\nfrom lmflow.mod"
  },
  {
    "path": "examples/vis_chatbot.py",
    "chars": 11205,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/vis_chatbot_gradio.py",
    "chars": 11275,
    "preview": "#!/usr/bin/env python\n# Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved.\n\"\"\""
  },
  {
    "path": "examples/vllm_inference.py",
    "chars": 1651,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\nimport loggi"
  },
  {
    "path": "experimental/Hymba/README.md",
    "chars": 1349,
    "preview": "# Training Hymba with LMFlow\n\n## Hymba \n[GITHUB](https://github.com/NVlabs/hymba/tree/main)  \nHymba is a family of small"
  },
  {
    "path": "experimental/Hymba/run_finetune_hymba.sh",
    "chars": 2042,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n#   https://github.com/shizhediao/llm-f"
  },
  {
    "path": "experimental/LISA-diffusion/README.md",
    "chars": 7575,
    "preview": "## How to introduce LISA into Stable Diffusion?\n\n[LISA](https://arxiv.org/abs/2403.17919) is an efficient fine-tuning al"
  },
  {
    "path": "experimental/LISA-diffusion/diffusion_dpo/train_diffusion_dpo.py",
    "chars": 39588,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 bram-w, The HuggingFace Inc. team. All rights reserved.\n#\n# Licensed under the Ap"
  },
  {
    "path": "experimental/LISA-diffusion/diffusion_dpo/train_diffusion_dpo_lisa.py",
    "chars": 41704,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 bram-w, The HuggingFace Inc. team. All rights reserved.\n#\n# Licensed under the Ap"
  },
  {
    "path": "experimental/LISA-diffusion/instruct_pix2pix/test_instruct_pix2pix.py",
    "chars": 3758,
    "preview": "# Copyright 2024 HuggingFace Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use "
  },
  {
    "path": "experimental/LISA-diffusion/instruct_pix2pix/train_instruct_pix2pix_lisa.py",
    "chars": 47946,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 The HuggingFace Inc. team. All rights reserved.\n#\n# Licensed under the Apache Lic"
  },
  {
    "path": "experimental/LISA-diffusion/latent_consistency_model/train_lcm_distill_sd_wds_lisa.py",
    "chars": 63254,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 The HuggingFace Inc. team. All rights reserved.\n#\n# Licensed under the Apache Lic"
  },
  {
    "path": "experimental/LISA-diffusion/latent_consistency_model/train_lcm_distill_sd_wds_lora.py",
    "chars": 60705,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 The HuggingFace Inc. team. All rights reserved.\n#\n# Licensed under the Apache Lic"
  },
  {
    "path": "experimental/LISA-diffusion/requirement.txt",
    "chars": 106,
    "preview": "accelerate>=0.16.0\ntorchvision\ntransformers>=4.25.1\nftfy\npeft\nwandb\ntensorboard\nJinja2\nwebdataset\ndatasets"
  },
  {
    "path": "experimental/LISA-diffusion/single_lisa.py",
    "chars": 3758,
    "preview": "import numpy as np\nimport torch\n\n\nclass LISADiffusion:\n    def __init__(self, model, rate=None):\n        self.model = mo"
  },
  {
    "path": "experimental/RAFT-diffusion/README.md",
    "chars": 3285,
    "preview": "# RAFT-Diffusion\n\n\nIn this folder, we provide an example to show that how does RAFT work on diffusion models. We will al"
  },
  {
    "path": "experimental/RAFT-diffusion/SD256-RAFT.ipynb",
    "chars": 16928,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"id\": \"YoURrh11fbIc\"\n   },\n   \"source\": [\n    \"# RAFT"
  },
  {
    "path": "experimental/RAFT-diffusion/requirements.txt",
    "chars": 1758,
    "preview": "accelerate==0.18.0\nasttokens==2.2.1\nbackcall==0.2.0\nbitsandbytes==0.37.2\ncertifi==2022.12.7\ncharset-normalizer==3.1.0\ncl"
  },
  {
    "path": "experimental/RAFT-diffusion/train_text_to_image_lora.py",
    "chars": 36417,
    "preview": "# Copyright 2023 The HuggingFace Inc. team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the"
  },
  {
    "path": "pyproject.toml",
    "chars": 1458,
    "preview": "[build-system]\nrequires = [\"setuptools >= 64\"]\nbuild-backend = \"setuptools.build_meta\"\n\n# ------------------------------"
  },
  {
    "path": "requirements.txt",
    "chars": 227,
    "preview": "packaging\nnumpy\ndatasets==3.6.0\ntokenizers>=0.13.3\npeft>=0.10.0\ntorch>=2.0.1\nwandb\nsentencepiece\ntransformers>=4.31.0\ncp"
  },
  {
    "path": "scripts/archive/bash.sh",
    "chars": 50,
    "preview": "#!/bin/bash\n#\n# Shell and python scripts goes here"
  },
  {
    "path": "scripts/archive/convert_llama_weights_to_hf.py",
    "chars": 10690,
    "preview": "# Copyright 2022 EleutherAI and The HuggingFace Inc. team. All rights reserved.\n#\n# Licensed under the Apache License, V"
  },
  {
    "path": "scripts/archive/download_model.sh",
    "chars": 5830,
    "preview": "#!/bin/bash\n\nfunction main() {\n    public_server=\"http://lmflow.org:5000\"\n    if [ $# -lt 1 -o \"$1\" = \"-h\" -o \"$1\" = \"--"
  },
  {
    "path": "scripts/archive/export_llama_state_dict_checkpoint.py",
    "chars": 4692,
    "preview": "# Export state dict for downstream inference, such as llama.cpp\n\nimport json\nimport os\n\nimport torch\nfrom peft import Pe"
  },
  {
    "path": "scripts/archive/run_all_benchmark.sh",
    "chars": 1448,
    "preview": "#!/bin/bash\n\nhelp_message=\"./$(basename $0)\"\nhelp_message+=\" --model_name_or_path MODEL_NAME_OR_PATH\"\n\nif [ $# -ge 1 ]; "
  },
  {
    "path": "scripts/archive/run_app.sh",
    "chars": 219,
    "preview": "#!/bin/bash\n\nCUDA_VISIBLE_DEVICES=0 accelerate launch --config_file configs/archive/accelerate_singlegpu_config.yaml ser"
  },
  {
    "path": "scripts/archive/run_benchmark.sh",
    "chars": 596,
    "preview": "#!/bin/bash\n\nif [ \"$1\" == \"-h\" -o \"$1\" == \"--help\" ]; then\n  help_message=\"./$(basename $0)\"\n  help_message+=\" --dataset"
  },
  {
    "path": "scripts/archive/run_chatbot.sh",
    "chars": 564,
    "preview": "#!/bin/bash\n# A simple chatbot script, the memory of the chatbot has a length of maximum\n# model length, e.g. 4k for lla"
  },
  {
    "path": "scripts/archive/run_chatbot_chatglm.sh",
    "chars": 356,
    "preview": "#!/bin/bash\n\nmodel=THUDM/chatglm-6b\nlora_args=\"\"\nif [ $# -ge 1 ]; then\n  model=$1\nfi\nif [ $# -ge 2 ]; then\n  lora_args=\""
  },
  {
    "path": "scripts/archive/run_chatbot_cpu.sh",
    "chars": 330,
    "preview": "#!/bin/bash\n\nmodel=gpt2\nlora_args=\"\"\nif [ $# -ge 1 ]; then\n  model=$1\nfi\nif [ $# -ge 2 ]; then\n  lora_args=\"--lora_model"
  },
  {
    "path": "scripts/archive/run_detail_gpu_memory.sh",
    "chars": 986,
    "preview": "python ./examples/detail_memory.py meta-llama/Llama-2-7b-hf 10  0 0 128 # base\npython ./examples/detail_memory.py meta-l"
  },
  {
    "path": "scripts/archive/run_dpo_align.sh",
    "chars": 1304,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n\n# Parses arguments\nmodel_name_or_path="
  },
  {
    "path": "scripts/archive/run_dpov2_align.sh",
    "chars": 1777,
    "preview": "#!/bin/bash\nmodel_name_or_path=meta-llama/Meta-Llama-3-8B-Instruct\ndataset_path=data/iterative-prompt/train\noutput_dir=o"
  },
  {
    "path": "scripts/archive/run_evaluation.sh",
    "chars": 384,
    "preview": "#!/bin/bash\n\nif [ ! -d data/MedQA-USMLE ]; then\n  cd data && ./download.sh MedQA-USMLE && cd -\nfi\n\nCUDA_VISIBLE_DEVICES="
  },
  {
    "path": "scripts/archive/run_evaluation_accelerator.sh",
    "chars": 561,
    "preview": "#!/bin/bash\n\nif [ ! -d data/MedQA-USMLE ]; then\n  cd data && ./download.sh MedQA-USMLE && cd -\nfi\n\nCUDA_VISIBLE_DEVICES="
  },
  {
    "path": "scripts/archive/run_evaluation_with_lora.sh",
    "chars": 676,
    "preview": "#!/bin/bash\n\n# --model_name_or_path specifies the original huggingface model\n# --lora_model_path specifies the model dif"
  },
  {
    "path": "scripts/archive/run_finetune.sh",
    "chars": 1925,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n#   https://github.com/shizhediao/llm-f"
  },
  {
    "path": "scripts/archive/run_finetune_with_custom_optim.sh",
    "chars": 10254,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n#   https://github.com/shizhediao/llm-f"
  },
  {
    "path": "scripts/archive/run_finetune_with_lisa.sh",
    "chars": 3397,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n#   https://github.com/shizhediao/llm-f"
  },
  {
    "path": "scripts/archive/run_finetune_with_lora.sh",
    "chars": 1861,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n\n# Parses arguments\nmodel_name_or_path="
  },
  {
    "path": "scripts/archive/run_finetune_with_qlora.sh",
    "chars": 1867,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n\n# Parses arguments\nmodel_name_or_path="
  },
  {
    "path": "scripts/archive/run_inference.sh",
    "chars": 444,
    "preview": "#!/bin/bash\n# An interactive inference script without context history, i.e. the chatbot\n# won't have conversation memory"
  },
  {
    "path": "scripts/archive/run_inference_multimodal_model.sh",
    "chars": 392,
    "preview": "#!/bin/bash\n\nmodel=\"Salesforce/blip-image-captioning-base\"\nlora_args=\"\"\nif [ $# -ge 1 ]; then\n  model=$1\nfi\nif [ $# -ge "
  },
  {
    "path": "scripts/archive/run_iterative_dpo.sh",
    "chars": 65,
    "preview": "python examples/iterative_dpo_train.py configs/iterative_dpo.yaml"
  },
  {
    "path": "scripts/archive/run_multistage_finetune.sh",
    "chars": 1306,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n\ndeepspeed_args=\"--master_port=11000\"  "
  },
  {
    "path": "scripts/archive/run_raft_align.sh",
    "chars": 1464,
    "preview": "#!/bin/bash\n# Please run this script under project directory.\n\ndeepspeed_args=\"--master_port=11110\"      # Default argum"
  },
  {
    "path": "scripts/archive/run_reward_modeling.sh",
    "chars": 2538,
    "preview": "#!/bin/bash\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\n# Parses arguments\nmod"
  },
  {
    "path": "scripts/archive/run_reward_modeling_with_lisa.sh",
    "chars": 2853,
    "preview": "#!/bin/bash\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\n# Parses arguments\nmod"
  },
  {
    "path": "scripts/archive/run_reward_modeling_with_lora.sh",
    "chars": 2551,
    "preview": "#!/bin/bash\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\n# Parses arguments\nmod"
  },
  {
    "path": "scripts/archive/run_rm_inference.sh",
    "chars": 1840,
    "preview": "#!/bin/bash\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\n\n# Parses arguments\nru"
  },
  {
    "path": "scripts/archive/run_tool.sh",
    "chars": 101,
    "preview": "model=\"gorilla-llm/gorilla-7b-hf-delta-v1\"\npython examples/tool_inference.py \\\n    --model ${model} \\"
  },
  {
    "path": "scripts/archive/run_vllm_inference.sh",
    "chars": 1900,
    "preview": "#!/bin/bash\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\n\n# Parses arguments\nru"
  },
  {
    "path": "scripts/multimodal/README.md",
    "chars": 1189,
    "preview": "# MultiModal Conversation\n## Download dataset\nWe use the dataset from LLava to present the example of multi-modaltiy tra"
  },
  {
    "path": "scripts/multimodal/run_finetune_multi_modal_stage1.sh",
    "chars": 2326,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n#   https://github.com/shizhediao/llm-f"
  },
  {
    "path": "scripts/multimodal/run_finetune_multi_modal_stage2.sh",
    "chars": 3080,
    "preview": "#!/bin/bash\n# Please run this script under ${project_id} in project directory of\n#   https://github.com/shizhediao/llm-f"
  },
  {
    "path": "scripts/multimodal/run_vis_chatbot_blip2.sh",
    "chars": 348,
    "preview": "model=Salesforce/blip2-opt-2.7b\ndeepspeed examples/vis_chatbot.py --model_name_or_path ${model} \\\n                      "
  },
  {
    "path": "scripts/multimodal/run_vis_chatbot_gradio_minigpt4.sh",
    "chars": 1894,
    "preview": "#!/bin/bash\n\nmodel=Salesforce/blip2-flan-t5-xxl\n\n# if [ ! -f output_models/pretrained_minigpt4_7b.pth ]; then\n#   cd out"
  },
  {
    "path": "scripts/multimodal/run_vis_chatbot_llava.sh",
    "chars": 1436,
    "preview": "# only work for gpu mem > 25G; fail to do 4 bit and 8 bit inference.\nmodel_name_or_path=Salesforce/blip2-flan-t5-xxl\nlla"
  },
  {
    "path": "scripts/multimodal/run_vis_chatbot_minigpt4.sh",
    "chars": 1119,
    "preview": "model=Salesforce/blip2-flan-t5-xxl\nllm_model_name_or_path=lmsys/vicuna-7b-v1.3\ndeepspeed_args=\"--master_port=12000 --num"
  },
  {
    "path": "scripts/run_finetune.sh",
    "chars": 1207,
    "preview": "#!/bin/bash\nmodel_name_or_path=meta-llama/Llama-3.2-3B-Instruct\ndataset_path=data/alpaca/train_conversation\nconversation"
  },
  {
    "path": "scripts/run_finetune_with_custom_optim.sh",
    "chars": 7032,
    "preview": "#!/bin/bash\nmodel_name_or_path=meta-llama/Llama-3.2-3B-Instruct\ndataset_path=data/alpaca/train_conversation\nconversation"
  },
  {
    "path": "scripts/run_finetune_with_lisa.sh",
    "chars": 1415,
    "preview": "#!/bin/bash\nmodel_name_or_path=meta-llama/Llama-3.2-3B-Instruct\ndataset_path=data/alpaca/train_conversation\nconversation"
  },
  {
    "path": "scripts/run_finetune_with_lora.sh",
    "chars": 1402,
    "preview": "#!/bin/bash\nmodel_name_or_path=meta-llama/Llama-3.2-3B-Instruct\ndataset_path=data/alpaca/train_conversation\nconversation"
  },
  {
    "path": "scripts/run_finetune_with_qlora.sh",
    "chars": 1449,
    "preview": "#!/bin/bash\nmodel_name_or_path=meta-llama/Llama-3.2-3B-Instruct\ndataset_path=data/alpaca/train_conversation\nconversation"
  },
  {
    "path": "scripts/run_merge_lora.sh",
    "chars": 1004,
    "preview": "#!/bin/bash\n# Parses arguments\nmodel_name_or_path=gpt2\nlora_model_path=output_models/lora\noutput_model_path=output_model"
  },
  {
    "path": "scripts/run_sglang_inference.sh",
    "chars": 454,
    "preview": "python examples/sglang_inference.py \\\n    --model_name_or_path Qwen/Qwen3-4B-Instruct-2507 \\\n    --dataset_path data/alp"
  },
  {
    "path": "scripts/run_unittest.sh",
    "chars": 41,
    "preview": "#!/bin/bash\n\npython -m unittest discover\n"
  },
  {
    "path": "setup.py",
    "chars": 1696,
    "preview": "import os\n\nfrom setuptools import find_packages, setup\n\nfolder = os.path.dirname(__file__)\nversion_path = os.path.join(f"
  },
  {
    "path": "src/lmflow/__init__.py",
    "chars": 558,
    "preview": "from .version import __version__ as internal_version\n\n__version__ = internal_version\n\nfrom transformers.utils import che"
  },
  {
    "path": "src/lmflow/args.py",
    "chars": 55227,
    "preview": "#!/usr/bin/env python\n\"\"\"This script defines dataclasses: ModelArguments and DatasetArguments,\nthat contain the argument"
  },
  {
    "path": "src/lmflow/datasets/__init__.py",
    "chars": 640,
    "preview": "\"\"\"This Python code defines a class Dataset with methods for initializing, loading,\nand manipulating datasets from diffe"
  },
  {
    "path": "src/lmflow/datasets/dataset.py",
    "chars": 18376,
    "preview": "#!/usr/bin/env python\n\"\"\"This Python code defines a class Dataset with methods for initializing, loading,\nand manipulati"
  },
  {
    "path": "src/lmflow/datasets/multi_modal_dataset.py",
    "chars": 10847,
    "preview": "#!/usr/bin/env python\n# FIXME update the doc string.\n\"\"\"This Python code defines a class Multi Modal Dataset.\"\"\"\n\nimport"
  },
  {
    "path": "src/lmflow/models/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/lmflow/models/auto_model.py",
    "chars": 914,
    "preview": "#!/usr/bin/env python\n\"\"\"Automatically get correct model type.\"\"\"\n\nfrom lmflow.models.hf_decoder_model import HFDecoderM"
  },
  {
    "path": "src/lmflow/models/base_model.py",
    "chars": 74,
    "preview": "#!/usr/bin/env python\n\"\"\"Base model class.\"\"\"\n\n\nclass BaseModel:\n    pass\n"
  },
  {
    "path": "src/lmflow/models/decoder_model.py",
    "chars": 528,
    "preview": "#!/usr/bin/env python\n\"\"\"A one-line summary of the module or program, terminated by a period.\n\nLeave one blank line.  Th"
  },
  {
    "path": "src/lmflow/models/encoder_decoder_model.py",
    "chars": 535,
    "preview": "#!/usr/bin/env python\n\"\"\"A one-line summary of the module or program, terminated by a period.\n\nLeave one blank line.  Th"
  },
  {
    "path": "src/lmflow/models/hf_decoder_model.py",
    "chars": 28064,
    "preview": "#!/usr/bin/env python\n\"\"\"This is a class called HFDecoderModel which is a wrapper around transformers model and\ntokenize"
  },
  {
    "path": "src/lmflow/models/hf_model_mixin.py",
    "chars": 24672,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\nimport copy\n"
  },
  {
    "path": "src/lmflow/models/hf_text_regression_model.py",
    "chars": 16360,
    "preview": "#!/usr/bin/env python\n# Copyright 2024 Statistics and Machine Learning Research Group. All rights reserved.\nimport copy\n"
  },
  {
    "path": "src/lmflow/models/interfaces/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/lmflow/models/interfaces/tunable.py",
    "chars": 68,
    "preview": "#!/usr/bin/env python\n\"\"\"Tunable class\"\"\"\n\n\nclass Tunable:\n    pass\n"
  },
  {
    "path": "src/lmflow/models/regression_model.py",
    "chars": 192,
    "preview": "#!/usr/bin/env python\n\"\"\"General regression model.\"\"\"\n\nfrom lmflow.models.base_model import BaseModel\n\n\nclass Regression"
  },
  {
    "path": "src/lmflow/models/text_regression_model.py",
    "chars": 1244,
    "preview": "#!/usr/bin/env python\n\"\"\"\nA model maps \"text_only\" data to float.\n\"\"\"\n\nfrom lmflow.datasets.dataset import Dataset\nfrom "
  },
  {
    "path": "src/lmflow/models/vision2seq_model.py",
    "chars": 19619,
    "preview": "#!/usr/bin/env python\n# TODO update the doc\n\nfrom typing import Optional, Union\n\nimport torch\nimport torch.nn as nn\nfrom"
  },
  {
    "path": "src/lmflow/models/vision_encoder/__init__.py",
    "chars": 79,
    "preview": "from .clip_encoder import build_vision_tower\n\n__all__ = [\"build_vision_tower\"]\n"
  },
  {
    "path": "src/lmflow/models/vision_encoder/clip_encoder.py",
    "chars": 13546,
    "preview": "import torch\nimport torch.nn as nn\nfrom transformers import CLIPImageProcessor, CLIPVisionConfig, CLIPVisionModel\n\nfrom "
  },
  {
    "path": "src/lmflow/optim/__init__.py",
    "chars": 90,
    "preview": "from .utils import create_customized_optimizer\n\n__all__ = [\"create_customized_optimizer\"]\n"
  },
  {
    "path": "src/lmflow/optim/adabelief.py",
    "chars": 8394,
    "preview": "#!/usr/bin/env python\n\nimport math\n\nimport torch\nfrom torch.optim.optimizer import Optimizer\n\n\nclass AdaBelief(Optimizer"
  },
  {
    "path": "src/lmflow/optim/adabound.py",
    "chars": 5236,
    "preview": "#!/usr/bin/env python\n\nimport math\n\nimport torch\nfrom torch.optim.optimizer import Optimizer\n\n\nclass AdaBound(Optimizer)"
  },
  {
    "path": "src/lmflow/optim/adadelta.py",
    "chars": 1290,
    "preview": "#!/usr/bin/env python\n\nimport torch\nfrom torch.optim.optimizer import Optimizer\n\n\nclass Adadelta(Optimizer):\n    def __i"
  },
  {
    "path": "src/lmflow/optim/adagrad.py",
    "chars": 1008,
    "preview": "#!/usr/bin/env python\n\nimport torch\n\n\nclass AdaGrad(torch.optim.Optimizer):\n    def __init__(self, params, lr=0.001, eps"
  },
  {
    "path": "src/lmflow/optim/adam.py",
    "chars": 1455,
    "preview": "#!/usr/bin/env python\n\nimport torch\nfrom torch.optim.optimizer import Optimizer\n\n\nclass Adam(Optimizer):\n    def __init_"
  },
  {
    "path": "src/lmflow/optim/adamax.py",
    "chars": 2491,
    "preview": "#!/usr/bin/env python\n\nimport torch\nfrom torch.optim.optimizer import Optimizer\n\n\nclass Adamax(Optimizer):\n    def __ini"
  },
  {
    "path": "src/lmflow/optim/adamp.py",
    "chars": 4997,
    "preview": "#!/usr/bin/env python\n\nimport math\n\nimport torch\nfrom torch.optim.optimizer import Optimizer\n\n\nclass AdamP(Optimizer):\n "
  },
  {
    "path": "src/lmflow/optim/adamw_schedule_free.py",
    "chars": 6182,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
  },
  {
    "path": "src/lmflow/optim/adan.py",
    "chars": 9593,
    "preview": "#!/usr/bin/env python\n\nimport math\n\nimport torch\nfrom torch import Tensor\nfrom torch.optim.optimizer import Optimizer\n\n\n"
  },
  {
    "path": "src/lmflow/optim/dummy.py",
    "chars": 2558,
    "preview": "#!/usr/bin/env python\n\"\"\"Dummy Optimizer.\"\"\"\n\nfrom collections.abc import Iterable\nfrom typing import Callable\n\nimport t"
  },
  {
    "path": "src/lmflow/optim/lamb.py",
    "chars": 4280,
    "preview": "#!/usr/bin/env python\n\nimport math\n\nimport torch\nfrom torch.optim.optimizer import Optimizer\n\n\nclass Lamb(Optimizer):\n  "
  },
  {
    "path": "src/lmflow/optim/lars.py",
    "chars": 4698,
    "preview": "#!/usr/bin/env python\n\nimport torch\nfrom torch.optim.optimizer import Optimizer\n\n\nclass LARS(Optimizer):\n    r\"\"\"Extends"
  },
  {
    "path": "src/lmflow/optim/muon.py",
    "chars": 3753,
    "preview": "#!/usr/bin/env python\nimport math\n\nimport torch\nfrom torch import Tensor\n\n\ndef zeropower_via_newtonschulz5(G: Tensor, st"
  },
  {
    "path": "src/lmflow/optim/nadam.py",
    "chars": 2808,
    "preview": "#!/usr/bin/env python\n\nimport math\n\nimport torch\n\n\nclass NAdam(torch.optim.Optimizer):\n    def __init__(self, params, lr"
  },
  {
    "path": "src/lmflow/optim/novograd.py",
    "chars": 3278,
    "preview": "#!/usr/bin/env python\n\nimport torch\nimport torch.optim as optim\n\n\nclass NovoGrad(optim.Optimizer):\n    def __init__(\n   "
  }
]

// ... and 81 more files (download for full content)

About this extraction

This page contains the full source code of the OptimalScale/LMFlow GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 281 files (2.7 MB), approximately 709.5k tokens, and a symbol index with 850 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!