Repository: OpenGVLab/VideoChat-Flash
Branch: main
Commit: 2f8e2f578897
Files: 1928
Total size: 31.2 MB
Directory structure:
gitextract_mlwsex56/
├── .gitattributes
├── LICENSE
├── README.md
├── llava-train_videochat/
│ ├── .dockerignore
│ ├── .editorconfig
│ ├── .gitattributes
│ ├── .gitignore
│ ├── LICENSE
│ ├── README.md
│ ├── cog.yaml
│ ├── data/
│ │ ├── ablation_short-long_mix_sft.yaml
│ │ ├── stage1_init_connector_iv1m.yaml
│ │ ├── stage2_short_pretrain_iv6m.yaml
│ │ ├── stage3_short-long_mix_sft.yaml
│ │ └── stage4_highres_postsft.yaml
│ ├── llava/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── conversation.py
│ │ ├── dist_utils.py
│ │ ├── mm_utils.py
│ │ ├── model/
│ │ │ ├── __init__.py
│ │ │ ├── apply_delta.py
│ │ │ ├── builder.py
│ │ │ ├── consolidate.py
│ │ │ ├── language_model/
│ │ │ │ ├── llava_qwen.py
│ │ │ │ ├── llava_qwen_flash.py
│ │ │ │ └── modeling_qwen2_flash.py
│ │ │ ├── llava_arch.py
│ │ │ ├── make_delta.py
│ │ │ ├── multimodal_encoder/
│ │ │ │ ├── builder.py
│ │ │ │ ├── clip_encoder.py
│ │ │ │ ├── internvideo2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── flash_attention_class.py
│ │ │ │ │ ├── pos_embed.py
│ │ │ │ │ └── vit_scale_clean.py
│ │ │ │ ├── internvideo2_encoder.py
│ │ │ │ ├── siglip_encoder.py
│ │ │ │ ├── umt/
│ │ │ │ │ └── vit.py
│ │ │ │ └── umt_encoder.py
│ │ │ ├── multimodal_projector/
│ │ │ │ ├── builder.py
│ │ │ │ └── tome16_mlp_hd64.py
│ │ │ └── utils.py
│ │ ├── serialize_utils.py
│ │ ├── train/
│ │ │ ├── llava_trainer.py
│ │ │ ├── llava_trainer_eval.py
│ │ │ ├── train.py
│ │ │ └── train_mem.py
│ │ ├── utils.py
│ │ └── video_utils.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── scripts/
│ ├── train/
│ │ ├── stage1-init_connector/
│ │ │ ├── stage1_internvideo2_tome16_res224_qwen7b.sh
│ │ │ ├── stage1_umt_tome16_res224_qwen7b.sh
│ │ │ └── stage1_umt_tome16_res448_qwen1_5b.sh
│ │ ├── stage2-visual_pretraining/
│ │ │ ├── stage2_internvideo2_tome16_res224_qwen_7b.sh
│ │ │ ├── stage2_umt_tome16_res224_qwen_7b.sh
│ │ │ └── stage2_umt_tome16_res448_qwen_1_5b.sh
│ │ ├── stage3-video_sft/
│ │ │ ├── stage3_internvideo2_tome16_res224_qwen_7b.sh
│ │ │ ├── stage3_umt_tome16_res224_qwen_7b.sh
│ │ │ └── stage3_umt_tome16_res448_qwen_1_5b.sh
│ │ └── stage4_highres_postft/
│ │ └── stage4_umt_tome16_res448_qwen_7b.sh
│ ├── zero1.json
│ ├── zero2.json
│ ├── zero2_fused_adamw.json
│ ├── zero2_offload.json
│ ├── zero3.json
│ ├── zero3_offload.json
│ └── zero3pp.json
├── lmms-eval_videochat/
│ ├── .gitignore
│ ├── .pre-commit-config.yaml
│ ├── LICENSE
│ ├── README.md
│ ├── docs/
│ │ ├── README.md
│ │ ├── commands.md
│ │ ├── current_tasks.md
│ │ ├── model_guide.md
│ │ ├── run_examples.md
│ │ └── task_guide.md
│ ├── eval_annotations/
│ │ ├── LVBench/
│ │ │ ├── README.md
│ │ │ └── json/
│ │ │ ├── lvbench_clean.json
│ │ │ ├── lvbench_clean_cartoon.json
│ │ │ ├── lvbench_clean_documentary.json
│ │ │ ├── lvbench_clean_live.json
│ │ │ ├── lvbench_clean_selfmedia.json
│ │ │ ├── lvbench_clean_sport.json
│ │ │ └── lvbench_clean_tv.json
│ │ ├── LongVideoBench/
│ │ │ ├── README.md
│ │ │ ├── lvb_test_wo_gt.json
│ │ │ ├── lvb_val.json
│ │ │ ├── test-00000-of-00001.parquet
│ │ │ └── validation-00000-of-00001.parquet
│ │ ├── MLVU_MC/
│ │ │ ├── README.md
│ │ │ └── json/
│ │ │ ├── 1_plotQA.json
│ │ │ ├── 2_needle.json
│ │ │ ├── 3_ego.json
│ │ │ ├── 4_count.json
│ │ │ ├── 5_order.json
│ │ │ ├── 6_anomaly_reco.json
│ │ │ └── 7_topic_reasoning.json
│ │ ├── MVBench/
│ │ │ ├── README.md
│ │ │ └── json/
│ │ │ ├── action_antonym.json
│ │ │ ├── action_count.json
│ │ │ ├── action_localization.json
│ │ │ ├── action_prediction.json
│ │ │ ├── action_sequence.json
│ │ │ ├── character_order.json
│ │ │ ├── counterfactual_inference.json
│ │ │ ├── egocentric_navigation.json
│ │ │ ├── episodic_reasoning.json
│ │ │ ├── fine_grained_action.json
│ │ │ ├── fine_grained_pose.json
│ │ │ ├── moving_attribute.json
│ │ │ ├── moving_count.json
│ │ │ ├── moving_direction.json
│ │ │ ├── object_existence.json
│ │ │ ├── object_interaction.json
│ │ │ ├── object_shuffle.json
│ │ │ ├── scene_transition.json
│ │ │ ├── state_change.json
│ │ │ └── unexpected_action.json
│ │ ├── PerceptionTest/
│ │ │ ├── .gitattributes
│ │ │ └── README.md
│ │ ├── Temporal_Grounding/
│ │ │ ├── README.md
│ │ │ └── json/
│ │ │ └── temporal_grounding_charades.json
│ │ └── Video-MME/
│ │ ├── README.md
│ │ └── videomme/
│ │ └── test-00000-of-00001.parquet
│ ├── lmms_eval/
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── api/
│ │ │ ├── __init__.py
│ │ │ ├── filter.py
│ │ │ ├── instance.py
│ │ │ ├── metrics.py
│ │ │ ├── model.py
│ │ │ ├── registry.py
│ │ │ ├── samplers.py
│ │ │ └── task.py
│ │ ├── evaluator.py
│ │ ├── filters/
│ │ │ ├── __init__.py
│ │ │ ├── decontamination.py
│ │ │ ├── extraction.py
│ │ │ ├── selection.py
│ │ │ └── transformation.py
│ │ ├── logging_utils.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ └── videochat_flash.py
│ │ ├── tasks/
│ │ │ ├── __init__.py
│ │ │ ├── _task_utils/
│ │ │ │ ├── file_utils.py
│ │ │ │ ├── gpt_eval_utils.py
│ │ │ │ ├── video_loader.py
│ │ │ │ └── vqa_eval_metric.py
│ │ │ ├── longvideobench/
│ │ │ │ ├── longvideobench_test_v.yaml
│ │ │ │ ├── longvideobench_val_i.yaml
│ │ │ │ ├── longvideobench_val_v.yaml
│ │ │ │ └── utils.py
│ │ │ ├── lvbench/
│ │ │ │ ├── _default_template.yaml
│ │ │ │ ├── lvbench.yaml
│ │ │ │ ├── lvbench_cartoon.yaml
│ │ │ │ ├── lvbench_documentary.yaml
│ │ │ │ ├── lvbench_live.yaml
│ │ │ │ ├── lvbench_selfmedia.yaml
│ │ │ │ ├── lvbench_sport.yaml
│ │ │ │ ├── lvbench_tv.yaml
│ │ │ │ └── utils.py
│ │ │ ├── mlvu_mc/
│ │ │ │ ├── _default_template.yaml
│ │ │ │ ├── mlvu_mc.yaml
│ │ │ │ ├── mlvu_mc_anomaly_reco.yaml
│ │ │ │ ├── mlvu_mc_count.yaml
│ │ │ │ ├── mlvu_mc_ego.yaml
│ │ │ │ ├── mlvu_mc_needle.yaml
│ │ │ │ ├── mlvu_mc_order.yaml
│ │ │ │ ├── mlvu_mc_plotqa.yaml
│ │ │ │ ├── mlvu_mc_topic_reasoning.yaml
│ │ │ │ └── utils.py
│ │ │ ├── mvbench/
│ │ │ │ ├── _default_template.yaml
│ │ │ │ ├── mvbench.yaml
│ │ │ │ ├── mvbench_action_antonym.yaml
│ │ │ │ ├── mvbench_action_count.yaml
│ │ │ │ ├── mvbench_action_localization.yaml
│ │ │ │ ├── mvbench_action_prediction.yaml
│ │ │ │ ├── mvbench_action_sequence.yaml
│ │ │ │ ├── mvbench_character_order.yaml
│ │ │ │ ├── mvbench_counterfactual_inference.yaml
│ │ │ │ ├── mvbench_egocentric_navigation.yaml
│ │ │ │ ├── mvbench_episodic_reasoning.yaml
│ │ │ │ ├── mvbench_fine_grained_action.yaml
│ │ │ │ ├── mvbench_fine_grained_pose.yaml
│ │ │ │ ├── mvbench_moving_attribute.yaml
│ │ │ │ ├── mvbench_moving_count.yaml
│ │ │ │ ├── mvbench_moving_direction.yaml
│ │ │ │ ├── mvbench_object_existence.yaml
│ │ │ │ ├── mvbench_object_interaction.yaml
│ │ │ │ ├── mvbench_object_shuffle.yaml
│ │ │ │ ├── mvbench_scene_transition.yaml
│ │ │ │ ├── mvbench_state_change.yaml
│ │ │ │ ├── mvbench_unexpected_action.yaml
│ │ │ │ └── utils.py
│ │ │ ├── perceptiontest/
│ │ │ │ └── val/
│ │ │ │ ├── _default_template_yaml
│ │ │ │ ├── perceptiontest_mc.yaml
│ │ │ │ └── utils.py
│ │ │ ├── temporal_grounding/
│ │ │ │ ├── _default_template.yaml
│ │ │ │ ├── charades.yaml
│ │ │ │ ├── eval_tvg.py
│ │ │ │ └── utils.py
│ │ │ └── videomme/
│ │ │ ├── utils.py
│ │ │ ├── videomme.yaml
│ │ │ └── videomme_w_subtitle.yaml
│ │ └── utils.py
│ ├── pyproject.toml
│ ├── scripts/
│ │ ├── eval_longvideobench.sh
│ │ ├── eval_lvbench.sh
│ │ ├── eval_mlvu.sh
│ │ ├── eval_mvbench.sh
│ │ ├── eval_perceptiontest_val_mc.sh
│ │ ├── eval_temporal_grounding_chardes.sh
│ │ └── eval_videomme.sh
│ ├── setup.py
│ └── videochat-flash-7B@448_eval_log_videomme.json
├── xtuner-eval_niah/
│ ├── README.md
│ ├── llava/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── conversation.py
│ │ ├── dist_utils.py
│ │ ├── mm_utils.py
│ │ ├── model/
│ │ │ ├── __init__.py
│ │ │ ├── apply_delta.py
│ │ │ ├── builder.py
│ │ │ ├── consolidate.py
│ │ │ ├── language_model/
│ │ │ │ ├── llava_qwen.py
│ │ │ │ ├── llava_qwen_flash.py
│ │ │ │ └── modeling_qwen2_flash.py
│ │ │ ├── llava_arch.py
│ │ │ ├── make_delta.py
│ │ │ ├── multimodal_encoder/
│ │ │ │ ├── builder.py
│ │ │ │ ├── clip_encoder.py
│ │ │ │ ├── internvideo2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── flash_attention_class.py
│ │ │ │ │ ├── pos_embed.py
│ │ │ │ │ └── vit_scale_clean.py
│ │ │ │ ├── internvideo2_encoder.py
│ │ │ │ ├── siglip_encoder.py
│ │ │ │ ├── umt/
│ │ │ │ │ └── vit.py
│ │ │ │ └── umt_encoder.py
│ │ │ ├── multimodal_projector/
│ │ │ │ ├── builder.py
│ │ │ │ └── tome16_mlp_hd64.py
│ │ │ └── utils.py
│ │ ├── serialize_utils.py
│ │ ├── train/
│ │ │ ├── llava_trainer.py
│ │ │ ├── llava_trainer_eval.py
│ │ │ ├── train.py
│ │ │ └── train_mem.py
│ │ ├── utils.py
│ │ └── video_utils.py
│ ├── longva/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── conversation.py
│ │ ├── mm_utils.py
│ │ ├── model/
│ │ │ ├── __init__.py
│ │ │ ├── apply_delta.py
│ │ │ ├── builder.py
│ │ │ ├── consolidate.py
│ │ │ ├── language_model/
│ │ │ │ ├── llava_llama.py
│ │ │ │ ├── llava_mistral.py
│ │ │ │ ├── llava_mpt.py
│ │ │ │ ├── llava_qwen.py
│ │ │ │ └── modeling_llama.py
│ │ │ ├── llava_arch.py
│ │ │ ├── make_delta.py
│ │ │ ├── multimodal_encoder/
│ │ │ │ ├── builder.py
│ │ │ │ └── clip_encoder.py
│ │ │ ├── multimodal_projector/
│ │ │ │ ├── builder.py
│ │ │ │ └── pooler_projector.py
│ │ │ ├── multimodal_resampler/
│ │ │ │ ├── builder.py
│ │ │ │ ├── masked_drop.py
│ │ │ │ ├── perceiver.py
│ │ │ │ ├── qformer.py
│ │ │ │ └── spatial_pool.py
│ │ │ └── utils.py
│ │ ├── train/
│ │ │ ├── llama_flash_attn_monkey_patch.py
│ │ │ ├── llava_trainer.py
│ │ │ ├── train.py
│ │ │ ├── train_dpo.py
│ │ │ └── train_mem.py
│ │ └── utils.py
│ ├── niah_requirements.txt
│ ├── tmp/
│ │ └── git_placeholder
│ ├── vision_niah/
│ │ ├── data/
│ │ │ ├── haystack_embeddings/
│ │ │ │ └── git_placeholder
│ │ │ ├── haystack_videos/
│ │ │ │ └── git_placeholder
│ │ │ ├── needle_embeddings/
│ │ │ │ └── git_placeholder
│ │ │ └── source_data/
│ │ │ ├── git_placeholder
│ │ │ └── niah-coco-singlehop_20.json
│ │ ├── data_multi/
│ │ │ ├── needle_embeddings/
│ │ │ │ └── git_placeholder
│ │ │ └── source_data/
│ │ │ ├── git_placeholder
│ │ │ └── niah-coco-multihop-100.json
│ │ ├── flash_eval_xtuner_multi.sh
│ │ ├── flash_eval_xtuner_single.sh
│ │ ├── log/
│ │ │ ├── s1/
│ │ │ │ └── git_placeholder
│ │ │ ├── s2/
│ │ │ │ └── git_placeholder
│ │ │ └── s3/
│ │ │ └── git_placeholder
│ │ ├── longva_eval_xtuner_multi.sh
│ │ ├── longva_eval_xtuner_single.sh
│ │ ├── model_weights/
│ │ │ └── git_placeholder
│ │ ├── multi_eval_vision_niah.py
│ │ ├── multi_produce_needle_embedding.py
│ │ ├── niah_output_multi/
│ │ │ └── git_placeholder
│ │ ├── niah_output_single/
│ │ │ └── git_placeholder
│ │ ├── produce_haystack_embedding.py
│ │ ├── single_eval_vision_niah.py
│ │ └── single_produce_needle_embedding.py
│ └── xtuner/
│ ├── __init__.py
│ ├── _lite/
│ │ ├── __init__.py
│ │ ├── accelerate/
│ │ │ ├── __init__.py
│ │ │ ├── dispatches/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── _attention.py
│ │ │ │ ├── _fused/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── layer_norm.py
│ │ │ │ │ ├── rms_norm.py
│ │ │ │ │ └── rotary.py
│ │ │ │ ├── clip.py
│ │ │ │ ├── internlm2.py
│ │ │ │ ├── llama.py
│ │ │ │ └── qwen2.py
│ │ │ ├── generate.py
│ │ │ ├── lora.py
│ │ │ └── packed.py
│ │ ├── auto.py
│ │ ├── chat/
│ │ │ ├── __init__.py
│ │ │ ├── backends/
│ │ │ │ └── __init__.py
│ │ │ ├── messages/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ └── chat.py
│ │ │ └── templates/
│ │ │ ├── __init__.py
│ │ │ ├── chat.py
│ │ │ └── hybrid.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ ├── cache.py
│ │ │ ├── format.py
│ │ │ ├── llava.py
│ │ │ ├── load.py
│ │ │ ├── pretrain.py
│ │ │ ├── text.py
│ │ │ └── tokenize.py
│ │ ├── modelings/
│ │ │ ├── __init__.py
│ │ │ ├── internlm2/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── configuration_internlm2.py
│ │ │ │ └── modeling_internlm2.py
│ │ │ └── llava/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_internlm2.py
│ │ │ ├── configuration_llava.py
│ │ │ ├── modeling_internlm2.py
│ │ │ ├── modeling_llava.py
│ │ │ └── processing_llava.py
│ │ ├── parallel/
│ │ │ ├── __init__.py
│ │ │ ├── comm.py
│ │ │ ├── fsdp/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── checkpointing.py
│ │ │ │ ├── lazy.py
│ │ │ │ ├── precision.py
│ │ │ │ └── wrap.py
│ │ │ ├── logger.py
│ │ │ ├── plans/
│ │ │ │ └── internlm2.py
│ │ │ ├── sampler.py
│ │ │ ├── sequence/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── attention.py
│ │ │ │ ├── data_collate.py
│ │ │ │ ├── ops.py
│ │ │ │ └── reduce_loss.py
│ │ │ └── setup.py
│ │ └── yunchang/
│ │ ├── __init__.py
│ │ ├── comm/
│ │ │ ├── __init__.py
│ │ │ ├── all_to_all.py
│ │ │ └── extract_local.py
│ │ ├── globals.py
│ │ ├── hybrid/
│ │ │ ├── __init__.py
│ │ │ ├── async_attn_layer.py
│ │ │ ├── attn_layer.py
│ │ │ └── utils.py
│ │ ├── ring/
│ │ │ ├── __init__.py
│ │ │ ├── llama3_flash_attn_varlen.py
│ │ │ ├── ring_flash_attn.py
│ │ │ ├── ring_flash_attn_varlen.py
│ │ │ ├── stripe_flash_attn.py
│ │ │ ├── triton_utils.py
│ │ │ ├── utils.py
│ │ │ ├── zigzag_ring_flash_attn.py
│ │ │ └── zigzag_ring_flash_attn_varlen.py
│ │ └── ulysses/
│ │ ├── __init__.py
│ │ └── attn_layer.py
│ ├── apis/
│ │ ├── __init__.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ ├── alpaca.py
│ │ │ ├── arxiv.py
│ │ │ ├── code_alpaca.py
│ │ │ ├── colorist.py
│ │ │ ├── lawyer.py
│ │ │ ├── medical.py
│ │ │ ├── moss_003_sft.py
│ │ │ ├── oasst1.py
│ │ │ ├── open_orca.py
│ │ │ ├── sql.py
│ │ │ ├── tiny_codes.py
│ │ │ └── wizardlm.py
│ │ ├── model.py
│ │ └── training_args.py
│ ├── configs/
│ │ ├── __init__.py
│ │ ├── baichuan/
│ │ │ ├── baichuan2_13b_base/
│ │ │ │ ├── baichuan2_13b_base_qlora_alpaca_e3.py
│ │ │ │ ├── baichuan2_13b_base_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── baichuan2_13b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── baichuan2_13b_base_qlora_alpaca_zh_e3.py
│ │ │ │ ├── baichuan2_13b_base_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── baichuan2_13b_base_qlora_code_alpaca_e3.py
│ │ │ │ ├── baichuan2_13b_base_qlora_colorist_e5.py
│ │ │ │ ├── baichuan2_13b_base_qlora_lawyer_e3.py
│ │ │ │ ├── baichuan2_13b_base_qlora_oasst1_512_e3.py
│ │ │ │ ├── baichuan2_13b_base_qlora_oasst1_e3.py
│ │ │ │ ├── baichuan2_13b_base_qlora_open_platypus_e3.py
│ │ │ │ └── baichuan2_13b_base_qlora_sql_e3.py
│ │ │ ├── baichuan2_13b_chat/
│ │ │ │ ├── baichuan2_13b_chat_qlora_alpaca_e3.py
│ │ │ │ ├── baichuan2_13b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── baichuan2_13b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── baichuan2_13b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ ├── baichuan2_13b_chat_qlora_code_alpaca_e3.py
│ │ │ │ ├── baichuan2_13b_chat_qlora_lawyer_e3.py
│ │ │ │ ├── baichuan2_13b_chat_qlora_oasst1_512_e3.py
│ │ │ │ ├── baichuan2_13b_chat_qlora_oasst1_e3.py
│ │ │ │ └── baichuan2_13b_chat_qlora_open_platypus_e3.py
│ │ │ ├── baichuan2_7b_base/
│ │ │ │ ├── baichuan2_7b_base_qlora_alpaca_e3.py
│ │ │ │ ├── baichuan2_7b_base_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── baichuan2_7b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── baichuan2_7b_base_qlora_alpaca_zh_e3.py
│ │ │ │ ├── baichuan2_7b_base_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── baichuan2_7b_base_qlora_code_alpaca_e3.py
│ │ │ │ ├── baichuan2_7b_base_qlora_colorist_e5.py
│ │ │ │ ├── baichuan2_7b_base_qlora_lawyer_e3.py
│ │ │ │ ├── baichuan2_7b_base_qlora_oasst1_512_e3.py
│ │ │ │ ├── baichuan2_7b_base_qlora_oasst1_e3.py
│ │ │ │ ├── baichuan2_7b_base_qlora_open_platypus_e3.py
│ │ │ │ └── baichuan2_7b_base_qlora_sql_e3.py
│ │ │ ├── baichuan2_7b_chat/
│ │ │ │ ├── baichuan2_7b_chat_qlora_alpaca_e3.py
│ │ │ │ ├── baichuan2_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── baichuan2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── baichuan2_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ ├── baichuan2_7b_chat_qlora_code_alpaca_e3.py
│ │ │ │ ├── baichuan2_7b_chat_qlora_lawyer_e3.py
│ │ │ │ ├── baichuan2_7b_chat_qlora_oasst1_512_e3.py
│ │ │ │ ├── baichuan2_7b_chat_qlora_oasst1_e3.py
│ │ │ │ └── baichuan2_7b_chat_qlora_open_platypus_e3.py
│ │ │ ├── baichuan_13b_base/
│ │ │ │ ├── baichuan_13b_base_qlora_alpaca_e3.py
│ │ │ │ ├── baichuan_13b_base_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── baichuan_13b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── baichuan_13b_base_qlora_alpaca_zh_e3.py
│ │ │ │ ├── baichuan_13b_base_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── baichuan_13b_base_qlora_code_alpaca_e3.py
│ │ │ │ ├── baichuan_13b_base_qlora_colorist_e5.py
│ │ │ │ ├── baichuan_13b_base_qlora_lawyer_e3.py
│ │ │ │ ├── baichuan_13b_base_qlora_medical_e1.py
│ │ │ │ ├── baichuan_13b_base_qlora_moss_sft_all_e1.py
│ │ │ │ ├── baichuan_13b_base_qlora_moss_sft_all_e2_gpu8.py
│ │ │ │ ├── baichuan_13b_base_qlora_moss_sft_plugins_e1.py
│ │ │ │ ├── baichuan_13b_base_qlora_oasst1_512_e3.py
│ │ │ │ ├── baichuan_13b_base_qlora_oasst1_e3.py
│ │ │ │ ├── baichuan_13b_base_qlora_open_platypus_e3.py
│ │ │ │ ├── baichuan_13b_base_qlora_openorca_e1.py
│ │ │ │ ├── baichuan_13b_base_qlora_sql_e3.py
│ │ │ │ └── baichuan_13b_base_qlora_tiny_codes_e1.py
│ │ │ ├── baichuan_13b_chat/
│ │ │ │ ├── baichuan_13b_chat_qlora_alpaca_e3.py
│ │ │ │ ├── baichuan_13b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── baichuan_13b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── baichuan_13b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ ├── baichuan_13b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── baichuan_13b_chat_qlora_code_alpaca_e3.py
│ │ │ │ ├── baichuan_13b_chat_qlora_colorist_e5.py
│ │ │ │ ├── baichuan_13b_chat_qlora_lawyer_e3.py
│ │ │ │ ├── baichuan_13b_chat_qlora_medical_e1.py
│ │ │ │ ├── baichuan_13b_chat_qlora_oasst1_512_e3.py
│ │ │ │ ├── baichuan_13b_chat_qlora_oasst1_e3.py
│ │ │ │ ├── baichuan_13b_chat_qlora_open_platypus_e3.py
│ │ │ │ ├── baichuan_13b_chat_qlora_openorca_e1.py
│ │ │ │ ├── baichuan_13b_chat_qlora_sql_e3.py
│ │ │ │ └── baichuan_13b_chat_qlora_tiny_codes_e1.py
│ │ │ └── baichuan_7b/
│ │ │ ├── baichuan_7b_qlora_alpaca_e3.py
│ │ │ ├── baichuan_7b_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan_7b_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── baichuan_7b_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan_7b_qlora_colorist_e5.py
│ │ │ ├── baichuan_7b_qlora_lawyer_e3.py
│ │ │ ├── baichuan_7b_qlora_medical_e1.py
│ │ │ ├── baichuan_7b_qlora_moss_sft_all_e1.py
│ │ │ ├── baichuan_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ ├── baichuan_7b_qlora_moss_sft_plugins_e1.py
│ │ │ ├── baichuan_7b_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan_7b_qlora_oasst1_e3.py
│ │ │ ├── baichuan_7b_qlora_open_platypus_e3.py
│ │ │ ├── baichuan_7b_qlora_openorca_e1.py
│ │ │ ├── baichuan_7b_qlora_sql_e3.py
│ │ │ └── baichuan_7b_qlora_tiny_codes_e1.py
│ │ ├── chatglm/
│ │ │ ├── chatglm2_6b/
│ │ │ │ ├── chatglm2_6b_qlora_alpaca_e3.py
│ │ │ │ ├── chatglm2_6b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── chatglm2_6b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── chatglm2_6b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── chatglm2_6b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── chatglm2_6b_qlora_code_alpaca_e3.py
│ │ │ │ ├── chatglm2_6b_qlora_colorist_e5.py
│ │ │ │ ├── chatglm2_6b_qlora_lawyer_e3.py
│ │ │ │ ├── chatglm2_6b_qlora_medical_e1.py
│ │ │ │ ├── chatglm2_6b_qlora_oasst1_512_e3.py
│ │ │ │ ├── chatglm2_6b_qlora_oasst1_e3.py
│ │ │ │ ├── chatglm2_6b_qlora_open_platypus_e3.py
│ │ │ │ ├── chatglm2_6b_qlora_openorca_e1.py
│ │ │ │ ├── chatglm2_6b_qlora_sql_e3.py
│ │ │ │ └── chatglm2_6b_qlora_tiny_codes_e1.py
│ │ │ ├── chatglm3_6b/
│ │ │ │ ├── chatglm3_6b_qlora_alpaca_e3.py
│ │ │ │ ├── chatglm3_6b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── chatglm3_6b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── chatglm3_6b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── chatglm3_6b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── chatglm3_6b_qlora_code_alpaca_e3.py
│ │ │ │ ├── chatglm3_6b_qlora_colorist_e5.py
│ │ │ │ ├── chatglm3_6b_qlora_lawyer_e3.py
│ │ │ │ ├── chatglm3_6b_qlora_medical_e1.py
│ │ │ │ ├── chatglm3_6b_qlora_oasst1_512_e3.py
│ │ │ │ ├── chatglm3_6b_qlora_oasst1_e3.py
│ │ │ │ ├── chatglm3_6b_qlora_open_platypus_e3.py
│ │ │ │ ├── chatglm3_6b_qlora_openorca_e1.py
│ │ │ │ ├── chatglm3_6b_qlora_sql_e3.py
│ │ │ │ └── chatglm3_6b_qlora_tiny_codes_e1.py
│ │ │ └── chatglm3_6b_base/
│ │ │ ├── chatglm3_6b_base_qlora_alpaca_e3.py
│ │ │ ├── chatglm3_6b_base_qlora_alpaca_enzh_e3.py
│ │ │ ├── chatglm3_6b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── chatglm3_6b_base_qlora_alpaca_zh_e3.py
│ │ │ ├── chatglm3_6b_base_qlora_arxiv_gentitle_e3.py
│ │ │ ├── chatglm3_6b_base_qlora_code_alpaca_e3.py
│ │ │ ├── chatglm3_6b_base_qlora_colorist_e5.py
│ │ │ ├── chatglm3_6b_base_qlora_lawyer_e3.py
│ │ │ ├── chatglm3_6b_base_qlora_medical_e1.py
│ │ │ ├── chatglm3_6b_base_qlora_oasst1_512_e3.py
│ │ │ ├── chatglm3_6b_base_qlora_oasst1_e3.py
│ │ │ ├── chatglm3_6b_base_qlora_open_platypus_e3.py
│ │ │ ├── chatglm3_6b_base_qlora_openorca_e1.py
│ │ │ ├── chatglm3_6b_base_qlora_sql_e3.py
│ │ │ └── chatglm3_6b_base_qlora_tiny_codes_e1.py
│ │ ├── cohere/
│ │ │ ├── README.md
│ │ │ └── cohere_104b/
│ │ │ └── cohere_100b_128k_sp32.py
│ │ ├── custom_dataset/
│ │ │ ├── pretrain/
│ │ │ │ ├── baichuan/
│ │ │ │ │ ├── baichuan2_13b_base_full_custom_pretrain_e1.py
│ │ │ │ │ └── baichuan2_7b_base_full_custom_pretrain_e1.py
│ │ │ │ ├── chatglm/
│ │ │ │ │ ├── chatglm2_6b_full_custom_pretrain_e1.py
│ │ │ │ │ └── chatglm3_6b_full_custom_pretrain_e1.py
│ │ │ │ ├── deepseek/
│ │ │ │ │ └── deepseek_moe_16b_base_full_custom_pretrain_e1.py
│ │ │ │ ├── gemma/
│ │ │ │ │ ├── gemma_2b_full_custom_pretrain_e1.py
│ │ │ │ │ └── gemma_7b_full_custom_pretrain_e1.py
│ │ │ │ ├── internlm/
│ │ │ │ │ ├── internlm2_1_8b_full_custom_pretrain_e1.py
│ │ │ │ │ ├── internlm2_20b_full_custom_pretrain_e1.py
│ │ │ │ │ └── internlm2_7b_full_custom_pretrain_e1.py
│ │ │ │ ├── llama/
│ │ │ │ │ ├── llama2_70b_full_custom_pretrain_e1.py
│ │ │ │ │ └── llama2_7b_full_custom_pretrain_e1.py
│ │ │ │ ├── mistral/
│ │ │ │ │ └── mistral_7b_full_custom_pretrain_e1.py
│ │ │ │ ├── mixtral/
│ │ │ │ │ └── mixtral_8x7b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen/
│ │ │ │ │ ├── qwen1_5_0_5b_full_custom_pretrain_e1.py
│ │ │ │ │ ├── qwen1_5_14b_full_custom_pretrain_e1.py
│ │ │ │ │ ├── qwen1_5_1_8b_full_custom_pretrain_e1.py
│ │ │ │ │ ├── qwen1_5_4b_full_custom_pretrain_e1.py
│ │ │ │ │ ├── qwen1_5_72b_full_custom_pretrain_e1.py
│ │ │ │ │ ├── qwen1_5_7b_full_custom_pretrain_e1.py
│ │ │ │ │ ├── qwen_1_8b_full_custom_pretrain_e1.py
│ │ │ │ │ ├── qwen_72b_full_custom_pretrain_e1.py
│ │ │ │ │ └── qwen_7b_full_custom_pretrain_e1.py
│ │ │ │ ├── starcoder/
│ │ │ │ │ └── starcoder_full_custom_pretrain_e1.py
│ │ │ │ ├── yi/
│ │ │ │ │ ├── yi_34b_full_custom_pretrain_e1.py
│ │ │ │ │ └── yi_6b_full_custom_pretrain_e1.py
│ │ │ │ └── zephyr/
│ │ │ │ └── zephyr_7b_beta_full_custom_pretrain_e1.py
│ │ │ └── sft/
│ │ │ ├── baichuan/
│ │ │ │ ├── baichuan2_13b_chat_qlora_custom_sft_e1.py
│ │ │ │ ├── baichuan2_7b_chat_qlora_custom_sft_e1.py
│ │ │ │ ├── baichuan_13b_chat_qlora_custom_sft_e1.py
│ │ │ │ └── baichuan_7b_qlora_custom_sft_e1.py
│ │ │ ├── chatglm/
│ │ │ │ ├── chatglm2_6b_qlora_custom_sft_e1.py
│ │ │ │ └── chatglm3_6b_qlora_custom_sft_e1.py
│ │ │ ├── deepseek/
│ │ │ │ ├── deepseek_moe_16b_chat_qlora_custom_sft_e1.py
│ │ │ │ └── deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py
│ │ │ ├── gemma/
│ │ │ │ ├── gemma_2b_it_qlora_custom_sft_e1.py
│ │ │ │ ├── gemma_2b_qlora_custom_sft_e1.py
│ │ │ │ ├── gemma_7b_it_qlora_custom_sft_e1.py
│ │ │ │ └── gemma_7b_qlora_custom_sft_e1.py
│ │ │ ├── internlm/
│ │ │ │ ├── internlm2_chat_1_8b_qlora_custom_sft_e1.py
│ │ │ │ ├── internlm2_chat_20b_qlora_custom_sft_e1.py
│ │ │ │ └── internlm2_chat_7b_qlora_custom_sft_e1.py
│ │ │ ├── llama/
│ │ │ │ ├── llama2_70b_qlora_custom_sft_e1.py
│ │ │ │ └── llama2_7b_chat_qlora_custom_sft_e1.py
│ │ │ ├── mistral/
│ │ │ │ └── mistral_7b_full_finetune_custom_sft_e1.py
│ │ │ ├── mixtral/
│ │ │ │ └── mixtral_8x7b_instruct_qlora_custom_sft_e1.py
│ │ │ ├── qwen/
│ │ │ │ ├── qwen1_5_0_5b_chat_qlora_custom_sft_e1.py
│ │ │ │ ├── qwen1_5_14b_chat_qlora_custom_sft_e1.py
│ │ │ │ ├── qwen1_5_1_8b_chat_qlora_custom_sft_e1.py
│ │ │ │ ├── qwen1_5_4b_chat_qlora_custom_sft_e1.py
│ │ │ │ ├── qwen1_5_72b_chat_qlora_custom_sft_e1.py
│ │ │ │ ├── qwen1_5_7b_chat_qlora_custom_sft_e1.py
│ │ │ │ ├── qwen_1_8b_chat_qlora_custom_sft_e1.py
│ │ │ │ ├── qwen_72b_qlora_custom_sft_e1.py
│ │ │ │ └── qwen_7b_chat_qlora_custom_sft_e1.py
│ │ │ ├── starcoder/
│ │ │ │ └── starcoder_qlora_custom_sft_e1.py
│ │ │ ├── yi/
│ │ │ │ ├── yi_34b_qlora_custom_sft_e1.py
│ │ │ │ └── yi_6b_qlora_custom_sft_e1.py
│ │ │ └── zephyr/
│ │ │ └── zephyr_7b_beta_qlora_custom_sft_e1.py
│ │ ├── deepseek/
│ │ │ ├── README.md
│ │ │ ├── deepseek_coder_6_7b_base/
│ │ │ │ └── deepseek_coder_6_7b_base_qlora_code_alpaca_e3.py
│ │ │ ├── deepseek_coder_6_7b_instruct/
│ │ │ │ └── deepseekcoder_6_7b_instruct_qlora_code_alpaca_e3.py
│ │ │ ├── deepseek_moe_16b_base/
│ │ │ │ ├── deepseek_moe_16b_base_full_oasst1_e3.py
│ │ │ │ └── deepseek_moe_16b_base_qlora_oasst1_e3.py
│ │ │ ├── deepseek_moe_16b_chat/
│ │ │ │ ├── deepseek_moe_16b_chat_full_oasst1_e3.py
│ │ │ │ └── deepseek_moe_16b_chat_qlora_oasst1_e3.py
│ │ │ ├── deepseek_v2_chat/
│ │ │ │ └── deepseek_v2_chat_full_alpaca_e3.py
│ │ │ └── deepseek_v2_lite_chat/
│ │ │ ├── deepseek_v2_lite_chat_full_alpaca_e3.py
│ │ │ └── deepseek_v2_lite_chat_full_alpaca_e3_32k_varlen.py
│ │ ├── deepspeed/
│ │ │ ├── deepspeed_zero1.json
│ │ │ ├── deepspeed_zero2.json
│ │ │ ├── deepspeed_zero2_offload.json
│ │ │ ├── deepspeed_zero3.json
│ │ │ └── deepspeed_zero3_offload.json
│ │ ├── dpo/
│ │ │ ├── internlm/
│ │ │ │ ├── internlm2_chat_1_8b_dpo_full.py
│ │ │ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn.py
│ │ │ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn_jsonl_dataset.py
│ │ │ │ └── internlm2_chat_7b_dpo_qlora_varlenattn.py
│ │ │ └── llama/
│ │ │ └── llama3_8b_instruct_dpo_qlora_varlenattn.py
│ │ ├── gemma/
│ │ │ ├── gemma_2b/
│ │ │ │ ├── gemma_2b_full_alpaca_e3.py
│ │ │ │ └── gemma_2b_qlora_alpaca_e3.py
│ │ │ ├── gemma_2b_it/
│ │ │ │ ├── gemma_2b_it_full_alpaca_e3.py
│ │ │ │ └── gemma_2b_it_qlora_alpaca_e3.py
│ │ │ ├── gemma_7b/
│ │ │ │ ├── gemma_7b_full_alpaca_e3.py
│ │ │ │ └── gemma_7b_qlora_alpaca_e3.py
│ │ │ └── gemma_7b_it/
│ │ │ ├── gemma_7b_it_full_alpaca_e3.py
│ │ │ └── gemma_7b_it_qlora_alpaca_e3.py
│ │ ├── internlm/
│ │ │ ├── internlm2_1_8b/
│ │ │ │ ├── internlm2_1_8b_full_alpaca_e3.py
│ │ │ │ └── internlm2_1_8b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_20b/
│ │ │ │ ├── internlm2_20b_full_finetune_custom_dataset_e1.py
│ │ │ │ ├── internlm2_20b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm2_20b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── internlm2_20b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm2_20b_qlora_colorist_e5.py
│ │ │ │ ├── internlm2_20b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm2_20b_qlora_msagent_react_e3_gpu8.py
│ │ │ │ ├── internlm2_20b_qlora_oasst1_512_e3.py
│ │ │ │ ├── internlm2_20b_qlora_oasst1_e3.py
│ │ │ │ └── internlm2_20b_qlora_sql_e3.py
│ │ │ ├── internlm2_7b/
│ │ │ │ ├── internlm2_7b_full_finetune_custom_dataset_e1.py
│ │ │ │ ├── internlm2_7b_full_finetune_custom_dataset_e1_sequence_parallel_4.py
│ │ │ │ ├── internlm2_7b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm2_7b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── internlm2_7b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm2_7b_qlora_colorist_e5.py
│ │ │ │ ├── internlm2_7b_qlora_json_e3.py
│ │ │ │ ├── internlm2_7b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm2_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ │ ├── internlm2_7b_qlora_oasst1_512_e3.py
│ │ │ │ ├── internlm2_7b_qlora_oasst1_e3.py
│ │ │ │ ├── internlm2_7b_qlora_sql_e3.py
│ │ │ │ ├── internlm2_7b_w_internevo_dataset.py
│ │ │ │ ├── internlm2_7b_w_tokenized_dataset.py
│ │ │ │ └── internlm2_7b_w_untokenized_dataset.py
│ │ │ ├── internlm2_chat_1_8b/
│ │ │ │ ├── internlm2_chat_1_8b_full_alpaca_e3.py
│ │ │ │ └── internlm2_chat_1_8b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_chat_20b/
│ │ │ │ ├── internlm2_chat_20b_full_finetune_custom_dataset_e1.py
│ │ │ │ ├── internlm2_chat_20b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm2_chat_20b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm2_chat_20b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm2_chat_20b_qlora_oasst1_512_e3.py
│ │ │ │ └── internlm2_chat_20b_qlora_oasst1_e3.py
│ │ │ ├── internlm2_chat_7b/
│ │ │ │ ├── internlm2_chat_7b_full_finetune_custom_dataset_e1.py
│ │ │ │ ├── internlm2_chat_7b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm2_chat_7b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm2_chat_7b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm2_chat_7b_qlora_oasst1_512_e3.py
│ │ │ │ └── internlm2_chat_7b_qlora_oasst1_e3.py
│ │ │ ├── internlm_20b/
│ │ │ │ ├── internlm_20b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm_20b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── internlm_20b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── internlm_20b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── internlm_20b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── internlm_20b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm_20b_qlora_colorist_e5.py
│ │ │ │ ├── internlm_20b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm_20b_qlora_msagent_react_e3_gpu8.py
│ │ │ │ ├── internlm_20b_qlora_oasst1_512_e3.py
│ │ │ │ ├── internlm_20b_qlora_oasst1_e3.py
│ │ │ │ ├── internlm_20b_qlora_open_platypus_e3.py
│ │ │ │ └── internlm_20b_qlora_sql_e3.py
│ │ │ ├── internlm_7b/
│ │ │ │ ├── internlm_7b_full_alpaca_e3.py
│ │ │ │ ├── internlm_7b_full_alpaca_enzh_e3.py
│ │ │ │ ├── internlm_7b_full_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── internlm_7b_full_alpaca_zh_e3.py
│ │ │ │ ├── internlm_7b_full_intern_repo_dataset_template.py
│ │ │ │ ├── internlm_7b_full_oasst1_e3.py
│ │ │ │ ├── internlm_7b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm_7b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── internlm_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── internlm_7b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── internlm_7b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── internlm_7b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm_7b_qlora_colorist_e5.py
│ │ │ │ ├── internlm_7b_qlora_json_e3.py
│ │ │ │ ├── internlm_7b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm_7b_qlora_medical_e1.py
│ │ │ │ ├── internlm_7b_qlora_moss_sft_all_e1.py
│ │ │ │ ├── internlm_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ │ ├── internlm_7b_qlora_moss_sft_plugins_e1.py
│ │ │ │ ├── internlm_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ │ ├── internlm_7b_qlora_oasst1_512_e3.py
│ │ │ │ ├── internlm_7b_qlora_oasst1_e3.py
│ │ │ │ ├── internlm_7b_qlora_oasst1_e3_hf.py
│ │ │ │ ├── internlm_7b_qlora_oasst1_mmlu_e3.py
│ │ │ │ ├── internlm_7b_qlora_open_platypus_e3.py
│ │ │ │ ├── internlm_7b_qlora_openorca_e1.py
│ │ │ │ ├── internlm_7b_qlora_sql_e3.py
│ │ │ │ └── internlm_7b_qlora_tiny_codes_e1.py
│ │ │ ├── internlm_chat_20b/
│ │ │ │ ├── internlm_chat_20b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_oasst1_512_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_oasst1_e3.py
│ │ │ │ └── internlm_chat_20b_qlora_open_platypus_e3.py
│ │ │ └── internlm_chat_7b/
│ │ │ ├── internlm_chat_7b_qlora_alpaca_e3.py
│ │ │ ├── internlm_chat_7b_qlora_alpaca_enzh_e3.py
│ │ │ ├── internlm_chat_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── internlm_chat_7b_qlora_alpaca_zh_e3.py
│ │ │ ├── internlm_chat_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── internlm_chat_7b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm_chat_7b_qlora_colorist_e5.py
│ │ │ ├── internlm_chat_7b_qlora_lawyer_e3.py
│ │ │ ├── internlm_chat_7b_qlora_medical_e1.py
│ │ │ ├── internlm_chat_7b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm_chat_7b_qlora_oasst1_e3.py
│ │ │ ├── internlm_chat_7b_qlora_open_platypus_e3.py
│ │ │ ├── internlm_chat_7b_qlora_openorca_e1.py
│ │ │ ├── internlm_chat_7b_qlora_sql_e3.py
│ │ │ └── internlm_chat_7b_qlora_tiny_codes_e1.py
│ │ ├── llama/
│ │ │ ├── llama2_70b/
│ │ │ │ ├── llama2_70b_full_wizardlm_e1.py
│ │ │ │ ├── llama2_70b_int8_lora_open_platypus_e1.py
│ │ │ │ ├── llama2_70b_int8_lora_open_platypus_e1_hf.py
│ │ │ │ ├── llama2_70b_qlora_open_platypus_e1.py
│ │ │ │ └── llama2_70b_qlora_open_platypus_e1_hf.py
│ │ │ ├── llama2_7b/
│ │ │ │ ├── llama2_7b_full_pgbooks_400iters_sp1.py
│ │ │ │ ├── llama2_7b_full_pgbooks_400iters_sp4.py
│ │ │ │ ├── llama2_7b_full_wizardlm_e1.py
│ │ │ │ ├── llama2_7b_qlora_alpaca_e3.py
│ │ │ │ ├── llama2_7b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── llama2_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── llama2_7b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── llama2_7b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── llama2_7b_qlora_code_alpaca_e3.py
│ │ │ │ ├── llama2_7b_qlora_colorist_e5.py
│ │ │ │ ├── llama2_7b_qlora_lawyer_e3.py
│ │ │ │ ├── llama2_7b_qlora_medical_e1.py
│ │ │ │ ├── llama2_7b_qlora_moss_sft_all_e1.py
│ │ │ │ ├── llama2_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ │ ├── llama2_7b_qlora_moss_sft_plugins_e1.py
│ │ │ │ ├── llama2_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ │ ├── llama2_7b_qlora_oasst1_512_e3.py
│ │ │ │ ├── llama2_7b_qlora_oasst1_e3.py
│ │ │ │ ├── llama2_7b_qlora_open_platypus_e3.py
│ │ │ │ ├── llama2_7b_qlora_openorca_e1.py
│ │ │ │ ├── llama2_7b_qlora_sql_e3.py
│ │ │ │ └── llama2_7b_qlora_tiny_codes_e1.py
│ │ │ ├── llama2_7b_chat/
│ │ │ │ ├── llama2_7b_chat_qlora_alpaca_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_code_alpaca_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_colorist_e5.py
│ │ │ │ ├── llama2_7b_chat_qlora_lawyer_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_medical_e1.py
│ │ │ │ ├── llama2_7b_chat_qlora_oasst1_512_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_oasst1_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_open_platypus_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_openorca_e1.py
│ │ │ │ ├── llama2_7b_chat_qlora_sql_e3.py
│ │ │ │ └── llama2_7b_chat_qlora_tiny_codes_e1.py
│ │ │ ├── llama3_70b_instruct/
│ │ │ │ └── llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py
│ │ │ ├── llama3_8b/
│ │ │ │ ├── README.md
│ │ │ │ └── llama3_8b_full_alpaca_e3.py
│ │ │ ├── llama3_8b_instruct/
│ │ │ │ ├── llama3_8b_instruct_full_alpaca_e3.py
│ │ │ │ └── llama3_8b_instruct_qlora_alpaca_e3.py
│ │ │ └── llama_7b/
│ │ │ ├── llama_7b_qlora_alpaca_e3.py
│ │ │ ├── llama_7b_qlora_alpaca_enzh_e3.py
│ │ │ ├── llama_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── llama_7b_qlora_alpaca_zh_e3.py
│ │ │ ├── llama_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── llama_7b_qlora_code_alpaca_e3.py
│ │ │ ├── llama_7b_qlora_colorist_e5.py
│ │ │ ├── llama_7b_qlora_lawyer_e3.py
│ │ │ ├── llama_7b_qlora_medical_e1.py
│ │ │ ├── llama_7b_qlora_moss_sft_all_e1.py
│ │ │ ├── llama_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ ├── llama_7b_qlora_moss_sft_plugins_e1.py
│ │ │ ├── llama_7b_qlora_oasst1_512_e3.py
│ │ │ ├── llama_7b_qlora_oasst1_e3.py
│ │ │ ├── llama_7b_qlora_open_platypus_e3.py
│ │ │ ├── llama_7b_qlora_openorca_e1.py
│ │ │ ├── llama_7b_qlora_sql_e3.py
│ │ │ └── llama_7b_qlora_tiny_codes_e1.py
│ │ ├── llama_speed_benchmark/
│ │ │ ├── llama2_70b/
│ │ │ │ ├── llama2_70b_full_alpaca_enzh_128k_sp8.py
│ │ │ │ ├── llama2_70b_full_alpaca_enzh_256k_sp16.py
│ │ │ │ ├── llama2_70b_full_alpaca_enzh_32k_sp4.py
│ │ │ │ └── llama2_70b_full_alpaca_enzh_8k_sp1.py
│ │ │ ├── llama2_7b/
│ │ │ │ ├── llama2_7b_full_alpaca_enzh_128k_sp8.py
│ │ │ │ ├── llama2_7b_full_alpaca_enzh_1M_sp16.py
│ │ │ │ ├── llama2_7b_full_alpaca_enzh_256k_sp8.py
│ │ │ │ ├── llama2_7b_full_alpaca_enzh_32k_sp1.py
│ │ │ │ └── llama2_7b_full_alpaca_enzh_8k_sp1.py
│ │ │ └── yi_34b/
│ │ │ ├── yi_34b_200k_full_alpaca_enzh_128k_sp8.py
│ │ │ ├── yi_34b_200k_full_alpaca_enzh_256k_sp8.py
│ │ │ ├── yi_34b_200k_full_alpaca_enzh_32k_sp2.py
│ │ │ └── yi_34b_200k_full_alpaca_enzh_8k_sp1.py
│ │ ├── llava/
│ │ │ ├── README.md
│ │ │ ├── README_zh-CN.md
│ │ │ ├── internlm2_chat_1_8b_clip_vit_large_p14_336/
│ │ │ │ ├── finetune/
│ │ │ │ │ └── llava_internlm2_chat_1_8b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_internlm2_chat_1_8b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── internlm2_chat_20b_clip_vit_large_p14_336/
│ │ │ │ ├── finetune/
│ │ │ │ │ ├── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ │ └── llava_internlm2_chat_20b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── internlm2_chat_7b_clip_vit_large_p14_336/
│ │ │ │ ├── finetune/
│ │ │ │ │ ├── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ │ └── llava_internlm2_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── internlm_chat_7b_clip_vit_large_p14_336/
│ │ │ │ ├── finetune/
│ │ │ │ │ └── llava_internlm_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_internlm_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── llama3_70b_instruct_clip_vit_large_p14_336/
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_llama3_70b_instruct_quant_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── llama3_8b_instruct_clip_vit_large_p14_336/
│ │ │ │ ├── README.md
│ │ │ │ ├── convert_xtuner_weights_to_hf.py
│ │ │ │ ├── convert_xtuner_weights_to_llava.py
│ │ │ │ ├── finetune/
│ │ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_internvl_finetune.py
│ │ │ │ │ └── llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_e1_gpu1_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py
│ │ │ │ └── llava_llama3_8b_instruct_quant_clip_vit_large_p14_336_e1_gpu1_pretrain.py
│ │ │ ├── official/
│ │ │ │ ├── llava_v15_13b/
│ │ │ │ │ ├── llava_v15_13b_finetune.py
│ │ │ │ │ ├── llava_v15_13b_finetune_lora.py
│ │ │ │ │ └── llava_v15_13b_pretrain.py
│ │ │ │ └── llava_v15_7b/
│ │ │ │ ├── llava_v15_7b_finetune.py
│ │ │ │ ├── llava_v15_7b_finetune_lora.py
│ │ │ │ └── llava_v15_7b_pretrain.py
│ │ │ ├── phi3_mini_4k_instruct_clip_vit_large_p14_336/
│ │ │ │ ├── README.md
│ │ │ │ ├── convert_phi_to_llama.py
│ │ │ │ ├── convert_xtuner_weights_to_hf.py
│ │ │ │ ├── convert_xtuner_weights_to_llava.py
│ │ │ │ ├── finetune/
│ │ │ │ │ ├── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ │ └── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_full_e2_gpu8_internvl_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ ├── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ │ └── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py
│ │ │ ├── vicuna_13b_v15_clip_vit_large_p14_336/
│ │ │ │ ├── finetune/
│ │ │ │ │ └── llava_vicuna_13b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_vicuna_13b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ └── vicuna_7b_v15_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ ├── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune_refcoco.py
│ │ │ └── pretrain/
│ │ │ └── llava_vicuna_7b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── mistral/
│ │ │ ├── mistral_7b_full_finetune_custom_dataset_e1.py
│ │ │ ├── mistral_7b_qlora_skypile_pretrain_e1.py
│ │ │ ├── mistral_7b_w_tokenized_dataset.py
│ │ │ └── mistral_7b_w_untokenized_dataset.py
│ │ ├── mixtral/
│ │ │ ├── README.md
│ │ │ ├── mixtral_8x7b/
│ │ │ │ ├── mixtral_8x7b_full_oasst1_e3.py
│ │ │ │ └── mixtral_8x7b_qlora_oasst1_e3.py
│ │ │ └── mixtral_8x7b_instruct/
│ │ │ ├── mixtral_8x7b_instruct_full_oasst1_e3.py
│ │ │ └── mixtral_8x7b_instruct_qlora_oasst1_e3.py
│ │ ├── orpo/
│ │ │ ├── internlm/
│ │ │ │ ├── internlm2_chat_1_8b_orpo_full.py
│ │ │ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn.py
│ │ │ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn_jsonl_dataset.py
│ │ │ │ └── internlm2_chat_7b_orpo_qlora_varlenattn_ultrafeedback_e5.py
│ │ │ └── llama/
│ │ │ └── llama3_8b_instruct_orpo_qlora_varlenattn_ultrafeedback_e5.py
│ │ ├── phi/
│ │ │ └── phi3/
│ │ │ ├── phi3_mini_128k_instruct_full_alpaca_e3.py
│ │ │ ├── phi3_mini_128k_instruct_qlora_alpaca_e3.py
│ │ │ ├── phi3_mini_4k_instruct_full_alpaca_e3.py
│ │ │ └── phi3_mini_4k_instruct_qlora_alpaca_e3.py
│ │ ├── qwen/
│ │ │ ├── qwen1/
│ │ │ │ ├── qwen_1_8b/
│ │ │ │ │ ├── qwen_1_8b_qlora_alpaca_e3.py
│ │ │ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_e3.py
│ │ │ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ │ ├── qwen_1_8b_qlora_alpaca_zh_e3.py
│ │ │ │ │ └── qwen_1_8b_qlora_code_alpaca_e3.py
│ │ │ │ ├── qwen_1_8b_chat/
│ │ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_e3.py
│ │ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ │ └── qwen_1_8b_chat_qlora_code_alpaca_e3.py
│ │ │ │ ├── qwen_72b/
│ │ │ │ │ ├── qwen_72b_qlora_alpaca_e3.py
│ │ │ │ │ ├── qwen_72b_qlora_alpaca_enzh_e3.py
│ │ │ │ │ ├── qwen_72b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ │ ├── qwen_72b_qlora_alpaca_zh_e3.py
│ │ │ │ │ └── qwen_72b_qlora_code_alpaca_e3.py
│ │ │ │ ├── qwen_7b/
│ │ │ │ │ ├── qwen_7b_qlora_alpaca_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_alpaca_enzh_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_alpaca_zh_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_arxiv_gentitle_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_code_alpaca_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_colorist_e5.py
│ │ │ │ │ ├── qwen_7b_qlora_lawyer_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_medical_e1.py
│ │ │ │ │ ├── qwen_7b_qlora_moss_sft_all_e1.py
│ │ │ │ │ ├── qwen_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ │ │ ├── qwen_7b_qlora_moss_sft_plugins_e1.py
│ │ │ │ │ ├── qwen_7b_qlora_oasst1_512_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_oasst1_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_open_platypus_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_openorca_e1.py
│ │ │ │ │ ├── qwen_7b_qlora_sql_e3.py
│ │ │ │ │ └── qwen_7b_qlora_tiny_codes_e1.py
│ │ │ │ └── qwen_7b_chat/
│ │ │ │ ├── qwen_7b_chat_qlora_alpaca_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_code_alpaca_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_colorist_e5.py
│ │ │ │ ├── qwen_7b_chat_qlora_lawyer_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_medical_e1.py
│ │ │ │ ├── qwen_7b_chat_qlora_oasst1_512_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_oasst1_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_open_platypus_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_openorca_e1.py
│ │ │ │ ├── qwen_7b_chat_qlora_sql_e3.py
│ │ │ │ └── qwen_7b_chat_qlora_tiny_codes_e1.py
│ │ │ └── qwen1_5/
│ │ │ ├── qwen1_5_0_5b/
│ │ │ │ ├── qwen1_5_0_5b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_0_5b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_0_5b_chat/
│ │ │ │ ├── qwen1_5_0_5b_chat_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_0_5b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_110b/
│ │ │ │ ├── qwen1_5_110b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_110b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_110b_chat/
│ │ │ │ ├── README.md
│ │ │ │ ├── qwen1_5_110b_chat_full_alpaca_e3.py
│ │ │ │ ├── qwen1_5_110b_chat_qlora_alpaca_e3.py
│ │ │ │ └── qwen1_5_110b_chat_qlora_alpaca_e3_16k_2gpus.py
│ │ │ ├── qwen1_5_14b/
│ │ │ │ ├── qwen1_5_14b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_14b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_14b_chat/
│ │ │ │ ├── qwen1_5_14b_chat_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_14b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_1_8b/
│ │ │ │ ├── qwen1_5_1_8b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_1_8b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_1_8b_chat/
│ │ │ │ ├── qwen1_5_1_8b_chat_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_1_8b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_4b/
│ │ │ │ ├── qwen1_5_4b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_4b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_4b_chat/
│ │ │ │ ├── qwen1_5_4b_chat_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_4b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_72b/
│ │ │ │ ├── qwen1_5_72b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_72b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_72b_chat/
│ │ │ │ ├── qwen1_5_72b_chat_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_72b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_7b/
│ │ │ │ ├── qwen1_5_7b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_7b_qlora_alpaca_e3.py
│ │ │ └── qwen1_5_7b_chat/
│ │ │ ├── qwen1_5_7b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_7b_chat_qlora_alpaca_e3.py
│ │ ├── qwen_moe/
│ │ │ └── qwen1_5/
│ │ │ └── qwen1_5_moe_a2_7_b_chat/
│ │ │ └── qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py
│ │ ├── reward_model/
│ │ │ ├── internlm/
│ │ │ │ ├── internlm2_chat_1_8b_reward_full_ultrafeedback.py
│ │ │ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_jsonl_dataset.py
│ │ │ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_ultrafeedback.py
│ │ │ │ └── internlm2_chat_1_8b_reward_qlora_varlenattn_ultrafeedback.py
│ │ │ └── llama/
│ │ │ └── llama3_8b_instruct_reward_full_varlenattn_ultrafeedback.py
│ │ ├── starcoder/
│ │ │ └── starcoder_qlora_stack_exchange_example.py
│ │ ├── yi/
│ │ │ ├── yi_34b/
│ │ │ │ └── yi_34b_qlora_alpaca_enzh_e3.py
│ │ │ └── yi_6b/
│ │ │ └── yi_6b_qlora_alpaca_enzh_e3.py
│ │ └── zephyr/
│ │ └── zephyr_7b_beta_qlora_alpaca_e3.py
│ ├── dataset/
│ │ ├── __init__.py
│ │ ├── collate_fns/
│ │ │ ├── __init__.py
│ │ │ ├── default_collate_fn.py
│ │ │ ├── mmlu_collate_fn.py
│ │ │ └── preference_collate_fn.py
│ │ ├── concat_dataset.py
│ │ ├── huggingface.py
│ │ ├── intern_repo.py
│ │ ├── json_dataset.py
│ │ ├── llava.py
│ │ ├── map_fns/
│ │ │ ├── __init__.py
│ │ │ ├── dataset_map_fns/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── alpaca_map_fn.py
│ │ │ │ ├── alpaca_zh_map_fn.py
│ │ │ │ ├── arxiv_map_fn.py
│ │ │ │ ├── code_alpaca_map_fn.py
│ │ │ │ ├── colors_map_fn.py
│ │ │ │ ├── crime_kg_assitant_map_fn.py
│ │ │ │ ├── default_map_fn.py
│ │ │ │ ├── law_reference_map_fn.py
│ │ │ │ ├── llava_map_fn.py
│ │ │ │ ├── medical_map_fn.py
│ │ │ │ ├── msagent_map_fn.py
│ │ │ │ ├── oasst1_map_fn.py
│ │ │ │ ├── openai_map_fn.py
│ │ │ │ ├── openorca_map_fn.py
│ │ │ │ ├── pretrain_map_fn.py
│ │ │ │ ├── sql_map_fn.py
│ │ │ │ ├── stack_exchange_map_fn.py
│ │ │ │ ├── tiny_codes_map_fn.py
│ │ │ │ └── wizardlm_map_fn.py
│ │ │ └── template_map_fn.py
│ │ ├── modelscope.py
│ │ ├── moss_sft.py
│ │ ├── preference_dataset.py
│ │ ├── refcoco_json.py
│ │ ├── samplers/
│ │ │ ├── __init__.py
│ │ │ ├── intern_repo.py
│ │ │ └── length_grouped.py
│ │ └── utils.py
│ ├── engine/
│ │ ├── __init__.py
│ │ ├── _strategy/
│ │ │ ├── __init__.py
│ │ │ └── deepspeed.py
│ │ ├── hooks/
│ │ │ ├── __init__.py
│ │ │ ├── dataset_info_hook.py
│ │ │ ├── evaluate_chat_hook.py
│ │ │ ├── hf_checkpoint_hook.py
│ │ │ ├── throughput_hook.py
│ │ │ └── varlen_attn_args_to_messagehub_hook.py
│ │ └── runner/
│ │ ├── __init__.py
│ │ └── loops.py
│ ├── entry_point.py
│ ├── evaluation/
│ │ ├── __init__.py
│ │ └── metrics/
│ │ ├── __init__.py
│ │ ├── mmlu_metric.py
│ │ └── reward_metric.py
│ ├── model/
│ │ ├── __init__.py
│ │ ├── dpo.py
│ │ ├── llava.py
│ │ ├── modules/
│ │ │ ├── __init__.py
│ │ │ ├── dispatch/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── attention.py
│ │ │ │ ├── baichuan.py
│ │ │ │ ├── cohere.py
│ │ │ │ ├── deepseek_v2.py
│ │ │ │ ├── internlm.py
│ │ │ │ ├── internlm2.py
│ │ │ │ ├── llama.py
│ │ │ │ ├── mistral.py
│ │ │ │ ├── phi3.py
│ │ │ │ ├── qwen2.py
│ │ │ │ ├── triton_kernels/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── layer_norm.py
│ │ │ │ │ ├── rms_norm.py
│ │ │ │ │ └── rotary.py
│ │ │ │ ├── utils.py
│ │ │ │ └── yi.py
│ │ │ └── projector/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_projector.py
│ │ │ └── modeling_projector.py
│ │ ├── orpo.py
│ │ ├── reward.py
│ │ ├── sft.py
│ │ ├── transformers_models/
│ │ │ ├── __init__.py
│ │ │ ├── deepseek_v2/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── configuration_deepseek.py
│ │ │ │ ├── modeling_deepseek.py
│ │ │ │ └── tokenization_deepseek_fast.py
│ │ │ └── mixtral/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mixtral.py
│ │ │ └── modeling_mixtral.py
│ │ └── utils.py
│ ├── parallel/
│ │ ├── __init__.py
│ │ └── sequence/
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── comm.py
│ │ ├── data_collate.py
│ │ ├── reduce_loss.py
│ │ ├── sampler.py
│ │ └── setup_distributed.py
│ ├── registry.py
│ ├── tools/
│ │ ├── chat.py
│ │ ├── check_custom_dataset.py
│ │ ├── copy_cfg.py
│ │ ├── data_preprocess/
│ │ │ ├── arxiv.py
│ │ │ └── convert_refcoco.py
│ │ ├── eval_refcoco.py
│ │ ├── get_data_order.py
│ │ ├── list_cfg.py
│ │ ├── list_dataset_format.py
│ │ ├── log_dataset.py
│ │ ├── mmbench.py
│ │ ├── model_converters/
│ │ │ ├── merge.py
│ │ │ ├── modeling_internlm2_reward/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── configuration_internlm2.py
│ │ │ │ └── modeling_internlm2.py
│ │ │ ├── pth_to_hf.py
│ │ │ └── split.py
│ │ ├── plugins/
│ │ │ ├── __init__.py
│ │ │ ├── api.py
│ │ │ ├── calculate.py
│ │ │ ├── search.py
│ │ │ └── solve.py
│ │ ├── process_untokenized_datasets.py
│ │ ├── process_untokenized_datasets_legacy.py
│ │ ├── process_untokenized_llava_data.py
│ │ ├── test.py
│ │ ├── tokenize_ftdp_datasets.py
│ │ ├── train.py
│ │ └── utils.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── fileio.py
│ │ ├── handle_moe_load_and_save.py
│ │ ├── stop_criteria.py
│ │ ├── templates.py
│ │ └── zero_to_any_dtype.py
│ └── version.py
└── xtuner-train_internvideo2_5/
├── .gitignore
├── .owners.yml
├── .pre-commit-config-zh-cn.yaml
├── .pre-commit-config.yaml
├── LICENSE
├── MANIFEST.in
├── README.md
├── data/
│ ├── annotaions/
│ │ └── ft_data_example.jsonl
│ └── diy_ft_data.json
├── ft_internvideo_2_5.sh
├── ft_internvideo_2_5_datapacking.sh
├── requirements/
│ ├── deepspeed.txt
│ ├── docs.txt
│ ├── modelscope.txt
│ └── runtime.txt
├── requirements.txt
├── setup.cfg
├── setup.py
├── unify_internvl2_train_r16.py
└── xtuner/
├── __init__.py
├── _lite/
│ ├── __init__.py
│ ├── accelerate/
│ │ ├── __init__.py
│ │ ├── dispatches/
│ │ │ ├── __init__.py
│ │ │ ├── _attention.py
│ │ │ ├── _fused/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── layer_norm.py
│ │ │ │ ├── rms_norm.py
│ │ │ │ └── rotary.py
│ │ │ ├── clip.py
│ │ │ ├── internlm2.py
│ │ │ ├── internvl2.py
│ │ │ ├── llama3.py
│ │ │ ├── new.py
│ │ │ ├── phi3.py
│ │ │ ├── qwen2.py
│ │ │ └── qwen_vl2.py
│ │ ├── fsdp/
│ │ │ ├── __init__.py
│ │ │ ├── checkpointing.py
│ │ │ ├── clip_grad.py
│ │ │ ├── lazy.py
│ │ │ ├── precision.py
│ │ │ └── wrap.py
│ │ ├── generate.py
│ │ ├── lora.py
│ │ └── packed.py
│ ├── auto.py
│ ├── chat/
│ │ ├── __init__.py
│ │ ├── backends/
│ │ │ └── __init__.py
│ │ ├── messages/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ └── chat.py
│ │ └── templates/
│ │ ├── __init__.py
│ │ ├── chat.py
│ │ └── hybrid.py
│ ├── checkpoint.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── dataset_fn.py
│ │ ├── format.py
│ │ ├── llava.py
│ │ ├── load.py
│ │ ├── load_new.py
│ │ ├── text.py
│ │ └── tokenize.py
│ ├── internvl/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── conversation.py
│ │ ├── dataset.py
│ │ ├── new_dataset.py
│ │ ├── v1_5/
│ │ │ ├── configuration_intern_vit.py
│ │ │ ├── configuration_internvl_chat.py
│ │ │ ├── configuration_phi3.py
│ │ │ ├── conversation.py
│ │ │ ├── modeling_intern_vit.py
│ │ │ ├── modeling_internvl_chat.py
│ │ │ └── modeling_phi3.py
│ │ └── video_utils.py
│ ├── modelings/
│ │ ├── __init__.py
│ │ ├── internlm2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_internlm2.py
│ │ │ └── modeling_internlm2.py
│ │ └── model_fn.py
│ ├── parallel/
│ │ ├── __init__.py
│ │ ├── comm.py
│ │ ├── logger.py
│ │ ├── new_setup.py
│ │ ├── plans/
│ │ │ └── internlm2.py
│ │ ├── sampler.py
│ │ ├── sequence/
│ │ │ ├── __init__.py
│ │ │ ├── attention.py
│ │ │ ├── data_collate.py
│ │ │ ├── ops.py
│ │ │ └── reduce_loss.py
│ │ └── setup.py
│ └── yunchang/
│ ├── __init__.py
│ ├── comm/
│ │ ├── __init__.py
│ │ ├── all_to_all.py
│ │ └── extract_local.py
│ ├── globals.py
│ ├── hybrid/
│ │ ├── __init__.py
│ │ ├── async_attn_layer.py
│ │ ├── attn_layer.py
│ │ └── utils.py
│ ├── ring/
│ │ ├── __init__.py
│ │ ├── llama3_flash_attn_varlen.py
│ │ ├── ring_flash_attn.py
│ │ ├── ring_flash_attn_varlen.py
│ │ ├── stripe_flash_attn.py
│ │ ├── triton_utils.py
│ │ ├── utils.py
│ │ ├── zigzag_ring_flash_attn.py
│ │ └── zigzag_ring_flash_attn_varlen.py
│ └── ulysses/
│ ├── __init__.py
│ └── attn_layer.py
├── apis/
│ ├── __init__.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── alpaca.py
│ │ ├── arxiv.py
│ │ ├── code_alpaca.py
│ │ ├── colorist.py
│ │ ├── lawyer.py
│ │ ├── medical.py
│ │ ├── moss_003_sft.py
│ │ ├── oasst1.py
│ │ ├── open_orca.py
│ │ ├── sql.py
│ │ ├── tiny_codes.py
│ │ └── wizardlm.py
│ ├── model.py
│ └── training_args.py
├── configs/
│ ├── __init__.py
│ ├── baichuan/
│ │ ├── baichuan2_13b_base/
│ │ │ ├── baichuan2_13b_base_qlora_alpaca_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_arxiv_gentitle_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_colorist_e5.py
│ │ │ ├── baichuan2_13b_base_qlora_lawyer_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_oasst1_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_open_platypus_e3.py
│ │ │ └── baichuan2_13b_base_qlora_sql_e3.py
│ │ ├── baichuan2_13b_chat/
│ │ │ ├── baichuan2_13b_chat_qlora_alpaca_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_lawyer_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_oasst1_e3.py
│ │ │ └── baichuan2_13b_chat_qlora_open_platypus_e3.py
│ │ ├── baichuan2_7b_base/
│ │ │ ├── baichuan2_7b_base_qlora_alpaca_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_arxiv_gentitle_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_colorist_e5.py
│ │ │ ├── baichuan2_7b_base_qlora_lawyer_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_oasst1_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_open_platypus_e3.py
│ │ │ └── baichuan2_7b_base_qlora_sql_e3.py
│ │ ├── baichuan2_7b_chat/
│ │ │ ├── baichuan2_7b_chat_qlora_alpaca_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_lawyer_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_oasst1_e3.py
│ │ │ └── baichuan2_7b_chat_qlora_open_platypus_e3.py
│ │ ├── baichuan_13b_base/
│ │ │ ├── baichuan_13b_base_qlora_alpaca_e3.py
│ │ │ ├── baichuan_13b_base_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan_13b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan_13b_base_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan_13b_base_qlora_arxiv_gentitle_e3.py
│ │ │ ├── baichuan_13b_base_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan_13b_base_qlora_colorist_e5.py
│ │ │ ├── baichuan_13b_base_qlora_lawyer_e3.py
│ │ │ ├── baichuan_13b_base_qlora_medical_e1.py
│ │ │ ├── baichuan_13b_base_qlora_moss_sft_all_e1.py
│ │ │ ├── baichuan_13b_base_qlora_moss_sft_all_e2_gpu8.py
│ │ │ ├── baichuan_13b_base_qlora_moss_sft_plugins_e1.py
│ │ │ ├── baichuan_13b_base_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan_13b_base_qlora_oasst1_e3.py
│ │ │ ├── baichuan_13b_base_qlora_open_platypus_e3.py
│ │ │ ├── baichuan_13b_base_qlora_openorca_e1.py
│ │ │ ├── baichuan_13b_base_qlora_sql_e3.py
│ │ │ └── baichuan_13b_base_qlora_tiny_codes_e1.py
│ │ ├── baichuan_13b_chat/
│ │ │ ├── baichuan_13b_chat_qlora_alpaca_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_colorist_e5.py
│ │ │ ├── baichuan_13b_chat_qlora_lawyer_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_medical_e1.py
│ │ │ ├── baichuan_13b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_oasst1_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_open_platypus_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_openorca_e1.py
│ │ │ ├── baichuan_13b_chat_qlora_sql_e3.py
│ │ │ └── baichuan_13b_chat_qlora_tiny_codes_e1.py
│ │ └── baichuan_7b/
│ │ ├── baichuan_7b_qlora_alpaca_e3.py
│ │ ├── baichuan_7b_qlora_alpaca_enzh_e3.py
│ │ ├── baichuan_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── baichuan_7b_qlora_alpaca_zh_e3.py
│ │ ├── baichuan_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── baichuan_7b_qlora_code_alpaca_e3.py
│ │ ├── baichuan_7b_qlora_colorist_e5.py
│ │ ├── baichuan_7b_qlora_lawyer_e3.py
│ │ ├── baichuan_7b_qlora_medical_e1.py
│ │ ├── baichuan_7b_qlora_moss_sft_all_e1.py
│ │ ├── baichuan_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ ├── baichuan_7b_qlora_moss_sft_plugins_e1.py
│ │ ├── baichuan_7b_qlora_oasst1_512_e3.py
│ │ ├── baichuan_7b_qlora_oasst1_e3.py
│ │ ├── baichuan_7b_qlora_open_platypus_e3.py
│ │ ├── baichuan_7b_qlora_openorca_e1.py
│ │ ├── baichuan_7b_qlora_sql_e3.py
│ │ └── baichuan_7b_qlora_tiny_codes_e1.py
│ ├── chatglm/
│ │ ├── chatglm2_6b/
│ │ │ ├── chatglm2_6b_qlora_alpaca_e3.py
│ │ │ ├── chatglm2_6b_qlora_alpaca_enzh_e3.py
│ │ │ ├── chatglm2_6b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── chatglm2_6b_qlora_alpaca_zh_e3.py
│ │ │ ├── chatglm2_6b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── chatglm2_6b_qlora_code_alpaca_e3.py
│ │ │ ├── chatglm2_6b_qlora_colorist_e5.py
│ │ │ ├── chatglm2_6b_qlora_lawyer_e3.py
│ │ │ ├── chatglm2_6b_qlora_medical_e1.py
│ │ │ ├── chatglm2_6b_qlora_oasst1_512_e3.py
│ │ │ ├── chatglm2_6b_qlora_oasst1_e3.py
│ │ │ ├── chatglm2_6b_qlora_open_platypus_e3.py
│ │ │ ├── chatglm2_6b_qlora_openorca_e1.py
│ │ │ ├── chatglm2_6b_qlora_sql_e3.py
│ │ │ └── chatglm2_6b_qlora_tiny_codes_e1.py
│ │ ├── chatglm3_6b/
│ │ │ ├── chatglm3_6b_qlora_alpaca_e3.py
│ │ │ ├── chatglm3_6b_qlora_alpaca_enzh_e3.py
│ │ │ ├── chatglm3_6b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── chatglm3_6b_qlora_alpaca_zh_e3.py
│ │ │ ├── chatglm3_6b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── chatglm3_6b_qlora_code_alpaca_e3.py
│ │ │ ├── chatglm3_6b_qlora_colorist_e5.py
│ │ │ ├── chatglm3_6b_qlora_lawyer_e3.py
│ │ │ ├── chatglm3_6b_qlora_medical_e1.py
│ │ │ ├── chatglm3_6b_qlora_oasst1_512_e3.py
│ │ │ ├── chatglm3_6b_qlora_oasst1_e3.py
│ │ │ ├── chatglm3_6b_qlora_open_platypus_e3.py
│ │ │ ├── chatglm3_6b_qlora_openorca_e1.py
│ │ │ ├── chatglm3_6b_qlora_sql_e3.py
│ │ │ └── chatglm3_6b_qlora_tiny_codes_e1.py
│ │ └── chatglm3_6b_base/
│ │ ├── chatglm3_6b_base_qlora_alpaca_e3.py
│ │ ├── chatglm3_6b_base_qlora_alpaca_enzh_e3.py
│ │ ├── chatglm3_6b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── chatglm3_6b_base_qlora_alpaca_zh_e3.py
│ │ ├── chatglm3_6b_base_qlora_arxiv_gentitle_e3.py
│ │ ├── chatglm3_6b_base_qlora_code_alpaca_e3.py
│ │ ├── chatglm3_6b_base_qlora_colorist_e5.py
│ │ ├── chatglm3_6b_base_qlora_lawyer_e3.py
│ │ ├── chatglm3_6b_base_qlora_medical_e1.py
│ │ ├── chatglm3_6b_base_qlora_oasst1_512_e3.py
│ │ ├── chatglm3_6b_base_qlora_oasst1_e3.py
│ │ ├── chatglm3_6b_base_qlora_open_platypus_e3.py
│ │ ├── chatglm3_6b_base_qlora_openorca_e1.py
│ │ ├── chatglm3_6b_base_qlora_sql_e3.py
│ │ └── chatglm3_6b_base_qlora_tiny_codes_e1.py
│ ├── cohere/
│ │ ├── README.md
│ │ └── cohere_104b/
│ │ └── cohere_100b_128k_sp32.py
│ ├── custom_dataset/
│ │ ├── pretrain/
│ │ │ ├── baichuan/
│ │ │ │ ├── baichuan2_13b_base_full_custom_pretrain_e1.py
│ │ │ │ └── baichuan2_7b_base_full_custom_pretrain_e1.py
│ │ │ ├── chatglm/
│ │ │ │ ├── chatglm2_6b_full_custom_pretrain_e1.py
│ │ │ │ └── chatglm3_6b_full_custom_pretrain_e1.py
│ │ │ ├── deepseek/
│ │ │ │ └── deepseek_moe_16b_base_full_custom_pretrain_e1.py
│ │ │ ├── gemma/
│ │ │ │ ├── gemma_2b_full_custom_pretrain_e1.py
│ │ │ │ └── gemma_7b_full_custom_pretrain_e1.py
│ │ │ ├── internlm/
│ │ │ │ ├── internlm2_1_8b_full_custom_pretrain_e1.py
│ │ │ │ ├── internlm2_20b_full_custom_pretrain_e1.py
│ │ │ │ └── internlm2_7b_full_custom_pretrain_e1.py
│ │ │ ├── llama/
│ │ │ │ ├── llama2_70b_full_custom_pretrain_e1.py
│ │ │ │ └── llama2_7b_full_custom_pretrain_e1.py
│ │ │ ├── mistral/
│ │ │ │ └── mistral_7b_full_custom_pretrain_e1.py
│ │ │ ├── mixtral/
│ │ │ │ └── mixtral_8x7b_full_custom_pretrain_e1.py
│ │ │ ├── qwen/
│ │ │ │ ├── qwen1_5_0_5b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen1_5_14b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen1_5_1_8b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen1_5_4b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen1_5_72b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen1_5_7b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen_1_8b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen_72b_full_custom_pretrain_e1.py
│ │ │ │ └── qwen_7b_full_custom_pretrain_e1.py
│ │ │ ├── starcoder/
│ │ │ │ └── starcoder_full_custom_pretrain_e1.py
│ │ │ ├── yi/
│ │ │ │ ├── yi_34b_full_custom_pretrain_e1.py
│ │ │ │ └── yi_6b_full_custom_pretrain_e1.py
│ │ │ └── zephyr/
│ │ │ └── zephyr_7b_beta_full_custom_pretrain_e1.py
│ │ └── sft/
│ │ ├── baichuan/
│ │ │ ├── baichuan2_13b_chat_qlora_custom_sft_e1.py
│ │ │ ├── baichuan2_7b_chat_qlora_custom_sft_e1.py
│ │ │ ├── baichuan_13b_chat_qlora_custom_sft_e1.py
│ │ │ └── baichuan_7b_qlora_custom_sft_e1.py
│ │ ├── chatglm/
│ │ │ ├── chatglm2_6b_qlora_custom_sft_e1.py
│ │ │ └── chatglm3_6b_qlora_custom_sft_e1.py
│ │ ├── deepseek/
│ │ │ ├── deepseek_moe_16b_chat_qlora_custom_sft_e1.py
│ │ │ └── deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py
│ │ ├── gemma/
│ │ │ ├── gemma_2b_it_qlora_custom_sft_e1.py
│ │ │ ├── gemma_2b_qlora_custom_sft_e1.py
│ │ │ ├── gemma_7b_it_qlora_custom_sft_e1.py
│ │ │ └── gemma_7b_qlora_custom_sft_e1.py
│ │ ├── internlm/
│ │ │ ├── internlm2_chat_1_8b_qlora_custom_sft_e1.py
│ │ │ ├── internlm2_chat_20b_qlora_custom_sft_e1.py
│ │ │ └── internlm2_chat_7b_qlora_custom_sft_e1.py
│ │ ├── llama/
│ │ │ ├── llama2_70b_qlora_custom_sft_e1.py
│ │ │ └── llama2_7b_chat_qlora_custom_sft_e1.py
│ │ ├── mistral/
│ │ │ └── mistral_7b_full_finetune_custom_sft_e1.py
│ │ ├── mixtral/
│ │ │ └── mixtral_8x7b_instruct_qlora_custom_sft_e1.py
│ │ ├── qwen/
│ │ │ ├── qwen1_5_0_5b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen1_5_14b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen1_5_1_8b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen1_5_4b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen1_5_72b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen1_5_7b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen_1_8b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen_72b_qlora_custom_sft_e1.py
│ │ │ └── qwen_7b_chat_qlora_custom_sft_e1.py
│ │ ├── starcoder/
│ │ │ └── starcoder_qlora_custom_sft_e1.py
│ │ ├── yi/
│ │ │ ├── yi_34b_qlora_custom_sft_e1.py
│ │ │ └── yi_6b_qlora_custom_sft_e1.py
│ │ └── zephyr/
│ │ └── zephyr_7b_beta_qlora_custom_sft_e1.py
│ ├── deepseek/
│ │ ├── README.md
│ │ ├── deepseek_coder_6_7b_base/
│ │ │ └── deepseek_coder_6_7b_base_qlora_code_alpaca_e3.py
│ │ ├── deepseek_coder_6_7b_instruct/
│ │ │ └── deepseekcoder_6_7b_instruct_qlora_code_alpaca_e3.py
│ │ ├── deepseek_moe_16b_base/
│ │ │ ├── deepseek_moe_16b_base_full_oasst1_e3.py
│ │ │ └── deepseek_moe_16b_base_qlora_oasst1_e3.py
│ │ ├── deepseek_moe_16b_chat/
│ │ │ ├── deepseek_moe_16b_chat_full_oasst1_e3.py
│ │ │ └── deepseek_moe_16b_chat_qlora_oasst1_e3.py
│ │ ├── deepseek_v2_chat/
│ │ │ └── deepseek_v2_chat_full_alpaca_e3.py
│ │ └── deepseek_v2_lite_chat/
│ │ ├── deepseek_v2_lite_chat_full_alpaca_e3.py
│ │ └── deepseek_v2_lite_chat_full_alpaca_e3_32k_varlen.py
│ ├── deepspeed/
│ │ ├── deepspeed_zero1.json
│ │ ├── deepspeed_zero2.json
│ │ ├── deepspeed_zero2_offload.json
│ │ ├── deepspeed_zero3.json
│ │ └── deepspeed_zero3_offload.json
│ ├── dpo/
│ │ ├── internlm/
│ │ │ ├── internlm2_chat_1_8b_dpo_full.py
│ │ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn.py
│ │ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn_jsonl_dataset.py
│ │ │ └── internlm2_chat_7b_dpo_qlora_varlenattn.py
│ │ └── llama/
│ │ └── llama3_8b_instruct_dpo_qlora_varlenattn.py
│ ├── gemma/
│ │ ├── gemma_2b/
│ │ │ ├── gemma_2b_full_alpaca_e3.py
│ │ │ └── gemma_2b_qlora_alpaca_e3.py
│ │ ├── gemma_2b_it/
│ │ │ ├── gemma_2b_it_full_alpaca_e3.py
│ │ │ └── gemma_2b_it_qlora_alpaca_e3.py
│ │ ├── gemma_7b/
│ │ │ ├── gemma_7b_full_alpaca_e3.py
│ │ │ └── gemma_7b_qlora_alpaca_e3.py
│ │ └── gemma_7b_it/
│ │ ├── gemma_7b_it_full_alpaca_e3.py
│ │ └── gemma_7b_it_qlora_alpaca_e3.py
│ ├── internlm/
│ │ ├── internlm2_1_8b/
│ │ │ ├── internlm2_1_8b_full_alpaca_e3.py
│ │ │ └── internlm2_1_8b_qlora_alpaca_e3.py
│ │ ├── internlm2_20b/
│ │ │ ├── internlm2_20b_full_finetune_custom_dataset_e1.py
│ │ │ ├── internlm2_20b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_20b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── internlm2_20b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm2_20b_qlora_colorist_e5.py
│ │ │ ├── internlm2_20b_qlora_lawyer_e3.py
│ │ │ ├── internlm2_20b_qlora_msagent_react_e3_gpu8.py
│ │ │ ├── internlm2_20b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm2_20b_qlora_oasst1_e3.py
│ │ │ └── internlm2_20b_qlora_sql_e3.py
│ │ ├── internlm2_7b/
│ │ │ ├── internlm2_7b_full_finetune_custom_dataset_e1.py
│ │ │ ├── internlm2_7b_full_finetune_custom_dataset_e1_sequence_parallel_4.py
│ │ │ ├── internlm2_7b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── internlm2_7b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm2_7b_qlora_colorist_e5.py
│ │ │ ├── internlm2_7b_qlora_json_e3.py
│ │ │ ├── internlm2_7b_qlora_lawyer_e3.py
│ │ │ ├── internlm2_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ ├── internlm2_7b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm2_7b_qlora_oasst1_e3.py
│ │ │ ├── internlm2_7b_qlora_sql_e3.py
│ │ │ ├── internlm2_7b_w_internevo_dataset.py
│ │ │ ├── internlm2_7b_w_tokenized_dataset.py
│ │ │ └── internlm2_7b_w_untokenized_dataset.py
│ │ ├── internlm2_chat_1_8b/
│ │ │ ├── internlm2_chat_1_8b_full_alpaca_e3.py
│ │ │ └── internlm2_chat_1_8b_qlora_alpaca_e3.py
│ │ ├── internlm2_chat_20b/
│ │ │ ├── internlm2_chat_20b_full_finetune_custom_dataset_e1.py
│ │ │ ├── internlm2_chat_20b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_chat_20b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm2_chat_20b_qlora_lawyer_e3.py
│ │ │ ├── internlm2_chat_20b_qlora_oasst1_512_e3.py
│ │ │ └── internlm2_chat_20b_qlora_oasst1_e3.py
│ │ ├── internlm2_chat_7b/
│ │ │ ├── internlm2_chat_7b_full_finetune_custom_dataset_e1.py
│ │ │ ├── internlm2_chat_7b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_chat_7b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm2_chat_7b_qlora_lawyer_e3.py
│ │ │ ├── internlm2_chat_7b_qlora_oasst1_512_e3.py
│ │ │ └── internlm2_chat_7b_qlora_oasst1_e3.py
│ │ ├── internlm_20b/
│ │ │ ├── internlm_20b_qlora_alpaca_e3.py
│ │ │ ├── internlm_20b_qlora_alpaca_enzh_e3.py
│ │ │ ├── internlm_20b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── internlm_20b_qlora_alpaca_zh_e3.py
│ │ │ ├── internlm_20b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── internlm_20b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm_20b_qlora_colorist_e5.py
│ │ │ ├── internlm_20b_qlora_lawyer_e3.py
│ │ │ ├── internlm_20b_qlora_msagent_react_e3_gpu8.py
│ │ │ ├── internlm_20b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm_20b_qlora_oasst1_e3.py
│ │ │ ├── internlm_20b_qlora_open_platypus_e3.py
│ │ │ └── internlm_20b_qlora_sql_e3.py
│ │ ├── internlm_7b/
│ │ │ ├── internlm_7b_full_alpaca_e3.py
│ │ │ ├── internlm_7b_full_alpaca_enzh_e3.py
│ │ │ ├── internlm_7b_full_alpaca_enzh_oasst1_e3.py
│ │ │ ├── internlm_7b_full_alpaca_zh_e3.py
│ │ │ ├── internlm_7b_full_intern_repo_dataset_template.py
│ │ │ ├── internlm_7b_full_oasst1_e3.py
│ │ │ ├── internlm_7b_qlora_alpaca_e3.py
│ │ │ ├── internlm_7b_qlora_alpaca_enzh_e3.py
│ │ │ ├── internlm_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── internlm_7b_qlora_alpaca_zh_e3.py
│ │ │ ├── internlm_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── internlm_7b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm_7b_qlora_colorist_e5.py
│ │ │ ├── internlm_7b_qlora_json_e3.py
│ │ │ ├── internlm_7b_qlora_lawyer_e3.py
│ │ │ ├── internlm_7b_qlora_medical_e1.py
│ │ │ ├── internlm_7b_qlora_moss_sft_all_e1.py
│ │ │ ├── internlm_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ ├── internlm_7b_qlora_moss_sft_plugins_e1.py
│ │ │ ├── internlm_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ ├── internlm_7b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm_7b_qlora_oasst1_e3.py
│ │ │ ├── internlm_7b_qlora_oasst1_e3_hf.py
│ │ │ ├── internlm_7b_qlora_oasst1_mmlu_e3.py
│ │ │ ├── internlm_7b_qlora_open_platypus_e3.py
│ │ │ ├── internlm_7b_qlora_openorca_e1.py
│ │ │ ├── internlm_7b_qlora_sql_e3.py
│ │ │ └── internlm_7b_qlora_tiny_codes_e1.py
│ │ ├── internlm_chat_20b/
│ │ │ ├── internlm_chat_20b_qlora_alpaca_e3.py
│ │ │ ├── internlm_chat_20b_qlora_alpaca_enzh_e3.py
│ │ │ ├── internlm_chat_20b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── internlm_chat_20b_qlora_alpaca_zh_e3.py
│ │ │ ├── internlm_chat_20b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm_chat_20b_qlora_lawyer_e3.py
│ │ │ ├── internlm_chat_20b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm_chat_20b_qlora_oasst1_e3.py
│ │ │ └── internlm_chat_20b_qlora_open_platypus_e3.py
│ │ └── internlm_chat_7b/
│ │ ├── internlm_chat_7b_qlora_alpaca_e3.py
│ │ ├── internlm_chat_7b_qlora_alpaca_enzh_e3.py
│ │ ├── internlm_chat_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── internlm_chat_7b_qlora_alpaca_zh_e3.py
│ │ ├── internlm_chat_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── internlm_chat_7b_qlora_code_alpaca_e3.py
│ │ ├── internlm_chat_7b_qlora_colorist_e5.py
│ │ ├── internlm_chat_7b_qlora_lawyer_e3.py
│ │ ├── internlm_chat_7b_qlora_medical_e1.py
│ │ ├── internlm_chat_7b_qlora_oasst1_512_e3.py
│ │ ├── internlm_chat_7b_qlora_oasst1_e3.py
│ │ ├── internlm_chat_7b_qlora_open_platypus_e3.py
│ │ ├── internlm_chat_7b_qlora_openorca_e1.py
│ │ ├── internlm_chat_7b_qlora_sql_e3.py
│ │ └── internlm_chat_7b_qlora_tiny_codes_e1.py
│ ├── llama/
│ │ ├── llama2_70b/
│ │ │ ├── llama2_70b_full_wizardlm_e1.py
│ │ │ ├── llama2_70b_int8_lora_open_platypus_e1.py
│ │ │ ├── llama2_70b_int8_lora_open_platypus_e1_hf.py
│ │ │ ├── llama2_70b_qlora_open_platypus_e1.py
│ │ │ └── llama2_70b_qlora_open_platypus_e1_hf.py
│ │ ├── llama2_7b/
│ │ │ ├── llama2_7b_full_pgbooks_400iters_sp1.py
│ │ │ ├── llama2_7b_full_pgbooks_400iters_sp4.py
│ │ │ ├── llama2_7b_full_wizardlm_e1.py
│ │ │ ├── llama2_7b_qlora_alpaca_e3.py
│ │ │ ├── llama2_7b_qlora_alpaca_enzh_e3.py
│ │ │ ├── llama2_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── llama2_7b_qlora_alpaca_zh_e3.py
│ │ │ ├── llama2_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── llama2_7b_qlora_code_alpaca_e3.py
│ │ │ ├── llama2_7b_qlora_colorist_e5.py
│ │ │ ├── llama2_7b_qlora_lawyer_e3.py
│ │ │ ├── llama2_7b_qlora_medical_e1.py
│ │ │ ├── llama2_7b_qlora_moss_sft_all_e1.py
│ │ │ ├── llama2_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ ├── llama2_7b_qlora_moss_sft_plugins_e1.py
│ │ │ ├── llama2_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ ├── llama2_7b_qlora_oasst1_512_e3.py
│ │ │ ├── llama2_7b_qlora_oasst1_e3.py
│ │ │ ├── llama2_7b_qlora_open_platypus_e3.py
│ │ │ ├── llama2_7b_qlora_openorca_e1.py
│ │ │ ├── llama2_7b_qlora_sql_e3.py
│ │ │ └── llama2_7b_qlora_tiny_codes_e1.py
│ │ ├── llama2_7b_chat/
│ │ │ ├── llama2_7b_chat_qlora_alpaca_e3.py
│ │ │ ├── llama2_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── llama2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── llama2_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── llama2_7b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ ├── llama2_7b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── llama2_7b_chat_qlora_colorist_e5.py
│ │ │ ├── llama2_7b_chat_qlora_lawyer_e3.py
│ │ │ ├── llama2_7b_chat_qlora_medical_e1.py
│ │ │ ├── llama2_7b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── llama2_7b_chat_qlora_oasst1_e3.py
│ │ │ ├── llama2_7b_chat_qlora_open_platypus_e3.py
│ │ │ ├── llama2_7b_chat_qlora_openorca_e1.py
│ │ │ ├── llama2_7b_chat_qlora_sql_e3.py
│ │ │ └── llama2_7b_chat_qlora_tiny_codes_e1.py
│ │ ├── llama3_70b_instruct/
│ │ │ └── llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py
│ │ ├── llama3_8b/
│ │ │ ├── README.md
│ │ │ └── llama3_8b_full_alpaca_e3.py
│ │ ├── llama3_8b_instruct/
│ │ │ ├── llama3_8b_instruct_full_alpaca_e3.py
│ │ │ └── llama3_8b_instruct_qlora_alpaca_e3.py
│ │ └── llama_7b/
│ │ ├── llama_7b_qlora_alpaca_e3.py
│ │ ├── llama_7b_qlora_alpaca_enzh_e3.py
│ │ ├── llama_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── llama_7b_qlora_alpaca_zh_e3.py
│ │ ├── llama_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── llama_7b_qlora_code_alpaca_e3.py
│ │ ├── llama_7b_qlora_colorist_e5.py
│ │ ├── llama_7b_qlora_lawyer_e3.py
│ │ ├── llama_7b_qlora_medical_e1.py
│ │ ├── llama_7b_qlora_moss_sft_all_e1.py
│ │ ├── llama_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ ├── llama_7b_qlora_moss_sft_plugins_e1.py
│ │ ├── llama_7b_qlora_oasst1_512_e3.py
│ │ ├── llama_7b_qlora_oasst1_e3.py
│ │ ├── llama_7b_qlora_open_platypus_e3.py
│ │ ├── llama_7b_qlora_openorca_e1.py
│ │ ├── llama_7b_qlora_sql_e3.py
│ │ └── llama_7b_qlora_tiny_codes_e1.py
│ ├── llama_speed_benchmark/
│ │ ├── llama2_70b/
│ │ │ ├── llama2_70b_full_alpaca_enzh_128k_sp8.py
│ │ │ ├── llama2_70b_full_alpaca_enzh_256k_sp16.py
│ │ │ ├── llama2_70b_full_alpaca_enzh_32k_sp4.py
│ │ │ └── llama2_70b_full_alpaca_enzh_8k_sp1.py
│ │ ├── llama2_7b/
│ │ │ ├── llama2_7b_full_alpaca_enzh_128k_sp8.py
│ │ │ ├── llama2_7b_full_alpaca_enzh_1M_sp16.py
│ │ │ ├── llama2_7b_full_alpaca_enzh_256k_sp8.py
│ │ │ ├── llama2_7b_full_alpaca_enzh_32k_sp1.py
│ │ │ └── llama2_7b_full_alpaca_enzh_8k_sp1.py
│ │ └── yi_34b/
│ │ ├── yi_34b_200k_full_alpaca_enzh_128k_sp8.py
│ │ ├── yi_34b_200k_full_alpaca_enzh_256k_sp8.py
│ │ ├── yi_34b_200k_full_alpaca_enzh_32k_sp2.py
│ │ └── yi_34b_200k_full_alpaca_enzh_8k_sp1.py
│ ├── llava/
│ │ ├── README.md
│ │ ├── README_zh-CN.md
│ │ ├── internlm2_chat_1_8b_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ └── llava_internlm2_chat_1_8b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── pretrain/
│ │ │ └── llava_internlm2_chat_1_8b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── internlm2_chat_20b_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ ├── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ └── llava_internlm2_chat_20b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── pretrain/
│ │ │ └── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── internlm2_chat_7b_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ ├── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ └── llava_internlm2_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── pretrain/
│ │ │ └── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── internlm_chat_7b_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ └── llava_internlm_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── pretrain/
│ │ │ └── llava_internlm_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── llama3_70b_instruct_clip_vit_large_p14_336/
│ │ │ └── pretrain/
│ │ │ └── llava_llama3_70b_instruct_quant_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── llama3_8b_instruct_clip_vit_large_p14_336/
│ │ │ ├── README.md
│ │ │ ├── convert_xtuner_weights_to_hf.py
│ │ │ ├── convert_xtuner_weights_to_llava.py
│ │ │ ├── finetune/
│ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_internvl_finetune.py
│ │ │ │ └── llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_e1_gpu1_finetune.py
│ │ │ └── pretrain/
│ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py
│ │ │ └── llava_llama3_8b_instruct_quant_clip_vit_large_p14_336_e1_gpu1_pretrain.py
│ │ ├── official/
│ │ │ ├── llava_v15_13b/
│ │ │ │ ├── llava_v15_13b_finetune.py
│ │ │ │ ├── llava_v15_13b_finetune_lora.py
│ │ │ │ └── llava_v15_13b_pretrain.py
│ │ │ └── llava_v15_7b/
│ │ │ ├── llava_v15_7b_finetune.py
│ │ │ ├── llava_v15_7b_finetune_lora.py
│ │ │ └── llava_v15_7b_pretrain.py
│ │ ├── phi3_mini_4k_instruct_clip_vit_large_p14_336/
│ │ │ ├── README.md
│ │ │ ├── convert_phi_to_llama.py
│ │ │ ├── convert_xtuner_weights_to_hf.py
│ │ │ ├── convert_xtuner_weights_to_llava.py
│ │ │ ├── finetune/
│ │ │ │ ├── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ └── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_full_e2_gpu8_internvl_finetune.py
│ │ │ └── pretrain/
│ │ │ ├── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ └── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py
│ │ ├── vicuna_13b_v15_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ └── llava_vicuna_13b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── pretrain/
│ │ │ └── llava_vicuna_13b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ └── vicuna_7b_v15_clip_vit_large_p14_336/
│ │ ├── finetune/
│ │ │ ├── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune_refcoco.py
│ │ └── pretrain/
│ │ └── llava_vicuna_7b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ ├── mistral/
│ │ ├── mistral_7b_full_finetune_custom_dataset_e1.py
│ │ ├── mistral_7b_qlora_skypile_pretrain_e1.py
│ │ ├── mistral_7b_w_tokenized_dataset.py
│ │ └── mistral_7b_w_untokenized_dataset.py
│ ├── mixtral/
│ │ ├── README.md
│ │ ├── mixtral_8x7b/
│ │ │ ├── mixtral_8x7b_full_oasst1_e3.py
│ │ │ └── mixtral_8x7b_qlora_oasst1_e3.py
│ │ └── mixtral_8x7b_instruct/
│ │ ├── mixtral_8x7b_instruct_full_oasst1_e3.py
│ │ └── mixtral_8x7b_instruct_qlora_oasst1_e3.py
│ ├── orpo/
│ │ ├── internlm/
│ │ │ ├── internlm2_chat_1_8b_orpo_full.py
│ │ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn.py
│ │ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn_jsonl_dataset.py
│ │ │ └── internlm2_chat_7b_orpo_qlora_varlenattn_ultrafeedback_e5.py
│ │ └── llama/
│ │ └── llama3_8b_instruct_orpo_qlora_varlenattn_ultrafeedback_e5.py
│ ├── phi/
│ │ └── phi3/
│ │ ├── phi3_mini_128k_instruct_full_alpaca_e3.py
│ │ ├── phi3_mini_128k_instruct_qlora_alpaca_e3.py
│ │ ├── phi3_mini_4k_instruct_full_alpaca_e3.py
│ │ └── phi3_mini_4k_instruct_qlora_alpaca_e3.py
│ ├── qwen/
│ │ ├── qwen1/
│ │ │ ├── qwen_1_8b/
│ │ │ │ ├── qwen_1_8b_qlora_alpaca_e3.py
│ │ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── qwen_1_8b_qlora_alpaca_zh_e3.py
│ │ │ │ └── qwen_1_8b_qlora_code_alpaca_e3.py
│ │ │ ├── qwen_1_8b_chat/
│ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_e3.py
│ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ └── qwen_1_8b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── qwen_72b/
│ │ │ │ ├── qwen_72b_qlora_alpaca_e3.py
│ │ │ │ ├── qwen_72b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── qwen_72b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── qwen_72b_qlora_alpaca_zh_e3.py
│ │ │ │ └── qwen_72b_qlora_code_alpaca_e3.py
│ │ │ ├── qwen_7b/
│ │ │ │ ├── qwen_7b_qlora_alpaca_e3.py
│ │ │ │ ├── qwen_7b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── qwen_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── qwen_7b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── qwen_7b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── qwen_7b_qlora_code_alpaca_e3.py
│ │ │ │ ├── qwen_7b_qlora_colorist_e5.py
│ │ │ │ ├── qwen_7b_qlora_lawyer_e3.py
│ │ │ │ ├── qwen_7b_qlora_medical_e1.py
│ │ │ │ ├── qwen_7b_qlora_moss_sft_all_e1.py
│ │ │ │ ├── qwen_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ │ ├── qwen_7b_qlora_moss_sft_plugins_e1.py
│ │ │ │ ├── qwen_7b_qlora_oasst1_512_e3.py
│ │ │ │ ├── qwen_7b_qlora_oasst1_e3.py
│ │ │ │ ├── qwen_7b_qlora_open_platypus_e3.py
│ │ │ │ ├── qwen_7b_qlora_openorca_e1.py
│ │ │ │ ├── qwen_7b_qlora_sql_e3.py
│ │ │ │ └── qwen_7b_qlora_tiny_codes_e1.py
│ │ │ └── qwen_7b_chat/
│ │ │ ├── qwen_7b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── qwen_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── qwen_7b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ ├── qwen_7b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── qwen_7b_chat_qlora_colorist_e5.py
│ │ │ ├── qwen_7b_chat_qlora_lawyer_e3.py
│ │ │ ├── qwen_7b_chat_qlora_medical_e1.py
│ │ │ ├── qwen_7b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── qwen_7b_chat_qlora_oasst1_e3.py
│ │ │ ├── qwen_7b_chat_qlora_open_platypus_e3.py
│ │ │ ├── qwen_7b_chat_qlora_openorca_e1.py
│ │ │ ├── qwen_7b_chat_qlora_sql_e3.py
│ │ │ └── qwen_7b_chat_qlora_tiny_codes_e1.py
│ │ └── qwen1_5/
│ │ ├── qwen1_5_0_5b/
│ │ │ ├── qwen1_5_0_5b_full_alpaca_e3.py
│ │ │ └── qwen1_5_0_5b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_0_5b_chat/
│ │ │ ├── qwen1_5_0_5b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_0_5b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_110b/
│ │ │ ├── qwen1_5_110b_full_alpaca_e3.py
│ │ │ └── qwen1_5_110b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_110b_chat/
│ │ │ ├── README.md
│ │ │ ├── qwen1_5_110b_chat_full_alpaca_e3.py
│ │ │ ├── qwen1_5_110b_chat_qlora_alpaca_e3.py
│ │ │ └── qwen1_5_110b_chat_qlora_alpaca_e3_16k_2gpus.py
│ │ ├── qwen1_5_14b/
│ │ │ ├── qwen1_5_14b_full_alpaca_e3.py
│ │ │ └── qwen1_5_14b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_14b_chat/
│ │ │ ├── qwen1_5_14b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_14b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_1_8b/
│ │ │ ├── qwen1_5_1_8b_full_alpaca_e3.py
│ │ │ └── qwen1_5_1_8b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_1_8b_chat/
│ │ │ ├── qwen1_5_1_8b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_1_8b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_4b/
│ │ │ ├── qwen1_5_4b_full_alpaca_e3.py
│ │ │ └── qwen1_5_4b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_4b_chat/
│ │ │ ├── qwen1_5_4b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_4b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_72b/
│ │ │ ├── qwen1_5_72b_full_alpaca_e3.py
│ │ │ └── qwen1_5_72b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_72b_chat/
│ │ │ ├── qwen1_5_72b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_72b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_7b/
│ │ │ ├── qwen1_5_7b_full_alpaca_e3.py
│ │ │ └── qwen1_5_7b_qlora_alpaca_e3.py
│ │ └── qwen1_5_7b_chat/
│ │ ├── qwen1_5_7b_chat_full_alpaca_e3.py
│ │ └── qwen1_5_7b_chat_qlora_alpaca_e3.py
│ ├── qwen_moe/
│ │ └── qwen1_5/
│ │ └── qwen1_5_moe_a2_7_b_chat/
│ │ └── qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py
│ ├── reward_model/
│ │ ├── internlm/
│ │ │ ├── internlm2_chat_1_8b_reward_full_ultrafeedback.py
│ │ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_jsonl_dataset.py
│ │ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_ultrafeedback.py
│ │ │ └── internlm2_chat_1_8b_reward_qlora_varlenattn_ultrafeedback.py
│ │ └── llama/
│ │ └── llama3_8b_instruct_reward_full_varlenattn_ultrafeedback.py
│ ├── starcoder/
│ │ └── starcoder_qlora_stack_exchange_example.py
│ ├── yi/
│ │ ├── yi_34b/
│ │ │ └── yi_34b_qlora_alpaca_enzh_e3.py
│ │ └── yi_6b/
│ │ └── yi_6b_qlora_alpaca_enzh_e3.py
│ └── zephyr/
│ └── zephyr_7b_beta_qlora_alpaca_e3.py
├── dataset/
│ ├── __init__.py
│ ├── collate_fns/
│ │ ├── __init__.py
│ │ ├── default_collate_fn.py
│ │ ├── mmlu_collate_fn.py
│ │ └── preference_collate_fn.py
│ ├── concat_dataset.py
│ ├── huggingface.py
│ ├── intern_repo.py
│ ├── json_dataset.py
│ ├── llava.py
│ ├── map_fns/
│ │ ├── __init__.py
│ │ ├── dataset_map_fns/
│ │ │ ├── __init__.py
│ │ │ ├── alpaca_map_fn.py
│ │ │ ├── alpaca_zh_map_fn.py
│ │ │ ├── arxiv_map_fn.py
│ │ │ ├── code_alpaca_map_fn.py
│ │ │ ├── colors_map_fn.py
│ │ │ ├── crime_kg_assitant_map_fn.py
│ │ │ ├── default_map_fn.py
│ │ │ ├── law_reference_map_fn.py
│ │ │ ├── llava_map_fn.py
│ │ │ ├── medical_map_fn.py
│ │ │ ├── msagent_map_fn.py
│ │ │ ├── oasst1_map_fn.py
│ │ │ ├── openai_map_fn.py
│ │ │ ├── openorca_map_fn.py
│ │ │ ├── pretrain_map_fn.py
│ │ │ ├── sql_map_fn.py
│ │ │ ├── stack_exchange_map_fn.py
│ │ │ ├── tiny_codes_map_fn.py
│ │ │ └── wizardlm_map_fn.py
│ │ └── template_map_fn.py
│ ├── modelscope.py
│ ├── moss_sft.py
│ ├── preference_dataset.py
│ ├── refcoco_json.py
│ ├── samplers/
│ │ ├── __init__.py
│ │ ├── intern_repo.py
│ │ └── length_grouped.py
│ └── utils.py
├── engine/
│ ├── __init__.py
│ ├── _strategy/
│ │ ├── __init__.py
│ │ └── deepspeed.py
│ ├── hooks/
│ │ ├── __init__.py
│ │ ├── dataset_info_hook.py
│ │ ├── evaluate_chat_hook.py
│ │ ├── hf_checkpoint_hook.py
│ │ ├── throughput_hook.py
│ │ └── varlen_attn_args_to_messagehub_hook.py
│ └── runner/
│ ├── __init__.py
│ └── loops.py
├── entry_point.py
├── evaluation/
│ ├── __init__.py
│ └── metrics/
│ ├── __init__.py
│ ├── mmlu_metric.py
│ └── reward_metric.py
├── model/
│ ├── __init__.py
│ ├── dpo.py
│ ├── llava.py
│ ├── modules/
│ │ ├── __init__.py
│ │ ├── dispatch/
│ │ │ ├── __init__.py
│ │ │ ├── attention.py
│ │ │ ├── baichuan.py
│ │ │ ├── cohere.py
│ │ │ ├── deepseek_v2.py
│ │ │ ├── internlm.py
│ │ │ ├── internlm2.py
│ │ │ ├── llama.py
│ │ │ ├── mistral.py
│ │ │ ├── phi3.py
│ │ │ ├── qwen2.py
│ │ │ ├── triton_kernels/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── layer_norm.py
│ │ │ │ ├── rms_norm.py
│ │ │ │ └── rotary.py
│ │ │ ├── utils.py
│ │ │ └── yi.py
│ │ └── projector/
│ │ ├── __init__.py
│ │ ├── configuration_projector.py
│ │ └── modeling_projector.py
│ ├── orpo.py
│ ├── reward.py
│ ├── sft.py
│ ├── transformers_models/
│ │ ├── __init__.py
│ │ ├── deepseek_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_deepseek.py
│ │ │ ├── modeling_deepseek.py
│ │ │ └── tokenization_deepseek_fast.py
│ │ └── mixtral/
│ │ ├── __init__.py
│ │ ├── configuration_mixtral.py
│ │ └── modeling_mixtral.py
│ └── utils.py
├── parallel/
│ ├── __init__.py
│ └── sequence/
│ ├── __init__.py
│ ├── attention.py
│ ├── comm.py
│ ├── data_collate.py
│ ├── reduce_loss.py
│ ├── sampler.py
│ └── setup_distributed.py
├── registry.py
├── tools/
│ ├── chat.py
│ ├── check_custom_dataset.py
│ ├── copy_cfg.py
│ ├── data_preprocess/
│ │ ├── arxiv.py
│ │ └── convert_refcoco.py
│ ├── eval_refcoco.py
│ ├── get_data_order.py
│ ├── list_cfg.py
│ ├── list_dataset_format.py
│ ├── log_dataset.py
│ ├── mmbench.py
│ ├── model_converters/
│ │ ├── merge.py
│ │ ├── modeling_internlm2_reward/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_internlm2.py
│ │ │ └── modeling_internlm2.py
│ │ ├── pth_to_hf.py
│ │ └── split.py
│ ├── plugins/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── calculate.py
│ │ ├── search.py
│ │ └── solve.py
│ ├── process_untokenized_datasets.py
│ ├── process_untokenized_datasets_legacy.py
│ ├── process_untokenized_llava_data.py
│ ├── test.py
│ ├── tokenize_ftdp_datasets.py
│ ├── train.py
│ └── utils.py
├── utils/
│ ├── __init__.py
│ ├── constants.py
│ ├── fileio.py
│ ├── handle_moe_load_and_save.py
│ ├── stop_criteria.py
│ ├── templates.py
│ └── zero_to_any_dtype.py
└── version.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitattributes
================================================
# Auto detect text files and perform LF normalization
logs
* text=auto
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2025 Yi Wang
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
<div align="center">
<h2><a href="https://www.arxiv.org/abs/2501.00574">VideoChat-Flash: Hierarchical Compression for Long-Context Video Modeling</a></h2>
[Xinhao Li](https://scholar.google.com/citations?user=evR3uR0AAAAJ), [Yi Wang](https://scholar.google.com.hk/citations?user=Xm2M8UwAAAAJ), [Jiashuo Yu](https://scholar.google.com.hk/citations?user=iH0Aq0YAAAAJ&oi=ao), [Xiangyu Zeng](https://scholar.google.com/citations?user=jS13DXkAAAAJ&hl=zh-CN), Yuhan Zhu, Haian Huang, Jianfei Gao, [Kunchang Li](https://scholar.google.com/citations?user=D4tLSbsAAAAJ), [Yinan He](https://dblp.org/pid/93/7763.html), Chenting Wang, [Yu Qiao](https://scholar.google.com/citations?user=gFtI-8QAAAAJ&hl), [Yali Wang](https://scholar.google.com/citations?user=hD948dkAAAAJ), and [Limin Wang](https://scholar.google.com/citations?user=HEuN8PcAAAAJ)
<p align="center">
🤗 <a href="https://huggingface.co/collections/OpenGVLab/videochat-flash-6781493748713b5ba2b705e0">Model & Data</a>    |   🖥️ <a href="">Demo</a>    |    📑 <a href="https://www.arxiv.org/abs/2501.00574">Paper</a>    |    🌐 <a href="https://internvideo.github.io/blog/2024-12-31-VideoChat-Flash/">Blog</a>
<br>
</p>
</div>
## :fire: Updates
- [x] **2025/06/13**: 🎉🎉🎉Our model achieves promising results on [VideoEval-Pro](https://arxiv.org/abs/2505.14640), a benchmark focused on long video understanding!
- [x] **2025/05/10**:🔥🔥🔥 We release most videos of our [training data](https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data). We hope it can be of help to you!
- [x] **2025/03/27**:🔥🔥 We release our dataset and evaluation codes for single-hop and multi-hop needle-in-a-haystack!
- [x] **2025/03/09**:🔥🔥 We release the weights of each training stage [here](https://github.com/OpenGVLab/VideoChat-Flash/blob/main/llava-train_videochat/README.). Try building your own VideoChat-Flash on top of them!
- [x] **2025/02/25**:🔥🔥 We release our [training data](https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data), [LLaVA-based training code](llava-train_videochat) for VideoChat-Flash, and [XTuner-based training code](xtuner-train_internvideo2_5) for finetuning InternVideo2.5.
- [x] **2025/02/12**: 🎉🎉🎉Our VideoChat-Flash-7B@448 has achieved first place on the latest Video Detail Caption Benchmark, [AuroraCap](https://rese1f.github.io/aurora-web/).
- [x] **2025/01/15**: We provide [evaluation codes](lmms-eval_videochat) for QA & grounding benchmarks.
- [x] **2025/01/12**: 🔥🔥🔥Release **VideoChat2-Flash**, a powerful MLLM built on a video encoder ([InternVideo](https://github.com/OpenGVLab/InternVideo)) and an LLM ([Qwen](https://github.com/QwenLM/Qwen)).
- We offer five models, [VideoChat2-Flash-2B@224](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2_5-2B_res448) (Small LLM), [VideoChat2-Flash-7B@224](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2-7B_res224), [VideoChat2-Flash-7B@448](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2-7B_res448) (Overall best), [VideoChat-Flash-Qwen2_5-7B-1M](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2_5-7B-1M_res224) (Super long video input) and [VideoChat-Flash-Qwen2_5-7B_InternVideo2-1B](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2_5-7B_InternVideo2-1B) (Stronger short-term temporal understanding).
<!-- - We provide [online demos]() and demo codes that can be run locally. -->
## 📑 Future Plan
- [ ] lmdeploy/vllm support for Videochat-Flash and InternVideo2.5
- [ ] LoRA finetuning training code for Videochat-Flash and InternVideo2.5
- [ ] Mixing image/video training code for InternVideo2.5
- [ ] Faster training code with XTuner for VideoChat-Flash
As I am currently very busy with work and find it difficult to complete the above plans quickly, I sincerely ask friends in the community to join in and **submit a PR**.
## :parrot: Introduction
**🚀State-of-the-art performance** in short and long video understanding, with temporal localization capabilities comparable to expert models.

**🔭Supports ultra-long video inputs**, achieving a groundbreaking needle-in-a-haystack evaluation accuracy of **99.1% on 10,000 frames** and processing videos up to three hours long.

**⚡Highly efficient model architecture** with exceptional inference speed, encoding each video frame into just **16 tokens**, making it **5–10** times faster than the previous model.

## Demo & Inference
Refer to the [HF README](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2_5-2B_res448) for how to run inference with our model.
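For quick reference, here is a minimal sketch of loading the model with `transformers` remote code. The `chat` helper and its arguments are defined by the model repo's custom code and are assumptions here, so consult the linked README for the authoritative example.
```python
from transformers import AutoModel, AutoTokenizer

# A minimal sketch assuming the remote-code interface from the HF model card;
# the `chat` helper and its arguments come from the repo's custom code, so
# check the linked README for the exact signature.
model_path = "OpenGVLab/VideoChat-Flash-Qwen2_5-2B_res448"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()

output, history = model.chat(
    video_path="your_video.mp4",  # placeholder path
    tokenizer=tokenizer,
    user_prompt="Describe this video in detail.",
    return_history=True,
    max_num_frames=512,
)
print(output)
```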
## Evaluation
See our [evaluation codes](lmms-eval_videochat). [lmms-eval](https://github.com/EvolvingLMMs-Lab/lmms-eval) also supports our model, so you can use it to evaluate our model on various benchmarks.
## Training
See the [LLaVA-based training code](llava-train_videochat) for VideoChat-Flash and the [XTuner-based training code](xtuner-train_internvideo2_5) for finetuning InternVideo2.5.
## :bar_chart: [NIAH](./BENCHMARK.md)

See [xtuner-eval_niah](xtuner-eval_niah) for evaluation of Single-Hop NIAH-Video and Multi-Hop NIAH-Video.
# :page_facing_up: Citation
If you find this project useful in your research, please consider citing it:
```BibTeX
@article{li2024videochat,
title={VideoChat-Flash: Hierarchical Compression for Long-Context Video Modeling},
author={Li, Xinhao and Wang, Yi and Yu, Jiashuo and Zeng, Xiangyu and Zhu, Yuhan and Huang, Haian and Gao, Jianfei and Li, Kunchang and He, Yinan and Wang, Chenting and Qiao, Yu and Wang, Yali and Wang, Limin},
journal={arXiv preprint arXiv:2501.00574},
year={2024}
}
```
# :dizzy: Acknowledgement
Thanks to the following open-source projects: [InternVideo](https://github.com/OpenGVLab/InternVideo), [UMT](https://github.com/OpenGVLab/unmasked_teacher), [Qwen](https://github.com/QwenLM/Qwen), [LLaVA-VL](https://github.com/LLaVA-VL/LLaVA-NeXT), [lmms-eval](https://github.com/EvolvingLMMs-Lab/lmms-eval), [Ask-Anything](https://github.com/OpenGVLab/Ask-Anything), [ToMe](https://github.com/facebookresearch/ToMe), [LongVLM](https://github.com/ziplab/LongVLM), [FastV](https://github.com/pkunlp-icler/FastV), [LLaVolta](https://github.com/Beckschen/LLaVolta), [PyramidDrop](https://github.com/Cooperx521/PyramidDrop), [LongVA](https://github.com/EvolvingLMMs-Lab/LongVA); their implementations provided valuable references for our project.
================================================
FILE: llava-train_videochat/.dockerignore
================================================
# The .dockerignore file excludes files from the container build process.
#
# https://docs.docker.com/engine/reference/builder/#dockerignore-file
# Exclude Git files
.git
.github
.gitignore
# Exclude Python cache files
__pycache__
.mypy_cache
.pytest_cache
.ruff_cache
# Exclude Python virtual environment
/venv
# Exclude some weights
/openai
/liuhaotian
================================================
FILE: llava-train_videochat/.editorconfig
================================================
root = true
# Unix-style newlines with a newline ending every file
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
# 4 space indentation
[*.{py,json}]
indent_style = space
indent_size = 4
# 2 space indentation
[*.{md,sh,yaml,yml}]
indent_style = space
indent_size = 2
================================================
FILE: llava-train_videochat/.gitattributes
================================================
# https://git-scm.com/docs/gitattributes
# Set the default behavior, in case people don't have core.autocrlf set.
# https://git-scm.com/docs/gitattributes#_end_of_line_conversion
* text=auto
# common python attributes, taken from https://github.com/alexkaratarakis/gitattributes/blob/710900479a2bedeec7003d381719521ffbb18bf8/Python.gitattributes
# Source files
# ============
*.pxd text diff=python
*.py text diff=python
*.py3 text diff=python
*.pyw text diff=python
*.pyx text diff=python
*.pyz text diff=python
*.pyi text diff=python
# Binary files
# ============
*.db binary
*.p binary
*.pkl binary
*.pickle binary
*.pyc binary export-ignore
*.pyo binary export-ignore
*.pyd binary
# Jupyter notebook
*.ipynb text eol=lf
================================================
FILE: llava-train_videochat/.gitignore
================================================
# Python
__pycache__
*.pyc
*.egg-info
dist
# Log
*.log
*.log.*
# *.json
# *.jsonl
# Data
!**/alpaca-data-conversation.json
# Editor
.idea
*.swp
.vscode
# Other
.DS_Store
wandb
output
llavavid
checkpoints
project_checkpoints
debug_checkpoints
playground/data
playground/cc3m_llava34b_cap
ckpts*
.ipynb_checkpoints
chunyl_scripts
*.ipynb
# DevContainer
!.devcontainer/*
# Demo
serve_images/
notebooks/
logs
scripts/dist_*
logs/
submissions/
cn_scripts/
internal_project_checkpoints/
work_dirs
scripts/i18n/*
playground/.nfs028b000000010add00000001
HIP
playground/.nfs028b0000017bff2c00000012
scripts/qwen
scripts/vicuna
scripts/mistral
scripts/baseline_rep
scripts/cn_boli01_hl
scripts/cn_boli01_lf
scripts/cn_lf
scripts/cn_lq
scripts/cn_yg
scripts/cn_yg_hao
scripts/eva_encoder
scripts/i18n
scripts/i18n_higher_res
scripts/multi-images
scratchpad
build/
playground/*.json
mlx_configs/
data_processing/
# demo/
================================================
FILE: llava-train_videochat/LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: llava-train_videochat/README.md
================================================
# 👀How to train and evaluate VideoChat-Flash?🦜
## 1. Prepare Training Data
Note that our data has been collected and used across different projects and by different people. For the data that has already been uploaded, we point you to the corresponding locations; please collect the relevant data fragments and integrate them in your own environment. We use a data format similar to [LLaVA-NeXT](https://github.com/LLaVA-VL/LLaVA-NeXT/tree/main/scripts/train). ***You can customize your own training data in this format***.
In [data](data), we provide the data used in each training stage, along with the corresponding annotation locations. We have made all the data annotations and some of the videos available on [OpenGVLab/VideoChat-Flash-Training-Data](https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data), and all video source URLs are listed in the annotation files.
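For reference, here is a minimal sketch of a single annotation entry in this LLaVA-style format (field names follow LLaVA-NeXT conventions; the id, video path, and conversation text are hypothetical placeholders):
```python
import json

# One annotation entry in the LLaVA-style conversation format; the id, video
# path, and conversation text below are hypothetical placeholders.
entry = {
    "id": "my_dataset_0000",
    "video": "clips/clip_0000.mp4",  # resolved relative to the dataset's data_root
    "conversations": [
        {"from": "human", "value": "<image>\nDescribe what happens in this video."},
        {"from": "gpt", "value": "A person slices vegetables on a cutting board."},
    ],
}

# Write a list of such entries to a json file, then register that file as a
# `json_path` entry (with `data_root` and `sampling_strategy`) in the stage's
# data yaml, e.g. data/ablation_short-long_mix_sft.yaml.
with open("my_custom_sft.json", "w") as f:
    json.dump([entry], f, indent=2)
```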
## 2. Training
| Stage | Num. frames | ViT | Connector | LLM | CKPT |
|--------|:-------:|:------:|:------:|:------:|:------:|
| [stage1](scripts/train/stage1-init_connector) | 4 | :snowflake: | :fire: | :snowflake: | [all projector weights](https://huggingface.co/OpenGVLab/stage1-mm-projectors/tree/main) |
| [stage2](scripts/train/stage2-visual_pretraining) | 4-8 | :fire: | :fire: | :fire: | [UMT-Qwen2_7B](https://huggingface.co/OpenGVLab/stage2-UMT-Qwen2-7B-tome16_mlp), [UMT-Qwen2_5_1M_7B](https://huggingface.co/OpenGVLab/stage2-UMT-Qwen2_5_7B_1m-tome16_mlp), [UMT-HD-Qwen2_5_2B](https://huggingface.co/OpenGVLab/stage2-UMT-Qwen2_5_1.5B-tome16_mlp), [InternVideo2-Qwen2_5_7B](https://huggingface.co/OpenGVLab/stage2-InternVideo2-1B-Qwen2_5-7B-tome16_mlp) |
| [stage3](scripts/train/stage3-video_sft) | 64-512 | :fire: | :fire: | :fire: | [UMT-Qwen2_7B](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2-7B_res448),[UMT-HD-Qwen2_5-2B](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2_5-2B_res448),[UMT-Qwen2_5_1M_7B](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2_5-7B-1M_res224), [InternVideo2-Qwen2_5_7B](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2_5-7B_InternVideo2-1B) |
| [stage4](scripts/train/stage4_highres_postft) | 64-512 | :fire: | :fire: | :snowflake: | [UMT-HD-Qwen2-7B](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2-7B_res448)|
Training time with 32 A100 GPUs:
- stage1: under one hour
- stage2: about 2 days
- stage3: about 2~3 days
- stage4: about 2~3 days
### Tips
- ***We recommend starting from stage3 with our provided stage2 model to save training cost, and you can use [1/4 of the stage3 data](data/ablation_short-long_mix_sft.yaml) for ablations (as we do)! You can also skip stage4 if you don't need absolute SoTA performance!***
- We use Slurm to train models on multiple machines. **If you only have one machine or don't use Slurm**, please refer to [LLaVA-NeXT](https://github.com/LLaVA-VL/LLaVA-NeXT/blob/main/scripts/train/finetune_ov.sh) to modify the scripts.
- If you want to finetune [UMT-Qwen2_5_1M_7B](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2_5-7B-1M_res224), reduce [`max_position_embeddings`](https://huggingface.co/OpenGVLab/VideoChat-Flash-Qwen2_5-7B-1M_res224/blob/main/config.json#L185) to a smaller value such as 32768 to avoid CUDA OOM! A sketch of this edit is shown below.
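As a concrete illustration of the last tip, a minimal sketch of that config edit (assuming `max_position_embeddings` is a top-level key of the checkpoint's `config.json`; the local path is a placeholder):
```python
import json

# Shrink the context window of the 1M-context checkpoint before finetuning;
# the checkpoint path below is a placeholder for your local copy.
cfg_path = "VideoChat-Flash-Qwen2_5-7B-1M_res224/config.json"
with open(cfg_path) as f:
    cfg = json.load(f)

cfg["max_position_embeddings"] = 32768  # lower it to avoid CUDA OOM
with open(cfg_path, "w") as f:
    json.dump(cfg, f, indent=2)
```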
### Install
```bash
git clone https://github.com/OpenGVLab/VideoChat-Flash
cd llava-train_videochat
pip install -e .
```
### Stage-1: Video-Language Alignment
Please download the pretrained video encoders from [Hugging Face](https://huggingface.co/OpenGVLab/Video_Encoders_for_Training_VideoChat-Flash) first. Then modify `ckpt_path` in `build_vit` of `llava/model/multimodal_encoder/umt_encoder.py` or `llava/model/multimodal_encoder/internvideo2_encoder.py`.
```bash
bash scripts/train/stage1-init_connector/stage1_umt_tome16_res224_qwen7b.sh
```
### Stage-2: Short Video Pre-training
```bash
bash scripts/train/stage2-visual_pretraining/stage2_umt_tome16_res224_qwen_7b.sh
```
### Stage-3: Joint Short & Long Video Instruction Tuning
```bash
bash scripts/train/stage3-video_sft/stage3_umt_tome16_res224_qwen_7b.sh
```
### Stage-4: Efficient High-Resolution Post-finetuning
Please modify `vision_tower="umt-hd-large"` in `Your_stage3_checkpoint_path/config.json` first!
```bash
bash scripts/train/stage4_highres_postft/stage4_umt_tome16_res448_qwen_7b.sh
```
## Evaluation
Overwrite your checkpoint directory with the configuration (JSON) and Python files from OpenGVLab/VideoChat-Flash; then you can use the lmms-eval_videochat we provide for evaluation. One possible way to do the overwrite is sketched below.
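A minimal sketch of that overwrite using `huggingface_hub` (the repo id and local path are placeholders; pick the released model that matches your training setup):
```python
from huggingface_hub import snapshot_download

# Fetch only the json configs and Python files from a released checkpoint and
# drop them into your own checkpoint directory (both paths are placeholders).
snapshot_download(
    repo_id="OpenGVLab/VideoChat-Flash-Qwen2-7B_res224",
    allow_patterns=["*.json", "*.py"],
    local_dir="Your_stage3_checkpoint_path",
)
```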
================================================
FILE: llava-train_videochat/cog.yaml
================================================
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
build:
gpu: true
python_version: "3.11"
python_packages:
- "torch==2.0.1"
- "accelerate==0.21.0"
- "bitsandbytes==0.41.0"
- "deepspeed==0.9.5"
- "einops-exts==0.0.4"
- "einops==0.6.1"
- "gradio==3.35.2"
- "gradio_client==0.2.9"
- "httpx==0.24.0"
- "markdown2==2.4.10"
- "numpy==1.26.0"
- "peft==0.4.0"
- "scikit-learn==1.2.2"
- "sentencepiece==0.1.99"
- "shortuuid==1.0.11"
- "timm==0.6.13"
- "tokenizers==0.13.3"
- "torch==2.0.1"
- "torchvision==0.15.2"
- "transformers==4.31.0"
- "wandb==0.15.12"
- "wavedrom==2.0.3.post3"
- "Pygments==2.16.1"
run:
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.0.3/pget" && chmod +x /usr/local/bin/pget
# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
================================================
FILE: llava-train_videochat/data/ablation_short-long_mix_sft.yaml
================================================
datasets:
# image sft datasets
- json_path: annotations/image/textcaps.json # 21942
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/textcaps
- json_path: annotations/image/textocr(gpt4v).json # 25104
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/textocr(gpt4v)
- json_path: annotations/image/rendered_text(cauldron)_fix.json # 9995
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/rendered_text(cauldron)
- json_path: annotations/image/iam(cauldron)_fix.json # 5658
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/iam(cauldron)
- json_path: annotations/image/llavar_gpt4_20k.json # 19790
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/llavar_gpt4_20k
- json_path: annotations/image/allava_instruct_vflan4v.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/allava_instruct_vflan4v
- json_path: annotations/image/allava_instruct_laion4v.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/allava_instruct_laion4v
- json_path: annotations/image/sharegpt4o.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4o
- json_path: annotations/image/sharegpt4v(coco).json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(coco)
- json_path: annotations/image/sharegpt4v(knowledge).json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(knowledge)
- json_path: annotations/image/sharegpt4v(llava).json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(llava)
- json_path: annotations/image/sharegpt4v(sam).json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(sam)
- json_path: annotations/image/tallyqa(cauldron,llava_format)_fix.json # 98675
sampling_strategy: "first:10%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/tallyqa(cauldron,llava_format) # 98680
- json_path: annotations/image/st_vqa(cauldron,llava_format)_fix.json # 17242
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/st_vqa(cauldron,llava_format) # 17247
- json_path: annotations/image/llava_next_raw_format_processed_738k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-NeXT-Data
- json_path: https://huggingface.co/datasets/lmms-lab/M4-Instruct-Data/m4_instruct_annotations.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/M4-Instruct-Data
# video sft datasets
- json_path: annotations/video/caption_sharegemini_webvid_core100k_clean.json
sampling_strategy: "first:20%"
data_root: https://github.com/m-bain/webvid
- json_path: annotations/video/caption_sharegemini_k400_223k.json
sampling_strategy: "first:25%"
data_root: https://opendatalab.com/OpenMMLab/Kinetics-400
- json_path: annotations/video/caption_youcook2-youcook2-train_debug_9k.json
sampling_strategy: "first:25%"
data_root: http://youcook2.eecs.umich.edu/
- json_path: annotations/video/caption_textvr-textvr-train_40k.json
sampling_strategy: "first:25%"
data_root: https://github.com/callsys/TextVR
- json_path: annotations/video/moviechat1k_caption-MovieChat-train_caption_1k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/Enxin/MovieChat-1K_train
- json_path: annotations/video/caption_favd-favd-train_10k.json
sampling_strategy: "first:25%"
data_root: https://github.com/OpenNLPLab/FAVDBench
- json_path: annotations/video/caption_sharegptvideo_300k-sharegptvideo-train_300k_302k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/ShareGPTVideo/train_video_and_instruction/tree/main/train_300k
video_read_type: img
- json_path: annotations/video/caption_sharegpt4o-sharegpt4o_3k.json
sampling_strategy: "first:25%"
data_root: https://sharegpt4o.github.io/
- json_path: annotations/video/vqa_tvqa-tvqa_123k.jsonl
sampling_strategy: "first:25%"
data_root: https://nlp.cs.unc.edu/data/jielei/tvqa/tvqa_public_html/index.html
video_read_type: img
- json_path: annotations/video/reasoning_next_qa-next_qa-train_35k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/doc-doc/NExT-QA
- json_path: annotations/video/vqa_tgif_transition_qa-tgif_transition_qa-train_53k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/reasoning_clevrer_mc-clevrer_mc-train_43k_debug_43k.jsonl
sampling_strategy: "first:25%"
data_root: http://clevrer.csail.mit.edu/
- json_path: annotations/video/reasoning_clevrer_qa-clevrer_qa-train_mc_40k.jsonl
sampling_strategy: "first:25%"
data_root: http://clevrer.csail.mit.edu/
- json_path: annotations/video/classification_k710-k710-train_40k.jsonl
sampling_strategy: "first:25%"
- json_path: annotations/video/classification_ssv2-ssv2-train_40k.jsonl
sampling_strategy: "first:25%"
data_root: https://www.qualcomm.com/developer/software/something-something-v-2-dataset
- json_path: annotations/video/lsmdc-lsmdc_297k.json
sampling_strategy: "first:25%"
data_root: https://sites.google.com/site/describingmovies/
- json_path: annotations/video/vqa_rgbd-nturgbd_clean_110k.json
sampling_strategy: "first:25%"
data_root: https://rose1.ntu.edu.sg/dataset/actionRecognition/
- json_path: annotations/video/vqa_perception_train-mc_question_train_forchoice_8k.json
sampling_strategy: "first:25%"
data_root: https://github.com/google-deepmind/perception_test
- json_path: annotations/video/vqa_ego_qa-ego_qa-train_8k.jsonl
sampling_strategy: "first:25%"
data_root: https://ego4d-data.org/
- json_path: annotations/video/vqa_tgif_transition_qa_openend-openend_qa_annos-tgif_transition_qa_train_openend_53k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/vqa_tgif_frame_qa-tgif_frame_qa-train_40k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/vqa_tgif_count-openend_qa_train_openend_26839.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/vqa_tgif_action-openend_qa_train_openend_20471.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/reasoning_next_qa_oe-openend_qa_annos-next_qa_train_openend_35k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/doc-doc/NExT-QA
- json_path: annotations/video/vqa_webvid_qa-webvid_qa-train_100k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/m-bain/webvid
- json_path: annotations/video/moviechat1k_global-MovieChat-train_global_1k.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/Enxin/MovieChat-1K_train
- json_path: annotations/video/grounding_didemo-didemo-train_66k.json
sampling_strategy: "first:25%"
data_root: https://github.com/LisaAnne/TemporalLanguageRelease
- json_path: annotations/video/vqa_sharegptvideo_240k-sharegptvideo-train_240k_240k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/ShareGPTVideo/train_video_and_instruction/tree/main/train_300k
video_read_type: img
- json_path: annotations/video/caption_vidln_kinetics-vidln-kinetics_train_28k.json
sampling_strategy: "first:25%"
data_root: https://opendatalab.com/OpenMMLab/Kinetics_700
- json_path: annotations/video/caption_vidln_oops-vidln-oops_train_11k.json
sampling_strategy: "first:25%"
data_root: https://oops.cs.columbia.edu/
- json_path: annotations/video/caption_vidln_ovis-vidln-ovis_train_1k.json
sampling_strategy: "first:25%"
data_root: https://songbai.site/ovis/
video_read_type: img
- json_path: annotations/video/caption_vidln_uvo_sparse-vidln-uvo_sparse_train_6k.json
sampling_strategy: "first:25%"
data_root: https://sites.google.com/view/unidentified-video-object/dataset
- json_path: annotations/video/caption_vidln_uvo_dense-vidln-uvo_dense_train_1k.json
sampling_strategy: "first:25%"
data_root: https://sites.google.com/view/unidentified-video-object/dataset
- json_path: annotations/video/reasoning_star-star-train_46k.json
sampling_strategy: "first:25%"
data_root: https://bobbywu.com/STAR/
- json_path: annotations/video/vcg-plus_112K_clean_97k.json
sampling_strategy: "first:10%"
data_root: http://activity-net.org/
- json_path: annotations/video/vript_long_videos_en_20240911_fix.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/Mutonix/Vript
- json_path: annotations/video/vript_short_videos_en_20240911_fix.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/Mutonix/Vript
- json_path: annotations/video/guiworld_en_20241029_fix.jsonl
sampling_strategy: "first:25%"
data_root: https://gui-world.github.io/
## llava video
- json_path: annotations/video/llava-video_2_3_m_academic_mc_v0_1_qa_processed_6901_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_nextqa_oe_qa_processed_61_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_youtube_oe_v0_1_qa_processed_420200_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_academic_oe_v0_1_qa_processed_26302_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_youtube_mc_v0_1_qa_processed_39710_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_nextqa_oe_qa_processed_6843_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_youtube_mc_v0_1_qa_processed_39967_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_academic_v0_1_cap_processed_3124_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_academic_oe_v0_1_qa_processed_57924_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_youtube_v0_1_cap_processed_24685_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_youtube_mc_v0_1_qa_processed_39927_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_activitynetqa_oe_qa_processed_2950_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_nextqa_oe_qa_processed_4694_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_youtube_oe_v0_1_qa_processed_110624_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_academic_mc_v0_1_qa_processed_4241_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_youtube_mc_v0_1_qa_processed_39353_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_activitynetqa_oe_qa_processed_4530_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_youtube_oe_v0_1_qa_processed_137645_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_academic_mc_v0_1_qa_processed_20346_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_youtube_v0_1_cap_processed_19995_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_nextqa_mc_qa_processed_5496_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_academic_mc_v0_1_qa_processed_5753_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_youtube_oe_v0_1_qa_processed_141495_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_nextqa_mc_qa_processed_4633_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_activitynetqa_oe_qa_processed_7460_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_nextqa_mc_qa_processed_52_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_activitynetqa_oe_qa_processed_8590_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_academic_v0_1_cap_processed_4627_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_academic_v0_1_cap_processed_10514_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_youtube_v0_1_cap_processed_24234_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_nextqa_mc_qa_processed_6843_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_nextqa_oe_qa_processed_5492_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_academic_oe_v0_1_qa_processed_48468_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_youtube_v0_1_cap_processed_79346_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_academic_oe_v0_1_qa_processed_18134_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_perceptiontest_mc_qa_processed_1785_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_perceptiontest_mc_qa_processed_618_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_academic_v0_1_cap_processed_11985_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/timeit_ANet-TimeIT-Activitynet_Captions_11k.json
sampling_strategy: "first:25%"
data_root: http://activity-net.org//train
- json_path: annotations/video/timeit_COIN-TimeIT-COIN_10k.json
sampling_strategy: "first:25%"
data_root: https://coin-dataset.github.io/
- json_path: annotations/video/timeit_DiDeMo-TimeIT-DiDeMo_33k.json
sampling_strategy: "first:25%"
data_root: https://github.com/LisaAnne/TemporalLanguageRelease
- json_path: annotations/video/timeit_HiREST-TimeIT-HiREST_1k.json
sampling_strategy: "first:25%"
data_root: https://hirest-cvpr2023.github.io/
- json_path: annotations/video/timeit_QuerYD-TimeIT-QuerYD_15k.json
sampling_strategy: "first:25%"
data_root: https://www.robots.ox.ac.uk/~vgg/data/queryd/
- json_path: annotations/video/timeit_ViTT-TimeIT-ViTT_6k.json
sampling_strategy: "first:25%"
data_root: https://github.com/google-research-datasets/Video-Timeline-Tags-ViTT
- json_path: annotations/video/grounding_ANetRTL-ActivityNet-RTL-ANet_RTL_34k.json
sampling_strategy: "first:25%"
data_root: http://activity-net.org//train
- json_path: annotations/video/grounding_ANetHL-ANet-HL-ANet_HL2_11k.json
sampling_strategy: "first:25%"
data_root: http://activity-net.org//train
- json_path: annotations/video/htstep_eventunderstanding-longvideo_annos-htstep_eventunderstanding_1k_1k.json
sampling_strategy: "first:25%"
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
- json_path: annotations/video/htstep_eventcount-longvideo_annos-htstep_eventcount_2k_2k.json
sampling_strategy: "first:25%"
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
- json_path: annotations/video/htstep_eventrelationship-longvideo_annos-htstep_eventrelationship_1k_1k.json
sampling_strategy: "first:25%"
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
- json_path: annotations/video/ego4dhcap_eventunderstanding-longvideo_annos-ego4dhcap_eventunderstanding_2k_2k.json
sampling_strategy: "first:25%"
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
================================================
FILE: llava-train_videochat/data/stage1_init_connector_iv1m.yaml
================================================
datasets:
- json_path: OpenGVLab/VideoChat-Flash-Training-Data/annotations/video/smit_caption_481k.json
sampling_strategy: all
data_root: http://moments.csail.mit.edu/spoken.html
- json_path: OpenGVLab/VideoChat-Flash-Training-Data/annotations/image/blip_laion_cc_sbu_558k.json
sampling_strategy: all
data_root: https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain
================================================
FILE: llava-train_videochat/data/stage2_short_pretrain_iv6m.yaml
================================================
datasets:
- json_path: annotations/image/LLaVA-ReCap-118K.json
sampling_strategy: all
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-ReCap-118K
- json_path: annotations/image/LLaVA-ReCap-CC3M.json
sampling_strategy: all
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-ReCap-CC3M
- json_path: annotations/image/LLaVA-ReCap-558K.json
sampling_strategy: all
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-ReCap-558K
- json_path: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Mid-Data/tree/main/evol_instruct/evol_instruct_processed.json
sampling_strategy: all
- json_path: annotations/video/webvid-fuse_caption_2m.json
sampling_strategy: all
data_root: https://github.com/m-bain/webvid
- json_path: annotations/video/caption_sharegemini_webvid_core100k_clean.json
sampling_strategy: all
data_root: https://github.com/m-bain/webvid
- json_path: annotations/video/caption_sharegemini_k400_223k.json
sampling_strategy: all
data_root: https://opendatalab.com/OpenMMLab/Kinetics-400
- json_path: annotations/image/ureader_tr_processed.json
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Mid-Data/tree/main/ureader_ur/
sampling_strategy: all
- json_path: annotations/image/synthdog_zh_processed.json
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Mid-Data/tree/main/synthdog_zh/synthdog_zh_images/
sampling_strategy: all
- json_path: annotations/image/synthdog_en_processed.json
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Mid-Data/tree/main/synthdog_en/synthdog_en_images/
sampling_strategy: all
- json_path: annotations/video/smit_caption_481k.json
sampling_strategy: all
data_root: http://moments.csail.mit.edu/spoken.html
- json_path: annotations/video/caption_sharegptvideo_300k-sharegptvideo-train_300k_302k.json
sampling_strategy: all
data_root: https://huggingface.co/datasets/ShareGPTVideo/train_video_and_instruction/tree/main/train_300k
video_read_type: img
================================================
FILE: llava-train_videochat/data/stage3_short-long_mix_sft.yaml
================================================
datasets:
# image sft datasets
- json_path: annotations/image/textcaps.json # 21942
sampling_strategy: all
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/textcaps
- json_path: annotations/image/textocr(gpt4v).json # 25104
sampling_strategy: all
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/textocr(gpt4v)
- json_path: annotations/image/rendered_text(cauldron)_fix.json # 9995
sampling_strategy: all
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/rendered_text(cauldron)
- json_path: annotations/image/iam(cauldron)_fix.json # 5658
sampling_strategy: all
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/iam(cauldron)
- json_path: annotations/image/llavar_gpt4_20k.json # 19790
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/llavar_gpt4_20k
- json_path: annotations/image/allava_instruct_vflan4v.json
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/allava_instruct_vflan4v
- json_path: annotations/image/allava_instruct_laion4v.json
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/allava_instruct_laion4v
- json_path: annotations/image/sharegpt4o.json
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4o
- json_path: annotations/image/sharegpt4v(coco).json
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(coco)
- json_path: annotations/image/sharegpt4v(knowledge).json
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(knowledge)
- json_path: annotations/image/sharegpt4v(llava).json
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(llava)
- json_path: annotations/image/sharegpt4v(sam).json
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(sam)
- json_path: annotations/image/tallyqa(cauldron,llava_format)_fix.json # 98675
sampling_strategy: "first:10%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/tallyqa(cauldron,llava_format) # 98680
- json_path: annotations/image/st_vqa(cauldron,llava_format)_fix.json # 17242
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/st_vqa(cauldron,llava_format) # 17247
- json_path: annotations/image/llava_next_raw_format_processed_738k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-NeXT-Data
- json_path: https://huggingface.co/datasets/lmms-lab/M4-Instruct-Data/m4_instruct_annotations.json
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/M4-Instruct-Data
# video sft datasets
- json_path: annotations/video/caption_sharegemini_webvid_core100k_clean.json
sampling_strategy: "first:20%"
data_root: https://github.com/m-bain/webvid
- json_path: annotations/video/caption_sharegemini_k400_223k.json
sampling_strategy: "all"
data_root: https://opendatalab.com/OpenMMLab/Kinetics-400
- json_path: annotations/video/caption_youcook2-youcook2-train_debug_9k.json
sampling_strategy: "all"
data_root: http://youcook2.eecs.umich.edu/
- json_path: annotations/video/caption_textvr-textvr-train_40k.json
sampling_strategy: "all"
data_root: https://github.com/callsys/TextVR
- json_path: annotations/video/moviechat1k_caption-MovieChat-train_caption_1k.json
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/Enxin/MovieChat-1K_train
- json_path: annotations/video/caption_favd-favd-train_10k.json
sampling_strategy: "first:25%"
data_root: https://github.com/OpenNLPLab/FAVDBench
- json_path: annotations/video/caption_sharegptvideo_300k-sharegptvideo-train_300k_302k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/ShareGPTVideo/train_video_and_instruction/tree/main/train_300k
video_read_type: img
- json_path: annotations/video/caption_sharegpt4o-sharegpt4o_3k.json
sampling_strategy: all
data_root: https://sharegpt4o.github.io/
- json_path: annotations/video/vqa_tvqa-tvqa_123k.jsonl
sampling_strategy: "all"
data_root: https://nlp.cs.unc.edu/data/jielei/tvqa/tvqa_public_html/index.html
video_read_type: img
- json_path: annotations/video/reasoning_next_qa-next_qa-train_35k.jsonl
sampling_strategy: all
data_root: https://github.com/doc-doc/NExT-QA
- json_path: annotations/video/vqa_tgif_transition_qa-tgif_transition_qa-train_53k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/reasoning_clevrer_mc-clevrer_mc-train_43k_debug_43k.jsonl
sampling_strategy: all
data_root: http://clevrer.csail.mit.edu/
- json_path: annotations/video/reasoning_clevrer_qa-clevrer_qa-train_mc_40k.jsonl
sampling_strategy: all
data_root: http://clevrer.csail.mit.edu/
- json_path: annotations/video/classification_k710-k710-train_40k.jsonl
sampling_strategy: "first:25%"
- json_path: annotations/video/classification_ssv2-ssv2-train_40k.jsonl
sampling_strategy: "first:25%"
data_root: https://www.qualcomm.com/developer/software/something-something-v-2-dataset
- json_path: annotations/video/lsmdc-lsmdc_297k.json
sampling_strategy: "first:25%"
data_root: https://sites.google.com/site/describingmovies/
- json_path: annotations/video/vqa_rgbd-nturgbd_clean_110k.json
sampling_strategy: "first:25%"
data_root: https://rose1.ntu.edu.sg/dataset/actionRecognition/
- json_path: annotations/video/vqa_perception_train-mc_question_train_forchoice_8k.json
sampling_strategy: all
data_root: https://github.com/google-deepmind/perception_test
- json_path: annotations/video/vqa_ego_qa-ego_qa-train_8k.jsonl
sampling_strategy: "all"
data_root: https://ego4d-data.org/
- json_path: annotations/video/vqa_tgif_transition_qa_openend-openend_qa_annos-tgif_transition_qa_train_openend_53k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/vqa_tgif_frame_qa-tgif_frame_qa-train_40k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/vqa_tgif_count-openend_qa_train_openend_26839.jsonl
sampling_strategy: "all"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/vqa_tgif_action-openend_qa_train_openend_20471.jsonl
sampling_strategy: "all"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/reasoning_next_qa_oe-openend_qa_annos-next_qa_train_openend_35k.jsonl
sampling_strategy: all
data_root: https://github.com/doc-doc/NExT-QA
- json_path: annotations/video/vqa_webvid_qa-webvid_qa-train_100k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/m-bain/webvid
- json_path: annotations/video/moviechat1k_global-MovieChat-train_global_1k.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/Enxin/MovieChat-1K_train
- json_path: annotations/video/grounding_didemo-didemo-train_66k.json
sampling_strategy: all
data_root: https://github.com/LisaAnne/TemporalLanguageRelease
- json_path: annotations/video/vqa_sharegptvideo_240k-sharegptvideo-train_240k_240k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/ShareGPTVideo/train_video_and_instruction/tree/main/train_300k
video_read_type: img
- json_path: annotations/video/caption_vidln_kinetics-vidln-kinetics_train_28k.json
sampling_strategy: all
data_root: https://opendatalab.com/OpenMMLab/Kinetics_700
- json_path: annotations/video/caption_vidln_oops-vidln-oops_train_11k.json
sampling_strategy: all
data_root: https://oops.cs.columbia.edu/
- json_path: annotations/video/caption_vidln_ovis-vidln-ovis_train_1k.json
sampling_strategy: all
data_root: https://songbai.site/ovis/
video_read_type: img
- json_path: annotations/video/caption_vidln_uvo_sparse-vidln-uvo_sparse_train_6k.json
sampling_strategy: all
data_root: https://sites.google.com/view/unidentified-video-object/dataset
- json_path: annotations/video/caption_vidln_uvo_dense-vidln-uvo_dense_train_1k.json
sampling_strategy: all
data_root: https://sites.google.com/view/unidentified-video-object/dataset
- json_path: annotations/video/reasoning_star-star-train_46k.json
sampling_strategy: all
data_root: https://bobbywu.com/STAR/
- json_path: annotations/video/vcg-plus_112K_clean_97k.json
sampling_strategy: "first:10%"
data_root: http://activity-net.org/
- json_path: annotations/video/vript_long_videos_en_20240911_fix.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/Mutonix/Vript
- json_path: annotations/video/vript_short_videos_en_20240911_fix.jsonl
sampling_strategy: all
data_root: https://huggingface.co/datasets/Mutonix/Vript
- json_path: annotations/video/guiworld_en_20241029_fix.jsonl
sampling_strategy: "all"
data_root: https://gui-world.github.io/
## llava video
- json_path: annotations/video/llava-video_2_3_m_academic_mc_v0_1_qa_processed_6901_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_nextqa_oe_qa_processed_61_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_youtube_oe_v0_1_qa_processed_420200_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_academic_oe_v0_1_qa_processed_26302_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_youtube_mc_v0_1_qa_processed_39710_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_nextqa_oe_qa_processed_6843_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_youtube_mc_v0_1_qa_processed_39967_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_academic_v0_1_cap_processed_3124_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_academic_oe_v0_1_qa_processed_57924_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_youtube_v0_1_cap_processed_24685_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_youtube_mc_v0_1_qa_processed_39927_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_activitynetqa_oe_qa_processed_2950_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_nextqa_oe_qa_processed_4694_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_youtube_oe_v0_1_qa_processed_110624_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_academic_mc_v0_1_qa_processed_4241_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_youtube_mc_v0_1_qa_processed_39353_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_activitynetqa_oe_qa_processed_4530_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_youtube_oe_v0_1_qa_processed_137645_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_academic_mc_v0_1_qa_processed_20346_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_youtube_v0_1_cap_processed_19995_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_nextqa_mc_qa_processed_5496_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_academic_mc_v0_1_qa_processed_5753_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_youtube_oe_v0_1_qa_processed_141495_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_nextqa_mc_qa_processed_4633_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_activitynetqa_oe_qa_processed_7460_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_nextqa_mc_qa_processed_52_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_activitynetqa_oe_qa_processed_8590_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_academic_v0_1_cap_processed_4627_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_academic_v0_1_cap_processed_10514_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_youtube_v0_1_cap_processed_24234_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_nextqa_mc_qa_processed_6843_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_nextqa_oe_qa_processed_5492_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_academic_oe_v0_1_qa_processed_48468_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_youtube_v0_1_cap_processed_79346_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_academic_oe_v0_1_qa_processed_18134_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_perceptiontest_mc_qa_processed_1785_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_perceptiontest_mc_qa_processed_618_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_academic_v0_1_cap_processed_11985_with_duration.jsonl
sampling_strategy: "all"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/timeit_ANet-TimeIT-Activitynet_Captions_11k.json
sampling_strategy: all
data_root: http://activity-net.org//train
- json_path: annotations/video/timeit_COIN-TimeIT-COIN_10k.json
sampling_strategy: all
data_root: https://coin-dataset.github.io/
- json_path: annotations/video/timeit_DiDeMo-TimeIT-DiDeMo_33k.json
sampling_strategy: all
data_root: https://github.com/LisaAnne/TemporalLanguageRelease
- json_path: annotations/video/timeit_HiREST-TimeIT-HiREST_1k.json
sampling_strategy: all
data_root: https://hirest-cvpr2023.github.io/
- json_path: annotations/video/timeit_QuerYD-TimeIT-QuerYD_15k.json
sampling_strategy: all
data_root: https://www.robots.ox.ac.uk/~vgg/data/queryd/
- json_path: annotations/video/timeit_ViTT-TimeIT-ViTT_6k.json
sampling_strategy: all
data_root: https://github.com/google-research-datasets/Video-Timeline-Tags-ViTT
- json_path: annotations/video/grounding_ANetRTL-ActivityNet-RTL-ANet_RTL_34k.json
sampling_strategy: all
data_root: http://activity-net.org//train
- json_path: annotations/video/grounding_ANetHL-ANet-HL-ANet_HL2_11k.json
sampling_strategy: all
data_root: http://activity-net.org//train
- json_path: annotations/video/htstep_eventunderstanding-longvideo_annos-htstep_eventunderstanding_1k_1k.json
sampling_strategy: all
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
- json_path: annotations/video/htstep_eventcount-longvideo_annos-htstep_eventcount_2k_2k.json
sampling_strategy: all
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
- json_path: annotations/video/htstep_eventrelationship-longvideo_annos-htstep_eventrelationship_1k_1k.json
sampling_strategy: all
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
- json_path: annotations/video/ego4dhcap_eventunderstanding-longvideo_annos-ego4dhcap_eventunderstanding_2k_2k.json
sampling_strategy: all
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
================================================
FILE: llava-train_videochat/data/stage4_highres_postsft.yaml
================================================
datasets:
# image sft datasets, 6w
- json_path: annotations/image/synthdog_zh_processed.json
data_root: https://huggingface.co/datasets/lmms-lab/OneVision-Mid-Data/synthdog_zh/synthdog_zh_images/
sampling_strategy: "first:10%"
- json_path: annotations/image/synthdog_en_processed.json
data_root: https://huggingface.co/datasets/lmms-lab/OneVision-Mid-Data/synthdog_en/synthdog_en_images/
sampling_strategy: "first:10%"
- json_path: annotations/image/textcaps.json # 21942
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/textcaps
- json_path: annotations/image/textocr(gpt4v).json # 25104
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/textocr(gpt4v)
- json_path: annotations/image/rendered_text(cauldron)_fix.json # 9995
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/rendered_text(cauldron)
- json_path: annotations/image/iam(cauldron)_fix.json # 5658
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/iam(cauldron)
- json_path: annotations/image/llavar_gpt4_20k.json # 19790
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/llavar_gpt4_20k
- json_path: annotations/image/allava_instruct_vflan4v.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/allava_instruct_vflan4v
- json_path: annotations/image/allava_instruct_laion4v.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/allava_instruct_laion4v
- json_path: annotations/image/sharegpt4o.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4o
- json_path: annotations/image/sharegpt4v(coco).json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(coco)
- json_path: annotations/image/sharegpt4v(knowledge).json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(knowledge)
- json_path: annotations/image/sharegpt4v(llava).json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(llava)
- json_path: annotations/image/sharegpt4v(sam).json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/sharegpt4v(sam)
- json_path: annotations/image/tallyqa(cauldron,llava_format)_fix.json # 98675
sampling_strategy: "first:10%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/tallyqa(cauldron,llava_format) # 98680
- json_path: annotations/image/st_vqa(cauldron,llava_format)_fix.json # 17242
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data/image/st_vqa(cauldron,llava_format) # 17247
- json_path: annotations/image/llava_next_raw_format_processed_738k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-NeXT-Data
- json_path: https://huggingface.co/datasets/lmms-lab/M4-Instruct-Data/m4_instruct_annotations.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/M4-Instruct-Data
# video sft datasets
- json_path: annotations/video/caption_sharegemini_webvid_core100k_clean.json
sampling_strategy: "first:20%"
data_root: https://github.com/m-bain/webvid
- json_path: annotations/video/caption_sharegemini_k400_223k.json
sampling_strategy: "first:25%"
data_root: https://opendatalab.com/OpenMMLab/Kinetics-400
- json_path: annotations/video/caption_youcook2-youcook2-train_debug_9k.json
sampling_strategy: "first:25%"
data_root: http://youcook2.eecs.umich.edu/
- json_path: annotations/video/caption_textvr-textvr-train_40k.json
sampling_strategy: "first:25%"
data_root: https://github.com/callsys/TextVR
- json_path: annotations/video/moviechat1k_caption-MovieChat-train_caption_1k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/Enxin/MovieChat-1K_train
- json_path: annotations/video/caption_favd-favd-train_10k.json
sampling_strategy: "first:25%"
data_root: https://github.com/OpenNLPLab/FAVDBench
- json_path: annotations/video/caption_sharegptvideo_300k-sharegptvideo-train_300k_302k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/ShareGPTVideo/train_video_and_instruction/tree/main/train_300k
video_read_type: img
- json_path: annotations/video/caption_sharegpt4o-sharegpt4o_3k.json
sampling_strategy: "first:25%"
data_root: https://sharegpt4o.github.io/
- json_path: annotations/video/vqa_tvqa-tvqa_123k.jsonl
sampling_strategy: "first:25%"
data_root: https://nlp.cs.unc.edu/data/jielei/tvqa/tvqa_public_html/index.html
video_read_type: img
- json_path: annotations/video/reasoning_next_qa-next_qa-train_35k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/doc-doc/NExT-QA
- json_path: annotations/video/vqa_tgif_transition_qa-tgif_transition_qa-train_53k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/reasoning_clevrer_mc-clevrer_mc-train_43k_debug_43k.jsonl
sampling_strategy: "first:25%"
data_root: http://clevrer.csail.mit.edu/
- json_path: annotations/video/reasoning_clevrer_qa-clevrer_qa-train_mc_40k.jsonl
sampling_strategy: "first:25%"
data_root: http://clevrer.csail.mit.edu/
- json_path: annotations/video/classification_k710-k710-train_40k.jsonl
sampling_strategy: "first:25%"
- json_path: annotations/video/classification_ssv2-ssv2-train_40k.jsonl
sampling_strategy: "first:25%"
data_root: https://www.qualcomm.com/developer/software/something-something-v-2-dataset
- json_path: annotations/video/lsmdc-lsmdc_297k.json
sampling_strategy: "first:25%"
data_root: https://sites.google.com/site/describingmovies/
- json_path: annotations/video/vqa_rgbd-nturgbd_clean_110k.json
sampling_strategy: "first:25%"
data_root: https://rose1.ntu.edu.sg/dataset/actionRecognition/
- json_path: annotations/video/vqa_perception_train-mc_question_train_forchoice_8k.json
sampling_strategy: "first:25%"
data_root: https://github.com/google-deepmind/perception_test
- json_path: annotations/video/vqa_ego_qa-ego_qa-train_8k.jsonl
sampling_strategy: "first:25%"
data_root: https://ego4d-data.org/
- json_path: annotations/video/vqa_tgif_transition_qa_openend-openend_qa_annos-tgif_transition_qa_train_openend_53k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/vqa_tgif_frame_qa-tgif_frame_qa-train_40k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/vqa_tgif_count-openend_qa_train_openend_26839.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/vqa_tgif_action-openend_qa_train_openend_20471.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/YunseokJANG/tgif-qa
video_read_type: gif
- json_path: annotations/video/reasoning_next_qa_oe-openend_qa_annos-next_qa_train_openend_35k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/doc-doc/NExT-QA
- json_path: annotations/video/vqa_webvid_qa-webvid_qa-train_100k.jsonl
sampling_strategy: "first:25%"
data_root: https://github.com/m-bain/webvid
- json_path: annotations/video/moviechat1k_global-MovieChat-train_global_1k.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/Enxin/MovieChat-1K_train
- json_path: annotations/video/grounding_didemo-didemo-train_66k.json
sampling_strategy: "first:25%"
data_root: https://github.com/LisaAnne/TemporalLanguageRelease
- json_path: annotations/video/vqa_sharegptvideo_240k-sharegptvideo-train_240k_240k.json
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/ShareGPTVideo/train_video_and_instruction/tree/main/train_300k
video_read_type: img
- json_path: annotations/video/caption_vidln_kinetics-vidln-kinetics_train_28k.json
sampling_strategy: "first:25%"
data_root: https://opendatalab.com/OpenMMLab/Kinetics_700
- json_path: annotations/video/caption_vidln_oops-vidln-oops_train_11k.json
sampling_strategy: "first:25%"
data_root: https://oops.cs.columbia.edu/
- json_path: annotations/video/caption_vidln_ovis-vidln-ovis_train_1k.json
sampling_strategy: "first:25%"
data_root: https://songbai.site/ovis/
video_read_type: img
- json_path: annotations/video/caption_vidln_uvo_sparse-vidln-uvo_sparse_train_6k.json
sampling_strategy: "first:25%"
data_root: https://sites.google.com/view/unidentified-video-object/dataset
- json_path: annotations/video/caption_vidln_uvo_dense-vidln-uvo_dense_train_1k.json
sampling_strategy: "first:25%"
data_root: https://sites.google.com/view/unidentified-video-object/dataset
- json_path: annotations/video/reasoning_star-star-train_46k.json
sampling_strategy: "first:25%"
data_root: https://bobbywu.com/STAR/
- json_path: annotations/video/vcg-plus_112K_clean_97k.json
sampling_strategy: "first:10%"
data_root: http://activity-net.org/
- json_path: annotations/video/vript_long_videos_en_20240911_fix.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/Mutonix/Vript
- json_path: annotations/video/vript_short_videos_en_20240911_fix.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/Mutonix/Vript
- json_path: annotations/video/guiworld_en_20241029_fix.jsonl
sampling_strategy: "first:25%"
data_root: https://gui-world.github.io/
## llava video
- json_path: annotations/video/llava-video_2_3_m_academic_mc_v0_1_qa_processed_6901_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_nextqa_oe_qa_processed_61_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_youtube_oe_v0_1_qa_processed_420200_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_academic_oe_v0_1_qa_processed_26302_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_youtube_mc_v0_1_qa_processed_39710_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_nextqa_oe_qa_processed_6843_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_youtube_mc_v0_1_qa_processed_39967_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_academic_v0_1_cap_processed_3124_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_academic_oe_v0_1_qa_processed_57924_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_youtube_v0_1_cap_processed_24685_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_youtube_mc_v0_1_qa_processed_39927_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_activitynetqa_oe_qa_processed_2950_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_nextqa_oe_qa_processed_4694_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_youtube_oe_v0_1_qa_processed_110624_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_academic_mc_v0_1_qa_processed_4241_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_youtube_mc_v0_1_qa_processed_39353_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_activitynetqa_oe_qa_processed_4530_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_youtube_oe_v0_1_qa_processed_137645_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_academic_mc_v0_1_qa_processed_20346_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_youtube_v0_1_cap_processed_19995_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_nextqa_mc_qa_processed_5496_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_academic_mc_v0_1_qa_processed_5753_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_youtube_oe_v0_1_qa_processed_141495_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_nextqa_mc_qa_processed_4633_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_activitynetqa_oe_qa_processed_7460_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_nextqa_mc_qa_processed_52_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_activitynetqa_oe_qa_processed_8590_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_academic_v0_1_cap_processed_4627_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_academic_v0_1_cap_processed_10514_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_1_2_m_youtube_v0_1_cap_processed_24234_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_nextqa_mc_qa_processed_6843_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_nextqa_oe_qa_processed_5492_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_academic_oe_v0_1_qa_processed_48468_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_youtube_v0_1_cap_processed_79346_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_2_3_m_academic_oe_v0_1_qa_processed_18134_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_perceptiontest_mc_qa_processed_1785_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_30_60_s_perceptiontest_mc_qa_processed_618_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/llava-video_0_30_s_academic_v0_1_cap_processed_11985_with_duration.jsonl
sampling_strategy: "first:25%"
data_root: https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K
- json_path: annotations/video/timeit_ANet-TimeIT-Activitynet_Captions_11k.json
sampling_strategy: "first:25%"
data_root: http://activity-net.org//train
- json_path: annotations/video/timeit_COIN-TimeIT-COIN_10k.json
sampling_strategy: "first:25%"
data_root: https://coin-dataset.github.io/
- json_path: annotations/video/timeit_DiDeMo-TimeIT-DiDeMo_33k.json
sampling_strategy: "first:25%"
data_root: https://github.com/LisaAnne/TemporalLanguageRelease
- json_path: annotations/video/timeit_HiREST-TimeIT-HiREST_1k.json
sampling_strategy: "first:25%"
data_root: https://hirest-cvpr2023.github.io/
- json_path: annotations/video/timeit_QuerYD-TimeIT-QuerYD_15k.json
sampling_strategy: "first:25%"
data_root: https://www.robots.ox.ac.uk/~vgg/data/queryd/
- json_path: annotations/video/timeit_ViTT-TimeIT-ViTT_6k.json
sampling_strategy: "first:25%"
data_root: https://github.com/google-research-datasets/Video-Timeline-Tags-ViTT
- json_path: annotations/video/grounding_ANetRTL-ActivityNet-RTL-ANet_RTL_34k.json
sampling_strategy: "first:25%"
data_root: http://activity-net.org//train
- json_path: annotations/video/grounding_ANetHL-ANet-HL-ANet_HL2_11k.json
sampling_strategy: "first:25%"
data_root: http://activity-net.org//train
- json_path: annotations/video/htstep_eventunderstanding-longvideo_annos-htstep_eventunderstanding_1k_1k.json
sampling_strategy: "first:25%"
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
- json_path: annotations/video/htstep_eventcount-longvideo_annos-htstep_eventcount_2k_2k.json
sampling_strategy: "first:25%"
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
- json_path: annotations/video/htstep_eventrelationship-longvideo_annos-htstep_eventrelationship_1k_1k.json
sampling_strategy: "first:25%"
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
- json_path: annotations/video/ego4dhcap_eventunderstanding-longvideo_annos-ego4dhcap_eventunderstanding_2k_2k.json
sampling_strategy: "first:25%"
video_read_type: img
data_root: https://huggingface.co/datasets/OpenGVLab/VideoChat-Flash-Training-Data/tree/main/longvid_subset
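Each entry above pairs a jsonl annotation file with a data_root and a sampling_strategy; "first:25%" keeps the leading quarter of that file's records. A minimal sketch of how such a strategy string can be applied, assuming the annotations have been downloaded locally; the helper name is hypothetical (the real parsing happens when the training code builds its dataset):

import json

def apply_sampling_strategy(records, strategy):
    """Subset `records` per a strategy such as "first:25%" or "first:1000"."""
    if not strategy or strategy == "all":
        return records
    mode, amount = strategy.split(":")
    count = int(len(records) * float(amount[:-1]) / 100) if amount.endswith("%") else int(amount)
    if mode == "first":
        return records[:count]
    if mode == "end":
        return records[-count:]
    raise ValueError(f"unknown sampling strategy: {strategy}")

with open("annotations/video/llava-video_0_30_s_academic_v0_1_cap_processed_11985_with_duration.jsonl") as f:
    records = [json.loads(line) for line in f]
subset = apply_sampling_strategy(records, "first:25%")  # 11985 records -> 2996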
================================================
FILE: llava-train_videochat/llava/__init__.py
================================================
from .model import LlavaQwenForCausalLM
from .train.train import LazySupervisedDataset, DataCollatorForSupervisedDataset
================================================
FILE: llava-train_videochat/llava/constants.py
================================================
CONTROLLER_HEART_BEAT_EXPIRATION = 30
WORKER_HEART_BEAT_INTERVAL = 15
LOGDIR = "."
# Model Constants
IGNORE_INDEX = -100
IMAGE_TOKEN_INDEX = -200
DEFAULT_IMAGE_TOKEN = "<image>"
DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
DEFAULT_IM_START_TOKEN = "<im_start>"
DEFAULT_IM_END_TOKEN = "<im_end>"
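These sentinels are shared by the training and inference code: IMAGE_TOKEN_INDEX is a deliberately out-of-vocabulary id that marks where visual features get spliced into a token sequence, and IGNORE_INDEX masks positions out of the language-model loss. A minimal illustration with toy token ids (not a real tokenizer):

from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX, IGNORE_INDEX

prompt = f"{DEFAULT_IMAGE_TOKEN}\nWhat happens in the video?"
text_ids = [101, 102, 103]                    # pretend ids for the text after "<image>"
input_ids = [IMAGE_TOKEN_INDEX] + text_ids    # -200 marks the visual-feature slot
labels = [IGNORE_INDEX] * len(input_ids)      # -100 keeps prompt tokens out of the loss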
================================================
FILE: llava-train_videochat/llava/conversation.py
================================================
import dataclasses
from enum import auto, Enum
from typing import List, Any, Dict, Union, Tuple
import re
import base64
from io import BytesIO
from PIL import Image
from transformers import AutoTokenizer
class SeparatorStyle(Enum):
"""Different separator style."""
SINGLE = auto()
TWO = auto()
MPT = auto()
PLAIN = auto()
CHATML = auto()
LLAMA_2 = auto()
LLAMA_3 = auto()
QWEN = auto()
GEMMA = auto()
@dataclasses.dataclass
class Conversation:
"""A class that keeps all conversation history."""
system: str
roles: List[str]
messages: List[List[str]]
offset: int
sep_style: SeparatorStyle = SeparatorStyle.SINGLE
sep: str = "###"
sep2: str = None
version: str = "Unknown"
tokenizer_id: str = ""
tokenizer: Any = None
# Stop criteria (the default one is EOS token)
stop_str: Union[str, List[str]] = None
# Stops generation if meeting any token in this list
stop_token_ids: List[int] = None
skip_next: bool = False
def get_prompt(self):
messages = self.messages
if len(messages) > 0 and type(messages[0][1]) is tuple:
messages = self.messages.copy()
init_role, init_msg = messages[0].copy()
init_msg = init_msg[0]
if "mmtag" in self.version:
init_msg = init_msg.replace("<image>", "").strip()
messages[0] = (init_role, init_msg)
messages.insert(0, (self.roles[0], "<Image><image></Image>"))
messages.insert(1, (self.roles[1], "Received."))
elif not init_msg.startswith("<image>"):
init_msg = init_msg.replace("<image>", "").strip()
messages[0] = (init_role, "<image>\n" + init_msg)
else:
messages[0] = (init_role, init_msg)
if self.sep_style == SeparatorStyle.SINGLE:
ret = self.system + self.sep
for role, message in messages:
if message:
if type(message) is tuple:
message, _, _ = message
ret += role + ": " + message + self.sep
else:
ret += role + ":"
elif self.sep_style == SeparatorStyle.TWO:
seps = [self.sep, self.sep2]
ret = self.system + seps[0]
for i, (role, message) in enumerate(messages):
if message:
if type(message) is tuple:
message, _, _ = message
ret += role + ": " + message + seps[i % 2]
else:
ret += role + ":"
elif self.sep_style == SeparatorStyle.CHATML:
ret = "" if self.system == "" else self.system + self.sep + "\n"
for role, message in messages:
if message:
if type(message) is tuple:
message, images, _ = message
message = "<image>" * len(images) + message
ret += role + "\n" + message + self.sep + "\n"
else:
ret += role + "\n"
return ret
elif self.sep_style == SeparatorStyle.LLAMA_3:
chat_template_messages = [{"role": "system", "content": self.system}]
for role, message in messages:
if message:
if type(message) is tuple:
message, images = message
message = "<image>" * len(images) + message
chat_template_messages.append({"role": role, "content": message})
# print(chat_template_messages)
return self.tokenizer.apply_chat_template(chat_template_messages, tokenize=False, add_generation_prompt=True)
# ret = "" if self.system == "" else self.system + self.sep + "\n"
# for role, message in messages:
# if message:
# if type(message) is tuple:
# message, images = message
# message = "<image>" * len(images) + message
# ret += role + "\n" + message + self.sep + "\n"
# else:
# ret += role + "\n"
# return ret
elif self.sep_style == SeparatorStyle.MPT:
ret = self.system + self.sep
for role, message in messages:
if message:
if type(message) is tuple:
message, _, _ = message
ret += role + message + self.sep
else:
ret += role
elif self.sep_style == SeparatorStyle.GEMMA:
ret = ""
for i, (role, message) in enumerate(messages):
assert role == self.roles[i % 2], "Conversation should alternate user/assistant/user/assistant/..."
if message:
if type(message) is tuple:
message, _, _ = message
ret += role + message + self.sep
else:
ret += role
elif self.sep_style == SeparatorStyle.LLAMA_2:
wrap_sys = lambda msg: f"<<SYS>>\n{msg}\n<</SYS>>\n\n" if len(msg) > 0 else msg
wrap_inst = lambda msg: f"[INST] {msg} [/INST]"
ret = ""
for i, (role, message) in enumerate(messages):
if i == 0:
assert message, "first message should not be none"
assert role == self.roles[0], "first message should come from user"
if message:
if type(message) is tuple:
message, _, _ = message
if i == 0:
message = wrap_sys(self.system) + message
if i % 2 == 0:
message = wrap_inst(message)
ret += self.sep + message
else:
ret += " " + message + " " + self.sep2
else:
ret += ""
ret = ret.lstrip(self.sep)
elif self.sep_style == SeparatorStyle.PLAIN:
seps = [self.sep, self.sep2]
ret = self.system
for i, (role, message) in enumerate(messages):
if message:
if type(message) is tuple:
message, _, _ = message
ret += message + seps[i % 2]
else:
ret += ""
else:
raise ValueError(f"Invalid style: {self.sep_style}")
return ret
def append_message(self, role, message):
self.messages.append([role, message])
def process_image(self, image, image_process_mode, return_pil=False, image_format="PNG"):
if image_process_mode == "Pad":
def expand2square(pil_img, background_color=(122, 116, 104)):
width, height = pil_img.size
if width == height:
return pil_img
elif width > height:
result = Image.new(pil_img.mode, (width, width), background_color)
result.paste(pil_img, (0, (width - height) // 2))
return result
else:
result = Image.new(pil_img.mode, (height, height), background_color)
result.paste(pil_img, ((height - width) // 2, 0))
return result
image = expand2square(image)
elif image_process_mode in ["Default", "Crop"]:
pass
elif image_process_mode == "Resize":
image = image.resize((336, 336))
else:
raise ValueError(f"Invalid image_process_mode: {image_process_mode}")
if type(image) is not Image.Image:
image = Image.open(image).convert("RGB")
max_hw, min_hw = max(image.size), min(image.size)
aspect_ratio = max_hw / min_hw
max_len, min_len = 672, 448
shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
longest_edge = int(shortest_edge * aspect_ratio)
W, H = image.size
if H > W:
H, W = longest_edge, shortest_edge
else:
H, W = shortest_edge, longest_edge
image = image.resize((W, H))
if return_pil:
return image
else:
buffered = BytesIO()
image.save(buffered, format=image_format)
img_b64_str = base64.b64encode(buffered.getvalue()).decode()
return img_b64_str
def get_images(self, return_pil=False, return_path=False):
images = []
for i, (role, msg) in enumerate(self.messages[self.offset :]):
if i % 2 == 0:
if type(msg) is tuple:
msg, image, image_process_mode = msg
if type(image) != list:
image = [image]
                    for img in image:
                        if not return_path and self.is_image_file(img):
                            img = self.process_image(img, image_process_mode, return_pil=return_pil)
                        # append in both branches so processed images are not silently dropped
                        images.append(img)
return images
def is_image_file(self, filename):
image_extensions = [".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"]
return any(filename.lower().endswith(ext) for ext in image_extensions)
def is_video_file(self, filename):
video_extensions = [".mp4", ".mov", ".avi", ".mkv", ".wmv", ".flv", ".mpeg", ".mpg"]
return any(filename.lower().endswith(ext) for ext in video_extensions)
def to_gradio_chatbot(self):
ret = []
for i, (role, msg) in enumerate(self.messages[self.offset :]):
if i % 2 == 0:
if type(msg) is tuple:
msg, image, image_process_mode = msg
if type(image) != list:
image = [image]
if len(image) == 1:
msg = "<image>\n" + msg.replace("<image>", "").strip()
else:
msg = re.sub(r"(<image>)\n(?=<image>)", r"\1 ", msg)
img_str_list = []
for img in image:
if self.is_image_file(img):
img_b64_str = self.process_image(img, "Default", return_pil=False, image_format="JPEG")
img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" style="max-width: 256px; max-height: 256px; width: auto; height: auto; object-fit: contain;"/>'
img_str_list.append(img_str)
elif self.is_video_file(img):
ret.append(((img,), None))
msg = msg.strip()
img_place_holder = ""
for img_str in img_str_list:
img_place_holder += f"{img_str}\n\n"
if len(img_str_list) > 0:
msg = f"{img_place_holder}\n\n{msg}"
if len(msg) > 0:
ret.append([msg, None])
else:
ret.append([msg, None])
else:
ret[-1][-1] = msg
return ret
def copy(self):
return Conversation(system=self.system, roles=self.roles, messages=[[x, y] for x, y in self.messages], offset=self.offset, sep_style=self.sep_style, sep=self.sep, sep2=self.sep2, version=self.version)
def dict(self):
if len(self.get_images()) > 0:
return {
"system": self.system,
"roles": self.roles,
"messages": [[x, y[0] if type(y) is tuple else y] for x, y in self.messages],
"offset": self.offset,
"sep": self.sep,
"sep2": self.sep2,
}
return {
"system": self.system,
"roles": self.roles,
"messages": self.messages,
"offset": self.offset,
"sep": self.sep,
"sep2": self.sep2,
}
conv_vicuna_v0 = Conversation(
system="A chat between a curious human and an artificial intelligence assistant. " "The assistant gives helpful, detailed, and polite answers to the human's questions.",
roles=("Human", "Assistant"),
messages=[
["Human", "What are the key differences between renewable and non-renewable energy sources?"],
[
"Assistant",
"Renewable energy sources are those that can be replenished naturally in a relatively "
"short amount of time, such as solar, wind, hydro, geothermal, and biomass. "
"Non-renewable energy sources, on the other hand, are finite and will eventually be "
"depleted, such as coal, oil, and natural gas. Here are some key differences between "
"renewable and non-renewable energy sources:\n"
"1. Availability: Renewable energy sources are virtually inexhaustible, while non-renewable "
"energy sources are finite and will eventually run out.\n"
"2. Environmental impact: Renewable energy sources have a much lower environmental impact "
"than non-renewable sources, which can lead to air and water pollution, greenhouse gas emissions, "
"and other negative effects.\n"
"3. Cost: Renewable energy sources can be more expensive to initially set up, but they typically "
"have lower operational costs than non-renewable sources.\n"
"4. Reliability: Renewable energy sources are often more reliable and can be used in more remote "
"locations than non-renewable sources.\n"
"5. Flexibility: Renewable energy sources are often more flexible and can be adapted to different "
"situations and needs, while non-renewable sources are more rigid and inflexible.\n"
"6. Sustainability: Renewable energy sources are more sustainable over the long term, while "
"non-renewable sources are not, and their depletion can lead to economic and social instability.\n",
],
],
offset=2,
sep_style=SeparatorStyle.SINGLE,
sep="###",
)
conv_vicuna_v1 = Conversation(
system="A chat between a curious user and an artificial intelligence assistant. " "The assistant gives helpful, detailed, and polite answers to the user's questions.",
roles=("USER", "ASSISTANT"),
version="v1",
messages=[],
offset=0,
sep_style=SeparatorStyle.TWO,
sep=" ",
sep2="</s>",
)
conv_llama_2 = Conversation(
system="""You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.""",
roles=("USER", "ASSISTANT"),
version="llama_v2",
messages=[],
offset=0,
sep_style=SeparatorStyle.LLAMA_2,
sep="<s>",
sep2="</s>",
)
conv_llava_llama_2 = Conversation(
system="You are a helpful language and vision assistant. " "You are able to understand the visual content that the user provides, " "and assist the user with a variety of tasks using natural language.",
roles=("USER", "ASSISTANT"),
version="llama_v2",
messages=[],
offset=0,
sep_style=SeparatorStyle.LLAMA_2,
sep="<s>",
sep2="</s>",
)
# conv_llava_llama_3 = Conversation(
# system="You are a helpful language and vision assistant. " "You are able to understand the visual content that the user provides, " "and assist the user with a variety of tasks using natural language.",
# roles=("user", "assistant"),
# version="llama_v3",
# messages=[],
# offset=0,
# sep="<|eot_id|>",
# sep_style=SeparatorStyle.LLAMA_3,
# tokenizer_id="meta-llama/Meta-Llama-3-8B-Instruct",
# tokenizer=AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct"),
# stop_token_ids=[128009],
# )
conv_mistral_instruct = Conversation(
system="",
roles=("USER", "ASSISTANT"),
version="llama_v2",
messages=[],
offset=0,
sep_style=SeparatorStyle.LLAMA_2,
sep="",
sep2="</s>",
)
conv_llava_llama_2_simple = Conversation(
system="Answer the questions about the visual content that the user provides.",
roles=("USER", "ASSISTANT"),
version="llama_v2",
messages=[],
offset=0,
sep_style=SeparatorStyle.LLAMA_2,
sep="<s>",
sep2="</s>",
)
conv_llava_llama_2_mmtag = Conversation(
system="Answer the questions about the visual content that the user provides." "The visual content will be provided with the following format: <Image>visual content</Image>.",
roles=("USER", "ASSISTANT"),
version="llama_v2_mmtag",
messages=[],
offset=0,
sep_style=SeparatorStyle.LLAMA_2,
sep="<s>",
sep2="</s>",
)
conv_mpt = Conversation(
system="""<|im_start|>system
A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.""",
roles=("<|im_start|>user\n", "<|im_start|>assistant\n"),
version="mpt",
messages=[],
offset=0,
sep_style=SeparatorStyle.MPT,
sep="<|im_end|>",
)
conv_qwen = Conversation(
system="""<|im_start|>system
You are a helpful assistant.""",
roles=("<|im_start|>user", "<|im_start|>assistant"),
version="qwen",
messages=[],
offset=0,
sep_style=SeparatorStyle.CHATML,
sep="<|im_end|>",
)
conv_internlm_2 = Conversation(
system="""<|im_start|>system
You are a helpful assistant.""",
roles=("<|im_start|>user", "<|im_start|>assistant"),
version="internlm_2",
messages=[],
offset=0,
sep_style=SeparatorStyle.CHATML,
sep="<|im_end|>",
)
conv_gemma_instruct = Conversation(system="", roles=("<start_of_turn>user\n", "<start_of_turn>model\n"), version="gemma", messages=[], offset=0, sep_style=SeparatorStyle.GEMMA, sep="<end_of_turn>\n")
conv_llava_plain = Conversation(
system="",
roles=("", ""),
messages=[],
offset=0,
sep_style=SeparatorStyle.PLAIN,
sep="\n",
)
conv_llava_v0 = Conversation(
system="A chat between a curious human and an artificial intelligence assistant. " "The assistant gives helpful, detailed, and polite answers to the human's questions.",
roles=("Human", "Assistant"),
messages=[],
offset=0,
sep_style=SeparatorStyle.SINGLE,
sep="###",
)
conv_llava_v0_mmtag = Conversation(
system="A chat between a curious user and an artificial intelligence assistant. "
"The assistant is able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language."
"The visual content will be provided with the following format: <Image>visual content</Image>.",
roles=("Human", "Assistant"),
messages=[],
offset=0,
sep_style=SeparatorStyle.SINGLE,
sep="###",
version="v0_mmtag",
)
conv_llava_v1 = Conversation(
system="A chat between a curious human and an artificial intelligence assistant. " "The assistant gives helpful, detailed, and polite answers to the human's questions.",
roles=("USER", "ASSISTANT"),
version="v1",
messages=[],
offset=0,
sep_style=SeparatorStyle.TWO,
sep=" ",
sep2="</s>",
)
conv_llava_v1_mmtag = Conversation(
system="A chat between a curious user and an artificial intelligence assistant. "
"The assistant is able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language."
"The visual content will be provided with the following format: <Image>visual content</Image>.",
roles=("USER", "ASSISTANT"),
messages=[],
offset=0,
sep_style=SeparatorStyle.TWO,
sep=" ",
sep2="</s>",
version="v1_mmtag",
)
conv_mistral_orca = Conversation(
system="""<|im_start|>system
You are MistralOrca, a large language model trained by Alignment Lab AI. Write out your reasoning step-by-step to be sure you get the right answers!""",
roles=("<|im_start|>user\n", "<|im_start|>assistant\n"),
version="mpt",
messages=[],
offset=0,
sep_style=SeparatorStyle.MPT,
sep="<|im_end|>",
)
conv_mistral_zephyr = Conversation(
system="""<|system|>
You are a helpful AI assistant.""",
roles=("<|user|>\n", "<|assistant|>\n"),
version="mpt",
messages=[],
offset=0,
sep_style=SeparatorStyle.MPT,
sep="</s>",
)
conv_mistral_direct = Conversation(
system="""<|im_start|>system
Answer the questions.""",
roles=("<|im_start|>user\n", "<|im_start|>assistant\n"),
version="mpt",
messages=[],
offset=0,
sep_style=SeparatorStyle.MPT,
sep="<|im_end|>",
)
conv_chatml_direct = Conversation(
system="""<|im_start|>system
Answer the questions.""",
roles=("<|im_start|>user\n", "<|im_start|>assistant\n"),
version="mpt",
messages=[],
offset=0,
sep_style=SeparatorStyle.MPT,
sep="<|im_end|>",
)
default_conversation = conv_vicuna_v0
conv_templates = {
"default": conv_vicuna_v0,
"v0": conv_vicuna_v0,
"v1": conv_vicuna_v1,
"vicuna_v1": conv_vicuna_v1,
"llama_2": conv_llama_2,
"mistral_instruct": conv_mistral_instruct,
"mistral_orca": conv_mistral_orca,
"mistral_zephyr": conv_mistral_zephyr,
"mistral_direct": conv_mistral_direct,
"plain": conv_llava_plain,
"v0_plain": conv_llava_plain,
"chatml_direct": conv_chatml_direct,
"llava_v0": conv_llava_v0,
"llava_v0_mmtag": conv_llava_v0_mmtag,
"llava_v1": conv_llava_v1,
"llava_v1_mmtag": conv_llava_v1_mmtag,
"llava_llama_2": conv_llava_llama_2,
# "llava_llama_3": conv_llava_llama_3,
"llava_llama_2_simple": conv_llava_llama_2_simple,
"llava_llama_2_mmtag": conv_llava_llama_2_mmtag,
"llava_mistral_instruct": conv_mistral_instruct,
"mpt": conv_mpt,
"qwen_1_5": conv_qwen,
"qwen_2": conv_qwen,
"internlm_2": conv_internlm_2,
"gemma_instruct": conv_gemma_instruct,
}
if __name__ == "__main__":
print(default_conversation.get_prompt())
print(default_conversation)
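A short usage sketch of the templates above; the "qwen_2" entry matches the Qwen-based checkpoints in this repo, and the question text is illustrative:

from llava.conversation import conv_templates

conv = conv_templates["qwen_2"].copy()
conv.append_message(conv.roles[0], "<image>\nDescribe what happens in this video.")
conv.append_message(conv.roles[1], None)  # None leaves the assistant turn open
prompt = conv.get_prompt()
# -> "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<image>\n...<|im_end|>\n<|im_start|>assistant\n"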
================================================
FILE: llava-train_videochat/llava/dist_utils.py
================================================
import json
import os
import builtins
import datetime
import time
import subprocess
import torch
import torch.distributed as dist
def get_rank() -> int:
if not dist.is_available():
return 0
if not dist.is_initialized():
return 0
return dist.get_rank()
def get_world_size() -> int:
if not dist.is_available():
return 1
if not dist.is_initialized():
return 1
return dist.get_world_size()
def setup_for_distributed(is_master):
builtin_print = builtins.print
def print(*args, **kwargs):
force = kwargs.pop("force", False)
# force = force or (get_world_size() > 8)
if is_master or force:
now = datetime.datetime.now().time()
builtin_print("[{}] ".format(now), end="") # print with time stamp
builtin_print(*args, **kwargs)
builtins.print = print
def init_distributed_mode(use_dynamic_port: bool = True):
if "SLURM_PROCID" in os.environ:
rank = int(os.environ["SLURM_PROCID"])
local_rank = rank % torch.cuda.device_count()
world_size = int(os.environ["SLURM_NTASKS"])
try:
local_size = int(os.environ["SLURM_NTASKS_PER_NODE"])
        except (KeyError, ValueError):
local_size = int(os.environ.get("LOCAL_SIZE", 1))
if "MASTER_PORT" not in os.environ:
port = 10023 # + random.randint(0, 20)
# if use_dynamic_port:
# for i in range(10042, 65535):
# cmd = f"netstat -aon|grep {i}"
# with os.popen(cmd, "r") as file:
# if file.read() == "":
# port = i
# break
print(f"MASTER_PORT = {port}")
os.environ["MASTER_PORT"] = str(port)
time.sleep(3)
node_list = os.environ["SLURM_STEP_NODELIST"]
addr = subprocess.getoutput(f"scontrol show hostname {node_list} | head -n1")
if "MASTER_ADDR" not in os.environ:
os.environ["MASTER_ADDR"] = addr
os.environ["RANK"] = str(rank)
os.environ["LOCAL_RANK"] = str(local_rank)
os.environ["LOCAL_WORLD_SIZE"] = str(local_size)
os.environ["WORLD_SIZE"] = str(world_size)
else:
rank = int(os.environ["RANK"])
setup_for_distributed(rank == 0)
print(
f"Rank {os.environ['RANK']} | Local Rank {os.environ['LOCAL_RANK']} | "
f"World Size {os.environ['WORLD_SIZE']} | Local World Size {os.environ['LOCAL_WORLD_SIZE']} |",
force=True
)
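A sketch of how this module is typically driven. Under SLURM, init_distributed_mode derives RANK/LOCAL_RANK/WORLD_SIZE/MASTER_ADDR from the SLURM_* variables; under torchrun those are already set and only the rank-0 print patch applies. Creating the process group (the nccl backend is an assumption here) stays with the caller:

import torch.distributed as dist
from llava.dist_utils import init_distributed_mode, get_rank, get_world_size

init_distributed_mode()                   # fills env vars under SLURM, patches print
dist.init_process_group(backend="nccl")   # process-group creation is left to the caller
print(f"rank {get_rank()} of {get_world_size()}", force=True)  # force bypasses rank-0-only printing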
================================================
FILE: llava-train_videochat/llava/mm_utils.py
================================================
from PIL import Image
from io import BytesIO
import base64
import math
import ast
import re
import torch
from transformers import StoppingCriteria
from llava.constants import IMAGE_TOKEN_INDEX
def resize_and_center_crop(image, shortest_edge_length):
# Calculate new dimensions and resize
aspect_ratio = float(image.width) / float(image.height)
if aspect_ratio > 1:
new_width = int(shortest_edge_length * aspect_ratio)
new_height = shortest_edge_length
else:
new_width = shortest_edge_length
new_height = int(shortest_edge_length / aspect_ratio)
    resized_image = image.resize((new_width, new_height), Image.LANCZOS)  # ANTIALIAS was removed in Pillow 10; LANCZOS is its replacement
# Calculate the position and perform the center crop
left = (new_width - shortest_edge_length) / 2
top = (new_height - shortest_edge_length) / 2
right = (new_width + shortest_edge_length) / 2
bottom = (new_height + shortest_edge_length) / 2
cropped_image = resized_image.crop((left, top, right, bottom))
return cropped_image
def auto_pad_images(image, grid_params):
assert isinstance(image, Image.Image), "Input should be a Pillow Image"
assert len(grid_params) > 0, "Grid parameters should not be empty"
# Step 1: Calculate and find the closest aspect ratio
input_width, input_height = image.size
input_aspect_ratio = input_width / input_height
candidate_resolutions = [(w / h, w, h) for w in grid_params for h in grid_params]
closest_aspect_ratio = min(candidate_resolutions, key=lambda x: abs(input_aspect_ratio - x[0]))
candidate_resolutions = [(x[1], x[2]) for x in candidate_resolutions if abs(x[0] - closest_aspect_ratio[0]) < 1e-3]
target_resolution = min(candidate_resolutions, key=lambda res: abs(max(input_width, input_height) / max(res) - 1))
resize_width, resize_height = target_resolution
if input_width > input_height:
resize_height = int(resize_width / input_aspect_ratio)
else:
resize_width = int(resize_height * input_aspect_ratio)
    resized_image = image.resize((resize_width, resize_height), Image.LANCZOS)  # ANTIALIAS was removed in Pillow 10
    # Step 2: Pad the resized image if necessary to match the target resolution
pad_width = target_resolution[0] - resize_width
pad_height = target_resolution[1] - resize_height
padded_image = Image.new("RGB", target_resolution, color=(0, 0, 0))
padded_image.paste(resized_image, (pad_width // 2, pad_height // 2))
return padded_image
def extract_patches(image, patch_size, overlap_ratio):
assert isinstance(image, Image.Image), "Input should be a Pillow Image"
assert patch_size > 0, "Patch size should be greater than 0"
assert 0 <= overlap_ratio < 1, "Overlap ratio should be between 0 and 1"
W, H = image.size
patches = []
stride = int(patch_size * (1 - overlap_ratio))
num_patches_y = (H - patch_size) // stride + 1
num_patches_x = (W - patch_size) // stride + 1
y_start = (H - (num_patches_y - 1) * stride - patch_size) // 2
x_start = (W - (num_patches_x - 1) * stride - patch_size) // 2
for y in range(y_start, y_start + num_patches_y * stride, stride):
for x in range(x_start, x_start + num_patches_x * stride, stride):
patch = image.crop((x, y, x + patch_size, y + patch_size))
patches.append(patch)
return patches
def process_highres_image_crop_split(image, data_args, processor=None):
crop_resolution = data_args.image_crop_resolution
split_resolution = data_args.image_split_resolution
if processor is None:
processor = data_args.image_processor
image_crop = resize_and_center_crop(image, crop_resolution)
image_patches = extract_patches(image_crop, patch_size=split_resolution, overlap_ratio=0)
image_patches = [processor.preprocess(image_patch, return_tensors="pt")["pixel_values"][0] for image_patch in image_patches]
return torch.stack(image_patches, dim=0)
def process_highres_image(image, processor, grid_pinpoints):
grid_params = [int(x) for x in grid_pinpoints.split(",")]
width_height = max(image.size)
fit_grid_params = [x for x in grid_params if x >= width_height]
if len(fit_grid_params) == 0:
select_size = max(grid_params)
else:
select_size = min(fit_grid_params)
# FIXME: always select the 448
select_size = max(grid_params)
image_padded = expand2square(image, tuple(int(x * 255) for x in processor.image_mean))
# FIXME: this seems to be a bug that it always resizes instead of padding
image_original_resize = image.resize((processor.size["shortest_edge"], processor.size["shortest_edge"]))
image_padded = image_padded.resize((select_size, select_size))
image_patches = extract_patches(image_padded, patch_size=processor.size["shortest_edge"], overlap_ratio=0)
image_patches = [image_original_resize] + image_patches
image_patches = [processor.preprocess(image_patch, return_tensors="pt")["pixel_values"][0] for image_patch in image_patches]
return torch.stack(image_patches, dim=0)
def select_best_resolution(original_size, possible_resolutions, max_resolutions, patch_size):
"""
Selects the best resolution from a list of possible resolutions based on the original size.
Args:
original_size (tuple): The original size of the image in the format (width, height).
possible_resolutions (list): A list of possible resolutions in the format [(width1, height1), (width2, height2), ...].
Returns:
tuple: The best fit resolution in the format (width, height).
"""
original_width, original_height = original_size
best_fit = None
max_effective_resolution = 0
min_wasted_resolution = float("inf")
for width, height in possible_resolutions:
        if max_resolutions is not None and (width * height != patch_size * patch_size):
            if (width * height + patch_size * patch_size) > max_resolutions:  # NOTE: the budget must also cover one global patch
                continue
# Calculate the downscaled size to keep the aspect ratio
scale = min(width / original_width, height / original_height)
downscaled_width, downscaled_height = int(original_width * scale), int(original_height * scale)
# Calculate effective and wasted resolutions
effective_resolution = min(downscaled_width * downscaled_height, original_width * original_height)
wasted_resolution = (width * height) - effective_resolution
if effective_resolution > max_effective_resolution or (effective_resolution == max_effective_resolution and wasted_resolution < min_wasted_resolution):
max_effective_resolution = effective_resolution
min_wasted_resolution = wasted_resolution
best_fit = (width, height)
# print(f"original_size={original_size}, possible_resolutions={possible_resolutions}, max_resolutions={max_resolutions}, best_fit={best_fit}")
assert best_fit is not None, f"Can't find suitable fit in {possible_resolutions} at max:{max_resolutions}"
return best_fit
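# Worked example of the scoring above (hypothetical numbers): for an 800x600 image
# and candidate (672, 672), scale = min(672/800, 672/600) = 0.84, giving a downscaled
# size of 672x504; effective = min(672*504, 800*600) = 338688 and wasted =
# 672*672 - 338688 = 112896. Candidates maximize effective resolution first and
# break ties by minimizing waste.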
def resize_and_pad_image(image, target_resolution):
"""
Resize and pad an image to a target resolution while maintaining aspect ratio.
Args:
image (PIL.Image.Image): The input image.
target_resolution (tuple): The target resolution (width, height) of the image.
Returns:
PIL.Image.Image: The resized and padded image.
"""
original_width, original_height = image.size
target_width, target_height = target_resolution
# Determine which dimension (width or height) to fill
scale_w = target_width / original_width
scale_h = target_height / original_height
if scale_w < scale_h:
# Width will be filled completely
new_width = target_width
new_height = min(math.ceil(original_height * scale_w), target_height)
else:
# Height will be filled completely
new_height = target_height
new_width = min(math.ceil(original_width * scale_h), target_width)
# Resize the image
resized_image = image.resize((new_width, new_height))
# Create a new image with the target size and paste the resized image onto it
new_image = Image.new("RGB", (target_width, target_height), (0, 0, 0))
paste_x = (target_width - new_width) // 2
paste_y = (target_height - new_height) // 2
new_image.paste(resized_image, (paste_x, paste_y))
return new_image
def divide_to_patches(image, patch_size):
"""
Divides an image into patches of a specified size.
Args:
image (PIL.Image.Image): The input image.
patch_size (int): The size of each patch.
Returns:
list: A list of PIL.Image.Image objects representing the patches.
"""
patches = []
width, height = image.size
for i in range(0, height, patch_size):
for j in range(0, width, patch_size):
box = (j, i, j + patch_size, i + patch_size)
patch = image.crop(box)
patches.append(patch)
return patches
def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size, max_resolutions=None):
"""
Calculate the shape of the image patch grid after the preprocessing for images of any resolution.
Args:
image_size (tuple): The size of the input image in the format (width, height).
grid_pinpoints (str): A string representation of a list of possible resolutions.
patch_size (int): The size of each image patch.
Returns:
tuple: The shape of the image patch grid in the format (width, height).
"""
if isinstance(grid_pinpoints, str) and "x" in grid_pinpoints:
assert patch_size in [224, 336, 384, 448, 512], "patch_size should be in [224, 336, 384, 448, 512]"
# Use regex to extract the range from the input string
matches = re.findall(r"\((\d+)x(\d+)\)", grid_pinpoints)
range_start = tuple(map(int, matches[0]))
range_end = tuple(map(int, matches[-1]))
# Generate a matrix of tuples from (range_start[0], range_start[1]) to (range_end[0], range_end[1])
grid_pinpoints = [(i, j) for i in range(range_start[0], range_end[0] + 1) for j in range(range_start[1], range_end[1] + 1)]
# Multiply all elements by patch_size
grid_pinpoints = [[dim * patch_size for dim in pair] for pair in grid_pinpoints]
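        # e.g. grid_pinpoints="(1x1),...,(2x2)" with patch_size=448 expands to
        # [[448, 448], [448, 896], [896, 448], [896, 896]] (illustrative values).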
if type(grid_pinpoints) is list:
possible_resolutions = grid_pinpoints
else:
possible_resolutions = ast.literal_eval(grid_pinpoints)
width, height = select_best_resolution(image_size, possible_resolutions, max_resolutions=max_resolutions, patch_size=patch_size)
# print("get width/patch size", width, patch_size, flush=True)
return width // patch_size, height // patch_size
def process_anyres_image(image, processor, grid_pinpoints):
"""
Process an image with variable resolutions.
Args:
image (PIL.Image.Image): The input image to be processed.
processor: The image processor object.
grid_pinpoints (str): A string representation of a list of possible resolutions.
Returns:
torch.Tensor: A tensor containing the processed image patches.
"""
raise NotImplementedError
# Convert grid_pinpoints from string to list
if isinstance(grid_pinpoints, str) and "x" in grid_pinpoints:
try:
patch_size = processor.size[0]
except Exception as e:
patch_size = processor.size["shortest_edge"]
assert patch_size in [224, 336, 384, 448, 512], "patch_size should be in [224, 336, 384, 448, 512]"
# Use regex to extract the range from the input string
matches = re.findall(r"\((\d+)x(\d+)\)", grid_pinpoints)
range_start = tuple(map(int, matches[0]))
range_end = tuple(map(int, matches[-1]))
# Generate a matrix of tuples from (range_start[0], range_start[1]) to (range_end[0], range_end[1])
grid_pinpoints = [(i, j) for i in range(range_start[0], range_end[0] + 1) for j in range(range_start[1], range_end[1] + 1)]
# Multiply all elements by patch_size
grid_pinpoints = [[dim * patch_size for dim in pair] for pair in grid_pinpoints]
if type(grid_pinpoints) is list:
possible_resolutions = grid_pinpoints
else:
possible_resolutions = ast.literal_eval(grid_pinpoints)
best_resolution = select_best_resolution(image.size, possible_resolutions)
image_padded = resize_and_pad_image(image, best_resolution)
patches = divide_to_patches(image_padded, processor.crop_size["height"])
# FIXME: this seems to be a bug that it resizes instead of pad.
# but to keep it consistent with previous, i will keep it as it is
# TODO: uncomment below to ablate with the padding
if isinstance(processor.size, dict):
shortest_edge = processor.size["shortest_edge"]
else:
shortest_edge = min(processor.size)
image_original_resize = image.resize((shortest_edge, shortest_edge))
# image_padded_square = expand2square(image, tuple(int(x*255) for x in processor.image_mean))
# image_original_resize = image_padded_square.resize((processor.size['shortest_edge'], processor.size['shortest_edge']))
image_patches = [image_original_resize] + patches
image_patches = [processor.preprocess(image_patch, return_tensors="pt")["pixel_values"][0] for image_patch in image_patches]
# print("image.size", image.size, "len(image_patches):", len(image_patches), "patch_size:", image_patches[0].shape)
return torch.stack(image_patches, dim=0)
def process_anyres_image_nopad(image, processor, grid_pinpoints):
"""
Process an image with variable resolutions.
Args:
image (PIL.Image.Image): The input image to be processed.
processor: The image processor object.
grid_pinpoints (str): A string representation of a list of possible resolutions.
Returns:
torch.Tensor: A tensor containing the processed image patches.
"""
# Convert grid_pinpoints from string to list
try:
patch_size = processor.size[0]
except Exception as e:
patch_size = processor.size["shortest_edge"]
assert patch_size in [224, 336, 384, 448, 512], "patch_size should be in [224, 336, 384, 448, 512]"
if isinstance(grid_pinpoints, str) and "x" in grid_pinpoints:
# Use regex to extract the range from the input string
matches = re.findall(r"\((\d+)x(\d+)\)", grid_pinpoints)
range_start = tuple(map(int, matches[0]))
range_end = tuple(map(int, matches[-1]))
# Generate a matrix of tuples from (range_start[0], range_start[1]) to (range_end[0], range_end[1])
grid_pinpoints = [(i, j) for i in range(range_start[0], range_end[0] + 1) for j in range(range_start[1], range_end[1] + 1)]
# Multiply all elements by patch_size
grid_pinpoints = [[dim * patch_size for dim in pair] for pair in grid_pinpoints]
if type(grid_pinpoints) is list:
possible_resolutions = grid_pinpoints
else:
possible_resolutions = ast.literal_eval(grid_pinpoints)
    best_resolution = select_best_resolution(image.size, possible_resolutions, max_resolutions=None, patch_size=patch_size)  # images are currently uncapped
# image_padded = resize_and_pad_image(image, best_resolution)
patches = divide_to_patches(image.resize(best_resolution), patch_size)
# FIXME: this seems to be a bug that it resizes instead of pad.
# but to keep it consistent with previous, i will keep it as it is
# TODO: uncomment below to ablate with the padding
if isinstance(processor.size, dict):
shortest_edge = processor.size["shortest_edge"]
else:
shortest_edge = min(processor.size)
image_original_resize = image.resize((shortest_edge, shortest_edge))
# image_padded_square = expand2square(image, tuple(int(x*255) for x in processor.image_mean))
# image_original_resize = image_padded_square.resize((processor.size['shortest_edge'], processor.size['shortest_edge']))
image_patches = [image_original_resize] + patches
image_patches = [processor.preprocess(image_patch, return_tensors="pt")["pixel_values"][0] for image_patch in image_patches]
# raise ValueError(f"image.size: {image.size} len(image_patches): {len(image_patches)}, patch_size:, {image_patches[0].shape}, possible_resolutions:, {possible_resolutions}, best: {best_resolution}")
return torch.stack(image_patches, dim=0)
def process_anyres_video_nopad(video, processor, grid_pinpoints, max_resolutions):
"""
Process an image with variable resolutions.
Args:
video (numpy.ndarray): (T, H, W, C)
image (PIL.Image.Image): The input image to be processed.
processor: The image processor object.
grid_pinpoints (str): A string representation of a list of possible resolutions.
Returns:
torch.Tensor: A tensor containing the processed image patches.
"""
# Convert grid_pinpoints from string to list
try:
patch_size = processor.size[0]
except Exception as e:
patch_size = processor.size["shortest_edge"]
assert patch_size in [224, 336, 384, 448, 512], "patch_size should be in [224, 336, 384, 448, 512]"
if isinstance(grid_pinpoints, str) and "x" in grid_pinpoints:
# Use regex to extract the range from the input string
matches = re.findall(r"\((\d+)x(\d+)\)", grid_pinpoints)
range_start = tuple(map(int, matches[0]))
range_end = tuple(map(int, matches[-1]))
# Generate a matrix of tuples from (range_start[0], range_start[1]) to (range_end[0], range_end[1])
grid_pinpoints = [(i, j) for i in range(range_start[0], range_end[0] + 1) for j in range(range_start[1], range_end[1] + 1)]
# Multiply all elements by patch_size
grid_pinpoints = [[dim * patch_size for dim in pair] for pair in grid_pinpoints]
if type(grid_pinpoints) is list:
possible_resolutions = grid_pinpoints
else:
possible_resolutions = ast.literal_eval(grid_pinpoints)
best_resolution = select_best_resolution(video[0].shape[0:2], possible_resolutions, max_resolutions=max_resolutions, patch_size=patch_size)
video = processor.preprocess(video, return_tensors="pt", target_size=best_resolution)["pixel_values"]
print("data: new_video.shape:", video.shape, "best_resolution:", best_resolution)
return video
def load_image_from_base64(image):
return Image.open(BytesIO(base64.b64decode(image)))
def expand2square(pil_img, background_color):
width, height = pil_img.size
if width == height:
return pil_img
elif width > height:
result = Image.new(pil_img.mode, (width, width), background_color)
result.paste(pil_img, (0, (width - height) // 2))
return result
else:
result = Image.new(pil_img.mode, (height, height), background_color)
result.paste(pil_img, ((height - width) // 2, 0))
return result
def process_images(images, image_processor, model_cfg):
image_aspect_ratio = getattr(model_cfg, "image_aspect_ratio", None)
new_images = []
if image_aspect_ratio == "highres":
raise NotImplementedError
for image in images:
image = process_highres_image(image, image_processor, model_cfg.image_grid_pinpoints)
new_images.append(image)
elif "anyres" in image_aspect_ratio:
for image in images:
if "nopad" in image_aspect_ratio:
image = process_anyres_image_nopad(image, image_processor, model_cfg.image_grid_pinpoints)
else:
image = process_anyres_image(image, image_processor, model_cfg.image_grid_pinpoints)
new_images.append(image)
elif image_aspect_ratio == "crop_split":
raise NotImplementedError
for image in images:
image = process_highres_image_crop_split(image, model_cfg, image_processor)
new_images.append(image)
elif image_aspect_ratio == "pad":
for image in images:
image = expand2square(image, tuple(int(x * 255) for x in image_processor.image_mean))
image = image_processor.preprocess(image, return_tensors="pt")["pixel_values"][0]
new_images.append(image)
else:
return image_processor.preprocess(images, return_tensors="pt")["pixel_values"]
if all(x.shape == new_images[0].shape for x in new_images):
new_images = torch.stack(new_images, dim=0)
return new_images
def tokenizer_image_token(prompt, tokenizer, image_token_index=IMAGE_TOKEN_INDEX, return_tensors=None):
prompt_chunks = [tokenizer(chunk).input_ids for chunk in prompt.split("<image>")]
def insert_separator(X, sep):
return [ele for sublist in zip(X, [sep] * len(X)) for ele in sublist][:-1]
input_ids = []
offset = 0
if len(prompt_chunks) > 0 and len(prompt_chunks[0]) > 0 and prompt_chunks[0][0] == tokenizer.bos_token_id:
offset = 1
input_ids.append(prompt_chunks[0][0])
for x in insert_separator(prompt_chunks, [image_token_index] * (offset + 1)):
input_ids.extend(x[offset:])
if return_tensors is not None:
if return_tensors == "pt":
return torch.tensor(input_ids, dtype=torch.long)
raise ValueError(f"Unsupported tensor type: {return_tensors}")
return input_ids
def get_model_name_from_path(model_path):
model_path = model_path.strip("/")
model_paths = model_path.split("/")
if model_paths[-1].startswith("checkpoint-"):
return model_paths[-2] + "_" + model_paths[-1]
else:
return model_paths[-1]
class KeywordsStoppingCriteria(StoppingCriteria):
def __init__(self, keywords, tokenizer, input_ids):
self.keywords = keywords
self.keyword_ids = []
for keyword in keywords:
cur_keyword_ids = tokenizer(keyword).input_ids
if len(cur_keyword_ids) > 1 and cur_keyword_ids[0] == tokenizer.bos_token_id:
cur_keyword_ids = cur_keyword_ids[1:]
self.keyword_ids.append(torch.tensor(cur_keyword_ids))
self.tokenizer = tokenizer
self.start_len = input_ids.shape[1]
def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
assert output_ids.shape[0] == 1, "Only support batch size 1 (yet)" # TODO
offset = min(output_ids.shape[1] - self.start_len, 3)
self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids]
for keyword_id in self.keyword_ids:
            if (output_ids[0, -keyword_id.shape[0]:] == keyword_id).all():
return True
outputs = self.tokenizer.batch_decode(output_ids[:, -offset:], skip_special_tokens=True)[0]
for keyword in self.keywords:
if keyword in outputs:
return True
return False
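A usage sketch for tokenizer_image_token above; the tokenizer checkpoint is only an example, any Hugging Face tokenizer behaves the same here:

from transformers import AutoTokenizer
from llava.mm_utils import tokenizer_image_token

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct")
ids = tokenizer_image_token("<image>\nWhat is shown?", tok, return_tensors="pt")
# "<image>" becomes IMAGE_TOKEN_INDEX (-200); the model later swaps that position
# for the projected visual features.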
================================================
FILE: llava-train_videochat/llava/model/__init__.py
================================================
import os
AVAILABLE_MODELS = {
"llava_qwen": "LlavaQwenForCausalLM, LlavaQwenConfig",
"llava_qwen_flash": "LlavaQwenForCausalLM_Flash, LlavaQwenConfig_Flash"
}
for model_name, model_classes in AVAILABLE_MODELS.items():
try:
exec(f"from .language_model.{model_name} import {model_classes}")
except Exception as e:
print(f"Failed to import {model_name} from llava.language_model.{model_name}. Error: {e}")
================================================
FILE: llava-train_videochat/llava/model/apply_delta.py
================================================
"""
Usage:
python3 -m fastchat.model.apply_delta --base ~/model_weights/llama-7b --target ~/model_weights/vicuna-7b --delta lmsys/vicuna-7b-delta
"""
import argparse
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from llava import LlavaLlamaForCausalLM
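# NOTE: vestigial upstream import; LlavaLlamaForCausalLM is not exported by this
# repository's llava package (see llava/__init__.py, which only exports the Qwen models).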
def apply_delta(base_model_path, target_model_path, delta_path):
print("Loading base model")
base = AutoModelForCausalLM.from_pretrained(base_model_path, torch_dtype=torch.float16, low_cpu_mem_usage=True)
print("Loading delta")
delta = LlavaLlamaForCausalLM.from_pretrained(delta_path, torch_dtype=torch.float16, low_cpu_mem_usage=True)
delta_tokenizer = AutoTokenizer.from_pretrained(delta_path)
print("Applying delta")
for name, param in tqdm(delta.state_dict().items(), desc="Applying delta"):
if name not in base.state_dict():
assert name in ["model.mm_projector.weight", "model.mm_projector.bias"], f"{name} not in base model"
continue
if param.data.shape == base.state_dict()[name].shape:
param.data += base.state_dict()[name]
else:
assert name in ["model.embed_tokens.weight", "lm_head.weight"], f"{name} dimension mismatch: {param.data.shape} vs {base.state_dict()[name].shape}"
bparam = base.state_dict()[name]
param.data[: bparam.shape[0], : bparam.shape[1]] += bparam
print("Saving target model")
delta.save_pretrained(target_model_path)
delta_tokenizer.save_pretrained(target_model_path)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--base-model-path", type=str, required=True)
parser.add_argument("--target-model-path", type=str, required=True)
parser.add_argument("--delta-path", type=str, required=True)
args = parser.parse_args()
apply_delta(args.base_model_path, args.target_model_path, args.delta_path)
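The shape-mismatch branch in apply_delta handles delta checkpoints that grew the vocabulary: only the rows and columns that overlap with the base receive the base weights. A toy illustration with made-up sizes:

import torch

base = torch.zeros(32000, 4096)                   # base model embedding table
delta = torch.randn(32004, 4096)                  # delta adds rows for 4 new tokens
delta[: base.shape[0], : base.shape[1]] += base   # only the overlapping block absorbs the base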
================================================
FILE: llava-train_videochat/llava/model/builder.py
================================================
# Copyright 2023 Haotian Liu
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import warnings
import shutil
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, BitsAndBytesConfig
import torch
from llava.model import *
from llava.constants import DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from llava.utils import rank0_print
def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, load_4bit=False, device_map="auto", attn_implementation="flash_attention_2", customized_config=None, overwrite_config=None, **kwargs):
kwargs["device_map"] = device_map
if load_8bit:
kwargs["load_in_8bit"] = True
elif load_4bit:
kwargs["load_in_4bit"] = True
kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4")
else:
kwargs["torch_dtype"] = torch.float16
if customized_config is not None:
kwargs["config"] = customized_config
if "multimodal" in kwargs:
if kwargs["multimodal"] is True:
is_multimodal = True
kwargs.pop("multimodal")
else:
is_multimodal = False
else:
is_multimodal = False
assert is_multimodal, "I need it!!!"
if "llava" in model_name.lower() or is_multimodal:
# Load LLaVA model
if "lora" in model_name.lower() and model_base is None:
raise NotImplementedError("I don't like lora.")
warnings.warn(
"There is `lora` in model name but no `model_base` is provided. If you are loading a LoRA model, please provide the `model_base` argument. Detailed instruction: https://github.com/haotian-liu/LLaVA#launch-a-model-worker-lora-weights-unmerged."
)
if "lora" in model_name.lower() and model_base is not None:
            raise NotImplementedError("LoRA checkpoints are not supported by this builder.")
lora_cfg_pretrained = AutoConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
rank0_print("Loading LLaVA from base model...")
if "mixtral" in model_name.lower():
from llava.model.language_model.llava_mixtral import LlavaMixtralConfig
lora_cfg_pretrained = LlavaMixtralConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
model = LlavaMixtralForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=lora_cfg_pretrained, attn_implementation=attn_implementation, **kwargs)
elif "mistral" in model_name.lower():
from llava.model.language_model.llava_mistral import LlavaMistralConfig
lora_cfg_pretrained = LlavaMistralConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
model = LlavaMistralForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=lora_cfg_pretrained, attn_implementation=attn_implementation, **kwargs)
elif "gemma" in model_name.lower():
from llava.model.language_model.llava_gemma import LlavaGemmaConfig
lora_cfg_pretrained = LlavaGemmaConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
model = LlavaGemmaForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=lora_cfg_pretrained, attn_implementation=attn_implementation, **kwargs)
else:
from llava.model.language_model.llava_llama import LlavaConfig
lora_cfg_pretrained = LlavaConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
model = LlavaLlamaForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=lora_cfg_pretrained, attn_implementation=attn_implementation, **kwargs)
            token_num, token_dim = model.lm_head.out_features, model.lm_head.in_features
            if model.lm_head.weight.shape[0] != token_num:
                model.lm_head.weight = torch.nn.Parameter(torch.empty(token_num, token_dim, device=model.device, dtype=model.dtype))
                model.model.embed_tokens.weight = torch.nn.Parameter(torch.empty(token_num, token_dim, device=model.device, dtype=model.dtype))
rank0_print("Loading additional LLaVA weights...")
if os.path.exists(os.path.join(model_path, "non_lora_trainables.bin")):
non_lora_trainables = torch.load(os.path.join(model_path, "non_lora_trainables.bin"), map_location="cpu")
else:
# this is probably from HF Hub
from huggingface_hub import hf_hub_download
def load_from_hf(repo_id, filename, subfolder=None):
cache_file = hf_hub_download(repo_id=repo_id, filename=filename, subfolder=subfolder)
return torch.load(cache_file, map_location="cpu")
non_lora_trainables = load_from_hf(model_path, "non_lora_trainables.bin")
non_lora_trainables = {(k[11:] if k.startswith("base_model.") else k): v for k, v in non_lora_trainables.items()}
if any(k.startswith("model.model.") for k in non_lora_trainables):
non_lora_trainables = {(k[6:] if k.startswith("model.") else k): v for k, v in non_lora_trainables.items()}
model.load_state_dict(non_lora_trainables, strict=False)
from peft import PeftModel
rank0_print("Loading LoRA weights...")
model = PeftModel.from_pretrained(model, model_path)
rank0_print("Merging LoRA weights...")
model = model.merge_and_unload()
rank0_print("Model is loaded...")
        elif model_base is not None: # this may be mm projector only, loading projector with preset language model
rank0_print(f"Loading LLaVA from base model {model_base}...")
if "mixtral" in model_name.lower():
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
cfg_pretrained = AutoConfig.from_pretrained(model_path)
model = LlavaMixtralForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=cfg_pretrained, attn_implementation=attn_implementation, **kwargs)
elif "mistral" in model_name.lower() or "zephyr" in model_name.lower():
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
cfg_pretrained = AutoConfig.from_pretrained(model_path)
model = LlavaMistralForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=cfg_pretrained, attn_implementation=attn_implementation, **kwargs)
elif "gemma" in model_name.lower():
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
cfg_pretrained = AutoConfig.from_pretrained(model_path)
model = LlavaGemmaForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=cfg_pretrained, attn_implementation=attn_implementation, **kwargs)
elif (
"wizardlm-2" in model_name.lower()
and "vicuna" in model_name.lower()
or "llama" in model_name.lower()
or "yi" in model_name.lower()
or "nous-hermes" in model_name.lower()
or "llava-v1.6-34b" in model_name.lower()
or "llava-v1.5" in model_name.lower()
):
from llava.model.language_model.llava_llama import LlavaConfig
            tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
            if customized_config is None:
                llava_cfg = LlavaConfig.from_pretrained(model_path)
                if "v1.5" in model_name.lower():
                    llava_cfg.delay_load = True  # a workaround for correctly loading v1.5 models
            else:
                llava_cfg = customized_config
            model = LlavaLlamaForCausalLM.from_pretrained(model_base, low_cpu_mem_usage=True, config=llava_cfg, **kwargs)
else:
raise ValueError(f"Model {model_name} not supported")
mm_projector_weights = torch.load(os.path.join(model_path, "mm_projector.bin"), map_location="cpu")
mm_projector_weights = {k: v.to(torch.float16) for k, v in mm_projector_weights.items()}
model.load_state_dict(mm_projector_weights, strict=False)
else:
rank0_print(f"Loaded LLaVA model: {model_path}")
if "mixtral" in model_name.lower():
raise NotImplementedError("I don't like it.")
from llava.model.language_model.llava_mixtral import LlavaMixtralConfig
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
if customized_config is None:
llava_cfg = LlavaMixtralConfig.from_pretrained(model_path)
else:
llava_cfg = customized_config
if overwrite_config is not None:
rank0_print(f"Overwriting config with {overwrite_config}")
for k, v in overwrite_config.items():
setattr(llava_cfg, k, v)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = LlavaMixtralForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, **kwargs)
elif "mistral" in model_name.lower() or "zephyr" in model_name.lower():
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = LlavaMistralForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, **kwargs)
elif (
"wizardlm-2" in model_name.lower()
and "vicuna" in model_name.lower()
or "llama" in model_name.lower()
# or "yi" in model_name.lower() # 太容易撞车了
or "nous-hermes" in model_name.lower()
or "llava-v1.6-34b" in model_name.lower()
or "llava-v1.5" in model_name.lower()
):
raise NotImplementedError("I don't like it")
from llava.model.language_model.llava_llama import LlavaConfig
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
if customized_config is None:
llava_cfg = LlavaConfig.from_pretrained(model_path)
if "v1.5" in model_name.lower():
llava_cfg.delay_load = True # a workaround for correctly loading v1.5 models
else:
llava_cfg = customized_config
if overwrite_config is not None:
rank0_print(f"Overwriting config with {overwrite_config}")
for k, v in overwrite_config.items():
setattr(llava_cfg, k, v)
model = LlavaLlamaForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, **kwargs)
elif "qwen" in model_name.lower() or "quyen" in model_name.lower():
tokenizer = AutoTokenizer.from_pretrained(model_path)
if "moe" in model_name.lower() or "A14B" in model_name.lower():
from llava.model.language_model.llava_qwen_moe import LlavaQwenMoeConfig
if overwrite_config is not None:
llava_cfg = LlavaQwenMoeConfig.from_pretrained(model_path)
rank0_print(f"Overwriting config with {overwrite_config}")
for k, v in overwrite_config.items():
setattr(llava_cfg, k, v)
model = LlavaQwenMoeForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, **kwargs)
else:
model = LlavaQwenMoeForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, **kwargs)
elif "flash" in model_name.lower():
from llava.model.language_model.llava_qwen_flash import LlavaQwenConfig_Flash
if overwrite_config is not None:
llava_cfg = LlavaQwenConfig_Flash.from_pretrained(model_path)
rank0_print(f"Overwriting config with {overwrite_config}")
for k, v in overwrite_config.items():
setattr(llava_cfg, k, v)
model = LlavaQwenForCausalLM_Flash.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, **kwargs)
else:
model = LlavaQwenForCausalLM_Flash.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, **kwargs)
else:
from llava.model.language_model.llava_qwen import LlavaQwenConfig
if overwrite_config is not None:
llava_cfg = LlavaQwenConfig.from_pretrained(model_path)
rank0_print(f"Overwriting config with {overwrite_config}")
for k, v in overwrite_config.items():
setattr(llava_cfg, k, v)
model = LlavaQwenForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, **kwargs)
else:
model = LlavaQwenForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, **kwargs)
elif "internlm2" in model_name.lower():
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
from llava.model.language_model.llava_internlm2 import LlavaInternLM2Config
if overwrite_config is not None:
llava_cfg = LlavaInternLM2Config.from_pretrained(model_path, trust_remote_code=True)
rank0_print(f"Overwriting config with {overwrite_config}")
for k, v in overwrite_config.items():
setattr(llava_cfg, k, v)
model = LlavaInternLM2ForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, trust_remote_code=True, **kwargs)
else:
model = LlavaInternLM2ForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, trust_remote_code=True, attn_implementation=attn_implementation, **kwargs)
elif "gemma" in model_name.lower():
raise NotImplementedError("I don't like it")
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
cfg_pretrained = AutoConfig.from_pretrained(model_path)
model = LlavaGemmaForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, config=cfg_pretrained, attn_implementation=attn_implementation, **kwargs)
else:
# default to qwen
try:
tokenizer = AutoTokenizer.from_pretrained(model_path)
if "moe" in model_name.lower() or "A14B" in model_name.lower():
from llava.model.language_model.llava_qwen_moe import LlavaQwenMoeConfig
if overwrite_config is not None:
llava_cfg = LlavaQwenMoeConfig.from_pretrained(model_path)
rank0_print(f"Overwriting config with {overwrite_config}")
for k, v in overwrite_config.items():
setattr(llava_cfg, k, v)
model = LlavaQwenMoeForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, **kwargs)
else:
model = LlavaQwenMoeForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, **kwargs)
elif "flash" in model_name.lower():
from llava.model.language_model.llava_qwen_flash import LlavaQwenConfig_Flash
if overwrite_config is not None:
llava_cfg = LlavaQwenConfig_Flash.from_pretrained(model_path)
rank0_print(f"Overwriting config with {overwrite_config}")
for k, v in overwrite_config.items():
setattr(llava_cfg, k, v)
model = LlavaQwenForCausalLM_Flash.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, **kwargs)
else:
model = LlavaQwenForCausalLM_Flash.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, **kwargs)
elif "fastv" in model_name.lower():
from llava.model.language_model.llava_qwen_fastv import LlavaQwenConfig_FastV
if overwrite_config is not None:
llava_cfg = LlavaQwenConfig_FastV.from_pretrained(model_path)
rank0_print(f"Overwriting config with {overwrite_config}")
for k, v in overwrite_config.items():
setattr(llava_cfg, k, v)
model = LlavaQwenForCausalLM_FastV.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, **kwargs)
else:
model = LlavaQwenForCausalLM_FastV.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, **kwargs)
else:
from llava.model.language_model.llava_qwen import LlavaQwenConfig
if overwrite_config is not None:
llava_cfg = LlavaQwenConfig.from_pretrained(model_path)
rank0_print(f"Overwriting config with {overwrite_config}")
for k, v in overwrite_config.items():
setattr(llava_cfg, k, v)
model = LlavaQwenForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, **kwargs)
else:
model = LlavaQwenForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, **kwargs)
except Exception as e:
raise ValueError(f"Model {model_name} not supported") from e
# try:
# from llava.model.language_model.llava_llama import LlavaConfig
# tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
# if customized_config is None:
# llava_cfg = LlavaConfig.from_pretrained(model_path)
# if "v1.5" in model_path.lower():
# llava_cfg.delay_load = True # a workaround for correctly loading v1.5 models
# else:
# llava_cfg = customized_config
# if overwrite_config is not None:
# rank0_print(f"Overwriting config with {overwrite_config}")
# for k, v in overwrite_config.items():
# setattr(llava_cfg, k, v)
# model = LlavaLlamaForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, attn_implementation=attn_implementation, config=llava_cfg, **kwargs)
# except:
# raise ValueError(f"Model {model_name} not supported")
else:
raise NotImplementedError("I don't want language model only.")
# Load language model
if model_base is not None:
# PEFT model
from peft import PeftModel
tokenizer = AutoTokenizer.from_pretrained(model_base, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_base, torch_dtype=torch.float16, low_cpu_mem_usage=True, device_map="auto")
print(f"Loading LoRA weights from {model_path}")
model = PeftModel.from_pretrained(model, model_path)
print(f"Merging weights")
model = model.merge_and_unload()
print("Convert to FP16...")
model.to(torch.float16)
else:
use_fast = False
if "mpt" in model_name.lower().replace("prompt", ""):
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, trust_remote_code=True, **kwargs)
else:
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
rank0_print(f"Model Class: {model.__class__.__name__}")
image_processor = None
if "llava" in model_name.lower() or is_multimodal:
mm_use_im_start_end = getattr(model.config, "mm_use_im_start_end", False)
mm_use_im_patch_token = getattr(model.config, "mm_use_im_patch_token", True)
if mm_use_im_patch_token:
tokenizer.add_tokens([DEFAULT_IMAGE_PATCH_TOKEN], special_tokens=True)
if mm_use_im_start_end:
tokenizer.add_tokens([DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True)
model.resize_token_embeddings(len(tokenizer))
vision_tower = model.get_vision_tower()
if not vision_tower.is_loaded:
vision_tower.load_model(device_map=device_map)
if device_map != "auto":
vision_tower.to(device="cuda", dtype=torch.float16)
image_processor = vision_tower.image_processor
if hasattr(model.config, "max_sequence_length"):
context_len = model.config.max_sequence_length
elif hasattr(model.config, "max_position_embeddings"):
context_len = model.config.max_position_embeddings
elif hasattr(model.config, "tokenizer_model_max_length"):
context_len = model.config.tokenizer_model_max_length
else:
context_len = 2048
return tokenizer, model, image_processor, context_len
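# --- Illustrative usage sketch (not part of the original file) ---
# A minimal, hedged example of calling load_pretrained_model, assuming the
# signature defined earlier in this file (model_path, model_base, model_name,
# overwrite_config, ...). The checkpoint path is a placeholder, the
# overwrite_config key is hypothetical, and a model_name containing "flash"
# routes to LlavaQwenForCausalLM_Flash as in the dispatch above.
if __name__ == "__main__":
    tokenizer, model, image_processor, context_len = load_pretrained_model(
        model_path="path/to/videochat-flash-checkpoint",  # placeholder path
        model_base=None,
        model_name="videochat_flash",  # "flash" selects the Flash variant
        overwrite_config={"mm_spatial_pool_stride": 2},  # hypothetical override key
    )
    print(model.__class__.__name__, context_len)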
================================================
FILE: llava-train_videochat/llava/model/consolidate.py
================================================
"""
Usage:
python3 -m llava.model.consolidate --src ~/model_weights/llava-7b --dst ~/model_weights/llava-7b_consolidate
"""
import argparse
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from llava.model import *
from llava.model.utils import auto_upgrade
def consolidate_ckpt(src_path, dst_path):
print("Loading model")
auto_upgrade(src_path)
src_model = AutoModelForCausalLM.from_pretrained(src_path, torch_dtype=torch.float16, low_cpu_mem_usage=True)
src_tokenizer = AutoTokenizer.from_pretrained(src_path, use_fast=False)
src_model.save_pretrained(dst_path)
src_tokenizer.save_pretrained(dst_path)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--src", type=str, required=True)
parser.add_argument("--dst", type=str, required=True)
args = parser.parse_args()
consolidate_ckpt(args.src, args.dst)
================================================
FILE: llava-train_videochat/llava/model/language_model/llava_qwen.py
================================================
# Copyright 2024 Hao Zhang
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Optional, Tuple, Union, Dict
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
import transformers
from transformers import AutoConfig, AutoModelForCausalLM, LlamaConfig, LlamaModel, LlamaForCausalLM
from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers.generation.utils import GenerateOutput
# from ...constants import IGNORE_INDEX, IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from llava.model.llava_arch import LlavaMetaModel, LlavaMetaForCausalLM
from transformers import Qwen2Config, Qwen2Model, Qwen2ForCausalLM
# from .qwen.modeling_qwen import QWenLMHeadModel, QWenModel
# from .qwen.configuration_qwen import QWenConfig
class LlavaQwenConfig(Qwen2Config):
model_type = "llava_qwen"
class LlavaQwenModel(LlavaMetaModel, Qwen2Model):
config_class = LlavaQwenConfig
def __init__(self, config: Qwen2Config):
super(LlavaQwenModel, self).__init__(config)
class LlavaQwenForCausalLM(Qwen2ForCausalLM, LlavaMetaForCausalLM):
config_class = LlavaQwenConfig
def __init__(self, config):
# super(Qwen2ForCausalLM, self).__init__(config)
Qwen2ForCausalLM.__init__(self, config)
config.model_type = "llava_qwen"
config.rope_scaling = None
self.model = LlavaQwenModel(config)
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
# Initialize weights and apply final processing
self.post_init()
def get_model(self):
return self.model
def forward(
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
images: Optional[torch.FloatTensor] = None,
image_sizes: Optional[List[List[int]]] = None,
return_dict: Optional[bool] = None,
modalities: Optional[List[str]] = ["image"],
dpo_forward: Optional[bool] = False,
cache_position=None,
) -> Union[Tuple, CausalLMOutputWithPast]:
# print("images[0].shape:", images[0].shape)
if inputs_embeds is None:
(input_ids, position_ids, attention_mask, past_key_values, inputs_embeds, labels) = self.prepare_inputs_labels_for_multimodal(input_ids, position_ids, attention_mask, past_key_values, labels, images, modalities, image_sizes)
# print("inputs_embeds.shape:", inputs_embeds.shape)
if dpo_forward:
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
hidden_states = outputs[0]
logits = self.lm_head(hidden_states)
return logits, labels
else:
return super().forward(
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
labels=labels,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
@torch.no_grad()
def generate(
self,
inputs: Optional[torch.Tensor] = None,
images: Optional[torch.Tensor] = None,
image_sizes: Optional[torch.Tensor] = None,
modalities: Optional[List[str]] = ["image"],
**kwargs,
) -> Union[GenerateOutput, torch.LongTensor]:
position_ids = kwargs.pop("position_ids", None)
attention_mask = kwargs.pop("attention_mask", None)
if "inputs_embeds" in kwargs:
raise NotImplementedError("`inputs_embeds` is not supported")
if images is not None:
(inputs, position_ids, attention_mask, _, inputs_embeds, _) = self.prepare_inputs_labels_for_multimodal(inputs, position_ids, attention_mask, None, None, images, modalities, image_sizes=image_sizes)
else:
inputs_embeds = self.get_model().embed_tokens(inputs)
return super().generate(position_ids=position_ids, attention_mask=attention_mask, inputs_embeds=inputs_embeds, **kwargs)
def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs):
images = kwargs.pop("images", None)
image_sizes = kwargs.pop("image_sizes", None)
inputs = super().prepare_inputs_for_generation(input_ids, past_key_values=past_key_values, inputs_embeds=inputs_embeds, **kwargs)
if images is not None:
inputs["images"] = images
if image_sizes is not None:
inputs["image_sizes"] = image_sizes
return inputs
AutoConfig.register("llava_qwen", LlavaQwenConfig)
AutoModelForCausalLM.register(LlavaQwenConfig, LlavaQwenForCausalLM)
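# --- Illustrative registration check (not part of the original file) ---
# After the two register() calls above, the Hugging Face Auto classes can
# resolve the "llava_qwen" model_type on their own; AutoConfig.for_model
# builds a default LlavaQwenConfig from the registered mapping.
def _demo_llava_qwen_registration():
    cfg = AutoConfig.for_model("llava_qwen")
    assert isinstance(cfg, LlavaQwenConfig)
    return cfg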
================================================
FILE: llava-train_videochat/llava/model/language_model/llava_qwen_flash.py
================================================
# Copyright 2024 Hao Zhang
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Optional, Tuple, Union, Dict
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
import transformers
from transformers import AutoConfig, AutoModelForCausalLM, LlamaConfig, LlamaModel, LlamaForCausalLM
from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers.generation.utils import GenerateOutput
# from ...constants import IGNORE_INDEX, IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from llava.model.llava_arch import LlavaMetaModel, LlavaMetaForCausalLM
from transformers import Qwen2Config
# from .qwen.modeling_qwen import QWenLMHeadModel, QWenModel
# from .qwen.configuration_qwen import QWenConfig
from .modeling_qwen2_flash import Qwen2Model_Flash, Qwen2ForCausalLM_Flash
class LlavaQwenConfig_Flash(Qwen2Config):
model_type = "llava_qwen_flash"
class LlavaQwenModel_Flash(LlavaMetaModel, Qwen2Model_Flash):
config_class = LlavaQwenConfig_Flash
def __init__(self, config: Qwen2Config):
super(LlavaQwenModel_Flash, self).__init__(config)
class LlavaQwenForCausalLM_Flash(Qwen2ForCausalLM_Flash, LlavaMetaForCausalLM):
config_class = LlavaQwenConfig_Flash
def __init__(self, config):
# super(Qwen2ForCausalLM, self).__init__(config)
Qwen2ForCausalLM_Flash.__init__(self, config)
config.model_type = "llava_qwen_flash"
# config.rope_scaling = None
self.model = LlavaQwenModel_Flash(config)
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
# Initialize weights and apply final processing
self.post_init()
def get_model(self):
return self.model
def forward(
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
images: Optional[torch.FloatTensor] = None,
image_sizes: Optional[List[List[int]]] = None,
return_dict: Optional[bool] = None,
modalities: Optional[List[str]] = ["image"],
dpo_forward: Optional[bool] = False,
cache_position=None,
) -> Union[Tuple, CausalLMOutputWithPast]:
if inputs_embeds is None:
(input_ids, position_ids, attention_mask, past_key_values, inputs_embeds, labels) = self.prepare_inputs_labels_for_multimodal(input_ids, position_ids, attention_mask, past_key_values, labels, images, modalities, image_sizes)
# print("inputs_embeds.shape:", inputs_embeds.shape)
if dpo_forward:
outputs, labels = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
labels=labels
)
hidden_states = outputs[0]
logits = self.lm_head(hidden_states)
return logits, labels
else:
return super().forward(
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
labels=labels,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
@torch.no_grad()
def generate(
self,
inputs: Optional[torch.Tensor] = None,
images: Optional[torch.Tensor] = None,
image_sizes: Optional[torch.Tensor] = None,
modalities: Optional[List[str]] = ["image"],
**kwargs,
) -> Union[GenerateOutput, torch.LongTensor]:
position_ids = kwargs.pop("position_ids", None)
attention_mask = kwargs.pop("attention_mask", None)
if "inputs_embeds" in kwargs:
raise NotImplementedError("`inputs_embeds` is not supported")
if images is not None:
(inputs, position_ids, attention_mask, _, inputs_embeds, _) = self.prepare_inputs_labels_for_multimodal(inputs, position_ids, attention_mask, None, None, images, modalities, image_sizes=image_sizes)
else:
self.model.image_token_posi = [-1]
self.model.prompt_len = None
self.model.image_tokens = [0]
inputs_embeds = self.get_model().embed_tokens(inputs)
return super().generate(position_ids=position_ids, attention_mask=attention_mask, inputs_embeds=inputs_embeds, **kwargs)
def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs):
images = kwargs.pop("images", None)
image_sizes = kwargs.pop("image_sizes", None)
inputs = super().prepare_inputs_for_generation(input_ids, past_key_values=past_key_values, inputs_embeds=inputs_embeds, **kwargs)
if images is not None:
inputs["images"] = images
if image_sizes is not None:
inputs["image_sizes"] = image_sizes
return inputs
AutoConfig.register("llava_qwen_flash", LlavaQwenConfig_Flash)
AutoModelForCausalLM.register(LlavaQwenConfig_Flash, LlavaQwenForCausalLM_Flash)
================================================
FILE: llava-train_videochat/llava/model/language_model/modeling_qwen2_flash.py
================================================
# coding=utf-8
# transformers==4.39.2 or 4.40.1 NOTE
# Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" PyTorch Qwen2 model."""
import inspect
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from transformers.activations import ACT2FN
from transformers.cache_utils import Cache, DynamicCache
from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa
from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import (
add_start_docstrings,
add_start_docstrings_to_model_forward,
is_flash_attn_2_available,
is_flash_attn_greater_or_equal_2_10,
logging,
replace_return_docstrings,
)
from transformers.models.qwen2.configuration_qwen2 import Qwen2Config
from llava.constants import IGNORE_INDEX
if is_flash_attn_2_available():
from flash_attn import flash_attn_func, flash_attn_varlen_func
from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
_flash_supports_window_size = "window_size" in list(inspect.signature(flash_attn_func).parameters)
logger = logging.get_logger(__name__)
_CHECKPOINT_FOR_DOC = "Qwen/Qwen2-7B-beta"
_CONFIG_FOR_DOC = "Qwen2Config"
QWEN2_PRETRAINED_MODEL_ARCHIVE_LIST = [
"Qwen/Qwen2-7B-beta",
# See all Qwen2 models at https://huggingface.co/models?filter=qwen2
]
# Copied from transformers.models.llama.modeling_llama._get_unpad_data
def _get_unpad_data(attention_mask):
seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
max_seqlen_in_batch = seqlens_in_batch.max().item()
cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
return (
indices,
cu_seqlens,
max_seqlen_in_batch,
)
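# --- Illustrative toy check (not part of the original file) ---
# For a right-padded attention mask, _get_unpad_data yields the flat indices
# of the real tokens, the cumulative sequence lengths consumed by
# flash_attn_varlen_func, and the longest unpadded sequence in the batch.
def _demo_get_unpad_data():
    mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])
    indices, cu_seqlens, max_seqlen = _get_unpad_data(mask)
    # indices == tensor([0, 1, 2, 4, 5]); cu_seqlens == tensor([0, 3, 5]); max_seqlen == 3
    return indices, cu_seqlens, max_seqlen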
# Copied from transformers.models.llama.modeling_llama.LlamaRMSNorm with Llama->Qwen2
class Qwen2RMSNorm(nn.Module):
def __init__(self, hidden_size, eps=1e-6):
"""
Qwen2RMSNorm is equivalent to T5LayerNorm
"""
super().__init__()
self.weight = nn.Parameter(torch.ones(hidden_size))
self.variance_epsilon = eps
def forward(self, hidden_states):
input_dtype = hidden_states.dtype
hidden_states = hidden_states.to(torch.float32)
variance = hidden_states.pow(2).mean(-1, keepdim=True)
hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
return self.weight * hidden_states.to(input_dtype)
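# --- Illustrative equivalence check (not part of the original file) ---
# With its default all-ones weight, Qwen2RMSNorm reduces to the plain RMS
# normalization x / sqrt(mean(x^2) + eps), matching the T5LayerNorm note above.
def _demo_rms_norm():
    norm = Qwen2RMSNorm(hidden_size=8, eps=1e-6)
    x = torch.randn(2, 3, 8)
    expected = x / torch.sqrt(x.pow(2).mean(-1, keepdim=True) + 1e-6)
    assert torch.allclose(norm(x), expected, atol=1e-6)
    return norm(x)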
# Copied from transformers.models.mistral.modeling_mistral.MistralRotaryEmbedding with Mistral->Qwen2
class Qwen2RotaryEmbedding(nn.Module):
def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
super().__init__()
self.dim = dim
self.max_position_embeddings = max_position_embeddings
self.base = base
inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2, dtype=torch.int64).float().to(device) / self.dim))
self.register_buffer("inv_freq", inv_freq, persistent=False)
# Build here to make `torch.jit.trace` work.
self._set_cos_sin_cache(
seq_len=max_position_embeddings, device=self.inv_freq.device, dtype=torch.get_default_dtype()
)
def _set_cos_sin_cache(self, seq_len, device, dtype):
self.max_seq_len_cached = seq_len
t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int64).type_as(self.inv_freq)
freqs = torch.outer(t, self.inv_freq)
# Different from paper, but it uses a different permutation in order to obtain the same calculation
emb = torch.cat((freqs, freqs), dim=-1)
self.register_buffer("cos_cached", emb.cos().to(dtype), persistent=False)
self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False)
def forward(self, x, seq_len=None):
# x: [bs, num_attention_heads, seq_len, head_size]
if seq_len > self.max_seq_len_cached:
self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)
return (
self.cos_cached[:seq_len].to(dtype=x.dtype),
self.sin_cached[:seq_len].to(dtype=x.dtype),
)
# Copied from transformers.models.llama.modeling_llama.rotate_half
def rotate_half(x):
"""Rotates half the hidden dims of the input."""
x1 = x[..., : x.shape[-1] // 2]
x2 = x[..., x.shape[-1] // 2 :]
return torch.cat((-x2, x1), dim=-1)
# Copied from transformers.models.mistral.modeling_mistral.apply_rotary_pos_emb
def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
"""Applies Rotary Position Embedding to the query and key tensors.
Args:
q (`torch.Tensor`): The query tensor.
k (`torch.Tensor`): The key tensor.
cos (`torch.Tensor`): The cosine part of the rotary embedding.
sin (`torch.Tensor`): The sine part of the rotary embedding.
position_ids (`torch.Tensor`):
The position indices of the tokens corresponding to the query and key tensors. For example, this can be
used to pass offsetted position ids when working with a KV-cache.
unsqueeze_dim (`int`, *optional*, defaults to 1):
The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
Returns:
`tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
"""
cos = cos[position_ids].unsqueeze(unsqueeze_dim)
sin = sin[position_ids].unsqueeze(unsqueeze_dim)
q_embed = (q * cos) + (rotate_half(q) * sin)
k_embed = (k * cos) + (rotate_half(k) * sin)
return q_embed, k_embed
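# --- Illustrative shape/norm check (not part of the original file) ---
# Applies the rotary embedding defined above to toy query/key tensors: shapes
# are preserved, and because RoPE rotates pairs of dimensions by the same
# angle, per-position vector norms are (numerically) unchanged.
def _demo_apply_rotary_pos_emb():
    bsz, heads, seq_len, head_dim = 1, 2, 4, 8
    rotary = Qwen2RotaryEmbedding(head_dim, max_position_embeddings=seq_len)
    q = torch.randn(bsz, heads, seq_len, head_dim)
    k = torch.randn(bsz, heads, seq_len, head_dim)
    cos, sin = rotary(q, seq_len=seq_len)
    position_ids = torch.arange(seq_len).unsqueeze(0)
    q_rot, k_rot = apply_rotary_pos_emb(q, k, cos, sin, position_ids)
    assert q_rot.shape == q.shape and k_rot.shape == k.shape
    assert torch.allclose(q_rot.norm(dim=-1), q.norm(dim=-1), atol=1e-5)
    return q_rot, k_rot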
# Copied from transformers.models.mistral.modeling_mistral.MistralMLP with Mistral->Qwen2
class Qwen2MLP(nn.Module):
def __init__(self, config):
super().__init__()
self.config = config
self.hidden_size = config.hidden_size
self.intermediate_size = config.intermediate_size
self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)
self.act_fn = ACT2FN[config.hidden_act]
def forward(self, x):
return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
# Copied from transformers.models.llama.modeling_llama.repeat_kv
def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
"""
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
"""
batch, num_key_value_heads, slen, head_dim = hidden_states.shape
if n_rep == 1:
return hidden_states
hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
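# --- Illustrative equivalence check (not part of the original file) ---
# Confirms the docstring above: repeat_kv matches torch.repeat_interleave
# along the key/value head dimension.
def _demo_repeat_kv():
    kv = torch.randn(2, 4, 6, 8)  # (batch, num_key_value_heads, seqlen, head_dim)
    out = repeat_kv(kv, n_rep=3)  # -> (batch, 12, seqlen, head_dim)
    assert torch.equal(out, torch.repeat_interleave(kv, repeats=3, dim=1))
    return out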
class Qwen2Attention(nn.Module):
"""
Multi-headed attention from 'Attention Is All You Need' paper. Modified to use sliding window attention: Longformer
and "Generating Long Sequences with Sparse Transformers".
"""
def __init__(self, config: Qwen2Config, layer_idx: Optional[int] = None):
super().__init__()
self.config = config
self.layer_idx = layer_idx
if layer_idx is None:
logger.warning_once(
f"Instantiating {self.__class__.__name__} without passing `layer_idx` is not recommended and will "
"to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` "
"when creating this class."
)
self.hidden_size = config.hidden_size
self.num_heads = config.num_attention_heads
self.head_dim = self.hidden_size // self.num_heads
self.num_key_value_heads = config.num_key_value_heads
self.num_key_value_groups = self.num_heads // self.num_key_value_heads
self.max_position_embeddings = config.max_position_embeddings
self.rope_theta = config.rope_theta
self.is_causal = True
self.attention_dropout = config.attention_dropout
if (self.head_dim * self.num_heads) != self.hidden_size:
raise ValueError(
f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
f" and `num_heads`: {self.num_heads})."
)
self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=True)
self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=True)
self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=True)
self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
self.rotary_emb = Qwen2RotaryEmbedding(
self.head_dim,
max_position_embeddings=self.max_position_embeddings,
base=self.rope_theta,
)
def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
)
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states)
key_states = self.k_proj(hidden_states)
value_states = self.v_proj(hidden_states)
query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
kv_seq_len = key_states.shape[-2]
if past_key_value is not None:
if self.layer_idx is None:
raise ValueError(
f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} "
"for auto-regressive decoding with k/v caching, please make sure to initialize the attention class "
"with a layer index."
)
kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
if past_key_value is not None:
cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models
key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
# repeat k/v heads if n_kv_heads < n_heads
key_states = repeat_kv(key_states, self.num_key_value_groups)
value_states = repeat_kv(value_states, self.num_key_value_groups)
attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
raise ValueError(
f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
f" {attn_weights.size()}"
)
if attention_mask is not None:
if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
raise ValueError(
f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
)
attn_weights = attn_weights + attention_mask
# upcast attention to fp32
attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training)
attn_output = torch.matmul(attn_weights, value_states)
if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
raise ValueError(
f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
f" {attn_output.size()}"
)
attn_output = attn_output.transpose(1, 2).contiguous()
attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
attn_output = self.o_proj(attn_output)
if not output_attentions:
attn_weights = None
return attn_output, attn_weights, past_key_value
class Qwen2FlashAttention2(Qwen2Attention):
"""
Qwen2 flash attention module, following Qwen2 attention module. This module inherits from `Qwen2Attention`
as the weights of the module stay untouched. The only required change would be on the forward pass
where it needs to correctly call the public API of flash attention and deal with padding tokens
in case the input contains any of them. Additionally, for sliding window attention, we apply SWA only to the bottom
config.max_window_layers layers.
"""
# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2.__init__
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1.
# flash_attn<2.1 generates a top-left aligned causal mask, while what is needed here is bottom-right alignment, which was made the default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0.
# Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left).
self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10()
def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_value: Optional[Cache] = None,
output_attentions: bool = False,
use_cache: bool = False,
**kwargs,
):
if "padding_mask" in kwargs:
warnings.warn(
"Passing `padding_mask` is deprecated and will
│ │ │ │ ├── qwen_72b_qlora_custom_sft_e1.py
│ │ │ │ └── qwen_7b_chat_qlora_custom_sft_e1.py
│ │ │ ├── starcoder/
│ │ │ │ └── starcoder_qlora_custom_sft_e1.py
│ │ │ ├── yi/
│ │ │ │ ├── yi_34b_qlora_custom_sft_e1.py
│ │ │ │ └── yi_6b_qlora_custom_sft_e1.py
│ │ │ └── zephyr/
│ │ │ └── zephyr_7b_beta_qlora_custom_sft_e1.py
│ │ ├── deepseek/
│ │ │ ├── README.md
│ │ │ ├── deepseek_coder_6_7b_base/
│ │ │ │ └── deepseek_coder_6_7b_base_qlora_code_alpaca_e3.py
│ │ │ ├── deepseek_coder_6_7b_instruct/
│ │ │ │ └── deepseekcoder_6_7b_instruct_qlora_code_alpaca_e3.py
│ │ │ ├── deepseek_moe_16b_base/
│ │ │ │ ├── deepseek_moe_16b_base_full_oasst1_e3.py
│ │ │ │ └── deepseek_moe_16b_base_qlora_oasst1_e3.py
│ │ │ ├── deepseek_moe_16b_chat/
│ │ │ │ ├── deepseek_moe_16b_chat_full_oasst1_e3.py
│ │ │ │ └── deepseek_moe_16b_chat_qlora_oasst1_e3.py
│ │ │ ├── deepseek_v2_chat/
│ │ │ │ └── deepseek_v2_chat_full_alpaca_e3.py
│ │ │ └── deepseek_v2_lite_chat/
│ │ │ ├── deepseek_v2_lite_chat_full_alpaca_e3.py
│ │ │ └── deepseek_v2_lite_chat_full_alpaca_e3_32k_varlen.py
│ │ ├── deepspeed/
│ │ │ ├── deepspeed_zero1.json
│ │ │ ├── deepspeed_zero2.json
│ │ │ ├── deepspeed_zero2_offload.json
│ │ │ ├── deepspeed_zero3.json
│ │ │ └── deepspeed_zero3_offload.json
│ │ ├── dpo/
│ │ │ ├── internlm/
│ │ │ │ ├── internlm2_chat_1_8b_dpo_full.py
│ │ │ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn.py
│ │ │ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn_jsonl_dataset.py
│ │ │ │ └── internlm2_chat_7b_dpo_qlora_varlenattn.py
│ │ │ └── llama/
│ │ │ └── llama3_8b_instruct_dpo_qlora_varlenattn.py
│ │ ├── gemma/
│ │ │ ├── gemma_2b/
│ │ │ │ ├── gemma_2b_full_alpaca_e3.py
│ │ │ │ └── gemma_2b_qlora_alpaca_e3.py
│ │ │ ├── gemma_2b_it/
│ │ │ │ ├── gemma_2b_it_full_alpaca_e3.py
│ │ │ │ └── gemma_2b_it_qlora_alpaca_e3.py
│ │ │ ├── gemma_7b/
│ │ │ │ ├── gemma_7b_full_alpaca_e3.py
│ │ │ │ └── gemma_7b_qlora_alpaca_e3.py
│ │ │ └── gemma_7b_it/
│ │ │ ├── gemma_7b_it_full_alpaca_e3.py
│ │ │ └── gemma_7b_it_qlora_alpaca_e3.py
│ │ ├── internlm/
│ │ │ ├── internlm2_1_8b/
│ │ │ │ ├── internlm2_1_8b_full_alpaca_e3.py
│ │ │ │ └── internlm2_1_8b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_20b/
│ │ │ │ ├── internlm2_20b_full_finetune_custom_dataset_e1.py
│ │ │ │ ├── internlm2_20b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm2_20b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── internlm2_20b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm2_20b_qlora_colorist_e5.py
│ │ │ │ ├── internlm2_20b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm2_20b_qlora_msagent_react_e3_gpu8.py
│ │ │ │ ├── internlm2_20b_qlora_oasst1_512_e3.py
│ │ │ │ ├── internlm2_20b_qlora_oasst1_e3.py
│ │ │ │ └── internlm2_20b_qlora_sql_e3.py
│ │ │ ├── internlm2_7b/
│ │ │ │ ├── internlm2_7b_full_finetune_custom_dataset_e1.py
│ │ │ │ ├── internlm2_7b_full_finetune_custom_dataset_e1_sequence_parallel_4.py
│ │ │ │ ├── internlm2_7b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm2_7b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── internlm2_7b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm2_7b_qlora_colorist_e5.py
│ │ │ │ ├── internlm2_7b_qlora_json_e3.py
│ │ │ │ ├── internlm2_7b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm2_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ │ ├── internlm2_7b_qlora_oasst1_512_e3.py
│ │ │ │ ├── internlm2_7b_qlora_oasst1_e3.py
│ │ │ │ ├── internlm2_7b_qlora_sql_e3.py
│ │ │ │ ├── internlm2_7b_w_internevo_dataset.py
│ │ │ │ ├── internlm2_7b_w_tokenized_dataset.py
│ │ │ │ └── internlm2_7b_w_untokenized_dataset.py
│ │ │ ├── internlm2_chat_1_8b/
│ │ │ │ ├── internlm2_chat_1_8b_full_alpaca_e3.py
│ │ │ │ └── internlm2_chat_1_8b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_chat_20b/
│ │ │ │ ├── internlm2_chat_20b_full_finetune_custom_dataset_e1.py
│ │ │ │ ├── internlm2_chat_20b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm2_chat_20b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm2_chat_20b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm2_chat_20b_qlora_oasst1_512_e3.py
│ │ │ │ └── internlm2_chat_20b_qlora_oasst1_e3.py
│ │ │ ├── internlm2_chat_7b/
│ │ │ │ ├── internlm2_chat_7b_full_finetune_custom_dataset_e1.py
│ │ │ │ ├── internlm2_chat_7b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm2_chat_7b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm2_chat_7b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm2_chat_7b_qlora_oasst1_512_e3.py
│ │ │ │ └── internlm2_chat_7b_qlora_oasst1_e3.py
│ │ │ ├── internlm_20b/
│ │ │ │ ├── internlm_20b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm_20b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── internlm_20b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── internlm_20b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── internlm_20b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── internlm_20b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm_20b_qlora_colorist_e5.py
│ │ │ │ ├── internlm_20b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm_20b_qlora_msagent_react_e3_gpu8.py
│ │ │ │ ├── internlm_20b_qlora_oasst1_512_e3.py
│ │ │ │ ├── internlm_20b_qlora_oasst1_e3.py
│ │ │ │ ├── internlm_20b_qlora_open_platypus_e3.py
│ │ │ │ └── internlm_20b_qlora_sql_e3.py
│ │ │ ├── internlm_7b/
│ │ │ │ ├── internlm_7b_full_alpaca_e3.py
│ │ │ │ ├── internlm_7b_full_alpaca_enzh_e3.py
│ │ │ │ ├── internlm_7b_full_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── internlm_7b_full_alpaca_zh_e3.py
│ │ │ │ ├── internlm_7b_full_intern_repo_dataset_template.py
│ │ │ │ ├── internlm_7b_full_oasst1_e3.py
│ │ │ │ ├── internlm_7b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm_7b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── internlm_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── internlm_7b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── internlm_7b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── internlm_7b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm_7b_qlora_colorist_e5.py
│ │ │ │ ├── internlm_7b_qlora_json_e3.py
│ │ │ │ ├── internlm_7b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm_7b_qlora_medical_e1.py
│ │ │ │ ├── internlm_7b_qlora_moss_sft_all_e1.py
│ │ │ │ ├── internlm_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ │ ├── internlm_7b_qlora_moss_sft_plugins_e1.py
│ │ │ │ ├── internlm_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ │ ├── internlm_7b_qlora_oasst1_512_e3.py
│ │ │ │ ├── internlm_7b_qlora_oasst1_e3.py
│ │ │ │ ├── internlm_7b_qlora_oasst1_e3_hf.py
│ │ │ │ ├── internlm_7b_qlora_oasst1_mmlu_e3.py
│ │ │ │ ├── internlm_7b_qlora_open_platypus_e3.py
│ │ │ │ ├── internlm_7b_qlora_openorca_e1.py
│ │ │ │ ├── internlm_7b_qlora_sql_e3.py
│ │ │ │ └── internlm_7b_qlora_tiny_codes_e1.py
│ │ │ ├── internlm_chat_20b/
│ │ │ │ ├── internlm_chat_20b_qlora_alpaca_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_code_alpaca_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_lawyer_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_oasst1_512_e3.py
│ │ │ │ ├── internlm_chat_20b_qlora_oasst1_e3.py
│ │ │ │ └── internlm_chat_20b_qlora_open_platypus_e3.py
│ │ │ └── internlm_chat_7b/
│ │ │ ├── internlm_chat_7b_qlora_alpaca_e3.py
│ │ │ ├── internlm_chat_7b_qlora_alpaca_enzh_e3.py
│ │ │ ├── internlm_chat_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── internlm_chat_7b_qlora_alpaca_zh_e3.py
│ │ │ ├── internlm_chat_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── internlm_chat_7b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm_chat_7b_qlora_colorist_e5.py
│ │ │ ├── internlm_chat_7b_qlora_lawyer_e3.py
│ │ │ ├── internlm_chat_7b_qlora_medical_e1.py
│ │ │ ├── internlm_chat_7b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm_chat_7b_qlora_oasst1_e3.py
│ │ │ ├── internlm_chat_7b_qlora_open_platypus_e3.py
│ │ │ ├── internlm_chat_7b_qlora_openorca_e1.py
│ │ │ ├── internlm_chat_7b_qlora_sql_e3.py
│ │ │ └── internlm_chat_7b_qlora_tiny_codes_e1.py
│ │ ├── llama/
│ │ │ ├── llama2_70b/
│ │ │ │ ├── llama2_70b_full_wizardlm_e1.py
│ │ │ │ ├── llama2_70b_int8_lora_open_platypus_e1.py
│ │ │ │ ├── llama2_70b_int8_lora_open_platypus_e1_hf.py
│ │ │ │ ├── llama2_70b_qlora_open_platypus_e1.py
│ │ │ │ └── llama2_70b_qlora_open_platypus_e1_hf.py
│ │ │ ├── llama2_7b/
│ │ │ │ ├── llama2_7b_full_pgbooks_400iters_sp1.py
│ │ │ │ ├── llama2_7b_full_pgbooks_400iters_sp4.py
│ │ │ │ ├── llama2_7b_full_wizardlm_e1.py
│ │ │ │ ├── llama2_7b_qlora_alpaca_e3.py
│ │ │ │ ├── llama2_7b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── llama2_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── llama2_7b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── llama2_7b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── llama2_7b_qlora_code_alpaca_e3.py
│ │ │ │ ├── llama2_7b_qlora_colorist_e5.py
│ │ │ │ ├── llama2_7b_qlora_lawyer_e3.py
│ │ │ │ ├── llama2_7b_qlora_medical_e1.py
│ │ │ │ ├── llama2_7b_qlora_moss_sft_all_e1.py
│ │ │ │ ├── llama2_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ │ ├── llama2_7b_qlora_moss_sft_plugins_e1.py
│ │ │ │ ├── llama2_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ │ ├── llama2_7b_qlora_oasst1_512_e3.py
│ │ │ │ ├── llama2_7b_qlora_oasst1_e3.py
│ │ │ │ ├── llama2_7b_qlora_open_platypus_e3.py
│ │ │ │ ├── llama2_7b_qlora_openorca_e1.py
│ │ │ │ ├── llama2_7b_qlora_sql_e3.py
│ │ │ │ └── llama2_7b_qlora_tiny_codes_e1.py
│ │ │ ├── llama2_7b_chat/
│ │ │ │ ├── llama2_7b_chat_qlora_alpaca_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_code_alpaca_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_colorist_e5.py
│ │ │ │ ├── llama2_7b_chat_qlora_lawyer_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_medical_e1.py
│ │ │ │ ├── llama2_7b_chat_qlora_oasst1_512_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_oasst1_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_open_platypus_e3.py
│ │ │ │ ├── llama2_7b_chat_qlora_openorca_e1.py
│ │ │ │ ├── llama2_7b_chat_qlora_sql_e3.py
│ │ │ │ └── llama2_7b_chat_qlora_tiny_codes_e1.py
│ │ │ ├── llama3_70b_instruct/
│ │ │ │ └── llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py
│ │ │ ├── llama3_8b/
│ │ │ │ ├── README.md
│ │ │ │ └── llama3_8b_full_alpaca_e3.py
│ │ │ ├── llama3_8b_instruct/
│ │ │ │ ├── llama3_8b_instruct_full_alpaca_e3.py
│ │ │ │ └── llama3_8b_instruct_qlora_alpaca_e3.py
│ │ │ └── llama_7b/
│ │ │ ├── llama_7b_qlora_alpaca_e3.py
│ │ │ ├── llama_7b_qlora_alpaca_enzh_e3.py
│ │ │ ├── llama_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── llama_7b_qlora_alpaca_zh_e3.py
│ │ │ ├── llama_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── llama_7b_qlora_code_alpaca_e3.py
│ │ │ ├── llama_7b_qlora_colorist_e5.py
│ │ │ ├── llama_7b_qlora_lawyer_e3.py
│ │ │ ├── llama_7b_qlora_medical_e1.py
│ │ │ ├── llama_7b_qlora_moss_sft_all_e1.py
│ │ │ ├── llama_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ ├── llama_7b_qlora_moss_sft_plugins_e1.py
│ │ │ ├── llama_7b_qlora_oasst1_512_e3.py
│ │ │ ├── llama_7b_qlora_oasst1_e3.py
│ │ │ ├── llama_7b_qlora_open_platypus_e3.py
│ │ │ ├── llama_7b_qlora_openorca_e1.py
│ │ │ ├── llama_7b_qlora_sql_e3.py
│ │ │ └── llama_7b_qlora_tiny_codes_e1.py
│ │ ├── llama_speed_benchmark/
│ │ │ ├── llama2_70b/
│ │ │ │ ├── llama2_70b_full_alpaca_enzh_128k_sp8.py
│ │ │ │ ├── llama2_70b_full_alpaca_enzh_256k_sp16.py
│ │ │ │ ├── llama2_70b_full_alpaca_enzh_32k_sp4.py
│ │ │ │ └── llama2_70b_full_alpaca_enzh_8k_sp1.py
│ │ │ ├── llama2_7b/
│ │ │ │ ├── llama2_7b_full_alpaca_enzh_128k_sp8.py
│ │ │ │ ├── llama2_7b_full_alpaca_enzh_1M_sp16.py
│ │ │ │ ├── llama2_7b_full_alpaca_enzh_256k_sp8.py
│ │ │ │ ├── llama2_7b_full_alpaca_enzh_32k_sp1.py
│ │ │ │ └── llama2_7b_full_alpaca_enzh_8k_sp1.py
│ │ │ └── yi_34b/
│ │ │ ├── yi_34b_200k_full_alpaca_enzh_128k_sp8.py
│ │ │ ├── yi_34b_200k_full_alpaca_enzh_256k_sp8.py
│ │ │ ├── yi_34b_200k_full_alpaca_enzh_32k_sp2.py
│ │ │ └── yi_34b_200k_full_alpaca_enzh_8k_sp1.py
│ │ ├── llava/
│ │ │ ├── README.md
│ │ │ ├── README_zh-CN.md
│ │ │ ├── internlm2_chat_1_8b_clip_vit_large_p14_336/
│ │ │ │ ├── finetune/
│ │ │ │ │ └── llava_internlm2_chat_1_8b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_internlm2_chat_1_8b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── internlm2_chat_20b_clip_vit_large_p14_336/
│ │ │ │ ├── finetune/
│ │ │ │ │ ├── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ │ └── llava_internlm2_chat_20b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── internlm2_chat_7b_clip_vit_large_p14_336/
│ │ │ │ ├── finetune/
│ │ │ │ │ ├── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ │ └── llava_internlm2_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── internlm_chat_7b_clip_vit_large_p14_336/
│ │ │ │ ├── finetune/
│ │ │ │ │ └── llava_internlm_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_internlm_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── llama3_70b_instruct_clip_vit_large_p14_336/
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_llama3_70b_instruct_quant_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── llama3_8b_instruct_clip_vit_large_p14_336/
│ │ │ │ ├── README.md
│ │ │ │ ├── convert_xtuner_weights_to_hf.py
│ │ │ │ ├── convert_xtuner_weights_to_llava.py
│ │ │ │ ├── finetune/
│ │ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_internvl_finetune.py
│ │ │ │ │ └── llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_e1_gpu1_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py
│ │ │ │ └── llava_llama3_8b_instruct_quant_clip_vit_large_p14_336_e1_gpu1_pretrain.py
│ │ │ ├── official/
│ │ │ │ ├── llava_v15_13b/
│ │ │ │ │ ├── llava_v15_13b_finetune.py
│ │ │ │ │ ├── llava_v15_13b_finetune_lora.py
│ │ │ │ │ └── llava_v15_13b_pretrain.py
│ │ │ │ └── llava_v15_7b/
│ │ │ │ ├── llava_v15_7b_finetune.py
│ │ │ │ ├── llava_v15_7b_finetune_lora.py
│ │ │ │ └── llava_v15_7b_pretrain.py
│ │ │ ├── phi3_mini_4k_instruct_clip_vit_large_p14_336/
│ │ │ │ ├── README.md
│ │ │ │ ├── convert_phi_to_llama.py
│ │ │ │ ├── convert_xtuner_weights_to_hf.py
│ │ │ │ ├── convert_xtuner_weights_to_llava.py
│ │ │ │ ├── finetune/
│ │ │ │ │ ├── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ │ └── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_full_e2_gpu8_internvl_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ ├── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ │ └── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py
│ │ │ ├── vicuna_13b_v15_clip_vit_large_p14_336/
│ │ │ │ ├── finetune/
│ │ │ │ │ └── llava_vicuna_13b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── pretrain/
│ │ │ │ └── llava_vicuna_13b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ └── vicuna_7b_v15_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ ├── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ └── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune_refcoco.py
│ │ │ └── pretrain/
│ │ │ └── llava_vicuna_7b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── mistral/
│ │ │ ├── mistral_7b_full_finetune_custom_dataset_e1.py
│ │ │ ├── mistral_7b_qlora_skypile_pretrain_e1.py
│ │ │ ├── mistral_7b_w_tokenized_dataset.py
│ │ │ └── mistral_7b_w_untokenized_dataset.py
│ │ ├── mixtral/
│ │ │ ├── README.md
│ │ │ ├── mixtral_8x7b/
│ │ │ │ ├── mixtral_8x7b_full_oasst1_e3.py
│ │ │ │ └── mixtral_8x7b_qlora_oasst1_e3.py
│ │ │ └── mixtral_8x7b_instruct/
│ │ │ ├── mixtral_8x7b_instruct_full_oasst1_e3.py
│ │ │ └── mixtral_8x7b_instruct_qlora_oasst1_e3.py
│ │ ├── orpo/
│ │ │ ├── internlm/
│ │ │ │ ├── internlm2_chat_1_8b_orpo_full.py
│ │ │ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn.py
│ │ │ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn_jsonl_dataset.py
│ │ │ │ └── internlm2_chat_7b_orpo_qlora_varlenattn_ultrafeedback_e5.py
│ │ │ └── llama/
│ │ │ └── llama3_8b_instruct_orpo_qlora_varlenattn_ultrafeedback_e5.py
│ │ ├── phi/
│ │ │ └── phi3/
│ │ │ ├── phi3_mini_128k_instruct_full_alpaca_e3.py
│ │ │ ├── phi3_mini_128k_instruct_qlora_alpaca_e3.py
│ │ │ ├── phi3_mini_4k_instruct_full_alpaca_e3.py
│ │ │ └── phi3_mini_4k_instruct_qlora_alpaca_e3.py
│ │ ├── qwen/
│ │ │ ├── qwen1/
│ │ │ │ ├── qwen_1_8b/
│ │ │ │ │ ├── qwen_1_8b_qlora_alpaca_e3.py
│ │ │ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_e3.py
│ │ │ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ │ ├── qwen_1_8b_qlora_alpaca_zh_e3.py
│ │ │ │ │ └── qwen_1_8b_qlora_code_alpaca_e3.py
│ │ │ │ ├── qwen_1_8b_chat/
│ │ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_e3.py
│ │ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ │ └── qwen_1_8b_chat_qlora_code_alpaca_e3.py
│ │ │ │ ├── qwen_72b/
│ │ │ │ │ ├── qwen_72b_qlora_alpaca_e3.py
│ │ │ │ │ ├── qwen_72b_qlora_alpaca_enzh_e3.py
│ │ │ │ │ ├── qwen_72b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ │ ├── qwen_72b_qlora_alpaca_zh_e3.py
│ │ │ │ │ └── qwen_72b_qlora_code_alpaca_e3.py
│ │ │ │ ├── qwen_7b/
│ │ │ │ │ ├── qwen_7b_qlora_alpaca_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_alpaca_enzh_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_alpaca_zh_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_arxiv_gentitle_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_code_alpaca_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_colorist_e5.py
│ │ │ │ │ ├── qwen_7b_qlora_lawyer_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_medical_e1.py
│ │ │ │ │ ├── qwen_7b_qlora_moss_sft_all_e1.py
│ │ │ │ │ ├── qwen_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ │ │ ├── qwen_7b_qlora_moss_sft_plugins_e1.py
│ │ │ │ │ ├── qwen_7b_qlora_oasst1_512_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_oasst1_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_open_platypus_e3.py
│ │ │ │ │ ├── qwen_7b_qlora_openorca_e1.py
│ │ │ │ │ ├── qwen_7b_qlora_sql_e3.py
│ │ │ │ │ └── qwen_7b_qlora_tiny_codes_e1.py
│ │ │ │ └── qwen_7b_chat/
│ │ │ │ ├── qwen_7b_chat_qlora_alpaca_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_code_alpaca_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_colorist_e5.py
│ │ │ │ ├── qwen_7b_chat_qlora_lawyer_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_medical_e1.py
│ │ │ │ ├── qwen_7b_chat_qlora_oasst1_512_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_oasst1_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_open_platypus_e3.py
│ │ │ │ ├── qwen_7b_chat_qlora_openorca_e1.py
│ │ │ │ ├── qwen_7b_chat_qlora_sql_e3.py
│ │ │ │ └── qwen_7b_chat_qlora_tiny_codes_e1.py
│ │ │ └── qwen1_5/
│ │ │ ├── qwen1_5_0_5b/
│ │ │ │ ├── qwen1_5_0_5b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_0_5b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_0_5b_chat/
│ │ │ │ ├── qwen1_5_0_5b_chat_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_0_5b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_110b/
│ │ │ │ ├── qwen1_5_110b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_110b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_110b_chat/
│ │ │ │ ├── README.md
│ │ │ │ ├── qwen1_5_110b_chat_full_alpaca_e3.py
│ │ │ │ ├── qwen1_5_110b_chat_qlora_alpaca_e3.py
│ │ │ │ └── qwen1_5_110b_chat_qlora_alpaca_e3_16k_2gpus.py
│ │ │ ├── qwen1_5_14b/
│ │ │ │ ├── qwen1_5_14b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_14b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_14b_chat/
│ │ │ │ ├── qwen1_5_14b_chat_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_14b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_1_8b/
│ │ │ │ ├── qwen1_5_1_8b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_1_8b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_1_8b_chat/
│ │ │ │ ├── qwen1_5_1_8b_chat_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_1_8b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_4b/
│ │ │ │ ├── qwen1_5_4b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_4b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_4b_chat/
│ │ │ │ ├── qwen1_5_4b_chat_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_4b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_72b/
│ │ │ │ ├── qwen1_5_72b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_72b_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_72b_chat/
│ │ │ │ ├── qwen1_5_72b_chat_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_72b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen1_5_7b/
│ │ │ │ ├── qwen1_5_7b_full_alpaca_e3.py
│ │ │ │ └── qwen1_5_7b_qlora_alpaca_e3.py
│ │ │ └── qwen1_5_7b_chat/
│ │ │ ├── qwen1_5_7b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_7b_chat_qlora_alpaca_e3.py
│ │ ├── qwen_moe/
│ │ │ └── qwen1_5/
│ │ │ └── qwen1_5_moe_a2_7_b_chat/
│ │ │ └── qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py
│ │ ├── reward_model/
│ │ │ ├── internlm/
│ │ │ │ ├── internlm2_chat_1_8b_reward_full_ultrafeedback.py
│ │ │ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_jsonl_dataset.py
│ │ │ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_ultrafeedback.py
│ │ │ │ └── internlm2_chat_1_8b_reward_qlora_varlenattn_ultrafeedback.py
│ │ │ └── llama/
│ │ │ └── llama3_8b_instruct_reward_full_varlenattn_ultrafeedback.py
│ │ ├── starcoder/
│ │ │ └── starcoder_qlora_stack_exchange_example.py
│ │ ├── yi/
│ │ │ ├── yi_34b/
│ │ │ │ └── yi_34b_qlora_alpaca_enzh_e3.py
│ │ │ └── yi_6b/
│ │ │ └── yi_6b_qlora_alpaca_enzh_e3.py
│ │ └── zephyr/
│ │ └── zephyr_7b_beta_qlora_alpaca_e3.py
│ ├── dataset/
│ │ ├── __init__.py
│ │ ├── collate_fns/
│ │ │ ├── __init__.py
│ │ │ ├── default_collate_fn.py
│ │ │ ├── mmlu_collate_fn.py
│ │ │ └── preference_collate_fn.py
│ │ ├── concat_dataset.py
│ │ ├── huggingface.py
│ │ ├── intern_repo.py
│ │ ├── json_dataset.py
│ │ ├── llava.py
│ │ ├── map_fns/
│ │ │ ├── __init__.py
│ │ │ ├── dataset_map_fns/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── alpaca_map_fn.py
│ │ │ │ ├── alpaca_zh_map_fn.py
│ │ │ │ ├── arxiv_map_fn.py
│ │ │ │ ├── code_alpaca_map_fn.py
│ │ │ │ ├── colors_map_fn.py
│ │ │ │ ├── crime_kg_assitant_map_fn.py
│ │ │ │ ├── default_map_fn.py
│ │ │ │ ├── law_reference_map_fn.py
│ │ │ │ ├── llava_map_fn.py
│ │ │ │ ├── medical_map_fn.py
│ │ │ │ ├── msagent_map_fn.py
│ │ │ │ ├── oasst1_map_fn.py
│ │ │ │ ├── openai_map_fn.py
│ │ │ │ ├── openorca_map_fn.py
│ │ │ │ ├── pretrain_map_fn.py
│ │ │ │ ├── sql_map_fn.py
│ │ │ │ ├── stack_exchange_map_fn.py
│ │ │ │ ├── tiny_codes_map_fn.py
│ │ │ │ └── wizardlm_map_fn.py
│ │ │ └── template_map_fn.py
│ │ ├── modelscope.py
│ │ ├── moss_sft.py
│ │ ├── preference_dataset.py
│ │ ├── refcoco_json.py
│ │ ├── samplers/
│ │ │ ├── __init__.py
│ │ │ ├── intern_repo.py
│ │ │ └── length_grouped.py
│ │ └── utils.py
│ ├── engine/
│ │ ├── __init__.py
│ │ ├── _strategy/
│ │ │ ├── __init__.py
│ │ │ └── deepspeed.py
│ │ ├── hooks/
│ │ │ ├── __init__.py
│ │ │ ├── dataset_info_hook.py
│ │ │ ├── evaluate_chat_hook.py
│ │ │ ├── hf_checkpoint_hook.py
│ │ │ ├── throughput_hook.py
│ │ │ └── varlen_attn_args_to_messagehub_hook.py
│ │ └── runner/
│ │ ├── __init__.py
│ │ └── loops.py
│ ├── entry_point.py
│ ├── evaluation/
│ │ ├── __init__.py
│ │ └── metrics/
│ │ ├── __init__.py
│ │ ├── mmlu_metric.py
│ │ └── reward_metric.py
│ ├── model/
│ │ ├── __init__.py
│ │ ├── dpo.py
│ │ ├── llava.py
│ │ ├── modules/
│ │ │ ├── __init__.py
│ │ │ ├── dispatch/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── attention.py
│ │ │ │ ├── baichuan.py
│ │ │ │ ├── cohere.py
│ │ │ │ ├── deepseek_v2.py
│ │ │ │ ├── internlm.py
│ │ │ │ ├── internlm2.py
│ │ │ │ ├── llama.py
│ │ │ │ ├── mistral.py
│ │ │ │ ├── phi3.py
│ │ │ │ ├── qwen2.py
│ │ │ │ ├── triton_kernels/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── layer_norm.py
│ │ │ │ │ ├── rms_norm.py
│ │ │ │ │ └── rotary.py
│ │ │ │ ├── utils.py
│ │ │ │ └── yi.py
│ │ │ └── projector/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_projector.py
│ │ │ └── modeling_projector.py
│ │ ├── orpo.py
│ │ ├── reward.py
│ │ ├── sft.py
│ │ ├── transformers_models/
│ │ │ ├── __init__.py
│ │ │ ├── deepseek_v2/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── configuration_deepseek.py
│ │ │ │ ├── modeling_deepseek.py
│ │ │ │ └── tokenization_deepseek_fast.py
│ │ │ └── mixtral/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_mixtral.py
│ │ │ └── modeling_mixtral.py
│ │ └── utils.py
│ ├── parallel/
│ │ ├── __init__.py
│ │ └── sequence/
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── comm.py
│ │ ├── data_collate.py
│ │ ├── reduce_loss.py
│ │ ├── sampler.py
│ │ └── setup_distributed.py
│ ├── registry.py
│ ├── tools/
│ │ ├── chat.py
│ │ ├── check_custom_dataset.py
│ │ ├── copy_cfg.py
│ │ ├── data_preprocess/
│ │ │ ├── arxiv.py
│ │ │ └── convert_refcoco.py
│ │ ├── eval_refcoco.py
│ │ ├── get_data_order.py
│ │ ├── list_cfg.py
│ │ ├── list_dataset_format.py
│ │ ├── log_dataset.py
│ │ ├── mmbench.py
│ │ ├── model_converters/
│ │ │ ├── merge.py
│ │ │ ├── modeling_internlm2_reward/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── configuration_internlm2.py
│ │ │ │ └── modeling_internlm2.py
│ │ │ ├── pth_to_hf.py
│ │ │ └── split.py
│ │ ├── plugins/
│ │ │ ├── __init__.py
│ │ │ ├── api.py
│ │ │ ├── calculate.py
│ │ │ ├── search.py
│ │ │ └── solve.py
│ │ ├── process_untokenized_datasets.py
│ │ ├── process_untokenized_datasets_legacy.py
│ │ ├── process_untokenized_llava_data.py
│ │ ├── test.py
│ │ ├── tokenize_ftdp_datasets.py
│ │ ├── train.py
│ │ └── utils.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── fileio.py
│ │ ├── handle_moe_load_and_save.py
│ │ ├── stop_criteria.py
│ │ ├── templates.py
│ │ └── zero_to_any_dtype.py
│ └── version.py
└── xtuner-train_internvideo2_5/
├── .gitignore
├── .owners.yml
├── .pre-commit-config-zh-cn.yaml
├── .pre-commit-config.yaml
├── LICENSE
├── MANIFEST.in
├── README.md
├── data/
│ ├── annotaions/
│ │ └── ft_data_example.jsonl
│ └── diy_ft_data.json
├── ft_internvideo_2_5.sh
├── ft_internvideo_2_5_datapacking.sh
├── requirements/
│ ├── deepspeed.txt
│ ├── docs.txt
│ ├── modelscope.txt
│ └── runtime.txt
├── requirements.txt
├── setup.cfg
├── setup.py
├── unify_internvl2_train_r16.py
└── xtuner/
├── __init__.py
├── _lite/
│ ├── __init__.py
│ ├── accelerate/
│ │ ├── __init__.py
│ │ ├── dispatches/
│ │ │ ├── __init__.py
│ │ │ ├── _attention.py
│ │ │ ├── _fused/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── layer_norm.py
│ │ │ │ ├── rms_norm.py
│ │ │ │ └── rotary.py
│ │ │ ├── clip.py
│ │ │ ├── internlm2.py
│ │ │ ├── internvl2.py
│ │ │ ├── llama3.py
│ │ │ ├── new.py
│ │ │ ├── phi3.py
│ │ │ ├── qwen2.py
│ │ │ └── qwen_vl2.py
│ │ ├── fsdp/
│ │ │ ├── __init__.py
│ │ │ ├── checkpointing.py
│ │ │ ├── clip_grad.py
│ │ │ ├── lazy.py
│ │ │ ├── precision.py
│ │ │ └── wrap.py
│ │ ├── generate.py
│ │ ├── lora.py
│ │ └── packed.py
│ ├── auto.py
│ ├── chat/
│ │ ├── __init__.py
│ │ ├── backends/
│ │ │ └── __init__.py
│ │ ├── messages/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ └── chat.py
│ │ └── templates/
│ │ ├── __init__.py
│ │ ├── chat.py
│ │ └── hybrid.py
│ ├── checkpoint.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── dataset_fn.py
│ │ ├── format.py
│ │ ├── llava.py
│ │ ├── load.py
│ │ ├── load_new.py
│ │ ├── text.py
│ │ └── tokenize.py
│ ├── internvl/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── conversation.py
│ │ ├── dataset.py
│ │ ├── new_dataset.py
│ │ ├── v1_5/
│ │ │ ├── configuration_intern_vit.py
│ │ │ ├── configuration_internvl_chat.py
│ │ │ ├── configuration_phi3.py
│ │ │ ├── conversation.py
│ │ │ ├── modeling_intern_vit.py
│ │ │ ├── modeling_internvl_chat.py
│ │ │ └── modeling_phi3.py
│ │ └── video_utils.py
│ ├── modelings/
│ │ ├── __init__.py
│ │ ├── internlm2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_internlm2.py
│ │ │ └── modeling_internlm2.py
│ │ └── model_fn.py
│ ├── parallel/
│ │ ├── __init__.py
│ │ ├── comm.py
│ │ ├── logger.py
│ │ ├── new_setup.py
│ │ ├── plans/
│ │ │ └── internlm2.py
│ │ ├── sampler.py
│ │ ├── sequence/
│ │ │ ├── __init__.py
│ │ │ ├── attention.py
│ │ │ ├── data_collate.py
│ │ │ ├── ops.py
│ │ │ └── reduce_loss.py
│ │ └── setup.py
│ └── yunchang/
│ ├── __init__.py
│ ├── comm/
│ │ ├── __init__.py
│ │ ├── all_to_all.py
│ │ └── extract_local.py
│ ├── globals.py
│ ├── hybrid/
│ │ ├── __init__.py
│ │ ├── async_attn_layer.py
│ │ ├── attn_layer.py
│ │ └── utils.py
│ ├── ring/
│ │ ├── __init__.py
│ │ ├── llama3_flash_attn_varlen.py
│ │ ├── ring_flash_attn.py
│ │ ├── ring_flash_attn_varlen.py
│ │ ├── stripe_flash_attn.py
│ │ ├── triton_utils.py
│ │ ├── utils.py
│ │ ├── zigzag_ring_flash_attn.py
│ │ └── zigzag_ring_flash_attn_varlen.py
│ └── ulysses/
│ ├── __init__.py
│ └── attn_layer.py
├── apis/
│ ├── __init__.py
│ ├── datasets/
│ │ ├── __init__.py
│ │ ├── alpaca.py
│ │ ├── arxiv.py
│ │ ├── code_alpaca.py
│ │ ├── colorist.py
│ │ ├── lawyer.py
│ │ ├── medical.py
│ │ ├── moss_003_sft.py
│ │ ├── oasst1.py
│ │ ├── open_orca.py
│ │ ├── sql.py
│ │ ├── tiny_codes.py
│ │ └── wizardlm.py
│ ├── model.py
│ └── training_args.py
├── configs/
│ ├── __init__.py
│ ├── baichuan/
│ │ ├── baichuan2_13b_base/
│ │ │ ├── baichuan2_13b_base_qlora_alpaca_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_arxiv_gentitle_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_colorist_e5.py
│ │ │ ├── baichuan2_13b_base_qlora_lawyer_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_oasst1_e3.py
│ │ │ ├── baichuan2_13b_base_qlora_open_platypus_e3.py
│ │ │ └── baichuan2_13b_base_qlora_sql_e3.py
│ │ ├── baichuan2_13b_chat/
│ │ │ ├── baichuan2_13b_chat_qlora_alpaca_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_lawyer_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan2_13b_chat_qlora_oasst1_e3.py
│ │ │ └── baichuan2_13b_chat_qlora_open_platypus_e3.py
│ │ ├── baichuan2_7b_base/
│ │ │ ├── baichuan2_7b_base_qlora_alpaca_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_arxiv_gentitle_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_colorist_e5.py
│ │ │ ├── baichuan2_7b_base_qlora_lawyer_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_oasst1_e3.py
│ │ │ ├── baichuan2_7b_base_qlora_open_platypus_e3.py
│ │ │ └── baichuan2_7b_base_qlora_sql_e3.py
│ │ ├── baichuan2_7b_chat/
│ │ │ ├── baichuan2_7b_chat_qlora_alpaca_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_lawyer_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan2_7b_chat_qlora_oasst1_e3.py
│ │ │ └── baichuan2_7b_chat_qlora_open_platypus_e3.py
│ │ ├── baichuan_13b_base/
│ │ │ ├── baichuan_13b_base_qlora_alpaca_e3.py
│ │ │ ├── baichuan_13b_base_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan_13b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan_13b_base_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan_13b_base_qlora_arxiv_gentitle_e3.py
│ │ │ ├── baichuan_13b_base_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan_13b_base_qlora_colorist_e5.py
│ │ │ ├── baichuan_13b_base_qlora_lawyer_e3.py
│ │ │ ├── baichuan_13b_base_qlora_medical_e1.py
│ │ │ ├── baichuan_13b_base_qlora_moss_sft_all_e1.py
│ │ │ ├── baichuan_13b_base_qlora_moss_sft_all_e2_gpu8.py
│ │ │ ├── baichuan_13b_base_qlora_moss_sft_plugins_e1.py
│ │ │ ├── baichuan_13b_base_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan_13b_base_qlora_oasst1_e3.py
│ │ │ ├── baichuan_13b_base_qlora_open_platypus_e3.py
│ │ │ ├── baichuan_13b_base_qlora_openorca_e1.py
│ │ │ ├── baichuan_13b_base_qlora_sql_e3.py
│ │ │ └── baichuan_13b_base_qlora_tiny_codes_e1.py
│ │ ├── baichuan_13b_chat/
│ │ │ ├── baichuan_13b_chat_qlora_alpaca_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_colorist_e5.py
│ │ │ ├── baichuan_13b_chat_qlora_lawyer_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_medical_e1.py
│ │ │ ├── baichuan_13b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_oasst1_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_open_platypus_e3.py
│ │ │ ├── baichuan_13b_chat_qlora_openorca_e1.py
│ │ │ ├── baichuan_13b_chat_qlora_sql_e3.py
│ │ │ └── baichuan_13b_chat_qlora_tiny_codes_e1.py
│ │ └── baichuan_7b/
│ │ ├── baichuan_7b_qlora_alpaca_e3.py
│ │ ├── baichuan_7b_qlora_alpaca_enzh_e3.py
│ │ ├── baichuan_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── baichuan_7b_qlora_alpaca_zh_e3.py
│ │ ├── baichuan_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── baichuan_7b_qlora_code_alpaca_e3.py
│ │ ├── baichuan_7b_qlora_colorist_e5.py
│ │ ├── baichuan_7b_qlora_lawyer_e3.py
│ │ ├── baichuan_7b_qlora_medical_e1.py
│ │ ├── baichuan_7b_qlora_moss_sft_all_e1.py
│ │ ├── baichuan_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ ├── baichuan_7b_qlora_moss_sft_plugins_e1.py
│ │ ├── baichuan_7b_qlora_oasst1_512_e3.py
│ │ ├── baichuan_7b_qlora_oasst1_e3.py
│ │ ├── baichuan_7b_qlora_open_platypus_e3.py
│ │ ├── baichuan_7b_qlora_openorca_e1.py
│ │ ├── baichuan_7b_qlora_sql_e3.py
│ │ └── baichuan_7b_qlora_tiny_codes_e1.py
│ ├── chatglm/
│ │ ├── chatglm2_6b/
│ │ │ ├── chatglm2_6b_qlora_alpaca_e3.py
│ │ │ ├── chatglm2_6b_qlora_alpaca_enzh_e3.py
│ │ │ ├── chatglm2_6b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── chatglm2_6b_qlora_alpaca_zh_e3.py
│ │ │ ├── chatglm2_6b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── chatglm2_6b_qlora_code_alpaca_e3.py
│ │ │ ├── chatglm2_6b_qlora_colorist_e5.py
│ │ │ ├── chatglm2_6b_qlora_lawyer_e3.py
│ │ │ ├── chatglm2_6b_qlora_medical_e1.py
│ │ │ ├── chatglm2_6b_qlora_oasst1_512_e3.py
│ │ │ ├── chatglm2_6b_qlora_oasst1_e3.py
│ │ │ ├── chatglm2_6b_qlora_open_platypus_e3.py
│ │ │ ├── chatglm2_6b_qlora_openorca_e1.py
│ │ │ ├── chatglm2_6b_qlora_sql_e3.py
│ │ │ └── chatglm2_6b_qlora_tiny_codes_e1.py
│ │ ├── chatglm3_6b/
│ │ │ ├── chatglm3_6b_qlora_alpaca_e3.py
│ │ │ ├── chatglm3_6b_qlora_alpaca_enzh_e3.py
│ │ │ ├── chatglm3_6b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── chatglm3_6b_qlora_alpaca_zh_e3.py
│ │ │ ├── chatglm3_6b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── chatglm3_6b_qlora_code_alpaca_e3.py
│ │ │ ├── chatglm3_6b_qlora_colorist_e5.py
│ │ │ ├── chatglm3_6b_qlora_lawyer_e3.py
│ │ │ ├── chatglm3_6b_qlora_medical_e1.py
│ │ │ ├── chatglm3_6b_qlora_oasst1_512_e3.py
│ │ │ ├── chatglm3_6b_qlora_oasst1_e3.py
│ │ │ ├── chatglm3_6b_qlora_open_platypus_e3.py
│ │ │ ├── chatglm3_6b_qlora_openorca_e1.py
│ │ │ ├── chatglm3_6b_qlora_sql_e3.py
│ │ │ └── chatglm3_6b_qlora_tiny_codes_e1.py
│ │ └── chatglm3_6b_base/
│ │ ├── chatglm3_6b_base_qlora_alpaca_e3.py
│ │ ├── chatglm3_6b_base_qlora_alpaca_enzh_e3.py
│ │ ├── chatglm3_6b_base_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── chatglm3_6b_base_qlora_alpaca_zh_e3.py
│ │ ├── chatglm3_6b_base_qlora_arxiv_gentitle_e3.py
│ │ ├── chatglm3_6b_base_qlora_code_alpaca_e3.py
│ │ ├── chatglm3_6b_base_qlora_colorist_e5.py
│ │ ├── chatglm3_6b_base_qlora_lawyer_e3.py
│ │ ├── chatglm3_6b_base_qlora_medical_e1.py
│ │ ├── chatglm3_6b_base_qlora_oasst1_512_e3.py
│ │ ├── chatglm3_6b_base_qlora_oasst1_e3.py
│ │ ├── chatglm3_6b_base_qlora_open_platypus_e3.py
│ │ ├── chatglm3_6b_base_qlora_openorca_e1.py
│ │ ├── chatglm3_6b_base_qlora_sql_e3.py
│ │ └── chatglm3_6b_base_qlora_tiny_codes_e1.py
│ ├── cohere/
│ │ ├── README.md
│ │ └── cohere_104b/
│ │ └── cohere_100b_128k_sp32.py
│ ├── custom_dataset/
│ │ ├── pretrain/
│ │ │ ├── baichuan/
│ │ │ │ ├── baichuan2_13b_base_full_custom_pretrain_e1.py
│ │ │ │ └── baichuan2_7b_base_full_custom_pretrain_e1.py
│ │ │ ├── chatglm/
│ │ │ │ ├── chatglm2_6b_full_custom_pretrain_e1.py
│ │ │ │ └── chatglm3_6b_full_custom_pretrain_e1.py
│ │ │ ├── deepseek/
│ │ │ │ └── deepseek_moe_16b_base_full_custom_pretrain_e1.py
│ │ │ ├── gemma/
│ │ │ │ ├── gemma_2b_full_custom_pretrain_e1.py
│ │ │ │ └── gemma_7b_full_custom_pretrain_e1.py
│ │ │ ├── internlm/
│ │ │ │ ├── internlm2_1_8b_full_custom_pretrain_e1.py
│ │ │ │ ├── internlm2_20b_full_custom_pretrain_e1.py
│ │ │ │ └── internlm2_7b_full_custom_pretrain_e1.py
│ │ │ ├── llama/
│ │ │ │ ├── llama2_70b_full_custom_pretrain_e1.py
│ │ │ │ └── llama2_7b_full_custom_pretrain_e1.py
│ │ │ ├── mistral/
│ │ │ │ └── mistral_7b_full_custom_pretrain_e1.py
│ │ │ ├── mixtral/
│ │ │ │ └── mixtral_8x7b_full_custom_pretrain_e1.py
│ │ │ ├── qwen/
│ │ │ │ ├── qwen1_5_0_5b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen1_5_14b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen1_5_1_8b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen1_5_4b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen1_5_72b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen1_5_7b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen_1_8b_full_custom_pretrain_e1.py
│ │ │ │ ├── qwen_72b_full_custom_pretrain_e1.py
│ │ │ │ └── qwen_7b_full_custom_pretrain_e1.py
│ │ │ ├── starcoder/
│ │ │ │ └── starcoder_full_custom_pretrain_e1.py
│ │ │ ├── yi/
│ │ │ │ ├── yi_34b_full_custom_pretrain_e1.py
│ │ │ │ └── yi_6b_full_custom_pretrain_e1.py
│ │ │ └── zephyr/
│ │ │ └── zephyr_7b_beta_full_custom_pretrain_e1.py
│ │ └── sft/
│ │ ├── baichuan/
│ │ │ ├── baichuan2_13b_chat_qlora_custom_sft_e1.py
│ │ │ ├── baichuan2_7b_chat_qlora_custom_sft_e1.py
│ │ │ ├── baichuan_13b_chat_qlora_custom_sft_e1.py
│ │ │ └── baichuan_7b_qlora_custom_sft_e1.py
│ │ ├── chatglm/
│ │ │ ├── chatglm2_6b_qlora_custom_sft_e1.py
│ │ │ └── chatglm3_6b_qlora_custom_sft_e1.py
│ │ ├── deepseek/
│ │ │ ├── deepseek_moe_16b_chat_qlora_custom_sft_e1.py
│ │ │ └── deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py
│ │ ├── gemma/
│ │ │ ├── gemma_2b_it_qlora_custom_sft_e1.py
│ │ │ ├── gemma_2b_qlora_custom_sft_e1.py
│ │ │ ├── gemma_7b_it_qlora_custom_sft_e1.py
│ │ │ └── gemma_7b_qlora_custom_sft_e1.py
│ │ ├── internlm/
│ │ │ ├── internlm2_chat_1_8b_qlora_custom_sft_e1.py
│ │ │ ├── internlm2_chat_20b_qlora_custom_sft_e1.py
│ │ │ └── internlm2_chat_7b_qlora_custom_sft_e1.py
│ │ ├── llama/
│ │ │ ├── llama2_70b_qlora_custom_sft_e1.py
│ │ │ └── llama2_7b_chat_qlora_custom_sft_e1.py
│ │ ├── mistral/
│ │ │ └── mistral_7b_full_finetune_custom_sft_e1.py
│ │ ├── mixtral/
│ │ │ └── mixtral_8x7b_instruct_qlora_custom_sft_e1.py
│ │ ├── qwen/
│ │ │ ├── qwen1_5_0_5b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen1_5_14b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen1_5_1_8b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen1_5_4b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen1_5_72b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen1_5_7b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen_1_8b_chat_qlora_custom_sft_e1.py
│ │ │ ├── qwen_72b_qlora_custom_sft_e1.py
│ │ │ └── qwen_7b_chat_qlora_custom_sft_e1.py
│ │ ├── starcoder/
│ │ │ └── starcoder_qlora_custom_sft_e1.py
│ │ ├── yi/
│ │ │ ├── yi_34b_qlora_custom_sft_e1.py
│ │ │ └── yi_6b_qlora_custom_sft_e1.py
│ │ └── zephyr/
│ │ └── zephyr_7b_beta_qlora_custom_sft_e1.py
│ ├── deepseek/
│ │ ├── README.md
│ │ ├── deepseek_coder_6_7b_base/
│ │ │ └── deepseek_coder_6_7b_base_qlora_code_alpaca_e3.py
│ │ ├── deepseek_coder_6_7b_instruct/
│ │ │ └── deepseekcoder_6_7b_instruct_qlora_code_alpaca_e3.py
│ │ ├── deepseek_moe_16b_base/
│ │ │ ├── deepseek_moe_16b_base_full_oasst1_e3.py
│ │ │ └── deepseek_moe_16b_base_qlora_oasst1_e3.py
│ │ ├── deepseek_moe_16b_chat/
│ │ │ ├── deepseek_moe_16b_chat_full_oasst1_e3.py
│ │ │ └── deepseek_moe_16b_chat_qlora_oasst1_e3.py
│ │ ├── deepseek_v2_chat/
│ │ │ └── deepseek_v2_chat_full_alpaca_e3.py
│ │ └── deepseek_v2_lite_chat/
│ │ ├── deepseek_v2_lite_chat_full_alpaca_e3.py
│ │ └── deepseek_v2_lite_chat_full_alpaca_e3_32k_varlen.py
│ ├── deepspeed/
│ │ ├── deepspeed_zero1.json
│ │ ├── deepspeed_zero2.json
│ │ ├── deepspeed_zero2_offload.json
│ │ ├── deepspeed_zero3.json
│ │ └── deepspeed_zero3_offload.json
│ ├── dpo/
│ │ ├── internlm/
│ │ │ ├── internlm2_chat_1_8b_dpo_full.py
│ │ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn.py
│ │ │ ├── internlm2_chat_1_8b_dpo_full_varlenattn_jsonl_dataset.py
│ │ │ └── internlm2_chat_7b_dpo_qlora_varlenattn.py
│ │ └── llama/
│ │ └── llama3_8b_instruct_dpo_qlora_varlenattn.py
│ ├── gemma/
│ │ ├── gemma_2b/
│ │ │ ├── gemma_2b_full_alpaca_e3.py
│ │ │ └── gemma_2b_qlora_alpaca_e3.py
│ │ ├── gemma_2b_it/
│ │ │ ├── gemma_2b_it_full_alpaca_e3.py
│ │ │ └── gemma_2b_it_qlora_alpaca_e3.py
│ │ ├── gemma_7b/
│ │ │ ├── gemma_7b_full_alpaca_e3.py
│ │ │ └── gemma_7b_qlora_alpaca_e3.py
│ │ └── gemma_7b_it/
│ │ ├── gemma_7b_it_full_alpaca_e3.py
│ │ └── gemma_7b_it_qlora_alpaca_e3.py
│ ├── internlm/
│ │ ├── internlm2_1_8b/
│ │ │ ├── internlm2_1_8b_full_alpaca_e3.py
│ │ │ └── internlm2_1_8b_qlora_alpaca_e3.py
│ │ ├── internlm2_20b/
│ │ │ ├── internlm2_20b_full_finetune_custom_dataset_e1.py
│ │ │ ├── internlm2_20b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_20b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── internlm2_20b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm2_20b_qlora_colorist_e5.py
│ │ │ ├── internlm2_20b_qlora_lawyer_e3.py
│ │ │ ├── internlm2_20b_qlora_msagent_react_e3_gpu8.py
│ │ │ ├── internlm2_20b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm2_20b_qlora_oasst1_e3.py
│ │ │ └── internlm2_20b_qlora_sql_e3.py
│ │ ├── internlm2_7b/
│ │ │ ├── internlm2_7b_full_finetune_custom_dataset_e1.py
│ │ │ ├── internlm2_7b_full_finetune_custom_dataset_e1_sequence_parallel_4.py
│ │ │ ├── internlm2_7b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── internlm2_7b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm2_7b_qlora_colorist_e5.py
│ │ │ ├── internlm2_7b_qlora_json_e3.py
│ │ │ ├── internlm2_7b_qlora_lawyer_e3.py
│ │ │ ├── internlm2_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ ├── internlm2_7b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm2_7b_qlora_oasst1_e3.py
│ │ │ ├── internlm2_7b_qlora_sql_e3.py
│ │ │ ├── internlm2_7b_w_internevo_dataset.py
│ │ │ ├── internlm2_7b_w_tokenized_dataset.py
│ │ │ └── internlm2_7b_w_untokenized_dataset.py
│ │ ├── internlm2_chat_1_8b/
│ │ │ ├── internlm2_chat_1_8b_full_alpaca_e3.py
│ │ │ └── internlm2_chat_1_8b_qlora_alpaca_e3.py
│ │ ├── internlm2_chat_20b/
│ │ │ ├── internlm2_chat_20b_full_finetune_custom_dataset_e1.py
│ │ │ ├── internlm2_chat_20b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_chat_20b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm2_chat_20b_qlora_lawyer_e3.py
│ │ │ ├── internlm2_chat_20b_qlora_oasst1_512_e3.py
│ │ │ └── internlm2_chat_20b_qlora_oasst1_e3.py
│ │ ├── internlm2_chat_7b/
│ │ │ ├── internlm2_chat_7b_full_finetune_custom_dataset_e1.py
│ │ │ ├── internlm2_chat_7b_qlora_alpaca_e3.py
│ │ │ ├── internlm2_chat_7b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm2_chat_7b_qlora_lawyer_e3.py
│ │ │ ├── internlm2_chat_7b_qlora_oasst1_512_e3.py
│ │ │ └── internlm2_chat_7b_qlora_oasst1_e3.py
│ │ ├── internlm_20b/
│ │ │ ├── internlm_20b_qlora_alpaca_e3.py
│ │ │ ├── internlm_20b_qlora_alpaca_enzh_e3.py
│ │ │ ├── internlm_20b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── internlm_20b_qlora_alpaca_zh_e3.py
│ │ │ ├── internlm_20b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── internlm_20b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm_20b_qlora_colorist_e5.py
│ │ │ ├── internlm_20b_qlora_lawyer_e3.py
│ │ │ ├── internlm_20b_qlora_msagent_react_e3_gpu8.py
│ │ │ ├── internlm_20b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm_20b_qlora_oasst1_e3.py
│ │ │ ├── internlm_20b_qlora_open_platypus_e3.py
│ │ │ └── internlm_20b_qlora_sql_e3.py
│ │ ├── internlm_7b/
│ │ │ ├── internlm_7b_full_alpaca_e3.py
│ │ │ ├── internlm_7b_full_alpaca_enzh_e3.py
│ │ │ ├── internlm_7b_full_alpaca_enzh_oasst1_e3.py
│ │ │ ├── internlm_7b_full_alpaca_zh_e3.py
│ │ │ ├── internlm_7b_full_intern_repo_dataset_template.py
│ │ │ ├── internlm_7b_full_oasst1_e3.py
│ │ │ ├── internlm_7b_qlora_alpaca_e3.py
│ │ │ ├── internlm_7b_qlora_alpaca_enzh_e3.py
│ │ │ ├── internlm_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── internlm_7b_qlora_alpaca_zh_e3.py
│ │ │ ├── internlm_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── internlm_7b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm_7b_qlora_colorist_e5.py
│ │ │ ├── internlm_7b_qlora_json_e3.py
│ │ │ ├── internlm_7b_qlora_lawyer_e3.py
│ │ │ ├── internlm_7b_qlora_medical_e1.py
│ │ │ ├── internlm_7b_qlora_moss_sft_all_e1.py
│ │ │ ├── internlm_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ ├── internlm_7b_qlora_moss_sft_plugins_e1.py
│ │ │ ├── internlm_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ ├── internlm_7b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm_7b_qlora_oasst1_e3.py
│ │ │ ├── internlm_7b_qlora_oasst1_e3_hf.py
│ │ │ ├── internlm_7b_qlora_oasst1_mmlu_e3.py
│ │ │ ├── internlm_7b_qlora_open_platypus_e3.py
│ │ │ ├── internlm_7b_qlora_openorca_e1.py
│ │ │ ├── internlm_7b_qlora_sql_e3.py
│ │ │ └── internlm_7b_qlora_tiny_codes_e1.py
│ │ ├── internlm_chat_20b/
│ │ │ ├── internlm_chat_20b_qlora_alpaca_e3.py
│ │ │ ├── internlm_chat_20b_qlora_alpaca_enzh_e3.py
│ │ │ ├── internlm_chat_20b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── internlm_chat_20b_qlora_alpaca_zh_e3.py
│ │ │ ├── internlm_chat_20b_qlora_code_alpaca_e3.py
│ │ │ ├── internlm_chat_20b_qlora_lawyer_e3.py
│ │ │ ├── internlm_chat_20b_qlora_oasst1_512_e3.py
│ │ │ ├── internlm_chat_20b_qlora_oasst1_e3.py
│ │ │ └── internlm_chat_20b_qlora_open_platypus_e3.py
│ │ └── internlm_chat_7b/
│ │ ├── internlm_chat_7b_qlora_alpaca_e3.py
│ │ ├── internlm_chat_7b_qlora_alpaca_enzh_e3.py
│ │ ├── internlm_chat_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── internlm_chat_7b_qlora_alpaca_zh_e3.py
│ │ ├── internlm_chat_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── internlm_chat_7b_qlora_code_alpaca_e3.py
│ │ ├── internlm_chat_7b_qlora_colorist_e5.py
│ │ ├── internlm_chat_7b_qlora_lawyer_e3.py
│ │ ├── internlm_chat_7b_qlora_medical_e1.py
│ │ ├── internlm_chat_7b_qlora_oasst1_512_e3.py
│ │ ├── internlm_chat_7b_qlora_oasst1_e3.py
│ │ ├── internlm_chat_7b_qlora_open_platypus_e3.py
│ │ ├── internlm_chat_7b_qlora_openorca_e1.py
│ │ ├── internlm_chat_7b_qlora_sql_e3.py
│ │ └── internlm_chat_7b_qlora_tiny_codes_e1.py
│ ├── llama/
│ │ ├── llama2_70b/
│ │ │ ├── llama2_70b_full_wizardlm_e1.py
│ │ │ ├── llama2_70b_int8_lora_open_platypus_e1.py
│ │ │ ├── llama2_70b_int8_lora_open_platypus_e1_hf.py
│ │ │ ├── llama2_70b_qlora_open_platypus_e1.py
│ │ │ └── llama2_70b_qlora_open_platypus_e1_hf.py
│ │ ├── llama2_7b/
│ │ │ ├── llama2_7b_full_pgbooks_400iters_sp1.py
│ │ │ ├── llama2_7b_full_pgbooks_400iters_sp4.py
│ │ │ ├── llama2_7b_full_wizardlm_e1.py
│ │ │ ├── llama2_7b_qlora_alpaca_e3.py
│ │ │ ├── llama2_7b_qlora_alpaca_enzh_e3.py
│ │ │ ├── llama2_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── llama2_7b_qlora_alpaca_zh_e3.py
│ │ │ ├── llama2_7b_qlora_arxiv_gentitle_e3.py
│ │ │ ├── llama2_7b_qlora_code_alpaca_e3.py
│ │ │ ├── llama2_7b_qlora_colorist_e5.py
│ │ │ ├── llama2_7b_qlora_lawyer_e3.py
│ │ │ ├── llama2_7b_qlora_medical_e1.py
│ │ │ ├── llama2_7b_qlora_moss_sft_all_e1.py
│ │ │ ├── llama2_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ ├── llama2_7b_qlora_moss_sft_plugins_e1.py
│ │ │ ├── llama2_7b_qlora_msagent_react_e3_gpu8.py
│ │ │ ├── llama2_7b_qlora_oasst1_512_e3.py
│ │ │ ├── llama2_7b_qlora_oasst1_e3.py
│ │ │ ├── llama2_7b_qlora_open_platypus_e3.py
│ │ │ ├── llama2_7b_qlora_openorca_e1.py
│ │ │ ├── llama2_7b_qlora_sql_e3.py
│ │ │ └── llama2_7b_qlora_tiny_codes_e1.py
│ │ ├── llama2_7b_chat/
│ │ │ ├── llama2_7b_chat_qlora_alpaca_e3.py
│ │ │ ├── llama2_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── llama2_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── llama2_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── llama2_7b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ ├── llama2_7b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── llama2_7b_chat_qlora_colorist_e5.py
│ │ │ ├── llama2_7b_chat_qlora_lawyer_e3.py
│ │ │ ├── llama2_7b_chat_qlora_medical_e1.py
│ │ │ ├── llama2_7b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── llama2_7b_chat_qlora_oasst1_e3.py
│ │ │ ├── llama2_7b_chat_qlora_open_platypus_e3.py
│ │ │ ├── llama2_7b_chat_qlora_openorca_e1.py
│ │ │ ├── llama2_7b_chat_qlora_sql_e3.py
│ │ │ └── llama2_7b_chat_qlora_tiny_codes_e1.py
│ │ ├── llama3_70b_instruct/
│ │ │ └── llama3_70b_instruct_qlora_alpaca_e3_2k_gpu8.py
│ │ ├── llama3_8b/
│ │ │ ├── README.md
│ │ │ └── llama3_8b_full_alpaca_e3.py
│ │ ├── llama3_8b_instruct/
│ │ │ ├── llama3_8b_instruct_full_alpaca_e3.py
│ │ │ └── llama3_8b_instruct_qlora_alpaca_e3.py
│ │ └── llama_7b/
│ │ ├── llama_7b_qlora_alpaca_e3.py
│ │ ├── llama_7b_qlora_alpaca_enzh_e3.py
│ │ ├── llama_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ ├── llama_7b_qlora_alpaca_zh_e3.py
│ │ ├── llama_7b_qlora_arxiv_gentitle_e3.py
│ │ ├── llama_7b_qlora_code_alpaca_e3.py
│ │ ├── llama_7b_qlora_colorist_e5.py
│ │ ├── llama_7b_qlora_lawyer_e3.py
│ │ ├── llama_7b_qlora_medical_e1.py
│ │ ├── llama_7b_qlora_moss_sft_all_e1.py
│ │ ├── llama_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ ├── llama_7b_qlora_moss_sft_plugins_e1.py
│ │ ├── llama_7b_qlora_oasst1_512_e3.py
│ │ ├── llama_7b_qlora_oasst1_e3.py
│ │ ├── llama_7b_qlora_open_platypus_e3.py
│ │ ├── llama_7b_qlora_openorca_e1.py
│ │ ├── llama_7b_qlora_sql_e3.py
│ │ └── llama_7b_qlora_tiny_codes_e1.py
│ ├── llama_speed_benchmark/
│ │ ├── llama2_70b/
│ │ │ ├── llama2_70b_full_alpaca_enzh_128k_sp8.py
│ │ │ ├── llama2_70b_full_alpaca_enzh_256k_sp16.py
│ │ │ ├── llama2_70b_full_alpaca_enzh_32k_sp4.py
│ │ │ └── llama2_70b_full_alpaca_enzh_8k_sp1.py
│ │ ├── llama2_7b/
│ │ │ ├── llama2_7b_full_alpaca_enzh_128k_sp8.py
│ │ │ ├── llama2_7b_full_alpaca_enzh_1M_sp16.py
│ │ │ ├── llama2_7b_full_alpaca_enzh_256k_sp8.py
│ │ │ ├── llama2_7b_full_alpaca_enzh_32k_sp1.py
│ │ │ └── llama2_7b_full_alpaca_enzh_8k_sp1.py
│ │ └── yi_34b/
│ │ ├── yi_34b_200k_full_alpaca_enzh_128k_sp8.py
│ │ ├── yi_34b_200k_full_alpaca_enzh_256k_sp8.py
│ │ ├── yi_34b_200k_full_alpaca_enzh_32k_sp2.py
│ │ └── yi_34b_200k_full_alpaca_enzh_8k_sp1.py
│ ├── llava/
│ │ ├── README.md
│ │ ├── README_zh-CN.md
│ │ ├── internlm2_chat_1_8b_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ └── llava_internlm2_chat_1_8b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── pretrain/
│ │ │ └── llava_internlm2_chat_1_8b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── internlm2_chat_20b_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ ├── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ └── llava_internlm2_chat_20b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── pretrain/
│ │ │ └── llava_internlm2_chat_20b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── internlm2_chat_7b_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ ├── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ └── llava_internlm2_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── pretrain/
│ │ │ └── llava_internlm2_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── internlm_chat_7b_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ └── llava_internlm_chat_7b_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── pretrain/
│ │ │ └── llava_internlm_chat_7b_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── llama3_70b_instruct_clip_vit_large_p14_336/
│ │ │ └── pretrain/
│ │ │ └── llava_llama3_70b_instruct_quant_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ ├── llama3_8b_instruct_clip_vit_large_p14_336/
│ │ │ ├── README.md
│ │ │ ├── convert_xtuner_weights_to_hf.py
│ │ │ ├── convert_xtuner_weights_to_llava.py
│ │ │ ├── finetune/
│ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ │ ├── llava_llama3_8b_instruct_full_clip_vit_large_p14_336_lora_e1_gpu8_internvl_finetune.py
│ │ │ │ └── llava_llama3_8b_instruct_qlora_clip_vit_large_p14_336_e1_gpu1_finetune.py
│ │ │ └── pretrain/
│ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ ├── llava_llama3_8b_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py
│ │ │ └── llava_llama3_8b_instruct_quant_clip_vit_large_p14_336_e1_gpu1_pretrain.py
│ │ ├── official/
│ │ │ ├── llava_v15_13b/
│ │ │ │ ├── llava_v15_13b_finetune.py
│ │ │ │ ├── llava_v15_13b_finetune_lora.py
│ │ │ │ └── llava_v15_13b_pretrain.py
│ │ │ └── llava_v15_7b/
│ │ │ ├── llava_v15_7b_finetune.py
│ │ │ ├── llava_v15_7b_finetune_lora.py
│ │ │ └── llava_v15_7b_pretrain.py
│ │ ├── phi3_mini_4k_instruct_clip_vit_large_p14_336/
│ │ │ ├── README.md
│ │ │ ├── convert_phi_to_llama.py
│ │ │ ├── convert_xtuner_weights_to_hf.py
│ │ │ ├── convert_xtuner_weights_to_llava.py
│ │ │ ├── finetune/
│ │ │ │ ├── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_e1_gpu8_finetune.py
│ │ │ │ └── llava_phi3_mini_4k_instruct_full_clip_vit_large_p14_336_full_e2_gpu8_internvl_finetune.py
│ │ │ └── pretrain/
│ │ │ ├── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ │ └── llava_phi3_mini_4k_instruct_clip_vit_large_p14_336_e1_gpu8_sharegpt4v_pretrain.py
│ │ ├── vicuna_13b_v15_clip_vit_large_p14_336/
│ │ │ ├── finetune/
│ │ │ │ └── llava_vicuna_13b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── pretrain/
│ │ │ └── llava_vicuna_13b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ │ └── vicuna_7b_v15_clip_vit_large_p14_336/
│ │ ├── finetune/
│ │ │ ├── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune.py
│ │ │ └── llava_vicuna_7b_v15_qlora_clip_vit_large_p14_336_lora_e1_gpu8_finetune_refcoco.py
│ │ └── pretrain/
│ │ └── llava_vicuna_7b_v15_clip_vit_large_p14_336_e1_gpu8_pretrain.py
│ ├── mistral/
│ │ ├── mistral_7b_full_finetune_custom_dataset_e1.py
│ │ ├── mistral_7b_qlora_skypile_pretrain_e1.py
│ │ ├── mistral_7b_w_tokenized_dataset.py
│ │ └── mistral_7b_w_untokenized_dataset.py
│ ├── mixtral/
│ │ ├── README.md
│ │ ├── mixtral_8x7b/
│ │ │ ├── mixtral_8x7b_full_oasst1_e3.py
│ │ │ └── mixtral_8x7b_qlora_oasst1_e3.py
│ │ └── mixtral_8x7b_instruct/
│ │ ├── mixtral_8x7b_instruct_full_oasst1_e3.py
│ │ └── mixtral_8x7b_instruct_qlora_oasst1_e3.py
│ ├── orpo/
│ │ ├── internlm/
│ │ │ ├── internlm2_chat_1_8b_orpo_full.py
│ │ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn.py
│ │ │ ├── internlm2_chat_1_8b_orpo_full_varlenattn_jsonl_dataset.py
│ │ │ └── internlm2_chat_7b_orpo_qlora_varlenattn_ultrafeedback_e5.py
│ │ └── llama/
│ │ └── llama3_8b_instruct_orpo_qlora_varlenattn_ultrafeedback_e5.py
│ ├── phi/
│ │ └── phi3/
│ │ ├── phi3_mini_128k_instruct_full_alpaca_e3.py
│ │ ├── phi3_mini_128k_instruct_qlora_alpaca_e3.py
│ │ ├── phi3_mini_4k_instruct_full_alpaca_e3.py
│ │ └── phi3_mini_4k_instruct_qlora_alpaca_e3.py
│ ├── qwen/
│ │ ├── qwen1/
│ │ │ ├── qwen_1_8b/
│ │ │ │ ├── qwen_1_8b_qlora_alpaca_e3.py
│ │ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── qwen_1_8b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── qwen_1_8b_qlora_alpaca_zh_e3.py
│ │ │ │ └── qwen_1_8b_qlora_code_alpaca_e3.py
│ │ │ ├── qwen_1_8b_chat/
│ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_e3.py
│ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── qwen_1_8b_chat_qlora_alpaca_zh_e3.py
│ │ │ │ └── qwen_1_8b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── qwen_72b/
│ │ │ │ ├── qwen_72b_qlora_alpaca_e3.py
│ │ │ │ ├── qwen_72b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── qwen_72b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── qwen_72b_qlora_alpaca_zh_e3.py
│ │ │ │ └── qwen_72b_qlora_code_alpaca_e3.py
│ │ │ ├── qwen_7b/
│ │ │ │ ├── qwen_7b_qlora_alpaca_e3.py
│ │ │ │ ├── qwen_7b_qlora_alpaca_enzh_e3.py
│ │ │ │ ├── qwen_7b_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ │ ├── qwen_7b_qlora_alpaca_zh_e3.py
│ │ │ │ ├── qwen_7b_qlora_arxiv_gentitle_e3.py
│ │ │ │ ├── qwen_7b_qlora_code_alpaca_e3.py
│ │ │ │ ├── qwen_7b_qlora_colorist_e5.py
│ │ │ │ ├── qwen_7b_qlora_lawyer_e3.py
│ │ │ │ ├── qwen_7b_qlora_medical_e1.py
│ │ │ │ ├── qwen_7b_qlora_moss_sft_all_e1.py
│ │ │ │ ├── qwen_7b_qlora_moss_sft_all_e2_gpu8.py
│ │ │ │ ├── qwen_7b_qlora_moss_sft_plugins_e1.py
│ │ │ │ ├── qwen_7b_qlora_oasst1_512_e3.py
│ │ │ │ ├── qwen_7b_qlora_oasst1_e3.py
│ │ │ │ ├── qwen_7b_qlora_open_platypus_e3.py
│ │ │ │ ├── qwen_7b_qlora_openorca_e1.py
│ │ │ │ ├── qwen_7b_qlora_sql_e3.py
│ │ │ │ └── qwen_7b_qlora_tiny_codes_e1.py
│ │ │ └── qwen_7b_chat/
│ │ │ ├── qwen_7b_chat_qlora_alpaca_e3.py
│ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_e3.py
│ │ │ ├── qwen_7b_chat_qlora_alpaca_enzh_oasst1_e3.py
│ │ │ ├── qwen_7b_chat_qlora_alpaca_zh_e3.py
│ │ │ ├── qwen_7b_chat_qlora_arxiv_gentitle_e3.py
│ │ │ ├── qwen_7b_chat_qlora_code_alpaca_e3.py
│ │ │ ├── qwen_7b_chat_qlora_colorist_e5.py
│ │ │ ├── qwen_7b_chat_qlora_lawyer_e3.py
│ │ │ ├── qwen_7b_chat_qlora_medical_e1.py
│ │ │ ├── qwen_7b_chat_qlora_oasst1_512_e3.py
│ │ │ ├── qwen_7b_chat_qlora_oasst1_e3.py
│ │ │ ├── qwen_7b_chat_qlora_open_platypus_e3.py
│ │ │ ├── qwen_7b_chat_qlora_openorca_e1.py
│ │ │ ├── qwen_7b_chat_qlora_sql_e3.py
│ │ │ └── qwen_7b_chat_qlora_tiny_codes_e1.py
│ │ └── qwen1_5/
│ │ ├── qwen1_5_0_5b/
│ │ │ ├── qwen1_5_0_5b_full_alpaca_e3.py
│ │ │ └── qwen1_5_0_5b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_0_5b_chat/
│ │ │ ├── qwen1_5_0_5b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_0_5b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_110b/
│ │ │ ├── qwen1_5_110b_full_alpaca_e3.py
│ │ │ └── qwen1_5_110b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_110b_chat/
│ │ │ ├── README.md
│ │ │ ├── qwen1_5_110b_chat_full_alpaca_e3.py
│ │ │ ├── qwen1_5_110b_chat_qlora_alpaca_e3.py
│ │ │ └── qwen1_5_110b_chat_qlora_alpaca_e3_16k_2gpus.py
│ │ ├── qwen1_5_14b/
│ │ │ ├── qwen1_5_14b_full_alpaca_e3.py
│ │ │ └── qwen1_5_14b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_14b_chat/
│ │ │ ├── qwen1_5_14b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_14b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_1_8b/
│ │ │ ├── qwen1_5_1_8b_full_alpaca_e3.py
│ │ │ └── qwen1_5_1_8b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_1_8b_chat/
│ │ │ ├── qwen1_5_1_8b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_1_8b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_4b/
│ │ │ ├── qwen1_5_4b_full_alpaca_e3.py
│ │ │ └── qwen1_5_4b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_4b_chat/
│ │ │ ├── qwen1_5_4b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_4b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_72b/
│ │ │ ├── qwen1_5_72b_full_alpaca_e3.py
│ │ │ └── qwen1_5_72b_qlora_alpaca_e3.py
│ │ ├── qwen1_5_72b_chat/
│ │ │ ├── qwen1_5_72b_chat_full_alpaca_e3.py
│ │ │ └── qwen1_5_72b_chat_qlora_alpaca_e3.py
│ │ ├── qwen1_5_7b/
│ │ │ ├── qwen1_5_7b_full_alpaca_e3.py
│ │ │ └── qwen1_5_7b_qlora_alpaca_e3.py
│ │ └── qwen1_5_7b_chat/
│ │ ├── qwen1_5_7b_chat_full_alpaca_e3.py
│ │ └── qwen1_5_7b_chat_qlora_alpaca_e3.py
│ ├── qwen_moe/
│ │ └── qwen1_5/
│ │ └── qwen1_5_moe_a2_7_b_chat/
│ │ └── qwen1_5_moe_a2_7_b_chat_full_alpaca_e3.py
│ ├── reward_model/
│ │ ├── internlm/
│ │ │ ├── internlm2_chat_1_8b_reward_full_ultrafeedback.py
│ │ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_jsonl_dataset.py
│ │ │ ├── internlm2_chat_1_8b_reward_full_varlenattn_ultrafeedback.py
│ │ │ └── internlm2_chat_1_8b_reward_qlora_varlenattn_ultrafeedback.py
│ │ └── llama/
│ │ └── llama3_8b_instruct_reward_full_varlenattn_ultrafeedback.py
│ ├── starcoder/
│ │ └── starcoder_qlora_stack_exchange_example.py
│ ├── yi/
│ │ ├── yi_34b/
│ │ │ └── yi_34b_qlora_alpaca_enzh_e3.py
│ │ └── yi_6b/
│ │ └── yi_6b_qlora_alpaca_enzh_e3.py
│ └── zephyr/
│ └── zephyr_7b_beta_qlora_alpaca_e3.py
├── dataset/
│ ├── __init__.py
│ ├── collate_fns/
│ │ ├── __init__.py
│ │ ├── default_collate_fn.py
│ │ ├── mmlu_collate_fn.py
│ │ └── preference_collate_fn.py
│ ├── concat_dataset.py
│ ├── huggingface.py
│ ├── intern_repo.py
│ ├── json_dataset.py
│ ├── llava.py
│ ├── map_fns/
│ │ ├── __init__.py
│ │ ├── dataset_map_fns/
│ │ │ ├── __init__.py
│ │ │ ├── alpaca_map_fn.py
│ │ │ ├── alpaca_zh_map_fn.py
│ │ │ ├── arxiv_map_fn.py
│ │ │ ├── code_alpaca_map_fn.py
│ │ │ ├── colors_map_fn.py
│ │ │ ├── crime_kg_assitant_map_fn.py
│ │ │ ├── default_map_fn.py
│ │ │ ├── law_reference_map_fn.py
│ │ │ ├── llava_map_fn.py
│ │ │ ├── medical_map_fn.py
│ │ │ ├── msagent_map_fn.py
│ │ │ ├── oasst1_map_fn.py
│ │ │ ├── openai_map_fn.py
│ │ │ ├── openorca_map_fn.py
│ │ │ ├── pretrain_map_fn.py
│ │ │ ├── sql_map_fn.py
│ │ │ ├── stack_exchange_map_fn.py
│ │ │ ├── tiny_codes_map_fn.py
│ │ │ └── wizardlm_map_fn.py
│ │ └── template_map_fn.py
│ ├── modelscope.py
│ ├── moss_sft.py
│ ├── preference_dataset.py
│ ├── refcoco_json.py
│ ├── samplers/
│ │ ├── __init__.py
│ │ ├── intern_repo.py
│ │ └── length_grouped.py
│ └── utils.py
├── engine/
│ ├── __init__.py
│ ├── _strategy/
│ │ ├── __init__.py
│ │ └── deepspeed.py
│ ├── hooks/
│ │ ├── __init__.py
│ │ ├── dataset_info_hook.py
│ │ ├── evaluate_chat_hook.py
│ │ ├── hf_checkpoint_hook.py
│ │ ├── throughput_hook.py
│ │ └── varlen_attn_args_to_messagehub_hook.py
│ └── runner/
│ ├── __init__.py
│ └── loops.py
├── entry_point.py
├── evaluation/
│ ├── __init__.py
│ └── metrics/
│ ├── __init__.py
│ ├── mmlu_metric.py
│ └── reward_metric.py
├── model/
│ ├── __init__.py
│ ├── dpo.py
│ ├── llava.py
│ ├── modules/
│ │ ├── __init__.py
│ │ ├── dispatch/
│ │ │ ├── __init__.py
│ │ │ ├── attention.py
│ │ │ ├── baichuan.py
│ │ │ ├── cohere.py
│ │ │ ├── deepseek_v2.py
│ │ │ ├── internlm.py
│ │ │ ├── internlm2.py
│ │ │ ├── llama.py
│ │ │ ├── mistral.py
│ │ │ ├── phi3.py
│ │ │ ├── qwen2.py
│ │ │ ├── triton_kernels/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── layer_norm.py
│ │ │ │ ├── rms_norm.py
│ │ │ │ └── rotary.py
│ │ │ ├── utils.py
│ │ │ └── yi.py
│ │ └── projector/
│ │ ├── __init__.py
│ │ ├── configuration_projector.py
│ │ └── modeling_projector.py
│ ├── orpo.py
│ ├── reward.py
│ ├── sft.py
│ ├── transformers_models/
│ │ ├── __init__.py
│ │ ├── deepseek_v2/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_deepseek.py
│ │ │ ├── modeling_deepseek.py
│ │ │ └── tokenization_deepseek_fast.py
│ │ └── mixtral/
│ │ ├── __init__.py
│ │ ├── configuration_mixtral.py
│ │ └── modeling_mixtral.py
│ └── utils.py
├── parallel/
│ ├── __init__.py
│ └── sequence/
│ ├── __init__.py
│ ├── attention.py
│ ├── comm.py
│ ├── data_collate.py
│ ├── reduce_loss.py
│ ├── sampler.py
│ └── setup_distributed.py
├── registry.py
├── tools/
│ ├── chat.py
│ ├── check_custom_dataset.py
│ ├── copy_cfg.py
│ ├── data_preprocess/
│ │ ├── arxiv.py
│ │ └── convert_refcoco.py
│ ├── eval_refcoco.py
│ ├── get_data_order.py
│ ├── list_cfg.py
│ ├── list_dataset_format.py
│ ├── log_dataset.py
│ ├── mmbench.py
│ ├── model_converters/
│ │ ├── merge.py
│ │ ├── modeling_internlm2_reward/
│ │ │ ├── __init__.py
│ │ │ ├── configuration_internlm2.py
│ │ │ └── modeling_internlm2.py
│ │ ├── pth_to_hf.py
│ │ └── split.py
│ ├── plugins/
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── calculate.py
│ │ ├── search.py
│ │ └── solve.py
│ ├── process_untokenized_datasets.py
│ ├── process_untokenized_datasets_legacy.py
│ ├── process_untokenized_llava_data.py
│ ├── test.py
│ ├── tokenize_ftdp_datasets.py
│ ├── train.py
│ └── utils.py
├── utils/
│ ├── __init__.py
│ ├── constants.py
│ ├── fileio.py
│ ├── handle_moe_load_and_save.py
│ ├── stop_criteria.py
│ ├── templates.py
│ └── zero_to_any_dtype.py
└── version.py
SYMBOL INDEX (4565 symbols across 509 files)
FILE: llava-train_videochat/llava/conversation.py
class SeparatorStyle (line 11) | class SeparatorStyle(Enum):
class Conversation (line 26) | class Conversation:
method get_prompt (line 47) | def get_prompt(self):
method append_message (line 178) | def append_message(self, role, message):
method process_image (line 181) | def process_image(self, image, image_process_mode, return_pil=False, i...
method get_images (line 227) | def get_images(self, return_pil=False, return_path=False):
method is_image_file (line 242) | def is_image_file(self, filename):
method is_video_file (line 246) | def is_video_file(self, filename):
method to_gradio_chatbot (line 250) | def to_gradio_chatbot(self):
method copy (line 288) | def copy(self):
method dict (line 291) | def dict(self):
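The Conversation class above assembles role-tagged messages into a single prompt string, alternating separators so assistant turns are properly closed. A minimal sketch of that two-separator pattern (the system prompt, role names, and separators below are illustrative placeholders, not the repo's actual templates):

# Minimal sketch of a two-role conversation serializer in the style of
# llava/conversation.py; all template strings here are hypothetical.
from dataclasses import dataclass, field
from typing import List, Tuple

@dataclass
class MiniConversation:
    system: str
    roles: Tuple[str, str] = ("USER", "ASSISTANT")
    messages: List[Tuple[str, str]] = field(default_factory=list)
    sep: str = " "       # separator closing a user turn
    sep2: str = "</s>"   # separator closing an assistant turn

    def append_message(self, role: str, message: str) -> None:
        self.messages.append((role, message))

    def get_prompt(self) -> str:
        seps = (self.sep, self.sep2)
        out = self.system + seps[0]
        for i, (role, msg) in enumerate(self.messages):
            # An empty assistant message leaves the prompt open for generation.
            out += f"{role}: {msg}{seps[i % 2]}" if msg else f"{role}:"
        return out

conv = MiniConversation(system="A chat between a user and an assistant.")
conv.append_message(conv.roles[0], "<image>\nWhat happens in the video?")
conv.append_message(conv.roles[1], "")
print(conv.get_prompt())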
FILE: llava-train_videochat/llava/dist_utils.py
function get_rank (line 16) | def get_rank() -> int:
function get_world_size (line 23) | def get_world_size() -> int:
function setup_for_distributed (line 30) | def setup_for_distributed(is_master):
function init_distributed_mode (line 44) | def init_distributed_mode(use_dynamic_port: bool = True):
FILE: llava-train_videochat/llava/mm_utils.py
function resize_and_center_crop (line 12) | def resize_and_center_crop(image, shortest_edge_length):
function auto_pad_images (line 33) | def auto_pad_images(image, grid_params):
function extract_patches (line 63) | def extract_patches(image, patch_size, overlap_ratio):
function process_highres_image_crop_split (line 87) | def process_highres_image_crop_split(image, data_args, processor=None):
function process_highres_image (line 98) | def process_highres_image(image, processor, grid_pinpoints):
function select_best_resolution (line 119) | def select_best_resolution(original_size, possible_resolutions, max_reso...
function resize_and_pad_image (line 157) | def resize_and_pad_image(image, target_resolution):
function divide_to_patches (line 196) | def divide_to_patches(image, patch_size):
function get_anyres_image_grid_shape (line 218) | def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size, ...
function process_anyres_image (line 251) | def process_anyres_image(image, processor, grid_pinpoints):
function process_anyres_image_nopad (line 306) | def process_anyres_image_nopad(image, processor, grid_pinpoints):
function process_anyres_video_nopad (line 365) | def process_anyres_video_nopad(video, processor, grid_pinpoints, max_res...
function load_image_from_base64 (line 410) | def load_image_from_base64(image):
function expand2square (line 414) | def expand2square(pil_img, background_color):
function process_images (line 428) | def process_images(images, image_processor, model_cfg):
function tokenizer_image_token (line 460) | def tokenizer_image_token(prompt, tokenizer, image_token_index=IMAGE_TOK...
function get_model_name_from_path (line 482) | def get_model_name_from_path(model_path):
class KeywordsStoppingCriteria (line 491) | class KeywordsStoppingCriteria(StoppingCriteria):
method __init__ (line 492) | def __init__(self, keywords, tokenizer, input_ids):
method __call__ (line 503) | def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTe...
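Among these helpers, expand2square pads an image to a square canvas (typically filled with the image processor's mean color) so that resizing to a square model input does not distort the aspect ratio. A sketch assuming the standard PIL formulation:

# Sketch of square padding in the spirit of mm_utils.expand2square.
from PIL import Image

def expand2square(pil_img: Image.Image, background_color) -> Image.Image:
    width, height = pil_img.size
    if width == height:
        return pil_img
    side = max(width, height)
    result = Image.new(pil_img.mode, (side, side), background_color)
    # Center the original image on the square canvas.
    result.paste(pil_img, ((side - width) // 2, (side - height) // 2))
    return result

img = Image.new("RGB", (640, 360), (255, 0, 0))
padded = expand2square(img, (122, 116, 104))  # e.g. a CLIP-style mean color
assert padded.size == (640, 640)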
FILE: llava-train_videochat/llava/model/apply_delta.py
function apply_delta (line 14) | def apply_delta(base_model_path, target_model_path, delta_path):
FILE: llava-train_videochat/llava/model/builder.py
function load_pretrained_model (line 27) | def load_pretrained_model(model_path, model_base, model_name, load_8bit=...
FILE: llava-train_videochat/llava/model/consolidate.py
function consolidate_ckpt (line 14) | def consolidate_ckpt(src_path, dst_path):
FILE: llava-train_videochat/llava/model/language_model/llava_qwen.py
class LlavaQwenConfig (line 35) | class LlavaQwenConfig(Qwen2Config):
class LlavaQwenModel (line 39) | class LlavaQwenModel(LlavaMetaModel, Qwen2Model):
method __init__ (line 42) | def __init__(self, config: Qwen2Config):
class LlavaQwenForCausalLM (line 46) | class LlavaQwenForCausalLM(Qwen2ForCausalLM, LlavaMetaForCausalLM):
method __init__ (line 49) | def __init__(self, config):
method get_model (line 60) | def get_model(self):
method forward (line 63) | def forward(
method generate (line 119) | def generate(
method prepare_inputs_for_generation (line 139) | def prepare_inputs_for_generation(self, input_ids, past_key_values=Non...
FILE: llava-train_videochat/llava/model/language_model/llava_qwen_flash.py
class LlavaQwenConfig_Flash (line 35) | class LlavaQwenConfig_Flash(Qwen2Config):
class LlavaQwenModel_Flash (line 39) | class LlavaQwenModel_Flash(LlavaMetaModel, Qwen2Model_Flash):
method __init__ (line 42) | def __init__(self, config: Qwen2Config):
class LlavaQwenForCausalLM_Flash (line 46) | class LlavaQwenForCausalLM_Flash(Qwen2ForCausalLM_Flash, LlavaMetaForCau...
method __init__ (line 49) | def __init__(self, config):
method get_model (line 60) | def get_model(self):
method forward (line 63) | def forward(
method generate (line 120) | def generate(
method prepare_inputs_for_generation (line 143) | def prepare_inputs_for_generation(self, input_ids, past_key_values=Non...
FILE: llava-train_videochat/llava/model/language_model/modeling_qwen2_flash.py
function _get_unpad_data (line 70) | def _get_unpad_data(attention_mask):
class Qwen2RMSNorm (line 83) | class Qwen2RMSNorm(nn.Module):
method __init__ (line 84) | def __init__(self, hidden_size, eps=1e-6):
method forward (line 92) | def forward(self, hidden_states):
class Qwen2RotaryEmbedding (line 101) | class Qwen2RotaryEmbedding(nn.Module):
method __init__ (line 102) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
method _set_cos_sin_cache (line 116) | def _set_cos_sin_cache(self, seq_len, device, dtype):
method forward (line 126) | def forward(self, x, seq_len=None):
function rotate_half (line 138) | def rotate_half(x):
function apply_rotary_pos_emb (line 146) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
class Qwen2MLP (line 175) | class Qwen2MLP(nn.Module):
method __init__ (line 176) | def __init__(self, config):
method forward (line 186) | def forward(self, x):
function repeat_kv (line 191) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
class Qwen2Attention (line 203) | class Qwen2Attention(nn.Module):
method __init__ (line 209) | def __init__(self, config: Qwen2Config, layer_idx: Optional[int] = None):
method forward (line 246) | def forward(
class Qwen2FlashAttention2 (line 328) | class Qwen2FlashAttention2(Qwen2Attention):
method __init__ (line 338) | def __init__(self, *args, **kwargs):
method forward (line 346) | def forward(
method _flash_attention_forward (line 482) | def _flash_attention_forward(
method _upad_input (line 587) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
class Qwen2SdpaAttention (line 631) | class Qwen2SdpaAttention(Qwen2Attention):
method forward (line 639) | def forward(
class Qwen2DecoderLayer (line 725) | class Qwen2DecoderLayer(nn.Module):
method __init__ (line 726) | def __init__(self, config: Qwen2Config, layer_idx: int):
method forward (line 741) | def forward(
class Qwen2PreTrainedModel (line 823) | class Qwen2PreTrainedModel(PreTrainedModel):
method _init_weights (line 833) | def _init_weights(self, module):
class Qwen2Model_Flash (line 919) | class Qwen2Model_Flash(Qwen2PreTrainedModel):
method __init__ (line 927) | def __init__(self, config: Qwen2Config):
method get_input_embeddings (line 944) | def get_input_embeddings(self):
method set_input_embeddings (line 947) | def set_input_embeddings(self, value):
method forward (line 951) | def forward(
method flash_rank_drop (line 1159) | def flash_rank_drop(
class Qwen2ForCausalLM_Flash (line 1382) | class Qwen2ForCausalLM_Flash(Qwen2PreTrainedModel):
method __init__ (line 1385) | def __init__(self, config):
method get_input_embeddings (line 1394) | def get_input_embeddings(self):
method set_input_embeddings (line 1397) | def set_input_embeddings(self, value):
method get_output_embeddings (line 1400) | def get_output_embeddings(self):
method set_output_embeddings (line 1403) | def set_output_embeddings(self, new_embeddings):
method set_decoder (line 1406) | def set_decoder(self, decoder):
method get_decoder (line 1409) | def get_decoder(self):
method forward (line 1414) | def forward(
method prepare_inputs_for_generation (line 1502) | def prepare_inputs_for_generation(
method _reorder_cache (line 1560) | def _reorder_cache(past_key_values, beam_idx):
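rotate_half and apply_rotary_pos_emb above follow the standard rotary position embedding used by Qwen2-style attention: positions are encoded by rotating query/key pairs with precomputed cos/sin tables. A self-contained sketch of that application (tensor layouts assume the common (batch, heads, seq, head_dim) order):

# Standard RoPE application as used in Qwen2-style attention (sketch).
import torch

def rotate_half(x: torch.Tensor) -> torch.Tensor:
    # Swap and negate the two halves of the last dimension.
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
    # cos/sin: (max_seq, head_dim) caches; select per-position rows and
    # broadcast over the head dimension.
    cos = cos[position_ids].unsqueeze(unsqueeze_dim)
    sin = sin[position_ids].unsqueeze(unsqueeze_dim)
    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed, k_embed

q = torch.randn(1, 8, 16, 64)   # (batch, heads, seq, head_dim)
k = torch.randn(1, 8, 16, 64)
pos = torch.arange(16).unsqueeze(0)
inv_freq = 1.0 / (10000 ** (torch.arange(0, 64, 2).float() / 64))
freqs = torch.outer(torch.arange(16).float(), inv_freq)
emb = torch.cat((freqs, freqs), dim=-1)
q2, k2 = apply_rotary_pos_emb(q, k, emb.cos(), emb.sin(), pos)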
FILE: llava-train_videochat/llava/model/llava_arch.py
class LlavaMetaModel (line 35) | class LlavaMetaModel:
method __init__ (line 37) | def __init__(self, config):
method get_vision_tower (line 50) | def get_vision_tower(self):
method initialize_vision_modules (line 56) | def initialize_vision_modules(self, model_args, fsdp=None):
function unpad_image (line 116) | def unpad_image(tensor, original_size, is_frame=False):
class LlavaMetaForCausalLM (line 160) | class LlavaMetaForCausalLM(ABC):
method get_model (line 163) | def get_model(self):
method get_vision_tower (line 166) | def get_vision_tower(self):
method get_4dPool (line 169) | def get_4dPool(self, image_feature):
method get_2dPool (line 194) | def get_2dPool(self, image_feature):
method encode_image (line 220) | def encode_image(self, images_list):
method encode_image_video (line 230) | def encode_image_video(self, images_list, video_idx_in_batch):
method encode_video (line 255) | def encode_video(self, images_list, video_idx_in_batch):
method encode_video_image (line 309) | def encode_video_image(self, images_list, video_idx_in_batch):
method add_token_per_frame (line 364) | def add_token_per_frame(self, image_feature):
method add_different_token_per_frame (line 373) | def add_different_token_per_frame(self, image_feature):
method prepare_inputs_labels_for_multimodal (line 377) | def prepare_inputs_labels_for_multimodal(self, input_ids, position_ids...
method initialize_vision_tokenizer (line 801) | def initialize_vision_tokenizer(self, model_args, tokenizer):
FILE: llava-train_videochat/llava/model/make_delta.py
function make_delta (line 14) | def make_delta(base_model_path, target_model_path, delta_path, hub_repo_...
FILE: llava-train_videochat/llava/model/multimodal_encoder/builder.py
function build_vision_tower (line 11) | def build_vision_tower(vision_tower_cfg, **kwargs):
FILE: llava-train_videochat/llava/model/multimodal_encoder/clip_encoder.py
class CLIPVisionTower (line 12) | class CLIPVisionTower(nn.Module):
method __init__ (line 13) | def __init__(self, vision_tower, args, delay_load=False):
method load_model (line 35) | def load_model(self, device_map=None):
method feature_select (line 46) | def feature_select(self, image_forward_outs):
method forward (line 68) | def forward(self, images):
method dummy_feature (line 82) | def dummy_feature(self):
method dtype (line 86) | def dtype(self):
method device (line 90) | def device(self):
method config (line 94) | def config(self):
method hidden_size (line 101) | def hidden_size(self):
method num_patches_per_side (line 110) | def num_patches_per_side(self):
method num_patches (line 114) | def num_patches(self):
method image_size (line 121) | def image_size(self):
class CLIPVisionTowerS2 (line 125) | class CLIPVisionTowerS2(CLIPVisionTower):
method __init__ (line 126) | def __init__(self, vision_tower, args, delay_load=False):
method load_model (line 141) | def load_model(self, device_map=None):
method forward_feature (line 155) | def forward_feature(self, images):
method forward (line 160) | def forward(self, images):
method hidden_size (line 172) | def hidden_size(self):
FILE: llava-train_videochat/llava/model/multimodal_encoder/internvideo2/flash_attention_class.py
class FlashAttention (line 10) | class FlashAttention(nn.Module):
method __init__ (line 21) | def __init__(self, softmax_scale=None, attention_dropout=0.0, device=N...
method forward (line 26) | def forward(self, qkv, key_padding_mask=None, causal=False, cu_seqlens...
FILE: llava-train_videochat/llava/model/multimodal_encoder/internvideo2/pos_embed.py
function get_3d_sincos_pos_embed (line 12) | def get_3d_sincos_pos_embed(embed_dim, grid_size, t_size, cls_token=False):
function get_2d_sincos_pos_embed (line 66) | def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
function get_1d_sincos_pos_embed (line 86) | def get_1d_sincos_pos_embed(embed_dim, t_size, cls_token=False):
function get_2d_sincos_pos_embed_from_grid (line 101) | def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
function get_1d_sincos_pos_embed_from_grid (line 116) | def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
function interpolate_pos_embed_internvideo2 (line 137) | def interpolate_pos_embed_internvideo2(checkpoint_model, model, orig_t_s...
function interpolate_pos_embed_internvideo2_new (line 227) | def interpolate_pos_embed_internvideo2_new(checkpoint_model, model, orig...
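The get_*_sincos_pos_embed family builds fixed MAE-style sinusoidal position tables; the 2D and 3D variants factorize spatial and temporal axes and compose the 1D building block per axis. A sketch of that 1D building block:

# MAE-style fixed sin-cos position embedding (sketch of the 1D block
# that the 2D/3D variants compose per axis).
import numpy as np

def get_1d_sincos_pos_embed_from_grid(embed_dim: int, pos: np.ndarray) -> np.ndarray:
    assert embed_dim % 2 == 0
    omega = np.arange(embed_dim // 2, dtype=np.float64)
    omega = 1.0 / 10000 ** (omega / (embed_dim / 2))             # (D/2,)
    out = np.einsum("m,d->md", pos.reshape(-1).astype(np.float64), omega)  # (M, D/2)
    return np.concatenate([np.sin(out), np.cos(out)], axis=1)   # (M, D)

pe = get_1d_sincos_pos_embed_from_grid(64, np.arange(16))
print(pe.shape)  # (16, 64)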
FILE: llava-train_videochat/llava/model/multimodal_encoder/internvideo2/vit_scale_clean.py
class CrossAttention (line 20) | class CrossAttention(nn.Module):
method __init__ (line 21) | def __init__(
method forward (line 52) | def forward(self, x, k=None, v=None):
class AttentiveBlock (line 85) | class AttentiveBlock(nn.Module):
method __init__ (line 87) | def __init__(self, dim, num_heads, qkv_bias=False, qk_scale=None, drop...
method forward (line 100) | def forward(self, x_q, x_kv, pos_q, pos_k, bool_masked_pos, rel_pos_bi...
class AttentionPoolingBlock (line 109) | class AttentionPoolingBlock(AttentiveBlock):
method forward (line 111) | def forward(self, x):
class RMSNorm (line 119) | class RMSNorm(nn.Module):
method __init__ (line 120) | def __init__(self, hidden_size, eps=1e-6):
method forward (line 125) | def forward(self, hidden_states):
class LayerScale (line 133) | class LayerScale(nn.Module):
method __init__ (line 134) | def __init__(self, dim, init_values=1e-5, inplace=False, force_fp32=Fa...
method forward (line 141) | def forward(self, x):
class Attention (line 151) | class Attention(nn.Module):
method __init__ (line 152) | def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., pro...
method _naive_attn (line 175) | def _naive_attn(self, x):
method _flash_attn (line 197) | def _flash_attn(self, x, key_padding_mask=None, need_weights=False):
method forward (line 219) | def forward(self, x):
class Mlp (line 224) | class Mlp(nn.Module):
method __init__ (line 228) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 242) | def forward(self, x):
class Block (line 251) | class Block(nn.Module):
method __init__ (line 253) | def __init__(
method forward (line 284) | def forward(self, x, residual=None):
class PatchEmbed (line 306) | class PatchEmbed(nn.Module):
method __init__ (line 310) | def __init__(
method forward (line 334) | def forward(self, x):
class PretrainVisionTransformer_clean (line 343) | class PretrainVisionTransformer_clean(nn.Module):
method __init__ (line 344) | def __init__(
method init_pos_embed (line 453) | def init_pos_embed(self):
method _init_weights (line 476) | def _init_weights(self, m):
method fix_init_weight (line 485) | def fix_init_weight(self):
method dtype (line 494) | def dtype(self):
method get_num_layers (line 497) | def get_num_layers(self):
method no_weight_decay (line 501) | def no_weight_decay(self):
method forward (line 512) | def forward(self, x, mask=None, use_image=False):
function pretrain_internvideo2_giant_patch14_224_clean (line 572) | def pretrain_internvideo2_giant_patch14_224_clean(config):
function pretrain_internvideo2_6b_patch14_224_clean (line 608) | def pretrain_internvideo2_6b_patch14_224_clean(config):
FILE: llava-train_videochat/llava/model/multimodal_encoder/internvideo2_encoder.py
class InternVideo2ImageProcessor (line 30) | class InternVideo2ImageProcessor:
method __init__ (line 31) | def __init__(self, image_mean=(0.485, 0.456, 0.406), image_std=(0.229,...
method preprocess (line 43) | def preprocess(self, images, return_tensors, target_size=None):
class InternVideo2VisionConfig (line 69) | class InternVideo2VisionConfig:
method __init__ (line 72) | def __init__(
function build_vit (line 101) | def build_vit(config, pt_type='origin'):
class InternVideo2VisionTower (line 145) | class InternVideo2VisionTower(nn.Module):
method __init__ (line 146) | def __init__(self, vision_tower, vision_tower_cfg, delay_load=False, p...
method load_model (line 172) | def load_model(self, device_map=None):
method forward (line 182) | def forward(self, images):
method dummy_feature (line 195) | def dummy_feature(self):
method dtype (line 199) | def dtype(self):
method device (line 204) | def device(self):
method hidden_size (line 209) | def hidden_size(self):
method num_patches (line 213) | def num_patches(self):
method num_patches_per_side (line 217) | def num_patches_per_side(self):
method image_size (line 222) | def image_size(self):
FILE: llava-train_videochat/llava/model/multimodal_encoder/siglip_encoder.py
class SigLipImageProcessor (line 34) | class SigLipImageProcessor:
method __init__ (line 35) | def __init__(self, image_mean=(0.5, 0.5, 0.5), image_std=(0.5, 0.5, 0....
method preprocess (line 47) | def preprocess(self, images, return_tensors):
class SigLipVisionConfig (line 70) | class SigLipVisionConfig(PretrainedConfig):
method __init__ (line 73) | def __init__(
method from_pretrained (line 103) | def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os....
class SigLipVisionModelOutput (line 120) | class SigLipVisionModelOutput(ModelOutput):
class SigLipVisionEmbeddings (line 148) | class SigLipVisionEmbeddings(nn.Module):
method __init__ (line 149) | def __init__(self, config: SigLipVisionConfig):
method forward (line 169) | def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
class SigLipAttention (line 177) | class SigLipAttention(nn.Module):
method __init__ (line 181) | def __init__(self, config):
method forward (line 197) | def forward(
class SigLipMLP (line 243) | class SigLipMLP(nn.Module):
method __init__ (line 244) | def __init__(self, config):
method forward (line 251) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class SigLipEncoderLayer (line 259) | class SigLipEncoderLayer(nn.Module):
method __init__ (line 260) | def __init__(self, config: SigLipVisionConfig):
method forward (line 269) | def forward(
class SigLipPreTrainedModel (line 308) | class SigLipPreTrainedModel(PreTrainedModel):
method _init_weights (line 318) | def _init_weights(self, module):
class SigLipEncoder (line 324) | class SigLipEncoder(nn.Module):
method __init__ (line 333) | def __init__(self, config: SigLipVisionConfig):
method forward (line 340) | def forward(
class SigLipVisionTransformer (line 408) | class SigLipVisionTransformer(nn.Module):
method __init__ (line 409) | def __init__(self, config: SigLipVisionConfig):
method forward (line 419) | def forward(
class SigLipMultiheadAttentionPoolingHead (line 459) | class SigLipMultiheadAttentionPoolingHead(nn.Module):
method __init__ (line 462) | def __init__(self, config: SigLipVisionConfig):
method forward (line 470) | def forward(self, hidden_state):
class SigLipVisionModel (line 483) | class SigLipVisionModel(SigLipPreTrainedModel):
method __init__ (line 488) | def __init__(self, config: SigLipVisionConfig):
method get_input_embeddings (line 496) | def get_input_embeddings(self) -> nn.Module:
method forward (line 499) | def forward(
class SigLipVisionTower (line 538) | class SigLipVisionTower(nn.Module):
method __init__ (line 539) | def __init__(self, vision_tower, vision_tower_cfg, delay_load=False):
method load_model (line 563) | def load_model(self, device_map=None):
method forward (line 576) | def forward(self, images):
method dummy_feature (line 592) | def dummy_feature(self):
method dtype (line 596) | def dtype(self):
method device (line 601) | def device(self):
method hidden_size (line 606) | def hidden_size(self):
method num_patches (line 610) | def num_patches(self):
method num_patches_per_side (line 614) | def num_patches_per_side(self):
method image_size (line 619) | def image_size(self):
FILE: llava-train_videochat/llava/model/multimodal_encoder/umt/vit.py
class DropPath (line 19) | class DropPath(nn.Module):
method __init__ (line 22) | def __init__(self, drop_prob=None):
method forward (line 26) | def forward(self, x):
method extra_repr (line 29) | def extra_repr(self) -> str:
class Mlp (line 33) | class Mlp(nn.Module):
method __init__ (line 34) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 43) | def forward(self, x):
class Attention (line 51) | class Attention(nn.Module):
method __init__ (line 52) | def __init__(
method forward (line 85) | def forward(self, x):
class Block (line 117) | class Block(nn.Module):
method __init__ (line 118) | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_sc...
method forward (line 138) | def forward(self, x):
class PatchEmbed (line 148) | class PatchEmbed(nn.Module):
method __init__ (line 151) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=...
method forward (line 167) | def forward(self, x, **kwargs):
function get_sinusoid_encoding_table (line 177) | def get_sinusoid_encoding_table(n_position, d_hid, ckpt_num_frame=-1, cu...
function get_sinusoid_encoding_table2 (line 211) | def get_sinusoid_encoding_table2(n_position=784, d_hid=1024, cur_frame=8...
class PretrainVisionTransformerEncoder (line 258) | class PretrainVisionTransformerEncoder(nn.Module):
method __init__ (line 261) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=...
method no_weight_decay (line 312) | def no_weight_decay(self):
method forward_features (line 315) | def forward_features(self, x, use_image=False):
method forward (line 336) | def forward(self, x, use_image=False):
class PretrainVisionTransformer (line 341) | class PretrainVisionTransformer(nn.Module):
method __init__ (line 344) | def __init__(self,
method _init_weights (line 400) | def _init_weights(self, m):
method no_weight_decay (line 410) | def no_weight_decay(self):
method forward (line 413) | def forward(self, x, use_image=False):
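DropPath above implements stochastic depth: during training, entire residual branches are zeroed per sample and the survivors rescaled so the expectation is unchanged. A sketch of the standard timm-style formulation:

# Stochastic depth ("DropPath") as commonly implemented in timm-style ViTs.
import torch

def drop_path(x: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    if drop_prob == 0.0 or not training:
        return x
    keep_prob = 1.0 - drop_prob
    # One Bernoulli draw per sample, broadcast over all remaining dims.
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    mask = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    mask.floor_()  # binarize: 1 with prob keep_prob, else 0
    return x.div(keep_prob) * mask

x = torch.randn(4, 197, 768)
y = drop_path(x, drop_prob=0.1, training=True)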
FILE: llava-train_videochat/llava/model/multimodal_encoder/umt_encoder.py
class UMTImageProcessor (line 30) | class UMTImageProcessor:
method __init__ (line 31) | def __init__(self, image_mean=(0.485, 0.456, 0.406), image_std=(0.229,...
method preprocess (line 43) | def preprocess(self, images, return_tensors, target_size=None):
class UMTVisionConfig (line 69) | class UMTVisionConfig:
method __init__ (line 72) | def __init__(
function build_vit (line 95) | def build_vit(config, pt_type='origin'):
class UMTVisionTower (line 134) | class UMTVisionTower(nn.Module):
method __init__ (line 135) | def __init__(self, vision_tower, vision_tower_cfg, delay_load=False, p...
method load_model (line 160) | def load_model(self, device_map=None):
method forward (line 170) | def forward(self, images):
method dummy_feature (line 185) | def dummy_feature(self):
method dtype (line 189) | def dtype(self):
method device (line 194) | def device(self):
method hidden_size (line 199) | def hidden_size(self):
method num_patches (line 203) | def num_patches(self):
method num_patches_per_side (line 207) | def num_patches_per_side(self):
method image_size (line 212) | def image_size(self):
FILE: llava-train_videochat/llava/model/multimodal_projector/builder.py
class IdentityMap (line 7) | class IdentityMap(nn.Module):
method __init__ (line 8) | def __init__(self):
method forward (line 11) | def forward(self, x, *args, **kwargs):
method config (line 15) | def config(self):
class SimpleResBlock (line 19) | class SimpleResBlock(nn.Module):
method __init__ (line 20) | def __init__(self, channels):
method forward (line 25) | def forward(self, x):
function build_vision_projector (line 30) | def build_vision_projector(config, delay_load=False, **kwargs):
FILE: llava-train_videochat/llava/model/multimodal_projector/tome16_mlp_hd64.py
function bipartite_soft_matching (line 16) | def bipartite_soft_matching(
function merge_wavg (line 72) | def merge_wavg(
class ToMe16_mlp_hd64 (line 91) | class ToMe16_mlp_hd64(nn.Module):
method __init__ (line 92) | def __init__(self, config, vision_cfg):
method merge_tokens (line 107) | def merge_tokens(self, x, target_num_token):
method forward (line 143) | def forward(self, x, compress=False, local_num_frames=-1):
method config (line 169) | def config(self):
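The ToMe16 projector reduces visual tokens before its MLP via bipartite soft matching: tokens are split into alternating sets, each token in one set is matched to its most similar partner in the other by cosine similarity, and the r best-matched pairs are merged by averaging. A compact sketch of the core matching step (shapes and naming are illustrative, not the repo's exact code):

# Sketch of ToMe bipartite soft matching.
import torch

def bipartite_soft_matching(x: torch.Tensor, r: int):
    # x: (B, N, C) token features; returns a merge function reducing N by r.
    with torch.no_grad():
        xn = x / x.norm(dim=-1, keepdim=True)
        a, b = xn[:, ::2], xn[:, 1::2]
        scores = a @ b.transpose(-1, -2)          # (B, Na, Nb) similarities
        node_max, node_idx = scores.max(dim=-1)   # best partner per A-token
        order = node_max.argsort(dim=-1, descending=True)[..., None]
        unm_idx, src_idx = order[:, r:], order[:, :r]
        dst_idx = node_idx[..., None].gather(dim=-2, index=src_idx)

    def merge(feats: torch.Tensor) -> torch.Tensor:
        src, dst = feats[:, ::2], feats[:, 1::2]
        c = feats.shape[-1]
        unm = src.gather(dim=-2, index=unm_idx.expand(-1, -1, c))
        sel = src.gather(dim=-2, index=src_idx.expand(-1, -1, c))
        # Average each merged A-token into its matched B-token.
        dst = dst.scatter_reduce(-2, dst_idx.expand(-1, -1, c), sel, reduce="mean")
        return torch.cat([unm, dst], dim=1)       # (B, N - r, C)

    return merge

tokens = torch.randn(2, 256, 1024)
merged = bipartite_soft_matching(tokens, r=64)(tokens)
print(merged.shape)  # torch.Size([2, 192, 1024])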
FILE: llava-train_videochat/llava/model/utils.py
function auto_upgrade (line 4) | def auto_upgrade(config):
FILE: llava-train_videochat/llava/serialize_utils.py
function get_world_size (line 25) | def get_world_size() -> int:
function get_rank (line 33) | def get_rank() -> int:
function get_rank (line 40) | def get_rank() -> int:
function get_local_rank (line 47) | def get_local_rank() -> int:
function get_local_size (line 62) | def get_local_size() -> int:
function _get_global_gloo_group (line 67) | def _get_global_gloo_group():
function all_gather (line 78) | def all_gather(data, group=None):
class NumpySerializedList (line 105) | class NumpySerializedList:
method __init__ (line 106) | def __init__(self, lst: list):
method __len__ (line 122) | def __len__(self):
method __getitem__ (line 125) | def __getitem__(self, idx):
class TorchSerializedList (line 132) | class TorchSerializedList(NumpySerializedList):
method __init__ (line 133) | def __init__(self, lst: list):
method __getitem__ (line 138) | def __getitem__(self, idx):
function local_scatter (line 145) | def local_scatter(array: Optional[List[Any]]):
class TorchShmSerializedList (line 167) | class TorchShmSerializedList(TorchSerializedList):
method __init__ (line 168) | def __init__(self, lst: list):
function local_broadcast_process_authkey (line 194) | def local_broadcast_process_authkey():
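NumpySerializedList applies the detectron2-style trick of pickling every dataset record into one flat numpy byte buffer: forked dataloader workers then share read-only memory pages instead of duplicating Python objects through refcount copy-on-write. A sketch of the idea (simplified, without the torch/shared-memory variants):

# Sketch of the serialized-list trick used to keep dataset metadata
# memory-flat across dataloader worker forks.
import pickle
import numpy as np

class MiniSerializedList:
    def __init__(self, lst: list):
        def dump(obj) -> np.ndarray:
            return np.frombuffer(pickle.dumps(obj, protocol=-1), dtype=np.uint8)
        items = [dump(o) for o in lst]
        # Cumulative end offsets let __getitem__ slice each record back out.
        self._addr = np.cumsum(np.asarray([len(it) for it in items], dtype=np.int64))
        self._buf = np.concatenate(items)

    def __len__(self) -> int:
        return len(self._addr)

    def __getitem__(self, idx: int):
        start = 0 if idx == 0 else int(self._addr[idx - 1])
        end = int(self._addr[idx])
        return pickle.loads(self._buf[start:end].tobytes())

records = MiniSerializedList([{"path": f"v{i}.mp4", "len": i} for i in range(3)])
print(len(records), records[1])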
FILE: llava-train_videochat/llava/train/llava_trainer.py
function maybe_zero_3 (line 27) | def maybe_zero_3(param, ignore_status=False, name=None):
function get_mm_adapter_state_maybe_zero_3 (line 42) | def get_mm_adapter_state_maybe_zero_3(named_params, keys_to_match):
function split_to_even_chunks (line 48) | def split_to_even_chunks(indices, lengths, num_chunks):
function get_variable_length_grouped_indices (line 70) | def get_variable_length_grouped_indices(lengths, batch_size, world_size,...
function get_modality_length_grouped_indices (line 86) | def get_modality_length_grouped_indices(lengths, batch_size, world_size,...
function get_length_grouped_indices (line 126) | def get_length_grouped_indices(lengths, batch_size, world_size, generato...
function get_length_grouped_indices_auto_single (line 149) | def get_length_grouped_indices_auto_single(lengths, batch_size, world_si...
function get_modality_length_grouped_indices_auto (line 164) | def get_modality_length_grouped_indices_auto(lengths, batch_size, world_...
class LengthGroupedSampler (line 193) | class LengthGroupedSampler(Sampler):
method __init__ (line 199) | def __init__(
method __len__ (line 220) | def __len__(self):
method __iter__ (line 223) | def __iter__(self):
class LLaVATrainer (line 237) | class LLaVATrainer(Trainer):
method create_accelerator_and_postprocess (line 239) | def create_accelerator_and_postprocess(self):
method _get_train_sampler (line 270) | def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
method get_train_dataloader (line 316) | def get_train_dataloader(self) -> DataLoader:
method create_optimizer (line 353) | def create_optimizer(self):
method _save_checkpoint (line 432) | def _save_checkpoint(self, model, trial, metrics=None):
method _save (line 456) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
FILE: llava-train_videochat/llava/train/llava_trainer_eval.py
class LLaVAEvalTrainer (line 7) | class LLaVAEvalTrainer(LLaVATrainer):
method evaluate (line 8) | def evaluate(self, evaluate_args):
FILE: llava-train_videochat/llava/train/train.py
class ModelArguments (line 64) | class ModelArguments:
class DataArguments (line 135) | class DataArguments:
class TrainingArguments (line 159) | class TrainingArguments(transformers.TrainingArguments):
function maybe_zero_3 (line 208) | def maybe_zero_3(param, ignore_status=False, name=None):
function get_peft_state_maybe_zero_3 (line 224) | def get_peft_state_maybe_zero_3(named_params, bias):
function get_peft_state_non_lora_maybe_zero_3 (line 249) | def get_peft_state_non_lora_maybe_zero_3(named_params, require_grad_only...
function get_mm_adapter_state_maybe_zero_3 (line 257) | def get_mm_adapter_state_maybe_zero_3(named_params, keys_to_match):
function find_all_linear_names (line 263) | def find_all_linear_names(model):
function safe_save_model_for_hf_trainer (line 279) | def safe_save_model_for_hf_trainer(trainer: transformers.Trainer, output...
function smart_tokenizer_and_embedding_resize (line 323) | def smart_tokenizer_and_embedding_resize(
function _tokenize_fn (line 346) | def _tokenize_fn(strings: Sequence[str], tokenizer: transformers.PreTrai...
function _mask_targets (line 368) | def _mask_targets(target, tokenized_lens, speakers):
function _add_speaker_and_signal (line 379) | def _add_speaker_and_signal(header, source, get_conversation=True):
function preprocess_multimodal (line 399) | def preprocess_multimodal(sources: Sequence[str], data_args: DataArgumen...
function preprocess_llama_2 (line 431) | def preprocess_llama_2(sources, tokenizer: transformers.PreTrainedTokeni...
function preprocess_gemma (line 506) | def preprocess_gemma(sources: List[List[Dict[str, str]]], tokenizer: tra...
function preprocess_qwen (line 585) | def preprocess_qwen(sources, tokenizer: transformers.PreTrainedTokenizer...
function preprocess_internlm2 (line 665) | def preprocess_internlm2(sources, tokenizer: transformers.PreTrainedToke...
function preprocess_llama3 (line 742) | def preprocess_llama3(
function preprocess_v1 (line 828) | def preprocess_v1(sources, tokenizer: transformers.PreTrainedTokenizer, ...
function preprocess_mpt (line 907) | def preprocess_mpt(sources, tokenizer: transformers.PreTrainedTokenizer,...
function preprocess_plain (line 988) | def preprocess_plain(
function preprocess (line 1010) | def preprocess(sources: Sequence[str], tokenizer: transformers.PreTraine...
class LazySupervisedDataset (line 1065) | class LazySupervisedDataset(Dataset):
method __init__ (line 1066) | def __init__(self, data_path: str, tokenizer: transformers.PreTrainedT...
method __len__ (line 1257) | def __len__(self):
method lengths (line 1261) | def lengths(self):
method modality_lengths (line 1274) | def modality_lengths(self):
method process_image (line 1285) | def process_image(self, image_file, overwrite_image_aspect_ratio=None):
method process_video (line 1344) | def process_video(self, video_file, data_anno, data_args):
method __getitem__ (line 1413) | def __getitem__(self, i) -> Dict[str, torch.Tensor]:
method _get_item (line 1448) | def _get_item(self, i) -> Dict[str, torch.Tensor]:
class DataCollatorForSupervisedDataset (line 1538) | class DataCollatorForSupervisedDataset(object):
method pad_sequence (line 1543) | def pad_sequence(self, input_ids, batch_first, padding_value):
method __call__ (line 1551) | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
function make_supervised_data_module (line 1588) | def make_supervised_data_module(tokenizer: transformers.PreTrainedTokeni...
function get_model (line 1595) | def get_model(model_args, training_args, bnb_model_from_pretrained_args):
function train (line 1814) | def train(attn_implementation=None):
FILE: llava-train_videochat/llava/utils.py
function process_video_with_decord (line 25) | def process_video_with_decord(video_file, data_args):
function process_video_with_pyav (line 41) | def process_video_with_pyav(video_file, data_args):
function rank0_print (line 66) | def rank0_print(*args):
function rank_print (line 74) | def rank_print(*args):
function build_logger (line 80) | def build_logger(logger_name, logger_filename):
class StreamToLogger (line 122) | class StreamToLogger(object):
method __init__ (line 127) | def __init__(self, logger, log_level=logging.INFO):
method __getattr__ (line 133) | def __getattr__(self, attr):
method write (line 136) | def write(self, buf):
method flush (line 150) | def flush(self):
function disable_torch_init (line 156) | def disable_torch_init():
function violates_moderation (line 166) | def violates_moderation(text):
function pretty_print_semaphore (line 188) | def pretty_print_semaphore(semaphore):
FILE: llava-train_videochat/llava/video_utils.py
function get_index (line 18) | def get_index(num_frames, num_segments):
function lazy_load_s3video (line 27) | def lazy_load_s3video(s3path_video, num_frames, video_start, video_end, ...
function pts_to_secs (line 71) | def pts_to_secs(pts: int, time_base: float, start_pts: int) -> float:
function get_pyav_video_duration (line 86) | def get_pyav_video_duration(video_reader):
function get_frame_indices (line 97) | def get_frame_indices(num_frames, vlen, sample='middle', fix_start=None,...
function read_frames_av (line 167) | def read_frames_av(video_path, num_frames, sample='rand', client=None, f...
function read_frames_gif (line 196) | def read_frames_gif(
function read_frames_decord (line 248) | def read_frames_decord(
function read_frames_img (line 298) | def read_frames_img(
function read_frames_fake (line 400) | def read_frames_fake(
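get_index and get_frame_indices select a fixed number of frames from a variable-length clip; the common "middle" strategy divides the clip into num_segments equal spans and takes the center frame of each. A sketch of that sampling rule:

# Sketch of uniform "middle" frame sampling.
import numpy as np

def get_index(num_frames: int, num_segments: int) -> np.ndarray:
    seg_size = float(num_frames - 1) / num_segments
    start = int(seg_size / 2)
    return np.array([start + int(np.round(seg_size * i)) for i in range(num_segments)])

print(get_index(num_frames=300, num_segments=8))
# [ 18  55  93 130 168 205 242 280]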
FILE: lmms-eval_videochat/lmms_eval/__main__.py
function _handle_non_serializable (line 30) | def _handle_non_serializable(o):
function parse_eval_args (line 39) | def parse_eval_args() -> argparse.Namespace:
function cli_evaluate (line 152) | def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
function cli_evaluate_single (line 228) | def cli_evaluate_single(args: Union[argparse.Namespace, None] = None) ->...
function print_results (line 345) | def print_results(args, results):
FILE: lmms-eval_videochat/lmms_eval/api/filter.py
class Filter (line 8) | class Filter:
method __init__ (line 17) | def __init__(self, *args, **kwargs) -> None:
method apply (line 22) | def apply(self, resps, docs):
class FilterEnsemble (line 33) | class FilterEnsemble:
method apply (line 44) | def apply(self, instances: List[Instance], docs: List[Dataset]) -> None:
FILE: lmms-eval_videochat/lmms_eval/api/instance.py
class Instance (line 6) | class Instance:
method __post_init__ (line 20) | def __post_init__(self) -> None:
method args (line 25) | def args(self):
FILE: lmms-eval_videochat/lmms_eval/api/metrics.py
function bypass_agg (line 17) | def bypass_agg(arr):
function mean (line 22) | def mean(arr):
function median (line 27) | def median(arr):
function perplexity (line 34) | def perplexity(items):
function weighted_perplexity (line 41) | def weighted_perplexity(items):
function bits_per_byte (line 46) | def bits_per_byte(items):
function f1_score (line 51) | def f1_score(items):
function matthews_corrcoef (line 61) | def matthews_corrcoef(items):
function bleu (line 70) | def bleu(items):
function chrf (line 88) | def chrf(items):
function ter (line 103) | def ter(items):
function acc_fn (line 124) | def acc_fn(items): # This is a passthrough function
function acc_norm_fn (line 134) | def acc_norm_fn(items): # This is a passthrough function
function acc_mutual_info_fn (line 144) | def acc_mutual_info_fn(items): # This is a passthrough function
function exact_match_fn (line 156) | def exact_match_fn(**kwargs):
function perplexity_fn (line 166) | def perplexity_fn(items): # This is a passthrough function
function levenshtein_distance (line 170) | def levenshtein_distance(s1, s2):
function anls (line 192) | def anls(
function pop_stddev (line 216) | def pop_stddev(arr):
function sample_stddev (line 221) | def sample_stddev(arr):
function mean_stderr (line 226) | def mean_stderr(arr):
function bypass (line 236) | def bypass(items):
function mcc_fn (line 246) | def mcc_fn(items): # This is a passthrough function
function f1_fn (line 256) | def f1_fn(items): # This is a passthrough function
function bleu_fn (line 266) | def bleu_fn(items): # This is a passthrough function
function chrf_fn (line 276) | def chrf_fn(items): # This is a passthrough function
function ter_fn (line 286) | def ter_fn(items): # This is a passthrough function
function acc_all (line 296) | def acc_all(items):
function acc_all_stderr (line 315) | def acc_all_stderr(items):
function metric_max_over_ground_truths (line 333) | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
function weighted_mean (line 342) | def weighted_mean(items):
function is_non_str_iterable (line 347) | def is_non_str_iterable(obj):
function _sacreformat (line 351) | def _sacreformat(refs, preds):
class _bootstrap_internal (line 382) | class _bootstrap_internal:
method __init__ (line 383) | def __init__(self, f, n) -> None:
method __call__ (line 387) | def __call__(self, v):
function bootstrap_stderr (line 397) | def bootstrap_stderr(f, xs, iters):
function stderr_for_metric (line 426) | def stderr_for_metric(metric, bootstrap_iters):
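Among these metrics, anls scores OCR/VQA-style answers as 1 minus the length-normalized Levenshtein distance to the closest ground truth, zeroed below a similarity threshold (0.5 is the conventional cutoff, assumed here). A sketch of the two pieces:

# Sketch of ANLS scoring with a single-row Levenshtein DP.
def levenshtein_distance(s1: str, s2: str) -> int:
    if len(s1) > len(s2):
        s1, s2 = s2, s1
    prev = list(range(len(s1) + 1))
    for i2, c2 in enumerate(s2):
        cur = [i2 + 1]
        for i1, c1 in enumerate(s1):
            # Diagonal on match, else 1 + min(substitute, delete, insert).
            cur.append(prev[i1] if c1 == c2 else 1 + min(prev[i1], prev[i1 + 1], cur[-1]))
        prev = cur
    return prev[-1]

def anls(prediction: str, references: list, threshold: float = 0.5) -> float:
    scores = []
    for ref in references:
        p, r = prediction.strip().lower(), ref.strip().lower()
        dist = levenshtein_distance(p, r)
        scores.append(1.0 - dist / max(len(p), len(r), 1))
    best = max(scores)
    return best if best >= threshold else 0.0

print(anls("videochat", ["VideoChat"]))  # 1.0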
FILE: lmms-eval_videochat/lmms_eval/api/model.py
class lmms (line 18) | class lmms(abc.ABC):
method __init__ (line 19) | def __init__(self) -> None:
method loglikelihood (line 31) | def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float,...
method generate_until (line 59) | def generate_until(self, requests) -> List[str]:
method create_from_arg_string (line 78) | def create_from_arg_string(cls: Type[T], arg_string: str, additional_c...
method rank (line 95) | def rank(self):
method world_size (line 102) | def world_size(self):
method set_cache_hook (line 108) | def set_cache_hook(self, cache_hook) -> None:
function hash_args (line 113) | def hash_args(attr, args):
class CacheHook (line 118) | class CacheHook:
method __init__ (line 119) | def __init__(self, cachinglm) -> None:
method add_partial (line 126) | def add_partial(self, attr, req, res) -> None:
class CachingLMM (line 133) | class CachingLMM:
method __init__ (line 134) | def __init__(self, lm, cache_db) -> None:
method __getattr__ (line 151) | def __getattr__(self, attr):
method get_cache_hook (line 202) | def get_cache_hook(self):
FILE: lmms-eval_videochat/lmms_eval/api/registry.py
function register_model (line 11) | def register_model(*names):
function get_model (line 27) | def get_model(model_name):
function register_task (line 41) | def register_task(name):
function register_group (line 53) | def register_group(name):
function register_metric (line 82) | def register_metric(**args):
function get_metric (line 109) | def get_metric(name: str, hf_evaluate_metric=False) -> Callable:
function register_aggregation (line 125) | def register_aggregation(name):
function get_aggregation (line 135) | def get_aggregation(name):
function get_metric_aggregation (line 144) | def get_metric_aggregation(name):
function is_higher_better (line 153) | def is_higher_better(metric_name):
FILE: lmms-eval_videochat/lmms_eval/api/samplers.py
class ContextSampler (line 1) | class ContextSampler:
method __init__ (line 2) | def __init__(self, docs, task, fewshot_indices=None, rnd=None) -> None:
method get_context (line 20) | def get_context(self, doc, num_fewshot):
method sample (line 50) | def sample(self, n):
class FirstNSampler (line 58) | class FirstNSampler(ContextSampler):
method sample (line 59) | def sample(self, n) -> None:
class BalancedSampler (line 68) | class BalancedSampler(ContextSampler):
method sample (line 69) | def sample(self, n) -> None:
class ManualSampler (line 78) | class ManualSampler(ContextSampler):
method sample (line 79) | def sample(self, n) -> None:
function get_sampler (line 90) | def get_sampler(name):
FILE: lmms-eval_videochat/lmms_eval/api/task.py
class TaskConfig (line 55) | class TaskConfig(dict):
method __post_init__ (line 103) | def __post_init__(self) -> None:
method __getitem__ (line 130) | def __getitem__(self, item):
method __setitem__ (line 133) | def __setitem__(self, item, value):
method to_dict (line 136) | def to_dict(self):
class Task (line 157) | class Task(abc.ABC):
method __init__ (line 178) | def __init__(
method download (line 215) | def download(self, data_dir=None, cache_dir=None, download_mode=None) ...
method config (line 268) | def config(self):
method has_training_docs (line 273) | def has_training_docs(self):
method has_validation_docs (line 278) | def has_validation_docs(self):
method has_test_docs (line 283) | def has_test_docs(self):
method training_docs (line 287) | def training_docs(self):
method validation_docs (line 294) | def validation_docs(self):
method test_docs (line 301) | def test_docs(self):
method fewshot_docs (line 308) | def fewshot_docs(self):
method _process_doc (line 322) | def _process_doc(self, doc):
method instances (line 334) | def instances(self):
method fewshot_examples (line 340) | def fewshot_examples(self, k, rnd):
method doc_to_decontamination_query (line 346) | def doc_to_decontamination_query(self, doc) -> None:
method doc_to_text (line 351) | def doc_to_text(self, doc):
method doc_to_target (line 355) | def doc_to_target(self, doc):
method build_all_requests (line 359) | def build_all_requests(self, limit=None, rank=None, world_size=None) -...
method construct_requests (line 399) | def construct_requests(self, doc_id, ctx, **kwargs):
method process_results (line 418) | def process_results(self, doc, results):
method aggregation (line 431) | def aggregation(self):
method higher_is_better (line 440) | def higher_is_better(self):
method count_bytes (line 449) | def count_bytes(cls, doc):
method fewshot_context (line 454) | def fewshot_context(
method apply_filters (line 504) | def apply_filters(self):
method dump_config (line 512) | def dump_config(self) -> dict:
method override_metric (line 522) | def override_metric(self, metric_name: str) -> None:
class ConfigurableTask (line 546) | class ConfigurableTask(Task):
method __init__ (line 551) | def __init__(self, model_name) -> None: # TODO no super() call here
method _prepare_model_specific_config (line 639) | def _prepare_model_specific_config(self):
method _prepare_metric_and_aggregation (line 665) | def _prepare_metric_and_aggregation(self):
method download (line 717) | def download(self, dataset_kwargs=None) -> None:
method has_training_docs (line 915) | def has_training_docs(self) -> bool:
method has_validation_docs (line 921) | def has_validation_docs(self) -> bool:
method has_test_docs (line 927) | def has_test_docs(self) -> bool:
method training_docs (line 933) | def training_docs(self) -> datasets.Dataset:
method validation_docs (line 937) | def validation_docs(self) -> datasets.Dataset:
method test_docs (line 941) | def test_docs(self) -> datasets.Dataset:
method fewshot_docs (line 945) | def fewshot_docs(self):
method fewshot_context (line 954) | def fewshot_context(self, doc_id, num_fewshot, split):
method apply_filters (line 983) | def apply_filters(self):
method should_decontaminate (line 991) | def should_decontaminate(self):
method doc_to_decontamination_query (line 994) | def doc_to_decontamination_query(self, doc):
method _process_doc (line 1007) | def _process_doc(self, doc):
method doc_to_text (line 1018) | def doc_to_text(self, doc):
method doc_to_target (line 1055) | def doc_to_target(self, doc: dict) -> Union[int, str, list]:
method doc_to_visual (line 1092) | def doc_to_visual(self, doc: dict) -> Union[int, str, list]:
method doc_to_choice (line 1109) | def doc_to_choice(self, doc: Any) -> List[str]:
method construct_requests (line 1131) | def construct_requests(self, doc_id: int, ctx: str, **kwargs) -> Union...
method process_results (line 1185) | def process_results(self, doc, results, full_docs=None):
method aggregation (line 1330) | def aggregation(self):
method higher_is_better (line 1333) | def higher_is_better(self):
FILE: lmms-eval_videochat/lmms_eval/evaluator.py
function simple_evaluate (line 35) | def simple_evaluate(
function evaluate (line 169) | def evaluate(
FILE: lmms-eval_videochat/lmms_eval/filters/__init__.py
function get_filter (line 24) | def get_filter(filter_name):
function build_filter_ensemble (line 31) | def build_filter_ensemble(filter_name, components):
FILE: lmms-eval_videochat/lmms_eval/filters/decontamination.py
class DecontaminationFilter (line 4) | class DecontaminationFilter(Filter):
method __init__ (line 11) | def __init__(self, path) -> None:
method apply (line 19) | def apply(self, resps, docs) -> None:
FILE: lmms-eval_videochat/lmms_eval/filters/extraction.py
class WhitespaceFilter (line 7) | class WhitespaceFilter(Filter):
method __init__ (line 10) | def __init__(self) -> None:
method apply (line 13) | def apply(self, resps, docs):
class RegexFilter (line 29) | class RegexFilter(Filter):
method __init__ (line 32) | def __init__(
method apply (line 47) | def apply(self, resps, docs):
class MultiChoiceRegexFilter (line 73) | class MultiChoiceRegexFilter(RegexFilter):
method __init__ (line 81) | def __init__(
method apply (line 104) | def apply(self, resps, docs):
class ExtendedRegexFilter (line 175) | class ExtendedRegexFilter(RegexFilter):
method __init__ (line 178) | def __init__(
method filter_ignores (line 192) | def filter_ignores(self, st):
method find_match (line 205) | def find_match(self, regex, resp, convert_dict={}):
class SimpleMultiChoiceRegexFilter (line 218) | class SimpleMultiChoiceRegexFilter(ExtendedRegexFilter):
method __init__ (line 219) | def __init__(self, *args, **kwargs):
method apply (line 231) | def apply(self, resps, docs):
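RegexFilter scans each model response for an answer pattern (e.g. a parenthesized letter choice) and falls back to a sentinel when nothing matches; the multi-choice variants additionally map matched option text back to a letter. A minimal sketch of the base behavior (pattern and fallback below are illustrative, not the repo's defaults):

# Minimal sketch of a RegexFilter-style answer extractor.
import re
from typing import List

class MiniRegexFilter:
    def __init__(self, pattern: str = r"\(?([A-D])\)?", fallback: str = "[invalid]"):
        self.regex = re.compile(pattern)
        self.fallback = fallback

    def apply(self, resps: List[List[str]]) -> List[List[str]]:
        # resps: one list of candidate generations per document.
        out = []
        for cands in resps:
            filtered = []
            for text in cands:
                m = self.regex.search(text)
                filtered.append(m.group(1) if m else self.fallback)
            out.append(filtered)
        return out

f = MiniRegexFilter()
print(f.apply([["The answer is (B).", "no idea"]]))  # [['B', '[invalid]']]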
FILE: lmms-eval_videochat/lmms_eval/filters/selection.py
class TakeFirstFilter (line 6) | class TakeFirstFilter(Filter):
method __init__ (line 7) | def __init__(self) -> None:
method apply (line 12) | def apply(self, resps, docs):
class TakeKFilter (line 19) | class TakeKFilter(Filter):
method __init__ (line 20) | def __init__(self, *args, **kwargs) -> None:
method apply (line 25) | def apply(self, resps, docs):
class MajorityVoteFilter (line 31) | class MajorityVoteFilter(Filter):
method __init__ (line 32) | def __init__(self) -> None:
method apply (line 37) | def apply(self, resps, docs):
FILE: lmms-eval_videochat/lmms_eval/filters/transformation.py
class LowercaseFilter (line 4) | class LowercaseFilter(Filter):
method __init__ (line 5) | def __init__(self) -> None:
method apply (line 8) | def apply(self, resps, docs):
class UppercaseFilter (line 15) | class UppercaseFilter(Filter):
method __init__ (line 16) | def __init__(self) -> None:
method apply (line 19) | def apply(self, resps, docs):
class MapFilter (line 26) | class MapFilter(Filter):
method __init__ (line 27) | def __init__(self, mapping_dict: dict = {}, default_value=None) -> None:
method apply (line 44) | def apply(self, resps, docs):
FILE: lmms-eval_videochat/lmms_eval/logging_utils.py
function remove_none_pattern (line 27) | def remove_none_pattern(input_string):
function _handle_non_serializable (line 40) | def _handle_non_serializable(o: Any) -> Union[int, str, list]:
function get_wandb_printer (line 59) | def get_wandb_printer() -> Literal["Printer"]:
class WandbLogger (line 69) | class WandbLogger:
method __init__ (line 70) | def __init__(self, args):
method finish (line 82) | def finish(self):
method init_run (line 86) | def init_run(self):
method post_init (line 100) | def post_init(self, results: Dict[str, Any]) -> None:
method _get_config (line 105) | def _get_config(self) -> Dict[str, Any]:
method _sanitize_results_dict (line 116) | def _sanitize_results_dict(self) -> Tuple[Dict[str, str], Dict[str, An...
method _log_results_as_table (line 156) | def _log_results_as_table(self) -> None:
method _log_results_as_artifact (line 216) | def _log_results_as_artifact(self) -> None:
method log_eval_result (line 224) | def log_eval_result(self) -> None:
method _generate_dataset (line 240) | def _generate_dataset(self, data: List[Dict[str, Any]], config: Dict[s...
method _log_samples_as_artifact (line 306) | def _log_samples_as_artifact(self, data: List[Dict[str, Any]], task_na...
method log_eval_samples (line 320) | def log_eval_samples(self, samples: Dict[str, List[Dict[str, Any]]]) -...
FILE: lmms-eval_videochat/lmms_eval/models/videochat_flash.py
class VideoChat_Flash (line 40) | class VideoChat_Flash(lmms):
method __init__ (line 45) | def __init__(
method config (line 128) | def config(self):
method tokenizer (line 133) | def tokenizer(self):
method model (line 137) | def model(self):
method eot_token_id (line 145) | def eot_token_id(self):
method max_length (line 150) | def max_length(self):
method batch_size (line 156) | def batch_size(self):
method device (line 160) | def device(self):
method rank (line 164) | def rank(self):
method world_size (line 168) | def world_size(self):
method tok_encode (line 171) | def tok_encode(self, string: str, left_truncate_len=None, add_special_...
method tok_decode (line 180) | def tok_decode(self, tokens):
method loglikelihood (line 186) | def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float,...
method flatten (line 190) | def flatten(self, input):
method generate_until (line 197) | def generate_until(self, requests: List[Instance]) -> List[str]:
FILE: lmms-eval_videochat/lmms_eval/tasks/__init__.py
function register_configurable_task (line 22) | def register_configurable_task(config: Dict[str, str]) -> int:
function register_configurable_group (line 47) | def register_configurable_group(config: Dict[str, str]) -> int:
function get_task_name_from_config (line 61) | def get_task_name_from_config(task_config: Dict[str, str]) -> str:
function include_task_folder (line 68) | def include_task_folder(task_dir: str, register_task: bool = True) -> None:
function include_path (line 107) | def include_path(task_dir):
function initialize_tasks (line 114) | def initialize_tasks(verbosity="INFO"):
function get_task (line 127) | def get_task(task_name, model_name):
function get_task_name_from_object (line 136) | def get_task_name_from_object(task_object):
function get_task_dict (line 147) | def get_task_dict(task_name_list: List[Union[str, Dict, Task]], model_na...
FILE: lmms-eval_videochat/lmms_eval/tasks/_task_utils/file_utils.py
function generate_submission_file (line 4) | def generate_submission_file(file_name, args, subpath="submissions"):
FILE: lmms-eval_videochat/lmms_eval/tasks/_task_utils/video_loader.py
function get_cache_dir (line 4) | def get_cache_dir(config, sub_dir="videos"):
function _get_video_file (line 12) | def _get_video_file(prefix: str, video_name: str, suffix: str):
function get_video (line 21) | def get_video(prefix: str, video_name: str, suffix: str = "mp4"):
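
A hypothetical reconstruction of what `_get_video_file` / `get_video` likely do, given their names and signatures: resolve a video path from a cache prefix, a name, and a suffix, failing loudly when the file is missing. The body and error handling here are assumptions, not the repo's code.

```python
import os


def get_video(prefix: str, video_name: str, suffix: str = "mp4") -> str:
    # Assumed behavior: join prefix/name.suffix and verify the file exists.
    path = os.path.join(prefix, f"{video_name}.{suffix}")
    if not os.path.exists(path):
        raise FileNotFoundError(f"video not found: {path}")
    return path
```
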
FILE: lmms-eval_videochat/lmms_eval/tasks/_task_utils/vqa_eval_metric.py
class EvalAIAnswerProcessor (line 4) | class EvalAIAnswerProcessor:
method __init__ (line 175) | def __init__(self, *args, **kwargs):
method word_tokenize (line 178) | def word_tokenize(self, word):
method process_punctuation (line 183) | def process_punctuation(self, in_text):
method process_digit_article (line 193) | def process_digit_article(self, in_text):
method __call__ (line 208) | def __call__(self, item):
FILE: lmms-eval_videochat/lmms_eval/tasks/longvideobench/utils.py
function timestamp_to_seconds (line 22) | def timestamp_to_seconds(timestamp):
function load_video (line 30) | def load_video(video_file, duration, max_num_frames=16):
function compute_frame_timestamps (line 50) | def compute_frame_timestamps(duration, max_num_frames=16):
function insert_subtitles_into_frames (line 57) | def insert_subtitles_into_frames(frame_timestamps, subtitles, starting_t...
function longvideobench_doc_to_text (line 119) | def longvideobench_doc_to_text(doc, lmms_eval_specific_kwargs):
function longvideobench_doc_to_visual_v (line 164) | def longvideobench_doc_to_visual_v(doc):
function longvideobench_doc_to_visual_i (line 180) | def longvideobench_doc_to_visual_i(doc):
function get_multi_choice_info (line 197) | def get_multi_choice_info(options):
function parse_multi_choice_response (line 214) | def parse_multi_choice_response(response, all_choices, index2ans):
function evaluate_longvideobench (line 275) | def evaluate_longvideobench(samples):
function eval_multi_choice (line 294) | def eval_multi_choice(gold_i, pred_i):
function calculate_ins_level_acc (line 308) | def calculate_ins_level_acc(results):
function longvideobench_process_results (line 322) | def longvideobench_process_results(doc, results):
function longvideobench_aggregate_results (line 344) | def longvideobench_aggregate_results(results):
function longvideobench_aggregate_results_for_submission (line 370) | def longvideobench_aggregate_results_for_submission(results, args):
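
`parse_multi_choice_response` follows the MMMU-style convention of mapping a free-form generation back onto choice letters. A simplified sketch of that convention is below; the real function at utils.py line 214 handles considerably more edge cases (punctuation, bracket styles, last-occurrence preference).

```python
import random


def parse_multi_choice_response(response: str, all_choices: list, index2ans: dict) -> str:
    # 1) Look for an explicit letter such as "(A)" or "A." in the response.
    for choice in all_choices:                      # e.g. ["A", "B", "C", "D"]
        if f"({choice})" in response or f"{choice}." in response:
            return choice
    # 2) Fall back to matching the answer text itself.
    for choice, ans in index2ans.items():
        if ans and ans.lower() in response.lower():
            return choice
    # 3) Last resort: random guess, as MMMU-style parsers typically do.
    return random.choice(all_choices)


print(parse_multi_choice_response("I believe the answer is (C).",
                                  ["A", "B", "C", "D"],
                                  {"A": "cat", "B": "dog", "C": "bird", "D": "fish"}))  # C
```
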
FILE: lmms-eval_videochat/lmms_eval/tasks/lvbench/utils.py
function lvbench_mc_doc_to_visual (line 30) | def lvbench_mc_doc_to_visual(doc, lmms_eval_specific_kwargs=None):
function lvbench_mc_doc_to_text (line 51) | def lvbench_mc_doc_to_text(doc, lmms_eval_specific_kwargs=None):
function mcq_acc (line 63) | def mcq_acc(answer, pred):
function lvbench_mc_process_results (line 115) | def lvbench_mc_process_results(doc, results):
function lvbench_mc_aggregate_results (line 144) | def lvbench_mc_aggregate_results(results):
FILE: lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/utils.py
function mlvu_mc_doc_to_visual (line 43) | def mlvu_mc_doc_to_visual(doc, lmms_eval_specific_kwargs=None):
function mlvu_mc_frames_doc_to_visual (line 70) | def mlvu_mc_frames_doc_to_visual(doc, lmms_eval_specific_kwargs=None):
function mlvu_mc_doc_to_text (line 96) | def mlvu_mc_doc_to_text(doc, lmms_eval_specific_kwargs=None):
function mcq_acc (line 112) | def mcq_acc(answer, pred):
function mlvu_mc_process_results (line 164) | def mlvu_mc_process_results(doc, results):
function mlvu_mc_aggregate_results (line 193) | def mlvu_mc_aggregate_results(results):
FILE: lmms-eval_videochat/lmms_eval/tasks/mvbench/utils.py
function mvbench_doc_to_visual (line 56) | def mvbench_doc_to_visual(doc, lmms_eval_specific_kwargs=None):
function mvbench_frames_doc_to_visual (line 82) | def mvbench_frames_doc_to_visual(doc, lmms_eval_specific_kwargs=None):
function mvbench_doc_to_text (line 108) | def mvbench_doc_to_text(doc, lmms_eval_specific_kwargs=None):
function mcq_acc (line 125) | def mcq_acc(answer, pred):
function mvbench_process_results (line 177) | def mvbench_process_results(doc, results):
function mvbench_aggregate_results (line 206) | def mvbench_aggregate_results(results):
FILE: lmms-eval_videochat/lmms_eval/tasks/perceptiontest/val/utils.py
function perceptiontest_val_doc_to_visual (line 35) | def perceptiontest_val_doc_to_visual(doc):
function perceptiontest_val_doc_to_text (line 48) | def perceptiontest_val_doc_to_text(doc, lmms_eval_specific_kwargs=None):
function perceptiontest_val_doc_to_answer (line 75) | def perceptiontest_val_doc_to_answer(doc):
function perceptiontest_val_process_results_mc_ppl (line 80) | def perceptiontest_val_process_results_mc_ppl(doc, result):
function perceptiontest_val_process_results_mc (line 110) | def perceptiontest_val_process_results_mc(doc, result):
function perceptiontest_val_aggregate_accuracy (line 139) | def perceptiontest_val_aggregate_accuracy(results, args):
function perceptiontest_val_doc_to_choice (line 152) | def perceptiontest_val_doc_to_choice(doc):
FILE: lmms-eval_videochat/lmms_eval/tasks/temporal_grounding/eval_tvg.py
function read_json (line 11) | def read_json(path):
function write_json (line 17) | def write_json(path, data):
function extract_time (line 24) | def extract_time(paragraph):
function iou (line 101) | def iou(A, B):
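
`extract_time` presumably pulls predicted (start, end) timestamps out of generated text, and `iou` then scores them against the ground-truth interval. The interval IoU itself is simple enough to state exactly as a standalone sketch:

```python
def iou(A, B):
    """1-D temporal IoU: A and B are (start, end) intervals in seconds."""
    start_a, end_a = A
    start_b, end_b = B
    inter = max(0.0, min(end_a, end_b) - max(start_a, start_b))
    union = max(end_a, end_b) - min(start_a, start_b)
    return inter / union if union > 0 else 0.0


assert abs(iou((0, 10), (5, 15)) - 1 / 3) < 1e-9
```
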
FILE: lmms-eval_videochat/lmms_eval/tasks/temporal_grounding/utils.py
function temporal_grounding_doc_to_visual (line 32) | def temporal_grounding_doc_to_visual(doc, lmms_eval_specific_kwargs=None):
function temporal_grounding_doc_to_text (line 45) | def temporal_grounding_doc_to_text(doc, lmms_eval_specific_kwargs=None):
function temporal_grounding_doc_to_answer (line 59) | def temporal_grounding_doc_to_answer(doc):
function temporal_grounding_process_results_generation (line 64) | def temporal_grounding_process_results_generation(doc, result):
function temporal_grounding_aggregate_charades (line 69) | def temporal_grounding_aggregate_charades(results, args):
function temporal_grounding_aggregate_submissions (line 72) | def temporal_grounding_aggregate_submissions(results, args, task):
FILE: lmms-eval_videochat/lmms_eval/tasks/videomme/utils.py
function parse_subtitle_time (line 97) | def parse_subtitle_time(time_str):
function load_subtitles (line 103) | def load_subtitles(subtitle_path):
function convert_time_to_frame (line 119) | def convert_time_to_frame(time_in_seconds, fps):
function extract_subtitles (line 123) | def extract_subtitles(video_path, subtitle_path):
function parse_subtitle_time (line 138) | def parse_subtitle_time(time_str):
function load_subtitles (line 144) | def load_subtitles(subtitle_path):
function convert_time_to_frame (line 160) | def convert_time_to_frame(time_in_seconds, fps):
function extract_subtitles (line 164) | def extract_subtitles(video_path, subtitle_path):
function videomme_doc_to_visual (line 179) | def videomme_doc_to_visual(doc):
function videomme_doc_to_text (line 195) | def videomme_doc_to_text(doc, lmms_eval_specific_kwargs=None):
function videomme_doc_to_text_subtitle (line 221) | def videomme_doc_to_text_subtitle(doc, lmms_eval_specific_kwargs=None):
function extract_characters_regex (line 282) | def extract_characters_regex(s):
function videomme_process_results (line 313) | def videomme_process_results(doc, results):
function videomme_aggregate_results (line 334) | def videomme_aggregate_results(results):
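
`extract_characters_regex` normalizes a free-form generation down to a single option letter before scoring. A simplified sketch of the usual Video-MME-style logic follows; the prefix list and regex details are assumptions.

```python
import re


def extract_characters_regex(s: str) -> str:
    # Strip common answer boilerplate, then take the first standalone A-E.
    s = s.strip()
    for prefix in ["The best answer is", "The correct answer is",
                   "The answer is", "Answer:"]:
        s = s.replace(prefix, "")
    match = re.search(r"[ABCDE]", s)
    return match.group(0) if match else ""


print(extract_characters_regex("The answer is (B) because ..."))  # B
```
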
FILE: lmms-eval_videochat/lmms_eval/utils.py
function is_json (line 45) | def is_json(string):
function escaped_split (line 53) | def escaped_split(text, sep_char, maxsplit=-1):
function handle_arg_string (line 74) | def handle_arg_string(arg):
function simple_parse_args_string (line 87) | def simple_parse_args_string(args_string):
function join_iters (line 111) | def join_iters(iters):
function chunks (line 116) | def chunks(iter, n: int = 0, fn=None):
function group (line 154) | def group(arr, fn):
class MultiChoice (line 163) | class MultiChoice:
method __init__ (line 164) | def __init__(self, choices) -> None:
method __contains__ (line 168) | def __contains__(self, values) -> bool:
method __iter__ (line 177) | def __iter__(self) -> Iterator:
function pattern_match (line 184) | def pattern_match(patterns, source_list):
function general_detokenize (line 195) | def general_detokenize(string):
function get_rolling_token_windows (line 205) | def get_rolling_token_windows(token_list, prefix_token, max_seq_len, con...
function make_disjoint_window (line 246) | def make_disjoint_window(pair):
class Reorderer (line 252) | class Reorderer:
method __init__ (line 253) | def __init__(self, arr: List[Any], fn: Callable) -> None:
method get_reordered (line 270) | def get_reordered(self):
method get_original (line 278) | def get_original(self, newarr):
class Grouper (line 300) | class Grouper:
method __init__ (line 307) | def __init__(self, arr, fn) -> None:
method get_grouped (line 325) | def get_grouped(self):
method get_original (line 336) | def get_original(self, grouped_dict):
function make_table (line 358) | def make_table(result_dict, column: str = "results"):
function positional_deprecated (line 434) | def positional_deprecated(fn):
function find_test_root (line 450) | def find_test_root(start_path: pathlib.Path) -> pathlib.Path:
function run_task_tests (line 466) | def run_task_tests(task_list: List[str]):
function get_git_commit_hash (line 486) | def get_git_commit_hash():
function get_datetime_str (line 500) | def get_datetime_str(timezone="Asia/Singapore"):
function import_function (line 511) | def import_function(loader, node):
function load_yaml_config (line 532) | def load_yaml_config(yaml_path=None, yaml_config=None, yaml_dir=None):
function regex_replace (line 571) | def regex_replace(string, pattern, repl, count: int = 0):
function apply_template (line 580) | def apply_template(template: str, doc: dict) -> str:
function create_iterator (line 585) | def create_iterator(raw_iterator, rank, world_size, limit=None):
function pad_and_concat (line 594) | def pad_and_concat(
function clear_torch_cache (line 643) | def clear_torch_cache() -> None:
function get_dtype (line 648) | def get_dtype(dtype: Union[str, torch.dtype]) -> torch.dtype:
class MultiTokenEOSCriteria (line 659) | class MultiTokenEOSCriteria(transformers.StoppingCriteria):
method __init__ (line 662) | def __init__(
method __call__ (line 683) | def __call__(self, input_ids, scores, **kwargs) -> bool:
function stop_sequences_criteria (line 694) | def stop_sequences_criteria(
function divide (line 708) | def divide(iterable, n) -> List[Iterator]:
class Collator (line 759) | class Collator:
method __init__ (line 766) | def __init__(
method group_by_index (line 782) | def group_by_index(self) -> None:
method get_batched (line 785) | def get_batched(self, n: int = 1, batch_fn: Optional[Callable] = None)...
method _reorder (line 809) | def _reorder(self, arr: Union[List, Tuple[Tuple[int, Any], ...]]) -> L...
method get_original (line 823) | def get_original(self, newarr: List) -> List:
method __len__ (line 844) | def __len__(self):
method group (line 848) | def group(arr: Iterable, fn: Callable, values: bool = False) -> Iterable:
method get_chunks (line 878) | def get_chunks(_iter, n: int = 0, fn=None):
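
Among the batching helpers above, `pad_and_concat` is worth sketching: its name and its role in loglikelihood batching suggest padding variable-length token tensors to a common length before stacking. A standalone sketch under that assumption (the real helper may also support a configurable padding side):

```python
import torch
import torch.nn.functional as F


def pad_and_concat(tensors, max_length=None, padding_value=0):
    # Right-pad each 1-D tensor to max_length, then stack into (batch, max_length).
    max_length = max_length or max(t.size(0) for t in tensors)
    padded = [F.pad(t, (0, max_length - t.size(0)), value=padding_value)
              for t in tensors]
    return torch.stack(padded, dim=0)


out = pad_and_concat([torch.tensor([1, 2, 3]), torch.tensor([4])])
print(out.shape)  # torch.Size([2, 4])
```
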
FILE: xtuner-eval_niah/llava/conversation.py
class SeparatorStyle (line 11) | class SeparatorStyle(Enum):
class Conversation (line 26) | class Conversation:
method get_prompt (line 47) | def get_prompt(self):
method append_message (line 178) | def append_message(self, role, message):
method process_image (line 181) | def process_image(self, image, image_process_mode, return_pil=False, i...
method get_images (line 227) | def get_images(self, return_pil=False, return_path=False):
method is_image_file (line 242) | def is_image_file(self, filename):
method is_video_file (line 246) | def is_video_file(self, filename):
method to_gradio_chatbot (line 250) | def to_gradio_chatbot(self):
method copy (line 288) | def copy(self):
method dict (line 291) | def dict(self):
FILE: xtuner-eval_niah/llava/dist_utils.py
function get_rank (line 16) | def get_rank() -> int:
function get_world_size (line 23) | def get_world_size() -> int:
function setup_for_distributed (line 30) | def setup_for_distributed(is_master):
function init_distributed_mode (line 44) | def init_distributed_mode(use_dynamic_port: bool = True):
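
`get_rank` and `get_world_size` almost certainly follow the standard `torch.distributed` guard idiom, returning single-process defaults when the process group is not initialized:

```python
import torch.distributed as dist


def get_rank() -> int:
    if dist.is_available() and dist.is_initialized():
        return dist.get_rank()
    return 0  # single-process fallback


def get_world_size() -> int:
    if dist.is_available() and dist.is_initialized():
        return dist.get_world_size()
    return 1  # single-process fallback
```
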
FILE: xtuner-eval_niah/llava/mm_utils.py
function resize_and_center_crop (line 12) | def resize_and_center_crop(image, shortest_edge_length):
function auto_pad_images (line 33) | def auto_pad_images(image, grid_params):
function extract_patches (line 63) | def extract_patches(image, patch_size, overlap_ratio):
function process_highres_image_crop_split (line 87) | def process_highres_image_crop_split(image, data_args, processor=None):
function process_highres_image (line 98) | def process_highres_image(image, processor, grid_pinpoints):
function select_best_resolution (line 119) | def select_best_resolution(original_size, possible_resolutions, max_reso...
function resize_and_pad_image (line 157) | def resize_and_pad_image(image, target_resolution):
function divide_to_patches (line 196) | def divide_to_patches(image, patch_size):
function get_anyres_image_grid_shape (line 218) | def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size, ...
function process_anyres_image (line 251) | def process_anyres_image(image, processor, grid_pinpoints):
function process_anyres_image_nopad (line 306) | def process_anyres_image_nopad(image, processor, grid_pinpoints):
function process_anyres_video_nopad (line 365) | def process_anyres_video_nopad(video, processor, grid_pinpoints, max_res...
function load_image_from_base64 (line 410) | def load_image_from_base64(image):
function expand2square (line 414) | def expand2square(pil_img, background_color):
function process_images (line 428) | def process_images(images, image_processor, model_cfg):
function tokenizer_image_token (line 460) | def tokenizer_image_token(prompt, tokenizer, image_token_index=IMAGE_TOK...
function get_model_name_from_path (line 482) | def get_model_name_from_path(model_path):
class KeywordsStoppingCriteria (line 491) | class KeywordsStoppingCriteria(StoppingCriteria):
method __init__ (line 492) | def __init__(self, keywords, tokenizer, input_ids):
method __call__ (line 503) | def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTe...
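
Of the image helpers above, `expand2square` is the easiest to pin down: it follows LLaVA's well-known recipe of padding the shorter side of a PIL image with a background color so the result is square and centered. Sketched in its stock form:

```python
from PIL import Image


def expand2square(pil_img: Image.Image, background_color) -> Image.Image:
    width, height = pil_img.size
    if width == height:
        return pil_img
    if width > height:
        result = Image.new(pil_img.mode, (width, width), background_color)
        result.paste(pil_img, (0, (width - height) // 2))  # center vertically
        return result
    result = Image.new(pil_img.mode, (height, height), background_color)
    result.paste(pil_img, ((height - width) // 2, 0))      # center horizontally
    return result
```
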
FILE: xtuner-eval_niah/llava/model/apply_delta.py
function apply_delta (line 14) | def apply_delta(base_model_path, target_model_path, delta_path):
FILE: xtuner-eval_niah/llava/model/builder.py
function load_pretrained_model (line 27) | def load_pretrained_model(model_path, model_base, model_name, load_8bit=...
FILE: xtuner-eval_niah/llava/model/consolidate.py
function consolidate_ckpt (line 14) | def consolidate_ckpt(src_path, dst_path):
FILE: xtuner-eval_niah/llava/model/language_model/llava_qwen.py
class LlavaQwenConfig (line 35) | class LlavaQwenConfig(Qwen2Config):
class LlavaQwenModel (line 39) | class LlavaQwenModel(LlavaMetaModel, Qwen2Model):
method __init__ (line 42) | def __init__(self, config: Qwen2Config):
class LlavaQwenForCausalLM (line 46) | class LlavaQwenForCausalLM(Qwen2ForCausalLM, LlavaMetaForCausalLM):
method __init__ (line 49) | def __init__(self, config):
method get_model (line 60) | def get_model(self):
method forward (line 63) | def forward(
method generate (line 119) | def generate(
method prepare_inputs_for_generation (line 139) | def prepare_inputs_for_generation(self, input_ids, past_key_values=Non...
FILE: xtuner-eval_niah/llava/model/language_model/llava_qwen_flash.py
class LlavaQwenConfig_Flash (line 35) | class LlavaQwenConfig_Flash(Qwen2Config):
class LlavaQwenModel_Flash (line 39) | class LlavaQwenModel_Flash(LlavaMetaModel, Qwen2Model_Flash):
method __init__ (line 42) | def __init__(self, config: Qwen2Config):
class LlavaQwenForCausalLM_Flash (line 46) | class LlavaQwenForCausalLM_Flash(Qwen2ForCausalLM_Flash, LlavaMetaForCau...
method __init__ (line 49) | def __init__(self, config):
method get_model (line 60) | def get_model(self):
method forward (line 63) | def forward(
method generate (line 120) | def generate(
method prepare_inputs_for_generation (line 143) | def prepare_inputs_for_generation(self, input_ids, past_key_values=Non...
FILE: xtuner-eval_niah/llava/model/language_model/modeling_qwen2_flash.py
function _get_unpad_data (line 70) | def _get_unpad_data(attention_mask):
class Qwen2RMSNorm (line 83) | class Qwen2RMSNorm(nn.Module):
method __init__ (line 84) | def __init__(self, hidden_size, eps=1e-6):
method forward (line 92) | def forward(self, hidden_states):
class Qwen2RotaryEmbedding (line 101) | class Qwen2RotaryEmbedding(nn.Module):
method __init__ (line 102) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
method _set_cos_sin_cache (line 116) | def _set_cos_sin_cache(self, seq_len, device, dtype):
method forward (line 126) | def forward(self, x, seq_len=None):
function rotate_half (line 138) | def rotate_half(x):
function apply_rotary_pos_emb (line 146) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
class Qwen2MLP (line 175) | class Qwen2MLP(nn.Module):
method __init__ (line 176) | def __init__(self, config):
method forward (line 186) | def forward(self, x):
function repeat_kv (line 191) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
class Qwen2Attention (line 203) | class Qwen2Attention(nn.Module):
method __init__ (line 209) | def __init__(self, config: Qwen2Config, layer_idx: Optional[int] = None):
method forward (line 246) | def forward(
class Qwen2FlashAttention2 (line 328) | class Qwen2FlashAttention2(Qwen2Attention):
method __init__ (line 338) | def __init__(self, *args, **kwargs):
method forward (line 346) | def forward(
method _flash_attention_forward (line 482) | def _flash_attention_forward(
method _upad_input (line 587) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
class Qwen2SdpaAttention (line 631) | class Qwen2SdpaAttention(Qwen2Attention):
method forward (line 639) | def forward(
class Qwen2DecoderLayer (line 725) | class Qwen2DecoderLayer(nn.Module):
method __init__ (line 726) | def __init__(self, config: Qwen2Config, layer_idx: int):
method forward (line 741) | def forward(
class Qwen2PreTrainedModel (line 823) | class Qwen2PreTrainedModel(PreTrainedModel):
method _init_weights (line 833) | def _init_weights(self, module):
class Qwen2Model_Flash (line 919) | class Qwen2Model_Flash(Qwen2PreTrainedModel):
method __init__ (line 927) | def __init__(self, config: Qwen2Config):
method get_input_embeddings (line 944) | def get_input_embeddings(self):
method set_input_embeddings (line 947) | def set_input_embeddings(self, value):
method forward (line 951) | def forward(
method flash_rank_drop (line 1159) | def flash_rank_drop(
class Qwen2ForCausalLM_Flash (line 1382) | class Qwen2ForCausalLM_Flash(Qwen2PreTrainedModel):
method __init__ (line 1385) | def __init__(self, config):
method get_input_embeddings (line 1394) | def get_input_embeddings(self):
method set_input_embeddings (line 1397) | def set_input_embeddings(self, value):
method get_output_embeddings (line 1400) | def get_output_embeddings(self):
method set_output_embeddings (line 1403) | def set_output_embeddings(self, new_embeddings):
method set_decoder (line 1406) | def set_decoder(self, decoder):
method get_decoder (line 1409) | def get_decoder(self):
method forward (line 1414) | def forward(
method prepare_inputs_for_generation (line 1502) | def prepare_inputs_for_generation(
method _reorder_cache (line 1560) | def _reorder_cache(past_key_values, beam_idx):
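
`rotate_half` and `apply_rotary_pos_emb` in this file follow the standard Hugging Face rotary-position-embedding formulation (the cached cos/sin style implied by `_set_cos_sin_cache` above). Reproduced as a reference sketch:

```python
import torch


def rotate_half(x):
    """Rotate half the hidden dims: (x1, x2) -> (-x2, x1)."""
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2:]
    return torch.cat((-x2, x1), dim=-1)


def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
    # cos/sin are (max_seq_len, head_dim) caches; pick rows for these positions
    # and broadcast over the heads dimension.
    cos = cos[position_ids].unsqueeze(unsqueeze_dim)
    sin = sin[position_ids].unsqueeze(unsqueeze_dim)
    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed, k_embed
```
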
FILE: xtuner-eval_niah/llava/model/llava_arch.py
class LlavaMetaModel (line 35) | class LlavaMetaModel:
method __init__ (line 37) | def __init__(self, config):
method get_vision_tower (line 50) | def get_vision_tower(self):
method initialize_vision_modules (line 56) | def initialize_vision_modules(self, model_args, fsdp=None):
function unpad_image (line 116) | def unpad_image(tensor, original_size, is_frame=False):
class LlavaMetaForCausalLM (line 160) | class LlavaMetaForCausalLM(ABC):
method get_model (line 163) | def get_model(self):
method get_vision_tower (line 166) | def get_vision_tower(self):
method get_4dPool (line 169) | def get_4dPool(self, image_feature):
method get_2dPool (line 194) | def get_2dPool(self, image_feature):
method encode_image (line 220) | def encode_image(self, images_list):
method encode_image_video (line 230) | def encode_image_video(self, images_list, video_idx_in_batch):
method encode_video (line 255) | def encode_video(self, images_list, video_idx_in_batch):
method encode_video_image (line 309) | def encode_video_image(self, images_list, video_idx_in_batch):
method add_token_per_frame (line 364) | def add_token_per_frame(self, image_feature):
method add_different_token_per_frame (line 373) | def add_different_token_per_frame(self, image_feature):
method prepare_inputs_labels_for_multimodal (line 377) | def prepare_inputs_labels_for_multimodal(self, input_ids, position_ids...
method initialize_vision_tokenizer (line 801) | def initialize_vision_tokenizer(self, model_args, tokenizer):
FILE: xtuner-eval_niah/llava/model/make_delta.py
function make_delta (line 14) | def make_delta(base_model_path, target_model_path, delta_path, hub_repo_...
FILE: xtuner-eval_niah/llava/model/multimodal_encoder/builder.py
function build_vision_tower (line 11) | def build_vision_tower(vision_tower_cfg, **kwargs):
FILE: xtuner-eval_niah/llava/model/multimodal_encoder/clip_encoder.py
class CLIPVisionTower (line 12) | class CLIPVisionTower(nn.Module):
method __init__ (line 13) | def __init__(self, vision_tower, args, delay_load=False):
method load_model (line 35) | def load_model(self, device_map=None):
method feature_select (line 46) | def feature_select(self, image_forward_outs):
method forward (line 68) | def forward(self, images):
method dummy_feature (line 82) | def dummy_feature(self):
method dtype (line 86) | def dtype(self):
method device (line 90) | def device(self):
method config (line 94) | def config(self):
method hidden_size (line 101) | def hidden_size(self):
method num_patches_per_side (line 110) | def num_patches_per_side(self):
method num_patches (line 114) | def num_patches(self):
method image_size (line 121) | def image_size(self):
class CLIPVisionTowerS2 (line 125) | class CLIPVisionTowerS2(CLIPVisionTower):
method __init__ (line 126) | def __init__(self, vision_tower, args, delay_load=False):
method load_model (line 141) | def load_model(self, device_map=None):
method forward_feature (line 155) | def forward_feature(self, images):
method forward (line 160) | def forward(self, images):
method hidden_size (line 172) | def hidden_size(self):
FILE: xtuner-eval_niah/llava/model/multimodal_encoder/internvideo2/flash_attention_class.py
class FlashAttention (line 10) | class FlashAttention(nn.Module):
method __init__ (line 21) | def __init__(self, softmax_scale=None, attention_dropout=0.0, device=N...
method forward (line 26) | def forward(self, qkv, key_padding_mask=None, causal=False, cu_seqlens...
FILE: xtuner-eval_niah/llava/model/multimodal_encoder/internvideo2/pos_embed.py
function get_3d_sincos_pos_embed (line 12) | def get_3d_sincos_pos_embed(embed_dim, grid_size, t_size, cls_token=False):
function get_2d_sincos_pos_embed (line 66) | def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
function get_1d_sincos_pos_embed (line 86) | def get_1d_sincos_pos_embed(embed_dim, t_size, cls_token=False):
function get_2d_sincos_pos_embed_from_grid (line 101) | def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
function get_1d_sincos_pos_embed_from_grid (line 116) | def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
function interpolate_pos_embed_internvideo2 (line 137) | def interpolate_pos_embed_internvideo2(checkpoint_model, model, orig_t_s...
function interpolate_pos_embed_internvideo2_new (line 227) | def interpolate_pos_embed_internvideo2_new(checkpoint_model, model, orig...
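
The 1-D building block of these helpers, `get_1d_sincos_pos_embed_from_grid`, is the standard MAE-style sinusoid: half the channels take sin, half take cos, with log-spaced frequencies. Reference sketch:

```python
import numpy as np


def get_1d_sincos_pos_embed_from_grid(embed_dim: int, pos: np.ndarray) -> np.ndarray:
    assert embed_dim % 2 == 0
    omega = np.arange(embed_dim // 2, dtype=np.float64)
    omega /= embed_dim / 2.0
    omega = 1.0 / 10000**omega                           # (D/2,) frequencies
    out = np.einsum("m,d->md", pos.reshape(-1), omega)   # (M, D/2) outer product
    return np.concatenate([np.sin(out), np.cos(out)], axis=1)  # (M, D)


print(get_1d_sincos_pos_embed_from_grid(16, np.arange(8)).shape)  # (8, 16)
```
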
FILE: xtuner-eval_niah/llava/model/multimodal_encoder/internvideo2/vit_scale_clean.py
class CrossAttention (line 20) | class CrossAttention(nn.Module):
method __init__ (line 21) | def __init__(
method forward (line 52) | def forward(self, x, k=None, v=None):
class AttentiveBlock (line 85) | class AttentiveBlock(nn.Module):
method __init__ (line 87) | def __init__(self, dim, num_heads, qkv_bias=False, qk_scale=None, drop...
method forward (line 100) | def forward(self, x_q, x_kv, pos_q, pos_k, bool_masked_pos, rel_pos_bi...
class AttentionPoolingBlock (line 109) | class AttentionPoolingBlock(AttentiveBlock):
method forward (line 111) | def forward(self, x):
class RMSNorm (line 119) | class RMSNorm(nn.Module):
method __init__ (line 120) | def __init__(self, hidden_size, eps=1e-6):
method forward (line 125) | def forward(self, hidden_states):
class LayerScale (line 133) | class LayerScale(nn.Module):
method __init__ (line 134) | def __init__(self, dim, init_values=1e-5, inplace=False, force_fp32=Fa...
method forward (line 141) | def forward(self, x):
class Attention (line 151) | class Attention(nn.Module):
method __init__ (line 152) | def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., pro...
method _naive_attn (line 175) | def _naive_attn(self, x):
method _flash_attn (line 197) | def _flash_attn(self, x, key_padding_mask=None, need_weights=False):
method forward (line 219) | def forward(self, x):
class Mlp (line 224) | class Mlp(nn.Module):
method __init__ (line 228) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 242) | def forward(self, x):
class Block (line 251) | class Block(nn.Module):
method __init__ (line 253) | def __init__(
method forward (line 284) | def forward(self, x, residual=None):
class PatchEmbed (line 306) | class PatchEmbed(nn.Module):
method __init__ (line 310) | def __init__(
method forward (line 334) | def forward(self, x):
class PretrainVisionTransformer_clean (line 343) | class PretrainVisionTransformer_clean(nn.Module):
method __init__ (line 344) | def __init__(
method init_pos_embed (line 453) | def init_pos_embed(self):
method _init_weights (line 476) | def _init_weights(self, m):
method fix_init_weight (line 485) | def fix_init_weight(self):
method dtype (line 494) | def dtype(self):
method get_num_layers (line 497) | def get_num_layers(self):
method no_weight_decay (line 501) | def no_weight_decay(self):
method forward (line 512) | def forward(self, x, mask=None, use_image=False):
function pretrain_internvideo2_giant_patch14_224_clean (line 572) | def pretrain_internvideo2_giant_patch14_224_clean(config):
function pretrain_internvideo2_6b_patch14_224_clean (line 608) | def pretrain_internvideo2_6b_patch14_224_clean(config):
FILE: xtuner-eval_niah/llava/model/multimodal_encoder/internvideo2_encoder.py
class InternVideo2ImageProcessor (line 30) | class InternVideo2ImageProcessor:
method __init__ (line 31) | def __init__(self, image_mean=(0.485, 0.456, 0.406), image_std=(0.229,...
method preprocess (line 43) | def preprocess(self, images, return_tensors, target_size=None):
class InternVideo2VisionConfig (line 69) | class InternVideo2VisionConfig:
method __init__ (line 72) | def __init__(
function build_vit (line 101) | def build_vit(config, pt_type='origin'):
class InternVideo2VisionTower (line 145) | class InternVideo2VisionTower(nn.Module):
method __init__ (line 146) | def __init__(self, vision_tower, vision_tower_cfg, delay_load=False, p...
method load_model (line 172) | def load_model(self, device_map=None):
method forward (line 182) | def forward(self, images):
method dummy_feature (line 195) | def dummy_feature(self):
method dtype (line 199) | def dtype(self):
method device (line 204) | def device(self):
method hidden_size (line 209) | def hidden_size(self):
method num_patches (line 213) | def num_patches(self):
method num_patches_per_side (line 217) | def num_patches_per_side(self):
method image_size (line 222) | def image_size(self):
FILE: xtuner-eval_niah/llava/model/multimodal_encoder/siglip_encoder.py
class SigLipImageProcessor (line 34) | class SigLipImageProcessor:
method __init__ (line 35) | def __init__(self, image_mean=(0.5, 0.5, 0.5), image_std=(0.5, 0.5, 0....
method preprocess (line 47) | def preprocess(self, images, return_tensors):
class SigLipVisionConfig (line 70) | class SigLipVisionConfig(PretrainedConfig):
method __init__ (line 73) | def __init__(
method from_pretrained (line 103) | def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os....
class SigLipVisionModelOutput (line 120) | class SigLipVisionModelOutput(ModelOutput):
class SigLipVisionEmbeddings (line 148) | class SigLipVisionEmbeddings(nn.Module):
method __init__ (line 149) | def __init__(self, config: SigLipVisionConfig):
method forward (line 169) | def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
class SigLipAttention (line 177) | class SigLipAttention(nn.Module):
method __init__ (line 181) | def __init__(self, config):
method forward (line 197) | def forward(
class SigLipMLP (line 243) | class SigLipMLP(nn.Module):
method __init__ (line 244) | def __init__(self, config):
method forward (line 251) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
class SigLipEncoderLayer (line 259) | class SigLipEncoderLayer(nn.Module):
method __init__ (line 260) | def __init__(self, config: SigLipVisionConfig):
method forward (line 269) | def forward(
class SigLipPreTrainedModel (line 308) | class SigLipPreTrainedModel(PreTrainedModel):
method _init_weights (line 318) | def _init_weights(self, module):
class SigLipEncoder (line 324) | class SigLipEncoder(nn.Module):
method __init__ (line 333) | def __init__(self, config: SigLipVisionConfig):
method forward (line 340) | def forward(
class SigLipVisionTransformer (line 408) | class SigLipVisionTransformer(nn.Module):
method __init__ (line 409) | def __init__(self, config: SigLipVisionConfig):
method forward (line 419) | def forward(
class SigLipMultiheadAttentionPoolingHead (line 459) | class SigLipMultiheadAttentionPoolingHead(nn.Module):
method __init__ (line 462) | def __init__(self, config: SigLipVisionConfig):
method forward (line 470) | def forward(self, hidden_state):
class SigLipVisionModel (line 483) | class SigLipVisionModel(SigLipPreTrainedModel):
method __init__ (line 488) | def __init__(self, config: SigLipVisionConfig):
method get_input_embeddings (line 496) | def get_input_embeddings(self) -> nn.Module:
method forward (line 499) | def forward(
class SigLipVisionTower (line 538) | class SigLipVisionTower(nn.Module):
method __init__ (line 539) | def __init__(self, vision_tower, vision_tower_cfg, delay_load=False):
method load_model (line 563) | def load_model(self, device_map=None):
method forward (line 576) | def forward(self, images):
method dummy_feature (line 592) | def dummy_feature(self):
method dtype (line 596) | def dtype(self):
method device (line 601) | def device(self):
method hidden_size (line 606) | def hidden_size(self):
method num_patches (line 610) | def num_patches(self):
method num_patches_per_side (line 614) | def num_patches_per_side(self):
method image_size (line 619) | def image_size(self):
FILE: xtuner-eval_niah/llava/model/multimodal_encoder/umt/vit.py
class DropPath (line 19) | class DropPath(nn.Module):
method __init__ (line 22) | def __init__(self, drop_prob=None):
method forward (line 26) | def forward(self, x):
method extra_repr (line 29) | def extra_repr(self) -> str:
class Mlp (line 33) | class Mlp(nn.Module):
method __init__ (line 34) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 43) | def forward(self, x):
class Attention (line 51) | class Attention(nn.Module):
method __init__ (line 52) | def __init__(
method forward (line 85) | def forward(self, x):
class Block (line 117) | class Block(nn.Module):
method __init__ (line 118) | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_sc...
method forward (line 138) | def forward(self, x):
class PatchEmbed (line 148) | class PatchEmbed(nn.Module):
method __init__ (line 151) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=...
method forward (line 167) | def forward(self, x, **kwargs):
function get_sinusoid_encoding_table (line 178) | def get_sinusoid_encoding_table(n_position, d_hid, ckpt_num_frame=-1, cu...
function get_sinusoid_encoding_table2 (line 212) | def get_sinusoid_encoding_table2(n_position=784, d_hid=1024, cur_frame=8...
class PretrainVisionTransformerEncoder (line 259) | class PretrainVisionTransformerEncoder(nn.Module):
method __init__ (line 262) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=...
method no_weight_decay (line 313) | def no_weight_decay(self):
method forward_features (line 316) | def forward_features(self, x, use_image=False):
method forward (line 337) | def forward(self, x, use_image=False):
class PretrainVisionTransformer (line 342) | class PretrainVisionTransformer(nn.Module):
method __init__ (line 345) | def __init__(self,
method _init_weights (line 401) | def _init_weights(self, m):
method no_weight_decay (line 411) | def no_weight_decay(self):
method forward (line 414) | def forward(self, x, use_image=False):
FILE: xtuner-eval_niah/llava/model/multimodal_encoder/umt_encoder.py
class UMTImageProcessor (line 30) | class UMTImageProcessor:
method __init__ (line 31) | def __init__(self, image_mean=(0.485, 0.456, 0.406), image_std=(0.229,...
method preprocess (line 43) | def preprocess(self, images, return_tensors, target_size=None):
class UMTVisionConfig (line 69) | class UMTVisionConfig:
method __init__ (line 72) | def __init__(
function build_vit (line 95) | def build_vit(config, pt_type='origin'):
class UMTVisionTower (line 134) | class UMTVisionTower(nn.Module):
method __init__ (line 135) | def __init__(self, vision_tower, vision_tower_cfg, delay_load=False, p...
method load_model (line 160) | def load_model(self, device_map=None):
method forward (line 170) | def forward(self, images):
method dummy_feature (line 185) | def dummy_feature(self):
method dtype (line 189) | def dtype(self):
method device (line 194) | def device(self):
method hidden_size (line 199) | def hidden_size(self):
method num_patches (line 203) | def num_patches(self):
method num_patches_per_side (line 207) | def num_patches_per_side(self):
method image_size (line 212) | def image_size(self):
FILE: xtuner-eval_niah/llava/model/multimodal_projector/builder.py
class IdentityMap (line 7) | class IdentityMap(nn.Module):
method __init__ (line 8) | def __init__(self):
method forward (line 11) | def forward(self, x, *args, **kwargs):
method config (line 15) | def config(self):
class SimpleResBlock (line 19) | class SimpleResBlock(nn.Module):
method __init__ (line 20) | def __init__(self, channels):
method forward (line 25) | def forward(self, x):
function build_vision_projector (line 30) | def build_vision_projector(config, delay_load=False, **kwargs):
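
`build_vision_projector` in LLaVA-family code conventionally dispatches on `config.mm_projector_type`: "linear" yields a single Linear layer, `mlpNx_gelu` names yield a GELU MLP of depth N, and "identity" yields the `IdentityMap` above. A condensed sketch of that convention (branch details are assumptions; `nn.Identity` stands in for `IdentityMap`):

```python
import re
from types import SimpleNamespace

import torch.nn as nn


def build_vision_projector(config, **kwargs):
    projector_type = getattr(config, "mm_projector_type", "linear")
    if projector_type == "linear":
        return nn.Linear(config.mm_hidden_size, config.hidden_size)
    mlp_match = re.match(r"^mlp(\d+)x_gelu$", projector_type)
    if mlp_match:
        depth = int(mlp_match.group(1))
        modules = [nn.Linear(config.mm_hidden_size, config.hidden_size)]
        for _ in range(1, depth):
            modules += [nn.GELU(), nn.Linear(config.hidden_size, config.hidden_size)]
        return nn.Sequential(*modules)
    if projector_type == "identity":
        return nn.Identity()  # stand-in for the repo's IdentityMap
    raise ValueError(f"Unknown projector type: {projector_type}")


cfg = SimpleNamespace(mm_projector_type="mlp2x_gelu", mm_hidden_size=1024, hidden_size=4096)
print(build_vision_projector(cfg))
```
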
FILE: xtuner-eval_niah/llava/model/multimodal_projector/tome16_mlp_hd64.py
function bipartite_soft_matching (line 16) | def bipartite_soft_matching(
function merge_wavg (line 72) | def merge_wavg(
class ToMe16_mlp_hd64 (line 91) | class ToMe16_mlp_hd64(nn.Module):
method __init__ (line 92) | def __init__(self, config, vision_cfg):
method merge_tokens (line 107) | def merge_tokens(self, x, target_num_token):
method forward (line 143) | def forward(self, x, compress=False, local_num_frames=-1):
method config (line 169) | def config(self):
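
`bipartite_soft_matching` and `merge_wavg` implement ToMe-style token merging, which is what lets this projector compress visual tokens. Below is a simplified, unweighted sketch of the core idea: split tokens into two alternating sets, match each token in set A to its most similar partner in set B by cosine similarity, and average the r most similar pairs. The repo's `merge_wavg` additionally tracks token sizes for a weighted average, and real ToMe preserves token order; this sketch does neither.

```python
import torch
import torch.nn.functional as F


def simple_bipartite_merge(x: torch.Tensor, r: int) -> torch.Tensor:
    """Merge r tokens out of x with shape (bsz, n, c); returns (bsz, n - r, c)."""
    a, b = x[:, ::2, :], x[:, 1::2, :]                  # alternating token sets
    scores = F.normalize(a, dim=-1) @ F.normalize(b, dim=-1).transpose(-1, -2)
    node_max, node_idx = scores.max(dim=-1)             # best B partner per A token
    order = node_max.argsort(dim=-1, descending=True)   # most similar pairs first
    merged, kept = order[:, :r], order[:, r:]
    bsz, _, c = x.shape
    src = a.gather(1, merged.unsqueeze(-1).expand(-1, -1, c))
    dst = node_idx.gather(1, merged).unsqueeze(-1).expand(-1, -1, c)
    b = b.scatter_reduce(1, dst, src, reduce="mean", include_self=True)
    a = a.gather(1, kept.unsqueeze(-1).expand(-1, -1, c))
    return torch.cat([a, b], dim=1)


print(simple_bipartite_merge(torch.randn(1, 16, 8), r=4).shape)  # (1, 12, 8)
```
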
FILE: xtuner-eval_niah/llava/model/utils.py
function auto_upgrade (line 4) | def auto_upgrade(config):
FILE: xtuner-eval_niah/llava/serialize_utils.py
function get_world_size (line 25) | def get_world_size() -> int:
function get_rank (line 33) | def get_rank() -> int:
function get_rank (line 40) | def get_rank() -> int:
function get_local_rank (line 47) | def get_local_rank() -> int:
function get_local_size (line 62) | def get_local_size() -> int:
function _get_global_gloo_group (line 67) | def _get_global_gloo_group():
function all_gather (line 78) | def all_gather(data, group=None):
class NumpySerializedList (line 105) | class NumpySerializedList:
method __init__ (line 106) | def __init__(self, lst: list):
method __len__ (line 122) | def __len__(self):
method __getitem__ (line 125) | def __getitem__(self, idx):
class TorchSerializedList (line 132) | class TorchSerializedList(NumpySerializedList):
method __init__ (line 133) | def __init__(self, lst: list):
method __getitem__ (line 138) | def __getitem__(self, idx):
function local_scatter (line 145) | def local_scatter(array: Optional[List[Any]]):
class TorchShmSerializedList (line 167) | class TorchShmSerializedList(TorchSerializedList):
method __init__ (line 168) | def __init__(self, lst: list):
function local_broadcast_process_authkey (line 194) | def local_broadcast_process_authkey():
FILE: xtuner-eval_niah/llava/train/llava_trainer.py
function maybe_zero_3 (line 27) | def maybe_zero_3(param, ignore_status=False, name=None):
function get_mm_adapter_state_maybe_zero_3 (line 42) | def get_mm_adapter_state_maybe_zero_3(named_params, keys_to_match):
function split_to_even_chunks (line 48) | def split_to_even_chunks(indices, lengths, num_chunks):
function get_variable_length_grouped_indices (line 70) | def get_variable_length_grouped_indices(lengths, batch_size, world_size,...
function get_modality_length_grouped_indices (line 86) | def get_modality_length_grouped_indices(lengths, batch_size, world_size,...
function get_length_grouped_indices (line 126) | def get_length_grouped_indices(lengths, batch_size, world_size, generato...
function get_length_grouped_indices_auto_single (line 149) | def get_length_grouped_indices_auto_single(lengths, batch_size, world_si...
function get_modality_length_grouped_indices_auto (line 164) | def get_modality_length_grouped_indices_auto(lengths, batch_size, world_...
class LengthGroupedSampler (line 193) | class LengthGroupedSampler(Sampler):
method __init__ (line 199) | def __init__(
method __len__ (line 220) | def __len__(self):
method __iter__ (line 223) | def __iter__(self):
class LLaVATrainer (line 237) | class LLaVATrainer(Trainer):
method create_accelerator_and_postprocess (line 239) | def create_accelerator_and_postprocess(self):
method _get_train_sampler (line 270) | def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
method get_train_dataloader (line 316) | def get_train_dataloader(self) -> DataLoader:
method create_optimizer (line 353) | def create_optimizer(self):
method _save_checkpoint (line 432) | def _save_checkpoint(self, model, trial, metrics=None):
method _save (line 456) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
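
`split_to_even_chunks` appears in LLaVA's stock trainer, which this file evidently tracks: it greedily assigns each index to the currently lightest chunk so that per-device token counts stay balanced. The stock implementation, for reference:

```python
def split_to_even_chunks(indices, lengths, num_chunks):
    """Split indices into num_chunks chunks of roughly equal total length."""
    if len(indices) % num_chunks != 0:
        return [indices[i::num_chunks] for i in range(num_chunks)]
    num_indices_per_chunk = len(indices) // num_chunks
    chunks = [[] for _ in range(num_chunks)]
    chunks_lengths = [0 for _ in range(num_chunks)]
    for index in indices:
        shortest_chunk = chunks_lengths.index(min(chunks_lengths))
        chunks[shortest_chunk].append(index)
        chunks_lengths[shortest_chunk] += lengths[index]
        if len(chunks[shortest_chunk]) == num_indices_per_chunk:
            chunks_lengths[shortest_chunk] = float("inf")  # chunk is full
    return chunks
```
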
FILE: xtuner-eval_niah/llava/train/llava_trainer_eval.py
class LLaVAEvalTrainer (line 7) | class LLaVAEvalTrainer(LLaVATrainer):
method evaluate (line 8) | def evaluate(self, evaluate_args):
FILE: xtuner-eval_niah/llava/train/train.py
class ModelArguments (line 64) | class ModelArguments:
class DataArguments (line 135) | class DataArguments:
class TrainingArguments (line 159) | class TrainingArguments(transformers.TrainingArguments):
function maybe_zero_3 (line 208) | def maybe_zero_3(param, ignore_status=False, name=None):
function get_peft_state_maybe_zero_3 (line 224) | def get_peft_state_maybe_zero_3(named_params, bias):
function get_peft_state_non_lora_maybe_zero_3 (line 249) | def get_peft_state_non_lora_maybe_zero_3(named_params, require_grad_only...
function get_mm_adapter_state_maybe_zero_3 (line 257) | def get_mm_adapter_state_maybe_zero_3(named_params, keys_to_match):
function find_all_linear_names (line 263) | def find_all_linear_names(model):
function safe_save_model_for_hf_trainer (line 279) | def safe_save_model_for_hf_trainer(trainer: transformers.Trainer, output...
function smart_tokenizer_and_embedding_resize (line 323) | def smart_tokenizer_and_embedding_resize(
function _tokenize_fn (line 346) | def _tokenize_fn(strings: Sequence[str], tokenizer: transformers.PreTrai...
function _mask_targets (line 368) | def _mask_targets(target, tokenized_lens, speakers):
function _add_speaker_and_signal (line 379) | def _add_speaker_and_signal(header, source, get_conversation=True):
function preprocess_multimodal (line 399) | def preprocess_multimodal(sources: Sequence[str], data_args: DataArgumen...
function preprocess_llama_2 (line 431) | def preprocess_llama_2(sources, tokenizer: transformers.PreTrainedTokeni...
function preprocess_gemma (line 506) | def preprocess_gemma(sources: List[List[Dict[str, str]]], tokenizer: tra...
function preprocess_qwen (line 585) | def preprocess_qwen(sources, tokenizer: transformers.PreTrainedTokenizer...
function preprocess_internlm2 (line 665) | def preprocess_internlm2(sources, tokenizer: transformers.PreTrainedToke...
function preprocess_llama3 (line 742) | def preprocess_llama3(
function preprocess_v1 (line 828) | def preprocess_v1(sources, tokenizer: transformers.PreTrainedTokenizer, ...
function preprocess_mpt (line 907) | def preprocess_mpt(sources, tokenizer: transformers.PreTrainedTokenizer,...
function preprocess_plain (line 988) | def preprocess_plain(
function preprocess (line 1010) | def preprocess(sources: Sequence[str], tokenizer: transformers.PreTraine...
class LazySupervisedDataset (line 1065) | class LazySupervisedDataset(Dataset):
method __init__ (line 1066) | def __init__(self, data_path: str, tokenizer: transformers.PreTrainedT...
method __len__ (line 1257) | def __len__(self):
method lengths (line 1261) | def lengths(self):
method modality_lengths (line 1274) | def modality_lengths(self):
method process_image (line 1285) | def process_image(self, image_file, overwrite_image_aspect_ratio=None):
method process_video (line 1344) | def process_video(self, video_file, data_anno, data_args):
method __getitem__ (line 1413) | def __getitem__(self, i) -> Dict[str, torch.Tensor]:
method _get_item (line 1448) | def _get_item(self, i) -> Dict[str, torch.Tensor]:
class DataCollatorForSupervisedDataset (line 1538) | class DataCollatorForSupervisedDataset(object):
method pad_sequence (line 1543) | def pad_sequence(self, input_ids, batch_first, padding_value):
method __call__ (line 1551) | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
function make_supervised_data_module (line 1588) | def make_supervised_data_module(tokenizer: transformers.PreTrainedTokeni...
function get_model (line 1595) | def get_model(model_args, training_args, bnb_model_from_pretrained_args):
function train (line 1814) | def train(attn_implementation=None):
FILE: xtuner-eval_niah/llava/utils.py
function process_video_with_decord (line 25) | def process_video_with_decord(video_file, data_args):
function process_video_with_pyav (line 41) | def process_video_with_pyav(video_file, data_args):
function rank0_print (line 66) | def rank0_print(*args):
function rank_print (line 74) | def rank_print(*args):
function build_logger (line 80) | def build_logger(logger_name, logger_filename):
class StreamToLogger (line 122) | class StreamToLogger(object):
method __init__ (line 127) | def __init__(self, logger, log_level=logging.INFO):
method __getattr__ (line 133) | def __getattr__(self, attr):
method write (line 136) | def write(self, buf):
method flush (line 150) | def flush(self):
function disable_torch_init (line 156) | def disable_torch_init():
function violates_moderation (line 166) | def violates_moderation(text):
function pretty_print_semaphore (line 188) | def pretty_print_semaphore(semaphore):
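
`disable_torch_init` is the stock LLaVA startup trick: monkey-patch `reset_parameters` to a no-op so that building large modules skips redundant default initialization, since pretrained weights will overwrite it anyway.

```python
import torch


def disable_torch_init():
    """Skip default weight init; pretrained weights overwrite it anyway."""
    setattr(torch.nn.Linear, "reset_parameters", lambda self: None)
    setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)
```
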
FILE: xtuner-eval_niah/llava/video_utils.py
function get_index (line 18) | def get_index(num_frames, num_segments):
function lazy_load_s3video (line 27) | def lazy_load_s3video(s3path_video, num_frames, video_start, video_end, ...
function pts_to_secs (line 71) | def pts_to_secs(pts: int, time_base: float, start_pts: int) -> float:
function get_pyav_video_duration (line 86) | def get_pyav_video_duration(video_reader):
function get_frame_indices (line 97) | def get_frame_indices(num_frames, vlen, sample='middle', fix_start=None,...
function read_frames_av (line 167) | def read_frames_av(video_path, num_frames, sample='rand', client=None, f...
function read_frames_gif (line 196) | def read_frames_gif(
function read_frames_decord (line 248) | def read_frames_decord(
function read_frames_img (line 298) | def read_frames_img(
function read_frames_fake (line 400) | def read_frames_fake(
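
`get_index` is commonly implemented in VideoChat-style codebases as middle-of-segment uniform sampling: divide the clip into `num_segments` equal spans and take one frame near the center of each. A sketch under that assumption:

```python
import numpy as np


def get_index(num_frames, num_segments):
    # One index near the middle of each of num_segments equal spans.
    seg_size = float(num_frames - 1) / num_segments
    start = int(seg_size / 2)
    return np.array([start + int(np.round(seg_size * i))
                     for i in range(num_segments)])


print(get_index(100, 8))  # [ 6 18 31 43 56 68 80 93]
```
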
FILE: xtuner-eval_niah/longva/conversation.py
class SeparatorStyle (line 11) | class SeparatorStyle(Enum):
class Conversation (line 26) | class Conversation:
method get_prompt (line 47) | def get_prompt(self):
method append_message (line 178) | def append_message(self, role, message):
method process_image (line 181) | def process_image(self, image, image_process_mode, return_pil=False, i...
method get_images (line 227) | def get_images(self, return_pil=False, return_path=False):
method to_gradio_chatbot (line 242) | def to_gradio_chatbot(self):
method copy (line 266) | def copy(self):
method dict (line 269) | def dict(self):
FILE: xtuner-eval_niah/longva/mm_utils.py
function resize_and_center_crop (line 12) | def resize_and_center_crop(image, shortest_edge_length):
function auto_pad_images (line 33) | def auto_pad_images(image, grid_params):
function extract_patches (line 63) | def extract_patches(image, patch_size, overlap_ratio):
function process_highres_image_crop_split (line 87) | def process_highres_image_crop_split(image, data_args, processor=None):
function process_highres_image (line 98) | def process_highres_image(image, processor, grid_pinpoints):
function select_best_resolution (line 119) | def select_best_resolution(original_size, possible_resolutions):
function resize_and_pad_image (line 152) | def resize_and_pad_image(image, target_resolution):
function divide_to_patches (line 191) | def divide_to_patches(image, patch_size):
function get_anyres_image_grid_shape (line 213) | def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size):
function process_anyres_image (line 243) | def process_anyres_image(image, processor, grid_pinpoints):
function load_image_from_base64 (line 296) | def load_image_from_base64(image):
function expand2square (line 300) | def expand2square(pil_img, background_color):
function process_images (line 314) | def process_images(images, image_processor, model_cfg):
function tokenizer_image_token (line 341) | def tokenizer_image_token(prompt, tokenizer, image_token_index=IMAGE_TOK...
function get_model_name_from_path (line 363) | def get_model_name_from_path(model_path):
class KeywordsStoppingCriteria (line 372) | class KeywordsStoppingCriteria(StoppingCriteria):
method __init__ (line 373) | def __init__(self, keywords, tokenizer, input_ids):
method __call__ (line 384) | def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTe...
FILE: xtuner-eval_niah/longva/model/apply_delta.py
function apply_delta (line 14) | def apply_delta(base_model_path, target_model_path, delta_path):
FILE: xtuner-eval_niah/longva/model/builder.py
function load_pretrained_model (line 27) | def load_pretrained_model(model_path, model_base, model_name, load_8bit=...
FILE: xtuner-eval_niah/longva/model/consolidate.py
function consolidate_ckpt (line 14) | def consolidate_ckpt(src_path, dst_path):
FILE: xtuner-eval_niah/longva/model/language_model/llava_llama.py
class LlavaConfig (line 35) | class LlavaConfig(LlamaConfig):
class LlavaLlamaModel (line 44) | class LlavaLlamaModel(LlavaMetaModel, LlamaModel):
method __init__ (line 47) | def __init__(self, config: LlamaConfig):
class LlavaLlamaForCausalLM (line 51) | class LlavaLlamaForCausalLM(LlamaForCausalLM, LlavaMetaForCausalLM):
method __init__ (line 54) | def __init__(self, config):
method get_model (line 66) | def get_model(self):
method forward (line 69) | def forward(
method generate (line 123) | def generate(
method prepare_inputs_for_generation (line 144) | def prepare_inputs_for_generation(self, input_ids, past_key_values=Non...
FILE: xtuner-eval_niah/longva/model/language_model/llava_mistral.py
class LlavaMistralConfig (line 30) | class LlavaMistralConfig(MistralConfig):
class LlavaMistralModel (line 38) | class LlavaMistralModel(LlavaMetaModel, MistralModel):
method __init__ (line 41) | def __init__(self, config: MistralConfig):
class LlavaMistralForCausalLM (line 45) | class LlavaMistralForCausalLM(MistralForCausalLM, LlavaMetaForCausalLM):
method __init__ (line 48) | def __init__(self, config):
method get_model (line 59) | def get_model(self):
method forward (line 62) | def forward(
method generate (line 96) | def generate(
method prepare_inputs_for_generation (line 115) | def prepare_inputs_for_generation(self, input_ids, past_key_values=Non...
FILE: xtuner-eval_niah/longva/model/language_model/llava_mpt.py
class LlavaMptConfig (line 24) | class LlavaMptConfig(MptConfig):
class LlavaMptModel (line 28) | class LlavaMptModel(LlavaMetaModel, MptModel):
method __init__ (line 31) | def __init__(self, config: MptConfig):
method embed_tokens (line 35) | def embed_tokens(self, x):
class LlavaMptForCausalLM (line 39) | class LlavaMptForCausalLM(MptForCausalLM, LlavaMetaForCausalLM):
method __init__ (line 43) | def __init__(self, config):
method get_model (line 61) | def get_model(self):
method _set_gradient_checkpointing (line 64) | def _set_gradient_checkpointing(self, module, value=False):
method forward (line 68) | def forward(
method prepare_inputs_for_generation (line 97) | def prepare_inputs_for_generation(self, input_ids, past_key_values=Non...
FILE: xtuner-eval_niah/longva/model/language_model/llava_qwen.py
class LlavaQwenConfig (line 35) | class LlavaQwenConfig(Qwen2Config):
class LlavaQwenModel (line 39) | class LlavaQwenModel(LlavaMetaModel, Qwen2Model):
method __init__ (line 42) | def __init__(self, config: Qwen2Config):
class LlavaQwenForCausalLM (line 46) | class LlavaQwenForCausalLM(Qwen2ForCausalLM, LlavaMetaForCausalLM):
method __init__ (line 49) | def __init__(self, config):
method get_model (line 60) | def get_model(self):
method forward (line 63) | def forward(
method generate (line 117) | def generate(
method prepare_inputs_for_generation (line 137) | def prepare_inputs_for_generation(self, input_ids, past_key_values=Non...
FILE: xtuner-eval_niah/longva/model/language_model/modeling_llama.py
function _get_unpad_data (line 61) | def _get_unpad_data(attention_mask):
class LlamaRMSNorm (line 73) | class LlamaRMSNorm(nn.Module):
method __init__ (line 74) | def __init__(self, hidden_size, eps=1e-6):
method forward (line 82) | def forward(self, hidden_states):
class LlamaRotaryEmbedding (line 93) | class LlamaRotaryEmbedding(nn.Module):
method __init__ (line 94) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
method sin_cached (line 113) | def sin_cached(self):
method cos_cached (line 118) | def cos_cached(self):
method forward (line 123) | def forward(self, x, position_ids, seq_len=None):
class LlamaLinearScalingRotaryEmbedding (line 142) | class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding):
method forward (line 145) | def forward(self, x, position_ids, seq_len=None):
class LlamaDynamicNTKScalingRotaryEmbedding (line 152) | class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):
method forward (line 155) | def forward(self, x, position_ids, seq_len=None):
function rotate_half (line 167) | def rotate_half(x):
function apply_rotary_pos_emb (line 174) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
class LlamaMLP (line 201) | class LlamaMLP(nn.Module):
method __init__ (line 202) | def __init__(self, config):
method forward (line 212) | def forward(self, x):
function repeat_kv (line 231) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
class LlamaAttention (line 243) | class LlamaAttention(nn.Module):
method __init__ (line 246) | def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None):
method _init_rope (line 276) | def _init_rope(self):
method forward (line 303) | def forward(
class LlamaRingFlashAttention2 (line 385) | class LlamaRingFlashAttention2(LlamaAttention):
method __init__ (line 392) | def __init__(self, *args, **kwargs):
method forward (line 400) | def forward(
method _flash_attention_forward (line 478) | def _flash_attention_forward(self, query_states, key_states, value_sta...
method _upad_input (line 535) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
class LlamaFlashAttention2 (line 566) | class LlamaFlashAttention2(LlamaAttention):
method __init__ (line 573) | def __init__(self, *args, **kwargs):
method forward (line 581) | def forward(
method _flash_attention_forward (line 659) | def _flash_attention_forward(self, query_states, key_states, value_sta...
method _upad_input (line 712) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
class LlamaSdpaAttention (line 743) | class LlamaSdpaAttention(LlamaAttention):
method forward (line 751) | def forward(
class LlamaDecoderLayer (line 841) | class LlamaDecoderLayer(nn.Module):
method __init__ (line 842) | def __init__(self, config: LlamaConfig, layer_idx: int):
method forward (line 852) | def forward(
class LlamaPreTrainedModel (line 935) | class LlamaPreTrainedModel(PreTrainedModel):
method _init_weights (line 945) | def _init_weights(self, module):
method _setup_cache (line 956) | def _setup_cache(self, cache_cls, max_batch_size, max_cache_len: Optio...
method _reset_cache (line 972) | def _reset_cache(self):
class LlamaModel (line 1051) | class LlamaModel(LlamaPreTrainedModel):
method __init__ (line 1059) | def __init__(self, config: LlamaConfig):
method get_input_embeddings (line 1076) | def get_input_embeddings(self):
method set_input_embeddings (line 1079) | def set_input_embeddings(self, value):
method forward (line 1083) | def forward(
method _update_causal_mask (line 1191) | def _update_causal_mask(self, attention_mask, input_tensor):
class LlamaForCausalLM (line 1228) | class LlamaForCausalLM(LlamaPreTrainedModel):
method __init__ (line 1231) | def __init__(self, config):
method get_input_embeddings (line 1240) | def get_input_embeddings(self):
method set_input_embeddings (line 1243) | def set_input_embeddings(self, value):
method get_output_embeddings (line 1246) | def get_output_embeddings(self):
method set_output_embeddings (line 1249) | def set_output_embeddings(self, new_embeddings):
method set_decoder (line 1252) | def set_decoder(self, decoder):
method get_decoder (line 1255) | def get_decoder(self):
method forward (line 1260) | def forward(
method prepare_inputs_for_generation (line 1351) | def prepare_inputs_for_generation(self, input_ids, past_key_values=Non...
method _reorder_cache (line 1423) | def _reorder_cache(past_key_values, beam_idx):
class LlamaForSequenceClassification (line 1445) | class LlamaForSequenceClassification(LlamaPreTrainedModel):
method __init__ (line 1446) | def __init__(self, config):
method get_input_embeddings (line 1455) | def get_input_embeddings(self):
method set_input_embeddings (line 1458) | def set_input_embeddings(self, value):
method forward (line 1462) | def forward(
class LlamaForQuestionAnswering (line 1560) | class LlamaForQuestionAnswering(LlamaPreTrainedModel):
method __init__ (line 1564) | def __init__(self, config):
method get_input_embeddings (line 1572) | def get_input_embeddings(self):
method set_input_embeddings (line 1575) | def set_input_embeddings(self, value):
method forward (line 1579) | def forward(
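Note: the rotate_half / apply_rotary_pos_emb entries above follow the standard
transformers RoPE helpers. A minimal sketch of that convention (illustrative;
this file's exact code may differ in signature details):

import torch

def rotate_half(x):
    # Swap the two halves of the last dimension, negating the second half.
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x2, x1), dim=-1)

def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
    # Broadcast cos/sin over the head axis, then rotate queries and keys.
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)
    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed, k_embed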
FILE: xtuner-eval_niah/longva/model/llava_arch.py
class LlavaMetaModel (line 34) | class LlavaMetaModel:
method __init__ (line 36) | def __init__(self, config):
method get_vision_tower (line 48) | def get_vision_tower(self):
method initialize_vision_modules (line 54) | def initialize_vision_modules(self, model_args, fsdp=None):
function unpad_image (line 119) | def unpad_image(tensor, original_size):
class LlavaMetaForCausalLM (line 154) | class LlavaMetaForCausalLM(ABC):
method get_model (line 157) | def get_model(self):
method get_vision_tower (line 160) | def get_vision_tower(self):
method get_2dPool (line 163) | def get_2dPool(self, image_feature):
method encode_images (line 179) | def encode_images(self, images):
method encode_multimodals (line 186) | def encode_multimodals(self, videos_or_images, video_idx_in_batch, spl...
method prepare_inputs_labels_for_multimodal (line 199) | def prepare_inputs_labels_for_multimodal(self, input_ids, position_ids...
method initialize_vision_tokenizer (line 409) | def initialize_vision_tokenizer(self, model_args, tokenizer):
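Note: unpad_image (line 119) is the standard LLaVA helper that strips letterbox
padding from a feature map once the original aspect ratio is known. A sketch of
the usual implementation (details may differ here):

def unpad_image(tensor, original_size):
    # tensor: (C, H, W) padded feature map; original_size: (width, height).
    original_width, original_height = original_size
    current_height, current_width = tensor.shape[1:]
    if original_width / original_height > current_width / current_height:
        # Wider than the square canvas: padding was added top and bottom.
        scale = current_width / original_width
        new_height = int(original_height * scale)
        pad = (current_height - new_height) // 2
        return tensor[:, pad : current_height - pad, :]
    else:
        # Taller than the square canvas: padding was added left and right.
        scale = current_height / original_height
        new_width = int(original_width * scale)
        pad = (current_width - new_width) // 2
        return tensor[:, :, pad : current_width - pad]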
FILE: xtuner-eval_niah/longva/model/make_delta.py
function make_delta (line 14) | def make_delta(base_model_path, target_model_path, delta_path, hub_repo_...
FILE: xtuner-eval_niah/longva/model/multimodal_encoder/builder.py
function build_vision_tower (line 9) | def build_vision_tower(vision_tower_cfg, **kwargs):
FILE: xtuner-eval_niah/longva/model/multimodal_encoder/clip_encoder.py
class CLIPVisionTower (line 12) | class CLIPVisionTower(nn.Module):
method __init__ (line 13) | def __init__(self, vision_tower, args, delay_load=False):
method load_model (line 35) | def load_model(self, device_map=None):
method feature_select (line 46) | def feature_select(self, image_forward_outs):
method forward (line 68) | def forward(self, images):
method dummy_feature (line 82) | def dummy_feature(self):
method dtype (line 86) | def dtype(self):
method device (line 90) | def device(self):
method config (line 94) | def config(self):
method hidden_size (line 101) | def hidden_size(self):
method num_patches_per_side (line 110) | def num_patches_per_side(self):
method num_patches (line 114) | def num_patches(self):
method image_size (line 121) | def image_size(self):
class CLIPVisionTowerS2 (line 125) | class CLIPVisionTowerS2(CLIPVisionTower):
method __init__ (line 126) | def __init__(self, vision_tower, args, delay_load=False):
method load_model (line 141) | def load_model(self, device_map=None):
method forward_feature (line 156) | def forward_feature(self, images):
method forward (line 162) | def forward(self, images):
method hidden_size (line 174) | def hidden_size(self):
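Note: feature_select (line 46) implements the usual LLaVA choice of which ViT
hidden layer to expose and whether to keep the CLS token. A hedged sketch,
assuming the common defaults (select_layer=-2, patch features only):

def feature_select(image_forward_outs, select_layer=-2, select_feature="patch"):
    # Take one hidden layer of the vision tower; optionally drop the CLS token.
    feats = image_forward_outs.hidden_states[select_layer]
    if select_feature == "patch":
        feats = feats[:, 1:]  # keep patch tokens only
    elif select_feature != "cls_patch":
        raise ValueError(f"Unexpected select feature: {select_feature}")
    return feats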
FILE: xtuner-eval_niah/longva/model/multimodal_projector/builder.py
class IdentityMap (line 8) | class IdentityMap(nn.Module):
method __init__ (line 9) | def __init__(self):
method forward (line 12) | def forward(self, x, *args, **kwargs):
method config (line 16) | def config(self):
class SimpleResBlock (line 20) | class SimpleResBlock(nn.Module):
method __init__ (line 21) | def __init__(self, channels):
method forward (line 27) | def forward(self, x):
function build_vision_projector (line 32) | def build_vision_projector(config, delay_load=False, **kwargs):
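Note: build_vision_projector (line 32) conventionally maps a projector-type
string to a module, as in upstream LLaVA. A sketch of that dispatch (identity
and pooler variants omitted):

import re
import torch.nn as nn

def build_vision_projector(config):
    projector_type = getattr(config, "mm_projector_type", "linear")
    if projector_type == "linear":
        return nn.Linear(config.mm_hidden_size, config.hidden_size)
    match = re.match(r"^mlp(\d+)x_gelu$", projector_type)
    if match:  # e.g. "mlp2x_gelu" -> Linear, GELU, Linear
        depth = int(match.group(1))
        layers = [nn.Linear(config.mm_hidden_size, config.hidden_size)]
        for _ in range(1, depth):
            layers += [nn.GELU(), nn.Linear(config.hidden_size, config.hidden_size)]
        return nn.Sequential(*layers)
    raise ValueError(f"Unknown projector type: {projector_type}")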
FILE: xtuner-eval_niah/longva/model/multimodal_projector/pooler_projector.py
class PoolerProjector (line 9) | class PoolerProjector(nn.Module):
method __init__ (line 10) | def __init__(self, config, vision_cfg):
method forward (line 22) | def forward(self, x, *args, **kwargs):
method config (line 32) | def config(self):
FILE: xtuner-eval_niah/longva/model/multimodal_resampler/builder.py
class IdentityMap (line 9) | class IdentityMap(torch.nn.Module):
method __init__ (line 10) | def __init__(self):
method forward (line 13) | def forward(self, x, *args, **kwargs):
method config (line 17) | def config(self):
function build_vision_resampler (line 21) | def build_vision_resampler(model_args, delay_load=False, **kwargs):
FILE: xtuner-eval_niah/longva/model/multimodal_resampler/masked_drop.py
class MaskedDrop (line 7) | class MaskedDrop(nn.Module):
method __init__ (line 8) | def __init__(self, model_args):
method forward (line 17) | def forward(self, image_features, *args, **kwargs):
method config (line 46) | def config(self):
method random_masking (line 56) | def random_masking(self, x, len_keep):
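Note: random_masking (line 56) follows the MAE recipe of keeping a random
subset of tokens per sample. A self-contained sketch of that pattern:

import torch

def random_masking(x, len_keep):
    # x: (N, L, D) token features; keep len_keep tokens per sample at random.
    N, L, D = x.shape
    noise = torch.rand(N, L, device=x.device)        # one score per token
    ids_shuffle = torch.argsort(noise, dim=1)        # random permutation
    ids_restore = torch.argsort(ids_shuffle, dim=1)  # inverse permutation
    ids_keep = ids_shuffle[:, :len_keep]
    x_masked = torch.gather(x, 1, ids_keep.unsqueeze(-1).repeat(1, 1, D))
    return x_masked, ids_restore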
FILE: xtuner-eval_niah/longva/model/multimodal_resampler/perceiver.py
function exists (line 16) | def exists(val):
function FeedForward (line 20) | def FeedForward(dim, mult=4):
class PerceiverAttention (line 30) | class PerceiverAttention(nn.Module):
method __init__ (line 31) | def __init__(self, *, dim, dim_head=64, heads=8):
method forward (line 44) | def forward(self, x, latents):
class PerceiverResamplerModule (line 73) | class PerceiverResamplerModule(nn.Module):
method __init__ (line 74) | def __init__(
method forward (line 104) | def forward(self, x):
class PerceiverResampler (line 130) | class PerceiverResampler(nn.Module):
method __init__ (line 131) | def __init__(self, model_args, vision_tower):
method forward (line 144) | def forward(self, image_features, *args, **kwargs):
method config (line 148) | def config(self):
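Note: the FeedForward factory (line 20) is the usual Perceiver-resampler block
(LayerNorm -> Linear -> GELU -> Linear), as in Flamingo-style resamplers. A
sketch of that convention:

import torch.nn as nn

def FeedForward(dim, mult=4):
    inner_dim = int(dim * mult)
    return nn.Sequential(
        nn.LayerNorm(dim),
        nn.Linear(dim, inner_dim, bias=False),
        nn.GELU(),
        nn.Linear(inner_dim, dim, bias=False),
    )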
FILE: xtuner-eval_niah/longva/model/multimodal_resampler/qformer.py
function disabled_train (line 51) | def disabled_train(self, mode=True):
class BertEmbeddings (line 57) | class BertEmbeddings(nn.Module):
method __init__ (line 60) | def __init__(self, config):
method forward (line 76) | def forward(
class BertSelfAttention (line 107) | class BertSelfAttention(nn.Module):
method __init__ (line 108) | def __init__(self, config, is_cross_attention):
method save_attn_gradients (line 133) | def save_attn_gradients(self, attn_gradients):
method get_attn_gradients (line 136) | def get_attn_gradients(self):
method save_attention_map (line 139) | def save_attention_map(self, attention_map):
method get_attention_map (line 142) | def get_attention_map(self):
method transpose_for_scores (line 145) | def transpose_for_scores(self, x):
method forward (line 153) | def forward(
class BertSelfOutput (line 239) | class BertSelfOutput(nn.Module):
method __init__ (line 240) | def __init__(self, config):
method forward (line 246) | def forward(self, hidden_states, input_tensor):
class BertAttention (line 253) | class BertAttention(nn.Module):
method __init__ (line 254) | def __init__(self, config, is_cross_attention=False):
method prune_heads (line 260) | def prune_heads(self, heads):
method forward (line 281) | def forward(
class BertIntermediate (line 306) | class BertIntermediate(nn.Module):
method __init__ (line 307) | def __init__(self, config):
method forward (line 315) | def forward(self, hidden_states):
class BertOutput (line 321) | class BertOutput(nn.Module):
method __init__ (line 322) | def __init__(self, config):
method forward (line 328) | def forward(self, hidden_states, input_tensor):
class BertLayer (line 335) | class BertLayer(nn.Module):
method __init__ (line 336) | def __init__(self, config, layer_num):
method forward (line 354) | def forward(
method feed_forward_chunk (line 422) | def feed_forward_chunk(self, attention_output):
method feed_forward_chunk_query (line 427) | def feed_forward_chunk_query(self, attention_output):
class BertEncoder (line 433) | class BertEncoder(nn.Module):
method __init__ (line 434) | def __init__(self, config):
method forward (line 439) | def forward(
class BertPooler (line 530) | class BertPooler(nn.Module):
method __init__ (line 531) | def __init__(self, config):
method forward (line 536) | def forward(self, hidden_states):
class BertPredictionHeadTransform (line 545) | class BertPredictionHeadTransform(nn.Module):
method __init__ (line 546) | def __init__(self, config):
method forward (line 555) | def forward(self, hidden_states):
class BertLMPredictionHead (line 562) | class BertLMPredictionHead(nn.Module):
method __init__ (line 563) | def __init__(self, config):
method forward (line 576) | def forward(self, hidden_states):
class BertOnlyMLMHead (line 582) | class BertOnlyMLMHead(nn.Module):
method __init__ (line 583) | def __init__(self, config):
method forward (line 587) | def forward(self, sequence_output):
class BertPreTrainedModel (line 592) | class BertPreTrainedModel(PreTrainedModel):
method _init_weights (line 602) | def _init_weights(self, module):
class BertModel (line 615) | class BertModel(BertPreTrainedModel):
method __init__ (line 625) | def __init__(self, config, add_pooling_layer=False):
method get_input_embeddings (line 637) | def get_input_embeddings(self):
method set_input_embeddings (line 640) | def set_input_embeddings(self, value):
method _prune_heads (line 643) | def _prune_heads(self, heads_to_prune):
method get_extended_attention_mask (line 651) | def get_extended_attention_mask(
method forward (line 731) | def forward(
class BertLMHeadModel (line 865) | class BertLMHeadModel(BertPreTrainedModel):
method __init__ (line 870) | def __init__(self, config):
method get_output_embeddings (line 878) | def get_output_embeddings(self):
method set_output_embeddings (line 881) | def set_output_embeddings(self, new_embeddings):
method forward (line 884) | def forward(
method prepare_inputs_for_generation (line 992) | def prepare_inputs_for_generation(self, input_ids, query_embeds, past=...
method _reorder_cache (line 1013) | def _reorder_cache(self, past, beam_idx):
class BertForMaskedLM (line 1020) | class BertForMaskedLM(BertPreTrainedModel):
method __init__ (line 1025) | def __init__(self, config):
method get_output_embeddings (line 1033) | def get_output_embeddings(self):
method set_output_embeddings (line 1036) | def set_output_embeddings(self, new_embeddings):
method forward (line 1039) | def forward(
class Qformer (line 1102) | class Qformer(nn.Module):
method __init__ (line 1103) | def __init__(self, model_args, vision_tower):
method build_Qformer (line 1117) | def build_Qformer(self, vision_width, cross_attention_freq, num_query_...
method forward (line 1135) | def forward(self, image_features, *args, **kwargs):
method hidden_size (line 1150) | def hidden_size(self):
method config (line 1154) | def config(self):
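Note: get_extended_attention_mask (line 651) performs the standard BERT-style
conversion of a (batch, seq) 0/1 mask into an additive attention bias. A
minimal sketch (the real method also handles causal and 3-D masks):

import torch

def get_extended_attention_mask(attention_mask, dtype=torch.float32):
    # (batch, seq) -> (batch, 1, 1, seq); masked positions get a large negative bias.
    extended = attention_mask[:, None, None, :].to(dtype)
    return (1.0 - extended) * torch.finfo(dtype).min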
FILE: xtuner-eval_niah/longva/model/multimodal_resampler/spatial_pool.py
class SpatialPool (line 6) | class SpatialPool(nn.Module):
method __init__ (line 7) | def __init__(self, model_args, vision_tower):
method forward (line 23) | def forward(self, image_features, images, *args, **kwargs):
method config (line 35) | def config(self):
method hidden_size (line 44) | def hidden_size(self):
FILE: xtuner-eval_niah/longva/model/utils.py
function auto_upgrade (line 4) | def auto_upgrade(config):
FILE: xtuner-eval_niah/longva/train/llama_flash_attn_monkey_patch.py
function forward (line 16) | def forward(
function _prepare_decoder_attention_mask (line 77) | def _prepare_decoder_attention_mask(self, attention_mask, input_shape, i...
function replace_llama_attn_with_flash_attn (line 82) | def replace_llama_attn_with_flash_attn():
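Note: replace_llama_attn_with_flash_attn (line 82) is the classic monkey patch:
rebind the module-level forward and mask builder indexed above onto the
transformers Llama classes. A sketch assuming those two functions are in scope:

import transformers

def replace_llama_attn_with_flash_attn():
    # FlashAttention applies causality itself, so the mask builder is bypassed
    # and the attention forward is swapped in-place on the library classes.
    llama = transformers.models.llama.modeling_llama
    llama.LlamaModel._prepare_decoder_attention_mask = _prepare_decoder_attention_mask
    llama.LlamaAttention.forward = forward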
FILE: xtuner-eval_niah/longva/train/llava_trainer.py
function maybe_zero_3 (line 30) | def maybe_zero_3(param, ignore_status=False, name=None):
function get_mm_adapter_state_maybe_zero_3 (line 45) | def get_mm_adapter_state_maybe_zero_3(named_params, keys_to_match):
function split_to_even_chunks (line 51) | def split_to_even_chunks(indices, lengths, num_chunks):
function get_variable_length_grouped_indices (line 73) | def get_variable_length_grouped_indices(lengths, batch_size, world_size,...
function get_modality_length_grouped_indices (line 89) | def get_modality_length_grouped_indices(lengths, batch_size, world_size,...
function get_length_grouped_indices (line 129) | def get_length_grouped_indices(lengths, batch_size, world_size, generato...
function get_length_grouped_indices_auto_single (line 152) | def get_length_grouped_indices_auto_single(lengths, batch_size, world_si...
function get_modality_length_grouped_indices_auto (line 167) | def get_modality_length_grouped_indices_auto(lengths, batch_size, world_...
class LengthGroupedSampler (line 196) | class LengthGroupedSampler(Sampler):
method __init__ (line 202) | def __init__(
method __len__ (line 223) | def __len__(self):
method __iter__ (line 226) | def __iter__(self):
class LLaVATrainer (line 240) | class LLaVATrainer(Trainer):
method create_accelerator_and_postprocess (line 242) | def create_accelerator_and_postprocess(self):
method _get_train_sampler (line 273) | def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
method get_train_dataloader (line 319) | def get_train_dataloader(self) -> DataLoader:
method create_optimizer (line 356) | def create_optimizer(self):
method _save_checkpoint (line 435) | def _save_checkpoint(self, model, trial, metrics=None):
method _save (line 459) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
class LLaVADPOTrainer (line 466) | class LLaVADPOTrainer(DPOTrainer):
method _get_train_sampler (line 467) | def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
method _save_checkpoint (line 483) | def _save_checkpoint(self, model, trial, metrics=None):
method _save (line 523) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
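Note: split_to_even_chunks (line 51) greedily balances summed sample lengths
across workers, as in upstream LLaVA's length-grouped sampler. A sketch:

def split_to_even_chunks(indices, lengths, num_chunks):
    # Assign each index to the currently lightest chunk; freeze full chunks.
    if len(indices) % num_chunks != 0:
        return [indices[i::num_chunks] for i in range(num_chunks)]
    per_chunk = len(indices) // num_chunks
    chunks = [[] for _ in range(num_chunks)]
    loads = [0] * num_chunks
    for index in indices:
        lightest = loads.index(min(loads))
        chunks[lightest].append(index)
        loads[lightest] += lengths[index]
        if len(chunks[lightest]) == per_chunk:
            loads[lightest] = float("inf")  # chunk is full
    return chunks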
FILE: xtuner-eval_niah/longva/train/train.py
class ModelArguments (line 59) | class ModelArguments:
class DataArguments (line 112) | class DataArguments:
class TrainingArguments (line 128) | class TrainingArguments(transformers.TrainingArguments):
function maybe_zero_3 (line 177) | def maybe_zero_3(param, ignore_status=False, name=None):
function get_peft_state_maybe_zero_3 (line 193) | def get_peft_state_maybe_zero_3(named_params, bias):
function get_peft_state_non_lora_maybe_zero_3 (line 218) | def get_peft_state_non_lora_maybe_zero_3(named_params, require_grad_only...
function get_mm_adapter_state_maybe_zero_3 (line 226) | def get_mm_adapter_state_maybe_zero_3(named_params, keys_to_match):
function find_all_linear_names (line 232) | def find_all_linear_names(model):
function safe_save_model_for_hf_trainer (line 248) | def safe_save_model_for_hf_trainer(trainer: transformers.Trainer, output...
function smart_tokenizer_and_embedding_resize (line 292) | def smart_tokenizer_and_embedding_resize(
function _tokenize_fn (line 315) | def _tokenize_fn(strings: Sequence[str], tokenizer: transformers.PreTrai...
function _mask_targets (line 337) | def _mask_targets(target, tokenized_lens, speakers):
function _add_speaker_and_signal (line 348) | def _add_speaker_and_signal(header, source, get_conversation=True):
function preprocess_multimodal (line 368) | def preprocess_multimodal(sources: Sequence[str], data_args: DataArgumen...
function preprocess_llama_2 (line 396) | def preprocess_llama_2(sources, tokenizer: transformers.PreTrainedTokeni...
function preprocess_gemma (line 471) | def preprocess_gemma(sources: List[List[Dict[str, str]]], tokenizer: tra...
function preprocess_qwen (line 550) | def preprocess_qwen(sources, tokenizer: transformers.PreTrainedTokenizer...
function preprocess_llama3 (line 605) | def preprocess_llama3(
function preprocess_v1 (line 687) | def preprocess_v1(sources, tokenizer: transformers.PreTrainedTokenizer, ...
function preprocess_mpt (line 766) | def preprocess_mpt(sources, tokenizer: transformers.PreTrainedTokenizer,...
function preprocess_plain (line 847) | def preprocess_plain(
function preprocess (line 869) | def preprocess(sources: Sequence[str], tokenizer: transformers.PreTraine...
class LazySupervisedDataset (line 920) | class LazySupervisedDataset(Dataset):
method __init__ (line 921) | def __init__(self, data_path: str, tokenizer: transformers.PreTrainedT...
method __len__ (line 992) | def __len__(self):
method lengths (line 996) | def lengths(self):
method modality_lengths (line 1004) | def modality_lengths(self):
method process_image (line 1012) | def process_image(self, image_file):
method __getitem__ (line 1050) | def __getitem__(self, i) -> Dict[str, torch.Tensor]:
method _get_item (line 1083) | def _get_item(self, i) -> Dict[str, torch.Tensor]:
class DataCollatorForSupervisedDataset (line 1184) | class DataCollatorForSupervisedDataset(object):
method pad_sequence (line 1189) | def pad_sequence(self, input_ids, batch_first, padding_value):
method __call__ (line 1197) | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
function make_supervised_data_module (line 1230) | def make_supervised_data_module(tokenizer: transformers.PreTrainedTokeni...
function get_model (line 1237) | def get_model(model_args, training_args, bnb_model_from_pretrained_args):
function train (line 1387) | def train(attn_implementation=None):
FILE: xtuner-eval_niah/longva/train/train_dpo.py
class ModelArguments (line 67) | class ModelArguments:
class DataArguments (line 117) | class DataArguments:
class TrainingArguments (line 135) | class TrainingArguments(transformers.TrainingArguments):
function maybe_zero_3 (line 171) | def maybe_zero_3(param, ignore_status=False, name=None):
function get_peft_state_maybe_zero_3 (line 187) | def get_peft_state_maybe_zero_3(named_params, bias):
function get_peft_state_non_lora_maybe_zero_3 (line 212) | def get_peft_state_non_lora_maybe_zero_3(named_params, require_grad_only...
function get_mm_adapter_state_maybe_zero_3 (line 220) | def get_mm_adapter_state_maybe_zero_3(named_params, keys_to_match):
function find_all_linear_names (line 226) | def find_all_linear_names(model):
function safe_save_model_for_hf_trainer (line 242) | def safe_save_model_for_hf_trainer(trainer: transformers.Trainer, output...
function smart_tokenizer_and_embedding_resize (line 286) | def smart_tokenizer_and_embedding_resize(
function _tokenize_fn (line 309) | def _tokenize_fn(strings: Sequence[str], tokenizer: transformers.PreTrai...
function _mask_targets (line 331) | def _mask_targets(target, tokenized_lens, speakers):
function _add_speaker_and_signal (line 342) | def _add_speaker_and_signal(header, source, get_conversation=True):
function preprocess_multimodal (line 362) | def preprocess_multimodal(sources: Sequence[str], data_args: DataArgumen...
function preprocess_multimodal_movie (line 383) | def preprocess_multimodal_movie(sources: Sequence[str], data_args: DataA...
function preprocess_llama_2 (line 400) | def preprocess_llama_2(sources, tokenizer: transformers.PreTrainedTokeni...
function make_conv (line 475) | def make_conv(prompt, answer):
function preprocess_gemma (line 488) | def preprocess_gemma(sources: List[List[Dict[str, str]]], tokenizer: tra...
function preprocess_qwen (line 567) | def preprocess_qwen(sources, tokenizer: transformers.PreTrainedTokenizer...
function preprocess_llama3 (line 618) | def preprocess_llama3(
function preprocess_v1 (line 667) | def preprocess_v1(sources, tokenizer: transformers.PreTrainedTokenizer, ...
function preprocess_mpt (line 746) | def preprocess_mpt(sources, tokenizer: transformers.PreTrainedTokenizer,...
function preprocess_plain (line 827) | def preprocess_plain(
function preprocess (line 849) | def preprocess(sources: Sequence[str], tokenizer: transformers.PreTraine...
function load_data (line 900) | def load_data(data_path):
class DPODataset (line 908) | class DPODataset(Dataset):
method __init__ (line 911) | def __init__(self, data_path: str, tokenizer: transformers.PreTrainedT...
method __len__ (line 978) | def __len__(self):
method lengths (line 982) | def lengths(self):
method modality_lengths (line 993) | def modality_lengths(self):
method process_image (line 1003) | def process_image(self, image_file):
method __getitem__ (line 1041) | def __getitem__(self, i) -> Dict[str, torch.Tensor]:
method _get_item (line 1081) | def _get_item(self, i) -> Dict[str, torch.Tensor]:
class DPODataCollator (line 1187) | class DPODataCollator(DPODataCollatorWithPadding):
method collate (line 1192) | def collate(self, batch):
method tokenize_batch_element (line 1239) | def tokenize_batch_element(self, prompt: str, chosen: str, rejected: s...
method __call__ (line 1274) | def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
function make_dpo_data_module (line 1308) | def make_dpo_data_module(tokenizer: transformers.PreTrainedTokenizer, da...
function get_model (line 1314) | def get_model(model_args, training_args, bnb_model_from_pretrained_args):
function train (line 1484) | def train(attn_implementation=None):
FILE: xtuner-eval_niah/longva/utils.py
function process_video_with_pyav (line 25) | def process_video_with_pyav(video_file, data_args):
function rank0_print (line 49) | def rank0_print(*args):
function build_logger (line 57) | def build_logger(logger_name, logger_filename):
class StreamToLogger (line 99) | class StreamToLogger(object):
method __init__ (line 104) | def __init__(self, logger, log_level=logging.INFO):
method __getattr__ (line 110) | def __getattr__(self, attr):
method write (line 113) | def write(self, buf):
method flush (line 127) | def flush(self):
function disable_torch_init (line 133) | def disable_torch_init():
function violates_moderation (line 143) | def violates_moderation(text):
function pretty_print_semaphore (line 165) | def pretty_print_semaphore(semaphore):
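Note: disable_torch_init (line 133) is the common trick to skip default weight
initialization before a checkpoint load. The usual two-liner:

import torch

def disable_torch_init():
    # reset_parameters becomes a no-op; weights are overwritten by the checkpoint anyway.
    setattr(torch.nn.Linear, "reset_parameters", lambda self: None)
    setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)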
FILE: xtuner-eval_niah/vision_niah/multi_eval_vision_niah.py
function safe_tokenize (line 67) | def safe_tokenize(tokenizer, text):
function eval_forward (line 74) | def eval_forward(model, input_embeds, answer_embeds, pad_id, answer_ids,...
function load_haystack (line 134) | def load_haystack(args):
function load_text_embeddings (line 143) | def load_text_embeddings(str, tokenizer, model, replace_double_newline=F...
function inference (line 164) | def inference(args):
function plot (line 318) | def plot(args, all_accuries):
function main (line 380) | def main(args):
FILE: xtuner-eval_niah/vision_niah/multi_produce_needle_embedding.py
function main (line 19) | def main(args):
FILE: xtuner-eval_niah/vision_niah/produce_haystack_embedding.py
function load_video_batches (line 12) | def load_video_batches(video_path, batch_size):
function main (line 23) | def main(args):
FILE: xtuner-eval_niah/vision_niah/single_eval_vision_niah.py
function safe_tokenize (line 67) | def safe_tokenize(tokenizer, text):
function eval_forward (line 74) | def eval_forward( model, input_embeds, answer_embeds, pad_id, answer_ids...
function load_haystack (line 132) | def load_haystack(args):
function load_text_embeddings (line 141) | def load_text_embeddings(str, tokenizer, model, replace_double_newline=F...
function inference (line 162) | def inference(args):
function plot (line 275) | def plot(args, all_accuries):
function main (line 337) | def main(args):
FILE: xtuner-eval_niah/vision_niah/single_produce_needle_embedding.py
function main (line 18) | def main(args):
FILE: xtuner-eval_niah/xtuner/_lite/__init__.py
function get_logger (line 11) | def get_logger():
FILE: xtuner-eval_niah/xtuner/_lite/accelerate/dispatches/__init__.py
function _dispatch_forward_fn (line 9) | def _dispatch_forward_fn(module, dispatch_fn):
function dispatch_internlm_varlen_attn_forward (line 13) | def dispatch_internlm_varlen_attn_forward(module):
function dispatch_llama_varlen_attn_forward (line 20) | def dispatch_llama_varlen_attn_forward(module):
function dispatch_qwen2_varlen_attn_forward (line 27) | def dispatch_qwen2_varlen_attn_forward(module):
function dispatch_clip_attn_forward (line 34) | def dispatch_clip_attn_forward(module):
function dispatch_rms_norm_forward (line 41) | def dispatch_rms_norm_forward(module):
function dispatch_internlm_mla_varlen_attn_forward (line 47) | def dispatch_internlm_mla_varlen_attn_forward(module):
function dispatch_modules (line 66) | def dispatch_modules(model, use_varlen_attn=False):
FILE: xtuner-eval_niah/xtuner/_lite/accelerate/dispatches/_attention.py
function _get_unpad_data (line 17) | def _get_unpad_data(attention_mask):
function upad_qkv (line 30) | def upad_qkv(query_layer, key_layer, value_layer, attention_mask,
function flash_attn_wo_mask (line 75) | def flash_attn_wo_mask(
function flash_attn_w_mask (line 96) | def flash_attn_w_mask(
function varlen_flash_attn (line 128) | def varlen_flash_attn(
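Note: varlen_flash_attn (line 128) wraps the packed (padding-free)
FlashAttention kernel. A hedged sketch of the call pattern, assuming flash-attn
is installed; the real function adds dispatch and reshape logic around it:

from flash_attn import flash_attn_varlen_func

def varlen_flash_attn(q, k, v, cu_seqlens, max_seqlen, dropout_p=0.0, causal=True):
    # q/k/v: (total_tokens, num_heads, head_dim), sequences packed back-to-back;
    # cu_seqlens: int32 cumulative lengths, e.g. [0, len0, len0+len1, ...].
    return flash_attn_varlen_func(
        q, k, v,
        cu_seqlens_q=cu_seqlens, cu_seqlens_k=cu_seqlens,
        max_seqlen_q=max_seqlen, max_seqlen_k=max_seqlen,
        dropout_p=dropout_p, causal=causal,
    )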
FILE: xtuner-eval_niah/xtuner/_lite/accelerate/dispatches/_fused/layer_norm.py
function layer_norm_forward (line 6) | def layer_norm_forward(self, hidden_states):
FILE: xtuner-eval_niah/xtuner/_lite/accelerate/dispatches/_fused/rms_norm.py
function rms_norm_forward (line 14) | def rms_norm_forward(self, hidden_states):
FILE: xtuner-eval_niah/xtuner/_lite/accelerate/dispatches/_fused/rotary.py
function rotary_kernel (line 11) | def rotary_kernel(
function apply_rotary (line 163) | def apply_rotary(
class ApplyRotaryEmb (line 265) | class ApplyRotaryEmb(torch.autograd.Function):
method forward (line 268) | def forward(
method backward (line 302) | def backward(ctx, do):
FILE: xtuner-eval_niah/xtuner/_lite/accelerate/dispatches/clip.py
function clip_flash_attn_forward (line 11) | def clip_flash_attn_forward(
FILE: xtuner-eval_niah/xtuner/_lite/accelerate/dispatches/internlm2.py
class InternLM2RotaryEmbedding (line 22) | class InternLM2RotaryEmbedding(torch.nn.Module):
method __init__ (line 24) | def __init__(self,
method forward (line 47) | def forward(self, x, seq_len):
function rotate_half (line 68) | def rotate_half(x):
function apply_rotary_pos_emb (line 75) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
function apply_rotary_pos_emb_mla (line 83) | def apply_rotary_pos_emb_mla(q, k, cos, sin, position_ids=None, unsqueez...
function repeat_kv (line 113) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
function repeat_kv_bshd (line 131) | def repeat_kv_bshd(hidden_states: torch.Tensor, n_rep: int) -> torch.Ten...
function _internlm2_varlen_attn_forward (line 145) | def _internlm2_varlen_attn_forward(
function internlm2_attn_forward_inference (line 324) | def internlm2_attn_forward_inference(
function internlm2_varlen_attn_forward (line 451) | def internlm2_varlen_attn_forward(
function _internlm2_mla_varlen_attn_forward (line 478) | def _internlm2_mla_varlen_attn_forward(
function internlm2_mla_varlen_attn_forward (line 596) | def internlm2_mla_varlen_attn_forward(
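Note: repeat_kv_bshd (line 131) adapts the usual GQA head expansion to the
(batch, seq, heads, dim) layout FlashAttention expects. A sketch:

import torch

def repeat_kv_bshd(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    # Duplicate each KV head n_rep times along the head axis.
    batch, slen, num_kv_heads, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    hidden_states = hidden_states[:, :, :, None, :].expand(
        batch, slen, num_kv_heads, n_rep, head_dim)
    return hidden_states.reshape(batch, slen, num_kv_heads * n_rep, head_dim)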
FILE: xtuner-eval_niah/xtuner/_lite/accelerate/dispatches/llama.py
class Cache (line 15) | class Cache:
function repeat_kv_bshd (line 19) | def repeat_kv_bshd(hidden_states: torch.Tensor, n_rep: int) -> torch.Ten...
function llama_varlen_attn_forward (line 33) | def llama_varlen_attn_forward(
FILE: xtuner-eval_niah/xtuner/_lite/accelerate/dispatches/qwen2.py
function qwen2_attn_forward_inference (line 32) | def qwen2_attn_forward_inference(
function qwen2_varlen_attn_forward (line 148) | def qwen2_varlen_attn_forward(
FILE: xtuner-eval_niah/xtuner/_lite/accelerate/packed.py
function packed_sequence (line 9) | def packed_sequence(num_tokens, enable=False, sp_size=1):
FILE: xtuner-eval_niah/xtuner/_lite/auto.py
function download_model_from_hub (line 20) | def download_model_from_hub(
class AutoModelForCausalLM (line 64) | class AutoModelForCausalLM:
method from_config (line 91) | def from_config(cls,
method from_pretrained (line 102) | def from_pretrained(
method _flash_attn_kwargs (line 139) | def _flash_attn_kwargs(config: PretrainedConfig) -> dict:
method _long_ctx_kwargs (line 162) | def _long_ctx_kwargs(config: PretrainedConfig,
FILE: xtuner-eval_niah/xtuner/_lite/chat/messages/base.py
class BaseMessages (line 10) | class BaseMessages(BaseModel):
method add (line 13) | def add(self, role: str, content):
method pop (line 17) | def pop(self):
method get_prompt (line 21) | def get_prompt(self, chat_template: ChatTemplate) -> str:
method tokenize (line 25) | def tokenize(self, tokenizer: PreTrainedTokenizer,
method from_dict (line 30) | def from_dict(cls, item: Dict) -> 'BaseMessages':
FILE: xtuner-eval_niah/xtuner/_lite/chat/messages/chat.py
class TextContentItem (line 15) | class TextContentItem(BaseModel):
method apply_chat_template (line 19) | def apply_chat_template(self, chat_template: HybridChatTemplate) -> str:
class ImageContentItem (line 23) | class ImageContentItem(BaseModel):
method apply_chat_template (line 27) | def apply_chat_template(self, chat_template: HybridChatTemplate) -> str:
class ChatMsg (line 35) | class ChatMsg(BaseModel):
method __init__ (line 41) | def __init__(self, *args, **kwargs):
method collect_img_urls (line 53) | def collect_img_urls(self) -> List[str]:
method get_prompt (line 61) | def get_prompt(self, chat_template: ChatTemplate) -> str:
method tokenize (line 86) | def tokenize(
class ChatMessages (line 107) | class ChatMessages(BaseMessages):
method add (line 111) | def add(self, role, content, loss=False):
method pop (line 114) | def pop(self):
method get_prompt (line 117) | def get_prompt(self, chat_template: ChatTemplate) -> str:
method tokenize (line 127) | def tokenize(self, tokenizer: PreTrainedTokenizer,
method from_str (line 169) | def from_str(cls, prompt: str) -> 'ChatMessages':
method from_dict (line 175) | def from_dict(cls, item: dict) -> 'ChatMessages':
FILE: xtuner-eval_niah/xtuner/_lite/chat/templates/chat.py
class ChatTemplate (line 6) | class ChatTemplate(BaseModel):
method decorate_system (line 18) | def decorate_system(self, text: str) -> str:
method decorate_assistant (line 22) | def decorate_assistant(self, text: str) -> str:
method decorate_user (line 26) | def decorate_user(self, text: str) -> str:
method check_system (line 31) | def check_system(cls, v: str) -> str:
method check_user (line 41) | def check_user(cls, v: str) -> str:
method check_assistant (line 51) | def check_assistant(cls, v: str) -> str:
FILE: xtuner-eval_niah/xtuner/_lite/chat/templates/hybrid.py
class HybridChatTemplate (line 6) | class HybridChatTemplate(BaseModel):
method mm_token_maps (line 42) | def mm_token_maps(self) -> Dict[str, int]:
method decorate_system (line 47) | def decorate_system(self, text: str) -> str:
method decorate_assistant (line 51) | def decorate_assistant(self, text: str) -> str:
method decorate_user (line 55) | def decorate_user(self, text: str) -> str:
method decorate_files (line 59) | def decorate_files(self, text: str) -> str:
method decorate_functions (line 63) | def decorate_functions(self, text: str) -> str:
method decorate_function_call (line 67) | def decorate_function_call(self, text: str, func: str) -> str:
method decorate_function_result (line 71) | def decorate_function_result(self, text: str) -> str:
method decorate_code_interpreter (line 75) | def decorate_code_interpreter(self, text: str) -> str:
method decorate_code_interpreter_call (line 79) | def decorate_code_interpreter_call(self, text: str, func: str) -> str:
method decorate_code_interpreter_result (line 84) | def decorate_code_interpreter_result(self, text: str) -> str:
method check_system (line 90) | def check_system(cls, v: str) -> str:
method check_user (line 100) | def check_user(cls, v: str) -> str:
method check_assistant (line 110) | def check_assistant(cls, v: str) -> str:
method check_function_call (line 121) | def check_function_call(cls, v: str) -> str:
method check_function_result (line 137) | def check_function_result(cls, v: str) -> str:
method check_functions (line 148) | def check_functions(cls, v: str) -> str:
method check_code_interpreter (line 159) | def check_code_interpreter(cls, v: str) -> str:
method check_code_interpreter_call (line 170) | def check_code_interpreter_call(cls, v: str) -> str:
method check_code_interpreter_result (line 186) | def check_code_interpreter_result(cls, v: str) -> str:
FILE: xtuner-eval_niah/xtuner/_lite/datasets/cache.py
class CacheDataset (line 4) | class CacheDataset(torch.utils.data.Dataset):
method cached_dir (line 7) | def cached_dir(self):
method cached (line 11) | def cached(self):
method cache (line 14) | def cache(self, cache_dir):
method load_cache (line 17) | def load_cache(self):
method from_cache (line 21) | def from_cache(self, cache_dir):
FILE: xtuner-eval_niah/xtuner/_lite/datasets/format.py
class Alpaca2Openai (line 4) | class Alpaca2Openai():
method source_format (line 7) | def source_format(cls):
method target_format (line 16) | def target_format(cls):
method convert (line 32) | def convert(data):
function llava_to_openai (line 50) | def llava_to_openai(data):
function llava_to_openai_interleave (line 101) | def llava_to_openai_interleave(data):
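Note: llava_to_openai (line 50) converts LLaVA-style conversation turns to
OpenAI-style messages. An illustrative sketch of the core mapping (the real
functions also handle image placeholders and interleaving):

def llava_to_openai(data):
    # {'from': 'human'|'gpt', 'value': ...} -> {'role': 'user'|'assistant', 'content': ...}
    role_map = {"human": "user", "gpt": "assistant"}
    return [{"role": role_map[t["from"]], "content": t["value"]} for t in data]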
FILE: xtuner-eval_niah/xtuner/_lite/datasets/llava.py
class LlavaTokenizeFunction (line 16) | class LlavaTokenizeFunction():
method __init__ (line 18) | def __init__(self,
method __call__ (line 31) | def __call__(self, item):
class LlavaTokenizedDataset (line 58) | class LlavaTokenizedDataset(TextTokenizedDataset):
method __init__ (line 60) | def __init__(self, dataset, image_processor, max_length):
method process_tokenized_data (line 64) | def process_tokenized_data(self, tokenized_data):
method from_cache (line 87) | def from_cache(cls, cache_dir, image_processor, max_length):
method __getitem__ (line 93) | def __getitem__(self, item):
class LlavaRawDataset (line 112) | class LlavaRawDataset(torch.utils.data.Dataset):
method __init__ (line 114) | def __init__(self, dataset, image_processor, max_length, tokenize_fn):
method process_tokenized_data (line 122) | def process_tokenized_data(self, tokenized_data):
method __getitem__ (line 144) | def __getitem__(self, item):
class SoftPackerForLlava (line 151) | class SoftPackerForLlava(SoftPackerForText):
method __init__ (line 153) | def __init__(self,
method __getitem__ (line 161) | def __getitem__(self, item):
method from_cache (line 230) | def from_cache(cls, cache_dir, image_processor, max_length):
class LlavaCollator (line 245) | class LlavaCollator():
method __init__ (line 247) | def __init__(self, pack_batch=False):
method __call__ (line 250) | def __call__(self, instances):
FILE: xtuner-eval_niah/xtuner/_lite/datasets/load.py
function load_json (line 22) | def load_json(file):
function load_jsonl (line 28) | def load_jsonl(file):
function load_bin (line 39) | def load_bin(file):
function master_only_load (line 46) | def master_only_load(load_fn):
function multi_thread_map (line 75) | def multi_thread_map(map_fns, dataset, desc, num_proc=8):
function openai_format (line 95) | def openai_format(item):
function load_hf_dataset (line 102) | def load_hf_dataset(path,
function load_from_cache (line 133) | def load_from_cache(cache_dir, init_fn):
function _gpu_parallel_load_local_datasets (line 174) | def _gpu_parallel_load_local_datasets(paths,
function _cpu_parallel_load_local_datasets (line 361) | def _cpu_parallel_load_local_datasets(paths,
function load_local_datasets (line 579) | def load_local_datasets(paths,
function load_datasets (line 601) | def load_datasets(paths,
function load_ms_dataset (line 721) | def load_ms_dataset():
FILE: xtuner-eval_niah/xtuner/_lite/datasets/pretrain.py
class SoftPackerForPretrain (line 7) | class SoftPackerForPretrain(SoftPackerForText):
method __getitem__ (line 9) | def __getitem__(self, item):
FILE: xtuner-eval_niah/xtuner/_lite/datasets/text.py
function sort_and_return_indices (line 21) | def sort_and_return_indices(lst):
class TextTokenizeFunction (line 25) | class TextTokenizeFunction():
method __init__ (line 27) | def __init__(self, tokenizer, chat_template, raw_format='openai'):
method __call__ (line 33) | def __call__(self, item):
class TextTokenizedDataset (line 41) | class TextTokenizedDataset(CacheDataset):
method __init__ (line 43) | def __init__(self, dataset, max_length):
method cached (line 58) | def cached(self):
method cached_dir (line 62) | def cached_dir(self):
method cache (line 65) | def cache(self, cache_dir):
method load_cache (line 76) | def load_cache(self):
method from_cache (line 82) | def from_cache(cls, cache_dir, max_length):
method _free (line 88) | def _free(self):
method __len__ (line 91) | def __len__(self):
method __getitem__ (line 94) | def __getitem__(self, item):
class TextOnlineTokenizeDataset (line 119) | class TextOnlineTokenizeDataset(torch.utils.data.Dataset):
method __init__ (line 121) | def __init__(self, dataset, tokenize_fn):
method __len__ (line 127) | def __len__(self):
method __getitem__ (line 130) | def __getitem__(self, item):
class SoftPackerForText (line 151) | class SoftPackerForText(CacheDataset):
method __init__ (line 153) | def __init__(self, dataset, max_length=2048, pack_info=None, seed=None):
method max_length_per_pack (line 176) | def max_length_per_pack(self):
method cached (line 184) | def cached(self):
method cached_dir (line 188) | def cached_dir(self):
method cache (line 191) | def cache(self, cache_dir):
method load_cache (line 208) | def load_cache(self):
method _free (line 216) | def _free(self):
method __len__ (line 220) | def __len__(self):
method __getitem__ (line 223) | def __getitem__(self, item):
method get_pack_info (line 286) | def get_pack_info(cls, dataset, max_length, seed=None):
method from_cache (line 326) | def from_cache(cls, cache_dir, max_length, seed=None):
class HardPackerForText (line 342) | class HardPackerForText(SoftPackerForText):
method __init__ (line 361) | def __init__(self, dataset, max_length=2048, pack_info=None):
method _cal_max_length (line 367) | def _cal_max_length(cls, begin, end, shfl_item_rngs_left,
method get_pack_info (line 382) | def get_pack_info(cls, dataset, max_length, seed=None):
method _pack_ids_and_labels_in_range (line 426) | def _pack_ids_and_labels_in_range(self, begin: int, end: int):
method __len__ (line 471) | def __len__(self):
method __getitem__ (line 474) | def __getitem__(self, item):
class TextCollator (line 502) | class TextCollator:
method __init__ (line 504) | def __init__(self, pack_batch=False, force_div_ring=False, ring_size=1...
method __call__ (line 512) | def __call__(self, instances):
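Note: SoftPackerForText (line 151) packs whole samples into fixed-budget bins,
never splitting a sample (unlike HardPackerForText, which splits at pack
boundaries). A hypothetical helper, greedy_soft_pack, to illustrate the idea;
the real get_pack_info shuffles with a seed and caches the result:

def greedy_soft_pack(lengths, max_length):
    # Group sample indices so each pack's total token count stays <= max_length.
    packs, current, total = [], [], 0
    for idx, n in enumerate(lengths):
        if current and total + n > max_length:
            packs.append(current)
            current, total = [], 0
        current.append(idx)
        total += n
    if current:
        packs.append(current)
    return packs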
FILE: xtuner-eval_niah/xtuner/_lite/modelings/__init__.py
function register_remote_code (line 6) | def register_remote_code():
FILE: xtuner-eval_niah/xtuner/_lite/modelings/internlm2/configuration_internlm2.py
class InternLM2Config (line 27) | class InternLM2Config(PretrainedConfig):
method __init__ (line 94) | def __init__( # pylint: disable=W0102
method _rope_scaling_validation (line 150) | def _rope_scaling_validation(self):
FILE: xtuner-eval_niah/xtuner/_lite/modelings/internlm2/modeling_internlm2.py
function _get_unpad_data (line 62) | def _get_unpad_data(attention_mask):
class InternLM2RMSNorm (line 75) | class InternLM2RMSNorm(nn.Module):
method __init__ (line 78) | def __init__(self, hidden_size, eps=1e-6):
method forward (line 83) | def forward(self, hidden_states):
class InternLM2RotaryEmbedding (line 95) | class InternLM2RotaryEmbedding(nn.Module):
method __init__ (line 98) | def __init__(self,
method forward (line 118) | def forward(self, x, position_ids):
class InternLM2LinearScalingRotaryEmbedding (line 137) | class InternLM2LinearScalingRotaryEmbedding(InternLM2RotaryEmbedding):
method forward (line 140) | def forward(self, x, position_ids):
class InternLM2DynamicNTKScalingRotaryEmbedding (line 147) | class InternLM2DynamicNTKScalingRotaryEmbedding(InternLM2RotaryEmbedding):
method forward (line 151) | def forward(self, x, position_ids):
function rotate_half (line 171) | def rotate_half(x):
function apply_rotary_pos_emb (line 178) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
class InternLM2MLP (line 204) | class InternLM2MLP(nn.Module):
method __init__ (line 207) | def __init__(self, config):
method forward (line 220) | def forward(self, x):
function repeat_kv (line 226) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
class InternLM2Attention (line 242) | class InternLM2Attention(nn.Module):
method __init__ (line 245) | def __init__(self,
method _init_rope (line 281) | def _init_rope(self):
method forward (line 308) | def forward(
class InternLM2FlashAttention2 (line 405) | class InternLM2FlashAttention2(InternLM2Attention):
method __init__ (line 412) | def __init__(self, *args, **kwargs):
method forward (line 424) | def forward(
method _flash_attention_forward (line 529) | def _flash_attention_forward(self,
method _upad_input (line 598) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
class InternLM2SdpaAttention (line 641) | class InternLM2SdpaAttention(InternLM2Attention):
method forward (line 649) | def forward(
class InternLM2DecoderLayer (line 757) | class InternLM2DecoderLayer(nn.Module):
method __init__ (line 760) | def __init__(self, config: InternLM2Config, layer_idx: int):
method forward (line 775) | def forward(
class InternLM2PreTrainedModel (line 853) | class InternLM2PreTrainedModel(PreTrainedModel):
method _init_weights (line 869) | def _init_weights(self, module):
class InternLM2Model (line 948) | class InternLM2Model(InternLM2PreTrainedModel):
method __init__ (line 957) | def __init__(self, config: InternLM2Config):
method get_input_embeddings (line 978) | def get_input_embeddings(self):
method set_input_embeddings (line 981) | def set_input_embeddings(self, value):
method forward (line 985) | def forward(
method _update_causal_mask (line 1106) | def _update_causal_mask(
class InternLM2ForCausalLM (line 1199) | class InternLM2ForCausalLM(InternLM2PreTrainedModel):
method __init__ (line 1205) | def __init__(self, config):
method get_input_embeddings (line 1215) | def get_input_embeddings(self):
method set_input_embeddings (line 1218) | def set_input_embeddings(self, value):
method get_output_embeddings (line 1221) | def get_output_embeddings(self):
method set_output_embeddings (line 1224) | def set_output_embeddings(self, new_embeddings):
method set_decoder (line 1227) | def set_decoder(self, decoder):
method get_decoder (line 1230) | def get_decoder(self):
method forward (line 1236) | def forward(
method prepare_inputs_for_generation (line 1329) | def prepare_inputs_for_generation(
method _reorder_cache (line 1413) | def _reorder_cache(past_key_values, beam_idx):
method build_inputs (line 1421) | def build_inputs(self,
method chat (line 1440) | def chat(
method stream_chat (line 1487) | def stream_chat(
class InternLM2ForSequenceClassification (line 1594) | class InternLM2ForSequenceClassification(InternLM2PreTrainedModel):
method __init__ (line 1597) | def __init__(self, config):
method get_input_embeddings (line 1606) | def get_input_embeddings(self):
method set_input_embeddings (line 1609) | def set_input_embeddings(self, value):
method forward (line 1613) | def forward(
class InternLM2ForQuestionAnswering (line 1719) | class InternLM2ForQuestionAnswering(InternLM2PreTrainedModel):
method __init__ (line 1724) | def __init__(self, config):
method get_input_embeddings (line 1732) | def get_input_embeddings(self):
method set_input_embeddings (line 1735) | def set_input_embeddings(self, value):
method forward (line 1739) | def forward(
class InternLM2ForTokenClassification (line 1823) | class InternLM2ForTokenClassification(InternLM2PreTrainedModel):
method __init__ (line 1826) | def __init__(self, config):
method get_input_embeddings (line 1842) | def get_input_embeddings(self):
method set_input_embeddings (line 1845) | def set_input_embeddings(self, value):
method forward (line 1849) | def forward(
FILE: xtuner-eval_niah/xtuner/_lite/modelings/llava/configuration_internlm2.py
class InternLM2Config (line 27) | class InternLM2Config(PretrainedConfig):
method __init__ (line 94) | def __init__( # pylint: disable=W0102
method _rope_scaling_validation (line 150) | def _rope_scaling_validation(self):
FILE: xtuner-eval_niah/xtuner/_lite/modelings/llava/configuration_llava.py
class EnhancedLlavaConfig (line 23) | class EnhancedLlavaConfig(PretrainedConfig):
method __init__ (line 76) | def __init__(
method save_pretrained (line 143) | def save_pretrained(self, save_directory: Union[str, os.PathLike], pus...
FILE: xtuner-eval_niah/xtuner/_lite/modelings/llava/modeling_internlm2.py
function _get_unpad_data (line 62) | def _get_unpad_data(attention_mask):
class InternLM2RMSNorm (line 75) | class InternLM2RMSNorm(nn.Module):
method __init__ (line 78) | def __init__(self, hidden_size, eps=1e-6):
method forward (line 83) | def forward(self, hidden_states):
class InternLM2RotaryEmbedding (line 95) | class InternLM2RotaryEmbedding(nn.Module):
method __init__ (line 98) | def __init__(self,
method forward (line 118) | def forward(self, x, position_ids):
class InternLM2LinearScalingRotaryEmbedding (line 137) | class InternLM2LinearScalingRotaryEmbedding(InternLM2RotaryEmbedding):
method forward (line 140) | def forward(self, x, position_ids):
class InternLM2DynamicNTKScalingRotaryEmbedding (line 147) | class InternLM2DynamicNTKScalingRotaryEmbedding(InternLM2RotaryEmbedding):
method forward (line 151) | def forward(self, x, position_ids):
function rotate_half (line 171) | def rotate_half(x):
function apply_rotary_pos_emb (line 178) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
class InternLM2MLP (line 204) | class InternLM2MLP(nn.Module):
method __init__ (line 207) | def __init__(self, config):
method forward (line 220) | def forward(self, x):
function repeat_kv (line 226) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
class InternLM2Attention (line 242) | class InternLM2Attention(nn.Module):
method __init__ (line 245) | def __init__(self,
method _init_rope (line 281) | def _init_rope(self):
method forward (line 308) | def forward(
class InternLM2FlashAttention2 (line 405) | class InternLM2FlashAttention2(InternLM2Attention):
method __init__ (line 412) | def __init__(self, *args, **kwargs):
method forward (line 424) | def forward(
method _flash_attention_forward (line 529) | def _flash_attention_forward(self,
method _upad_input (line 598) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
class InternLM2SdpaAttention (line 641) | class InternLM2SdpaAttention(InternLM2Attention):
method forward (line 649) | def forward(
class InternLM2DecoderLayer (line 757) | class InternLM2DecoderLayer(nn.Module):
method __init__ (line 760) | def __init__(self, config: InternLM2Config, layer_idx: int):
method forward (line 775) | def forward(
class InternLM2PreTrainedModel (line 853) | class InternLM2PreTrainedModel(PreTrainedModel):
method _init_weights (line 869) | def _init_weights(self, module):
class InternLM2Model (line 948) | class InternLM2Model(InternLM2PreTrainedModel):
method __init__ (line 957) | def __init__(self, config: InternLM2Config):
method get_input_embeddings (line 978) | def get_input_embeddings(self):
method set_input_embeddings (line 981) | def set_input_embeddings(self, value):
method forward (line 985) | def forward(
method _update_causal_mask (line 1106) | def _update_causal_mask(
class InternLM2ForCausalLM (line 1199) | class InternLM2ForCausalLM(InternLM2PreTrainedModel):
method __init__ (line 1205) | def __init__(self, config):
method get_input_embeddings (line 1215) | def get_input_embeddings(self):
method set_input_embeddings (line 1218) | def set_input_embeddings(self, value):
method get_output_embeddings (line 1221) | def get_output_embeddings(self):
method set_output_embeddings (line 1224) | def set_output_embeddings(self, new_embeddings):
method set_decoder (line 1227) | def set_decoder(self, decoder):
method get_decoder (line 1230) | def get_decoder(self):
method forward (line 1236) | def forward(
method prepare_inputs_for_generation (line 1329) | def prepare_inputs_for_generation(
method _reorder_cache (line 1413) | def _reorder_cache(past_key_values, beam_idx):
method build_inputs (line 1421) | def build_inputs(self,
method chat (line 1440) | def chat(
method stream_chat (line 1487) | def stream_chat(
class InternLM2ForSequenceClassification (line 1594) | class InternLM2ForSequenceClassification(InternLM2PreTrainedModel):
method __init__ (line 1597) | def __init__(self, config):
method get_input_embeddings (line 1606) | def get_input_embeddings(self):
method set_input_embeddings (line 1609) | def set_input_embeddings(self, value):
method forward (line 1613) | def forward(
class InternLM2ForQuestionAnswering (line 1719) | class InternLM2ForQuestionAnswering(InternLM2PreTrainedModel):
method __init__ (line 1724) | def __init__(self, config):
method get_input_embeddings (line 1732) | def get_input_embeddings(self):
method set_input_embeddings (line 1735) | def set_input_embeddings(self, value):
method forward (line 1739) | def forward(
class InternLM2ForTokenClassification (line 1823) | class InternLM2ForTokenClassification(InternLM2PreTrainedModel):
method __init__ (line 1826) | def __init__(self, config):
method get_input_embeddings (line 1842) | def get_input_embeddings(self):
method set_input_embeddings (line 1845) | def set_input_embeddings(self, value):
method forward (line 1849) | def forward(
FILE: xtuner-eval_niah/xtuner/_lite/modelings/llava/modeling_llava.py
class LlavaCausalLMOutputWithPast (line 46) | class LlavaCausalLMOutputWithPast(ModelOutput):
class LlavaMultiModalProjector (line 87) | class LlavaMultiModalProjector(nn.Module):
method __init__ (line 88) | def __init__(self, config: EnhancedLlavaConfig):
method forward (line 95) | def forward(self, image_features):
class LlavaPreTrainedModel (line 123) | class LlavaPreTrainedModel(PreTrainedModel):
method _init_weights (line 131) | def _init_weights(self, module):
method _supports_sdpa (line 154) | def _supports_sdpa(self):
class LlavaForConditionalGeneration (line 237) | class LlavaForConditionalGeneration(LlavaPreTrainedModel):
method __init__ (line 241) | def __init__(self, config: EnhancedLlavaConfig):
method get_input_embeddings (line 253) | def get_input_embeddings(self):
method set_input_embeddings (line 256) | def set_input_embeddings(self, value):
method get_output_embeddings (line 259) | def get_output_embeddings(self):
method set_output_embeddings (line 262) | def set_output_embeddings(self, new_embeddings):
method set_decoder (line 265) | def set_decoder(self, decoder):
method get_decoder (line 268) | def get_decoder(self):
method tie_weights (line 271) | def tie_weights(self):
method resize_token_embeddings (line 274) | def resize_token_embeddings(self, new_num_tokens: Optional[int] = None...
method _merge_input_ids_with_image_features (line 281) | def _merge_input_ids_with_image_features(self, image_features, inputs_...
method forward (line 360) | def forward(
method prepare_inputs_for_generation (line 516) | def prepare_inputs_for_generation(
method _reorder_cache (line 569) | def _reorder_cache(self, *args, **kwargs):
FILE: xtuner-eval_niah/xtuner/_lite/modelings/llava/processing_llava.py
class LlavaProcessor (line 28) | class LlavaProcessor(ProcessorMixin):
method __init__ (line 49) | def __init__(self, image_processor=None, tokenizer=None, chat_template...
method __call__ (line 52) | def __call__(
method batch_decode (line 117) | def batch_decode(self, *args, **kwargs):
method decode (line 125) | def decode(self, *args, **kwargs):
method model_input_names (line 134) | def model_input_names(self):
FILE: xtuner-eval_niah/xtuner/_lite/parallel/comm.py
function _all_to_all (line 13) | def _all_to_all(
class _AllToAll (line 29) | class _AllToAll(torch.autograd.Function):
method forward (line 40) | def forward(ctx: Any, input: Tensor, sp_group: dist.ProcessGroup,
method backward (line 51) | def backward(ctx: Any, grad_output: Tensor) -> Tuple:
function all_to_all (line 67) | def all_to_all(
function all_to_all_list (line 94) | def all_to_all_list(object_list, group=None):
FILE: xtuner-eval_niah/xtuner/_lite/parallel/fsdp/checkpointing.py
function checkpoint_check_fn (line 7) | def checkpoint_check_fn(submodule, target=RECOMPUTE_MODULES, selective=1...
FILE: xtuner-eval_niah/xtuner/_lite/parallel/fsdp/lazy.py
function dp_lazy_init (line 7) | def dp_lazy_init(module, module_map, dp_mesh):
function dp_sp_lazy_init (line 35) | def dp_sp_lazy_init(module, module_map, dp_mesh, sp_mesh):
function dp_tp_lazy_init (line 61) | def dp_tp_lazy_init(module, module_map, dp_mesh, tp_mesh):
class LoadWoInit (line 139) | class LoadWoInit:
method __init__ (line 142) | def __init__(self):
method __enter__ (line 151) | def __enter__(self, *args, **kwargs):
method __exit__ (line 160) | def __exit__(self, *args, **kwargs):
FILE: xtuner-eval_niah/xtuner/_lite/parallel/fsdp/precision.py
function set_require_grad_param_to_fp32 (line 5) | def set_require_grad_param_to_fp32(model: nn.Module):
FILE: xtuner-eval_niah/xtuner/_lite/parallel/fsdp/wrap.py
function layer_auto_wrap_policy (line 12) | def layer_auto_wrap_policy(
function layer_and_emb_wrap_policy (line 27) | def layer_and_emb_wrap_policy(
function token_embedding_wrap_policy (line 49) | def token_embedding_wrap_policy(
function all_required_grad_wrap_policy (line 66) | def all_required_grad_wrap_policy(
FILE: xtuner-eval_niah/xtuner/_lite/parallel/sampler.py
class ParallelSampler (line 13) | class ParallelSampler(Sampler):
method __init__ (line 42) | def __init__(
method __iter__ (line 78) | def __iter__(self) -> Iterator[int]:
method __len__ (line 99) | def __len__(self) -> int:
method set_epoch (line 103) | def set_epoch(self, epoch: int, step=0) -> None:
function get_length_grouped_indices (line 117) | def get_length_grouped_indices(max_lengths,
class LengthGroupedSampler (line 146) | class LengthGroupedSampler(Sampler):
method __init__ (line 148) | def __init__(self,
method __iter__ (line 200) | def __iter__(self) -> Iterator[int]:
method __len__ (line 222) | def __len__(self) -> int:
method set_epoch (line 226) | def set_epoch(self, epoch: int, step=0) -> None:
FILE: xtuner-eval_niah/xtuner/_lite/parallel/sequence/attention.py
function pre_process_for_sequence_parallel_attn (line 8) | def pre_process_for_sequence_parallel_attn(query_states,
function post_process_for_sequence_parallel_attn (line 41) | def post_process_for_sequence_parallel_attn(attn_output,
function sequence_parallel_wrapper (line 54) | def sequence_parallel_wrapper(local_attn):
FILE: xtuner-eval_niah/xtuner/_lite/parallel/sequence/data_collate.py
function pad_for_sequence_parallel (line 7) | def pad_for_sequence_parallel(tensor, padding_value, dim=-1):
function pad_cumulative_len_for_sequence_parallel (line 26) | def pad_cumulative_len_for_sequence_parallel(cumulative_len):
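Note: pad_for_sequence_parallel (line 7) pads the sequence dimension so it
divides evenly across sequence-parallel ranks. A sketch with sp_size passed
explicitly (the real helper reads it from the process group):

import torch

def pad_for_sequence_parallel(tensor, padding_value, sp_size, dim=-1):
    pad_num = (-tensor.size(dim)) % sp_size  # tokens needed to reach a multiple
    if pad_num == 0:
        return tensor
    pad_shape = list(tensor.shape)
    pad_shape[dim] = pad_num
    pad = torch.full(pad_shape, padding_value, dtype=tensor.dtype, device=tensor.device)
    return torch.cat([tensor, pad], dim=dim)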
FILE: xtuner-eval_niah/xtuner/_lite/parallel/sequence/ops.py
function split_for_sequence_parallel (line 6) | def split_for_sequence_parallel(input, dim: int, sp_group: dist.ProcessG...
function gather_for_sequence_parallel (line 33) | def gather_for_sequence_parallel(input, dim: int, sp_group: dist.Process...
class _GatherForwardSplitBackward (line 60) | class _GatherForwardSplitBackward(torch.autograd.Function):
method forward (line 68) | def forward(ctx, input, dim, sp_group, grad_scale):
method backward (line 75) | def backward(ctx, grad_output):
class _SplitForwardGatherBackward (line 85) | class _SplitForwardGatherBackward(torch.autograd.Function):
method forward (line 93) | def forward(ctx, input, dim, sp_group, grad_scale):
method backward (line 100) | def backward(ctx, grad_output):
function split_forward_gather_backward (line 109) | def split_forward_gather_backward(input, dim, sp_group, grad_scale=None):
function gather_forward_split_backward (line 123) | def gather_forward_split_backward(input, dim, sp_group, grad_scale=None):
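The _SplitForwardGatherBackward / _GatherForwardSplitBackward pair make sequence sharding differentiable: splitting in forward implies gathering gradients in backward, and vice versa. A simplified sketch of the split-forward case, assuming an already initialized torch.distributed process group (e.g. under torchrun) and omitting the grad-scale option the real functions carry.

import torch
import torch.distributed as dist


class SplitForwardGatherBackward(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, dim, group):
        ctx.dim, ctx.group = dim, group
        world_size = dist.get_world_size(group)
        rank = dist.get_rank(group)
        # Keep only this rank's sequence chunk.
        return x.chunk(world_size, dim=dim)[rank].contiguous()

    @staticmethod
    def backward(ctx, grad_output):
        # Reconstruct the full-sequence gradient by all-gathering the chunks.
        world_size = dist.get_world_size(ctx.group)
        parts = [torch.empty_like(grad_output) for _ in range(world_size)]
        dist.all_gather(parts, grad_output.contiguous(), group=ctx.group)
        return torch.cat(parts, dim=ctx.dim), None, None


# usage: local = SplitForwardGatherBackward.apply(hidden_states, 1, sp_group)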
FILE: xtuner-eval_niah/xtuner/_lite/parallel/sequence/reduce_loss.py
class _ReduceLoss (line 7) | class _ReduceLoss(torch.autograd.Function):
method forward (line 10) | def forward(ctx, mean_loss, loss_scale, process_group):
method backward (line 22) | def backward(ctx, grad_output):
function reduce_sequence_parallel_loss (line 26) | def reduce_sequence_parallel_loss(mean_loss,
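When each sequence-parallel rank computes a mean loss over only its own tokens, a plain average across ranks would be wrong whenever ranks hold different numbers of unpadded tokens; the reduction must weight by token count. The arithmetic, shown without the autograd wrapper and dist.all_reduce the real reduce_sequence_parallel_loss uses:

def reduce_sp_loss(mean_losses, token_counts):
    # Token-weighted average of per-rank mean losses.
    total_tokens = sum(token_counts)
    return sum(l * n for l, n in zip(mean_losses, token_counts)) / total_tokens


# Rank 0 averaged over 10 tokens, rank 1 over only 2 (padding removed):
print(reduce_sp_loss([2.0, 4.0], [10, 2]))  # 2.333..., not the naive 3.0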
FILE: xtuner-eval_niah/xtuner/_lite/parallel/setup.py
function setup_sp (line 15) | def setup_sp(sp_size):
function setup_tp (line 31) | def setup_tp(tp_size):
function setup_dp (line 47) | def setup_dp():
function set_seq_parallel_pg (line 67) | def set_seq_parallel_pg(sp_ulysses_degree, sp_ring_degree):
function setup_parallel (line 93) | def setup_parallel(sp_size=1, tp_size=1, ring_size=1):
function get_ulysess_mesh (line 108) | def get_ulysess_mesh():
function get_ring_mesh (line 112) | def get_ring_mesh():
function get_ulysess_group (line 116) | def get_ulysess_group():
function get_ring_group (line 120) | def get_ring_group():
function get_ulysess_world_size (line 124) | def get_ulysess_world_size():
function get_ring_world_size (line 135) | def get_ring_world_size():
function get_dp_mesh (line 146) | def get_dp_mesh():
function get_dp_group (line 150) | def get_dp_group():
function get_dp_world_size (line 154) | def get_dp_world_size():
function get_sp_mesh (line 165) | def get_sp_mesh():
function get_sp_group (line 169) | def get_sp_group():
function get_sp_world_size (line 173) | def get_sp_world_size():
function get_tp_mesh (line 184) | def get_tp_mesh():
function get_tp_group (line 188) | def get_tp_group():
function get_tp_world_size (line 192) | def get_tp_world_size():
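setup_parallel and the get_*_mesh/group/world_size accessors above maintain a grid of process groups. A minimal sketch of the same bookkeeping using torch's DeviceMesh, assuming torchrun has already initialized the default process group; the flat two-dimensional (dp, sp) layout is an illustrative simplification of the repo's dp/sp/tp/ring/ulysses hierarchy.

import torch.distributed as dist
from torch.distributed.device_mesh import init_device_mesh


def setup_dp_sp(sp_size: int, device_type: str = "cuda"):
    world_size = dist.get_world_size()
    assert world_size % sp_size == 0, "sp_size must divide world_size"
    mesh = init_device_mesh(device_type, (world_size // sp_size, sp_size),
                            mesh_dim_names=("dp", "sp"))
    # mesh["dp"] / mesh["sp"] are sub-meshes; .get_group() yields the
    # ProcessGroup a rank uses for collectives along that dimension.
    return mesh["dp"], mesh["sp"]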
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/comm/all_to_all.py
function all_to_all_4D (line 15) | def all_to_all_4D(
class SeqAllToAll4D (line 96) | class SeqAllToAll4D(torch.autograd.Function):
method forward (line 98) | def forward(
method backward (line 113) | def backward(ctx: Any, *grad_output: Tensor) -> Tuple[None, Tensor, No...
function all_to_all_5D (line 124) | def all_to_all_5D(
class SeqAllToAll5D (line 207) | class SeqAllToAll5D(torch.autograd.Function):
method forward (line 209) | def forward(
method backward (line 224) | def backward(ctx: Any, *grad_output: Tensor) -> Tuple[None, Tensor, No...
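all_to_all_4D is the Ulysses primitive (the same transform is behind pre/post_process_for_sequence_parallel_attn earlier): an all-to-all that trades sequence sharding for head sharding, so each rank can run full-sequence attention on a slice of heads. A single-process simulation of its effect, with the collective modeled as list indexing; per-rank shapes are (seq, heads, head_dim), and batch dims are omitted for clarity.

import torch

P, S, H, D = 2, 4, 4, 3  # ranks, seq len, heads, head dim
x = torch.arange(S * H * D).reshape(S, H, D)

# Sequence-sharded start: rank r holds rows r*S//P : (r+1)*S//P.
shards = list(x.chunk(P, dim=0))
# Each rank splits its shard along the head dim and sends piece d to rank d.
send = [list(s.chunk(P, dim=1)) for s in shards]
recv = [[send[src][dst] for src in range(P)] for dst in range(P)]
# Concatenating what a rank received along the sequence dim gives it the
# FULL sequence for H/P of the heads.
ulysses = [torch.cat(r, dim=0) for r in recv]
assert torch.equal(ulysses[0], x[:, : H // P, :])
assert torch.equal(ulysses[1], x[:, H // P :, :])
# The reverse all-to-all after attention restores sequence sharding.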
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/comm/extract_local.py
function stripe_extract_local (line 7) | def stripe_extract_local(value, rank, world_size, rd, ud, *args, **kwargs):
function basic_extract_local (line 29) | def basic_extract_local(value, rank, world_size, *args, **kwargs):
function zigzag_extract_local (line 33) | def zigzag_extract_local(value, rank, world_size, rd, ud, dim=1, *args, ...
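zigzag_extract_local implements the load-balancing trick of zigzag ring attention: under a causal mask, early sequence chunks are cheap and late chunks expensive, so the sequence is cut into 2 * world_size chunks and rank r takes chunks r and 2W - 1 - r, pairing one cheap chunk with one expensive one. A minimal sketch:

import torch


def zigzag_local(value: torch.Tensor, rank: int, world_size: int,
                 dim: int = 1) -> torch.Tensor:
    chunks = value.chunk(2 * world_size, dim=dim)
    return torch.cat(
        [chunks[rank], chunks[2 * world_size - 1 - rank]], dim=dim)


seq = torch.arange(8).unsqueeze(0)              # batch of one, seq len 8
print(zigzag_local(seq, rank=0, world_size=2))  # tensor([[0, 1, 6, 7]])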
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/globals.py
class Singleton (line 4) | class Singleton:
method __new__ (line 7) | def __new__(cls, *args, **kwargs):
class ProcessGroupSingleton (line 13) | class ProcessGroupSingleton(Singleton):
method __init__ (line 14) | def __init__(self):
function set_seq_parallel_pg (line 22) | def set_seq_parallel_pg(
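ProcessGroupSingleton relies on the classic __new__-based singleton so every module that imports these globals sees the same process-group holder. A compact sketch of the pattern, with illustrative attribute names:

class Singleton:
    _instance = None

    def __new__(cls, *args, **kwargs):
        # Cache one instance per class; later constructions return it.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance


class PGHolder(Singleton):
    def __init__(self):
        # __init__ runs on every construction, so guard re-initialization.
        if not hasattr(self, "ulysses_pg"):
            self.ulysses_pg = None
            self.ring_pg = None


assert PGHolder() is PGHolder()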
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/hybrid/async_attn_layer.py
class AsyncLongContextAttention (line 11) | class AsyncLongContextAttention(torch.nn.Module):
method __init__ (line 21) | def __init__(
method forward (line 42) | def forward(
method backward (line 195) | def backward(self, *args, **kwargs):
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/hybrid/attn_layer.py
class LongContextAttention (line 15) | class LongContextAttention(torch.nn.Module):
method __init__ (line 25) | def __init__(
method forward (line 45) | def forward(
class LongContextAttentionQKVPacked (line 134) | class LongContextAttentionQKVPacked(torch.nn.Module):
method __init__ (line 144) | def __init__(
method forward (line 164) | def forward(
function llama3_varlen_attention_sp_ulysses_ring (line 225) | def llama3_varlen_attention_sp_ulysses_ring(
function attention_sp_ulysses_ring (line 299) | def attention_sp_ulysses_ring(
class LongContextVarLenAttentionForLlaMa3 (line 347) | class LongContextVarLenAttentionForLlaMa3(torch.nn.Module):
method __init__ (line 357) | def __init__(
method forward (line 372) | def forward(
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/ring/llama3_flash_attn_varlen.py
class AsyncHandles (line 10) | class AsyncHandles:
method __init__ (line 12) | def __init__(self) -> None:
method register (line 15) | def register(self, handle):
method wait (line 18) | def wait(self):
function llama3_flash_attn_prepare_cu_seqlens (line 24) | def llama3_flash_attn_prepare_cu_seqlens(cu_seqlens, causal, rank, world...
function llama3_flash_attn_varlen_forward (line 63) | def llama3_flash_attn_varlen_forward(
function llama3_flash_attn_varlen_backward (line 164) | def llama3_flash_attn_varlen_backward(
class Llama3FlashAttnVarlenFunc (line 313) | class Llama3FlashAttnVarlenFunc(torch.autograd.Function):
method forward (line 315) | def forward(
method backward (line 375) | def backward(ctx, dout, *args):
function llama3_flash_attn_varlen_qkvpacked_func (line 401) | def llama3_flash_attn_varlen_qkvpacked_func(
function llama3_flash_attn_varlen_kvpacked_func (line 439) | def llama3_flash_attn_varlen_kvpacked_func(
function llama3_flash_attn_varlen_func (line 478) | def llama3_flash_attn_varlen_func(
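The llama3_ variants shard a packed varlen sequence contiguously across ranks, so each rank must derive local cu_seqlens from the global ones: clip to its token range, shift to local coordinates, and drop duplicates. A sketch of that bookkeeping (the real llama3_flash_attn_prepare_cu_seqlens also derives max_seqlen and the causal offsets):

def local_cu_seqlens(cu_seqlens, start, end):
    """Global cumulative seq lengths -> local ones for tokens [start, end)."""
    clipped = [min(max(c, start), end) - start for c in cu_seqlens]
    out = [clipped[0]]
    for c in clipped[1:]:
        if c != out[-1]:  # drop duplicates created by clipping
            out.append(c)
    return out


# Three packed docs of length 5, 7, 8; rank 1 of 2 owns tokens [10, 20).
print(local_cu_seqlens([0, 5, 12, 20], 10, 20))  # [0, 2, 10]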
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/ring/ring_flash_attn.py
function ring_flash_attn_forward (line 8) | def ring_flash_attn_forward(
function ring_flash_attn_backward (line 61) | def ring_flash_attn_backward(
class RingFlashAttnFunc (line 145) | class RingFlashAttnFunc(torch.autograd.Function):
method forward (line 147) | def forward(
method backward (line 191) | def backward(ctx, dout, *args):
function ring_flash_attn_qkvpacked_func (line 211) | def ring_flash_attn_qkvpacked_func(
function ring_flash_attn_kvpacked_func (line 237) | def ring_flash_attn_kvpacked_func(
function ring_flash_attn_func (line 264) | def ring_flash_attn_func(
function ring_flash_attn_inference_func (line 293) | def ring_flash_attn_inference_func(
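Ring flash attention keeps Q local and circulates K/V blocks around the ranks: at step s, rank r attends to the K/V originally owned by rank (r - s) mod W, while RingComm-style async send/recv overlaps the next transfer with compute. A tiny schedule printer showing that each rank sees every block exactly once:

def ring_schedule(world_size: int):
    for step in range(world_size):
        for rank in range(world_size):
            kv_owner = (rank - step) % world_size
            print(f"step {step}: rank {rank} attends to KV from rank {kv_owner}")


ring_schedule(3)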
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/ring/ring_flash_attn_varlen.py
function ring_flash_attn_varlen_forward (line 25) | def ring_flash_attn_varlen_forward(
function ring_flash_attn_varlen_backward (line 95) | def ring_flash_attn_varlen_backward(
class RingFlashAttnVarlenFunc (line 184) | class RingFlashAttnVarlenFunc(torch.autograd.Function):
method forward (line 186) | def forward(
method backward (line 235) | def backward(ctx, dout, *args):
function ring_flash_attn_varlen_qkvpacked_func (line 257) | def ring_flash_attn_varlen_qkvpacked_func(
function ring_flash_attn_varlen_kvpacked_func (line 287) | def ring_flash_attn_varlen_kvpacked_func(
function ring_flash_attn_varlen_func (line 318) | def ring_flash_attn_varlen_func(
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/ring/stripe_flash_attn.py
function stripe_flash_attn_forward (line 7) | def stripe_flash_attn_forward(
function stripe_flash_attn_backward (line 81) | def stripe_flash_attn_backward(
class StripeFlashAttnFunc (line 199) | class StripeFlashAttnFunc(torch.autograd.Function):
method forward (line 201) | def forward(
method backward (line 245) | def backward(ctx, dout, *args):
function stripe_flash_attn_qkvpacked_func (line 265) | def stripe_flash_attn_qkvpacked_func(
function stripe_flash_attn_kvpacked_func (line 291) | def stripe_flash_attn_kvpacked_func(
function stripe_flash_attn_func (line 318) | def stripe_flash_attn_func(
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/ring/triton_utils.py
function flatten_kernel (line 7) | def flatten_kernel(
function flatten_varlen_lse (line 39) | def flatten_varlen_lse(lse, cu_seqlens):
function unflatten_kernel (line 71) | def unflatten_kernel(
function unflatten_varlen_lse (line 103) | def unflatten_varlen_lse(lse, cu_seqlens, max_seqlen: int):
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/ring/utils.py
function get_default_args (line 14) | def get_default_args(func):
function _update_out_and_lse (line 25) | def _update_out_and_lse(
function update_out_and_lse (line 45) | def update_out_and_lse(
function flatten_varlen_lse (line 69) | def flatten_varlen_lse(lse, cu_seqlens):
function unflatten_varlen_lse (line 78) | def unflatten_varlen_lse(lse, cu_seqlens, max_seqlen: int):
class RingComm (line 90) | class RingComm:
method __init__ (line 91) | def __init__(self, process_group: dist.ProcessGroup):
method send_recv (line 105) | def send_recv(
method commit (line 121) | def commit(self):
method wait (line 126) | def wait(self):
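update_out_and_lse is the online-softmax merge at the heart of all the ring variants: two partial attention outputs can be combined exactly as long as each carries its log-sum-exp. A sketch of the merge with a numerical check against full softmax attention; the repo's in-place fused version is mathematically equivalent.

import torch


def merge(out1, lse1, out2, lse2):
    # Weight each partial output by its (max-shifted) softmax denominator.
    max_lse = torch.maximum(lse1, lse2)
    w1 = torch.exp(lse1 - max_lse)
    w2 = torch.exp(lse2 - max_lse)
    out = (w1.unsqueeze(-1) * out1 + w2.unsqueeze(-1) * out2) \
        / (w1 + w2).unsqueeze(-1)
    return out, max_lse + torch.log(w1 + w2)


def block_attn(scores, v):
    # Attention restricted to one K/V block, plus its log-sum-exp.
    return torch.softmax(scores, dim=-1) @ v, torch.logsumexp(scores, dim=-1)


q, k, v = torch.randn(1, 8), torch.randn(6, 8), torch.randn(6, 8)
scores = q @ k.T
o1, l1 = block_attn(scores[:, :3], v[:3])
o2, l2 = block_attn(scores[:, 3:], v[3:])
merged, _ = merge(o1, l1, o2, l2)
full, _ = block_attn(scores, v)
assert torch.allclose(merged, full, atol=1e-5)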
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/ring/zigzag_ring_flash_attn.py
function zigzag_ring_flash_attn_forward (line 7) | def zigzag_ring_flash_attn_forward(
function zigzag_ring_flash_attn_backward (line 81) | def zigzag_ring_flash_attn_backward(
class ZigZagRingFlashAttnFunc (line 187) | class ZigZagRingFlashAttnFunc(torch.autograd.Function):
method forward (line 189) | def forward(
method backward (line 233) | def backward(ctx, dout, *args):
function zigzag_ring_flash_attn_qkvpacked_func (line 253) | def zigzag_ring_flash_attn_qkvpacked_func(
function zigzag_ring_flash_attn_kvpacked_func (line 279) | def zigzag_ring_flash_attn_kvpacked_func(
function zigzag_ring_flash_attn_func (line 306) | def zigzag_ring_flash_attn_func(
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/ring/zigzag_ring_flash_attn_varlen.py
function get_half_index (line 24) | def get_half_index(cu_seqlens, *, front: bool):
function get_half_lse (line 43) | def get_half_lse(lse, cu_seqlens, *, front: bool):
function zigzag_ring_flash_attn_varlen_forward (line 74) | def zigzag_ring_flash_attn_varlen_forward(
function zigzag_ring_flash_attn_varlen_backward (line 184) | def zigzag_ring_flash_attn_varlen_backward(
class ZigZagRingFlashAttnVarlenFunc (line 305) | class ZigZagRingFlashAttnVarlenFunc(torch.autograd.Function):
method forward (line 307) | def forward(
method backward (line 369) | def backward(ctx, dout, *args):
function zigzag_ring_flash_attn_varlen_qkvpacked_func (line 400) | def zigzag_ring_flash_attn_varlen_qkvpacked_func(
function zigzag_ring_flash_attn_varlen_kvpacked_func (line 430) | def zigzag_ring_flash_attn_varlen_kvpacked_func(
function zigzag_ring_flash_attn_varlen_func (line 461) | def zigzag_ring_flash_attn_varlen_func(
FILE: xtuner-eval_niah/xtuner/_lite/yunchang/ulysses/attn_layer.py
function torch_attn (line 16) | def torch_attn(query,
class UlyssesAttention (line 42) | class UlyssesAttention(torch.nn.Module):
method __init__ (line 52) | def __init__(
method forward (line 70) | def forward(
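torch_attn is the torch-native local attention UlyssesAttention can run between its two all-to-alls, once every rank holds the full sequence for its head slice. A minimal stand-in using scaled_dot_product_attention; the exact fallback in the repo may differ.

import torch
import torch.nn.functional as F


def local_attn(q, k, v, causal=True):
    # q, k, v: (batch, num_heads, seq_len, head_dim)
    return F.scaled_dot_product_attention(q, k, v, is_causal=causal)


q = k = v = torch.randn(2, 4, 16, 32)
print(local_attn(q, k, v).shape)  # torch.Size([2, 4, 16, 32])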
FILE: xtuner-eval_niah/xtuner/apis/datasets/alpaca.py
function alpaca_enzh_dataset (line 14) | def alpaca_enzh_dataset(tokenizer,
function alpaca_enzh_data_collator (line 41) | def alpaca_enzh_data_collator(return_hf_format=False):
function alpaca_zh_dataset (line 45) | def alpaca_zh_dataset(tokenizer,
function alpaca_zh_data_collator (line 66) | def alpaca_zh_data_collator(return_hf_format=False):
function alpaca_dataset (line 70) | def alpaca_dataset(tokenizer,
function alpaca_data_collator (line 91) | def alpaca_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/arxiv.py
function arxiv_dataset (line 12) | def arxiv_dataset(tokenizer,
function arxiv_data_collator (line 37) | def arxiv_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/code_alpaca.py
function code_alpaca_dataset (line 12) | def code_alpaca_dataset(tokenizer,
function code_alpaca_data_collator (line 33) | def code_alpaca_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/colorist.py
function colorist_dataset (line 12) | def colorist_dataset(tokenizer,
function colorist_data_collator (line 33) | def colorist_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/lawyer.py
function lawyer_dataset (line 15) | def lawyer_dataset(tokenizer,
function lawyer_data_collator (line 40) | def lawyer_data_collator(return_hf_format=False):
function lawyer_crime_dataset (line 44) | def lawyer_crime_dataset(tokenizer,
function lawyer_crime_data_collator (line 68) | def lawyer_crime_data_collator(return_hf_format=False):
function lawyer_reference_dataset (line 72) | def lawyer_reference_dataset(tokenizer,
function lawyer_reference_data_collator (line 96) | def lawyer_reference_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/medical.py
function medical_dataset (line 12) | def medical_dataset(tokenizer,
function medical_data_collator (line 33) | def medical_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/moss_003_sft.py
function moss_003_sft_dataset (line 10) | def moss_003_sft_dataset(tokenizer,
function moss_003_sft_data_collator (line 29) | def moss_003_sft_data_collator(return_hf_format=False):
function moss_003_sft_no_plugins_dataset (line 33) | def moss_003_sft_no_plugins_dataset(tokenizer,
function moss_003_sft_no_plugins_data_collator (line 50) | def moss_003_sft_no_plugins_data_collator(return_hf_format=False):
function moss_003_sft_plugins_dataset (line 54) | def moss_003_sft_plugins_dataset(tokenizer,
function moss_003_sft_plugins_data_collator (line 71) | def moss_003_sft_plugins_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/oasst1.py
function oasst1_dataset (line 12) | def oasst1_dataset(tokenizer,
function oasst1_data_collator (line 33) | def oasst1_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/open_orca.py
function openorca_dataset (line 12) | def openorca_dataset(tokenizer,
function openorca_data_collator (line 33) | def openorca_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/sql.py
function sql_dataset (line 12) | def sql_dataset(tokenizer,
function sql_data_collator (line 33) | def sql_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/tiny_codes.py
function tiny_codes_dataset (line 12) | def tiny_codes_dataset(tokenizer,
function tiny_codes_data_collator (line 33) | def tiny_codes_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/datasets/wizardlm.py
function wizardlm_dataset (line 12) | def wizardlm_dataset(tokenizer,
function wizardlm_data_collator (line 33) | def wizardlm_data_collator(return_hf_format=False):
FILE: xtuner-eval_niah/xtuner/apis/model.py
function build_qlora_model (line 12) | def build_qlora_model(model_name_or_path,
function build_lora_model (line 52) | def build_lora_model(model_name_or_path,
function build_model (line 78) | def build_model(model_name_or_path, return_tokenizer=True):
FILE: xtuner-eval_niah/xtuner/apis/training_args.py
class DefaultTrainingArguments (line 12) | class DefaultTrainingArguments(TrainingArguments):
FILE: xtuner-eval_niah/xtuner/configs/__init__.py
function get_cfgs_name_path (line 5) | def get_cfgs_name_path():
FILE: xtuner-eval_niah/xtuner/configs/llava/llama3_8b_instruct_clip_vit_large_p14_336/convert_xtuner_weights_to_hf.py
function convert_state_dict_to_hf (line 25) | def convert_state_dict_to_hf(state_dict, mapping):
function convert_to_hf (line 38) | def convert_to_hf(text_model_id, vision_model_id, projector_weight, save...
function main (line 131) | def main():
FILE: xtuner-eval_niah/xtuner/configs/llava/llama3_8b_instruct_clip_vit_large_p14_336/convert_xtuner_weights_to_llava.py
function convert_state_dict_to_hf (line 27) | def convert_state_dict_to_hf(state_dict, mapping):
function convert_to_llava (line 39) | def convert_to_llava(text_model_id, vision_model_id, projector_weight,
function main (line 94) | def main():
FILE: xtuner-eval_niah/xtuner/configs/llava/phi3_mini_4k_instruct_clip_vit_large_p14_336/convert_phi_to_llama.py
function convert_phi_to_llama (line 13) | def convert_phi_to_llama(phi_path, save_path):
function main (line 91) | def main():
FILE: xtuner-eval_niah/xtuner/configs/llava/phi3_mini_4k_instruct_clip_vit_large_p14_336/convert_xtuner_weights_to_hf.py
function convert_state_dict_to_hf (line 25) | def convert_state_dict_to_hf(state_dict, mapping):
function convert_to_hf (line 38) | def convert_to_hf(text_model_id, vision_model_id, projector_weight, save...
function main (line 128) | def main():
FILE: xtuner-eval_niah/xtuner/configs/llava/phi3_mini_4k_instruct_clip_vit_large_p14_336/convert_xtuner_weights_to_llava.py
function convert_state_dict_to_hf (line 27) | def convert_state_dict_to_hf(state_dict, mapping):
function convert_to_llava (line 39) | def convert_to_llava(text_model_id, vision_model_id, projector_weight,
function main (line 94) | def main():
FILE: xtuner-eval_niah/xtuner/dataset/collate_fns/default_collate_fn.py
function default_collate_fn (line 12) | def default_collate_fn(instances: Sequence[Dict],
FILE: xtuner-eval_niah/xtuner/dataset/collate_fns/mmlu_collate_fn.py
function mmlu_collate_fn (line 10) | def mmlu_collate_fn(instances: Sequence[Dict],
FILE: xtuner-eval_niah/xtuner/dataset/collate_fns/preference_collate_fn.py
function preference_collate_fn (line 13) | def preference_collate_fn(instances: Sequence[Dict],
FILE: xtuner-eval_niah/xtuner/dataset/concat_dataset.py
class ConcatDataset (line 7) | class ConcatDataset(TorchConcatDataset):
method __init__ (line 9) | def __init__(self, datasets):
method __repr__ (line 15) | def __repr__(self):
FILE: xtuner-eval_niah/xtuner/dataset/huggingface.py
function get_lengths (line 18) | def get_lengths(example):
function build_origin_dataset (line 22) | def build_origin_dataset(dataset, split):
function map_dataset (line 39) | def map_dataset(dataset, dataset_map_fn, map_num_proc):
function add_template_to_dataset (line 54) | def add_template_to_dataset(dataset, template_map_fn, map_num_proc):
function tokenize_dataset (line 67) | def tokenize_dataset(dataset, tokenizer, max_length, with_image_token,
function pack_dataset (line 88) | def pack_dataset(dataset, max_length, use_varlen_attn, shuffle_before_pack,
function process (line 100) | def process(dataset,
function process_hf_dataset (line 222) | def process_hf_dataset(dataset,
FILE: xtuner-eval_niah/xtuner/dataset/intern_repo.py
class JsonlDataset (line 22) | class JsonlDataset(torch.utils.data.Dataset):
method __init__ (line 36) | def __init__(self, path: str, min_length=50):
method __getitem__ (line 62) | def __getitem__(self, idx):
method get_dataset_name (line 84) | def get_dataset_name(self):
method _get_mmap (line 87) | def _get_mmap(self):
method __setstate__ (line 100) | def __setstate__(self, state):
method __getstate__ (line 104) | def __getstate__(self):
method __del__ (line 111) | def __del__(self):
method exists (line 118) | def exists(path):
method __len__ (line 121) | def __len__(self):
class PackedDataset (line 128) | class PackedDataset(torch.utils.data.Dataset):
method __init__ (line 137) | def __init__(self, dataset, packed_length: int = 8192, seed: int = 1024):
method __len__ (line 162) | def __len__(self):
method search_sample_index (line 165) | def search_sample_index(self, pack_idx: int = 0):
method mapping (line 172) | def mapping(self, pack_idx: int = 0):
method build_pack (line 192) | def build_pack(self, begin_sample_idx: int, begin_token_id: int,
method __getitem__ (line 228) | def __getitem__(self, item: int):
function load_intern_repo_tokenized_dataset (line 235) | def load_intern_repo_tokenized_dataset(folder,
function load_intern_repo_untokenized_dataset (line 268) | def load_intern_repo_untokenized_dataset(processed_dataset_dict_path=None,
function build_packed_dataset_rank0 (line 332) | def build_packed_dataset_rank0(dataset_cfg, packed_length=8192, seed=1024):
function build_packed_dataset (line 352) | def build_packed_dataset(*args, **kwargs):
FILE: xtuner-eval_niah/xtuner/dataset/json_dataset.py
function load_json_file (line 7) | def load_json_file(data_files=None, data_dir=None, suffix=None):
FILE: xtuner-eval_niah/xtuner/dataset/llava.py
function load_jsonl (line 19) | def load_jsonl(json_file):
class LLaVADataset (line 28) | class LLaVADataset(Dataset):
method __init__ (line 30) | def __init__(self,
method modality_length (line 89) | def modality_length(self):
method __len__ (line 98) | def __len__(self):
method __getitem__ (line 101) | def __getitem__(self, index):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/alpaca_map_fn.py
function alpaca_map_fn (line 4) | def alpaca_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/alpaca_zh_map_fn.py
function alpaca_zh_map_fn (line 4) | def alpaca_zh_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/arxiv_map_fn.py
function arxiv_map_fn (line 5) | def arxiv_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/code_alpaca_map_fn.py
function code_alpaca_map_fn (line 5) | def code_alpaca_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/colors_map_fn.py
function colors_map_fn (line 5) | def colors_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/crime_kg_assitant_map_fn.py
function crime_kg_assitant_map_fn (line 5) | def crime_kg_assitant_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/default_map_fn.py
function default_map_fn (line 2) | def default_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/law_reference_map_fn.py
function law_reference_map_fn (line 5) | def law_reference_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/llava_map_fn.py
function llava_image_only_map_fn (line 5) | def llava_image_only_map_fn(example):
function llava_map_fn (line 25) | def llava_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/medical_map_fn.py
function medical_map_fn (line 5) | def medical_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/msagent_map_fn.py
function replace_think (line 9) | def replace_think(match):
function replace_exec (line 22) | def replace_exec(match):
function extract_json_objects (line 34) | def extract_json_objects(text, decoder=json.JSONDecoder()):
function msagent_react_map_fn (line 53) | def msagent_react_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/oasst1_map_fn.py
function oasst1_map_fn (line 2) | def oasst1_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/openai_map_fn.py
function openai_map_fn (line 2) | def openai_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/openorca_map_fn.py
function openorca_map_fn (line 2) | def openorca_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/pretrain_map_fn.py
function pretrain_map_fn (line 2) | def pretrain_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/sql_map_fn.py
function sql_map_fn (line 5) | def sql_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/stack_exchange_map_fn.py
function stack_exchange_map_fn (line 2) | def stack_exchange_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/tiny_codes_map_fn.py
function tiny_codes_map_fn (line 5) | def tiny_codes_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/dataset_map_fns/wizardlm_map_fn.py
function wizardlm_map_fn (line 2) | def wizardlm_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/map_fns/template_map_fn.py
function template_map_fn (line 7) | def template_map_fn(example, template):
function template_map_fn_factory (line 33) | def template_map_fn_factory(template):
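template_map_fn_factory binds a chat template into a per-example map function so datasets.map can apply it across a dataset. A simplified sketch; the template dict and conversation schema here are illustrative, not the repo's exact fields.

from functools import partial


def template_map_fn(example, template):
    conv = example["conversation"]
    for i, turn in enumerate(conv):
        # Wrap each user turn in the template; unused kwargs are ignored.
        turn["input"] = template["INSTRUCTION"].format(input=turn["input"],
                                                       round=i + 1)
    return {"conversation": conv}


def template_map_fn_factory(template):
    return partial(template_map_fn, template=template)


fn = template_map_fn_factory(
    {"INSTRUCTION": "<|user|>\n{input}\n<|assistant|>\n"})
print(fn({"conversation": [{"input": "Hi", "output": "Hello!"}]}))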
FILE: xtuner-eval_niah/xtuner/dataset/modelscope.py
function process_ms_dataset (line 8) | def process_ms_dataset(dataset, split='train', *args, **kwargs):
FILE: xtuner-eval_niah/xtuner/dataset/moss_sft.py
class MOSSSFTDataset (line 15) | class MOSSSFTDataset(Dataset):
method __init__ (line 17) | def __init__(self, data_file, tokenizer, max_length=2048, bot_name=None):
method load_data (line 43) | def load_data(self):
method process_data (line 118) | def process_data(self):
method __len__ (line 125) | def __len__(self):
method __getitem__ (line 128) | def __getitem__(self, index):
FILE: xtuner-eval_niah/xtuner/dataset/preference_dataset.py
function _worker (line 24) | def _worker(
function _chunk_data_to_queue (line 41) | def _chunk_data_to_queue(data_queue: Queue, data: List[Dict], chunk_size...
function _multi_progress (line 61) | def _multi_progress(tokenize_fun_p, dataset, nproc, task_num, chunksize,
function load_jsonl_dataset (line 90) | def load_jsonl_dataset(data_files=None, data_dir=None, suffix=None):
function tokenize (line 110) | def tokenize(pair: str,
class PreferenceDataset (line 156) | class PreferenceDataset(Dataset):
method __init__ (line 158) | def __init__(
method __len__ (line 194) | def __len__(self):
method __getitem__ (line 197) | def __getitem__(self, idx):
class PackedDatasetWrapper (line 201) | class PackedDatasetWrapper(Dataset):
method __init__ (line 203) | def __init__(self,
method __len__ (line 252) | def __len__(self):
method __getitem__ (line 255) | def __getitem__(self, index):
function unpack_seq (line 280) | def unpack_seq(seq, cu_seqlens):
function broad_cast_dataset (line 288) | def broad_cast_dataset(dataset):
function map_dataset (line 309) | def map_dataset(dataset, dataset_map_fn, map_num_proc):
function build_preference_dataset (line 324) | def build_preference_dataset(
function intel_orca_dpo_map_fn (line 368) | def intel_orca_dpo_map_fn(example):
function orpo_dpo_mix_40k_map_fn (line 381) | def orpo_dpo_mix_40k_map_fn(example):
FILE: xtuner-eval_niah/xtuner/dataset/refcoco_json.py
class RefCOCOJsonDataset (line 26) | class RefCOCOJsonDataset(LLaVADataset):
method __init__ (line 37) | def __init__(
method reformat_data (line 81) | def reformat_data(self, json_data):
method gen_refcoco_conversations (line 91) | def gen_refcoco_conversations(cls, data, instruction_template='{}'):
method get_data_json (line 127) | def get_data_json(
method normalize_bbox (line 168) | def normalize_bbox(cls, bbox, height, width):
class RefCOCOJsonEvalDataset (line 176) | class RefCOCOJsonEvalDataset(RefCOCOJsonDataset):
method reformat_data (line 179) | def reformat_data(self, json_data):
class InvRefCOCOJsonDataset (line 188) | class InvRefCOCOJsonDataset(RefCOCOJsonDataset):
method gen_refcoco_conversations (line 200) | def gen_refcoco_conversations(cls, data, instruction_template='{}'):
class REFER (line 241) | class REFER:
method __init__ (line 243) | def __init__(self, data_root, vis_root, dataset='refcoco', splitBy='un...
method createIndex (line 276) | def createIndex(self):
method getRefIds (line 340) | def getRefIds(self, image_ids=[], cat_ids=[], ref_ids=[], split=''):
method getAnnIds (line 372) | def getAnnIds(self, image_ids=[], cat_ids=[], ref_ids=[]):
method getImgIds (line 397) | def getImgIds(self, ref_ids=[]):
method getCatIds (line 408) | def getCatIds(self):
method loadRefs (line 411) | def loadRefs(self, ref_ids=[]):
method loadAnns (line 417) | def loadAnns(self, ann_ids=[]):
method loadImgs (line 423) | def loadImgs(self, image_ids=[]):
method loadCats (line 429) | def loadCats(self, cat_ids=[]):
method getRefBox (line 435) | def getRefBox(self, ref_id):
method showRef (line 440) | def showRef(self, ref, seg_box='box'):
FILE: xtuner-eval_niah/xtuner/dataset/samplers/intern_repo.py
class InternRepoSampler (line 13) | class InternRepoSampler(Sampler):
method __init__ (line 15) | def __init__(self,
method __iter__ (line 35) | def __iter__(self) -> Iterator[int]:
method __len__ (line 54) | def __len__(self) -> int:
method set_epoch (line 58) | def set_epoch(self, epoch: int) -> None:
class InternlmRepoSampler (line 71) | class InternlmRepoSampler(InternRepoSampler):
method __init__ (line 73) | def __init__(self,
FILE: xtuner-eval_niah/xtuner/dataset/samplers/length_grouped.py
function get_length_grouped_indices (line 11) | def get_length_grouped_indices(lengths, group_batch_size, generator=None):
class LengthGroupedSampler (line 72) | class LengthGroupedSampler(Sampler):
method __init__ (line 74) | def __init__(self,
method __iter__ (line 124) | def __iter__(self) -> Iterator[int]:
method __len__ (line 144) | def __len__(self) -> int:
method set_epoch (line 148) | def set_epoch(self, epoch: int) -> None:
FILE: xtuner-eval_niah/xtuner/dataset/utils.py
function get_bos_eos_token_ids (line 15) | def get_bos_eos_token_ids(tokenizer):
function encode_fn (line 36) | def encode_fn(example,
class Packer (line 132) | class Packer:
method __init__ (line 135) | def __init__(self,
method get_cumulative_len (line 146) | def get_cumulative_len(self, chunk_num):
method get_position_ids (line 179) | def get_position_ids(self, cumulative_len):
method __call__ (line 191) | def __call__(self, batch):
function expand2square (line 245) | def expand2square(pil_img, background_color):
function load_image (line 259) | def load_image(image_file):
function decode_base64_to_image (line 268) | def decode_base64_to_image(base64_string):
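Packer concatenates tokenized samples into fixed-length chunks and tracks cumulative_len so varlen attention can keep samples from attending across boundaries. A greedy sketch of the idea (the real Packer also splits samples across chunk boundaries and buffers leftovers between calls):

def pack_samples(samples, chunk_size):
    """samples: lists of token ids -> (chunks, per-chunk cumulative lengths)."""
    chunks, cu_lens = [], []
    buf, cu = [], [0]
    for ids in samples:
        if buf and len(buf) + len(ids) > chunk_size:
            chunks.append(buf)
            cu_lens.append(cu)
            buf, cu = [], [0]
        buf.extend(ids)
        cu.append(len(buf))
    if buf:
        chunks.append(buf)
        cu_lens.append(cu)
    return chunks, cu_lens


chunks, cu = pack_samples([[1, 2, 3], [4, 5], [6, 7, 8, 9]], chunk_size=6)
print(chunks)  # [[1, 2, 3, 4, 5], [6, 7, 8, 9]]
print(cu)      # [[0, 3, 5], [0, 4]]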
FILE: xtuner-eval_niah/xtuner/engine/_strategy/deepspeed.py
class DeepSpeedStrategy (line 11) | class DeepSpeedStrategy(MMEngineDeepSpeedStrategy):
method __init__ (line 13) | def __init__(self, *args, **kwargs):
method _wrap_model (line 24) | def _wrap_model(self, model):
method save_checkpoint (line 33) | def save_checkpoint(self, *args, **kwargs) -> None:
method load_checkpoint (line 45) | def load_checkpoint(self, *args, **kwargs) -> None:
method resume (line 54) | def resume(self, *args, **kwargs) -> None:
method _setup_distributed (line 63) | def _setup_distributed( # type: ignore
FILE: xtuner-eval_niah/xtuner/engine/hooks/dataset_info_hook.py
function split_list (line 8) | def split_list(lst, value):
class DatasetInfoHook (line 21) | class DatasetInfoHook(Hook):
method __init__ (line 23) | def __init__(self, tokenizer, is_intern_repo_dataset=False):
method log (line 27) | def log(self, runner, dataset, mode='train'):
method before_train (line 42) | def before_train(self, runner) -> None:
method before_val (line 52) | def before_val(self, runner) -> None:
method before_test (line 56) | def before_test(self, runner) -> None:
FILE: xtuner-eval_niah/xtuner/engine/hooks/evaluate_chat_hook.py
class EvaluateChatHook (line 20) | class EvaluateChatHook(Hook):
method __init__ (line 24) | def __init__(self,
method _save_eval_output (line 98) | def _save_eval_output(self, runner, eval_outputs):
method _eval_images (line 106) | def _eval_images(self,
method _eval_language (line 167) | def _eval_language(self,
method _generate_samples (line 194) | def _generate_samples(self,
method before_train (line 232) | def before_train(self, runner):
method _is_save_checkpoint (line 236) | def _is_save_checkpoint(self, runner):
method after_train_iter (line 254) | def after_train_iter(self,
method after_train (line 273) | def after_train(self, runner):
method after_val (line 277) | def after_val(self, runner) -> None:
FILE: xtuner-eval_niah/xtuner/engine/hooks/hf_checkpoint_hook.py
class HFCheckpointHook (line 19) | class HFCheckpointHook(Hook):
method __init__ (line 23) | def __init__(self, out_dir: Optional[Union[str, Path]] = None) -> None:
method _use_shard_moe (line 27) | def _use_shard_moe(llm):
method after_run (line 32) | def after_run(self,
Condensed preview: 1928 files, each entry giving the file path, character count, and a content snippet (full structured content: 34,732K chars).
[
{
"path": ".gitattributes",
"chars": 71,
"preview": "# Auto detect text files and perform LF normalization\nlogs\n* text=auto\n"
},
{
"path": "LICENSE",
"chars": 1064,
"preview": "MIT License\n\nCopyright (c) 2025 Yi Wang\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof"
},
{
"path": "README.md",
"chars": 6511,
"preview": "\n<div align=\"center\">\n\n\n<h2><a href=\"https://www.arxiv.org/abs/2501.00574\">VideoChat-Flash: Hierarchical Compression for"
},
{
"path": "llava-train_videochat/.dockerignore",
"chars": 359,
"preview": "# The .dockerignore file excludes files from the container build process.\n#\n# https://docs.docker.com/engine/reference/b"
},
{
"path": "llava-train_videochat/.editorconfig",
"chars": 319,
"preview": "root = true\n\n# Unix-style newlines with a newline ending every file\n[*]\nend_of_line = lf\ninsert_final_newline = true\ntri"
},
{
"path": "llava-train_videochat/.gitattributes",
"chars": 772,
"preview": "# https://git-scm.com/docs/gitattributes\n\n# Set the default behavior, in case people don't have core.autocrlf set.\n# htt"
},
{
"path": "llava-train_videochat/.gitignore",
"chars": 916,
"preview": "# Python\n__pycache__\n*.pyc\n*.egg-info\ndist\n\n# Log\n*.log\n*.log.*\n# *.json\n# *.jsonl\n\n# Data\n!**/alpaca-data-conversation."
},
{
"path": "llava-train_videochat/LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "llava-train_videochat/README.md",
"chars": 4523,
"preview": "# 👀How to train and evaluate VideoChat-Flash?🦜\n\n\n## 1. Prepare Training Data\n\n\nWe need to address the fact that our data"
},
{
"path": "llava-train_videochat/cog.yaml",
"chars": 977,
"preview": "# Configuration for Cog ⚙️\n# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md\n\nbuild:\n gpu: true\n\n p"
},
{
"path": "llava-train_videochat/data/ablation_short-long_mix_sft.yaml",
"chars": 20693,
"preview": "datasets:\n # image sft datasets\n - json_path: annotations/image/textcaps.json # 21942 \n sampling_strategy: \"first:2"
},
{
"path": "llava-train_videochat/data/stage1_init_connector_iv1m.yaml",
"chars": 389,
"preview": "datasets:\n - json_path: OpenGVLab/VideoChat-Flash-Training-Data/annotations/video/smit_caption_481k.json\n sampling_s"
},
{
"path": "llava-train_videochat/data/stage2_short_pretrain_iv6m.yaml",
"chars": 2088,
"preview": "datasets:\n - json_path: annotations/image/LLaVA-ReCap-118K.json\n sampling_strategy: all\n data_root: https://huggi"
},
{
"path": "llava-train_videochat/data/stage3_short-long_mix_sft.yaml",
"chars": 20105,
"preview": "datasets:\n # image sft datasets\n - json_path: annotations/image/textcaps.json # 21942 \n sampling_strategy: all \n "
},
{
"path": "llava-train_videochat/data/stage4_highres_postsft.yaml",
"chars": 21101,
"preview": "datasets:\n # image sft datasets, 6w\n\n - json_path: annotations/image/synthdog_zh_processed.json\n data_root: https:/"
},
{
"path": "llava-train_videochat/llava/__init__.py",
"chars": 120,
"preview": "from .model import LlavaQwenForCausalLM\nfrom .train.train import LazySupervisedDataset, DataCollatorForSupervisedDataset"
},
{
"path": "llava-train_videochat/llava/constants.py",
"chars": 293,
"preview": "CONTROLLER_HEART_BEAT_EXPIRATION = 30\nWORKER_HEART_BEAT_INTERVAL = 15\n\nLOGDIR = \".\"\n\n# Model Constants\nIGNORE_INDEX = -1"
},
{
"path": "llava-train_videochat/llava/conversation.py",
"chars": 22577,
"preview": "import dataclasses\nfrom enum import auto, Enum\nfrom typing import List, Any, Dict, Union, Tuple\nimport re\nimport base64\n"
},
{
"path": "llava-train_videochat/llava/dist_utils.py",
"chars": 2566,
"preview": "import json\nimport os\nimport builtins\nimport datetime\nimport time\nimport subprocess\n\nimport torch\nimport torch.distribut"
},
{
"path": "llava-train_videochat/llava/mm_utils.py",
"chars": 22883,
"preview": "from PIL import Image\nfrom io import BytesIO\nimport base64\nimport math\nimport ast\nimport re\nimport torch\nfrom transforme"
},
{
"path": "llava-train_videochat/llava/model/__init__.py",
"chars": 438,
"preview": "import os\n\nAVAILABLE_MODELS = {\n \"llava_qwen\": \"LlavaQwenForCausalLM, LlavaQwenConfig\",\n \"llava_qwen_flash\": \"Llav"
},
{
"path": "llava-train_videochat/llava/model/apply_delta.py",
"chars": 1932,
"preview": "\"\"\"\nUsage:\npython3 -m fastchat.model.apply_delta --base ~/model_weights/llama-7b --target ~/model_weights/vicuna-7b --de"
},
{
"path": "llava-train_videochat/llava/model/builder.py",
"chars": 23729,
"preview": "# Copyright 2023 Haotian Liu\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not "
},
{
"path": "llava-train_videochat/llava/model/consolidate.py",
"chars": 915,
"preview": "\"\"\"\nUsage:\npython3 -m llava.model.consolidate --src ~/model_weights/llava-7b --dst ~/model_weights/llava-7b_consolidate\n"
},
{
"path": "llava-train_videochat/llava/model/language_model/llava_qwen.py",
"chars": 6169,
"preview": "# Copyright 2024 Hao Zhang\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not us"
},
{
"path": "llava-train_videochat/llava/model/language_model/llava_qwen_flash.py",
"chars": 6448,
"preview": "# Copyright 2024 Hao Zhang\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not us"
},
{
"path": "llava-train_videochat/llava/model/language_model/modeling_qwen2_flash.py",
"chars": 73837,
"preview": "# coding=utf-8\n# transformers==4.39.2 or 4.40.1 NOTE\n# Copyright 2024 The Qwen team, Alibaba Group and the HuggingFace I"
},
{
"path": "llava-train_videochat/llava/model/llava_arch.py",
"chars": 44570,
"preview": "# Copyright 2023 Haotian Liu\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not "
},
{
"path": "llava-train_videochat/llava/model/make_delta.py",
"chars": 2251,
"preview": "\"\"\"\nUsage:\npython3 -m llava.model.make_delta --base ~/model_weights/llama-7b --target ~/model_weights/llava-7b --delta ~"
},
{
"path": "llava-train_videochat/llava/model/multimodal_encoder/builder.py",
"chars": 1621,
"preview": "import os\nfrom .clip_encoder import CLIPVisionTower\nfrom .siglip_encoder import SigLipVisionTower\nfrom .clip_encoder imp"
},
{
"path": "llava-train_videochat/llava/model/multimodal_encoder/clip_encoder.py",
"chars": 7275,
"preview": "import torch\nimport torch.nn as nn\nfrom llava.utils import rank0_print\nfrom transformers import CLIPVisionModel, CLIPIma"
},
{
"path": "llava-train_videochat/llava/model/multimodal_encoder/internvideo2/__init__.py",
"chars": 61,
"preview": "\nfrom .vit_scale_clean import PretrainVisionTransformer_clean"
},
{
"path": "llava-train_videochat/llava/model/multimodal_encoder/internvideo2/flash_attention_class.py",
"chars": 3203,
"preview": "import torch\nimport torch.nn as nn\n\nfrom einops import rearrange\n\nfrom flash_attn.flash_attn_interface import flash_attn"
},
{
"path": "llava-train_videochat/llava/model/multimodal_encoder/internvideo2/pos_embed.py",
"chars": 13731,
"preview": "import numpy as np\nimport torch\nimport logging\n\nlogger = logging.getLogger(__name__)\n\n# --------------------------------"
},
{
"path": "llava-train_videochat/llava/model/multimodal_encoder/internvideo2/vit_scale_clean.py",
"chars": 26020,
"preview": "import math\nimport logging\nimport torch\nimport torch.nn.functional as F\nfrom timm.models.layers import DropPath, to_2tup"
},
{
"path": "llava-train_videochat/llava/model/multimodal_encoder/internvideo2_encoder.py",
"chars": 8177,
"preview": "\"\"\"\n# Adapted from https://huggingface.co/MILVLG/imp-v1-3b/blob/main/vision_encoder.py\n\"\"\"\n\nfrom typing import Optional,"
},
{
"path": "llava-train_videochat/llava/model/multimodal_encoder/siglip_encoder.py",
"chars": 25483,
"preview": "\"\"\"\n# Adapted from https://huggingface.co/MILVLG/imp-v1-3b/blob/main/vision_encoder.py\n\"\"\"\n\nfrom typing import Optional,"
},
{
"path": "llava-train_videochat/llava/model/multimodal_encoder/umt/vit.py",
"chars": 17981,
"preview": "import numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.utils.checkpoint as c"
},
{
"path": "llava-train_videochat/llava/model/multimodal_encoder/umt_encoder.py",
"chars": 7410,
"preview": "\"\"\"\n# Adapted from https://huggingface.co/MILVLG/imp-v1-3b/blob/main/vision_encoder.py\n\"\"\"\n\nfrom typing import Optional,"
},
{
"path": "llava-train_videochat/llava/model/multimodal_projector/builder.py",
"chars": 2136,
"preview": "import torch\nimport torch.nn as nn\nimport re\n\nfrom .tome16_mlp_hd64 import ToMe16_mlp_hd64\n\nclass IdentityMap(nn.Module)"
},
{
"path": "llava-train_videochat/llava/model/multimodal_projector/tome16_mlp_hd64.py",
"chars": 5418,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n\n# This source code is licensed under the li"
},
{
"path": "llava-train_videochat/llava/model/utils.py",
"chars": 927,
"preview": "from transformers import AutoConfig\n\n\ndef auto_upgrade(config):\n cfg = AutoConfig.from_pretrained(config)\n if \"lla"
},
{
"path": "llava-train_videochat/llava/serialize_utils.py",
"chars": 6440,
"preview": "# Description: This file contains the code for serializing the dataset.\n# From https://github.com/ppwwyyxx/RAM-multiproc"
},
{
"path": "llava-train_videochat/llava/train/llava_trainer.py",
"chars": 23577,
"preview": "import os\nimport torch\nimport torch.nn as nn\nimport datetime\n\nfrom accelerate import Accelerator\nfrom accelerate.utils i"
},
{
"path": "llava-train_videochat/llava/train/llava_trainer_eval.py",
"chars": 3439,
"preview": "import json\nimport subprocess\n\nfrom llava.train.llava_trainer import LLaVATrainer\n\n\nclass LLaVAEvalTrainer(LLaVATrainer)"
},
{
"path": "llava-train_videochat/llava/train/train.py",
"chars": 98727,
"preview": "# Adopted from https://github.com/lm-sys/FastChat. Below is the original copyright:\n# Adopted from tatsu-lab@stanford_al"
},
{
"path": "llava-train_videochat/llava/train/train_mem.py",
"chars": 156,
"preview": "from llava.train.train import train\nfrom llava.dist_utils import init_distributed_mode\n\n\nif __name__ == \"__main__\":\n "
},
{
"path": "llava-train_videochat/llava/utils.py",
"chars": 6342,
"preview": "import datetime\nimport logging\nimport logging.handlers\nimport os\nimport sys\nimport numpy as np\n\nimport requests\n\nfrom ll"
},
{
"path": "llava-train_videochat/llava/video_utils.py",
"chars": 15097,
"preview": "import random\nimport os\nimport io\nimport av\nimport cv2\nimport decord\nimport imageio\nfrom decord import VideoReader\nimpor"
},
{
"path": "llava-train_videochat/pyproject.toml",
"chars": 2376,
"preview": "[tool.black]\nline-length = 240\n\n[build-system]\nrequires = [\"setuptools>=61.0\"]\nbuild-backend = \"setuptools.build_meta\"\n\n"
},
{
"path": "llava-train_videochat/requirements.txt",
"chars": 5887,
"preview": "Babel==2.14.0\nDataProperty==1.0.1\nDeprecated==1.2.14\nGitPython==3.1.43\nJinja2==3.1.3\nLevenshtein==0.25.1\nMarkupSafe==2.1"
},
{
"path": "llava-train_videochat/scripts/train/stage1-init_connector/stage1_internvideo2_tome16_res224_qwen7b.sh",
"chars": 2480,
"preview": "export OMP_NUM_THREADS=1\nexport DISABLE_ADDMM_CUDA_LT=1\nexport TORCH_CUDNN_USE_HEURISTIC_MODE_B=1\n\nDATA_VERSION=\"data/st"
},
{
"path": "llava-train_videochat/scripts/train/stage1-init_connector/stage1_umt_tome16_res224_qwen7b.sh",
"chars": 2470,
"preview": "export OMP_NUM_THREADS=1\nexport DISABLE_ADDMM_CUDA_LT=1\nexport TORCH_CUDNN_USE_HEURISTIC_MODE_B=1\n\nDATA_VERSION=\"data/st"
},
{
"path": "llava-train_videochat/scripts/train/stage1-init_connector/stage1_umt_tome16_res448_qwen1_5b.sh",
"chars": 2484,
"preview": "export OMP_NUM_THREADS=1\nexport DISABLE_ADDMM_CUDA_LT=1\nexport TORCH_CUDNN_USE_HEURISTIC_MODE_B=1\n\nDATA_VERSION=\"data/st"
},
{
"path": "llava-train_videochat/scripts/train/stage2-visual_pretraining/stage2_internvideo2_tome16_res224_qwen_7b.sh",
"chars": 2953,
"preview": "export OMP_NUM_THREADS=1\nexport DISABLE_ADDMM_CUDA_LT=1\nexport TORCH_CUDNN_USE_HEURISTIC_MODE_B=1\n\n\nDATA_VERSION=\"data/s"
},
{
"path": "llava-train_videochat/scripts/train/stage2-visual_pretraining/stage2_umt_tome16_res224_qwen_7b.sh",
"chars": 2943,
"preview": "export OMP_NUM_THREADS=1\nexport DISABLE_ADDMM_CUDA_LT=1\nexport TORCH_CUDNN_USE_HEURISTIC_MODE_B=1\n\n\nDATA_VERSION=\"data/s"
},
{
"path": "llava-train_videochat/scripts/train/stage2-visual_pretraining/stage2_umt_tome16_res448_qwen_1_5b.sh",
"chars": 2957,
"preview": "export OMP_NUM_THREADS=1\nexport DISABLE_ADDMM_CUDA_LT=1\nexport TORCH_CUDNN_USE_HEURISTIC_MODE_B=1\n\n\nDATA_VERSION=\"data/s"
},
{
"path": "llava-train_videochat/scripts/train/stage3-video_sft/stage3_internvideo2_tome16_res224_qwen_7b.sh",
"chars": 2828,
"preview": "export OMP_NUM_THREADS=1\nexport DISABLE_ADDMM_CUDA_LT=1\nexport TORCH_CUDNN_USE_HEURISTIC_MODE_B=1\n\n\nDATA_VERSION=\"data/s"
},
{
"path": "llava-train_videochat/scripts/train/stage3-video_sft/stage3_umt_tome16_res224_qwen_7b.sh",
"chars": 2821,
"preview": "export OMP_NUM_THREADS=1\nexport DISABLE_ADDMM_CUDA_LT=1\nexport TORCH_CUDNN_USE_HEURISTIC_MODE_B=1\n\n\n\nDATA_VERSION=\"data/"
},
{
"path": "llava-train_videochat/scripts/train/stage3-video_sft/stage3_umt_tome16_res448_qwen_1_5b.sh",
"chars": 2831,
"preview": "export OMP_NUM_THREADS=1\nexport DISABLE_ADDMM_CUDA_LT=1\nexport TORCH_CUDNN_USE_HEURISTIC_MODE_B=1\n\n\n\nDATA_VERSION=\"data/"
},
{
"path": "llava-train_videochat/scripts/train/stage4_highres_postft/stage4_umt_tome16_res448_qwen_7b.sh",
"chars": 2956,
"preview": "export OMP_NUM_THREADS=1\nexport DISABLE_ADDMM_CUDA_LT=1\nexport TORCH_CUDNN_USE_HEURISTIC_MODE_B=1\n\n\n\nDATA_VERSION=\"data/"
},
{
"path": "llava-train_videochat/scripts/zero1.json",
"chars": 732,
"preview": "{\n \"fp16\": {\n \"enabled\": \"auto\",\n \"loss_scale\": 0,\n \"loss_scale_window\": 1000,\n \"initial_"
},
{
"path": "llava-train_videochat/scripts/zero2.json",
"chars": 1028,
"preview": "{\n \"fp16\": {\n \"enabled\": \"auto\",\n \"loss_scale\": 0,\n \"loss_scale_window\": 1000,\n \"initial_"
},
{
"path": "llava-train_videochat/scripts/zero2_fused_adamw.json",
"chars": 1027,
"preview": "{\n \"fp16\": {\n \"enabled\": \"auto\",\n \"loss_scale\": 0,\n \"loss_scale_window\": 1000,\n \"initial_"
},
{
"path": "llava-train_videochat/scripts/zero2_offload.json",
"chars": 756,
"preview": "{\n \"fp16\": {\n \"enabled\": \"auto\",\n \"loss_scale\": 0,\n \"loss_scale_window\": 1000,\n \"initial_"
},
{
"path": "llava-train_videochat/scripts/zero3.json",
"chars": 1101,
"preview": "{\n \"fp16\": {\n \"enabled\": \"auto\",\n \"loss_scale\": 0,\n \"loss_scale_window\": 1000,\n \"initial_"
},
{
"path": "llava-train_videochat/scripts/zero3_offload.json",
"chars": 1288,
"preview": "{\n \"fp16\": {\n \"enabled\": \"auto\",\n \"loss_scale\": 0,\n \"loss_scale_window\": 1000,\n \"initial_"
},
{
"path": "llava-train_videochat/scripts/zero3pp.json",
"chars": 1420,
"preview": "{\n \"fp16\": {\n \"enabled\": \"auto\",\n \"loss_scale\": 0,\n \"loss_scale_window\": 1000,\n \"initial_"
},
{
"path": "lmms-eval_videochat/.gitignore",
"chars": 400,
"preview": "env\n*.pyc\noutput/\ndata/\nlm_cache\n.idea\nbuild\ndist\n*.egg-info\nvenv\n.vscode/\ntemp\n__pycache__\n.ipynb_checkpoints\ntemp\n.DS_"
},
{
"path": "lmms-eval_videochat/.pre-commit-config.yaml",
"chars": 125,
"preview": "repos:\n - repo: https://github.com/psf/black\n rev: 23.12.1\n hooks:\n - id: black\n language_version: py"
},
{
"path": "lmms-eval_videochat/LICENSE",
"chars": 2732,
"preview": "# For the main pipeline structure-related code, we maintain the original license provided with lm-evaluation-harness, wh"
},
{
"path": "lmms-eval_videochat/README.md",
"chars": 2247,
"preview": "\n# How to use\n\nWe have modified the data loading method for lmms-eval: instead of loading from Huggingface, the data is "
},
{
"path": "lmms-eval_videochat/docs/README.md",
"chars": 640,
"preview": "# LMMs Eval Documentation\n\nWelcome to the docs for `lmms-eval`!\n\nMajority of this documentation is adapted from [lm-eval"
},
{
"path": "lmms-eval_videochat/docs/commands.md",
"chars": 3783,
"preview": "# User Guide\nThis document details the interface exposed by `lmms_eval` and provides details on what flags are available"
},
{
"path": "lmms-eval_videochat/docs/current_tasks.md",
"chars": 4131,
"preview": "# Current Tasks\n\n> () indicates the task name in the lmms_eval. The task name is also used to specify the dataset in the"
},
{
"path": "lmms-eval_videochat/docs/model_guide.md",
"chars": 5277,
"preview": "# New Model Guide\nIn order to properly evaluate a given LM, we require implementation of a wrapper class subclassing the"
},
{
"path": "lmms-eval_videochat/docs/run_examples.md",
"chars": 10194,
"preview": "# User Guide\nThis document details the running examples for different models in `lmms_eval`. We include commandas on how"
},
{
"path": "lmms-eval_videochat/docs/task_guide.md",
"chars": 6775,
"preview": "# Task Configuration\n\nThe `lmms_eval` is meant to be an extensible and flexible framework within which many different ev"
},
{
"path": "lmms-eval_videochat/eval_annotations/LVBench/README.md",
"chars": 1160,
"preview": "---\nlicense: mit\nextra_gated_prompt: >-\n You agree to not use the dataset to conduct experiments that cause harm to\n h"
},
{
"path": "lmms-eval_videochat/eval_annotations/LVBench/json/lvbench_clean.json",
"chars": 801607,
"preview": "[\n {\n \"video\": \"00000000\",\n \"type\": \"cartoon\",\n \"question\": \"What year appears in the opening ca"
},
{
"path": "lmms-eval_videochat/eval_annotations/LVBench/json/lvbench_clean_cartoon.json",
"chars": 139863,
"preview": "[\n {\n \"video\": \"00000000\",\n \"type\": \"cartoon\",\n \"question\": \"What year appears in the opening ca"
},
{
"path": "lmms-eval_videochat/eval_annotations/LVBench/json/lvbench_clean_documentary.json",
"chars": 104453,
"preview": "[\n {\n \"video\": \"00000007\",\n \"type\": \"documentary\",\n \"question\": \"There is a scene of of cutting "
},
{
"path": "lmms-eval_videochat/eval_annotations/LVBench/json/lvbench_clean_live.json",
"chars": 123404,
"preview": "[\n {\n \"video\": \"00000014\",\n \"type\": \"live\",\n \"question\": \"Which movie does the host parody in hi"
},
{
"path": "lmms-eval_videochat/eval_annotations/LVBench/json/lvbench_clean_selfmedia.json",
"chars": 163880,
"preview": "[\n {\n \"video\": \"00000021\",\n \"type\": \"selfmedia\",\n \"question\": \"What is she doing on the stairs a"
},
{
"path": "lmms-eval_videochat/eval_annotations/LVBench/json/lvbench_clean_sport.json",
"chars": 137411,
"preview": "[\n {\n \"video\": \"00000032\",\n \"type\": \"sport\",\n \"question\": \"Which player is responsible for the f"
},
{
"path": "lmms-eval_videochat/eval_annotations/LVBench/json/lvbench_clean_tv.json",
"chars": 132606,
"preview": "[\n {\n \"video\": \"00000038\",\n \"type\": \"tv\",\n \"question\": \"What type of video is this?\",\n \"c"
},
{
"path": "lmms-eval_videochat/eval_annotations/LongVideoBench/README.md",
"chars": 10050,
"preview": "---\nlicense: cc-by-nc-sa-4.0\nextra_gated_prompt: >-\n The LongVideoBench dataset contains links to web videos for data c"
},
{
"path": "lmms-eval_videochat/eval_annotations/LongVideoBench/lvb_test_wo_gt.json",
"chars": 4888045,
"preview": "[{\"video_id\": \"G1D9C7kRx10\", \"question\": \"On a desk with a needle-shaped green leaf, there is a picture. A person is dra"
},
{
"path": "lmms-eval_videochat/eval_annotations/LongVideoBench/lvb_val.json",
"chars": 1622244,
"preview": "[\n {\n \"video_id\": \"86CxyhFV9MI\",\n \"question\": \"In the video, which subtitles appear at the same time as"
},
{
"path": "lmms-eval_videochat/eval_annotations/MLVU_MC/README.md",
"chars": 1064,
"preview": "---\nlicense: mit\nextra_gated_prompt: >-\n You agree to not use the dataset to conduct experiments that cause harm to\n h"
},
{
"path": "lmms-eval_videochat/eval_annotations/MLVU_MC/json/1_plotQA.json",
"chars": 201075,
"preview": "[\n {\n \"video\": \"movie101_66.mp4\",\n \"duration\": 246,\n \"question\": \"What color is the main male ch"
},
{
"path": "lmms-eval_videochat/eval_annotations/MLVU_MC/json/2_needle.json",
"chars": 143990,
"preview": "[\n {\n \"video\": \"needle_32.mp4\",\n \"duration\": 467.98,\n \"question\": \"What does the hand coming out"
},
{
"path": "lmms-eval_videochat/eval_annotations/MLVU_MC/json/3_ego.json",
"chars": 125238,
"preview": "[\n {\n \"video\": \"ego_35.mp4\",\n \"duration\": 408.63333328750014,\n \"question\": \"What did I put in th"
},
{
"path": "lmms-eval_videochat/eval_annotations/MLVU_MC/json/4_count.json",
"chars": 73315,
"preview": "[\n {\n \"video\": \"count_126.mp4\",\n \"duration\": 572.8599999999999,\n \"question\": \"Throughout this vi"
},
{
"path": "lmms-eval_videochat/eval_annotations/MLVU_MC/json/5_order.json",
"chars": 168502,
"preview": "[\n {\n \"video\": \"order_126.mp4\",\n \"duration\": 665.3399999999999,\n \"question\": \"Arrange the follow"
},
{
"path": "lmms-eval_videochat/eval_annotations/MLVU_MC/json/6_anomaly_reco.json",
"chars": 78241,
"preview": "[\n {\n \"video\": \"surveil_20.mp4\",\n \"duration\": 485.17,\n \"question\": \"Does this surveillance foota"
},
{
"path": "lmms-eval_videochat/eval_annotations/MLVU_MC/json/7_topic_reasoning.json",
"chars": 94879,
"preview": "[\n {\n \"video\": \"AWA-6.mp4\",\n \"duration\": 450.0,\n \"question\": \"What is the main background of the"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/README.md",
"chars": 2144,
"preview": "---\nlicense: mit\nextra_gated_prompt: >-\n You agree to not use the dataset to conduct experiments that cause harm to\n h"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/action_antonym.json",
"chars": 70940,
"preview": "[\n {\n \"video\": \"166583.webm\",\n \"question\": \"What is the action performed by the person in the video?\",\n"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/action_count.json",
"chars": 32742,
"preview": "[{\"video\": \"video_6480.mp4\", \"question\": \"How many times did the person launch objects on the table?\", \"candidates\": [\"3"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/action_localization.json",
"chars": 78029,
"preview": "[{\"video\": \"RPY8D.mp4\", \"question\": \"During which part of the video does the action 'person sitting on a couch' occur?\","
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/action_prediction.json",
"chars": 46197,
"preview": "[{\"video\": \"AJTDO.mp4\", \"question\": \"What will the person do next?\", \"candidates\": [\"Put down the pillow.\", \"Open the do"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/action_sequence.json",
"chars": 51606,
"preview": "[{\"video\": \"ZS9XR.mp4\", \"question\": \"What happened after the person took the food?\", \"candidates\": [\"Ate the medicine.\","
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/character_order.json",
"chars": 30205,
"preview": "[{\"video\": \"video_1238.mp4\", \"question\": \"What letter did the person write first on the paper?\", \"candidates\": [\"l\", \"v\""
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/counterfactual_inference.json",
"chars": 65394,
"preview": "[{\"video\": \"video_11363.mp4\", \"question\": \"Which of the following will happen if the cylinder is removed?\", \"candidates\""
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/egocentric_navigation.json",
"chars": 82642,
"preview": "[{\"video\": \"left/4504_frame52.mp4\", \"question\": \"This is a navigation video of an agent following instruction: \\\"Go up t"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/episodic_reasoning.json",
"chars": 177477,
"preview": "[{\"video\": \"castle_s07e04_seg02_clip_14\", \"question\": \"Why did Castle dress like a fairy when he was speaking to Emily?\""
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/fine_grained_action.json",
"chars": 48694,
"preview": "[{\"video\": \"validation/bathing/giphy-DtWRNT4M9ym6Q_0.mp4\", \"question\": \"What is the action performed by the person in th"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/fine_grained_pose.json",
"chars": 42247,
"preview": "[{\"video\": \"S010C002P017R002A005_rgb.avi\", \"question\": \"What is the action performed by the person in the video?\", \"cand"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/moving_attribute.json",
"chars": 34139,
"preview": "[{\"video\": \"video_12600.mp4\", \"question\": \"What color is the object that is stationary?\", \"candidates\": [\"gray\", \"green\""
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/moving_count.json",
"chars": 27859,
"preview": "[{\"video\": \"video_11123.mp4\", \"question\": \"How many red objects are moving?\", \"candidates\": [\"3\", \"5\", \"4\", \"2\"], \"answe"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/moving_direction.json",
"chars": 52782,
"preview": "[{\"video\": \"video_14727.mp4\", \"question\": \"What direction is the cyan sphere moving in within the video?\", \"candidates\":"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/object_existence.json",
"chars": 30079,
"preview": "[{\"video\": \"video_13344.mp4\", \"question\": \"Are there any moving green objects when the video ends?\", \"candidates\": [\"not"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/object_interaction.json",
"chars": 52932,
"preview": "[{\"video\": \"J84TZ.mp4\", \"question\": \"Which object was taken by the person?\", \"candidates\": [\"The dish.\", \"The box.\", \"Th"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/object_shuffle.json",
"chars": 77141,
"preview": "[{\"video\": \"video_10336.mp4\", \"question\": \"The person uses multiple similar objects to play an occlusion game. Where is "
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/scene_transition.json",
"chars": 67576,
"preview": "[{\"video\": \"Top010_04755.mp4\", \"question\": \"What's the right option for how the scenes in the video change?\", \"candidate"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/state_change.json",
"chars": 60586,
"preview": "[{\"video\": \"video_2370.mp4\", \"question\": \"What can you say about the temperature of the water being poured?\", \"candidate"
},
{
"path": "lmms-eval_videochat/eval_annotations/MVBench/json/unexpected_action.json",
"chars": 81671,
"preview": "[{\"video\": \"test_humor/H_T_1227_0000_0191.mp4\", \"question\": \"What unusual action is the man next to the table tennis pla"
},
{
"path": "lmms-eval_videochat/eval_annotations/PerceptionTest/.gitattributes",
"chars": 2307,
"preview": "*.7z filter=lfs diff=lfs merge=lfs -text\n*.arrow filter=lfs diff=lfs merge=lfs -text\n*.bin filter=lfs diff=lfs merge=lfs"
},
{
"path": "lmms-eval_videochat/eval_annotations/PerceptionTest/README.md",
"chars": 1018,
"preview": "---\nlicense: mit\ndataset_info:\n- config_name: mc_question\n features:\n - name: video_name\n dtype: string\n - name: q"
},
{
"path": "lmms-eval_videochat/eval_annotations/Temporal_Grounding/README.md",
"chars": 721,
"preview": "---\nlicense: mit\nextra_gated_prompt: >-\n You agree to not use the dataset to conduct experiments that cause harm to\n h"
},
{
"path": "lmms-eval_videochat/eval_annotations/Temporal_Grounding/json/temporal_grounding_charades.json",
"chars": 618208,
"preview": "[\n {\n \"video\": \"3MSZA.mp4\",\n \"caption\": \"person turn a light on.\",\n \"timestamp\": [\n 2"
},
{
"path": "lmms-eval_videochat/eval_annotations/Video-MME/README.md",
"chars": 677,
"preview": "---\ndataset_info:\n config_name: videomme\n features:\n - name: video_id\n dtype: string\n - name: duration\n dtype:"
},
{
"path": "lmms-eval_videochat/lmms_eval/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lmms-eval_videochat/lmms_eval/__main__.py",
"chars": 14256,
"preview": "import importlib\nimport os\nimport yaml\nimport sys\nimport json\n\nimport traceback\nimport argparse\nimport numpy as np\nimpor"
},
{
"path": "lmms-eval_videochat/lmms_eval/api/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "lmms-eval_videochat/lmms_eval/api/filter.py",
"chars": 2025,
"preview": "from dataclasses import dataclass\nfrom typing import List\n\nfrom lmms_eval.api.instance import Instance\nfrom datasets imp"
},
{
"path": "lmms-eval_videochat/lmms_eval/api/instance.py",
"chars": 973,
"preview": "from dataclasses import dataclass, field\nfrom typing import Literal, Tuple\n\n\n@dataclass\nclass Instance:\n request_type"
},
{
"path": "lmms-eval_videochat/lmms_eval/api/metrics.py",
"chars": 12426,
"preview": "import math\nfrom collections.abc import Iterable\n\nimport numpy as np\nimport sacrebleu\nimport sklearn.metrics\nimport rand"
},
{
"path": "lmms-eval_videochat/lmms_eval/api/model.py",
"chars": 7290,
"preview": "import abc\nimport os\n\nfrom typing import Union, List, Tuple, Optional, Type, TypeVar\nfrom sqlitedict import SqliteDict\ni"
},
{
"path": "lmms-eval_videochat/lmms_eval/api/registry.py",
"chars": 4545,
"preview": "from lmms_eval.api.model import lmms\n\nfrom typing import Callable, Dict\nimport evaluate as hf_evaluate\n\nfrom loguru impo"
},
{
"path": "lmms-eval_videochat/lmms_eval/api/samplers.py",
"chars": 3548,
"preview": "class ContextSampler:\n def __init__(self, docs, task, fewshot_indices=None, rnd=None) -> None:\n self.rnd = rnd"
},
{
"path": "lmms-eval_videochat/lmms_eval/api/task.py",
"chars": 60168,
"preview": "import abc\nimport ast\nimport itertools\nimport json\n\nimport os\nimport re\nimport random\nimport shutil\nimport inspect\nimpor"
},
{
"path": "lmms-eval_videochat/lmms_eval/evaluator.py",
"chars": 27966,
"preview": "import os\nimport time\nimport random\nimport itertools\nimport json\nimport collections\nimport sys\nimport inspect\nfrom tqdm "
},
{
"path": "lmms-eval_videochat/lmms_eval/filters/__init__.py",
"chars": 1585,
"preview": "from lmms_eval.api.filter import FilterEnsemble, Filter\nfrom . import selection\nfrom . import extraction\nfrom . import t"
},
{
"path": "lmms-eval_videochat/lmms_eval/filters/decontamination.py",
"chars": 631,
"preview": "from lmms_eval.api.filter import Filter\n\n\nclass DecontaminationFilter(Filter):\n \"\"\"\n A filter which evaluates\n "
},
{
"path": "lmms-eval_videochat/lmms_eval/filters/extraction.py",
"chars": 10468,
"preview": "import re\nimport sys\nimport unicodedata\nfrom lmms_eval.api.filter import Filter\n\n\nclass WhitespaceFilter(Filter):\n \"\""
},
{
"path": "lmms-eval_videochat/lmms_eval/filters/selection.py",
"chars": 1584,
"preview": "from collections import Counter\n\nfrom lmms_eval.api.filter import Filter\n\n\nclass TakeFirstFilter(Filter):\n def __init"
},
{
"path": "lmms-eval_videochat/lmms_eval/filters/transformation.py",
"chars": 1512,
"preview": "from lmms_eval.api.filter import Filter\n\n\nclass LowercaseFilter(Filter):\n def __init__(self) -> None:\n pass\n\n "
},
{
"path": "lmms-eval_videochat/lmms_eval/logging_utils.py",
"chars": 15069,
"preview": "# Code mostly from: https://github.com/EleutherAI/lm-evaluation-harness/pull/1339, credit to: https://github.com/ayulock"
},
{
"path": "lmms-eval_videochat/lmms_eval/models/__init__.py",
"chars": 1026,
"preview": "import importlib\nimport os\nimport hf_transfer\nfrom loguru import logger\nimport sys\nimport hf_transfer\n\nos.environ[\"HF_HU"
},
{
"path": "lmms-eval_videochat/lmms_eval/models/videochat_flash.py",
"chars": 11636,
"preview": "import logging\nimport warnings\nfrom datetime import timedelta\nfrom typing import List, Optional, Union, Tuple\nimport PIL"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/__init__.py",
"chars": 6113,
"preview": "import os, sys\nfrom typing import List, Union, Dict\n\nfrom lmms_eval import utils\n\n# from lmms_eval import prompts\nfrom l"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/_task_utils/file_utils.py",
"chars": 244,
"preview": "import os\n\n\ndef generate_submission_file(file_name, args, subpath=\"submissions\"):\n path = os.path.join(args.output_pa"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/_task_utils/gpt_eval_utils.py",
"chars": 0,
"preview": ""
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/_task_utils/video_loader.py",
"chars": 1036,
"preview": "import os\n\n\ndef get_cache_dir(config, sub_dir=\"videos\"):\n HF_HOME = os.environ[\"HF_HOME\"]\n cache_dir = config[\"dat"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/_task_utils/vqa_eval_metric.py",
"chars": 6056,
"preview": "import re\n\n\nclass EvalAIAnswerProcessor:\n \"\"\"\n Processes an answer similar to Eval AI\n copied from\n "
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/longvideobench/longvideobench_test_v.yaml",
"chars": 871,
"preview": "dataset_path: eval_data_jsons/LongVideoBench\ndataset_kwargs:\n token: True\n cache_dir: your_eval_data_dir/LongVideoBenc"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/longvideobench/longvideobench_val_i.yaml",
"chars": 853,
"preview": "dataset_path: lmms-eval_data/LongVideoBench\ndataset_kwargs:\n token: True\n cache_dir: phdd2:s3://LongVideoBench/\n vide"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/longvideobench/longvideobench_val_v.yaml",
"chars": 857,
"preview": "dataset_path: eval_data_jsons/LongVideoBench\ndataset_kwargs:\n token: True\n cache_dir: your_eval_data_dir/LongVideoBenc"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/longvideobench/utils.py",
"chars": 14118,
"preview": "import json\nimport os\nimport random\nimport re\nimport sys\nfrom collections import Counter, defaultdict\nfrom pathlib impor"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/lvbench/_default_template.yaml",
"chars": 955,
"preview": "dataset_path: eval_data_jsons/LVBench\ndataset_kwargs:\n token: True\n cache_dir: your_eval_data_dir/LVBench/frames/ # NO"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/lvbench/lvbench.yaml",
"chars": 136,
"preview": "group: lvbench\ntask:\n - lvbench_cartoon\n - lvbench_documentary\n - lvbench_live\n - lvbench_selfmedia\n - lvbench_spor"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/lvbench/lvbench_cartoon.yaml",
"chars": 253,
"preview": "include: _default_template.yaml\ntask: lvbench_cartoon\ndataset_name: lvbench_cartoon\ntest_split: train\nlmms_eval_specific"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/lvbench/lvbench_documentary.yaml",
"chars": 265,
"preview": "include: _default_template.yaml\ntask: lvbench_documentary\ndataset_name: lvbench_documentary\ntest_split: train\nlmms_eval_"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/lvbench/lvbench_live.yaml",
"chars": 244,
"preview": "include: _default_template.yaml\ntask: lvbench_live\ndataset_name: lvbench_live\ntest_split: train\nlmms_eval_specific_kwarg"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/lvbench/lvbench_selfmedia.yaml",
"chars": 259,
"preview": "include: _default_template.yaml\ntask: lvbench_selfmedia\ndataset_name: lvbench_selfmedia\ntest_split: train\nlmms_eval_spec"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/lvbench/lvbench_sport.yaml",
"chars": 247,
"preview": "include: _default_template.yaml\ntask: lvbench_sport\ndataset_name: lvbench_sport\ntest_split: train\nlmms_eval_specific_kwa"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/lvbench/lvbench_tv.yaml",
"chars": 238,
"preview": "include: _default_template.yaml\ntask: lvbench_tv\ndataset_name: lvbench_tv\ntest_split: train\nlmms_eval_specific_kwargs:\n "
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/lvbench/utils.py",
"chars": 4990,
"preview": "from collections import defaultdict\nimport os\nimport datetime\nimport json\nfrom lmms_eval.tasks._task_utils.file_utils im"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/_default_template.yaml",
"chars": 765,
"preview": "dataset_path: eval_data_jsons/MLVU_MC\ndataset_kwargs:\n token: True\n cache_dir: your_eval_data_dir/MLVU_MC\n video: Tru"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/mlvu_mc.yaml",
"chars": 165,
"preview": "group: mlvu_mc\ntask:\n - mlvu_mc_count\n - mlvu_mc_ego\n - mlvu_mc_needle\n - mlvu_mc_order\n - mlvu_mc_plotqa\n - mlvu_"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/mlvu_mc_anomaly_reco.yaml",
"chars": 534,
"preview": "include: _default_template.yaml\ntask: mlvu_mc_anomaly_reco\ndataset_name: 6_anomaly_reco\ntest_split: train\nlmms_eval_spec"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/mlvu_mc_count.yaml",
"chars": 499,
"preview": "include: _default_template.yaml\ntask: mlvu_mc_count\ndataset_name: 4_count\ntest_split: train\nlmms_eval_specific_kwargs:\n "
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/mlvu_mc_ego.yaml",
"chars": 489,
"preview": "include: _default_template.yaml\ntask: mlvu_mc_ego\ndataset_name: 3_ego\ntest_split: train\nlmms_eval_specific_kwargs:\n def"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/mlvu_mc_needle.yaml",
"chars": 504,
"preview": "include: _default_template.yaml\ntask: mlvu_mc_needle\ndataset_name: 2_needle\ntest_split: train\nlmms_eval_specific_kwargs:"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/mlvu_mc_order.yaml",
"chars": 499,
"preview": "include: _default_template.yaml\ntask: mlvu_mc_order\ndataset_name: 5_order\ntest_split: train\nlmms_eval_specific_kwargs:\n "
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/mlvu_mc_plotqa.yaml",
"chars": 504,
"preview": "include: _default_template.yaml\ntask: mlvu_mc_plotqa\ndataset_name: 1_plotQA\ntest_split: train\nlmms_eval_specific_kwargs:"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/mlvu_mc_topic_reasoning.yaml",
"chars": 549,
"preview": "include: _default_template.yaml\ntask: mlvu_mc_topic_reasoning\ndataset_name: 7_topic_reasoning\ntest_split: train\nlmms_eva"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mlvu_mc/utils.py",
"chars": 7064,
"preview": "from collections import defaultdict\nimport os\nimport datetime\nimport json\nfrom lmms_eval.tasks._task_utils.file_utils im"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/_default_template.yaml",
"chars": 765,
"preview": "dataset_path: eval_data_jsons/MVBench\ndataset_kwargs:\n token: True\n cache_dir: your_eval_data_dir/MVBench\n video: Tru"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench.yaml",
"chars": 609,
"preview": "group: mvbench\ntask:\n - mvbench_action_sequence\n - mvbench_moving_count\n - mvbench_action_prediction\n - mvbench_epis"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_action_antonym.yaml",
"chars": 223,
"preview": "include: _default_template.yaml\ntask: mvbench_action_antonym\ndataset_name: action_antonym\ntest_split: train\nlmms_eval_sp"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_action_count.yaml",
"chars": 217,
"preview": "include: _default_template.yaml\ntask: mvbench_action_count\ndataset_name: action_count\ntest_split: train\nlmms_eval_specif"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_action_localization.yaml",
"chars": 238,
"preview": "include: _default_template.yaml\ntask: mvbench_action_localization\ndataset_name: action_localization\ntest_split: train\nlm"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_action_prediction.yaml",
"chars": 232,
"preview": "include: _default_template.yaml\ntask: mvbench_action_prediction\ndataset_name: action_prediction\ntest_split: train\nlmms_e"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_action_sequence.yaml",
"chars": 226,
"preview": "include: _default_template.yaml\ntask: mvbench_action_sequence\ndataset_name: action_sequence\ntest_split: train\nlmms_eval_"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_character_order.yaml",
"chars": 226,
"preview": "include: _default_template.yaml\ntask: mvbench_character_order\ndataset_name: character_order\ntest_split: train\nlmms_eval_"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_counterfactual_inference.yaml",
"chars": 253,
"preview": "include: _default_template.yaml\ntask: mvbench_counterfactual_inference\ndataset_name: counterfactual_inference\ntest_split"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_egocentric_navigation.yaml",
"chars": 244,
"preview": "include: _default_template.yaml\ntask: mvbench_egocentric_navigation\ndataset_name: egocentric_navigation\ntest_split: trai"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_episodic_reasoning.yaml",
"chars": 402,
"preview": "include: _default_template.yaml\ntask: mvbench_episodic_reasoning\ndataset_name: episodic_reasoning\ntest_split: train\ndoc_"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_fine_grained_action.yaml",
"chars": 238,
"preview": "include: _default_template.yaml\ntask: mvbench_fine_grained_action\ndataset_name: fine_grained_action\ntest_split: train\nlm"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_fine_grained_pose.yaml",
"chars": 232,
"preview": "include: _default_template.yaml\ntask: mvbench_fine_grained_pose\ndataset_name: fine_grained_pose\ntest_split: train\nlmms_e"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_moving_attribute.yaml",
"chars": 229,
"preview": "include: _default_template.yaml\ntask: mvbench_moving_attribute\ndataset_name: moving_attribute\ntest_split: train\nlmms_eva"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_moving_count.yaml",
"chars": 217,
"preview": "include: _default_template.yaml\ntask: mvbench_moving_count\ndataset_name: moving_count\ntest_split: train\nlmms_eval_specif"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_moving_direction.yaml",
"chars": 229,
"preview": "include: _default_template.yaml\ntask: mvbench_moving_direction\ndataset_name: moving_direction\ntest_split: train\nlmms_eva"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_object_existence.yaml",
"chars": 229,
"preview": "include: _default_template.yaml\ntask: mvbench_object_existence\ndataset_name: object_existence\ntest_split: train\nlmms_eva"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_object_interaction.yaml",
"chars": 235,
"preview": "include: _default_template.yaml\ntask: mvbench_object_interaction\ndataset_name: object_interaction\ntest_split: train\nlmms"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_object_shuffle.yaml",
"chars": 223,
"preview": "include: _default_template.yaml\ntask: mvbench_object_shuffle\ndataset_name: object_shuffle\ntest_split: train\nlmms_eval_sp"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_scene_transition.yaml",
"chars": 228,
"preview": "include: _default_template.yaml\ntask: mvbench_scene_transition\ndataset_name: scene_transition\ntest_split: train\nlmms_eva"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_state_change.yaml",
"chars": 216,
"preview": "include: _default_template.yaml\ntask: mvbench_state_change\ndataset_name: state_change\ntest_split: train\nlmms_eval_specif"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/mvbench_unexpected_action.yaml",
"chars": 231,
"preview": "include: _default_template.yaml\ntask: mvbench_unexpected_action\ndataset_name: unexpected_action\ntest_split: train\nlmms_e"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/mvbench/utils.py",
"chars": 7815,
"preview": "from collections import defaultdict\nimport os\nimport datetime\nimport json\nfrom lmms_eval.tasks._task_utils.file_utils im"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/perceptiontest/val/_default_template_yaml",
"chars": 205,
"preview": "dataset_path: eval_data_jsons/PerceptionTest_Val\ndataset_kwargs:\n token: True\n video: True\n cache_dir: pssd:s3://per"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/perceptiontest/val/perceptiontest_mc.yaml",
"chars": 536,
"preview": "dataset_name: \"mc_question_val\"\ntask: \"perceptiontest_val_mc\"\ntest_split: validation\noutput_type: generate_until\ndoc_to_"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/perceptiontest/val/utils.py",
"chars": 4439,
"preview": "from decord import VideoReader, cpu\nimport numpy as np\nimport os\nimport sys\nimport datetime\nimport lmms_eval.tasks._task"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/temporal_grounding/_default_template.yaml",
"chars": 543,
"preview": "dataset_path: eval_data_jsons/Temporal_Grounding\ndataset_kwargs:\n token: True\n cache_dir: your_eval_data_dir/Temporal_"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/temporal_grounding/charades.yaml",
"chars": 689,
"preview": "include: _default_template.yaml\ntask: temporal_grounding_charades\ndataset_name: charades\ntest_split: train\nmetric_list:\n"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/temporal_grounding/eval_tvg.py",
"chars": 4435,
"preview": "import json\nimport argparse\nimport os\nimport re\nfrom copy import deepcopy\nimport pdb\nimport numpy as np\nfrom pathlib imp"
},
{
"path": "lmms-eval_videochat/lmms_eval/tasks/temporal_grounding/utils.py",
"chars": 2807,
"preview": "from decord import VideoReader, cpu\nimport numpy as np\nimport os\nimport sys\nimport datetime\nimport lmms_eval.tasks._task"
}
]
// ... and 1728 more files
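The entries above all share a uniform {"path", "chars", "preview"} schema, which makes the manifest easy to post-process. Below is a minimal Python sketch, assuming the JSON array has been saved to a local file (the filename manifest.json is hypothetical, and the trailing "// ..." comment line must be stripped first, since comments are not valid JSON); it groups entries by parent directory and ranks directories by total size, using only the standard library.

import json
from collections import defaultdict
from pathlib import PurePosixPath

# Hypothetical setup: the JSON array above saved verbatim as manifest.json,
# with the non-JSON "// ..." trailer removed.
with open("manifest.json", encoding="utf-8") as f:
    entries = json.load(f)

# Every entry has the shape {"path": str, "chars": int, "preview": str}.
totals = defaultdict(lambda: {"files": 0, "chars": 0})
for entry in entries:
    parent = str(PurePosixPath(entry["path"]).parent)
    totals[parent]["files"] += 1
    totals[parent]["chars"] += entry["chars"]

# Largest directories first, e.g. the MVBench annotation JSONs listed above.
for parent, stats in sorted(totals.items(), key=lambda kv: -kv[1]["chars"]):
    print(f"{parent}: {stats['files']} files, {stats['chars']} chars")

PurePosixPath is used rather than Path so the grouping behaves the same on any operating system, since the manifest records POSIX-style paths.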