gitextract_i3i5r_p7/

├── .flake8
├── .github/
│   ├── CONTRIBUTING.md
│   └── ISSUE_TEMPLATE/
│       ├── 1-bug-report.yml
│       ├── 2-feature-request.yml
│       └── 3-documentation.yml
├── .gitignore
├── .isort.cfg
├── .pre-commit-config.yaml
├── INSTALLATION.md
├── LICENSE
├── README.md
├── README_zh.md
├── classification/
│   ├── README.md
│   ├── config.py
│   ├── configs/
│   │   ├── attn_pooling_probing/
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_a.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_r.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_real.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_sketch.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenetv2.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_a.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_r.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_real.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_sketch.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenetv2.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_a.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_r.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_real.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_sketch.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenetv2.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_a.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_r.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_real.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_sketch.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenetv2.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_a.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_r.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_real.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_sketch.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenetv2.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_a.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_r.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_real.yaml
│   │   │   ├── attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_sketch.yaml
│   │   │   └── attn_pooling_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenetv2.yaml
│   │   ├── intern_vit_6b_1k_224.yaml
│   │   ├── intern_vit_6b_1k_224_test_imagenet_a.yaml
│   │   ├── intern_vit_6b_1k_224_test_imagenet_r.yaml
│   │   ├── intern_vit_6b_1k_224_test_imagenet_real.yaml
│   │   ├── intern_vit_6b_1k_224_test_imagenet_sketch.yaml
│   │   ├── intern_vit_6b_1k_224_test_imagenetv2.yaml
│   │   └── linear_probing/
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224_64gpu.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_a.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_r.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_real.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenet_sketch.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224_64gpu_imagenetv2.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_a.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_r.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_real.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenet_sketch.yaml
│   │       ├── linear_probing_intern_vit_6b_224px_in1k_224to448_64gpu_imagenetv2.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_a.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_r.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_real.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenet_sketch.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_0_in1k_448_64gpu_imagenetv2.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_a.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_r.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_real.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenet_sketch.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_2_in1k_448_64gpu_imagenetv2.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_a.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_r.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_real.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenet_sketch.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v1_5_in1k_448_64gpu_imagenetv2.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_a.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_r.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_real.yaml
│   │       ├── linear_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenet_sketch.yaml
│   │       └── linear_probing_intern_vit_6b_448px_v2_5_in1k_448_64gpu_imagenetv2.yaml
│   ├── dataset/
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── cached_image_folder.py
│   │   ├── imagenet_a_r_indices.py
│   │   ├── imagenet_real.py
│   │   ├── imagenetv2.py
│   │   ├── samplers.py
│   │   └── zipreader.py
│   ├── ddp_hooks.py
│   ├── gflops.py
│   ├── hf2pytorch.py
│   ├── logger.py
│   ├── lr_scheduler.py
│   ├── main.py
│   ├── meta_data/
│   │   ├── 22k_class_to_idx.json
│   │   ├── imagenet_classes.json
│   │   ├── map22kto1k.txt
│   │   └── real.json
│   ├── models/
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── clip_vit.py
│   │   ├── flash_attention.py
│   │   └── intern_vit_6b.py
│   ├── optimizer.py
│   ├── train_in1k.sh
│   ├── utils.py
│   └── work_dirs/
│       └── intern_vit_6b_1k_224/
│           └── log_rank0.txt
├── clip_benchmark/
│   ├── AUTHORS.rst
│   ├── CONTRIBUTING.rst
│   ├── HISTORY.rst
│   ├── LICENSE
│   ├── MANIFEST.in
│   ├── Makefile
│   ├── README.md
│   ├── benchmark/
│   │   ├── README.md
│   │   ├── benchmark.csv
│   │   ├── dataset_type.csv
│   │   ├── datasets.txt
│   │   ├── datasets_multilingual.txt
│   │   ├── models.txt
│   │   ├── results.ipynb
│   │   └── webdatasets.txt
│   ├── clip_benchmark/
│   │   ├── __init__.py
│   │   ├── cli.py
│   │   ├── datasets/
│   │   │   ├── __init__.py
│   │   │   ├── ar_classnames.json
│   │   │   ├── ar_zeroshot_classification_templates.json
│   │   │   ├── birdsnap.py
│   │   │   ├── builder.py
│   │   │   ├── caltech101.py
│   │   │   ├── cn_classnames.json
│   │   │   ├── cn_zeroshot_classification_templates.json
│   │   │   ├── cupl_prompts.json
│   │   │   ├── en_classnames.json
│   │   │   ├── en_zeroshot_classification_templates.json
│   │   │   ├── flickr.py
│   │   │   ├── imagenetv2.py
│   │   │   ├── it_classnames.json
│   │   │   ├── it_zeroshot_classification_templates.json
│   │   │   ├── jp_classnames.json
│   │   │   ├── jp_zeroshot_classification_templates.json
│   │   │   ├── kitti.py
│   │   │   ├── multilingual_mscoco.py
│   │   │   ├── objectnet.py
│   │   │   ├── tfds.py
│   │   │   ├── tools.py
│   │   │   └── voc2007.py
│   │   ├── metrics/
│   │   │   ├── __init__.py
│   │   │   ├── linear_probe.py
│   │   │   ├── mscoco_generative.py
│   │   │   ├── zeroshot_classification.py
│   │   │   └── zeroshot_retrieval.py
│   │   ├── model_collection.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── intern_vit_6b/
│   │   │   │   ├── configuration_intern_vit.py
│   │   │   │   ├── flash_attention.py
│   │   │   │   └── modeling_intern_vit.py
│   │   │   ├── internvl.py
│   │   │   ├── internvl_c_pytorch/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── chinese_alpaca_lora_7b/
│   │   │   │   │   ├── config.json
│   │   │   │   │   ├── generation_config.json
│   │   │   │   │   ├── pytorch_model.bin.index.json
│   │   │   │   │   ├── special_tokens_map.json
│   │   │   │   │   ├── tokenizer.model
│   │   │   │   │   └── tokenizer_config.json
│   │   │   │   ├── flash_attention.py
│   │   │   │   └── internvl_c.py
│   │   │   ├── internvl_huggingface/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configuration_intern_vit.py
│   │   │   │   ├── configuration_internvl.py
│   │   │   │   ├── flash_attention.py
│   │   │   │   ├── modeling_intern_vit.py
│   │   │   │   ├── modeling_internvl.py
│   │   │   │   └── modeling_qllama.py
│   │   │   ├── japanese_clip.py
│   │   │   └── open_clip.py
│   │   └── webdataset_builder.py
│   ├── data/
│   │   ├── birdsnap/
│   │   │   └── test_images_valid.txt
│   │   ├── flickr30k/
│   │   │   └── flickr30k_cn_test.txt
│   │   └── mscoco_captions/
│   │       └── coco-cn_test.json
│   ├── probe_benchmark/
│   │   ├── PROBES.md
│   │   ├── build_df_scaling_experiments.py
│   │   ├── clip_table_2.csv
│   │   ├── generate_table.py
│   │   ├── laion5b_fewshot_experiments.py
│   │   ├── openclip_results.csv
│   │   ├── process_vtab.py
│   │   ├── scaling_experiment_data2.json
│   │   ├── scaling_experiment_data_vtab.json
│   │   ├── scaling_experiments.py
│   │   └── scaling_plot.ipynb
│   ├── requirements-test.txt
│   ├── requirements.txt
│   ├── setup.cfg
│   ├── setup.py
│   ├── test_internvl_c_classification.sh
│   ├── test_internvl_c_imagenet.sh
│   ├── test_internvl_c_retrieval.sh
│   ├── test_internvl_c_xtd.sh
│   ├── test_internvl_g_classification.sh
│   ├── test_internvl_g_imagenet.sh
│   ├── test_internvl_g_retrieval.sh
│   ├── test_internvl_g_retrieval_finetune.sh
│   ├── test_internvl_g_xtd.sh
│   ├── tests/
│   │   └── test_clip_benchmark.py
│   └── tox.ini
├── internvl_chat/
│   ├── README.md
│   ├── eval/
│   │   ├── README.md
│   │   ├── caption/
│   │   │   ├── README.md
│   │   │   └── evaluate_caption.py
│   │   ├── domain_specific/
│   │   │   ├── drivelm/
│   │   │   │   └── evaluate.py
│   │   │   ├── mme_rw/
│   │   │   │   └── evaluate.py
│   │   │   ├── rs_det/
│   │   │   │   ├── caculate.py
│   │   │   │   └── evaluate.py
│   │   │   └── rs_vqa/
│   │   │       ├── evaluate.py
│   │   │       └── score.py
│   │   ├── llava_bench/
│   │   │   ├── README.md
│   │   │   ├── eval_gpt_review_bench.py
│   │   │   ├── evaluate_llava_bench.py
│   │   │   ├── rule.json
│   │   │   └── summarize_gpt_review.py
│   │   ├── mantis_eval/
│   │   │   ├── README.md
│   │   │   └── evaluate_mantis.py
│   │   ├── mathvista/
│   │   │   ├── README.md
│   │   │   ├── calculate_score.py
│   │   │   ├── evaluate_mathvista.py
│   │   │   ├── extract_answer.py
│   │   │   ├── prompts/
│   │   │   │   └── ext_ans.py
│   │   │   └── utilities.py
│   │   ├── mirb/
│   │   │   ├── README.md
│   │   │   └── evaluate_mirb.py
│   │   ├── mmbench/
│   │   │   ├── README.md
│   │   │   └── evaluate_mmbench.py
│   │   ├── mme/
│   │   │   ├── README.md
│   │   │   ├── Your_Results/
│   │   │   │   ├── OCR.txt
│   │   │   │   ├── artwork.txt
│   │   │   │   ├── celebrity.txt
│   │   │   │   ├── code_reasoning.txt
│   │   │   │   ├── color.txt
│   │   │   │   ├── commonsense_reasoning.txt
│   │   │   │   ├── count.txt
│   │   │   │   ├── existence.txt
│   │   │   │   ├── landmark.txt
│   │   │   │   ├── numerical_calculation.txt
│   │   │   │   ├── position.txt
│   │   │   │   ├── posters.txt
│   │   │   │   ├── scene.txt
│   │   │   │   └── text_translation.txt
│   │   │   ├── calculation.py
│   │   │   └── eval.py
│   │   ├── mmhal/
│   │   │   ├── README.md
│   │   │   ├── eval_gpt_mmhal.py
│   │   │   └── evaluate_mmhal.py
│   │   ├── mmiu/
│   │   │   ├── README.md
│   │   │   ├── evaluate_mmiu.py
│   │   │   └── mmiu.jsonl
│   │   ├── mmmu/
│   │   │   ├── README.md
│   │   │   ├── answer_dict_val.json
│   │   │   ├── data_utils.py
│   │   │   ├── eval_utils.py
│   │   │   ├── evaluate_mmmu.py
│   │   │   └── main_eval_only.py
│   │   ├── mmmu_pro/
│   │   │   ├── README.md
│   │   │   ├── evaluate.py
│   │   │   ├── evaluate_mmmu_pro.py
│   │   │   └── prompts.yaml
│   │   ├── mmvet/
│   │   │   ├── README.md
│   │   │   └── evaluate_mmvet.py
│   │   ├── mmvetv2/
│   │   │   ├── README.md
│   │   │   └── evaluate_mmvet_v2.py
│   │   ├── mmvp/
│   │   │   ├── README.md
│   │   │   └── evaluate_mmvp.py
│   │   ├── mpdocvqa/
│   │   │   ├── README.md
│   │   │   ├── evaluate_vqa.py
│   │   │   └── infographicsvqa_eval.py
│   │   ├── mvbench/
│   │   │   ├── README.md
│   │   │   └── evaluate_mvbench.py
│   │   ├── pope/
│   │   │   ├── README.md
│   │   │   ├── eval_pope.py
│   │   │   └── evaluate_pope.py
│   │   ├── refcoco/
│   │   │   ├── README.md
│   │   │   └── evaluate_grounding.py
│   │   ├── scienceqa/
│   │   │   ├── README.md
│   │   │   └── evaluate_scienceqa.py
│   │   ├── seed/
│   │   │   ├── README.md
│   │   │   ├── calculation.py
│   │   │   └── evaluate_seed.py
│   │   ├── tiny_lvlm/
│   │   │   ├── README.md
│   │   │   ├── calculate_score.py
│   │   │   ├── evaluate_lvlm.py
│   │   │   └── tools.py
│   │   └── vqa/
│   │       ├── README.md
│   │       ├── convert_gqa_for_eval.py
│   │       ├── evaluate_vqa.py
│   │       ├── infographicsvqa_eval.py
│   │       └── textvqa_eval.py
│   ├── evaluate.sh
│   ├── internvl/
│   │   ├── conversation.py
│   │   ├── dist_utils.py
│   │   ├── model/
│   │   │   ├── __init__.py
│   │   │   ├── internlm2/
│   │   │   │   ├── configuration_internlm2.py
│   │   │   │   ├── modeling_internlm2.py
│   │   │   │   ├── tokenization_internlm2.py
│   │   │   │   └── tokenization_internlm2_fast.py
│   │   │   ├── internvl_chat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configuration_intern_vit.py
│   │   │   │   ├── configuration_internvl_chat.py
│   │   │   │   ├── modeling_intern_vit.py
│   │   │   │   └── modeling_internvl_chat.py
│   │   │   └── phi3/
│   │   │       ├── configuration_phi3.py
│   │   │       └── modeling_phi3.py
│   │   ├── patch/
│   │   │   ├── __init__.py
│   │   │   ├── internlm2_packed_training_patch.py
│   │   │   ├── internvit_liger_monkey_patch.py
│   │   │   ├── llama2_flash_attn_monkey_patch.py
│   │   │   ├── llama_flash_attn_monkey_patch.py
│   │   │   ├── llama_packed_training_patch.py
│   │   │   ├── llama_rmsnorm_monkey_patch.py
│   │   │   ├── pad_data_collator.py
│   │   │   ├── phi3_packed_training_patch.py
│   │   │   ├── qwen2_packed_training_patch.py
│   │   │   ├── train_dataloader_patch.py
│   │   │   └── train_sampler_patch.py
│   │   └── train/
│   │       ├── __init__.py
│   │       ├── constants.py
│   │       ├── dataset.py
│   │       ├── dataset_packed.py
│   │       ├── internvl_chat_finetune.py
│   │       ├── internvl_chat_mpo.py
│   │       ├── internvl_chat_pretrain.py
│   │       └── trainer_dpo.py
│   ├── pyproject.toml
│   ├── shell/
│   │   ├── data/
│   │   │   ├── coco_caption.json
│   │   │   ├── internvl_1_2_finetune.json
│   │   │   └── internvl_1_2_finetune_custom.json
│   │   ├── internvl1.2/
│   │   │   ├── 2nd_finetune/
│   │   │   │   ├── internvl_chat_v1_2_hermes2_yi34b_448_res_2nd_finetune_full.sh
│   │   │   │   └── internvl_chat_v1_2_hermes2_yi34b_448_res_2nd_finetune_lora.sh
│   │   │   └── hermes2_yi34b/
│   │   │       └── internvl_chat_v1_2_hermes2_yi34b_448_res_finetune.sh
│   │   ├── internvl1.5/
│   │   │   ├── 2nd_finetune/
│   │   │   │   ├── internvl_chat_v1_5_internlm2_1_8b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl_chat_v1_5_internlm2_1_8b_dynamic_res_2nd_finetune_lora.sh
│   │   │   │   ├── internvl_chat_v1_5_internlm2_20b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl_chat_v1_5_internlm2_20b_dynamic_res_2nd_finetune_lora.sh
│   │   │   │   ├── internvl_chat_v1_5_phi3_3_8b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   └── internvl_chat_v1_5_phi3_3_8b_dynamic_res_2nd_finetune_lora.sh
│   │   │   ├── hermes2_yi34b/
│   │   │   │   ├── internvl_chat_v1_5_hermes2_yi34b_dynamic_res_finetune.sh
│   │   │   │   └── internvl_chat_v1_5_hermes2_yi34b_dynamic_res_pretrain.sh
│   │   │   ├── internlm2_1_8b/
│   │   │   │   ├── internvl_chat_v1_5_internlm2_1_8b_dynamic_res_finetune.sh
│   │   │   │   └── internvl_chat_v1_5_internlm2_1_8b_dynamic_res_pretrain.sh
│   │   │   ├── internlm2_20b/
│   │   │   │   ├── internvl_chat_v1_5_internlm2_20b_dynamic_res_finetune.sh
│   │   │   │   └── internvl_chat_v1_5_internlm2_20b_dynamic_res_pretrain.sh
│   │   │   └── phi3_3_8b/
│   │   │       ├── internvl_chat_v1_5_phi3_3_8b_dynamic_res_finetune.sh
│   │   │       └── internvl_chat_v1_5_phi3_3_8b_dynamic_res_pretrain.sh
│   │   ├── internvl2.0/
│   │   │   └── 2nd_finetune/
│   │   │       ├── internvl2_1b_qwen2_0_5b_dynamic_res_2nd_finetune_full.sh
│   │   │       ├── internvl2_1b_qwen2_0_5b_dynamic_res_2nd_finetune_lora.sh
│   │   │       ├── internvl2_26b_internlm2_20b_dynamic_res_2nd_finetune_full.sh
│   │   │       ├── internvl2_26b_internlm2_20b_dynamic_res_2nd_finetune_lora.sh
│   │   │       ├── internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_full.sh
│   │   │       ├── internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_lora.sh
│   │   │       ├── internvl2_2b_internlm2_1_8b_dynamic_res_2nd_finetune_lora_coco.sh
│   │   │       ├── internvl2_40b_hermes2_yi_34b_dynamic_res_2nd_finetune_full.sh
│   │   │       ├── internvl2_40b_hermes2_yi_34b_dynamic_res_2nd_finetune_lora.sh
│   │   │       ├── internvl2_4b_phi3_3_8b_dynamic_res_2nd_finetune_full.sh
│   │   │       ├── internvl2_4b_phi3_3_8b_dynamic_res_2nd_finetune_lora.sh
│   │   │       ├── internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_full.sh
│   │   │       ├── internvl2_76b_hermes2_llama3_70b_dynamic_res_2nd_finetune_lora.sh
│   │   │       ├── internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_full.sh
│   │   │       └── internvl2_8b_internlm2_7b_dynamic_res_2nd_finetune_lora.sh
│   │   ├── internvl2.0_mpo/
│   │   │   ├── README.md
│   │   │   └── preference_optimization/
│   │   │       └── internvl2_8b_internlm2_7b_dynamic_res_mpo_full.sh
│   │   ├── internvl2.5/
│   │   │   ├── 2nd_finetune/
│   │   │   │   ├── internvl2_5_1b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl2_5_1b_dynamic_res_2nd_finetune_lora.sh
│   │   │   │   ├── internvl2_5_26b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl2_5_26b_dynamic_res_2nd_finetune_lora.sh
│   │   │   │   ├── internvl2_5_2b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl2_5_2b_dynamic_res_2nd_finetune_lora.sh
│   │   │   │   ├── internvl2_5_2b_dynamic_res_2nd_finetune_lora_coco.sh
│   │   │   │   ├── internvl2_5_38b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl2_5_38b_dynamic_res_2nd_finetune_lora.sh
│   │   │   │   ├── internvl2_5_4b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl2_5_4b_dynamic_res_2nd_finetune_lora.sh
│   │   │   │   ├── internvl2_5_78b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl2_5_78b_dynamic_res_2nd_finetune_lora.sh
│   │   │   │   ├── internvl2_5_8b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   └── internvl2_5_8b_dynamic_res_2nd_finetune_lora.sh
│   │   │   ├── stage1/
│   │   │   │   ├── internvl2_5_1b_qwen2_5_0_5b_dynamic_res_stage1.sh
│   │   │   │   ├── internvl2_5_26b_internlm2_5_20b_dynamic_res_stage1.sh
│   │   │   │   ├── internvl2_5_2b_internlm2_5_1_8b_dynamic_res_stage1.sh
│   │   │   │   ├── internvl2_5_38b_qwen2_5_32b_dynamic_res_stage1.sh
│   │   │   │   ├── internvl2_5_4b_qwen2_5_3b_dynamic_res_stage1.sh
│   │   │   │   ├── internvl2_5_78b_qwen2_5_72b_dynamic_res_stage1.sh
│   │   │   │   └── internvl2_5_8b_internlm2_5_7b_dynamic_res_stage1.sh
│   │   │   ├── stage1.5/
│   │   │   │   ├── internvl2_5_26b_internlm2_5_20b_dynamic_res_stage1_5.sh
│   │   │   │   └── internvl2_5_8b_internlm2_5_7b_dynamic_res_stage1_5.sh
│   │   │   └── stage2/
│   │   │       ├── internvl2_5_1b_qwen2_5_0_5b_dynamic_res_stage2.sh
│   │   │       ├── internvl2_5_26b_internlm2_5_20b_dynamic_res_stage2.sh
│   │   │       ├── internvl2_5_2b_internlm2_5_1_8b_dynamic_res_stage2.sh
│   │   │       ├── internvl2_5_38b_qwen2_5_32b_dynamic_res_stage2.sh
│   │   │       ├── internvl2_5_4b_qwen2_5_3b_dynamic_res_stage2.sh
│   │   │       ├── internvl2_5_78b_qwen2_5_72b_dynamic_res_stage2.sh
│   │   │       └── internvl2_5_8b_internlm2_5_7b_dynamic_res_stage2.sh
│   │   ├── internvl2.5_mpo/
│   │   │   └── preference_optimization/
│   │   │       ├── internvl2_5_1b_qwen2_5_0_5b_dynamic_res_mpo.sh
│   │   │       ├── internvl2_5_26b_internlm2_5_20b_dynamic_res_mpo.sh
│   │   │       ├── internvl2_5_2b_internlm2_5_1_8b_dynamic_res_mpo.sh
│   │   │       ├── internvl2_5_38b_qwen2_5_32b_dynamic_res_mpo.sh
│   │   │       ├── internvl2_5_4b_qwen2_5_3b_dynamic_res_mpo.sh
│   │   │       ├── internvl2_5_78b_qwen2_5_72b_dynamic_res_mpo.sh
│   │   │       └── internvl2_5_8b_internlm2_5_7b_dynamic_res_mpo.sh
│   │   ├── internvl3.0/
│   │   │   ├── 2nd_finetune/
│   │   │   │   ├── internvl3_14b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl3_1b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl3_2b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl3_38b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl3_78b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   ├── internvl3_8b_dynamic_res_2nd_finetune_full.sh
│   │   │   │   └── internvl3_9b_dynamic_res_2nd_finetune_full.sh
│   │   │   ├── mpo/
│   │   │   │   ├── internvl3_14b_mpo.sh
│   │   │   │   ├── internvl3_1b_mpo.sh
│   │   │   │   ├── internvl3_2b_mpo.sh
│   │   │   │   ├── internvl3_38b_mpo.sh
│   │   │   │   ├── internvl3_78b_mpo.sh
│   │   │   │   ├── internvl3_8b_mpo.sh
│   │   │   │   └── internvl3_9b_mpo.sh
│   │   │   ├── mpo_data_construction/
│   │   │   │   ├── correctness_build_data.sh
│   │   │   │   └── correctness_mmpr_8b.sh
│   │   │   └── visualprm_data_construction/
│   │   │       ├── visualprm_build_data.sh
│   │   │       └── visualprm_mmpr_8b.sh
│   │   └── mini_internvl/
│   │       ├── README.md
│   │       └── domain_adaptation/
│   │           ├── internvl2_1b_qwen2_0_5b_dynamic_res_finetune_bdd.sh
│   │           ├── internvl2_1b_qwen2_0_5b_dynamic_res_finetune_drivelm.sh
│   │           ├── internvl2_1b_qwen2_0_5b_dynamic_res_finetune_medical.sh
│   │           ├── internvl2_1b_qwen2_0_5b_dynamic_res_finetune_remote.sh
│   │           ├── internvl2_2b_internlm2_1_8b_dynamic_res_finetune_bdd.sh
│   │           ├── internvl2_2b_internlm2_1_8b_dynamic_res_finetune_drivelm.sh
│   │           ├── internvl2_2b_internlm2_1_8b_dynamic_res_finetune_medical.sh
│   │           ├── internvl2_2b_internlm2_1_8b_dynamic_res_finetune_remote.sh
│   │           ├── internvl2_4b_phi3_3_8b_dynamic_res_finetune_bdd.sh
│   │           ├── internvl2_4b_phi3_3_8b_dynamic_res_finetune_drivelm.sh
│   │           ├── internvl2_4b_phi3_3_8b_dynamic_res_finetune_medical.sh
│   │           └── internvl2_4b_phi3_3_8b_dynamic_res_finetune_remote.sh
│   ├── tools/
│   │   ├── README.md
│   │   ├── convert_to_int8.py
│   │   ├── extract_mlp.py
│   │   ├── extract_video_frames.py
│   │   ├── extract_vit.py
│   │   ├── images_stitching.py
│   │   ├── internvl_custom2hf.py
│   │   ├── internvl_hf2custom.py
│   │   ├── json2jsonl.py
│   │   ├── jsonl2jsonl.py
│   │   ├── merge_lora.py
│   │   ├── reasoning_data_pipeline/
│   │   │   ├── mmpr_data_pipeline_correctness.py
│   │   │   ├── mmpr_data_pipeline_correctness_postprocess.py
│   │   │   ├── mmpr_data_pipeline_dropout_ntp.py
│   │   │   ├── utils/
│   │   │   │   ├── accuracy_reward.py
│   │   │   │   ├── constants.py
│   │   │   │   └── utils.py
│   │   │   ├── visualprm_data_pieline.py
│   │   │   └── visualprm_data_pipeline_postprocess.py
│   │   ├── replace_llm.py
│   │   └── resize_pos_embed.py
│   ├── zero_stage1_config.json
│   ├── zero_stage2_config.json
│   ├── zero_stage3_config.json
│   ├── zero_stage3_config_100b.json
│   ├── zero_stage3_config_100b_1e7_offload.json
│   ├── zero_stage3_config_100b_1e8.json
│   ├── zero_stage3_config_34b.json
│   └── zero_stage3_config_70b.json
├── internvl_chat_gpt_oss/
│   ├── README.md
│   ├── internvl/
│   │   ├── dist_utils.py
│   │   ├── model/
│   │   │   └── internvl_chat/
│   │   │       ├── __init__.py
│   │   │       ├── configuration_intern_vit.py
│   │   │       ├── configuration_internvl_chat.py
│   │   │       ├── conversation.py
│   │   │       ├── modeling_intern_vit.py
│   │   │       └── modeling_internvl_chat.py
│   │   ├── patch/
│   │   │   ├── __init__.py
│   │   │   ├── flash_sink_attn/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── flash_attn_with_sink.py
│   │   │   │   ├── flash_sink_attn.py
│   │   │   │   ├── flash_sink_attn_gpt_oss.py
│   │   │   │   ├── flash_sink_varlen_attn_gpt_oss.py
│   │   │   │   └── sliding_cache.py
│   │   │   ├── flash_sink_attn_monkey_patch.py
│   │   │   ├── pad_data_collator.py
│   │   │   ├── qwen3_flash_monkey_patch.py
│   │   │   └── train_dataloader_patch.py
│   │   ├── train/
│   │   │   ├── constants.py
│   │   │   ├── dataset.py
│   │   │   ├── dataset_packed.py
│   │   │   ├── internvl_chat_finetune.py
│   │   │   ├── internvl_chat_mpo.py
│   │   │   └── trainer_dpo.py
│   │   └── utils/
│   │       ├── s3_config.py
│   │       ├── s3_exception.py
│   │       └── s3_fileio.py
│   ├── requirements.txt
│   ├── shell/
│   │   ├── data/
│   │   │   ├── debug_mpo.json
│   │   │   └── debug_sft.json
│   │   ├── internvl3_5_gpt_oss/
│   │   │   ├── internvl3_5_gpt_oss_20b_stage0_mlp_warmup.sh
│   │   │   ├── internvl3_5_gpt_oss_20b_stage1_pretrain.sh
│   │   │   ├── internvl3_5_gpt_oss_20b_stage2_sft.sh
│   │   │   └── internvl3_5_gpt_oss_20b_stage3_mpo.sh
│   │   └── internvl3_5_qwen3/
│   │       ├── internvl3_5_14b_mpo.sh
│   │       ├── internvl3_5_14b_sft.sh
│   │       ├── internvl3_5_1b_mpo.sh
│   │       ├── internvl3_5_1b_sft.sh
│   │       ├── internvl3_5_241b_mpo.sh
│   │       ├── internvl3_5_241b_sft.sh
│   │       ├── internvl3_5_2b_mpo.sh
│   │       ├── internvl3_5_2b_sft.sh
│   │       ├── internvl3_5_30b_mpo.sh
│   │       ├── internvl3_5_30b_sft.sh
│   │       ├── internvl3_5_38b_mpo.sh
│   │       ├── internvl3_5_38b_sft.sh
│   │       ├── internvl3_5_4b_mpo.sh
│   │       ├── internvl3_5_4b_sft.sh
│   │       ├── internvl3_5_8b_mpo.sh
│   │       └── internvl3_5_8b_sft.sh
│   ├── zero_stage1_config.json
│   └── zero_stage3_config.json
├── internvl_chat_llava/
│   ├── LICENSE
│   ├── README.md
│   ├── docs/
│   │   ├── Customize_Component.md
│   │   ├── Data.md
│   │   ├── Evaluation.md
│   │   ├── LLaVA_Bench.md
│   │   ├── LLaVA_from_LLaMA2.md
│   │   ├── LoRA.md
│   │   ├── MODEL_ZOO.md
│   │   └── ScienceQA.md
│   ├── llava/
│   │   ├── __init__.py
│   │   ├── constants.py
│   │   ├── conversation.py
│   │   ├── eval/
│   │   │   ├── eval_gpt_review.py
│   │   │   ├── eval_gpt_review_bench.py
│   │   │   ├── eval_gpt_review_visual.py
│   │   │   ├── eval_pope.py
│   │   │   ├── eval_science_qa.py
│   │   │   ├── eval_science_qa_gpt4.py
│   │   │   ├── eval_science_qa_gpt4_requery.py
│   │   │   ├── eval_textvqa.py
│   │   │   ├── generate_webpage_data_from_table.py
│   │   │   ├── m4c_evaluator.py
│   │   │   ├── model_qa.py
│   │   │   ├── model_vqa.py
│   │   │   ├── model_vqa_loader.py
│   │   │   ├── model_vqa_mmbench.py
│   │   │   ├── model_vqa_science.py
│   │   │   ├── qa_baseline_gpt35.py
│   │   │   ├── run_llava.py
│   │   │   ├── summarize_gpt_review.py
│   │   │   ├── table/
│   │   │   │   ├── answer/
│   │   │   │   │   ├── answer_alpaca-13b.jsonl
│   │   │   │   │   ├── answer_bard.jsonl
│   │   │   │   │   ├── answer_gpt35.jsonl
│   │   │   │   │   ├── answer_llama-13b.jsonl
│   │   │   │   │   └── answer_vicuna-13b.jsonl
│   │   │   │   ├── caps_boxes_coco2014_val_80.jsonl
│   │   │   │   ├── model.jsonl
│   │   │   │   ├── prompt.jsonl
│   │   │   │   ├── question.jsonl
│   │   │   │   ├── review/
│   │   │   │   │   ├── review_alpaca-13b_vicuna-13b.jsonl
│   │   │   │   │   ├── review_bard_vicuna-13b.jsonl
│   │   │   │   │   ├── review_gpt35_vicuna-13b.jsonl
│   │   │   │   │   └── review_llama-13b_vicuna-13b.jsonl
│   │   │   │   ├── reviewer.jsonl
│   │   │   │   └── rule.json
│   │   │   └── webpage/
│   │   │       ├── index.html
│   │   │       ├── script.js
│   │   │       └── styles.css
│   │   ├── mm_utils.py
│   │   ├── model/
│   │   │   ├── __init__.py
│   │   │   ├── apply_delta.py
│   │   │   ├── builder.py
│   │   │   ├── consolidate.py
│   │   │   ├── language_model/
│   │   │   │   ├── llava_llama.py
│   │   │   │   ├── llava_mpt.py
│   │   │   │   └── mpt/
│   │   │   │       ├── adapt_tokenizer.py
│   │   │   │       ├── attention.py
│   │   │   │       ├── blocks.py
│   │   │   │       ├── configuration_mpt.py
│   │   │   │       ├── custom_embedding.py
│   │   │   │       ├── flash_attn_triton.py
│   │   │   │       ├── hf_prefixlm_converter.py
│   │   │   │       ├── meta_init_context.py
│   │   │   │       ├── modeling_mpt.py
│   │   │   │       ├── norm.py
│   │   │   │       └── param_init_fns.py
│   │   │   ├── llava_arch.py
│   │   │   ├── make_delta.py
│   │   │   ├── multimodal_encoder/
│   │   │   │   ├── builder.py
│   │   │   │   ├── clip_encoder.py
│   │   │   │   ├── eva_clip/
│   │   │   │   │   ├── configuration_evaclip.py
│   │   │   │   │   └── modeling_evaclip.py
│   │   │   │   ├── intern_vit_6b/
│   │   │   │   │   ├── configuration_intern_vit.py
│   │   │   │   │   ├── flash_attention.py
│   │   │   │   │   └── modeling_intern_vit.py
│   │   │   │   └── internvl_14b/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── configuration_intern_vit.py
│   │   │   │       ├── configuration_internvl.py
│   │   │   │       ├── flash_attention.py
│   │   │   │       ├── modeling_intern_vit.py
│   │   │   │       ├── modeling_internvl.py
│   │   │   │       └── modeling_qllama.py
│   │   │   ├── multimodal_projector/
│   │   │   │   └── builder.py
│   │   │   └── utils.py
│   │   ├── serve/
│   │   │   ├── __init__.py
│   │   │   ├── cli.py
│   │   │   ├── controller.py
│   │   │   ├── gradio_web_server.py
│   │   │   ├── model_worker.py
│   │   │   ├── register_worker.py
│   │   │   └── test_message.py
│   │   ├── train/
│   │   │   ├── dist_utils.py
│   │   │   ├── llama_flash_attn_monkey_patch.py
│   │   │   ├── llava_trainer.py
│   │   │   ├── train.py
│   │   │   ├── train_custom.py
│   │   │   ├── train_mem.py
│   │   │   └── train_mem_custom.py
│   │   └── utils.py
│   ├── pyproject.toml
│   ├── scripts/
│   │   ├── convert_gqa_for_eval.py
│   │   ├── convert_mmbench_for_submission.py
│   │   ├── convert_mmvet_for_eval.py
│   │   ├── convert_seed_for_submission.py
│   │   ├── convert_sqa_to_llava.py
│   │   ├── convert_sqa_to_llava_base_prompt.py
│   │   ├── convert_vizwiz_for_submission.py
│   │   ├── convert_vqav2_for_submission.py
│   │   ├── finetune.sh
│   │   ├── finetune_full_schedule.sh
│   │   ├── finetune_lora.sh
│   │   ├── finetune_qlora.sh
│   │   ├── finetune_sqa.sh
│   │   ├── merge_lora_weights.py
│   │   ├── pretrain.sh
│   │   ├── sqa_eval_batch.sh
│   │   ├── sqa_eval_gather.sh
│   │   ├── v1_5/
│   │   │   ├── eval/
│   │   │   │   ├── gqa.sh
│   │   │   │   ├── llavabench.sh
│   │   │   │   ├── mmbench.sh
│   │   │   │   ├── mmbench_cn.sh
│   │   │   │   ├── mme.sh
│   │   │   │   ├── mmvet.sh
│   │   │   │   ├── pope.sh
│   │   │   │   ├── seed.sh
│   │   │   │   ├── sqa.sh
│   │   │   │   ├── textvqa.sh
│   │   │   │   ├── vizwiz.sh
│   │   │   │   └── vqav2.sh
│   │   │   ├── finetune.sh
│   │   │   └── pretrain.sh
│   │   ├── zero1.json
│   │   ├── zero2.json
│   │   ├── zero3.json
│   │   └── zero3_offload.json
│   └── scripts_internvl/
│       ├── eval/
│       │   ├── gqa.sh
│       │   ├── llavabench.sh
│       │   ├── mmbench.sh
│       │   ├── mme.sh
│       │   ├── mmvet.sh
│       │   ├── pope.sh
│       │   ├── sqa.sh
│       │   ├── textvqa.sh
│       │   ├── vizwiz.sh
│       │   └── vqav2.sh
│       ├── finetune_internvit6b_224to336_vicuna13b.sh
│       ├── finetune_internvit6b_224to336_vicuna13b_custom_data.sh
│       ├── finetune_internvit6b_224to336_vicuna7b.sh
│       ├── finetune_internvit6b_448_v1_2_vicuna13b.sh
│       ├── finetune_internvit6b_448_v1_5_vicuna13b.sh
│       ├── finetune_internvit6b_448_vicuna13b.sh
│       ├── finetune_internvit6b_448_vicuna7b.sh
│       ├── meta/
│       │   └── custom_data.json
│       ├── pretrain_internvit6b_224to336_vicuna13b.sh
│       ├── pretrain_internvit6b_224to336_vicuna7b.sh
│       ├── pretrain_internvit6b_448_v1_2_vicuna13b.sh
│       ├── pretrain_internvit6b_448_v1_5_vicuna13b.sh
│       ├── pretrain_internvit6b_448_vicuna13b.sh
│       └── pretrain_internvit6b_448_vicuna7b.sh
├── internvl_g/
│   ├── README.md
│   ├── eval/
│   │   └── evaluate_caption.py
│   ├── evaluate.sh
│   ├── internvl/
│   │   ├── dist_utils.py
│   │   ├── model/
│   │   │   ├── __init__.py
│   │   │   ├── internvl_stage2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── configuration_intern_vit.py
│   │   │   │   ├── configuration_internvl.py
│   │   │   │   ├── flash_attention.py
│   │   │   │   ├── modeling_intern_vit.py
│   │   │   │   ├── modeling_internvl.py
│   │   │   │   └── modeling_qllama.py
│   │   │   └── internvl_stage2_retrieval/
│   │   │       ├── __init__.py
│   │   │       ├── configuration_intern_vit.py
│   │   │       ├── configuration_internvl.py
│   │   │       ├── flash_attention.py
│   │   │       ├── modeling_intern_vit.py
│   │   │       ├── modeling_internvl.py
│   │   │       └── modeling_qllama.py
│   │   └── train/
│   │       ├── __init__.py
│   │       ├── dataset.py
│   │       ├── internvl_stage2_finetune.py
│   │       └── trainer_monkey_patch.py
│   ├── shell/
│   │   ├── finetune/
│   │   │   ├── internvl_stage2_finetune_coco_364_bs1024_ep5.sh
│   │   │   ├── internvl_stage2_finetune_flickr_364_bs1024_ep10.sh
│   │   │   └── internvl_stage2_finetune_flickrcn_364_bs1024_ep10.sh
│   │   ├── head_finetune/
│   │   │   ├── internvl_stage2_finetune_coco_224_bs1024_ep5_head_4gpu.sh
│   │   │   ├── internvl_stage2_finetune_flickr_224_bs1024_ep10_head_4gpu.sh
│   │   │   └── internvl_stage2_finetune_flickrcn_224_bs1024_ep10_head_4gpu.sh
│   │   └── lora_finetune/
│   │       ├── internvl_stage2_finetune_coco_224_bs1024_ep5_lora16_4gpu.sh
│   │       ├── internvl_stage2_finetune_flickr_224_bs1024_ep10_lora16_4gpu.sh
│   │       └── internvl_stage2_finetune_flickrcn_224_bs1024_ep10_lora16_4gpu.sh
│   ├── zero_stage1_config.json
│   ├── zero_stage2_config.json
│   └── zero_stage3_config.json
├── requirements/
│   ├── classification.txt
│   ├── clip_benchmark.txt
│   ├── internvl_chat.txt
│   ├── segmentation.txt
│   └── streamlit_demo.txt
├── requirements.txt
├── segmentation/
│   ├── README.md
│   ├── configs/
│   │   ├── _base_/
│   │   │   ├── datasets/
│   │   │   │   ├── ade20k.py
│   │   │   │   ├── ade20k_504x504.py
│   │   │   │   ├── ade20k_504x504_1of16.py
│   │   │   │   ├── ade20k_504x504_1of2.py
│   │   │   │   ├── ade20k_504x504_1of4.py
│   │   │   │   ├── ade20k_504x504_1of8.py
│   │   │   │   ├── ade20k_640x640.py
│   │   │   │   ├── ade20k_896x896.py
│   │   │   │   ├── chase_db1.py
│   │   │   │   ├── cityscapes.py
│   │   │   │   ├── cityscapes_1024x1024.py
│   │   │   │   ├── cityscapes_768x768.py
│   │   │   │   ├── cityscapes_769x769.py
│   │   │   │   ├── cityscapes_832x832.py
│   │   │   │   ├── coco-stuff10k.py
│   │   │   │   ├── coco-stuff164k.py
│   │   │   │   ├── coco-stuff164k_896x896.py
│   │   │   │   ├── drive.py
│   │   │   │   ├── hrf.py
│   │   │   │   ├── isaid.py
│   │   │   │   ├── loveda.py
│   │   │   │   ├── pascal_context.py
│   │   │   │   ├── pascal_context_59.py
│   │   │   │   ├── pascal_voc12.py
│   │   │   │   ├── pascal_voc12_aug.py
│   │   │   │   ├── potsdam.py
│   │   │   │   ├── stare.py
│   │   │   │   └── vaihingen.py
│   │   │   ├── default_runtime.py
│   │   │   ├── models/
│   │   │   │   ├── ann_r50-d8.py
│   │   │   │   ├── apcnet_r50-d8.py
│   │   │   │   ├── bisenetv1_r18-d32.py
│   │   │   │   ├── bisenetv2.py
│   │   │   │   ├── ccnet_r50-d8.py
│   │   │   │   ├── cgnet.py
│   │   │   │   ├── danet_r50-d8.py
│   │   │   │   ├── deeplabv3_r50-d8.py
│   │   │   │   ├── deeplabv3_unet_s5-d16.py
│   │   │   │   ├── deeplabv3plus_r50-d8.py
│   │   │   │   ├── dmnet_r50-d8.py
│   │   │   │   ├── dnl_r50-d8.py
│   │   │   │   ├── dpt_vit-b16.py
│   │   │   │   ├── emanet_r50-d8.py
│   │   │   │   ├── encnet_r50-d8.py
│   │   │   │   ├── erfnet_fcn.py
│   │   │   │   ├── fast_scnn.py
│   │   │   │   ├── fastfcn_r50-d32_jpu_psp.py
│   │   │   │   ├── fcn_hr18.py
│   │   │   │   ├── fcn_r50-d8.py
│   │   │   │   ├── fcn_unet_s5-d16.py
│   │   │   │   ├── fpn_r50.py
│   │   │   │   ├── gcnet_r50-d8.py
│   │   │   │   ├── icnet_r50-d8.py
│   │   │   │   ├── isanet_r50-d8.py
│   │   │   │   ├── lraspp_m-v3-d8.py
│   │   │   │   ├── mask2former_beit.py
│   │   │   │   ├── nonlocal_r50-d8.py
│   │   │   │   ├── ocrnet_hr18.py
│   │   │   │   ├── ocrnet_r50-d8.py
│   │   │   │   ├── pointrend_r50.py
│   │   │   │   ├── psanet_r50-d8.py
│   │   │   │   ├── pspnet_r50-d8.py
│   │   │   │   ├── pspnet_unet_s5-d16.py
│   │   │   │   ├── segformer_mit-b0.py
│   │   │   │   ├── segmenter_vit-b16_mask.py
│   │   │   │   ├── setr_mla.py
│   │   │   │   ├── setr_naive.py
│   │   │   │   ├── setr_pup.py
│   │   │   │   ├── stdc.py
│   │   │   │   ├── twins_pcpvt-s_fpn.py
│   │   │   │   ├── twins_pcpvt-s_upernet.py
│   │   │   │   ├── upernet_beit.py
│   │   │   │   ├── upernet_convnext.py
│   │   │   │   ├── upernet_mae.py
│   │   │   │   ├── upernet_r50.py
│   │   │   │   ├── upernet_swin.py
│   │   │   │   └── upernet_vit-b16_ln_mln.py
│   │   │   └── schedules/
│   │   │       ├── schedule_10k.py
│   │   │       ├── schedule_160k.py
│   │   │       ├── schedule_20k.py
│   │   │       ├── schedule_320k.py
│   │   │       ├── schedule_40k.py
│   │   │       ├── schedule_5k.py
│   │   │       └── schedule_80k.py
│   │   └── intern_vit_6b/
│   │       ├── few_shot/
│   │       │   ├── linear_intern_vit_6b_504_10k_ade20k_bs16_lr4e-5_1of8.py
│   │       │   ├── linear_intern_vit_6b_504_20k_ade20k_bs16_lr4e-5_1of4.py
│   │       │   ├── linear_intern_vit_6b_504_40k_ade20k_bs16_lr4e-5_1of2.py
│   │       │   ├── linear_intern_vit_6b_504_5k_ade20k_bs16_lr4e-5_1of16.py
│   │       │   └── linear_intern_vit_6b_504_80k_ade20k_bs16_lr4e-5_1of1.py
│   │       ├── full_tuning/
│   │       │   └── upernet_intern_vit_6b_504_80k_ade20k_bs16_lr4e-5.py
│   │       ├── head_tuning/
│   │       │   └── upernet_intern_vit_6b_504_80k_ade20k_bs16_lr4e-5_frozen.py
│   │       └── linear_probing/
│   │           └── linear_intern_vit_6b_504_80k_ade20k_bs16_lr4e-5_frozen.py
│   ├── dist_test.sh
│   ├── dist_train.sh
│   ├── mmcv_custom/
│   │   ├── __init__.py
│   │   ├── ddp_hooks.py
│   │   └── layer_decay_optimizer_constructor.py
│   ├── mmseg_custom/
│   │   ├── __init__.py
│   │   ├── datasets/
│   │   │   ├── __init__.py
│   │   │   ├── ade.py
│   │   │   └── pipelines/
│   │   │       ├── __init__.py
│   │   │       └── transform.py
│   │   └── models/
│   │       ├── __init__.py
│   │       ├── backbones/
│   │       │   ├── __init__.py
│   │       │   ├── flash_attention.py
│   │       │   └── intern_vit_6b.py
│   │       └── decode_heads/
│   │           ├── __init__.py
│   │           └── fcn_head.py
│   ├── release.py
│   ├── slurm_test.sh
│   ├── slurm_train.sh
│   ├── test.py
│   ├── train.py
│   └── zero_configs/
│       ├── adam_fp16.json
│       ├── adam_zero1_amp.json
│       ├── adam_zero1_bf16.json
│       ├── adam_zero1_fp16.json
│       ├── adam_zero2_bf16.json
│       ├── adam_zero2_fp16.json
│       └── adam_zero3_fp16.json
├── streamlit_demo/
│   ├── .streamlit/
│   │   └── config.toml
│   ├── api.py
│   ├── app.py
│   ├── constants.py
│   ├── controller.py
│   ├── library.py
│   ├── model_worker.py
│   ├── sd_worker.py
│   └── utils.py
└── video_retrieval/
    └── test_msrvtt.py