gitextract_1lgs5ey9/

├── LICENSE
├── PrepareVicuna.md
├── README.md
├── config/
│   ├── instructblipbase_avp.yaml
│   ├── instructblipbase_stllm_conversation.yaml
│   ├── instructblipbase_stllm_qa.yaml
│   ├── minigpt4base_avp.yaml
│   └── minigpt4base_stllm_qa.yaml
├── demo.py
├── demo_gradio.py
├── prompts/
│   └── alignment.txt
├── requirement.txt
├── script/
│   ├── inference/
│   │   ├── mvbench/
│   │   │   └── test_mvbench.sh
│   │   ├── qabench/
│   │   │   ├── anet_qa.sh
│   │   │   ├── msrvtt_qa.sh
│   │   │   ├── msvd_qa.sh
│   │   │   ├── score_anet.sh
│   │   │   ├── score_msrvtt.sh
│   │   │   └── score_msvd.sh
│   │   └── vcgbench/
│   │       ├── score_consist.sh
│   │       ├── score_context.sh
│   │       ├── score_correct.sh
│   │       ├── score_detail.sh
│   │       ├── score_temporal.sh
│   │       ├── test_consist.sh
│   │       ├── test_general.sh
│   │       └── test_temporal.sh
│   └── train/
│       └── train.sh
├── stllm/
│   ├── __init__.py
│   ├── common/
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── dist_utils.py
│   │   ├── gradcam.py
│   │   ├── logger.py
│   │   ├── optims.py
│   │   ├── registry.py
│   │   └── utils.py
│   ├── configs/
│   │   ├── datasets/
│   │   │   ├── cc_sbu/
│   │   │   │   ├── align.yaml
│   │   │   │   └── defaults.yaml
│   │   │   └── laion/
│   │   │       └── defaults.yaml
│   │   ├── default.yaml
│   │   └── models/
│   │       ├── instructblip_vicuna0.yaml
│   │       ├── instructblip_vicuna0_btadapter.yaml
│   │       ├── minigpt4_vicuna0.yaml
│   │       └── minigpt4_vicuna0_btadapter.yaml
│   ├── conversation/
│   │   ├── __init__.py
│   │   ├── conversation.py
│   │   └── mvbench_conversation.py
│   ├── datasets/
│   │   ├── __init__.py
│   │   ├── builders/
│   │   │   ├── __init__.py
│   │   │   ├── base_dataset_builder.py
│   │   │   └── image_text_pair_builder.py
│   │   ├── data_utils.py
│   │   └── datasets/
│   │       ├── __init__.py
│   │       ├── base_dataset.py
│   │       ├── caption_datasets.py
│   │       ├── cc_sbu_dataset.py
│   │       ├── dataloader_utils.py
│   │       ├── image_video_itdatasets.py
│   │       ├── instruction_data.py
│   │       ├── laion_dataset.py
│   │       └── utils.py
│   ├── models/
│   │   ├── Qformer.py
│   │   ├── __init__.py
│   │   ├── base_decoder.py
│   │   ├── base_model.py
│   │   ├── blip2.py
│   │   ├── blip2_outputs.py
│   │   ├── eva_btadapter.py
│   │   ├── eva_vit.py
│   │   ├── modeling_llama_mem.py
│   │   ├── peft_model.py
│   │   ├── st_llm.py
│   │   └── utils.py
│   ├── processors/
│   │   ├── __init__.py
│   │   ├── base_processor.py
│   │   ├── blip_processors.py
│   │   ├── randaugment.py
│   │   └── video_transform.py
│   ├── runners/
│   │   ├── __init__.py
│   │   └── runner_base.py
│   ├── tasks/
│   │   ├── __init__.py
│   │   ├── base_task.py
│   │   └── image_text_pretrain.py
│   ├── test/
│   │   ├── __init__.py
│   │   ├── gpt_evaluation/
│   │   │   ├── evaluate_activitynet_qa.py
│   │   │   ├── evaluate_benchmark_1_correctness.py
│   │   │   ├── evaluate_benchmark_2_detailed_orientation.py
│   │   │   ├── evaluate_benchmark_3_context.py
│   │   │   ├── evaluate_benchmark_4_temporal.py
│   │   │   └── evaluate_benchmark_5_consistency.py
│   │   ├── mvbench/
│   │   │   ├── mv_bench.py
│   │   │   └── mv_bench_infer.py
│   │   ├── qabench/
│   │   │   ├── activitynet_qa.py
│   │   │   ├── msrvtt_qa.py
│   │   │   └── msvd_qa.py
│   │   ├── vcgbench/
│   │   │   ├── videochatgpt_benchmark_consist.py
│   │   │   └── videochatgpt_benchmark_general.py
│   │   ├── video_transforms.py
│   │   └── video_utils.py
│   └── train/
│       ├── stllm_trainer.py
│       ├── train.py
│       ├── train_hf.py
│       ├── zero2.json
│       ├── zero3.json
│       └── zero3_offload.json
└── trainval.md