gitextract_fx4cm0n9/

├── .github/
│   └── workflows/
│       └── documentation.yml
├── .gitignore
├── FlagEmbedding/
│   ├── __init__.py
│   ├── abc/
│   │   ├── __init__.py
│   │   ├── evaluation/
│   │   │   ├── __init__.py
│   │   │   ├── arguments.py
│   │   │   ├── data_loader.py
│   │   │   ├── evaluator.py
│   │   │   ├── runner.py
│   │   │   ├── searcher.py
│   │   │   └── utils.py
│   │   ├── finetune/
│   │   │   ├── __init__.py
│   │   │   ├── embedder/
│   │   │   │   ├── AbsArguments.py
│   │   │   │   ├── AbsDataset.py
│   │   │   │   ├── AbsModeling.py
│   │   │   │   ├── AbsRunner.py
│   │   │   │   ├── AbsTrainer.py
│   │   │   │   └── __init__.py
│   │   │   └── reranker/
│   │   │       ├── AbsArguments.py
│   │   │       ├── AbsDataset.py
│   │   │       ├── AbsModeling.py
│   │   │       ├── AbsRunner.py
│   │   │       ├── AbsTrainer.py
│   │   │       └── __init__.py
│   │   └── inference/
│   │       ├── AbsEmbedder.py
│   │       ├── AbsReranker.py
│   │       └── __init__.py
│   ├── evaluation/
│   │   ├── __init__.py
│   │   ├── air_bench/
│   │   │   ├── __init__.py
│   │   │   ├── __main__.py
│   │   │   ├── arguments.py
│   │   │   ├── examples/
│   │   │   │   ├── long-doc/
│   │   │   │   │   ├── arxiv-gemini.jsonl
│   │   │   │   │   ├── arxiv-gpt3.jsonl
│   │   │   │   │   ├── arxiv-llama2.jsonl
│   │   │   │   │   ├── arxiv-llm-survey.jsonl
│   │   │   │   │   ├── book-a-brief-history-of-time_stephen-hawking.jsonl
│   │   │   │   │   ├── book-origin-of-species_darwin.jsonl
│   │   │   │   │   ├── healthcare-pubmed_100k-200k_1.jsonl
│   │   │   │   │   ├── healthcare-pubmed_100k-200k_2.jsonl
│   │   │   │   │   ├── healthcare-pubmed_100k-200k_3.jsonl
│   │   │   │   │   ├── healthcare-pubmed_30k-40k_10-merged.jsonl
│   │   │   │   │   ├── healthcare-pubmed_40k-50k_5-merged.jsonl
│   │   │   │   │   ├── law-lex_files_300k-400k.jsonl
│   │   │   │   │   ├── law-lex_files_400k-500k.jsonl
│   │   │   │   │   ├── law-lex_files_500k-600k.jsonl
│   │   │   │   │   └── law-lex_files_600k-700k.jsonl
│   │   │   │   └── qa/
│   │   │   │       ├── arxiv.jsonl
│   │   │   │       ├── finance.jsonl
│   │   │   │       ├── healthcare.jsonl
│   │   │   │       ├── law.jsonl
│   │   │   │       ├── msmarco.jsonl
│   │   │   │       ├── news.jsonl
│   │   │   │       ├── web.jsonl
│   │   │   │       └── wiki.jsonl
│   │   │   └── runner.py
│   │   ├── beir/
│   │   │   ├── __init__.py
│   │   │   ├── __main__.py
│   │   │   ├── arguments.py
│   │   │   ├── data_loader.py
│   │   │   ├── evaluator.py
│   │   │   ├── prompts.py
│   │   │   └── runner.py
│   │   ├── bright/
│   │   │   ├── __init__.py
│   │   │   ├── __main__.py
│   │   │   ├── arguments.py
│   │   │   ├── data_loader.py
│   │   │   ├── prompts.py
│   │   │   ├── runner.py
│   │   │   └── searcher.py
│   │   ├── custom/
│   │   │   ├── __init__.py
│   │   │   ├── __main__.py
│   │   │   ├── data_loader.py
│   │   │   └── runner.py
│   │   ├── miracl/
│   │   │   ├── __init__.py
│   │   │   ├── __main__.py
│   │   │   ├── data_loader.py
│   │   │   └── runner.py
│   │   ├── mkqa/
│   │   │   ├── __init__.py
│   │   │   ├── __main__.py
│   │   │   ├── data_loader.py
│   │   │   ├── evaluator.py
│   │   │   ├── runner.py
│   │   │   └── utils/
│   │   │       ├── compute_metrics.py
│   │   │       └── normalize_text.py
│   │   ├── mldr/
│   │   │   ├── __init__.py
│   │   │   ├── __main__.py
│   │   │   ├── data_loader.py
│   │   │   └── runner.py
│   │   ├── msmarco/
│   │   │   ├── __init__.py
│   │   │   ├── __main__.py
│   │   │   ├── data_loader.py
│   │   │   └── runner.py
│   │   └── mteb/
│   │       ├── __init__.py
│   │       ├── __main__.py
│   │       ├── arguments.py
│   │       ├── examples/
│   │       │   ├── AmazonCounterfactualClassification.csv
│   │       │   ├── AmazonPolarityClassification.csv
│   │       │   ├── AmazonReviewsClassification.csv
│   │       │   ├── ArguAna.csv
│   │       │   ├── ArxivClusteringP2P.csv
│   │       │   ├── ArxivClusteringS2S.csv
│   │       │   ├── AskUbuntuDupQuestions.csv
│   │       │   ├── BIOSSES.csv
│   │       │   ├── Banking77Classification.csv
│   │       │   ├── BiorxivClusteringP2P.csv
│   │       │   ├── BiorxivClusteringS2S.csv
│   │       │   ├── CQADupstack.csv
│   │       │   ├── CQADupstackRetrieval.csv
│   │       │   ├── ClimateFEVER.csv
│   │       │   ├── DBPedia.csv
│   │       │   ├── EmotionClassification.csv
│   │       │   ├── FEVER.csv
│   │       │   ├── FiQA2018.csv
│   │       │   ├── HotpotQA.csv
│   │       │   ├── ImdbClassification.csv
│   │       │   ├── MSMARCO.csv
│   │       │   ├── MTOPDomainClassification.csv
│   │       │   ├── MTOPIntentClassification.csv
│   │       │   ├── MassiveIntentClassification.csv
│   │       │   ├── MassiveScenarioClassification.csv
│   │       │   ├── MedrxivClusteringP2P.csv
│   │       │   ├── MedrxivClusteringS2S.csv
│   │       │   ├── MindSmallReranking.csv
│   │       │   ├── NFCorpus.csv
│   │       │   ├── NQ.csv
│   │       │   ├── QuoraRetrieval.csv
│   │       │   ├── RedditClustering.csv
│   │       │   ├── RedditClusteringP2P.csv
│   │       │   ├── SCIDOCS.csv
│   │       │   ├── SICK-R.csv
│   │       │   ├── STS12.csv
│   │       │   ├── STS13.csv
│   │       │   ├── STS14.csv
│   │       │   ├── STS15.csv
│   │       │   ├── STS16.csv
│   │       │   ├── STS17.csv
│   │       │   ├── STS22.csv
│   │       │   ├── STSBenchmark.csv
│   │       │   ├── SciDocsRR.csv
│   │       │   ├── SciFact.csv
│   │       │   ├── SprintDuplicateQuestions.csv
│   │       │   ├── StackExchangeClustering.csv
│   │       │   ├── StackExchangeClusteringP2P.csv
│   │       │   ├── StackOverflowDupQuestions.csv
│   │       │   ├── SummEval.csv
│   │       │   ├── TRECCOVID.csv
│   │       │   ├── Touche2020.csv
│   │       │   ├── ToxicConversationsClassification.csv
│   │       │   ├── TweetSentimentExtractionClassification.csv
│   │       │   ├── TwentyNewsgroupsClustering.csv
│   │       │   ├── TwitterSemEval2015.csv
│   │       │   └── TwitterURLCorpus.csv
│   │       ├── prompts.py
│   │       ├── runner.py
│   │       └── searcher.py
│   ├── finetune/
│   │   ├── __init__.py
│   │   ├── embedder/
│   │   │   ├── __init__.py
│   │   │   ├── decoder_only/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __main__.py
│   │   │   │   │   ├── arguments.py
│   │   │   │   │   ├── load_model.py
│   │   │   │   │   ├── modeling.py
│   │   │   │   │   ├── runner.py
│   │   │   │   │   └── trainer.py
│   │   │   │   └── icl/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __main__.py
│   │   │   │       ├── arguments.py
│   │   │   │       ├── dataset.py
│   │   │   │       ├── load_model.py
│   │   │   │       ├── modeling.py
│   │   │   │       ├── runner.py
│   │   │   │       └── trainer.py
│   │   │   └── encoder_only/
│   │   │       ├── __init__.py
│   │   │       ├── base/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── __main__.py
│   │   │       │   ├── modeling.py
│   │   │       │   ├── runner.py
│   │   │       │   └── trainer.py
│   │   │       └── m3/
│   │   │           ├── __init__.py
│   │   │           ├── __main__.py
│   │   │           ├── arguments.py
│   │   │           ├── modeling.py
│   │   │           ├── runner.py
│   │   │           └── trainer.py
│   │   └── reranker/
│   │       ├── __init__.py
│   │       ├── decoder_only/
│   │       │   ├── __init__.py
│   │       │   ├── base/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── __main__.py
│   │       │   │   ├── arguments.py
│   │       │   │   ├── load_model.py
│   │       │   │   ├── modeling.py
│   │       │   │   ├── runner.py
│   │       │   │   └── trainer.py
│   │       │   └── layerwise/
│   │       │       ├── __init__.py
│   │       │       ├── __main__.py
│   │       │       ├── arguments.py
│   │       │       ├── configuration_minicpm_reranker.py
│   │       │       ├── load_model.py
│   │       │       ├── modeling.py
│   │       │       ├── modeling_minicpm_reranker.py
│   │       │       ├── runner.py
│   │       │       └── trainer.py
│   │       └── encoder_only/
│   │           ├── __init__.py
│   │           └── base/
│   │               ├── __init__.py
│   │               ├── __main__.py
│   │               ├── modeling.py
│   │               ├── runner.py
│   │               └── trainer.py
│   ├── inference/
│   │   ├── __init__.py
│   │   ├── auto_embedder.py
│   │   ├── auto_reranker.py
│   │   ├── embedder/
│   │   │   ├── __init__.py
│   │   │   ├── decoder_only/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   └── icl.py
│   │   │   ├── encoder_only/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   └── m3.py
│   │   │   └── model_mapping.py
│   │   └── reranker/
│   │       ├── __init__.py
│   │       ├── decoder_only/
│   │       │   ├── __init__.py
│   │       │   ├── base.py
│   │       │   ├── layerwise.py
│   │       │   ├── lightweight.py
│   │       │   └── models/
│   │       │       ├── __init__.py
│   │       │       ├── configuration_minicpm_reranker.py
│   │       │       ├── gemma_config.py
│   │       │       ├── gemma_model.py
│   │       │       └── modeling_minicpm_reranker.py
│   │       ├── encoder_only/
│   │       │   ├── __init__.py
│   │       │   └── base.py
│   │       └── model_mapping.py
│   └── utils/
│       ├── __init__.py
│       └── transformers_compat.py
├── LICENSE
├── Manifest.in
├── README.md
├── README_zh.md
├── Tutorials/
│   ├── 1_Embedding/
│   │   ├── 1.1_Intro&Inference.ipynb
│   │   ├── 1.2.1_BGE_Series.ipynb
│   │   ├── 1.2.2_Auto_Embedder.ipynb
│   │   ├── 1.2.3_BGE_v1&1.5.ipynb
│   │   ├── 1.2.4_BGE-M3.ipynb
│   │   ├── 1.2.5_BGE_EN_ICL.ipynb
│   │   ├── 1.2.6_BGE_VL.ipynb
│   │   └── 1.2.7_BGE_Code_v1.ipynb
│   ├── 2_Metrics/
│   │   ├── 2.1_Similarity_Metrics.ipynb
│   │   └── 2.2_Eval_Metrics.ipynb
│   ├── 3_Indexing/
│   │   ├── 3.1.1_Intro_to_Faiss.ipynb
│   │   ├── 3.1.2_Faiss_GPU.ipynb
│   │   ├── 3.1.3_Faiss_Indexes.ipynb
│   │   ├── 3.1.4_Faiss_Quantizers.ipynb
│   │   └── 3.1.5_Faiss_Index_Choosing.ipynb
│   ├── 4_Evaluation/
│   │   ├── 4.1.1_Evaluation_MSMARCO.ipynb
│   │   ├── 4.2.1_MTEB_Intro.ipynb
│   │   ├── 4.2.2_MTEB_Leaderboard.ipynb
│   │   ├── 4.2.3_C-MTEB.ipynb
│   │   ├── 4.3.1_Sentence_Transformers_Eval.ipynb
│   │   ├── 4.4.1_BEIR.ipynb
│   │   ├── 4.5.1_MIRACL.ipynb
│   │   ├── 4.5.2_MLDR.ipynb
│   │   └── utils/
│   │       ├── compute_metrics.py
│   │       └── normalize_text.py
│   ├── 5_Reranking/
│   │   ├── 5.1_Intro.ipynb
│   │   ├── 5.2_BGE_Reranker.ipynb
│   │   └── 5.3_Reranker_Eval.ipynb
│   ├── 6_RAG/
│   │   ├── 6.1_RAG_From_Scratch.ipynb
│   │   ├── 6.2_RAG_LangChain.ipynb
│   │   └── 6.3_RAG_LlamaIndex.ipynb
│   ├── 7_Fine-tuning/
│   │   ├── 7.1.1_Data_preparation.ipynb
│   │   ├── 7.1.2_Fine-tune.ipynb
│   │   ├── 7.1.3_Eval_FT_Model.ipynb
│   │   ├── 7.2.1_Hard_Negative_Mining.ipynb
│   │   └── config/
│   │       ├── ds_stage0.json
│   │       └── ds_stage1.json
│   ├── README.md
│   └── quick_start.ipynb
├── dataset/
│   └── README.md
├── docs/
│   ├── Makefile
│   ├── README.md
│   ├── make.bat
│   ├── requirements.txt
│   └── source/
│       ├── API/
│       │   ├── abc/
│       │   │   ├── evaluation/
│       │   │   │   ├── arguments.rst
│       │   │   │   ├── data_loader.rst
│       │   │   │   ├── evaluator.rst
│       │   │   │   ├── runner.rst
│       │   │   │   └── searcher.rst
│       │   │   ├── evaluation.rst
│       │   │   ├── finetune/
│       │   │   │   ├── embedder/
│       │   │   │   │   ├── AbsArguments.rst
│       │   │   │   │   ├── AbsDataset.rst
│       │   │   │   │   ├── AbsModeling.rst
│       │   │   │   │   ├── AbsRunner.rst
│       │   │   │   │   └── AbsTrainer.rst
│       │   │   │   ├── embedder.rst
│       │   │   │   ├── reranker/
│       │   │   │   │   ├── AbsArguments.rst
│       │   │   │   │   ├── AbsDataset.rst
│       │   │   │   │   ├── AbsModeling.rst
│       │   │   │   │   ├── AbsRunner.rst
│       │   │   │   │   └── AbsTrainer.rst
│       │   │   │   └── reranker.rst
│       │   │   ├── finetune.rst
│       │   │   ├── inference/
│       │   │   │   ├── AbsEmbedder.rst
│       │   │   │   └── AbsReranker.rst
│       │   │   └── inference.rst
│       │   ├── abc.rst
│       │   ├── evaluation/
│       │   │   ├── airbench/
│       │   │   │   ├── arguments.rst
│       │   │   │   └── runner.rst
│       │   │   ├── airbench.rst
│       │   │   ├── beir/
│       │   │   │   ├── arguments.rst
│       │   │   │   ├── data_loader.rst
│       │   │   │   ├── evaluator.rst
│       │   │   │   └── runner.rst
│       │   │   ├── beir.rst
│       │   │   ├── miracl/
│       │   │   │   ├── data_loader.rst
│       │   │   │   └── runner.rst
│       │   │   ├── miracl.rst
│       │   │   ├── mkqa/
│       │   │   │   ├── data_loader.rst
│       │   │   │   ├── evaluator.rst
│       │   │   │   └── runner.rst
│       │   │   ├── mkqa.rst
│       │   │   ├── mldr/
│       │   │   │   ├── data_loader.rst
│       │   │   │   └── runner.rst
│       │   │   ├── mldr.rst
│       │   │   ├── msmarco/
│       │   │   │   ├── data_loader.rst
│       │   │   │   └── runner.rst
│       │   │   ├── msmarco.rst
│       │   │   ├── mteb/
│       │   │   │   ├── arguments.rst
│       │   │   │   ├── runner.rst
│       │   │   │   └── searcher.rst
│       │   │   └── mteb.rst
│       │   ├── evaluation.rst
│       │   ├── finetune/
│       │   │   ├── embedder/
│       │   │   │   ├── decoder_only/
│       │   │   │   │   ├── base/
│       │   │   │   │   │   ├── arguments.rst
│       │   │   │   │   │   ├── modeling.rst
│       │   │   │   │   │   ├── runner.rst
│       │   │   │   │   │   └── trainer.rst
│       │   │   │   │   ├── base.rst
│       │   │   │   │   ├── icl/
│       │   │   │   │   │   ├── arguments.rst
│       │   │   │   │   │   ├── dataset.rst
│       │   │   │   │   │   ├── modeling.rst
│       │   │   │   │   │   ├── runner.rst
│       │   │   │   │   │   └── trainer.rst
│       │   │   │   │   └── icl.rst
│       │   │   │   ├── decoder_only.rst
│       │   │   │   ├── encoder_only/
│       │   │   │   │   ├── base/
│       │   │   │   │   │   ├── modeling.rst
│       │   │   │   │   │   ├── runner.rst
│       │   │   │   │   │   └── trainer.rst
│       │   │   │   │   ├── base.rst
│       │   │   │   │   ├── m3/
│       │   │   │   │   │   ├── arguments.rst
│       │   │   │   │   │   ├── modeling.rst
│       │   │   │   │   │   ├── runner.rst
│       │   │   │   │   │   └── trainer.rst
│       │   │   │   │   └── m3.rst
│       │   │   │   └── encoder_only.rst
│       │   │   ├── embedder.rst
│       │   │   ├── reranker/
│       │   │   │   ├── decoder_only/
│       │   │   │   │   ├── base/
│       │   │   │   │   │   ├── arguments.rst
│       │   │   │   │   │   ├── modeling.rst
│       │   │   │   │   │   ├── runner.rst
│       │   │   │   │   │   └── trainer.rst
│       │   │   │   │   ├── base.rst
│       │   │   │   │   ├── layerwise/
│       │   │   │   │   │   ├── arguments.rst
│       │   │   │   │   │   ├── modeling.rst
│       │   │   │   │   │   ├── runner.rst
│       │   │   │   │   │   └── trainer.rst
│       │   │   │   │   └── layerwise.rst
│       │   │   │   ├── decoder_only.rst
│       │   │   │   ├── encoder_only/
│       │   │   │   │   ├── base/
│       │   │   │   │   │   ├── modeling.rst
│       │   │   │   │   │   ├── runner.rst
│       │   │   │   │   │   └── trainer.rst
│       │   │   │   │   └── base.rst
│       │   │   │   └── encoder_only.rst
│       │   │   └── reranker.rst
│       │   ├── finetune.rst
│       │   ├── index.rst
│       │   ├── inference/
│       │   │   ├── FlagAutoModel.rst
│       │   │   ├── FlagAutoReranker.rst
│       │   │   ├── embedder/
│       │   │   │   ├── decoder_only/
│       │   │   │   │   ├── BaseLLMEmbedder.rst
│       │   │   │   │   └── ICLLLMEmbedder.rst
│       │   │   │   ├── embedder.rst
│       │   │   │   └── encoder_only/
│       │   │   │       ├── BaseEmbedder.rst
│       │   │   │       └── M3Embedder.rst
│       │   │   └── reranker/
│       │   │       ├── decoder_only/
│       │   │       │   ├── BaseLLMReranker.rst
│       │   │       │   ├── LayerWiseLLMReranker.rst
│       │   │       │   └── LightweightLLMReranker.rst
│       │   │       ├── encoder_only/
│       │   │       │   └── BaseReranker.rst
│       │   │       └── reranker.rst
│       │   └── inference.rst
│       ├── C-MTEB.rst
│       ├── FAQ/
│       │   └── index.rst
│       ├── Introduction/
│       │   ├── IR.rst
│       │   ├── embedder.rst
│       │   ├── index.rst
│       │   ├── installation.rst
│       │   ├── overview.rst
│       │   ├── quick_start.rst
│       │   ├── reranker.rst
│       │   ├── retrieval_demo.ipynb
│       │   └── similarity.rst
│       ├── _static/
│       │   └── css/
│       │       └── custom.css
│       ├── bge/
│       │   ├── bge_code.rst
│       │   ├── bge_icl.rst
│       │   ├── bge_m3.rst
│       │   ├── bge_reranker.rst
│       │   ├── bge_reranker_v2.rst
│       │   ├── bge_v1_v1.5.rst
│       │   ├── bge_vl.rst
│       │   └── index.rst
│       ├── community/
│       │   └── index.rst
│       ├── conf.py
│       ├── index.rst
│       └── tutorial/
│           ├── 1_Embedding/
│           │   ├── 1.1.1.ipynb
│           │   ├── 1.2.1.ipynb
│           │   ├── 1.2.2.ipynb
│           │   ├── 1.2.3.ipynb
│           │   ├── 1.2.4.ipynb
│           │   └── 1.2.5.ipynb
│           ├── 1_Embedding.rst
│           ├── 2_Metrics/
│           │   ├── 2.1.ipynb
│           │   └── 2.2.ipynb
│           ├── 2_Metrics.rst
│           ├── 3_Indexing/
│           │   ├── 3.1.1.ipynb
│           │   ├── 3.1.2.ipynb
│           │   ├── 3.1.3.ipynb
│           │   ├── 3.1.4.ipynb
│           │   └── 3.1.5.ipynb
│           ├── 3_Indexing.rst
│           ├── 4_Evaluation/
│           │   ├── 4.1.1.ipynb
│           │   ├── 4.2.1.ipynb
│           │   ├── 4.2.2.ipynb
│           │   ├── 4.2.3.ipynb
│           │   ├── 4.3.1.ipynb
│           │   ├── 4.4.1.ipynb
│           │   ├── 4.5.1.ipynb
│           │   └── 4.5.2.ipynb
│           ├── 4_Evaluation.rst
│           ├── 5_Reranking/
│           │   ├── 5.1.ipynb
│           │   ├── 5.2.ipynb
│           │   └── 5.3.ipynb
│           ├── 5_Reranking.rst
│           ├── 6_RAG/
│           │   ├── 6.1.ipynb
│           │   ├── 6.2.ipynb
│           │   └── 6.3.ipynb
│           ├── 6_RAG.rst
│           ├── 7_Finetuning/
│           │   ├── 7.1.1.ipynb
│           │   ├── 7.1.2.ipynb
│           │   ├── 7.1.3.ipynb
│           │   └── 7.2.1.ipynb
│           ├── 7_Finetuning.rst
│           └── index.rst
├── examples/
│   ├── README.md
│   ├── evaluation/
│   │   ├── README.md
│   │   ├── air_bench/
│   │   │   └── eval_air_bench.sh
│   │   ├── beir/
│   │   │   └── eval_beir.sh
│   │   ├── bright/
│   │   │   └── eval_bright_short.sh
│   │   ├── miracl/
│   │   │   └── eval_miracl.sh
│   │   ├── mkqa/
│   │   │   └── eval_mkqa.sh
│   │   ├── mldr/
│   │   │   └── eval_mldr.sh
│   │   ├── msmarco/
│   │   │   └── eval_msmarco.sh
│   │   └── mteb/
│   │       └── eval_mteb.sh
│   ├── finetune/
│   │   ├── ds_stage0.json
│   │   ├── ds_stage1.json
│   │   ├── embedder/
│   │   │   ├── README.md
│   │   │   ├── decoder_only/
│   │   │   │   ├── base.sh
│   │   │   │   ├── base_same_dataset.sh
│   │   │   │   └── icl_same_dataset.sh
│   │   │   ├── encoder_only/
│   │   │   │   ├── base.sh
│   │   │   │   ├── base_same_dataset.sh
│   │   │   │   ├── m3.sh
│   │   │   │   └── m3_same_dataset.sh
│   │   │   └── example_data/
│   │   │       ├── classification-no_in_batch_neg/
│   │   │       │   ├── AmazonClassification.jsonl
│   │   │       │   └── Banking77Classification.jsonl
│   │   │       ├── clustering-no_in_batch_neg/
│   │   │       │   ├── arXiv_title.jsonl
│   │   │       │   └── bioRXiv_title.jsonl
│   │   │       ├── retrieval/
│   │   │       │   ├── msmarco.jsonl
│   │   │       │   ├── nli.jsonl
│   │   │       │   └── nq.jsonl
│   │   │       └── sts/
│   │   │           └── sts.jsonl
│   │   └── reranker/
│   │       ├── README.md
│   │       ├── decoder_only/
│   │       │   ├── base.sh
│   │       │   └── layerwise.sh
│   │       ├── encoder_only/
│   │       │   └── base.sh
│   │       └── example_data/
│   │           ├── normal/
│   │           │   └── examples.jsonl
│   │           └── prompt_based/
│   │               └── examples.jsonl
│   └── inference/
│       ├── embedder/
│       │   ├── README.md
│       │   ├── decoder_only/
│       │   │   ├── auto_base_multi_devices.py
│       │   │   ├── auto_base_single_device.py
│       │   │   ├── auto_icl_multi_devices.py
│       │   │   ├── auto_icl_single_device.py
│       │   │   ├── base_multi_devices.py
│       │   │   ├── base_single_device.py
│       │   │   ├── icl_multi_devices.py
│       │   │   └── icl_single_device.py
│       │   └── encoder_only/
│       │       ├── auto_base_multi_devices.py
│       │       ├── auto_base_single_device.py
│       │       ├── auto_m3_multi_devices.py
│       │       ├── auto_m3_single_device.py
│       │       ├── base_multi_devices.py
│       │       ├── base_single_device.py
│       │       ├── m3_multi_devices.py
│       │       ├── m3_multi_devices_compute_score.py
│       │       ├── m3_single_device.py
│       │       └── m3_single_device_compute_score.py
│       └── reranker/
│           ├── README.md
│           ├── decoder_only/
│           │   ├── auto_base_multi_devices.py
│           │   ├── auto_base_single_device.py
│           │   ├── auto_layerwise_multi_devices.py
│           │   ├── auto_layerwise_single_device.py
│           │   ├── auto_lightweight_multi_devices.py
│           │   ├── auto_lightweight_single_device.py
│           │   ├── base_multi_devices.py
│           │   ├── base_single_device.py
│           │   ├── layerwise_multi_devices.py
│           │   ├── layerwise_single_device.py
│           │   ├── lightweight_multi_devices.py
│           │   └── lightweight_single_device.py
│           └── encoder_only/
│               ├── auto_base_multi_devices.py
│               ├── auto_base_single_device.py
│               ├── base_multi_devices.py
│               └── base_single_device.py
├── research/
│   ├── BGE_Coder/
│   │   ├── README.md
│   │   ├── data_generation/
│   │   │   ├── constant.py
│   │   │   ├── corpus_generator.py
│   │   │   ├── format_generated_examples.py
│   │   │   ├── llm.py
│   │   │   ├── run_generation.py
│   │   │   ├── search.py
│   │   │   ├── triplet_generator.py
│   │   │   └── utils.py
│   │   └── evaluation/
│   │       ├── coderag_eval/
│   │       │   ├── eval.sh
│   │       │   ├── prepare_data.sh
│   │       │   └── test/
│   │       │       ├── arguments.py
│   │       │       ├── create/
│   │       │       │   ├── code_search_net.py
│   │       │       │   ├── ds1000.py
│   │       │       │   ├── general_programming.py
│   │       │       │   ├── humaneval.py
│   │       │       │   ├── live_code_bench.py
│   │       │       │   ├── mbpp.py
│   │       │       │   ├── odex.py
│   │       │       │   ├── repoeval.py
│   │       │       │   ├── repoeval_repo.py
│   │       │       │   ├── swebench.py
│   │       │       │   ├── swebench_repo.py
│   │       │       │   └── utils.py
│   │       │       ├── main.py
│   │       │       └── prompts.py
│   │       └── coir_eval/
│   │           ├── arguments.py
│   │           ├── eval.sh
│   │           ├── main.py
│   │           └── prompts.py
│   ├── BGE_M3/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── arguments.py
│   │   ├── data.py
│   │   ├── modeling.py
│   │   ├── run.py
│   │   ├── split_data_by_length.py
│   │   └── trainer.py
│   ├── BGE_Reasoner/
│   │   └── README.md
│   ├── BGE_VL/
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── eval/
│   │   │   ├── data/
│   │   │   │   ├── circo_corpus.jsonl
│   │   │   │   ├── circo_query.jsonl
│   │   │   │   ├── fashioniq_dress_corpus.jsonl
│   │   │   │   ├── fashioniq_dress_query_val.jsonl
│   │   │   │   ├── fashioniq_shirt_corpus.jsonl
│   │   │   │   ├── fashioniq_shirt_query_val.jsonl
│   │   │   │   ├── fashioniq_toptee_corpus.jsonl
│   │   │   │   └── fashioniq_toptee_query_val.jsonl
│   │   │   ├── eval_Circo.py
│   │   │   ├── eval_fashioniq.py
│   │   │   ├── flag_dataset.py
│   │   │   ├── flag_mmret.py
│   │   │   └── results/
│   │   │       ├── mmret_base_circo.json
│   │   │       └── mmret_large_circo.json
│   │   ├── modeling_MMRet_CLIP.py
│   │   └── retrieval_demo.ipynb
│   ├── BGE_VL_Screenshot/
│   │   └── README.md
│   ├── C_MTEB/
│   │   ├── C_MTEB/
│   │   │   ├── __init__.py
│   │   │   └── tasks/
│   │   │       ├── Classification.py
│   │   │       ├── Clustering.py
│   │   │       ├── MultiLongDocRetrieval.py
│   │   │       ├── PairClassification.py
│   │   │       ├── Reranking.py
│   │   │       ├── Retrieval.py
│   │   │       ├── STS.py
│   │   │       └── __init__.py
│   │   ├── MKQA/
│   │   │   ├── README.md
│   │   │   ├── dense_retrieval/
│   │   │   │   ├── step0-generate_embedding.py
│   │   │   │   ├── step1-search_results.py
│   │   │   │   └── step2-eval_dense_mkqa.py
│   │   │   ├── hybrid_retrieval/
│   │   │   │   ├── step0-hybrid_search_results.py
│   │   │   │   └── step1-eval_hybrid_mkqa.py
│   │   │   ├── multi_vector_rerank/
│   │   │   │   ├── hybrid_all_results.py
│   │   │   │   ├── step0-rerank_results.py
│   │   │   │   └── step1-eval_rerank_mkqa.py
│   │   │   ├── sparse_retrieval/
│   │   │   │   ├── bm25_baseline.py
│   │   │   │   ├── bm25_baseline_same_tokenizer.py
│   │   │   │   ├── step0-encode_query-and-corpus.py
│   │   │   │   ├── step1-search_results.py
│   │   │   │   └── step2-eval_sparse_mkqa.py
│   │   │   └── utils/
│   │   │       ├── __init__.py
│   │   │       ├── evaluation.py
│   │   │       └── normalize_text.py
│   │   ├── MLDR/
│   │   │   ├── README.md
│   │   │   ├── dense_retrieval/
│   │   │   │   ├── step0-generate_embedding.py
│   │   │   │   ├── step1-search_results.py
│   │   │   │   └── step2-eval_dense_mldr.py
│   │   │   ├── hybrid_retrieval/
│   │   │   │   ├── step0-hybrid_search_results.py
│   │   │   │   └── step1-eval_hybrid_mldr.py
│   │   │   ├── mteb_dense_eval/
│   │   │   │   ├── eval_MLDR.py
│   │   │   │   └── flag_dres_model.py
│   │   │   ├── multi_vector_rerank/
│   │   │   │   ├── hybrid_all_results.py
│   │   │   │   ├── step0-rerank_results.py
│   │   │   │   └── step1-eval_rerank_mldr.py
│   │   │   └── sparse_retrieval/
│   │   │       ├── bm25_baseline.py
│   │   │       ├── bm25_baseline_same_tokenizer.py
│   │   │       ├── step0-encode_query-and-corpus.py
│   │   │       ├── step1-search_results.py
│   │   │       └── step2-eval_sparse_mldr.py
│   │   ├── README.md
│   │   ├── eval_C-MTEB.py
│   │   ├── eval_MTEB.py
│   │   ├── eval_cross_encoder.py
│   │   ├── flag_dres_model.py
│   │   ├── setup.py
│   │   └── summarize_results.py
│   ├── LLARA/
│   │   ├── README.md
│   │   ├── data/
│   │   │   ├── finetune/
│   │   │   │   └── toy_finetune_data.jsonl
│   │   │   └── pretrain/
│   │   │       └── toy_pretrain_data.jsonl
│   │   ├── finetune/
│   │   │   ├── __init__.py
│   │   │   ├── arguments.py
│   │   │   ├── data.py
│   │   │   ├── load_model.py
│   │   │   ├── modeling.py
│   │   │   ├── run.py
│   │   │   └── trainer.py
│   │   ├── pretrain/
│   │   │   ├── __init__.py
│   │   │   ├── arguments.py
│   │   │   ├── data.py
│   │   │   ├── load_model.py
│   │   │   ├── modeling.py
│   │   │   ├── run.py
│   │   │   └── trainer.py
│   │   └── stage1.json
│   ├── LM_Cocktail/
│   │   ├── LM_Cocktail/
│   │   │   ├── __init__.py
│   │   │   ├── cocktail.py
│   │   │   └── utils.py
│   │   ├── README.md
│   │   ├── embedder_examples.json
│   │   ├── llm_examples.json
│   │   └── setup.py
│   ├── Long_LLM/
│   │   ├── activation_beacon/
│   │   │   ├── README.md
│   │   │   ├── data/
│   │   │   │   ├── config/
│   │   │   │   │   ├── code.json
│   │   │   │   │   ├── even.json
│   │   │   │   │   ├── fsdp-offload.yaml
│   │   │   │   │   ├── fsdp.yaml
│   │   │   │   │   ├── slimpajama.json
│   │   │   │   │   ├── zero3-infer-offload.yaml
│   │   │   │   │   └── zero3-infer.yaml
│   │   │   │   ├── deepspeed/
│   │   │   │   │   ├── stage2-offload.json
│   │   │   │   │   ├── stage2.json
│   │   │   │   │   ├── stage3-offload-optim.json
│   │   │   │   │   ├── stage3-offload.json
│   │   │   │   │   └── stage3.json
│   │   │   │   └── toy/
│   │   │   │       └── infbench.json
│   │   │   ├── examples/
│   │   │   │   ├── evaluation.md
│   │   │   │   └── training.md
│   │   │   ├── main/
│   │   │   │   ├── eval_generation.py
│   │   │   │   ├── eval_infbench.py
│   │   │   │   ├── eval_lm.py
│   │   │   │   ├── eval_longbench.py
│   │   │   │   ├── eval_mmlu.py
│   │   │   │   ├── eval_msc.py
│   │   │   │   ├── eval_multiturn.py
│   │   │   │   ├── eval_needle.py
│   │   │   │   ├── eval_passkey.py
│   │   │   │   ├── eval_topic.py
│   │   │   │   ├── infbench_utils.py
│   │   │   │   ├── longbench_utils.py
│   │   │   │   ├── pretrain_data.py
│   │   │   │   ├── train.py
│   │   │   │   └── vllm_symlink.py
│   │   │   └── src/
│   │   │       ├── __init__.py
│   │   │       ├── args.py
│   │   │       ├── chat.py
│   │   │       ├── data.py
│   │   │       ├── llama/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── configuration_llama.py
│   │   │       │   └── modeling_llama.py
│   │   │       ├── metrics.py
│   │   │       ├── mistral/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── configuration_mistral.py
│   │   │       │   └── modeling_mistral.py
│   │   │       ├── modeling_beacon.py
│   │   │       ├── modeling_utils.py
│   │   │       ├── qwen2/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── configuration_qwen2.py
│   │   │       │   └── modeling_qwen2.py
│   │   │       ├── trainer.py
│   │   │       ├── utils.py
│   │   │       └── vllm_utils.py
│   │   └── longllm_qlora/
│   │       ├── README.md
│   │       ├── data/
│   │       │   └── narrativeqa.json
│   │       ├── data_pipeline/
│   │       │   ├── README.md
│   │       │   ├── _openai.py
│   │       │   ├── data/
│   │       │   │   └── README.md
│   │       │   ├── prepare_bio_book.ipynb
│   │       │   ├── prepare_multi_details_book.ipynb
│   │       │   ├── prepare_multi_details_paper_long.ipynb
│   │       │   ├── prepare_one_detail_book.ipynb
│   │       │   ├── prepare_one_detail_paper_long.ipynb
│   │       │   └── raw_data/
│   │       │       └── README.md
│   │       ├── main/
│   │       │   ├── eval_generation.py
│   │       │   ├── eval_infbench.py
│   │       │   ├── eval_lm.py
│   │       │   ├── eval_longbench.py
│   │       │   ├── eval_mmlu.py
│   │       │   ├── eval_needle.py
│   │       │   ├── eval_passkey.py
│   │       │   ├── eval_topic.py
│   │       │   ├── infbench_utils.py
│   │       │   ├── longbench_utils.py
│   │       │   └── train.py
│   │       └── src/
│   │           ├── __init__.py
│   │           ├── args.py
│   │           ├── chat.py
│   │           ├── data.py
│   │           ├── metrics.py
│   │           ├── modeling_utils.py
│   │           ├── trainer.py
│   │           └── utils.py
│   ├── MLVU/
│   │   ├── README.md
│   │   ├── data/
│   │   │   ├── 1_plotQA.json
│   │   │   ├── 2_needle.json
│   │   │   ├── 3_ego.json
│   │   │   ├── 4_count.json
│   │   │   ├── 5_order.json
│   │   │   ├── 6_anomaly_reco.json
│   │   │   ├── 7_topic_reasoning.json
│   │   │   ├── 8_sub_scene.json
│   │   │   └── 9_summary.json
│   │   └── evaluation/
│   │       ├── README.md
│   │       ├── generation_evaluation/
│   │       │   ├── calculate.py
│   │       │   ├── calculate_sum.py
│   │       │   ├── evaluate_ssc.py
│   │       │   ├── evaluate_summary.py
│   │       │   └── open_bench.py
│   │       ├── models/
│   │       │   ├── videochat2/
│   │       │   │   ├── choice_bench.py
│   │       │   │   └── open_bench.py
│   │       │   └── videollava/
│   │       │       ├── choice_bench.py
│   │       │       └── open_bench.py
│   │       └── multiple_choice_evaluation/
│   │           └── choice_bench.py
│   ├── Matroyshka_reranker/
│   │   ├── README.md
│   │   ├── finetune/
│   │   │   ├── compensation/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── arguments.py
│   │   │   │   ├── data.py
│   │   │   │   ├── load_model.py
│   │   │   │   ├── mistral_config.py
│   │   │   │   ├── mistral_model.py
│   │   │   │   ├── modeling.py
│   │   │   │   ├── run.py
│   │   │   │   ├── stage1.json
│   │   │   │   └── trainer.py
│   │   │   └── self_distillation/
│   │   │       ├── __init__.py
│   │   │       ├── arguments.py
│   │   │       ├── data.py
│   │   │       ├── load_model.py
│   │   │       ├── mistral_config.py
│   │   │       ├── mistral_model.py
│   │   │       ├── modeling.py
│   │   │       ├── run.py
│   │   │       ├── stage1.json
│   │   │       └── trainer.py
│   │   ├── inference/
│   │   │   ├── __init__.py
│   │   │   ├── mistral_config.py
│   │   │   ├── mistral_model.py
│   │   │   └── rank_model.py
│   │   └── requirements.txt
│   ├── README.md
│   ├── Reinforced_IR/
│   │   ├── README.md
│   │   ├── data_generation/
│   │   │   ├── agent/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gpt.py
│   │   │   │   ├── vllm.py
│   │   │   │   └── vllm_instruct.py
│   │   │   ├── generate_generator_data.py
│   │   │   ├── generate_retriever_data.py
│   │   │   ├── generate_retriever_distill_data.py
│   │   │   ├── generate_universal_query.py
│   │   │   ├── prompts/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── generate_prompts.py
│   │   │   │   ├── get_prompts.py
│   │   │   │   ├── hyde_prompts.py
│   │   │   │   ├── teacher_prompts.py
│   │   │   │   └── train_prompts.py
│   │   │   └── utils.py
│   │   ├── finetune/
│   │   │   ├── generator/
│   │   │   │   ├── save_tokenizer.py
│   │   │   │   └── update_file.py
│   │   │   ├── retriever/
│   │   │   │   ├── arguments.py
│   │   │   │   ├── dataset.py
│   │   │   │   ├── modeling.py
│   │   │   │   ├── run.py
│   │   │   │   ├── runner.py
│   │   │   │   └── trainer.py
│   │   │   └── stage1.json
│   │   ├── inference/
│   │   │   ├── agent/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gpt.py
│   │   │   │   ├── vllm.py
│   │   │   │   └── vllm_instruct.py
│   │   │   ├── ir_model.py
│   │   │   ├── multi.py
│   │   │   └── test.py
│   │   └── requirements.txt
│   ├── baai_general_embedding/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── finetune/
│   │   │   ├── __init__.py
│   │   │   ├── arguments.py
│   │   │   ├── data.py
│   │   │   ├── eval_msmarco.py
│   │   │   ├── hn_mine.py
│   │   │   ├── modeling.py
│   │   │   ├── run.py
│   │   │   └── trainer.py
│   │   └── retromae_pretrain/
│   │       ├── __init__.py
│   │       ├── arguments.py
│   │       ├── data.py
│   │       ├── enhancedDecoder.py
│   │       ├── modeling.py
│   │       ├── run.py
│   │       ├── trainer.py
│   │       └── utils.py
│   ├── llm_dense_retriever/
│   │   ├── README.md
│   │   ├── examples/
│   │   │   └── bge-en-icl/
│   │   │       ├── AIR-Bench/
│   │   │       │   ├── long-doc/
│   │   │       │   │   ├── arxiv-gemini.jsonl
│   │   │       │   │   ├── arxiv-gpt3.jsonl
│   │   │       │   │   ├── arxiv-llama2.jsonl
│   │   │       │   │   ├── arxiv-llm-survey.jsonl
│   │   │       │   │   ├── book-a-brief-history-of-time_stephen-hawking.jsonl
│   │   │       │   │   ├── book-origin-of-species_darwin.jsonl
│   │   │       │   │   ├── healthcare-pubmed_100k-200k_1.jsonl
│   │   │       │   │   ├── healthcare-pubmed_100k-200k_2.jsonl
│   │   │       │   │   ├── healthcare-pubmed_100k-200k_3.jsonl
│   │   │       │   │   ├── healthcare-pubmed_30k-40k_10-merged.jsonl
│   │   │       │   │   ├── healthcare-pubmed_40k-50k_5-merged.jsonl
│   │   │       │   │   ├── law-lex_files_300k-400k.jsonl
│   │   │       │   │   ├── law-lex_files_400k-500k.jsonl
│   │   │       │   │   ├── law-lex_files_500k-600k.jsonl
│   │   │       │   │   └── law-lex_files_600k-700k.jsonl
│   │   │       │   └── qa/
│   │   │       │       ├── arxiv.jsonl
│   │   │       │       ├── finance.jsonl
│   │   │       │       ├── healthcare.jsonl
│   │   │       │       ├── law.jsonl
│   │   │       │       ├── msmarco.jsonl
│   │   │       │       ├── news.jsonl
│   │   │       │       ├── web.jsonl
│   │   │       │       └── wiki.jsonl
│   │   │       └── MTEB/
│   │   │           ├── AmazonCounterfactualClassification.json
│   │   │           ├── AmazonPolarityClassification.json
│   │   │           ├── AmazonReviewsClassification.json
│   │   │           ├── ArguAna.json
│   │   │           ├── ArxivClusteringP2P.json
│   │   │           ├── ArxivClusteringS2S.json
│   │   │           ├── AskUbuntuDupQuestions.json
│   │   │           ├── BIOSSES.json
│   │   │           ├── Banking77Classification.json
│   │   │           ├── BiorxivClusteringP2P.json
│   │   │           ├── BiorxivClusteringS2S.json
│   │   │           ├── CQADupstackRetrieval.json
│   │   │           ├── ClimateFEVER.json
│   │   │           ├── DBPedia.json
│   │   │           ├── EmotionClassification.json
│   │   │           ├── FEVER.json
│   │   │           ├── FiQA2018.json
│   │   │           ├── HotpotQA.json
│   │   │           ├── ImdbClassification.json
│   │   │           ├── MSMARCO.json
│   │   │           ├── MTOPDomainClassification.json
│   │   │           ├── MTOPIntentClassification.json
│   │   │           ├── MassiveIntentClassification.json
│   │   │           ├── MassiveScenarioClassification.json
│   │   │           ├── MedrxivClusteringP2P.json
│   │   │           ├── MedrxivClusteringS2S.json
│   │   │           ├── MindSmallReranking.json
│   │   │           ├── NFCorpus.json
│   │   │           ├── NQ.json
│   │   │           ├── QuoraRetrieval.json
│   │   │           ├── RedditClustering.json
│   │   │           ├── RedditClusteringP2P.json
│   │   │           ├── SCIDOCS.json
│   │   │           ├── SICK-R.json
│   │   │           ├── STS12.json
│   │   │           ├── STS13.json
│   │   │           ├── STS14.json
│   │   │           ├── STS15.json
│   │   │           ├── STS16.json
│   │   │           ├── STS17.json
│   │   │           ├── STS22.json
│   │   │           ├── STSBenchmark.json
│   │   │           ├── SciDocsRR.json
│   │   │           ├── SciFact.json
│   │   │           ├── SprintDuplicateQuestions.json
│   │   │           ├── StackExchangeClustering.json
│   │   │           ├── StackExchangeClusteringP2P.json
│   │   │           ├── StackOverflowDupQuestions.json
│   │   │           ├── SummEval.json
│   │   │           ├── TRECCOVID.json
│   │   │           ├── Touche2020.json
│   │   │           ├── ToxicConversationsClassification.json
│   │   │           ├── TweetSentimentExtractionClassification.json
│   │   │           ├── TwentyNewsgroupsClustering.json
│   │   │           ├── TwitterSemEval2015.json
│   │   │           └── TwitterURLCorpus.json
│   │   └── finetune/
│   │       ├── arguments.py
│   │       ├── data.py
│   │       ├── load_model.py
│   │       ├── modeling.py
│   │       ├── run.py
│   │       └── trainer.py
│   ├── llm_embedder/
│   │   ├── README.md
│   │   ├── data/
│   │   │   ├── deepspeed/
│   │   │   │   ├── stage0.json
│   │   │   │   ├── stage2-offload.json
│   │   │   │   ├── stage2.json
│   │   │   │   ├── stage3-offload-all.json
│   │   │   │   ├── stage3-offload-optim.json
│   │   │   │   └── stage3.json
│   │   │   └── toy/
│   │   │       ├── chat.json
│   │   │       ├── convsearch.json
│   │   │       ├── icl.json
│   │   │       ├── lrlm.json
│   │   │       ├── qa.json
│   │   │       └── tool.json
│   │   ├── docs/
│   │   │   ├── evaluation.md
│   │   │   └── fine-tune.md
│   │   ├── environment.yaml
│   │   ├── evaluation/
│   │   │   ├── __init__.py
│   │   │   ├── eval_icl.py
│   │   │   ├── eval_lrlm.py
│   │   │   ├── eval_mmlu.py
│   │   │   ├── eval_msc.py
│   │   │   ├── eval_popqa.py
│   │   │   ├── eval_qa.py
│   │   │   ├── eval_qrecc.py
│   │   │   ├── eval_retrieval.py
│   │   │   ├── eval_tool.py
│   │   │   └── icl_utils.py
│   │   ├── run_dense.py
│   │   ├── run_lm_score.py
│   │   ├── run_ranker.py
│   │   ├── scripts/
│   │   │   ├── llm-embedder.sh
│   │   │   └── ours2st.py
│   │   └── src/
│   │       ├── __init__.py
│   │       ├── lm/
│   │       │   ├── __init__.py
│   │       │   ├── args.py
│   │       │   ├── modeling_lm.py
│   │       │   └── modeling_srlm.py
│   │       ├── retrieval/
│   │       │   ├── __init__.py
│   │       │   ├── args.py
│   │       │   ├── data.py
│   │       │   ├── evalnq.py
│   │       │   ├── metrics.py
│   │       │   ├── modeling_bm25.py
│   │       │   ├── modeling_dense.py
│   │       │   ├── modeling_ranker.py
│   │       │   ├── modeling_unified.py
│   │       │   └── trainer.py
│   │       └── utils/
│   │           ├── __init__.py
│   │           ├── llama_patch.py
│   │           └── util.py
│   ├── llm_reranker/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── evaluate.py
│   │   ├── finetune_for_instruction/
│   │   │   ├── __init__.py
│   │   │   ├── arguments.py
│   │   │   ├── data.py
│   │   │   ├── load_model.py
│   │   │   ├── modeling.py
│   │   │   ├── run.py
│   │   │   └── trainer.py
│   │   ├── finetune_for_layerwise/
│   │   │   ├── __init__.py
│   │   │   ├── arguments.py
│   │   │   ├── configuration_minicpm_reranker.py
│   │   │   ├── data.py
│   │   │   ├── load_model.py
│   │   │   ├── modeling.py
│   │   │   ├── modeling_minicpm_reranker.py
│   │   │   ├── run.py
│   │   │   └── trainer.py
│   │   ├── merge/
│   │   │   ├── __init__.py
│   │   │   ├── configuration_minicpm_reranker.py
│   │   │   ├── merge_base_model.py
│   │   │   ├── merge_layerwise_model_from_finetuned_model.py
│   │   │   ├── merge_layerwise_model_from_raw_model.py
│   │   │   └── modeling_minicpm_reranker.py
│   │   ├── stage1.json
│   │   └── toy_finetune_data.jsonl
│   ├── old-examples/
│   │   ├── finetune/
│   │   │   ├── README.md
│   │   │   ├── ds_config.json
│   │   │   ├── toy_evaluation_data/
│   │   │   │   ├── toy_corpus.json
│   │   │   │   └── toy_query.json
│   │   │   └── toy_finetune_data.jsonl
│   │   ├── pretrain/
│   │   │   ├── README.md
│   │   │   ├── retromae_pretrain/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── arguments.py
│   │   │   │   ├── data.py
│   │   │   │   ├── enhancedDecoder.py
│   │   │   │   ├── modeling.py
│   │   │   │   ├── run.py
│   │   │   │   ├── trainer.py
│   │   │   │   └── utils.py
│   │   │   └── toy_pretrain_data.jsonl
│   │   ├── reranker/
│   │   │   ├── README.md
│   │   │   ├── ds_config.json
│   │   │   └── toy_finetune_data.jsonl
│   │   ├── search_demo/
│   │   │   ├── __init__.py
│   │   │   ├── arguments.py
│   │   │   ├── pre_process.py
│   │   │   ├── readme.md
│   │   │   ├── requirements.txt
│   │   │   ├── run.py
│   │   │   └── tool.py
│   │   └── unified_finetune/
│   │       ├── README.md
│   │       ├── toy_train_data/
│   │       │   ├── toy_train_data1.jsonl
│   │       │   └── toy_train_data2.jsonl
│   │       └── unified_finetune_bge-m3_exmaple.sh
│   ├── reranker/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── arguments.py
│   │   ├── data.py
│   │   ├── modeling.py
│   │   ├── run.py
│   │   └── trainer.py
│   └── visual_bge/
│       ├── README.md
│       ├── __init__.py
│       ├── setup.py
│       └── visual_bge/
│           ├── eva_clip/
│           │   ├── __init__.py
│           │   ├── constants.py
│           │   ├── eva_vit_model.py
│           │   ├── factory.py
│           │   ├── hf_configs.py
│           │   ├── hf_model.py
│           │   ├── loss.py
│           │   ├── model.py
│           │   ├── model_configs/
│           │   │   ├── EVA01-CLIP-B-16.json
│           │   │   ├── EVA01-CLIP-g-14-plus.json
│           │   │   ├── EVA01-CLIP-g-14.json
│           │   │   ├── EVA02-CLIP-B-16.json
│           │   │   ├── EVA02-CLIP-L-14-336.json
│           │   │   ├── EVA02-CLIP-L-14.json
│           │   │   ├── EVA02-CLIP-bigE-14-plus.json
│           │   │   └── EVA02-CLIP-bigE-14.json
│           │   ├── modified_resnet.py
│           │   ├── openai.py
│           │   ├── pretrained.py
│           │   ├── rope.py
│           │   ├── timm_model.py
│           │   ├── tokenizer.py
│           │   ├── transform.py
│           │   ├── transformer.py
│           │   └── utils.py
│           └── modeling.py
├── scripts/
│   ├── README.md
│   ├── add_reranker_score.py
│   ├── hn_mine.py
│   └── split_data_by_length.py
├── setup.py
└── tests/
    ├── README.md
    ├── conftest.py
    ├── test_imports_v5.py
    ├── test_infer_embedder_basic.py
    └── test_infer_reranker_basic.py