SYMBOL INDEX (3104 symbols across 447 files)

FILE: FlagEmbedding/abc/evaluation/arguments.py
  class AbsEvalArgs (line 10) | class AbsEvalArgs:
  class AbsEvalModelArgs (line 82) | class AbsEvalModelArgs:
    method __post_init__ (line 181) | def __post_init__(self):

FILE: FlagEmbedding/abc/evaluation/data_loader.py
  class AbsEvalDataLoader (line 14) | class AbsEvalDataLoader(ABC):
    method __init__ (line 25) | def __init__(
    method available_dataset_names (line 42) | def available_dataset_names(self) -> List[str]:
    method available_splits (line 49) | def available_splits(self, dataset_name: Optional[str] = None) -> List...
    method check_dataset_names (line 55) | def check_dataset_names(self, dataset_names: Union[str, List[str]]) ->...
    method check_splits (line 76) | def check_splits(self, splits: Union[str, List[str]], dataset_name: Op...
    method load_corpus (line 97) | def load_corpus(self, dataset_name: Optional[str] = None) -> datasets....
    method load_qrels (line 115) | def load_qrels(self, dataset_name: Optional[str] = None, split: str = ...
    method load_queries (line 143) | def load_queries(self, dataset_name: Optional[str] = None, split: str ...
    method _load_remote_corpus (line 171) | def _load_remote_corpus(
    method _load_remote_qrels (line 190) | def _load_remote_qrels(
    method _load_remote_queries (line 211) | def _load_remote_queries(
    method _load_local_corpus (line 232) | def _load_local_corpus(self, save_dir: str, dataset_name: Optional[str...
    method _load_local_qrels (line 255) | def _load_local_qrels(self, save_dir: str, dataset_name: Optional[str]...
    method _load_local_queries (line 290) | def _load_local_queries(self, save_dir: str, dataset_name: Optional[st...
    method _download_file (line 319) | def _download_file(self, download_url: str, save_dir: str):
    method _get_fpath_size (line 350) | def _get_fpath_size(self, fpath: str) -> int:
    method _download_gz_file (line 369) | def _download_gz_file(self, download_url: str, save_dir: str):
    method _download_zip_file (line 395) | def _download_zip_file(self, download_url: str, save_dir: str):

FILE: FlagEmbedding/abc/evaluation/evaluator.py
  class AbsEvaluator (line 18) | class AbsEvaluator:
    method __init__ (line 27) | def __init__(
    method check_data_info (line 37) | def check_data_info(
    method get_corpus_embd_save_dir (line 80) | def get_corpus_embd_save_dir(
    method __call__ (line 102) | def __call__(
    method save_search_results (line 267) | def save_search_results(
    method load_search_results (line 302) | def load_search_results(input_path: str):
    method compute_metrics (line 318) | def compute_metrics(
    method evaluate_results (line 358) | def evaluate_results(
    method output_eval_results_to_json (line 403) | def output_eval_results_to_json(eval_results_dict: dict, output_path: ...
    method get_results_df (line 417) | def get_results_df(metric: str, eval_results_dict: dict):
    method output_eval_results_to_markdown (line 467) | def output_eval_results_to_markdown(eval_results_dict: dict, output_pa...

FILE: FlagEmbedding/abc/evaluation/runner.py
  class AbsEvalRunner (line 16) | class AbsEvalRunner:
    method __init__ (line 24) | def __init__(
    method get_models (line 37) | def get_models(model_args: AbsEvalModelArgs) -> Tuple[AbsEmbedder, Uni...
    method load_retriever_and_reranker (line 92) | def load_retriever_and_reranker(self) -> Tuple[EvalDenseRetriever, Uni...
    method load_data_loader (line 109) | def load_data_loader(self) -> AbsEvalDataLoader:
    method load_evaluator (line 124) | def load_evaluator(self) -> AbsEvaluator:
    method evaluate_metrics (line 138) | def evaluate_metrics(
    method run (line 183) | def run(self):

FILE: FlagEmbedding/abc/evaluation/searcher.py
  class EvalRetriever (line 18) | class EvalRetriever(ABC):
    method __init__ (line 22) | def __init__(self, embedder: AbsEmbedder, search_top_k: int = 1000, ov...
    method __str__ (line 27) | def __str__(self) -> str:
    method stop_multi_process_pool (line 33) | def stop_multi_process_pool(self):
    method __call__ (line 43) | def __call__(
  class EvalDenseRetriever (line 71) | class EvalDenseRetriever(EvalRetriever):
    method __call__ (line 75) | def __call__(
  class EvalReranker (line 160) | class EvalReranker:
    method __init__ (line 164) | def __init__(self, reranker: AbsReranker, rerank_top_k: int = 100):
    method __str__ (line 168) | def __str__(self) -> str:
    method stop_multi_process_pool (line 174) | def stop_multi_process_pool(self):
    method __call__ (line 183) | def __call__(

FILE: FlagEmbedding/abc/evaluation/utils.py
  function evaluate_mrr (line 14) | def evaluate_mrr(
  function evaluate_recall_cap (line 56) | def evaluate_recall_cap(
  function evaluate_metrics (line 95) | def evaluate_metrics(
  function index (line 150) | def index(
  function search (line 192) | def search(

FILE: FlagEmbedding/abc/finetune/embedder/AbsArguments.py
  class AbsEmbedderModelArguments (line 9) | class AbsEmbedderModelArguments:
  class AbsEmbedderDataArguments (line 44) | class AbsEmbedderDataArguments:
    method __post_init__ (line 120) | def __post_init__(self):
  class AbsEmbedderTrainingArguments (line 134) | class AbsEmbedderTrainingArguments(TrainingArguments):

FILE: FlagEmbedding/abc/finetune/embedder/AbsDataset.py
  class AbsEmbedderTrainDataset (line 23) | class AbsEmbedderTrainDataset(Dataset):
    method __init__ (line 30) | def __init__(
    method _load_dataset (line 54) | def _load_dataset(self, file_path: str):
    method _shuffle_text (line 83) | def _shuffle_text(self, text):
    method __len__ (line 102) | def __len__(self):
    method __getitem__ (line 105) | def __getitem__(self, item):
  class AbsEmbedderCollator (line 154) | class AbsEmbedderCollator(DataCollatorWithPadding):
    method __call__ (line 162) | def __call__(self, features):
  class AbsEmbedderSameDatasetTrainDataset (line 245) | class AbsEmbedderSameDatasetTrainDataset(AbsEmbedderTrainDataset):
    method __init__ (line 256) | def __init__(
    method _load_dataset (line 337) | def _load_dataset(self, file_path: str):
    method _get_file_batch_size (line 361) | def _get_file_batch_size(temp_dataset: datasets.Dataset, default_batch...
    method refresh_epoch (line 379) | def refresh_epoch(self):
    method __len__ (line 403) | def __len__(self):
    method __getitem__ (line 406) | def __getitem__(self, _):
    method _get_train_group_size (line 415) | def _get_train_group_size(self, batch_raw_data):
    method _create_batch_data (line 441) | def _create_batch_data(self, batch_raw_data):
  class AbsEmbedderSameDatasetCollator (line 514) | class AbsEmbedderSameDatasetCollator(DataCollatorWithPadding):
    method __call__ (line 527) | def __call__(self, features):
  class EmbedderTrainerCallbackForDataRefresh (line 607) | class EmbedderTrainerCallbackForDataRefresh(TrainerCallback):
    method __init__ (line 611) | def __init__(self, train_dataset: AbsEmbedderSameDatasetTrainDataset):
    method on_epoch_end (line 614) | def on_epoch_end(

FILE: FlagEmbedding/abc/finetune/embedder/AbsModeling.py
  class EmbedderOutput (line 17) | class EmbedderOutput(ModelOutput):
  class AbsEmbedderModel (line 27) | class AbsEmbedderModel(ABC, nn.Module):
    method __init__ (line 39) | def __init__(
    method encode (line 64) | def encode(self, features):
    method compute_loss (line 73) | def compute_loss(self, scores, target):
    method compute_score (line 83) | def compute_score(self, q_reps, p_reps):
    method save (line 93) | def save(self, output_dir: str):
    method get_local_score (line 101) | def get_local_score(self, q_reps, p_reps, all_scores):
    method compute_local_score (line 121) | def compute_local_score(self, q_reps, p_reps, compute_score_func=None,...
    method _compute_no_in_batch_neg_loss (line 140) | def _compute_no_in_batch_neg_loss(self, q_reps, p_reps, teacher_target...
    method _compute_in_batch_neg_loss (line 162) | def _compute_in_batch_neg_loss(self, q_reps, p_reps, teacher_targets=N...
    method _compute_cross_device_neg_loss (line 194) | def _compute_cross_device_neg_loss(self, q_reps, p_reps, teacher_targe...
    method forward (line 234) | def forward(
    method distill_loss (line 280) | def distill_loss(kd_loss_type, teacher_targets, student_scores, group_...
    method _dist_gather_tensor (line 320) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]):

FILE: FlagEmbedding/abc/finetune/embedder/AbsRunner.py
  class AbsEmbedderRunner (line 24) | class AbsEmbedderRunner(ABC):
    method __init__ (line 32) | def __init__(
    method load_tokenizer_and_model (line 79) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm...
    method load_trainer (line 88) | def load_trainer(self) -> AbsEmbedderTrainer:
    method load_train_dataset (line 96) | def load_train_dataset(self) -> AbsEmbedderTrainDataset:
    method load_data_collator (line 120) | def load_data_collator(self) -> AbsEmbedderCollator:
    method run (line 142) | def run(self):

FILE: FlagEmbedding/abc/finetune/embedder/AbsTrainer.py
  class AbsEmbedderTrainer (line 9) | class AbsEmbedderTrainer(ABC, Trainer):
    method _save (line 14) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 17) | def compute_loss(self, model, inputs, return_outputs=False, **kwargs):

FILE: FlagEmbedding/abc/finetune/reranker/AbsArguments.py
  class AbsRerankerModelArguments (line 9) | class AbsRerankerModelArguments:
  class AbsRerankerDataArguments (line 52) | class AbsRerankerDataArguments:
    method __post_init__ (line 126) | def __post_init__(self):
  class AbsRerankerTrainingArguments (line 140) | class AbsRerankerTrainingArguments(TrainingArguments):

FILE: FlagEmbedding/abc/finetune/reranker/AbsDataset.py
  class AbsRerankerTrainDataset (line 23) | class AbsRerankerTrainDataset(Dataset):
    method __init__ (line 30) | def __init__(
    method _load_dataset (line 55) | def _load_dataset(self, file_path: str):
    method _shuffle_text (line 84) | def _shuffle_text(self, text):
    method __len__ (line 103) | def __len__(self):
    method create_one_example (line 106) | def create_one_example(self, qry_encoding: str, doc_encoding: str):
    method __getitem__ (line 127) | def __getitem__(self, item):
  class AbsRerankerCollator (line 180) | class AbsRerankerCollator(DataCollatorWithPadding):
    method __call__ (line 187) | def __call__(self, features) -> List[BatchEncoding]:
  class AbsLLMRerankerTrainDataset (line 211) | class AbsLLMRerankerTrainDataset(AbsRerankerTrainDataset):
    method __init__ (line 218) | def __init__(
    method __getitem__ (line 231) | def __getitem__(self, item) -> List[BatchEncoding]:
  class AbsLLMRerankerCollator (line 341) | class AbsLLMRerankerCollator(DataCollatorForSeq2Seq):
    method __call__ (line 350) | def __call__(self, features, return_tensors='pt'):

FILE: FlagEmbedding/abc/finetune/reranker/AbsModeling.py
  class RerankerOutput (line 15) | class RerankerOutput(ModelOutput):
  class AbsRerankerModel (line 20) | class AbsRerankerModel(ABC, nn.Module):
    method __init__ (line 28) | def __init__(
    method gradient_checkpointing_enable (line 47) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 53) | def enable_input_require_grads(self, **kwargs):
    method encode (line 60) | def encode(self, features):
    method forward (line 68) | def forward(self, pair: Union[Dict[str, Tensor], List[Dict[str, Tensor...
    method compute_loss (line 101) | def compute_loss(self, scores, target):
    method save (line 113) | def save(self, output_dir: str):
    method save_pretrained (line 127) | def save_pretrained(self, *args, **kwargs):

FILE: FlagEmbedding/abc/finetune/reranker/AbsRunner.py
  class AbsRerankerRunner (line 24) | class AbsRerankerRunner(ABC):
    method __init__ (line 32) | def __init__(
    method load_tokenizer_and_model (line 79) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsRe...
    method load_trainer (line 88) | def load_trainer(self) -> AbsRerankerTrainer:
    method load_train_dataset (line 96) | def load_train_dataset(self) -> AbsRerankerTrainDataset:
    method load_data_collator (line 114) | def load_data_collator(self) -> AbsRerankerCollator:
    method run (line 135) | def run(self):

FILE: FlagEmbedding/abc/finetune/reranker/AbsTrainer.py
  class AbsRerankerTrainer (line 9) | class AbsRerankerTrainer(ABC, Trainer):
    method _save (line 14) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 17) | def compute_loss(self, model, inputs, return_outputs=False, **kwargs):

FILE: FlagEmbedding/abc/inference/AbsEmbedder.py
  class AbsEmbedder (line 24) | class AbsEmbedder(ABC):
    method __init__ (line 47) | def __init__(
    method stop_self_pool (line 84) | def stop_self_pool(self):
    method get_target_devices (line 97) | def get_target_devices(devices: Union[str, int, List[str], List[int]])...
    method get_detailed_instruct (line 144) | def get_detailed_instruct(instruction_format: str, instruction: str, s...
    method encode_queries (line 159) | def encode_queries(
    method encode_corpus (line 193) | def encode_corpus(
    method encode (line 230) | def encode(
    method __del__ (line 287) | def __del__(self):
    method encode_single_device (line 291) | def encode_single_device(
    method start_multi_process_pool (line 306) | def start_multi_process_pool(
    method _encode_multi_process_worker (line 346) | def _encode_multi_process_worker(
    method stop_multi_process_pool (line 369) | def stop_multi_process_pool(pool: Dict[Literal["input", "output", "pro...
    method encode_multi_process (line 391) | def encode_multi_process(
    method _concatenate_results_from_multi_process (line 424) | def _concatenate_results_from_multi_process(self, results_list: List[U...

FILE: FlagEmbedding/abc/inference/AbsReranker.py
  class AbsReranker (line 23) | class AbsReranker(ABC):
    method __init__ (line 47) | def __init__(
    method stop_self_pool (line 86) | def stop_self_pool(self):
    method get_target_devices (line 99) | def get_target_devices(devices: Union[str, int, List[str], List[int]])...
    method get_detailed_instruct (line 142) | def get_detailed_instruct(self, instruction_format: str, instruction: ...
    method get_detailed_inputs (line 157) | def get_detailed_inputs(self, sentence_pairs: Union[str, List[str]]):
    method compute_score (line 200) | def compute_score(
    method __del__ (line 231) | def __del__(self):
    method compute_score_single_gpu (line 235) | def compute_score_single_gpu(
    method start_multi_process_pool (line 251) | def start_multi_process_pool(self) -> Dict[Literal["input", "output", ...
    method encode_multi_process (line 284) | def encode_multi_process(
    method _encode_multi_process_worker (line 319) | def _encode_multi_process_worker(
    method stop_multi_process_pool (line 342) | def stop_multi_process_pool(pool: Dict[Literal["input", "output", "pro...

FILE: FlagEmbedding/evaluation/air_bench/__main__.py
  function main (line 9) | def main():

FILE: FlagEmbedding/evaluation/air_bench/arguments.py
  class AIRBenchEvalModelArgs (line 7) | class AIRBenchEvalModelArgs:
    method __post_init__ (line 106) | def __post_init__(self):

FILE: FlagEmbedding/evaluation/air_bench/runner.py
  class AIRBenchEvalRunner (line 12) | class AIRBenchEvalRunner:
    method __init__ (line 20) | def __init__(
    method load_retriever_and_reranker (line 31) | def load_retriever_and_reranker(self) -> Tuple[EvalDenseRetriever, Uni...
    method run (line 48) | def run(self):

FILE: FlagEmbedding/evaluation/beir/__main__.py
  function main (line 9) | def main():

FILE: FlagEmbedding/evaluation/beir/arguments.py
  class BEIREvalArgs (line 7) | class BEIREvalArgs(AbsEvalArgs):

FILE: FlagEmbedding/evaluation/beir/data_loader.py
  class BEIREvalDataLoader (line 15) | class BEIREvalDataLoader(AbsEvalDataLoader):
    method available_dataset_names (line 19) | def available_dataset_names(self) -> List[str]:
    method available_sub_dataset_names (line 28) | def available_sub_dataset_names(self, dataset_name: Optional[str] = No...
    method available_splits (line 42) | def available_splits(self, dataset_name: Optional[str] = None) -> List...
    method _load_remote_corpus (line 56) | def _load_remote_corpus(
    method _load_remote_qrels (line 127) | def _load_remote_qrels(
    method _load_remote_queries (line 212) | def _load_remote_queries(
    method load_corpus (line 291) | def load_corpus(self, dataset_name: Optional[str] = None, sub_dataset_...
    method load_qrels (line 310) | def load_qrels(self, dataset_name: Optional[str] = None, sub_dataset_n...
    method load_queries (line 339) | def load_queries(self, dataset_name: Optional[str] = None, sub_dataset...
    method _load_local_corpus (line 368) | def _load_local_corpus(self, save_dir: str, dataset_name: Optional[str...
    method _load_local_qrels (line 397) | def _load_local_qrels(self, save_dir: str, dataset_name: Optional[str]...
    method _load_local_queries (line 438) | def _load_local_queries(self, save_dir: str, dataset_name: Optional[st...

FILE: FlagEmbedding/evaluation/beir/evaluator.py
  class BEIREvaluator (line 12) | class BEIREvaluator(AbsEvaluator):
    method check_data_info (line 16) | def check_data_info(
    method __call__ (line 66) | def __call__(
    method evaluate_results (line 351) | def evaluate_results(
    method save_search_results (line 418) | def save_search_results(

FILE: FlagEmbedding/evaluation/beir/runner.py
  class BEIREvalRunner (line 11) | class BEIREvalRunner(AbsEvalRunner):
    method run (line 15) | def run(self):
    method load_data_loader (line 63) | def load_data_loader(self) -> BEIREvalDataLoader:
    method load_evaluator (line 78) | def load_evaluator(self) -> BEIREvaluator:

FILE: FlagEmbedding/evaluation/bright/__main__.py
  function main (line 9) | def main():

FILE: FlagEmbedding/evaluation/bright/arguments.py
  class BrightEvalArgs (line 7) | class BrightEvalArgs(AbsEvalArgs):

FILE: FlagEmbedding/evaluation/bright/data_loader.py
  class BrightShortEvalDataLoader (line 14) | class BrightShortEvalDataLoader(AbsEvalDataLoader):
    method available_dataset_names (line 18) | def available_dataset_names(self) -> List[str]:
    method available_splits (line 34) | def available_splits(self, dataset_name: str) -> List[str]:
    method _load_remote_corpus (line 51) | def _load_remote_corpus(
    method _load_remote_qrels (line 89) | def _load_remote_qrels(
    method _load_remote_queries (line 165) | def _load_remote_queries(
  class BrightLongEvalDataLoader (line 209) | class BrightLongEvalDataLoader(AbsEvalDataLoader):
    method available_dataset_names (line 213) | def available_dataset_names(self) -> List[str]:
    method available_splits (line 227) | def available_splits(self, dataset_name: str) -> List[str]:
    method _load_remote_corpus (line 244) | def _load_remote_corpus(
    method _load_remote_qrels (line 282) | def _load_remote_qrels(
    method _load_remote_queries (line 358) | def _load_remote_queries(

FILE: FlagEmbedding/evaluation/bright/runner.py
  class BrightEvalRunner (line 14) | class BrightEvalRunner(AbsEvalRunner):
    method __init__ (line 18) | def __init__(self, eval_args: BrightEvalArgs, model_args: BrightEvalMo...
    method load_data_loader (line 23) | def load_data_loader(self) -> Union[BrightShortEvalDataLoader, BrightL...
    method load_retriever_and_reranker (line 45) | def load_retriever_and_reranker(self) -> Tuple[BrightEvalDenseRetrieve...
    method run (line 62) | def run(self):

FILE: FlagEmbedding/evaluation/bright/searcher.py
  class BrightEvalDenseRetriever (line 15) | class BrightEvalDenseRetriever(EvalRetriever):
    method __call__ (line 19) | def __call__(

FILE: FlagEmbedding/evaluation/custom/__main__.py
  function main (line 9) | def main():

FILE: FlagEmbedding/evaluation/custom/data_loader.py
  class CustomEvalDataLoader (line 10) | class CustomEvalDataLoader(AbsEvalDataLoader):
    method available_dataset_names (line 11) | def available_dataset_names(self) -> List[str]:
    method available_splits (line 14) | def available_splits(self, dataset_name: Optional[str] = None) -> List...

FILE: FlagEmbedding/evaluation/custom/runner.py
  class CustomEvalRunner (line 6) | class CustomEvalRunner(AbsEvalRunner):
    method load_data_loader (line 7) | def load_data_loader(self) -> CustomEvalDataLoader:

FILE: FlagEmbedding/evaluation/miracl/__main__.py
  function main (line 9) | def main():

FILE: FlagEmbedding/evaluation/miracl/data_loader.py
  class MIRACLEvalDataLoader (line 13) | class MIRACLEvalDataLoader(AbsEvalDataLoader):
    method available_dataset_names (line 17) | def available_dataset_names(self) -> List[str]:
    method available_splits (line 26) | def available_splits(self, dataset_name: str) -> List[str]:
    method _load_remote_corpus (line 41) | def _load_remote_corpus(
    method _load_remote_qrels (line 84) | def _load_remote_qrels(
    method _load_remote_queries (line 135) | def _load_remote_queries(

FILE: FlagEmbedding/evaluation/miracl/runner.py
  class MIRACLEvalRunner (line 6) | class MIRACLEvalRunner(AbsEvalRunner):
    method load_data_loader (line 10) | def load_data_loader(self) -> MIRACLEvalDataLoader:

FILE: FlagEmbedding/evaluation/mkqa/__main__.py
  function main (line 9) | def main():

FILE: FlagEmbedding/evaluation/mkqa/data_loader.py
  class MKQAEvalDataLoader (line 15) | class MKQAEvalDataLoader(AbsEvalDataLoader):
    method available_dataset_names (line 19) | def available_dataset_names(self) -> List[str]:
    method available_splits (line 28) | def available_splits(self, dataset_name: Optional[str] = None) -> List...
    method load_corpus (line 40) | def load_corpus(self, dataset_name: Optional[str] = None) -> datasets....
    method _load_local_qrels (line 56) | def _load_local_qrels(self, save_dir: str, dataset_name: Optional[str]...
    method _load_remote_corpus (line 89) | def _load_remote_corpus(
    method _load_remote_qrels (line 132) | def _load_remote_qrels(
    method _load_remote_queries (line 183) | def _load_remote_queries(

FILE: FlagEmbedding/evaluation/mkqa/evaluator.py
  class MKQAEvaluator (line 10) | class MKQAEvaluator(AbsEvaluator):
    method get_corpus_embd_save_dir (line 14) | def get_corpus_embd_save_dir(
    method evaluate_results (line 35) | def evaluate_results(
    method compute_metrics (line 87) | def compute_metrics(

FILE: FlagEmbedding/evaluation/mkqa/runner.py
  class MKQAEvalRunner (line 7) | class MKQAEvalRunner(AbsEvalRunner):
    method load_data_loader (line 11) | def load_data_loader(self) -> MKQAEvalDataLoader:
    method load_evaluator (line 26) | def load_evaluator(self) -> MKQAEvaluator:

FILE: FlagEmbedding/evaluation/mkqa/utils/compute_metrics.py
  class SimpleTokenizer (line 10) | class SimpleTokenizer:
    method __init__ (line 14) | def __init__(self):
    method tokenize (line 24) | def tokenize(self, text, uncased=False):
  function _normalize (line 33) | def _normalize(text):
  function has_answer (line 37) | def has_answer(answers, text, tokenizer) -> bool:
  function check_answer (line 51) | def check_answer(example, tokenizer) -> List[bool]:
  function evaluate_qa_recall (line 65) | def evaluate_qa_recall(ctxs, answers, k_values: Union[int, List[int]]=100):

FILE: FlagEmbedding/evaluation/mkqa/utils/normalize_text.py
  function normalize_text (line 133) | def normalize_text(text: str):

FILE: FlagEmbedding/evaluation/mldr/__main__.py
  function main (line 9) | def main():

FILE: FlagEmbedding/evaluation/mldr/data_loader.py
  class MLDREvalDataLoader (line 13) | class MLDREvalDataLoader(AbsEvalDataLoader):
    method available_dataset_names (line 17) | def available_dataset_names(self) -> List[str]:
    method available_splits (line 26) | def available_splits(self, dataset_name: Optional[str] = None) -> List...
    method _load_remote_corpus (line 38) | def _load_remote_corpus(
    method _load_remote_qrels (line 77) | def _load_remote_qrels(
    method _load_remote_queries (line 142) | def _load_remote_queries(

FILE: FlagEmbedding/evaluation/mldr/runner.py
  class MLDREvalRunner (line 6) | class MLDREvalRunner(AbsEvalRunner):
    method load_data_loader (line 10) | def load_data_loader(self) -> MLDREvalDataLoader:

FILE: FlagEmbedding/evaluation/msmarco/__main__.py
  function main (line 9) | def main():

FILE: FlagEmbedding/evaluation/msmarco/data_loader.py
  class MSMARCOEvalDataLoader (line 13) | class MSMARCOEvalDataLoader(AbsEvalDataLoader):
    method available_dataset_names (line 17) | def available_dataset_names(self) -> List[str]:
    method available_splits (line 26) | def available_splits(self, dataset_name: Optional[str] = None) -> List...
    method _load_remote_corpus (line 38) | def _load_remote_corpus(
    method _load_remote_qrels (line 104) | def _load_remote_qrels(
    method _load_remote_queries (line 198) | def _load_remote_queries(

FILE: FlagEmbedding/evaluation/msmarco/runner.py
  class MSMARCOEvalRunner (line 6) | class MSMARCOEvalRunner(AbsEvalRunner):
    method load_data_loader (line 10) | def load_data_loader(self) -> MSMARCOEvalDataLoader:

FILE: FlagEmbedding/evaluation/mteb/__main__.py
  function main (line 9) | def main():

FILE: FlagEmbedding/evaluation/mteb/arguments.py
  class MTEBEvalArgs (line 8) | class MTEBEvalArgs(AbsEvalArgs):

FILE: FlagEmbedding/evaluation/mteb/prompts.py
  function get_task_def_by_task_name_and_type (line 4) | def get_task_def_by_task_name_and_type(task_name: str, task_type: str) -...

FILE: FlagEmbedding/evaluation/mteb/runner.py
  function ensure_dir (line 17) | def ensure_dir(file_path):
  class MTEBEvalRunner (line 22) | class MTEBEvalRunner(AbsEvalRunner):
    method __init__ (line 26) | def __init__(
    method load_retriever_and_reranker (line 36) | def load_retriever_and_reranker(self) -> Tuple[MTEBEvalDenseRetriever,...
    method read_results (line 52) | def read_results(self, output_folder, tasks):
    method output_json (line 97) | def output_json(self, tasks_results, save_file):
    method run (line 136) | def run(self):

FILE: FlagEmbedding/evaluation/mteb/searcher.py
  class MTEBEvalDenseRetriever (line 7) | class MTEBEvalDenseRetriever(EvalDenseRetriever):
    method __init__ (line 11) | def __init__(self, embedder, **kwargs):
    method set_examples (line 14) | def set_examples(self, examples_for_task: Optional[List[dict]] = None):
    method set_instruction (line 22) | def set_instruction(self, instruction: Optional[str] = None):
    method get_instruction (line 30) | def get_instruction(self):
    method set_normalize_embeddings (line 38) | def set_normalize_embeddings(self, normalize_embeddings: bool = True):
    method stop_pool (line 46) | def stop_pool(self):
    method encode_queries (line 53) | def encode_queries(self, queries: List[str], **kwargs):
    method encode_corpus (line 67) | def encode_corpus(self, corpus: List[Dict[str, str]], **kwargs):
    method encode (line 85) | def encode(self, corpus: List[Dict[str, str]], **kwargs):
  class MTEBEvalReranker (line 103) | class MTEBEvalReranker(EvalReranker):
    method __init__ (line 107) | def __init__(self, reranker, **kwargs):

FILE: FlagEmbedding/finetune/embedder/decoder_only/base/__main__.py
  function main (line 11) | def main():

FILE: FlagEmbedding/finetune/embedder/decoder_only/base/arguments.py
  function default_target_modules (line 7) | def default_target_modules() -> List[int]:
  class DecoderOnlyEmbedderModelArguments (line 12) | class DecoderOnlyEmbedderModelArguments(AbsEmbedderModelArguments):

FILE: FlagEmbedding/finetune/embedder/decoder_only/base/load_model.py
  function find_largest_checkpoint (line 13) | def find_largest_checkpoint(checkpoint_dir):
  function get_model (line 38) | def get_model(model_args: DecoderOnlyEmbedderModelArguments, output_dir:...
  function save_merged_model (line 123) | def save_merged_model(model_args: DecoderOnlyEmbedderModelArguments, out...

FILE: FlagEmbedding/finetune/embedder/decoder_only/base/modeling.py
  class BiDecoderOnlyEmbedderModel (line 11) | class BiDecoderOnlyEmbedderModel(AbsEmbedderModel):
    method __init__ (line 27) | def __init__(
    method encode (line 50) | def encode(self, features):
    method _sentence_embedding (line 94) | def _sentence_embedding(self, last_hidden_state, attention_mask):
    method compute_score (line 129) | def compute_score(self, q_reps, p_reps):
    method _compute_similarity (line 143) | def _compute_similarity(self, q_reps, p_reps):
    method compute_loss (line 157) | def compute_loss(self, scores, target):
    method gradient_checkpointing_enable (line 169) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 175) | def enable_input_require_grads(self, **kwargs):
    method save (line 181) | def save(self, output_dir: str):

FILE: FlagEmbedding/finetune/embedder/decoder_only/base/runner.py
  class DecoderOnlyEmbedderRunner (line 17) | class DecoderOnlyEmbedderRunner(AbsEmbedderRunner):
    method __init__ (line 25) | def __init__(
    method load_tokenizer_and_model (line 36) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm...
    method load_trainer (line 102) | def load_trainer(self) -> DecoderOnlyEmbedderTrainer:
    method run (line 119) | def run(self):

FILE: FlagEmbedding/finetune/embedder/decoder_only/base/trainer.py
  class DecoderOnlyEmbedderTrainer (line 11) | class DecoderOnlyEmbedderTrainer(AbsEmbedderTrainer):
    method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None):

FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/__main__.py
  function main (line 11) | def main():

FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/arguments.py
  function default_target_modules (line 10) | def default_target_modules() -> List[int]:
  class DecoderOnlyEmbedderICLModelArguments (line 15) | class DecoderOnlyEmbedderICLModelArguments(AbsEmbedderModelArguments):
  class DecoderOnlyEmbedderICLDataArguments (line 83) | class DecoderOnlyEmbedderICLDataArguments(AbsEmbedderDataArguments):

FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/dataset.py
  class DecoderOnlyEmbedderICLSameDatasetTrainDataset (line 17) | class DecoderOnlyEmbedderICLSameDatasetTrainDataset(AbsEmbedderSameDatas...
    method __init__ (line 28) | def __init__(
    method _create_batch_data (line 51) | def _create_batch_data(self, batch_raw_data):
  class AbsEmbedderSameDatasetCollator (line 201) | class AbsEmbedderSameDatasetCollator(DataCollatorWithPadding):
    method __call__ (line 214) | def __call__(self, features):

FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/load_model.py
  function find_largest_checkpoint (line 13) | def find_largest_checkpoint(checkpoint_dir):
  function get_model (line 38) | def get_model(model_args: DecoderOnlyEmbedderICLModelArguments, output_d...
  function save_merged_model (line 123) | def save_merged_model(model_args: DecoderOnlyEmbedderICLModelArguments, ...

FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/modeling.py
  class BiDecoderOnlyEmbedderICLModel (line 11) | class BiDecoderOnlyEmbedderICLModel(AbsEmbedderModel):
    method __init__ (line 27) | def __init__(
    method encode (line 50) | def encode(self, features):
    method _sentence_embedding (line 94) | def _sentence_embedding(self, last_hidden_state, attention_mask):
    method compute_score (line 129) | def compute_score(self, q_reps, p_reps):
    method _compute_similarity (line 143) | def _compute_similarity(self, q_reps, p_reps):
    method compute_loss (line 157) | def compute_loss(self, scores, target):
    method gradient_checkpointing_enable (line 169) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 175) | def enable_input_require_grads(self, **kwargs):
    method save (line 181) | def save(self, output_dir: str):

FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/runner.py
  class DecoderOnlyEmbedderICLRunner (line 18) | class DecoderOnlyEmbedderICLRunner(AbsEmbedderRunner):
    method __init__ (line 26) | def __init__(
    method load_tokenizer_and_model (line 37) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm...
    method load_trainer (line 103) | def load_trainer(self) -> DecoderOnlyEmbedderICLTrainer:
    method load_train_dataset (line 120) | def load_train_dataset(self) -> DecoderOnlyEmbedderICLSameDatasetTrain...
    method run (line 144) | def run(self):

FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/trainer.py
  class DecoderOnlyEmbedderICLTrainer (line 11) | class DecoderOnlyEmbedderICLTrainer(AbsEmbedderTrainer):
    method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None):

FILE: FlagEmbedding/finetune/embedder/encoder_only/base/__main__.py
  function main (line 11) | def main():

FILE: FlagEmbedding/finetune/embedder/encoder_only/base/modeling.py
  class BiEncoderOnlyEmbedderModel (line 11) | class BiEncoderOnlyEmbedderModel(AbsEmbedderModel):
    method __init__ (line 27) | def __init__(
    method encode (line 50) | def encode(self, features):
    method _sentence_embedding (line 93) | def _sentence_embedding(self, last_hidden_state, attention_mask):
    method compute_score (line 128) | def compute_score(self, q_reps, p_reps):
    method _compute_similarity (line 142) | def _compute_similarity(self, q_reps, p_reps):
    method compute_loss (line 156) | def compute_loss(self, scores, target):
    method gradient_checkpointing_enable (line 168) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 174) | def enable_input_require_grads(self, **kwargs):
    method save (line 180) | def save(self, output_dir: str):

FILE: FlagEmbedding/finetune/embedder/encoder_only/base/runner.py
  class EncoderOnlyEmbedderRunner (line 15) | class EncoderOnlyEmbedderRunner(AbsEmbedderRunner):
    method load_tokenizer_and_model (line 19) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm...
    method load_trainer (line 70) | def load_trainer(self) -> EncoderOnlyEmbedderTrainer:

FILE: FlagEmbedding/finetune/embedder/encoder_only/base/trainer.py
  class EncoderOnlyEmbedderTrainer (line 11) | class EncoderOnlyEmbedderTrainer(AbsEmbedderTrainer):
    method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None):

FILE: FlagEmbedding/finetune/embedder/encoder_only/m3/__main__.py
  function main (line 11) | def main():

FILE: FlagEmbedding/finetune/embedder/encoder_only/m3/arguments.py
  class EncoderOnlyEmbedderM3ModelArguments (line 10) | class EncoderOnlyEmbedderM3ModelArguments(AbsEmbedderModelArguments):
  class EncoderOnlyEmbedderM3TrainingArguments (line 18) | class EncoderOnlyEmbedderM3TrainingArguments(AbsEmbedderTrainingArguments):

FILE: FlagEmbedding/finetune/embedder/encoder_only/m3/modeling.py
  class EncoderOnlyEmbedderM3Model (line 15) | class EncoderOnlyEmbedderM3Model(AbsEmbedderModel):
    method __init__ (line 32) | def __init__(
    method _dense_embedding (line 75) | def _dense_embedding(self, last_hidden_state, attention_mask):
    method _sparse_embedding (line 110) | def _sparse_embedding(self, hidden_state, input_ids, return_embedding:...
    method _colbert_embedding (line 153) | def _colbert_embedding(self, last_hidden_state, mask):
    method compute_score (line 167) | def compute_score(
    method compute_dense_score (line 189) | def compute_dense_score(self, q_reps, p_reps):
    method compute_sparse_score (line 203) | def compute_sparse_score(self, q_reps, p_reps):
    method compute_colbert_score (line 217) | def compute_colbert_score(self, q_reps, p_reps, q_mask: torch.Tensor=N...
    method ensemble_score (line 233) | def ensemble_score(self, q_reps, p_reps, dense_scores=None, sparse_sco...
    method _encode (line 253) | def _encode(self, features):
    method encode (line 276) | def encode(self, features):
    method _compute_similarity (line 328) | def _compute_similarity(self, q_reps, p_reps):
    method _get_queries_attention_mask (line 342) | def _get_queries_attention_mask(self, queries: Union[Dict[str, Tensor]...
    method forward (line 368) | def forward(
    method compute_loss (line 472) | def compute_loss(self, scores, target):
    method gradient_checkpointing_enable (line 484) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 490) | def enable_input_require_grads(self, **kwargs):
    method save (line 496) | def save(self, output_dir: str):
  class EncoderOnlyEmbedderM3ModelForInference (line 518) | class EncoderOnlyEmbedderM3ModelForInference(EncoderOnlyEmbedderM3Model):
    method forward (line 522) | def forward(self,

FILE: FlagEmbedding/finetune/embedder/encoder_only/m3/runner.py
  class EncoderOnlyEmbedderM3Runner (line 22) | class EncoderOnlyEmbedderM3Runner(AbsEmbedderRunner):
    method __init__ (line 31) | def __init__(
    method get_model (line 43) | def get_model(
    method load_tokenizer_and_model (line 100) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm...
    method load_trainer (line 156) | def load_trainer(self) -> EncoderOnlyEmbedderM3Trainer:

FILE: FlagEmbedding/finetune/embedder/encoder_only/m3/trainer.py
  class EncoderOnlyEmbedderM3Trainer (line 11) | class EncoderOnlyEmbedderM3Trainer(AbsEmbedderTrainer):
    method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None):

FILE: FlagEmbedding/finetune/reranker/decoder_only/base/__main__.py
  function main (line 14) | def main():

FILE: FlagEmbedding/finetune/reranker/decoder_only/base/arguments.py
  function default_target_modules (line 7) | def default_target_modules() -> List[int]:
  class RerankerModelArguments (line 12) | class RerankerModelArguments(AbsRerankerModelArguments):

FILE: FlagEmbedding/finetune/reranker/decoder_only/base/load_model.py
  function find_largest_checkpoint (line 12) | def find_largest_checkpoint(checkpoint_dir):
  function get_model (line 37) | def get_model(model_args: RerankerModelArguments):
  function save_merged_model (line 106) | def save_merged_model(model_args: RerankerModelArguments, output_dir: str):

FILE: FlagEmbedding/finetune/reranker/decoder_only/base/modeling.py
  class CrossDecoderModel (line 10) | class CrossDecoderModel(AbsRerankerModel):
    method __init__ (line 19) | def __init__(
    method encode (line 31) | def encode(self, features):

FILE: FlagEmbedding/finetune/reranker/decoder_only/base/runner.py
  class DecoderOnlyRerankerRunner (line 19) | class DecoderOnlyRerankerRunner(AbsRerankerRunner):
    method __init__ (line 28) | def __init__(
    method load_tokenizer_and_model (line 36) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsRe...
    method load_trainer (line 81) | def load_trainer(self) -> DecoderOnlyRerankerTrainer:
    method run (line 96) | def run(self):

FILE: FlagEmbedding/finetune/reranker/decoder_only/base/trainer.py
  class DecoderOnlyRerankerTrainer (line 13) | class DecoderOnlyRerankerTrainer(AbsRerankerTrainer):
    method _save (line 17) | def _save(self, output_dir: Optional[str] = None, state_dict=None):

FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/__main__.py
  function main (line 14) | def main():

FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/arguments.py
  function default_target_modules (line 7) | def default_target_modules() -> List[int]:
  class RerankerModelArguments (line 12) | class RerankerModelArguments(AbsRerankerModelArguments):

FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/configuration_minicpm_reranker.py
  class LayerWiseMiniCPMConfig (line 30) | class LayerWiseMiniCPMConfig(PretrainedConfig):
    method __init__ (line 116) | def __init__(
    method _rope_scaling_validation (line 189) | def _rope_scaling_validation(self):

FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/load_model.py
  function find_largest_checkpoint (line 16) | def find_largest_checkpoint(checkpoint_dir):
  function get_model (line 41) | def get_model(model_args: RerankerModelArguments, only_for_one_logit):
  function save_merged_model (line 170) | def save_merged_model(model_args: RerankerModelArguments, output_dir: str):

FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/modeling.py
  class CrossDecoderModel (line 12) | class CrossDecoderModel(AbsRerankerModel):
    method __init__ (line 22) | def __init__(
    method encode (line 37) | def encode(self, features):
    method forward (line 50) | def forward(self, pair: Union[Dict[str, Tensor], List[Dict[str, Tensor...

FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/modeling_minicpm_reranker.py
  function _get_unpad_data (line 77) | def _get_unpad_data(attention_mask):
  function _expand_mask (line 89) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option...
  function _make_causal_mask (line 96) | def _make_causal_mask(
  function rms_layernorm (line 108) | def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float):
  class MiniCPMRMSNorm (line 115) | class MiniCPMRMSNorm(nn.Module):
    method __init__ (line 116) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 124) | def forward(self, hidden_states):
  class MiniCPMRotaryEmbedding (line 131) | class MiniCPMRotaryEmbedding(nn.Module):
    method __init__ (line 132) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 147) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 157) | def forward(self, x, seq_len=None):
  class MiniCPMLinearScalingRotaryEmbedding (line 168) | class MiniCPMLinearScalingRotaryEmbedding(MiniCPMRotaryEmbedding):
    method __init__ (line 171) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 175) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class MiniCPMDynamicNTKScalingRotaryEmbedding (line 187) | class MiniCPMDynamicNTKScalingRotaryEmbedding(MiniCPMRotaryEmbedding):
    method __init__ (line 190) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 194) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function rotate_half (line 214) | def rotate_half(x):
  function apply_rotary_pos_emb (line 221) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  class MiniCPMMLP (line 256) | class MiniCPMMLP(nn.Module):
    method __init__ (line 257) | def __init__(self, config):
    method forward (line 267) | def forward(self, x):
  function repeat_kv (line 290) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class MiniCPMAttention (line 302) | class MiniCPMAttention(nn.Module):
    method __init__ (line 305) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional...
    method _init_rope (line 338) | def _init_rope(self):
    method _shape (line 365) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 368) | def forward(
  class MiniCPMFlashAttention2 (line 473) | class MiniCPMFlashAttention2(MiniCPMAttention):
    method __init__ (line 480) | def __init__(self, *args, **kwargs):
    method forward (line 488) | def forward(
    method _flash_attention_forward (line 576) | def _flash_attention_forward(
    method _upad_input (line 633) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class MiniCPMSdpaAttention (line 672) | class MiniCPMSdpaAttention(MiniCPMAttention):
    method forward (line 680) | def forward(
  class MiniCPMDecoderLayer (line 766) | class MiniCPMDecoderLayer(nn.Module):
    method __init__ (line 767) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int):
    method forward (line 779) | def forward(
  class MiniCPMPreTrainedModel (line 862) | class MiniCPMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 872) | def _init_weights(self, module):
  class LayerWiseMiniCPMModel (line 958) | class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel):
    method __init__ (line 966) | def __init__(self, config: LayerWiseMiniCPMConfig):
    method get_input_embeddings (line 984) | def get_input_embeddings(self):
    method set_input_embeddings (line 987) | def set_input_embeddings(self, value):
    method forward (line 991) | def forward(
  class LayerWiseHead (line 1135) | class LayerWiseHead(nn.Module):
    method __init__ (line 1138) | def __init__(self, input_size, output_size):
    method forward (line 1142) | def forward(self, **kwargs):
  class LayerWiseMiniCPMForCausalLM (line 1145) | class LayerWiseMiniCPMForCausalLM(MiniCPMPreTrainedModel):
    method __init__ (line 1148) | def __init__(self, config):
    method get_input_embeddings (line 1191) | def get_input_embeddings(self):
    method set_input_embeddings (line 1194) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1197) | def get_output_embeddings(self):
    method set_output_embeddings (line 1200) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1203) | def set_decoder(self, decoder):
    method get_decoder (line 1206) | def get_decoder(self):
    method forward (line 1211) | def forward(
    method prepare_inputs_for_generation (line 1408) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1465) | def _reorder_cache(past_key_values, beam_idx):
    method chat (line 1474) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role...

FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/runner.py
  class DecoderOnlyRerankerRunner (line 18) | class DecoderOnlyRerankerRunner(AbsRerankerRunner):
    method __init__ (line 27) | def __init__(
    method load_tokenizer_and_model (line 35) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsRe...
    method load_trainer (line 82) | def load_trainer(self) -> DecoderOnlyRerankerTrainer:
    method run (line 97) | def run(self):

FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/trainer.py
  class DecoderOnlyRerankerTrainer (line 13) | class DecoderOnlyRerankerTrainer(AbsRerankerTrainer):
    method _save (line 17) | def _save(self, output_dir: Optional[str] = None, state_dict=None):

FILE: FlagEmbedding/finetune/reranker/encoder_only/base/__main__.py
  function main (line 11) | def main():

FILE: FlagEmbedding/finetune/reranker/encoder_only/base/modeling.py
  class CrossEncoderModel (line 9) | class CrossEncoderModel(AbsRerankerModel):
    method __init__ (line 17) | def __init__(
    method encode (line 29) | def encode(self, features):

FILE: FlagEmbedding/finetune/reranker/encoder_only/base/runner.py
  class EncoderOnlyRerankerRunner (line 15) | class EncoderOnlyRerankerRunner(AbsRerankerRunner):
    method load_tokenizer_and_model (line 19) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsRe...
    method load_trainer (line 63) | def load_trainer(self) -> EncoderOnlyRerankerTrainer:

FILE: FlagEmbedding/finetune/reranker/encoder_only/base/trainer.py
  class EncoderOnlyRerankerTrainer (line 11) | class EncoderOnlyRerankerTrainer(AbsRerankerTrainer):
    method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None):

FILE: FlagEmbedding/inference/auto_embedder.py
  class FlagAutoModel (line 13) | class FlagAutoModel:
    method __init__ (line 17) | def __init__(self):
    method from_finetuned (line 23) | def from_finetuned(

FILE: FlagEmbedding/inference/auto_reranker.py
  class FlagAutoReranker (line 14) | class FlagAutoReranker:
    method __init__ (line 18) | def __init__(self):
    method from_finetuned (line 24) | def from_finetuned(

FILE: FlagEmbedding/inference/embedder/decoder_only/base.py
  function last_token_pool (line 12) | def last_token_pool(last_hidden_states: torch.Tensor,
  class BaseLLMEmbedder (line 32) | class BaseLLMEmbedder(AbsEmbedder):
    method __init__ (line 58) | def __init__(
    method encode_queries (line 104) | def encode_queries(
    method encode_corpus (line 132) | def encode_corpus(
    method encode (line 160) | def encode(
    method encode_single_device (line 189) | def encode_single_device(

FILE: FlagEmbedding/inference/embedder/decoder_only/icl.py
  function last_token_pool (line 16) | def last_token_pool(last_hidden_states: torch.Tensor,
  class ICLLLMEmbedder (line 36) | class ICLLLMEmbedder(AbsEmbedder):
    method __init__ (line 66) | def __init__(
    method __del__ (line 124) | def __del__(self):
    method set_examples (line 128) | def set_examples(self, examples_for_task: Optional[List[dict]] = None):
    method get_detailed_example (line 163) | def get_detailed_example(instruction_format: str, instruction: str, qu...
    method stop_self_query_pool (line 179) | def stop_self_query_pool(self):
    method encode_queries (line 190) | def encode_queries(
    method encode_corpus (line 237) | def encode_corpus(
    method encode (line 266) | def encode(
    method _encode_queries_multi_process_worker (line 296) | def _encode_queries_multi_process_worker(
    method encode_queries_single_device (line 318) | def encode_queries_single_device(
    method encode_single_device (line 454) | def encode_single_device(

FILE: FlagEmbedding/inference/embedder/encoder_only/base.py
  class BaseEmbedder (line 11) | class BaseEmbedder(AbsEmbedder):
    method __init__ (line 40) | def __init__(
    method encode_queries (line 85) | def encode_queries(
    method encode_corpus (line 113) | def encode_corpus(
    method encode (line 141) | def encode(
    method encode_single_device (line 170) | def encode_single_device(
    method pooling (line 278) | def pooling(

FILE: FlagEmbedding/inference/embedder/encoder_only/m3.py
  class M3Embedder (line 20) | class M3Embedder(AbsEmbedder):
    method __init__ (line 50) | def __init__(
    method convert_id_to_token (line 106) | def convert_id_to_token(self, lexical_weights: List[Dict]):
    method compute_lexical_matching_score (line 129) | def compute_lexical_matching_score(
    method colbert_score (line 163) | def colbert_score(self, q_reps, p_reps):
    method encode_queries (line 179) | def encode_queries(
    method encode_corpus (line 221) | def encode_corpus(
    method encode (line 263) | def encode(
    method encode_single_device (line 306) | def encode_single_device(
    method compute_score (line 482) | def compute_score(
    method compute_score_multi_process (line 535) | def compute_score_multi_process(
    method _compute_score_multi_process_worker (line 571) | def _compute_score_multi_process_worker(
    method compute_score_single_device (line 593) | def compute_score_single_device(
    method _concatenate_results_from_multi_process (line 719) | def _concatenate_results_from_multi_process(
    method _concatenate_compute_score_results_from_multi_process (line 753) | def _concatenate_compute_score_results_from_multi_process(

FILE: FlagEmbedding/inference/embedder/model_mapping.py
  class EmbedderModelClass (line 10) | class EmbedderModelClass(Enum):
  class PoolingMethod (line 25) | class PoolingMethod(Enum):
  class EmbedderConfig (line 32) | class EmbedderConfig:
  function support_native_bge_model_list (line 268) | def support_native_bge_model_list()->List[str]:
  function support_model_list (line 271) | def support_model_list()->List[str]:

FILE: FlagEmbedding/inference/reranker/decoder_only/base.py
  function last_logit_pool (line 15) | def last_logit_pool(logits: Tensor,
  class DatasetForReranker (line 35) | class DatasetForReranker(Dataset):
    method __init__ (line 47) | def __init__(
    method __len__ (line 85) | def __len__(self):
    method __getitem__ (line 88) | def __getitem__(self, item):
  class Collater (line 122) | class Collater:
    method __init__ (line 130) | def __init__(self, tokenizer, max_len):
    method __call__ (line 138) | def __call__(self, data):
  class BaseLLMReranker (line 171) | class BaseLLMReranker(AbsReranker):
    method __init__ (line 199) | def __init__(
    method compute_score_single_gpu (line 257) | def compute_score_single_gpu(

FILE: FlagEmbedding/inference/reranker/decoder_only/layerwise.py
  function last_logit_pool_layerwise (line 18) | def last_logit_pool_layerwise(logits: Tensor,
  class LayerWiseLLMReranker (line 38) | class LayerWiseLLMReranker(AbsReranker):
    method __init__ (line 67) | def __init__(
    method compute_score_single_gpu (line 136) | def compute_score_single_gpu(

FILE: FlagEmbedding/inference/reranker/decoder_only/lightweight.py
  function last_logit_pool_lightweight (line 15) | def last_logit_pool_lightweight(logits: Tensor,
  class Collater_for_lightweight (line 35) | class Collater_for_lightweight:
    method __init__ (line 43) | def __init__(self, tokenizer, max_len):
    method __call__ (line 51) | def __call__(self, data):
  class LightweightLLMReranker (line 90) | class LightweightLLMReranker(AbsReranker):
    method __init__ (line 122) | def __init__(
    method compute_score_single_gpu (line 206) | def compute_score_single_gpu(

FILE: FlagEmbedding/inference/reranker/decoder_only/models/configuration_minicpm_reranker.py
  class LayerWiseMiniCPMConfig (line 30) | class LayerWiseMiniCPMConfig(PretrainedConfig):
    method __init__ (line 116) | def __init__(
    method _rope_scaling_validation (line 189) | def _rope_scaling_validation(self):

FILE: FlagEmbedding/inference/reranker/decoder_only/models/gemma_config.py
  class CostWiseGemmaConfig (line 26) | class CostWiseGemmaConfig(Gemma2Config):
    method __init__ (line 54) | def __init__(

FILE: FlagEmbedding/inference/reranker/decoder_only/models/gemma_model.py
  function _get_unpad_data (line 69) | def _get_unpad_data(attention_mask):
  class CostWiseGemma2PreTrainedModel (line 84) | class CostWiseGemma2PreTrainedModel(PreTrainedModel):
    method _init_weights (line 97) | def _init_weights(self, module):
  class CostWiseModelOutputWithPast (line 112) | class CostWiseModelOutputWithPast(ModelOutput):
  class CostWiseCausalLMOutputWithPast (line 120) | class CostWiseCausalLMOutputWithPast(ModelOutput):
  function token_compress (line 128) | def token_compress(compress_ratio,
  class CostWiseGemmaModel (line 237) | class CostWiseGemmaModel(CostWiseGemma2PreTrainedModel):
    method __init__ (line 245) | def __init__(self, config: CostWiseGemmaConfig):
    method get_input_embeddings (line 260) | def get_input_embeddings(self):
    method set_input_embeddings (line 263) | def set_input_embeddings(self, value):
    method forward (line 267) | def forward(
    method _update_causal_mask (line 434) | def _update_causal_mask(
  class CostWiseHead (line 479) | class CostWiseHead(nn.Module):
    method __init__ (line 482) | def __init__(self, input_size, output_size):
    method forward (line 486) | def forward(self, **kwargs):
  class CostWiseGemmaForCausalLM (line 490) | class CostWiseGemmaForCausalLM(CostWiseGemma2PreTrainedModel):
    method __init__ (line 493) | def __init__(self, config: CostWiseGemmaConfig):
    method get_input_embeddings (line 510) | def get_input_embeddings(self):
    method set_input_embeddings (line 513) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 516) | def get_output_embeddings(self):
    method set_output_embeddings (line 519) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 522) | def set_decoder(self, decoder):
    method get_decoder (line 525) | def get_decoder(self):
    method forward (line 530) | def forward(
    method prepare_inputs_for_generation (line 664) | def prepare_inputs_for_generation(
    method _reorder_cache (line 739) | def _reorder_cache(past_key_values, beam_idx):

FILE: FlagEmbedding/inference/reranker/decoder_only/models/modeling_minicpm_reranker.py
  function _get_unpad_data (line 80) | def _get_unpad_data(attention_mask):
  function _expand_mask (line 92) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option...
  function _make_causal_mask (line 99) | def _make_causal_mask(
  function rms_layernorm (line 111) | def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float):
  class MiniCPMRMSNorm (line 118) | class MiniCPMRMSNorm(nn.Module):
    method __init__ (line 119) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 127) | def forward(self, hidden_states):
  class MiniCPMRotaryEmbedding (line 134) | class MiniCPMRotaryEmbedding(nn.Module):
    method __init__ (line 135) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 150) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 160) | def forward(self, x, seq_len=None):
  class MiniCPMLinearScalingRotaryEmbedding (line 171) | class MiniCPMLinearScalingRotaryEmbedding(MiniCPMRotaryEmbedding):
    method __init__ (line 174) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 178) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class MiniCPMDynamicNTKScalingRotaryEmbedding (line 190) | class MiniCPMDynamicNTKScalingRotaryEmbedding(MiniCPMRotaryEmbedding):
    method __init__ (line 193) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 197) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function rotate_half (line 217) | def rotate_half(x):
  function apply_rotary_pos_emb (line 224) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  class MiniCPMMLP (line 259) | class MiniCPMMLP(nn.Module):
    method __init__ (line 260) | def __init__(self, config):
    method forward (line 270) | def forward(self, x):
  function repeat_kv (line 293) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class MiniCPMAttention (line 305) | class MiniCPMAttention(nn.Module):
    method __init__ (line 308) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional...
    method _init_rope (line 341) | def _init_rope(self):
    method _shape (line 368) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 371) | def forward(
  class MiniCPMFlashAttention2 (line 476) | class MiniCPMFlashAttention2(MiniCPMAttention):
    method __init__ (line 483) | def __init__(self, *args, **kwargs):
    method forward (line 491) | def forward(
    method _flash_attention_forward (line 579) | def _flash_attention_forward(
    method _upad_input (line 636) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class MiniCPMSdpaAttention (line 675) | class MiniCPMSdpaAttention(MiniCPMAttention):
    method forward (line 683) | def forward(
  class MiniCPMDecoderLayer (line 769) | class MiniCPMDecoderLayer(nn.Module):
    method __init__ (line 770) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int):
    method forward (line 782) | def forward(
  class MiniCPMPreTrainedModel (line 865) | class MiniCPMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 875) | def _init_weights(self, module):
  class LayerWiseMiniCPMModel (line 961) | class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel):
    method __init__ (line 969) | def __init__(self, config: LayerWiseMiniCPMConfig):
    method get_input_embeddings (line 987) | def get_input_embeddings(self):
    method set_input_embeddings (line 990) | def set_input_embeddings(self, value):
    method forward (line 994) | def forward(
  class LayerWiseHead (line 1138) | class LayerWiseHead(nn.Module):
    method __init__ (line 1141) | def __init__(self, input_size, output_size):
    method forward (line 1145) | def forward(self, **kwargs):
  class LayerWiseMiniCPMForCausalLM (line 1148) | class LayerWiseMiniCPMForCausalLM(MiniCPMPreTrainedModel):
    method __init__ (line 1151) | def __init__(self, config):
    method get_input_embeddings (line 1194) | def get_input_embeddings(self):
    method set_input_embeddings (line 1197) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1200) | def get_output_embeddings(self):
    method set_output_embeddings (line 1203) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1206) | def set_decoder(self, decoder):
    method get_decoder (line 1209) | def get_decoder(self):
    method forward (line 1214) | def forward(
    method prepare_inputs_for_generation (line 1411) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1468) | def _reorder_cache(past_key_values, beam_idx):
    method chat (line 1477) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role...

FILE: FlagEmbedding/inference/reranker/encoder_only/base.py
  function sigmoid (line 10) | def sigmoid(x):
  class BaseReranker (line 14) | class BaseReranker(AbsReranker):
    method __init__ (line 34) | def __init__(
    method compute_score_single_gpu (line 78) | def compute_score_single_gpu(

FILE: FlagEmbedding/inference/reranker/model_mapping.py
  class RerankerModelClass (line 10) | class RerankerModelClass(Enum):
  class RerankerConfig (line 26) | class RerankerConfig:

FILE: FlagEmbedding/utils/transformers_compat.py
  function is_torch_fx_available (line 10) | def is_torch_fx_available():
  function import_from_candidates (line 22) | def import_from_candidates(candidates, default=None):

FILE: Tutorials/4_Evaluation/utils/compute_metrics.py
  class SimpleTokenizer (line 10) | class SimpleTokenizer:
    method __init__ (line 14) | def __init__(self):
    method tokenize (line 24) | def tokenize(self, text, uncased=False):
  function _normalize (line 33) | def _normalize(text):
  function has_answer (line 37) | def has_answer(answers, text, tokenizer) -> bool:
  function check_answer (line 51) | def check_answer(example, tokenizer) -> List[bool]:
  function evaluate_qa_recall (line 65) | def evaluate_qa_recall(ctxs, answers, k_values: Union[int, List[int]]=100):

FILE: Tutorials/4_Evaluation/utils/normalize_text.py
  function normalize_text (line 133) | def normalize_text(text: str):

FILE: examples/inference/embedder/decoder_only/auto_base_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/embedder/decoder_only/auto_base_single_device.py
  function test_base_single_device (line 5) | def test_base_single_device():

FILE: examples/inference/embedder/decoder_only/auto_icl_multi_devices.py
  function test_icl_multi_devices (line 5) | def test_icl_multi_devices():

FILE: examples/inference/embedder/decoder_only/auto_icl_single_device.py
  function test_icl_single_device (line 5) | def test_icl_single_device():

FILE: examples/inference/embedder/decoder_only/base_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/embedder/decoder_only/base_single_device.py
  function test_base_single_device (line 5) | def test_base_single_device():

FILE: examples/inference/embedder/decoder_only/icl_multi_devices.py
  function test_icl_multi_devices (line 5) | def test_icl_multi_devices():

FILE: examples/inference/embedder/decoder_only/icl_single_device.py
  function test_icl_single_device (line 5) | def test_icl_single_device():

FILE: examples/inference/embedder/encoder_only/auto_base_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/embedder/encoder_only/auto_base_single_device.py
  function test_base_single_device (line 5) | def test_base_single_device():

FILE: examples/inference/embedder/encoder_only/auto_m3_multi_devices.py
  function test_m3_multi_devices (line 5) | def test_m3_multi_devices():

FILE: examples/inference/embedder/encoder_only/auto_m3_single_device.py
  function test_m3_single_device (line 5) | def test_m3_single_device():

FILE: examples/inference/embedder/encoder_only/base_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/embedder/encoder_only/base_single_device.py
  function test_base_single_device (line 5) | def test_base_single_device():

FILE: examples/inference/embedder/encoder_only/m3_multi_devices.py
  function test_m3_multi_devices (line 5) | def test_m3_multi_devices():

FILE: examples/inference/embedder/encoder_only/m3_multi_devices_compute_score.py
  function test_m3_multi_devices (line 5) | def test_m3_multi_devices():

FILE: examples/inference/embedder/encoder_only/m3_single_device.py
  function test_m3_single_device (line 5) | def test_m3_single_device():

FILE: examples/inference/embedder/encoder_only/m3_single_device_compute_score.py
  function test_m3_single_device (line 5) | def test_m3_single_device():

FILE: examples/inference/reranker/decoder_only/auto_base_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/auto_base_single_device.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/auto_layerwise_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/auto_layerwise_single_device.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/auto_lightweight_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/auto_lightweight_single_device.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/base_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/base_single_device.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/layerwise_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/layerwise_single_device.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/lightweight_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/decoder_only/lightweight_single_device.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/encoder_only/auto_base_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/encoder_only/auto_base_single_device.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/encoder_only/base_multi_devices.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: examples/inference/reranker/encoder_only/base_single_device.py
  function test_base_multi_devices (line 5) | def test_base_multi_devices():

FILE: research/BGE_Coder/data_generation/constant.py
  class TaskType (line 6) | class TaskType(Enum):
  function get_task_def_by_task_type (line 65) | def get_task_def_by_task_type(task_type: Union[str, TaskType]) -> Tuple[...
  class Language (line 206) | class Language(Enum):
  class CodeLanguage (line 270) | class CodeLanguage(Enum):
  class Task (line 337) | class Task:
  function get_task (line 346) | def get_task(
  function get_pos_as_input_by_task_type (line 391) | def get_pos_as_input_by_task_type(task_type: TaskType) -> bool:
  function get_generation_prompt (line 423) | def get_generation_prompt(
  function get_quality_control_prompt (line 759) | def get_quality_control_prompt(
  class DocLength (line 1256) | class DocLength(Enum):
  function get_gen_hard_neg_prompt (line 1267) | def get_gen_hard_neg_prompt(task: Task, query: str, pos: str) -> str:

FILE: research/BGE_Coder/data_generation/corpus_generator.py
  class CorpusGenerator (line 11) | class CorpusGenerator:
    method __init__ (line 12) | def __init__(
    method _load_corpus (line 18) | def _load_corpus(self, corpus_dir: str, doc_length: List[str], externa...
    method run (line 80) | def run(

FILE: research/BGE_Coder/data_generation/format_generated_examples.py
  function format_generated_examples (line 7) | def format_generated_examples(
  function main (line 56) | def main():

FILE: research/BGE_Coder/data_generation/llm.py
  class LLM (line 11) | class LLM:
    method __init__ (line 12) | def __init__(
    method split_text (line 41) | def split_text(self, text: str, anchor_points: Tuple[float, float] = (...
    method chat (line 47) | def chat(

FILE: research/BGE_Coder/data_generation/run_generation.py
  function compute_md5 (line 18) | def compute_md5(text: str):
  function get_args (line 22) | def get_args():
  function gen_triplets (line 161) | def gen_triplets(
  function get_save_path (line 196) | def get_save_path(
  function save_triplets (line 213) | def save_triplets(
  function main (line 250) | def main(args):

FILE: research/BGE_Coder/data_generation/search.py
  function create_index (line 8) | def create_index(embeddings: np.ndarray, use_gpu: bool = False):
  function search (line 20) | def search(
  function get_top1 (line 45) | def get_top1(

FILE: research/BGE_Coder/data_generation/triplet_generator.py
  function compute_md5 (line 17) | def compute_md5(text: str):
  class TripletGenerator (line 21) | class TripletGenerator(LLM):
    method __init__ (line 22) | def __init__(
    method _gen_for_code_modification_retrieval (line 34) | def _gen_for_code_modification_retrieval(
    method _gen_for_code_comparison_retrieval (line 81) | def _gen_for_code_comparison_retrieval(
    method _gen_for_code_context_retrieval (line 126) | def _gen_for_code_context_retrieval(
    method _arrange_query_and_pos (line 146) | def _arrange_query_and_pos(task: Task, input_text: str, response: str):
    method _gen_for_normal_task (line 168) | def _gen_for_normal_task(
    method _gen_for_bug_desc_retrieval (line 208) | def _gen_for_bug_desc_retrieval(
    method _gen_for_two_step_not_use_last (line 253) | def _gen_for_two_step_not_use_last(
    method _gen_for_two_step_use_last (line 297) | def _gen_for_two_step_use_last(
    method generate_triplets (line 341) | def generate_triplets(
    method gen_hard_negatives (line 470) | def gen_hard_negatives(self, result: dict, task: Task, num_negatives: ...
    method run_single (line 485) | def run_single(
    method run (line 552) | def run(
    method run_for_gen_neg (line 605) | def run_for_gen_neg(

FILE: research/BGE_Coder/data_generation/utils.py
  function clean_content (line 4) | def clean_content(content: str):
  function clean_code (line 19) | def clean_code(code: str, lang: str, length_threshold: int = 30) -> str:

FILE: research/BGE_Coder/evaluation/coderag_eval/test/arguments.py
  class CodeRAGEvalArgs (line 9) | class CodeRAGEvalArgs:

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/code_search_net.py
  function document2code (line 8) | def document2code(data, split="train"):
  function main (line 25) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/ds1000.py
  function download_source (line 16) | def download_source(source_dir):
  function download_dataset (line 32) | def download_dataset(source_dir):
  function get_dataset (line 47) | def get_dataset(source_dir, mode: str = "Completion", key: str = "All"):
  function document2code (line 65) | def document2code(data: list):
  function main (line 95) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/general_programming.py
  function save_file_jsonl (line 25) | def save_file_jsonl(data, path):
  function main (line 31) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/humaneval.py
  function document2code (line 8) | def document2code(data, split="test"):
  function main (line 25) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/live_code_bench.py
  function get_queries (line 9) | def get_queries(data, split="test") -> list[dict]:
  function get_corpus (line 17) | def get_corpus(hf_name: str, cache_dir: str) -> list[dict]:
  function main (line 26) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/mbpp.py
  function get_function_name (line 8) | def get_function_name(code: str) -> str:
  function document2code (line 19) | def document2code(data, split="test"):
  function main (line 36) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/odex.py
  function document2code (line 12) | def document2code(data, split="test"):
  function main (line 43) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/repoeval.py
  function iterate_repository (line 38) | def iterate_repository(base_dir: str, repo: str) -> dict:
  function window_overlap (line 61) | def window_overlap(span: tuple, target_span: tuple) -> bool:
  class RepoWindowMaker (line 67) | class RepoWindowMaker:
    method __init__ (line 68) | def __init__(self, base_dir, repo, tasks, window_size, slice_size):
    method _buid_windows_for_a_file (line 77) | def _buid_windows_for_a_file(self, fpath_tuple, code):
    method _merge_windows_with_same_context (line 102) | def _merge_windows_with_same_context(self, code_windows):
    method build_windows (line 116) | def build_windows(self):
  function download_data (line 144) | def download_data(directory: str = "repoeval"):
  function repo2code (line 177) | def repo2code(
  function main (line 221) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/repoeval_repo.py
  function iterate_repository (line 38) | def iterate_repository(base_dir: str, repo: str) -> dict:
  function window_overlap (line 61) | def window_overlap(span: tuple, target_span: tuple) -> bool:
  class RepoWindowMaker (line 67) | class RepoWindowMaker:
    method __init__ (line 68) | def __init__(self, base_dir, repo, tasks, window_size, slice_size):
    method _buid_windows_for_a_file (line 77) | def _buid_windows_for_a_file(self, fpath_tuple, code):
    method _merge_windows_with_same_context (line 102) | def _merge_windows_with_same_context(self, code_windows):
    method build_windows (line 116) | def build_windows(self):
  function download_data (line 147) | def download_data(directory: str = "repoeval"):
  function repo2code (line 174) | def repo2code(
  function main (line 210) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/swebench.py
  class ContextManager (line 18) | class ContextManager:
    method __init__ (line 19) | def __init__(self, repo_path, base_commit, verbose=False):
    method __enter__ (line 25) | def __enter__(self):
    method get_environment (line 40) | def get_environment(self):
    method get_readme_files (line 43) | def get_readme_files(self):
    method __exit__ (line 49) | def __exit__(self, exc_type, exc_val, exc_tb):
  class AutoContextManager (line 53) | class AutoContextManager(ContextManager):
    method __init__ (line 56) | def __init__(self, instance, root_dir=None, verbose=False, token=None):
    method __exit__ (line 77) | def __exit__(self, exc_type, exc_val, exc_tb):
  function ingest_files (line 83) | def ingest_files(filenames):
  function get_oracle_filenames (line 91) | def get_oracle_filenames(instance):
  function is_test (line 106) | def is_test(name, test_phrases=None):
  function list_files (line 112) | def list_files(root_dir, include_tests=False):
  function detect_encoding (line 120) | def detect_encoding(filename):
  function ingest_directory_contents (line 128) | def ingest_directory_contents(root_dir, include_tests=False):
  function get_file_contents (line 144) | def get_file_contents(input_instances, verbose: bool = False, tmp_dir: s...
  function document2code (line 173) | def document2code(data, split: str = "test"):
  function main (line 219) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/swebench_repo.py
  class ContextManager (line 18) | class ContextManager:
    method __init__ (line 19) | def __init__(self, repo_path, base_commit, verbose=False):
    method __enter__ (line 25) | def __enter__(self):
    method get_environment (line 40) | def get_environment(self):
    method get_readme_files (line 43) | def get_readme_files(self):
    method __exit__ (line 49) | def __exit__(self, exc_type, exc_val, exc_tb):
  class AutoContextManager (line 53) | class AutoContextManager(ContextManager):
    method __init__ (line 56) | def __init__(self, instance, root_dir=None, verbose=False, token=None):
    method __exit__ (line 77) | def __exit__(self, exc_type, exc_val, exc_tb):
  function ingest_files (line 83) | def ingest_files(filenames):
  function get_oracle_filenames (line 91) | def get_oracle_filenames(instance):
  function is_test (line 106) | def is_test(name, test_phrases=None):
  function list_files (line 112) | def list_files(root_dir, include_tests=False):
  function detect_encoding (line 120) | def detect_encoding(filename):
  function ingest_directory_contents (line 128) | def ingest_directory_contents(root_dir, include_tests=False):
  function get_file_contents (line 144) | def get_file_contents(input_instances, verbose: bool = False, tmp_dir: s...
  function process_single_item (line 174) | def process_single_item(item, args):
  function main (line 226) | def main():

FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/utils.py
  function load_jsonlines (line 5) | def load_jsonlines(file):
  function save_file_jsonl (line 10) | def save_file_jsonl(data, fp):
  function save_tsv_dict (line 14) | def save_tsv_dict(data, fp, fields):
  function cost_esitmate (line 25) | def cost_esitmate(path):

FILE: research/BGE_Coder/evaluation/coderag_eval/test/main.py
  function get_model (line 23) | def get_model(model_args: CodeRAGEvalModelArgs):
  function get_top_docs (line 102) | def get_top_docs(results: dict, corpus: dict, task_id: str, topk: int = ...
  function main (line 111) | def main(

FILE: research/BGE_Coder/evaluation/coderag_eval/test/prompts.py
  function get_task_def_by_task_name (line 4) | def get_task_def_by_task_name(task_name: str) -> str:

FILE: research/BGE_Coder/evaluation/coir_eval/arguments.py
  function coir_tasks (line 9) | def coir_tasks():
  class COIREvalArgs (line 35) | class COIREvalArgs:

FILE: research/BGE_Coder/evaluation/coir_eval/main.py
  function get_model (line 11) | def get_model(model_args: COIREvalModelArgs):
  function main (line 84) | def main(

FILE: research/BGE_Coder/evaluation/coir_eval/prompts.py
  function get_task_def_by_task_name (line 4) | def get_task_def_by_task_name(task_name: str) -> str:

FILE: research/BGE_M3/arguments.py
  class ModelArguments (line 9) | class ModelArguments:
  class DataArguments (line 29) | class DataArguments:
    method __post_init__ (line 82) | def __post_init__(self):
  class RetrieverTrainingArguments (line 88) | class RetrieverTrainingArguments(TrainingArguments):

FILE: research/BGE_M3/data.py
  class SameDatasetTrainDataset (line 16) | class SameDatasetTrainDataset(Dataset):
    method __init__ (line 19) | def __init__(self, args: DataArguments, batch_size: int, seed: int, pr...
    method print_batch_size (line 108) | def print_batch_size(self, batch_size: int, train_group_size: int):
    method get_file_batch_size (line 122) | def get_file_batch_size(file: str, batch_size: int, train_group_size: ...
    method refresh_epoch (line 170) | def refresh_epoch(self):
    method __getitem__ (line 188) | def __getitem__(self, _):
    method shuffle_text (line 198) | def shuffle_text(self, text):
    method create_batch_data (line 209) | def create_batch_data(self, batch_raw_data):
    method __len__ (line 248) | def __len__(self):
  class EmbedCollator (line 253) | class EmbedCollator(DataCollatorWithPadding):
    method __call__ (line 262) | def __call__(self, features):

FILE: research/BGE_M3/modeling.py
  class EncoderOutput (line 18) | class EncoderOutput(ModelOutput):
  class BGEM3Model (line 25) | class BGEM3Model(nn.Module):
    method __init__ (line 27) | def __init__(self,
    method load_model (line 69) | def load_model(self, model_name, colbert_dim: int = -1):
    method gradient_checkpointing_enable (line 91) | def gradient_checkpointing_enable(self, **kwargs):
    method dense_embedding (line 94) | def dense_embedding(self, hidden_state, mask):
    method sparse_embedding (line 102) | def sparse_embedding(self, hidden_state, input_ids, return_embedding: ...
    method colbert_embedding (line 132) | def colbert_embedding(self, last_hidden_state, mask):
    method dense_score (line 137) | def dense_score(self, q_reps, p_reps):
    method sparse_score (line 142) | def sparse_score(self, q_reps, p_reps):
    method colbert_score (line 147) | def colbert_score(self, q_reps, p_reps, q_mask: torch.Tensor):
    method _encode (line 154) | def _encode(self, features):
    method encode (line 167) | def encode(self, features, sub_batch_size=None):
    method compute_sub_batch_size (line 196) | def compute_sub_batch_size(self, features):
    method compute_similarity (line 203) | def compute_similarity(self, q_reps, p_reps):
    method distill_loss (line 208) | def distill_loss(self, teacher_targets, student_scores, group_size):
    method forward (line 223) | def forward(self, query: Dict[str, Tensor] = None, passage: Dict[str, ...
    method compute_loss (line 317) | def compute_loss(self, scores, target):
    method _dist_gather_tensor (line 320) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]):
    method save (line 333) | def save(self, output_dir: str):
    method load_pooler (line 349) | def load_pooler(self, model_dir):
  class BGEM3ForInference (line 356) | class BGEM3ForInference(BGEM3Model):
    method forward (line 358) | def forward(self,

FILE: research/BGE_M3/run.py
  class TrainerCallbackForDataRefresh (line 28) | class TrainerCallbackForDataRefresh(TrainerCallback):
    method __init__ (line 29) | def __init__(self, train_dataset):
    method on_epoch_end (line 32) | def on_epoch_end(self, args: TrainingArguments, state: TrainerState, c...
  function main (line 39) | def main():

FILE: research/BGE_M3/split_data_by_length.py
  function get_args (line 24) | def get_args():
  class SplitByLengthHandler (line 38) | class SplitByLengthHandler:
    method __init__ (line 39) | def __init__(self,
    method _get_length_ranges_list (line 76) | def _get_length_ranges_list(length_list: list):
    method _process_dir (line 90) | def _process_dir(self, dir_path: str, output_dir: str):
    method _process_file (line 104) | def _process_file(self, file_path: str, output_path: str):
    method run (line 165) | def run(self, input_path: str, output_dir: str, log_name: str=None):

FILE: research/BGE_M3/trainer.py
  function save_ckpt_for_sentence_transformers (line 5) | def save_ckpt_for_sentence_transformers(ckpt_dir, pooling_mode: str = 'c...
  class BiTrainer (line 16) | class BiTrainer(Trainer):
    method _save (line 17) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 41) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: research/BGE_VL/eval/eval_Circo.py
  class Args (line 18) | class Args:
  function index (line 70) | def index(model: Flag_mmret, corpus: datasets.Dataset, batch_size: int =...
  function search (line 134) | def search(model: Flag_mmret, queries: datasets, faiss_index: faiss.Inde...
  function main (line 162) | def main():

FILE: research/BGE_VL/eval/eval_fashioniq.py
  class Args (line 23) | class Args:
  function index (line 71) | def index(model: Flag_mmret, corpus: datasets.Dataset, batch_size: int =...
  function search (line 135) | def search(model: Flag_mmret, queries: datasets, faiss_index: faiss.Inde...
  function evaluate (line 162) | def evaluate(preds, labels, cutoffs=[1,5,10,20,50,100]):
  function main (line 200) | def main():

FILE: research/BGE_VL/eval/flag_dataset.py
  class MMIT_Dataset (line 22) | class MMIT_Dataset(Dataset):
    method __init__ (line 23) | def __init__(self, captions, image_ids, image_dir, image_processor) ->...
    method __getitem__ (line 34) | def __getitem__(self, item):
    method __len__ (line 46) | def __len__(self):
  class MMIT_Collator (line 50) | class MMIT_Collator:
    method __init__ (line 51) | def __init__(self, tokenizer, caption_max_len):
    method __call__ (line 57) | def __call__(self, features):
  class Image_Dataset (line 79) | class Image_Dataset(Dataset):
    method __init__ (line 80) | def __init__(self, image_ids, image_dir, image_processor) -> None:
    method __getitem__ (line 85) | def __getitem__(self, item):
    method __len__ (line 91) | def __len__(self):
  class Image_Collator (line 94) | class Image_Collator:
    method __init__ (line 95) | def __init__(self, tokenizer, caption_max_len):
    method __call__ (line 100) | def __call__(self, features):

FILE: research/BGE_VL/eval/flag_mmret.py
  class Flag_mmret (line 11) | class Flag_mmret(nn.Module):
    method __init__ (line 12) | def __init__(
    method encode_queries (line 47) | def encode_queries(self, queries: Union[List[str], str],
    method encode_corpus (line 72) | def encode_corpus(self,
    method encode_text (line 90) | def encode_text(self, sentences: Union[List[str], str], batch_size: in...
    method encode_mm_it (line 123) | def encode_mm_it(self, captions: Union[List[str], str], image_ids: Uni...
    method encode_image (line 172) | def encode_image(self, image_ids: Union[List[str], str],  batch_size: ...

FILE: research/BGE_VL/modeling_MMRet_CLIP.py
  function contrastive_loss (line 59) | def contrastive_loss(logits: torch.Tensor) -> torch.Tensor:
  function clip_loss (line 63) | def clip_loss(similarity: torch.Tensor) -> torch.Tensor:
  function _get_vector_norm (line 69) | def _get_vector_norm(tensor: torch.Tensor) -> torch.Tensor:
  class CLIPVisionModelOutput (line 81) | class CLIPVisionModelOutput(ModelOutput):
  class CLIPTextModelOutput (line 110) | class CLIPTextModelOutput(ModelOutput):
  class CLIPOutput (line 139) | class CLIPOutput(ModelOutput):
    method to_tuple (line 168) | def to_tuple(self) -> Tuple[Any]:
  class CLIPVisionEmbeddings (line 175) | class CLIPVisionEmbeddings(nn.Module):
    method __init__ (line 176) | def __init__(self, config: CLIPVisionConfig):
    method forward (line 198) | def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
  class CLIPTextEmbeddings (line 210) | class CLIPTextEmbeddings(nn.Module):
    method __init__ (line 211) | def __init__(self, config: CLIPTextConfig):
    method forward (line 223) | def forward(
  class CLIPAttention (line 243) | class CLIPAttention(nn.Module):
    method __init__ (line 246) | def __init__(self, config):
    method _shape (line 265) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 268) | def forward(
  class CLIPFlashAttention2 (line 347) | class CLIPFlashAttention2(CLIPAttention):
    method __init__ (line 355) | def __init__(self, *args, **kwargs):
    method forward (line 364) | def forward(
  class CLIPSdpaAttention (line 434) | class CLIPSdpaAttention(CLIPAttention):
    method forward (line 442) | def forward(
  class CLIPMLP (line 514) | class CLIPMLP(nn.Module):
    method __init__ (line 515) | def __init__(self, config):
    method forward (line 522) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class CLIPEncoderLayer (line 529) | class CLIPEncoderLayer(nn.Module):
    method __init__ (line 530) | def __init__(self, config: CLIPConfig):
    method forward (line 538) | def forward(
  class CLIPPreTrainedModel (line 579) | class CLIPPreTrainedModel(PreTrainedModel):
    method _init_weights (line 591) | def _init_weights(self, module):
  class CLIPEncoder (line 748) | class CLIPEncoder(nn.Module):
    method __init__ (line 757) | def __init__(self, config: CLIPConfig):
    method forward (line 763) | def forward(
  class CLIPTextTransformer (line 845) | class CLIPTextTransformer(nn.Module):
    method __init__ (line 846) | def __init__(self, config: CLIPTextConfig):
    method forward (line 862) | def forward(
  class CLIPTextModel (line 949) | class CLIPTextModel(CLIPPreTrainedModel):
    method __init__ (line 954) | def __init__(self, config: CLIPTextConfig):
    method get_input_embeddings (line 960) | def get_input_embeddings(self) -> nn.Module:
    method set_input_embeddings (line 963) | def set_input_embeddings(self, value):
    method forward (line 968) | def forward(
  class CLIPVisionTransformer (line 1006) | class CLIPVisionTransformer(nn.Module):
    method __init__ (line 1007) | def __init__(self, config: CLIPVisionConfig):
    method forward (line 1019) | def forward(
  class CLIPVisionModel (line 1068) | class CLIPVisionModel(CLIPPreTrainedModel):
    method __init__ (line 1073) | def __init__(self, config: CLIPVisionConfig):
    method get_input_embeddings (line 1079) | def get_input_embeddings(self) -> nn.Module:
    method forward (line 1084) | def forward(
  class CLIPModel (line 1124) | class CLIPModel(CLIPPreTrainedModel):
    method __init__ (line 1128) | def __init__(self, config: CLIPConfig):
    method set_processor (line 1163) | def set_processor(self, model_name):
    method get_text_features (line 1167) | def get_text_features(
    method get_image_features (line 1214) | def get_image_features(
    method encode_image (line 1263) | def encode_image(self, images):
    method encode_text (line 1268) | def encode_text(self, text):
    method encode_multimodal (line 1273) | def encode_multimodal(self, images, text):
    method data_process (line 1282) | def data_process(self, images=None, text=None):
    method encode (line 1309) | def encode(self, images=None, text=None):
    method forward (line 1321) | def forward(
  class CLIPTextModelWithProjection (line 1421) | class CLIPTextModelWithProjection(CLIPPreTrainedModel):
    method __init__ (line 1426) | def __init__(self, config: CLIPTextConfig):
    method get_input_embeddings (line 1437) | def get_input_embeddings(self) -> nn.Module:
    method set_input_embeddings (line 1440) | def set_input_embeddings(self, value):
    method forward (line 1445) | def forward(
  class CLIPVisionModelWithProjection (line 1503) | class CLIPVisionModelWithProjection(CLIPPreTrainedModel):
    method __init__ (line 1507) | def __init__(self, config: CLIPVisionConfig):
    method get_input_embeddings (line 1518) | def get_input_embeddings(self) -> nn.Module:
    method forward (line 1523) | def forward(
  class CLIPForImageClassification (line 1583) | class CLIPForImageClassification(CLIPPreTrainedModel):
    method __init__ (line 1586) | def __init__(self, config: CLIPConfig) -> None:
    method forward (line 1610) | def forward(

FILE: research/C_MTEB/C_MTEB/tasks/Classification.py
  class TNews (line 7) | class TNews(AbsTaskClassification):
    method metadata_dict (line 76) | def metadata_dict(self) -> dict[str, str]:
  class IFlyTek (line 82) | class IFlyTek(AbsTaskClassification):
    method metadata_dict (line 152) | def metadata_dict(self) -> dict[str, str]:
  class MultilingualSentiment (line 159) | class MultilingualSentiment(AbsTaskClassification):
    method metadata_dict (line 186) | def metadata_dict(self) -> dict[str, str]:
  class JDReview (line 192) | class JDReview(AbsTaskClassification):
    method metadata_dict (line 224) | def metadata_dict(self) -> dict[str, str]:
  class OnlineShopping (line 230) | class OnlineShopping(AbsTaskClassification):
    method metadata_dict (line 262) | def metadata_dict(self) -> dict[str, str]:
  class Waimai (line 268) | class Waimai(AbsTaskClassification):
    method metadata_dict (line 300) | def metadata_dict(self) -> dict[str, str]:

FILE: research/C_MTEB/C_MTEB/tasks/Clustering.py
  class CLSClusteringFastS2S (line 17) | class CLSClusteringFastS2S(AbsTaskClusteringFast):
    method dataset_transform (line 56) | def dataset_transform(self):
  class CLSClusteringFastP2P (line 77) | class CLSClusteringFastP2P(AbsTaskClusteringFast):
    method dataset_transform (line 116) | def dataset_transform(self):
  class CLSClusteringS2S (line 137) | class CLSClusteringS2S(AbsTaskClustering):
  class CLSClusteringP2P (line 173) | class CLSClusteringP2P(AbsTaskClustering):
  class ThuNewsClusteringFastS2S (line 207) | class ThuNewsClusteringFastS2S(AbsTaskClusteringFast):
    method dataset_transform (line 246) | def dataset_transform(self):
  class ThuNewsClusteringFastP2P (line 267) | class ThuNewsClusteringFastP2P(AbsTaskClusteringFast):
    method dataset_transform (line 306) | def dataset_transform(self):
  class ThuNewsClusteringS2S (line 327) | class ThuNewsClusteringS2S(AbsTaskClustering):
  class ThuNewsClusteringP2P (line 370) | class ThuNewsClusteringP2P(AbsTaskClustering):

FILE: research/C_MTEB/C_MTEB/tasks/MultiLongDocRetrieval.py
  function load_mldr_data (line 11) | def load_mldr_data(path: str, langs: list, eval_splits: list, cache_dir:...
  class MultiLongDocRetrieval (line 31) | class MultiLongDocRetrieval(MultilingualTask, AbsTaskRetrieval):
    method description (line 33) | def description(self):
    method load_data (line 46) | def load_data(self, **kwargs):
    method evaluate (line 58) | def evaluate(

FILE: research/C_MTEB/C_MTEB/tasks/PairClassification.py
  class Ocnli (line 7) | class Ocnli(AbsTaskPairClassification):
    method dataset_transform (line 40) | def dataset_transform(self):
  class Cmnli (line 45) | class Cmnli(AbsTaskPairClassification):
    method dataset_transform (line 113) | def dataset_transform(self):

FILE: research/C_MTEB/C_MTEB/tasks/Reranking.py
  class T2Reranking (line 7) | class T2Reranking(AbsTaskReranking):
  class MMarcoReranking (line 42) | class MMarcoReranking(AbsTaskReranking):
  class CMedQAv1 (line 77) | class CMedQAv1(AbsTaskReranking):
  class CMedQAv2 (line 116) | class CMedQAv2(AbsTaskReranking):

FILE: research/C_MTEB/C_MTEB/tasks/Retrieval.py
  function load_retrieval_data (line 11) | def load_retrieval_data(dataset_path, dataset_revision, qrel_revision, e...
  class T2Retrieval (line 28) | class T2Retrieval(AbsTaskRetrieval):
    method load_data (line 75) | def load_data(self, **kwargs):
  class MMarcoRetrieval (line 88) | class MMarcoRetrieval(AbsTaskRetrieval):
    method load_data (line 135) | def load_data(self, **kwargs):
  class DuRetrieval (line 148) | class DuRetrieval(AbsTaskRetrieval):
    method load_data (line 193) | def load_data(self, **kwargs):
  class CovidRetrieval (line 206) | class CovidRetrieval(AbsTaskRetrieval):
    method load_data (line 244) | def load_data(self, **kwargs):
  class CmedqaRetrieval (line 257) | class CmedqaRetrieval(AbsTaskRetrieval):
    method load_data (line 295) | def load_data(self, **kwargs):
  class EcomRetrieval (line 308) | class EcomRetrieval(AbsTaskRetrieval):
    method load_data (line 348) | def load_data(self, **kwargs):
  class MedicalRetrieval (line 361) | class MedicalRetrieval(AbsTaskRetrieval):
    method load_data (line 401) | def load_data(self, **kwargs):
  class VideoRetrieval (line 414) | class VideoRetrieval(AbsTaskRetrieval):
    method load_data (line 454) | def load_data(self, **kwargs):

FILE: research/C_MTEB/C_MTEB/tasks/STS.py
  class ATEC (line 8) | class ATEC(AbsTaskSTS):
    method metadata_dict (line 54) | def metadata_dict(self) -> dict[str, str]:
  class BQ (line 61) | class BQ(AbsTaskSTS):
    method metadata_dict (line 96) | def metadata_dict(self) -> dict[str, str]:
  class LCQMC (line 103) | class LCQMC(AbsTaskSTS):
    method metadata_dict (line 138) | def metadata_dict(self) -> dict[str, str]:
  class PAWSX (line 145) | class PAWSX(AbsTaskSTS):
    method metadata_dict (line 180) | def metadata_dict(self) -> dict[str, str]:
  class STSB (line 187) | class STSB(AbsTaskSTS):
    method metadata_dict (line 222) | def metadata_dict(self) -> dict[str, str]:
  class AFQMC (line 229) | class AFQMC(AbsTaskSTS):
    method metadata_dict (line 275) | def metadata_dict(self) -> dict[str, str]:
  class QBQTC (line 282) | class QBQTC(AbsTaskSTS):
    method metadata_dict (line 309) | def metadata_dict(self) -> dict[str, str]:

FILE: research/C_MTEB/MKQA/dense_retrieval/step0-generate_embedding.py
  class ModelArgs (line 28) | class ModelArgs:
  class EvalArgs (line 48) | class EvalArgs:
  function get_model (line 67) | def get_model(model_args: ModelArgs):
  function parse_corpus (line 77) | def parse_corpus(corpus: datasets.Dataset):
  function generate_index (line 89) | def generate_index(model: FlagModel, corpus: datasets.Dataset, max_passa...
  function save_result (line 100) | def save_result(index: faiss.Index, docid: list, index_save_dir: str):
  function main (line 109) | def main():

FILE: research/C_MTEB/MKQA/dense_retrieval/step1-search_results.py
  class ModelArgs (line 28) | class ModelArgs:
  class EvalArgs (line 52) | class EvalArgs:
  function get_query_encoder (line 88) | def get_query_encoder(model_args: ModelArgs):
  function check_languages (line 104) | def check_languages(languages):
  function get_queries_and_qids (line 114) | def get_queries_and_qids(qa_data_dir: str, lang: str, add_instruction: b...
  function save_result (line 131) | def save_result(search_results, result_save_path: str, qids: list, max_h...
  function main (line 142) | def main():

FILE: research/C_MTEB/MKQA/dense_retrieval/step2-eval_dense_mkqa.py
  class EvalArgs (line 58) | class EvalArgs:
  function check_languages (line 99) | def check_languages(languages):
  function compute_average (line 109) | def compute_average(results: dict):
  function save_results (line 121) | def save_results(model_name: str, pooling_method: str, normalize_embeddi...
  function get_corpus_dict (line 141) | def get_corpus_dict():
  function get_qa_dict (line 152) | def get_qa_dict(qa_path: str):
  function get_search_result_dict (line 162) | def get_search_result_dict(search_result_path: str, top_k: int=100):
  function evaluate (line 181) | def evaluate(corpus_dict: dict, qa_dict: dict, search_result_path: str, ...
  function main (line 204) | def main():

FILE: research/C_MTEB/MKQA/hybrid_retrieval/step0-hybrid_search_results.py
  class EvalArgs (line 21) | class EvalArgs:
  function check_languages (line 61) | def check_languages(languages):
  function get_search_result_dict (line 71) | def get_search_result_dict(search_result_path: str, top_k: int=1000):
  function get_queries_dict (line 91) | def get_queries_dict(queries_path: str):
  function save_hybrid_results (line 100) | def save_hybrid_results(sparse_search_result_path: str, dense_search_res...
  function main (line 134) | def main():

FILE: research/C_MTEB/MKQA/hybrid_retrieval/step1-eval_hybrid_mkqa.py
  class EvalArgs (line 50) | class EvalArgs:
  function check_languages (line 91) | def check_languages(languages):
  function compute_average (line 101) | def compute_average(results: dict):
  function save_results (line 113) | def save_results(model_name: str, pooling_method: str, normalize_embeddi...
  function get_corpus_dict (line 133) | def get_corpus_dict():
  function get_qa_dict (line 144) | def get_qa_dict(qa_path: str):
  function get_search_result_dict (line 154) | def get_search_result_dict(search_result_path: str, top_k: int=100):
  function evaluate (line 173) | def evaluate(corpus_dict: dict, qa_dict: dict, search_result_path: str, ...
  function main (line 196) | def main():

FILE: research/C_MTEB/MKQA/multi_vector_rerank/hybrid_all_results.py
  class EvalArgs (line 23) | class EvalArgs:
  function check_languages (line 75) | def check_languages(languages):
  function get_search_result_dict (line 85) | def get_search_result_dict(search_result_path: str, top_k: int=1000):
  function get_queries_dict (line 105) | def get_queries_dict(queries_path: str):
  function save_hybrid_results (line 114) | def save_hybrid_results(sparse_search_result_dict: dict, dense_search_re...
  function main (line 151) | def main():

FILE: research/C_MTEB/MKQA/multi_vector_rerank/step0-rerank_results.py
  class ModelArgs (line 33) | class ModelArgs:
  class EvalArgs (line 53) | class EvalArgs:
  function check_languages (line 113) | def check_languages(languages):
  function get_reranker (line 123) | def get_reranker(model_args: ModelArgs, device: str=None):
  function get_search_result_dict (line 133) | def get_search_result_dict(search_result_path: str, top_k: int=100):
  function get_queries_dict (line 152) | def get_queries_dict(queries_path: str):
  function get_corpus_dict (line 162) | def get_corpus_dict(corpus: datasets.Dataset):
  function save_rerank_results (line 172) | def save_rerank_results(queries_dict: dict, corpus_dict: dict, reranker:...
  function get_shard (line 211) | def get_shard(search_result_dict: dict, num_shards: int, shard_id: int):
  function rerank_results (line 225) | def rerank_results(corpus_dict: dict, languages: list, eval_args: EvalAr...
  function main (line 277) | def main():

FILE: research/C_MTEB/MKQA/multi_vector_rerank/step1-eval_rerank_mkqa.py
  class EvalArgs (line 48) | class EvalArgs:
  function check_languages (line 85) | def check_languages(languages):
  function compute_average (line 95) | def compute_average(results: dict):
  function save_results (line 107) | def save_results(model_name: str, reranker_name: str, results: dict, sav...
  function get_corpus_dict (line 126) | def get_corpus_dict():
  function get_qa_dict (line 137) | def get_qa_dict(qa_path: str):
  function get_search_result_dict (line 147) | def get_search_result_dict(search_result_path: str, top_k: int=100):
  function evaluate (line 166) | def evaluate(corpus_dict: dict, qa_dict: dict, search_result_path: str, ...
  function main (line 189) | def main():

FILE: research/C_MTEB/MKQA/sparse_retrieval/bm25_baseline.py
  function generate_corpus (line 25) | def generate_corpus(corpus_save_path: str):
  function generate_queries (line 42) | def generate_queries(qa_data_dir: str, lang: str, queries_save_dir: str):
  function index (line 65) | def index(corpus_save_dir: str, index_save_dir: str):
  function search (line 77) | def search(index_save_dir: str, queries_save_dir: str, lang: str, result...
  function main (line 92) | def main():

FILE: research/C_MTEB/MKQA/sparse_retrieval/bm25_baseline_same_tokenizer.py
  function _map_func_corpus (line 32) | def _map_func_corpus(examples):
  function _map_func_query (line 51) | def _map_func_query(examples):
  function generate_corpus (line 70) | def generate_corpus(corpus_save_path: str):
  function generate_queries (line 90) | def generate_queries(qa_data_dir: str, lang: str, queries_save_dir: str):
  function index (line 115) | def index(corpus_save_dir: str, index_save_dir: str):
  function search (line 127) | def search(index_save_dir: str, queries_save_dir: str, lang: str, result...
  function main (line 141) | def main():

FILE: research/C_MTEB/MKQA/sparse_retrieval/step0-encode_query-and-corpus.py
  class ModelArgs (line 31) | class ModelArgs:
  class EvalArgs (line 51) | class EvalArgs:
  function get_model (line 82) | def get_model(model_args: ModelArgs):
  function check_languages (line 92) | def check_languages(languages):
  function parse_corpus (line 102) | def parse_corpus(corpus: datasets.Dataset):
  function get_queries (line 114) | def get_queries(qa_data_dir: str, lang: str):
  function encode_and_save_corpus (line 134) | def encode_and_save_corpus(corpus_save_path: str, model: BGEM3FlagModel,...
  function encode_and_save_queries (line 161) | def encode_and_save_queries(queries_save_path: str, model: BGEM3FlagMode...
  function main (line 191) | def main():

FILE: research/C_MTEB/MKQA/sparse_retrieval/step1-search_results.py
  class ModelArgs (line 20) | class ModelArgs:
  class EvalArgs (line 28) | class EvalArgs:
  function check_languages (line 64) | def check_languages(languages):
  function generate_index (line 74) | def generate_index(corpus_embd_dir: str, index_save_dir: str, threads: i...
  function search_and_save_results (line 87) | def search_and_save_results(index_save_dir: str, query_embd_path: str, r...
  function parse_corpus (line 101) | def parse_corpus(corpus: datasets.Dataset):
  function main (line 107) | def main():

FILE: research/C_MTEB/MKQA/sparse_retrieval/step2-eval_sparse_mkqa.py
  class EvalArgs (line 56) | class EvalArgs:
  function check_languages (line 97) | def check_languages(languages):
  function compute_average (line 107) | def compute_average(results: dict):
  function save_results (line 119) | def save_results(model_name: str, pooling_method: str, normalize_embeddi...
  function get_corpus_dict (line 139) | def get_corpus_dict():
  function get_qa_dict (line 150) | def get_qa_dict(qa_path: str):
  function get_search_result_dict (line 160) | def get_search_result_dict(search_result_path: str, top_k: int=100):
  function evaluate (line 179) | def evaluate(corpus_dict: dict, qa_dict: dict, search_result_path: str, ...
  function main (line 202) | def main():

FILE: research/C_MTEB/MKQA/utils/evaluation.py
  class SimpleTokenizer (line 8) | class SimpleTokenizer:
    method __init__ (line 12) | def __init__(self):
    method tokenize (line 22) | def tokenize(self, text, uncased=False):
  function _normalize (line 31) | def _normalize(text):
  function has_answer (line 35) | def has_answer(answers, text, tokenizer) -> bool:
  function check_answer (line 49) | def check_answer(example, tokenizer) -> List[bool]:
  function evaluate_recall_qa (line 63) | def evaluate_recall_qa(ctxs, answers, k=100):

FILE: research/C_MTEB/MKQA/utils/normalize_text.py
  function normalize (line 133) | def normalize(text):

FILE: research/C_MTEB/MLDR/dense_retrieval/step0-generate_embedding.py
  class ModelArgs (line 23) | class ModelArgs:
  class EvalArgs (line 43) | class EvalArgs:
  function get_model (line 67) | def get_model(model_args: ModelArgs):
  function check_languages (line 77) | def check_languages(languages):
  function load_corpus (line 87) | def load_corpus(lang: str):
  function generate_index (line 95) | def generate_index(model: FlagModel, corpus: datasets.Dataset, max_passa...
  function save_result (line 106) | def save_result(index: faiss.Index, docid: list, index_save_dir: str):
  function main (line 115) | def main():

FILE: research/C_MTEB/MLDR/dense_retrieval/step1-search_results.py
  class ModelArgs (line 24) | class ModelArgs:
  class EvalArgs (line 48) | class EvalArgs:
  function get_query_encoder (line 76) | def get_query_encoder(model_args: ModelArgs):
  function check_languages (line 92) | def check_languages(languages):
  function get_queries_and_qids (line 102) | def get_queries_and_qids(lang: str, split: str='test', add_instruction: ...
  function save_result (line 115) | def save_result(search_results, result_save_path: str, qids: list, max_h...
  function main (line 126) | def main():

FILE: research/C_MTEB/MLDR/dense_retrieval/step2-eval_dense_mldr.py
  class EvalArgs (line 49) | class EvalArgs:
  function check_languages (line 86) | def check_languages(languages):
  function compute_average (line 96) | def compute_average(results: dict):
  function save_results (line 108) | def save_results(model_name: str, pooling_method: str, normalize_embeddi...
  function map_metric (line 128) | def map_metric(metric: str):
  function evaluate (line 138) | def evaluate(script_path, qrels_path, search_result_path, metrics: list):
  function main (line 164) | def main():

FILE: research/C_MTEB/MLDR/hybrid_retrieval/step0-hybrid_search_results.py
  class EvalArgs (line 19) | class EvalArgs:
  function check_languages (line 55) | def check_languages(languages):
  function get_search_result_dict (line 65) | def get_search_result_dict(search_result_path: str, top_k: int=1000):
  function save_hybrid_results (line 85) | def save_hybrid_results(sparse_search_result_dict: dict, dense_search_re...
  function main (line 116) | def main():

FILE: research/C_MTEB/MLDR/hybrid_retrieval/step1-eval_hybrid_mldr.py
  class EvalArgs (line 39) | class EvalArgs:
  function check_languages (line 76) | def check_languages(languages):
  function compute_average (line 86) | def compute_average(results: dict):
  function save_results (line 98) | def save_results(model_name: str, pooling_method: str, normalize_embeddi...
  function map_metric (line 118) | def map_metric(metric: str):
  function evaluate (line 128) | def evaluate(script_path, qrels_path, search_result_path, metrics: list):
  function main (line 154) | def main():

FILE: research/C_MTEB/MLDR/mteb_dense_eval/eval_MLDR.py
  class EvalArgs (line 26) | class EvalArgs:
  class ModelArgs (line 43) | class ModelArgs:
  function check_languages (line 86) | def check_languages(languages):
  function main (line 98) | def main():

FILE: research/C_MTEB/MLDR/mteb_dense_eval/flag_dres_model.py
  function _transform_func (line 13) | def _transform_func(examples: Dict[str, List],
  function _transform_func_v2 (line 24) | def _transform_func_v2(examples: Dict[str, List],
  class FlagDRESModel (line 39) | class FlagDRESModel(DRESModel):
    method __init__ (line 40) | def __init__(
    method encode_queries (line 81) | def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray:
    method encode_corpus (line 98) | def encode_corpus(self, corpus: List[Union[Dict[str, str], str]], **kw...
    method encode (line 116) | def encode(self, sentences: List[str], max_length: int, batch_size: in...
    method pooling (line 161) | def pooling(self,

FILE: research/C_MTEB/MLDR/multi_vector_rerank/hybrid_all_results.py
  class EvalArgs (line 21) | class EvalArgs:
  function check_languages (line 69) | def check_languages(languages):
  function get_search_result_dict (line 79) | def get_search_result_dict(search_result_path: str, top_k: int=1000):
  function save_hybrid_results (line 99) | def save_hybrid_results(sparse_search_result_dict: dict, dense_search_re...
  function main (line 136) | def main():

FILE: research/C_MTEB/MLDR/multi_vector_rerank/step0-rerank_results.py
  class ModelArgs (line 28) | class ModelArgs:
  class EvalArgs (line 48) | class EvalArgs:
  function check_languages (line 108) | def check_languages(languages):
  function get_reranker (line 118) | def get_reranker(model_args: ModelArgs, device: str=None):
  function get_search_result_dict (line 128) | def get_search_result_dict(search_result_path: str, top_k: int=200):
  function get_queries_dict (line 147) | def get_queries_dict(lang: str, split: str='test'):
  function get_corpus_dict (line 158) | def get_corpus_dict(lang: str):
  function save_rerank_results (line 169) | def save_rerank_results(queries_dict: dict, corpus_dict: dict, reranker:...
  function get_shard (line 207) | def get_shard(search_result_dict: dict, num_shards: int, shard_id: int):
  function rerank_results (line 221) | def rerank_results(languages: list, eval_args: EvalArgs, model_args: Mod...
  function main (line 278) | def main():

FILE: research/C_MTEB/MLDR/multi_vector_rerank/step1-eval_rerank_mldr.py
  class EvalArgs (line 41) | class EvalArgs:
  function check_languages (line 74) | def check_languages(languages):
  function compute_average (line 84) | def compute_average(results: dict):
  function save_results (line 96) | def save_results(model_name: str, reranker_name: str, results: dict, sav...
  function map_metric (line 115) | def map_metric(metric: str):
  function evaluate (line 125) | def evaluate(script_path: str, qrels_path, search_result_path, metrics: ...
  function merge_search_result (line 151) | def merge_search_result(search_result_save_dir: str, lang: str):
  function main (line 175) | def main():

FILE: research/C_MTEB/MLDR/sparse_retrieval/bm25_baseline.py
  function generate_corpus (line 19) | def generate_corpus(lang: str, corpus_save_dir: str):
  function generate_queries (line 32) | def generate_queries(lang: str, queries_save_dir: str, split: str='test'):
  function index (line 52) | def index(lang: str, corpus_save_dir: str, index_save_dir: str):
  function search (line 64) | def search(index_save_dir: str, queries_save_dir: str, lang: str, result...
  function main (line 79) | def main():

FILE: research/C_MTEB/MLDR/sparse_retrieval/bm25_baseline_same_tokenizer.py
  function _map_func_corpus (line 26) | def _map_func_corpus(examples):
  function _map_func_query (line 46) | def _map_func_query(examples):
  function generate_corpus (line 67) | def generate_corpus(lang: str, corpus_save_dir: str):
  function generate_queries (line 82) | def generate_queries(lang: str, queries_save_dir: str, split: str='test'):
  function index (line 104) | def index(corpus_save_dir: str, index_save_dir: str):
  function search (line 115) | def search(index_save_dir: str, queries_save_dir: str, lang: str, result...
  function main (line 129) | def main():

FILE: research/C_MTEB/MLDR/sparse_retrieval/step0-encode_query-and-corpus.py
  class ModelArgs (line 25) | class ModelArgs:
  class EvalArgs (line 45) | class EvalArgs:
  function get_model (line 77) | def get_model(model_args: ModelArgs):
  function check_languages (line 87) | def check_languages(languages):
  function load_corpus (line 97) | def load_corpus(lang: str):
  function get_queries (line 105) | def get_queries(lang: str, split: str='test'):
  function encode_corpus (line 119) | def encode_corpus(model: BGEM3FlagModel, corpus: datasets.Dataset, max_p...
  function encode_queries (line 143) | def encode_queries(model: BGEM3FlagModel, queries: datasets.Dataset, max...
  function save_result (line 170) | def save_result(encoded_queries_list: list, encoded_corpus_list: list, s...
  function main (line 185) | def main():

FILE: research/C_MTEB/MLDR/sparse_retrieval/step1-search_results.py
  class ModelArgs (line 16) | class ModelArgs:
  class EvalArgs (line 24) | class EvalArgs:
  function check_languages (line 56) | def check_languages(languages):
  function generate_index (line 66) | def generate_index(lang: str, corpus_embd_dir: str, index_save_dir: str,...
  function search_and_save_results (line 79) | def search_and_save_results(index_save_dir: str, query_embd_path: str, r...
  function main (line 93) | def main():

FILE: research/C_MTEB/MLDR/sparse_retrieval/step2-eval_sparse_mldr.py
  class EvalArgs (line 47) | class EvalArgs:
  function check_languages (line 84) | def check_languages(languages):
  function compute_average (line 94) | def compute_average(results: dict):
  function save_results (line 106) | def save_results(model_name: str, pooling_method: str, normalize_embeddi...
  function map_metric (line 130) | def map_metric(metric: str):
  function evaluate (line 140) | def evaluate(script_path, qrels_path, search_result_path, metrics: list):
  function main (line 166) | def main():

FILE: research/C_MTEB/eval_C-MTEB.py
  function get_args (line 18) | def get_args():

FILE: research/C_MTEB/eval_MTEB.py
  function get_args (line 16) | def get_args():

FILE: research/C_MTEB/eval_cross_encoder.py
  function get_args (line 9) | def get_args():

FILE: research/C_MTEB/flag_dres_model.py
  class FlagDRESModel (line 9) | class FlagDRESModel:
    method __init__ (line 10) | def __init__(
    method encode_queries (line 40) | def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray:
    method encode_corpus (line 52) | def encode_corpus(self, corpus: List[Union[Dict[str, str], str]], **kw...
    method encode (line 65) | def encode(self, sentences: List[str], **kwargs) -> np.ndarray:
    method pooling (line 87) | def pooling(self,

FILE: research/C_MTEB/summarize_results.py
  function read_results (line 21) | def read_results(task_types, args):
  function output_markdown (line 47) | def output_markdown(tasks_results, model, save_file):
  function get_args (line 119) | def get_args():

FILE: research/LLARA/finetune/arguments.py
  function default_list (line 8) | def default_list() -> List[int]:
  class ModelArguments (line 13) | class ModelArguments:
  class DataArguments (line 83) | class DataArguments:
    method __post_init__ (line 153) | def __post_init__(self):
  class RetrieverTrainingArguments (line 158) | class RetrieverTrainingArguments(TrainingArguments):

FILE: research/LLARA/finetune/data.py
  class TrainDatasetForEmbedding (line 19) | class TrainDatasetForEmbedding(Dataset):
    method __init__ (line 20) | def __init__(
    method __len__ (line 52) | def __len__(self):
    method __getitem__ (line 56) | def __getitem__(self, item) -> Tuple[BatchEncoding, List[BatchEncoding]]:
  class EmbedCollator (line 96) | class EmbedCollator(DataCollatorForSeq2Seq):
    method __call__ (line 106) | def __call__(self, features, return_tensors='pt'):

FILE: research/LLARA/finetune/load_model.py
  function get_model (line 8) | def get_model(model_args):

FILE: research/LLARA/finetune/modeling.py
  class EncoderOutput (line 17) | class EncoderOutput(ModelOutput):
  class BiEncoderModel (line 24) | class BiEncoderModel(nn.Module):
    method __init__ (line 27) | def __init__(self,
    method gradient_checkpointing_enable (line 59) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 62) | def enable_input_require_grads(self, **kwargs):
    method encode (line 65) | def encode(self, features):
    method compute_similarity (line 107) | def compute_similarity(self, q_reps, p_reps):
    method forward (line 112) | def forward(self, query: Union[Dict[str, Tensor], List[Dict[str, Tenso...
    method compute_loss (line 144) | def compute_loss(self, scores, target):
    method _dist_gather_tensor (line 147) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]):
    method save (line 159) | def save(self, output_dir: str):

FILE: research/LLARA/finetune/run.py
  function main (line 21) | def main():

FILE: research/LLARA/finetune/trainer.py
  class BiTrainer (line 4) | class BiTrainer(Trainer):
    method _save (line 5) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 28) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: research/LLARA/pretrain/arguments.py
  function default_list (line 8) | def default_list() -> List[int]:
  class ModelArguments (line 13) | class ModelArguments:
  class DataArguments (line 70) | class DataArguments:
    method __post_init__ (line 95) | def __post_init__(self):
  class PretrainTrainingArguments (line 100) | class PretrainTrainingArguments(TrainingArguments):

FILE: research/LLARA/pretrain/data.py
  class TrainDatasetForEmbedding (line 16) | class TrainDatasetForEmbedding(Dataset):
    method __init__ (line 17) | def __init__(
    method __len__ (line 52) | def __len__(self):
    method __getitem__ (line 55) | def __getitem__(self, item):
  class EmbedCollator (line 90) | class EmbedCollator(DataCollatorForSeq2Seq):
    method __call__ (line 98) | def __call__(self, features, return_tensors='pt'):

FILE: research/LLARA/pretrain/load_model.py
  function get_model (line 6) | def get_model(model_args, use_gradient_checkpointing: bool = False):

FILE: research/LLARA/pretrain/modeling.py
  class NewLlamaModel (line 20) | class NewLlamaModel(LlamaModel):
    method forward (line 23) | def forward(
    method _update_causal_mask (line 156) | def _update_causal_mask(
  class PreLlamaModel (line 236) | class PreLlamaModel(LlamaForCausalLM):
    method __init__ (line 237) | def __init__(self, config):
    method forward (line 266) | def forward(
  class PreModel (line 419) | class PreModel(nn.Module):
    method __init__ (line 420) | def __init__(self,
    method gradient_checkpointing_enable (line 426) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 429) | def enable_input_require_grads(self, **kwargs):
    method forward (line 432) | def forward(self, *args, **kwargs):
    method save (line 435) | def save(self, output_dir: str):

FILE: research/LLARA/pretrain/run.py
  function main (line 21) | def main():

FILE: research/LLARA/pretrain/trainer.py
  class PreTrainer (line 3) | class PreTrainer(Trainer):
    method _save (line 4) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 21) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: research/LM_Cocktail/LM_Cocktail/cocktail.py
  function save_ckpt_for_sentence_transformers (line 17) | def save_ckpt_for_sentence_transformers(ckpt_dir, pooling_mode: str = 'c...
  function mix_models (line 30) | def mix_models(model_names_or_paths: List[str],
  function mix_models_with_data (line 72) | def mix_models_with_data(model_names_or_paths: List[str],
  function mix_models_by_layers (line 125) | def mix_models_by_layers(model_names_or_paths: List[str],

FILE: research/LM_Cocktail/LM_Cocktail/utils.py
  function load_llm (line 14) | def load_llm(model_name:str, trust_remote_code:bool):
  function load_embedder (line 19) | def load_embedder(model_name:str, trust_remote_code:bool):
  function load_reranker (line 24) | def load_reranker(model_name:str, trust_remote_code:bool):
  function load_seq2seq_model (line 29) | def load_seq2seq_model(model_name:str, trust_remote_code:bool):
  function load_model (line 34) | def load_model(model_name:str, model_type:str, trust_remote_code:bool=Tr...
  function get_model_param_list (line 48) | def get_model_param_list(model_names: List[str], model_type:str):
  function merge_param (line 57) | def merge_param(model_param_list: List[Dict], weights: List[float]):
  function get_model_param_dirs (line 70) | def get_model_param_dirs(model_names: List[str], model_type:str):
  function merge_param_by_layer (line 95) | def merge_param_by_layer(model_param_dirs: List[str], weights: List[floa...
  function compute_weights (line 127) | def compute_weights(base_model, tokenizer, param_list: List[Dict], model...
  function preprocess_data_for_seq2seq (line 158) | def preprocess_data_for_seq2seq(example_data, tokenizer, device, batch_s...
  function preprocess_data_for_embedder (line 181) | def preprocess_data_for_embedder(example_data, tokenizer, device, batch_...
  function seq2seq_loss (line 207) | def seq2seq_loss(base_model, input_data):
  function embedder_loss (line 219) | def embedder_loss(base_model, input_data):
  function preprocess_data_for_llm (line 239) | def preprocess_data_for_llm(example_data, tokenizer, device, batch_size:...
  function llm_loss (line 275) | def llm_loss(base_model, input_data):

FILE: research/Long_LLM/activation_beacon/main/eval_generation.py
  class Args (line 18) | class Args(ModelArgs):
  function main (line 55) | def main():

FILE: research/Long_LLM/activation_beacon/main/eval_infbench.py
  class Args (line 23) | class Args(ModelArgs):
  function process_infbench (line 62) | def process_infbench(data, indices, tokenizer, chat_template, task:str, ...
  function main (line 97) | def main():

FILE: research/Long_LLM/activation_beacon/main/eval_lm.py
  class Args (line 16) | class Args(ModelArgs):
  function process_lm_pre (line 50) | def process_lm_pre(tokenizer, tokenize_max_char=None):
  function process_lm (line 62) | def process_lm(tokenizer, max_length=4096, stride=1024, min_length=None):
  function main (line 124) | def main():

FILE: research/Long_LLM/activation_beacon/main/eval_longbench.py
  class Args (line 22) | class Args(ModelArgs):
  function process_longbench (line 61) | def process_longbench(data, indices, tokenizer, chat_template, task, max...
  function main (line 96) | def main():

FILE: research/Long_LLM/activation_beacon/main/eval_mmlu.py
  class Args (line 22) | class Args(ModelArgs):
  function remove_eos (line 51) | def remove_eos(inputs: Mapping, eos_token_ids: Union[List,int]):
  function process_mmlu (line 64) | def process_mmlu(tokenizer, chat_template, eos_token_id, few_shot=0, tra...
  function evaluate_mmlu (line 138) | def evaluate_mmlu(eval_data, save_path, eval_preds):
  function main (line 182) | def main():

FILE: research/Long_LLM/activation_beacon/main/eval_msc.py
  class Args (line 23) | class Args(ModelArgs):
  function process_msc (line 48) | def process_msc(data, tokenizer, max_length, chat_template):
  function main (line 66) | def main():

FILE: research/Long_LLM/activation_beacon/main/eval_multiturn.py
  class Args (line 21) | class Args(ModelArgs):
  function process_multiturn (line 50) | def process_multiturn(data, indices, tokenizer, chat_template, min_lengt...
  function main (line 114) | def main():

FILE: research/Long_LLM/activation_beacon/main/eval_needle.py
  class Args (line 28) | class Args(ModelArgs):
    method __post_init__ (line 98) | def __post_init__(self):
  class OpenAIEvaluator (line 103) | class OpenAIEvaluator:
    method __init__ (line 113) | def __init__(self,
    method evaluate_response (line 147) | def evaluate_response(self, response: str) -> int:
  function generate_sample (line 170) | def generate_sample(
  function main (line 212) | def main():

FILE: research/Long_LLM/activation_beacon/main/eval_passkey.py
  class Args (line 28) | class Args(ModelArgs):
  function generate_sample (line 85) | def generate_sample(tokenizer, chat_template, context_length, passkey_de...
  function main (line 126) | def main():

FILE: research/Long_LLM/activation_beacon/main/eval_topic.py
  class Args (line 25) | class Args(ModelArgs):
  function process_topic_retrieval (line 54) | def process_topic_retrieval(data, tokenizer, chat_template, num_topic, t...
  function main (line 93) | def main():

FILE: research/Long_LLM/activation_beacon/main/infbench_utils.py
  function normalize_answer (line 12) | def normalize_answer(s: str) -> str:
  function normalize_zh_answer (line 31) | def normalize_zh_answer(s: str) -> str:
  function f1_score (line 48) | def f1_score(prediction, ground_truth) -> tuple[float, float, float]:
  function qa_f1_score (line 59) | def qa_f1_score(pred: str, ground_truths) -> float:
  function qa_f1_score_zh (line 78) | def qa_f1_score_zh(pred: str, ground_truths: list[str]) -> float:
  function load_json (line 100) | def load_json(fname):
  function iter_jsonl (line 104) | def iter_jsonl(fname, cnt=None):
  function first_int_match (line 117) | def first_int_match(prediction):
  function split_retrieval_answer (line 127) | def split_retrieval_answer(pred: str):
  function get_score_one_kv_retrieval (line 134) | def get_score_one_kv_retrieval(pred, label, model_name: str) -> bool:
  function get_score_one_passkey (line 141) | def get_score_one_passkey(pred, label, model_name: str) -> bool:
  function get_score_one_number_string (line 147) | def get_score_one_number_string(pred, label, model_name: str) -> bool:
  function get_score_one_code_run (line 153) | def get_score_one_code_run(pred, label, model_name: str) -> bool:
  function get_score_one_code_debug (line 172) | def get_score_one_code_debug(pred, label, model_name: str) -> bool:
  function get_score_one_math_find (line 208) | def get_score_one_math_find(pred, label, model_name: str) -> bool:
  function get_score_one_longdialogue_qa_eng (line 230) | def get_score_one_longdialogue_qa_eng(pred, label, model_name: str) -> b...
  function get_score_one_longbook_choice_eng (line 239) | def get_score_one_longbook_choice_eng(pred, label, model_name: str) -> b...
  function get_score_one_longbook_qa_eng (line 280) | def get_score_one_longbook_qa_eng(pred, label, model_name: str) -> float:
  function get_score_one_longbook_sum_eng (line 284) | def get_score_one_longbook_sum_eng(
  function get_score_one_longbook_qa_chn (line 297) | def get_score_one_longbook_qa_chn(pred, label, model_name: str) -> float:
  function get_score_one_math_calc (line 301) | def get_score_one_math_calc(pred, label, model_name: str) -> float:
  function get_score_one (line 326) | def get_score_one(
  function get_labels (line 359) | def get_labels(preds: list) -> list[str]:
  function get_preds (line 367) | def get_preds(preds: list, data_name: str) -> list[str]:
  function get_score (line 382) | def get_score(
  function compute_scores (line 396) | def compute_scores(preds_path, data_name: str, model_name: str):
  function create_prompt (line 406) | def create_prompt(eg: dict, data_name: str, prompt_template: str) -> str:
  function get_answer (line 516) | def get_answer(eg: dict, data_name: str):

FILE: research/Long_LLM/activation_beacon/main/longbench_utils.py
  function normalize_answer (line 12) | def normalize_answer(s):
  function normalize_zh_answer (line 31) | def normalize_zh_answer(s):
  function count_score (line 47) | def count_score(prediction, ground_truth, **kwargs):
  function retrieval_score (line 56) | def retrieval_score(prediction, ground_truth, **kwargs):
  function retrieval_zh_score (line 68) | def retrieval_zh_score(prediction, ground_truth, **kwargs):
  function code_sim_score (line 80) | def code_sim_score(prediction, ground_truth, **kwargs):
  function classification_score (line 89) | def classification_score(prediction, ground_truth, **kwargs):
  function rouge_score (line 114) | def rouge_score(prediction, ground_truth, **kwargs):
  function rouge_score_zh (line 122) | def rouge_score_zh(prediction, ground_truth, **kwargs):
  function f1_score (line 128) | def f1_score(prediction, ground_truth, **kwargs):
  function qa_f1_score (line 138) | def qa_f1_score(prediction, ground_truth, **kwargs):
  function qa_f1_score_zh (line 147) | def qa_f1_score_zh(prediction, ground_truth, **kwargs):
  function scorer (line 156) | def scorer(dataset, predictions, answers, all_classes):

FILE: research/Long_LLM/activation_beacon/main/pretrain_data.py
  class Args (line 22) | class Args(ModelArgs):
  function prepare_pretrain_data (line 54) | def prepare_pretrain_data(data_files, tokenizer: PreTrainedTokenizer, co...

FILE: research/Long_LLM/activation_beacon/main/train.py
  function main (line 20) | def main():

FILE: research/Long_LLM/activation_beacon/src/__init__.py
  function get_model_and_tokenizer (line 15) | def get_model_and_tokenizer(model_args, device="cpu", evaluation_mode=Tr...

FILE: research/Long_LLM/activation_beacon/src/args.py
  class ModelArgs (line 9) | class ModelArgs:
    method resolve_path (line 204) | def resolve_path(self, path):
    method get_generation_config (line 218) | def get_generation_config(self):
    method to_dict (line 230) | def to_dict(self):
    method save (line 233) | def save(self, path):
    method __post_init__ (line 237) | def __post_init__(self):
  class TrainingArgs (line 259) | class TrainingArgs(TrainingArguments):
    method __post_init__ (line 374) | def __post_init__(self):

FILE: research/Long_LLM/activation_beacon/src/chat.py
  class ChatTemplateOutput (line 17) | class ChatTemplateOutput:
  function mask_nested_lists (line 22) | def mask_nested_lists(lst, mask_target, mask_value=0):
  function apply_chat_template (line 31) | def apply_chat_template(template, messages, system_message=None, tokeniz...
  class SeparatorStyle (line 223) | class SeparatorStyle(IntEnum):
  class Conversation (line 255) | class Conversation:
    method get_prompt (line 280) | def get_prompt(self) -> str:
    method get_images (line 529) | def get_images(self):
    method set_system_message (line 539) | def set_system_message(self, system_message: str):
    method get_system_message (line 543) | def get_system_message(self):
    method append_message (line 547) | def append_message(self, role: str, message: str):
    method update_last_message (line 551) | def update_last_message(self, message: str):
    method convert_image_to_base64 (line 559) | def convert_image_to_base64(self, image):
    method to_gradio_chatbot (line 594) | def to_gradio_chatbot(self):
    method to_openai_api_messages (line 610) | def to_openai_api_messages(self):
    method extract_text_from_messages (line 625) | def extract_text_from_messages(self):
    method copy (line 631) | def copy(self):
    method dict (line 646) | def dict(self):
  function register_conv_template (line 660) | def register_conv_template(template: Conversation, override: bool = False):
  function get_conv_template (line 670) | def get_conv_template(name: str) -> Conversation:

FILE: research/Long_LLM/activation_beacon/src/data.py
  class Data (line 18) | class Data:
    method _process_pretrain_data (line 19) | def _process_pretrain_data(data, indices):
    method _process_language_modeling (line 28) | def _process_language_modeling(data, indices, tokenizer, min_length, m...
    method _process_instruction_tuning (line 54) | def _process_instruction_tuning(data, indices, tokenizer, chat_templat...
    method prepare_train_data (line 102) | def prepare_train_data(data_files=None, tokenizer=None, max_length=409...
    method prepare_eval_data (line 174) | def prepare_eval_data(data_files=None, tokenizer=None, max_length=4096...

FILE: research/Long_LLM/activation_beacon/src/llama/configuration_llama.py
  class LlamaConfig (line 31) | class LlamaConfig(PretrainedConfig):
    method __init__ (line 117) | def __init__(
    method _rope_scaling_validation (line 196) | def _rope_scaling_validation(self):

FILE: research/Long_LLM/activation_beacon/src/llama/modeling_llama.py
  function _get_unpad_data (line 63) | def _get_unpad_data(attention_mask):
  class LlamaRMSNorm (line 76) | class LlamaRMSNorm(nn.Module):
    method __init__ (line 77) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 85) | def forward(self, hidden_states):
  class LlamaMLP (line 94) | class LlamaMLP(nn.Module):
    method __init__ (line 95) | def __init__(self, config):
    method forward (line 105) | def forward(self, x):
  function repeat_kv (line 110) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class LlamaAttention (line 122) | class LlamaAttention(nn.Module):
    method __init__ (line 125) | def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None):
    method _init_beacon_proj (line 179) | def _init_beacon_proj(self, missing_keys):
    method _shape (line 253) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method qkv_proj_with_beacon (line 329) | def qkv_proj_with_beacon(self, hidden_states, beacon_size, beacon_indi...
    method o_proj_with_beacon (line 375) | def o_proj_with_beacon(self, attn_output, beacon_size, beacon_indices):
    method forward (line 390) | def forward(
  class LlamaSdpaAttention (line 473) | class LlamaSdpaAttention(LlamaAttention):
    method forward (line 481) | def forward(
  class LlamaFlashAttention2 (line 563) | class LlamaFlashAttention2(LlamaAttention):
    method __init__ (line 570) | def __init__(self, *args, **kwargs):
    method forward (line 578) | def forward(
    method _flash_attention_forward (line 667) | def _flash_attention_forward(
    method _upad_input (line 726) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class LlamaDecoderLayer (line 772) | class LlamaDecoderLayer(nn.Module):
    method __init__ (line 773) | def __init__(self, config: LlamaConfig, layer_idx: int):
    method forward (line 783) | def forward(
  class LlamaPreTrainedModel (line 865) | class LlamaPreTrainedModel(PreTrainedModel):
    method _init_weights (line 875) | def _init_weights(self, module):
  class LlamaModel (line 961) | class LlamaModel(LlamaPreTrainedModel):
    method __init__ (line 969) | def __init__(self, config: LlamaConfig):
    method _init_beacon_embed (line 990) | def _init_beacon_embed(self, missing_keys):
    method get_input_embeddings (line 1021) | def get_input_embeddings(self):
    method set_input_embeddings (line 1024) | def set_input_embeddings(self, value):
    method forward (line 1028) | def forward(
  class LlamaForCausalLM (line 1148) | class LlamaForCausalLM(LlamaPreTrainedModel):
    method __init__ (line 1151) | def __init__(self, config):
    method get_input_embeddings (line 1159) | def get_input_embeddings(self):
    method set_input_embeddings (line 1162) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1165) | def get_output_embeddings(self):
    method set_output_embeddings (line 1168) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1171) | def set_decoder(self, decoder):
    method get_decoder (line 1174) | def get_decoder(self):
    method from_pretrained (line 1178) | def from_pretrained(cls, *args, **kwargs):
    method _native_forward (line 1201) | def _native_forward(
    method _beacon_forward (line 1263) | def _beacon_forward(self,
    method forward (line 1335) | def forward(self, **kwargs):
    method prepare_inputs_for_generation (line 1346) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1377) | def _reorder_cache(past_key_values, beam_idx):

FILE: research/Long_LLM/activation_beacon/src/metrics.py
  class Metric (line 14) | class Metric:
    method get_metric_fn (line 17) | def get_metric_fn(cls, metrics, **kwds):
    method get_save_path (line 40) | def get_save_path(eval_data, output_dir=None, field="result", save_nam...
    method save_result (line 57) | def save_result(preds, labels, save_path, indices=None, **kwargs):
    method rouge (line 73) | def rouge(preds, labels, **kwargs):

FILE: research/Long_LLM/activation_beacon/src/mistral/configuration_mistral.py
  class MistralConfig (line 29) | class MistralConfig(PretrainedConfig):
    method __init__ (line 104) | def __init__(
    method _rope_scaling_validation (line 181) | def _rope_scaling_validation(self):

FILE: research/Long_LLM/activation_beacon/src/mistral/modeling_mistral.py
  function _get_unpad_data (line 63) | def _get_unpad_data(attention_mask):
  class MistralRMSNorm (line 76) | class MistralRMSNorm(nn.Module):
    method __init__ (line 77) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 85) | def forward(self, hidden_states):
  class MistralMLP (line 94) | class MistralMLP(nn.Module):
    method __init__ (line 95) | def __init__(self, config):
    method forward (line 105) | def forward(self, x):
  function repeat_kv (line 110) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class MistralAttention (line 122) | class MistralAttention(nn.Module):
    method __init__ (line 125) | def __init__(self, config: MistralConfig, layer_idx: Optional[int] = N...
    method _init_beacon_proj (line 178) | def _init_beacon_proj(self, missing_keys):
    method _shape (line 252) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method qkv_proj_with_beacon (line 255) | def qkv_proj_with_beacon(self, hidden_states, beacon_size, beacon_indi...
    method o_proj_with_beacon (line 301) | def o_proj_with_beacon(self, attn_output, beacon_size, beacon_indices):
    method forward (line 316) | def forward(
  class MistralSdpaAttention (line 399) | class MistralSdpaAttention(MistralAttention):
    method forward (line 407) | def forward(
  class MistralFlashAttention2 (line 489) | class MistralFlashAttention2(MistralAttention):
    method __init__ (line 496) | def __init__(self, *args, **kwargs):
    method forward (line 504) | def forward(
    method _flash_attention_forward (line 596) | def _flash_attention_forward(
    method _upad_input (line 655) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class MistralDecoderLayer (line 701) | class MistralDecoderLayer(nn.Module):
    method __init__ (line 702) | def __init__(self, config: MistralConfig, layer_idx: int):
    method forward (line 717) | def forward(
  class MistralPreTrainedModel (line 798) | class MistralPreTrainedModel(PreTrainedModel):
    method _init_weights (line 808) | def _init_weights(self, module):
  class MistralModel (line 894) | class MistralModel(MistralPreTrainedModel):
    method __init__ (line 902) | def __init__(self, config: MistralConfig):
    method _init_beacon_embed (line 923) | def _init_beacon_embed(self, missing_keys):
    method get_input_embeddings (line 954) | def get_input_embeddings(self):
    method set_input_embeddings (line 957) | def set_input_embeddings(self, value):
    method forward (line 961) | def forward(
  class MistralForCausalLM (line 1081) | class MistralForCausalLM(MistralPreTrainedModel):
    method __init__ (line 1084) | def __init__(self, config):
    method get_input_embeddings (line 1092) | def get_input_embeddings(self):
    method set_input_embeddings (line 1095) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1098) | def get_output_embeddings(self):
    method set_output_embeddings (line 1101) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1104) | def set_decoder(self, decoder):
    method get_decoder (line 1107) | def get_decoder(self):
    method from_pretrained (line 1111) | def from_pretrained(cls, *args, **kwargs):
    method _native_forward (line 1134) | def _native_forward(
    method _beacon_forward (line 1196) | def _beacon_forward(self,
    method forward (line 1268) | def forward(self, **kwargs):
    method prepare_inputs_for_generation (line 1279) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1310) | def _reorder_cache(past_key_values, beam_idx):

FILE: research/Long_LLM/activation_beacon/src/modeling_beacon.py
  class Memory (line 14) | class Memory(torch.nn.Module):
    method __init__ (line 15) | def __init__(
    method _post_validation (line 36) | def _post_validation(self, verbose=True):
    method set (line 54) | def set(self, verbose=True, **kwargs):
    method reset (line 62) | def reset(self):
    method all_sequence_length (line 105) | def all_sequence_length(self):
    method batch_size (line 112) | def batch_size(self):
    method finish (line 119) | def finish(self):
    method dtype (line 124) | def dtype(self):
    method min_value (line 128) | def min_value(self):
    method max_position_embeddings (line 132) | def max_position_embeddings(self):
    method get_memory_size (line 139) | def get_memory_size(self):
    method prepare (line 154) | def prepare(self, input_ids, attention_mask, labels, skip_first=None, ...
    method set_compression_ratio (line 200) | def set_compression_ratio(self, start_idx, end_idx):
    method step (line 273) | def step(self):
    method _step (line 394) | def _step(self, ignore_memory=False):
    method update_memory (line 659) | def update_memory(self, past_key_values):
    method update_loss (line 716) | def update_loss(self, batch_loss, valid_token_num):
    method output (line 729) | def output(self, model_outputs):
    method _make_4d_attention_mask_and_position_ids (line 757) | def _make_4d_attention_mask_and_position_ids(
    method _extract_beacon_and_raw_memory (line 890) | def _extract_beacon_and_raw_memory(
  function slice_tensor (line 930) | def slice_tensor(x, start=None, end=None, step=None, index=None, dim=2):
  function cat_tensor (line 978) | def cat_tensor(list_of_tensors, dim=-1):
  function slice_activations (line 988) | def slice_activations(activations, start=None, end=None, k_seq_dim=2, v_...
  function cat_activations (line 996) | def cat_activations(list_of_activations, k_seq_dim=2, v_seq_dim=2):
  function interleave_activations (line 1009) | def interleave_activations(main_activations, augment_activations, main_s...
  function softmax (line 1068) | def softmax(x:np.ndarray, axis=-1, temperature=1):
  function l1_norm (line 1076) | def l1_norm(x):

FILE: research/Long_LLM/activation_beacon/src/modeling_utils.py
  function optional_grad_ctx (line 12) | def optional_grad_ctx(with_grad=False):
  function move_to_device (line 18) | def move_to_device(data, device):
  function get_shifted_labels (line 32) | def get_shifted_labels(input_ids):
  function compute_loss (line 47) | def compute_loss(logits, labels, shift=False):
  function evaluate_perplexity (line 84) | def evaluate_perplexity(model, dataloader, accelerator:Optional[Accelera...
  function evaluate_generation (line 141) | def evaluate_generation(model, dataloader, accelerator:Optional[Accelera...
  function evaluate_nll (line 190) | def evaluate_nll(model, dataloader, accelerator:Optional[Accelerator]=No...
  class ModelOutput (line 236) | class ModelOutput(BaseModelOutputWithPast):
  function get_rope (line 249) | def get_rope(head_dim, base, max_position_embeddings, rope_scaling=None):
  function rotate_half (line 313) | def rotate_half(x):
  class RotaryEmbedding (line 320) | class RotaryEmbedding(torch.nn.Module):
    method __init__ (line 321) | def __init__(self, dim, max_position_embeddings=32768, base=10000, dev...
    method _set_cos_sin_cache (line 335) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 344) | def forward(self, q, k, position_ids):
  class LinearScalingRotaryEmbedding (line 363) | class LinearScalingRotaryEmbedding(RotaryEmbedding):
    method __init__ (line 366) | def __init__(self, dim, max_position_embeddings=32768, base=10000, dev...
    method _set_cos_sin_cache (line 370) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class DynamicNTKScalingRotaryEmbedding (line 382) | class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding):
    method __init__ (line 385) | def __init__(self, dim, max_position_embeddings=32768, base=10000, dev...
    method _set_cos_sin_cache (line 389) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class YarnRotaryEmbedding (line 408) | class YarnRotaryEmbedding(torch.nn.Module):
    method __init__ (line 409) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _get_factor (line 423) | def _get_factor(self):
    method _get_temperature (line 442) | def _get_temperature(self):
    method _set_cos_sin_cache (line 447) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 469) | def forward(self, q, k, position_ids):
  class YarnDynamicTemperatureRotaryEmbedding (line 488) | class YarnDynamicTemperatureRotaryEmbedding(torch.nn.Module):
    method __init__ (line 489) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _get_factor (line 503) | def _get_factor(self):
    method _set_cos_sin_cache (line 522) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 547) | def forward(self, q, k, position_ids):
  class YarnDynamicTemperatureLogNRotaryEmbedding (line 572) | class YarnDynamicTemperatureLogNRotaryEmbedding(torch.nn.Module):
    method __init__ (line 573) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _get_factor (line 587) | def _get_factor(self):
    method _set_cos_sin_cache (line 606) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 631) | def forward(self, q, k, position_ids):
  class Llama3RotaryEmbedding (line 656) | class Llama3RotaryEmbedding(torch.nn.Module):
    method __init__ (line 657) | def __init__(self, dim, max_position_embeddings=8192, base=10000, devi...
    method _set_cos_sin_cache (line 687) | def _set_cos_sin_cache(self, seq_len, device):
    method forward (line 696) | def forward(self, q, k, position_ids):

FILE: research/Long_LLM/activation_beacon/src/qwen2/configuration_qwen2.py
  class Qwen2Config (line 28) | class Qwen2Config(PretrainedConfig):
    method __init__ (line 98) | def __init__(

FILE: research/Long_LLM/activation_beacon/src/qwen2/modeling_qwen2.py
  function _get_unpad_data (line 71) | def _get_unpad_data(attention_mask):
  class Qwen2RMSNorm (line 84) | class Qwen2RMSNorm(nn.Module):
    method __init__ (line 85) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 93) | def forward(self, hidden_states):
  class Qwen2MLP (line 102) | class Qwen2MLP(nn.Module):
    method __init__ (line 103) | def __init__(self, config):
    method forward (line 113) | def forward(self, x):
  function repeat_kv (line 119) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class Qwen2Attention (line 131) | class Qwen2Attention(nn.Module):
    method __init__ (line 134) | def __init__(self, config: Qwen2Config, layer_idx: Optional[int] = None):
    method _init_beacon_proj (line 187) | def _init_beacon_proj(self, missing_keys):
    method _shape (line 261) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method qkv_proj_with_beacon (line 264) | def qkv_proj_with_beacon(self, hidden_states, beacon_size, beacon_indi...
    method o_proj_with_beacon (line 310) | def o_proj_with_beacon(self, attn_output, beacon_size, beacon_indices):
    method forward (line 325) | def forward(
  class Qwen2SdpaAttention (line 408) | class Qwen2SdpaAttention(Qwen2Attention):
    method forward (line 416) | def forward(
  class Qwen2FlashAttention2 (line 498) | class Qwen2FlashAttention2(Qwen2Attention):
    method __init__ (line 505) | def __init__(self, *args, **kwargs):
    method forward (line 513) | def forward(
    method _flash_attention_forward (line 606) | def _flash_attention_forward(
    method _upad_input (line 665) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class Qwen2DecoderLayer (line 711) | class Qwen2DecoderLayer(nn.Module):
    method __init__ (line 712) | def __init__(self, config: Qwen2Config, layer_idx: int):
    method forward (line 727) | def forward(
  class Qwen2PreTrainedModel (line 808) | class Qwen2PreTrainedModel(PreTrainedModel):
    method _init_weights (line 818) | def _init_weights(self, module):
  class Qwen2Model (line 904) | class Qwen2Model(Qwen2PreTrainedModel):
    method __init__ (line 912) | def __init__(self, config: Qwen2Config):
    method _init_beacon_embed (line 933) | def _init_beacon_embed(self, missing_keys):
    method get_input_embeddings (line 964) | def get_input_embeddings(self):
    method set_input_embeddings (line 967) | def set_input_embeddings(self, value):
    method forward (line 971) | def forward(
  class Qwen2ForCausalLM (line 1091) | class Qwen2ForCausalLM(Qwen2PreTrainedModel):
    method __init__ (line 1094) | def __init__(self, config):
    method get_input_embeddings (line 1102) | def get_input_embeddings(self):
    method set_input_embeddings (line 1105) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1108) | def get_output_embeddings(self):
    method set_output_embeddings (line 1111) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1114) | def set_decoder(self, decoder):
    method get_decoder (line 1117) | def get_decoder(self):
    method from_pretrained (line 1121) | def from_pretrained(cls, *args, **kwargs):
    method _native_forward (line 1144) | def _native_forward(
    method _beacon_forward (line 1206) | def _beacon_forward(self,
    method forward (line 1282) | def forward(self, **kwargs):
    method prepare_inputs_for_generation (line 1293) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1324) | def _reorder_cache(past_key_values, beam_idx):

FILE: research/Long_LLM/activation_beacon/src/trainer.py
  class ActivationBeaconTrainer (line 18) | class ActivationBeaconTrainer(Trainer):
    method __init__ (line 19) | def __init__(self, *args, model_args, file_logger, **kwargs):
    method compute_loss (line 24) | def compute_loss(self, model, inputs, return_outputs=False):
    method _get_train_sampler (line 47) | def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
    method _save (line 72) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method evaluate (line 79) | def evaluate(self, eval_dataset: Dataset | None = None, ignore_keys: L...
  class StrideGroupedSampler (line 144) | class StrideGroupedSampler(Sampler):
    method __init__ (line 147) | def __init__(
    method __len__ (line 232) | def __len__(self):
    method __iter__ (line 235) | def __iter__(self):

FILE: research/Long_LLM/activation_beacon/src/utils.py
  function do_nothing (line 24) | def do_nothing():
  function optional_grad_ctx (line 27) | def optional_grad_ctx(with_grad=False):
  function makedirs (line 33) | def makedirs(path):
  function clear_dir (line 38) | def clear_dir(directory):
  function split_file_dir_name_ext (line 51) | def split_file_dir_name_ext(path):
  function save_pickle (line 57) | def save_pickle(obj, path:str):
  function load_pickle (line 66) | def load_pickle(path):
  function save_json (line 70) | def save_json(obj, path:str):
  function load_json (line 76) | def load_json(path, lines=False):
  function format_numel_str (line 87) | def format_numel_str(numel: int) -> str:
  function batched_iter (line 103) | def batched_iter(iterable: Iterable, max_batch_size: int):
  function show_time (line 114) | def show_time(times):
  function filelock (line 121) | def filelock(path, process_index=0):
  function normalize_text (line 133) | def normalize_text(text, ignore_case=True, ignore_punctuation=True, igno...
  function wrap_text (line 156) | def wrap_text(s):
  function min_max_normalize (line 165) | def min_max_normalize(array):
  function softmax (line 168) | def softmax(x:np.ndarray, axis=-1):
  function get_max_length_in_nested_lists (line 175) | def get_max_length_in_nested_lists(lst):
  function pad_nested_lists (line 186) | def pad_nested_lists(lst, max_length, padding_value, padding_side="right"):
  function mask_nested_lists (line 205) | def mask_nested_lists(lst, mask_target, mask_value=0):
  function are_elements_of_same_length (line 213) | def are_elements_of_same_length(lst: List):
  function add_eos (line 220) | def add_eos(inputs: Mapping, eos_token_id: int):
  function remove_eos (line 238) | def remove_eos(inputs: Mapping, eos_token_ids: Union[List,int]):
  class FileLogger (line 249) | class FileLogger:
    method __init__ (line 250) | def __init__(self, log_file) -> None:
    method log (line 253) | def log(self, metrics, **kwargs):
  class DefaultDataCollator (line 274) | class DefaultDataCollator:
    method __call__ (line 286) | def __call__(self, batch_elem: List) -> Dict[str, Any]:

FILE: research/Long_LLM/activation_beacon/src/vllm_utils.py
  class HFStyleVllmModel (line 16) | class HFStyleVllmModel:
    method __init__ (line 17) | def __init__(
    method device (line 26) | def device(self):
    method parse_generation_config (line 29) | def parse_generation_config(self, generation_config:Union[dict,Generat...
    method generate (line 47) | def generate(
    method __call__ (line 77) | def __call__(self, input_ids, attention_mask, labels, **kwargs):

FILE: research/Long_LLM/longllm_qlora/data_pipeline/_openai.py
  function process_api_requests_from_file (line 110) | async def process_api_requests_from_file(
  class StatusTracker (line 280) | class StatusTracker:
  class APIRequest (line 294) | class APIRequest:
    method call_api (line 304) | async def call_api(
  function api_endpoint_from_url (line 376) | def api_endpoint_from_url(request_url):
  function append_to_jsonl (line 387) | def append_to_jsonl(data, filename: str) -> None:
  function num_tokens_consumed_from_request (line 394) | def num_tokens_consumed_from_request(
  function task_id_generator_function (line 453) | def task_id_generator_function():

FILE: research/Long_LLM/longllm_qlora/main/eval_generation.py
  class Args (line 18) | class Args(ModelArgs):
  function main (line 55) | def main():

FILE: research/Long_LLM/longllm_qlora/main/eval_infbench.py
  class Args (line 23) | class Args(ModelArgs):
  function process_infbench (line 62) | def process_infbench(data, indices, tokenizer, chat_template, task:str, ...
  function main (line 97) | def main():

FILE: research/Long_LLM/longllm_qlora/main/eval_lm.py
  class Args (line 16) | class Args(ModelArgs):
  function process_lm_pre (line 50) | def process_lm_pre(tokenizer, tokenize_max_char=None):
  function process_lm (line 62) | def process_lm(tokenizer, max_length=4096, stride=1024, min_length=None):
  function main (line 124) | def main():

FILE: research/Long_LLM/longllm_qlora/main/eval_longbench.py
  class Args (line 22) | class Args(ModelArgs):
  function process_longbench (line 61) | def process_longbench(data, indices, tokenizer, chat_template, task, max...
  function main (line 96) | def main():

FILE: research/Long_LLM/longllm_qlora/main/eval_mmlu.py
  class Args (line 22) | class Args(ModelArgs):
  function remove_eos (line 51) | def remove_eos(inputs: Mapping, eos_token_ids: Union[List,int]):
  function process_mmlu (line 64) | def process_mmlu(tokenizer, chat_template, eos_token_id, few_shot=0, tra...
  function evaluate_mmlu (line 138) | def evaluate_mmlu(eval_data, save_path, eval_preds):
  function main (line 182) | def main():

FILE: research/Long_LLM/longllm_qlora/main/eval_needle.py
  class Args (line 28) | class Args(ModelArgs):
    method __post_init__ (line 102) | def __post_init__(self):
  class OpenAIEvaluator (line 107) | class OpenAIEvaluator:
    method __init__ (line 117) | def __init__(self,
    method evaluate_response (line 152) | def evaluate_response(self, response: str) -> int:
  function generate_sample (line 175) | def generate_sample(
  function main (line 217) | def main():

FILE: research/Long_LLM/longllm_qlora/main/eval_passkey.py
  class Args (line 28) | class Args(ModelArgs):
  function generate_sample (line 84) | def generate_sample(tokenizer, chat_template, context_length, passkey_de...
  function main (line 125) | def main():

FILE: research/Long_LLM/longllm_qlora/main/eval_topic.py
  class Args (line 23) | class Args(ModelArgs):
  function process_topic_retrieval (line 42) | def process_topic_retrieval(tokenizer, chat_template, num_topic):
  function main (line 69) | def main():

FILE: research/Long_LLM/longllm_qlora/main/infbench_utils.py
  function normalize_answer (line 12) | def normalize_answer(s: str) -> str:
  function normalize_zh_answer (line 31) | def normalize_zh_answer(s: str) -> str:
  function f1_score (line 48) | def f1_score(prediction, ground_truth) -> tuple[float, float, float]:
  function qa_f1_score (line 59) | def qa_f1_score(pred: str, ground_truths) -> float:
  function qa_f1_score_zh (line 78) | def qa_f1_score_zh(pred: str, ground_truths: list[str]) -> float:
  function load_json (line 100) | def load_json(fname):
  function iter_jsonl (line 104) | def iter_jsonl(fname, cnt=None):
  function first_int_match (line 117) | def first_int_match(prediction):
  function split_retrieval_answer (line 127) | def split_retrieval_answer(pred: str):
  function get_score_one_kv_retrieval (line 134) | def get_score_one_kv_retrieval(pred, label, model_name: str) -> bool:
  function get_score_one_passkey (line 141) | def get_score_one_passkey(pred, label, model_name: str) -> bool:
  function get_score_one_number_string (line 147) | def get_score_one_number_string(pred, label, model_name: str) -> bool:
  function get_score_one_code_run (line 153) | def get_score_one_code_run(pred, label, model_name: str) -> bool:
  function get_score_one_code_debug (line 172) | def get_score_one_code_debug(pred, label, model_name: str) -> bool:
  function get_score_one_math_find (line 208) | def get_score_one_math_find(pred, label, model_name: str) -> bool:
  function get_score_one_longdialogue_qa_eng (line 230) | def get_score_one_longdialogue_qa_eng(pred, label, model_name: str) -> b...
  function get_score_one_longbook_choice_eng (line 239) | def get_score_one_longbook_choice_eng(pred, label, model_name: str) -> b...
  function get_score_one_longbook_qa_eng (line 280) | def get_score_one_longbook_qa_eng(pred, label, model_name: str) -> float:
  function get_score_one_longbook_sum_eng (line 284) | def get_score_one_longbook_sum_eng(
  function get_score_one_longbook_qa_chn (line 295) | def get_score_one_longbook_qa_chn(pred, label, model_name: str) -> float:
  function get_score_one_math_calc (line 299) | def get_score_one_math_calc(pred, label, model_name: str) -> float:
  function get_score_one (line 324) | def get_score_one(
  function get_labels (line 357) | def get_labels(preds: list) -> list[str]:
  function get_preds (line 365) | def get_preds(preds: list, data_name: str) -> list[str]:
  function get_score (line 380) | def get_score(
  function compute_scores (line 394) | def compute_scores(preds_path, data_name: str, model_name: str):
  function create_prompt (line 404) | def create_prompt(eg: dict, data_name: str, prompt_template: str) -> str:
  function get_answer (line 514) | def get_answer(eg: dict, data_name: str):

FILE: research/Long_LLM/longllm_qlora/main/longbench_utils.py
  function normalize_answer (line 12) | def normalize_answer(s):
  function normalize_zh_answer (line 31) | def normalize_zh_answer(s):
  function count_score (line 47) | def count_score(prediction, ground_truth, **kwargs):
  function retrieval_score (line 56) | def retrieval_score(prediction, ground_truth, **kwargs):
  function retrieval_zh_score (line 68) | def retrieval_zh_score(prediction, ground_truth, **kwargs):
  function code_sim_score (line 80) | def code_sim_score(prediction, ground_truth, **kwargs):
  function classification_score (line 89) | def classification_score(prediction, ground_truth, **kwargs):
  function rouge_score (line 114) | def rouge_score(prediction, ground_truth, **kwargs):
  function rouge_score_zh (line 122) | def rouge_score_zh(prediction, ground_truth, **kwargs):
  function f1_score (line 128) | def f1_score(prediction, ground_truth, **kwargs):
  function qa_f1_score (line 138) | def qa_f1_score(prediction, ground_truth, **kwargs):
  function qa_f1_score_zh (line 147) | def qa_f1_score_zh(prediction, ground_truth, **kwargs):
  function scorer (line 156) | def scorer(dataset, predictions, answers, all_classes):

FILE: research/Long_LLM/longllm_qlora/main/train.py
  function main (line 21) | def main():

FILE: research/Long_LLM/longllm_qlora/src/__init__.py
  function get_model_and_tokenizer (line 15) | def get_model_and_tokenizer(model_args, device="cpu", evaluation_mode=Tr...

FILE: research/Long_LLM/longllm_qlora/src/args.py
  class ModelArgs (line 9) | class ModelArgs:
    method resolve_path (line 187) | def resolve_path(self, path):
    method get_generation_config (line 201) | def get_generation_config(self):
    method to_dict (line 213) | def to_dict(self):
    method save (line 216) | def save(self, path):
    method __post_init__ (line 220) | def __post_init__(self):
  class TrainingArgs (line 242) | class TrainingArgs(TrainingArguments):
    method __post_init__ (line 373) | def __post_init__(self):

FILE: research/Long_LLM/longllm_qlora/src/chat.py
  class ChatTemplateOutput (line 17) | class ChatTemplateOutput:
  function mask_nested_lists (line 22) | def mask_nested_lists(lst, mask_target, mask_value=0):
  function apply_chat_template (line 31) | def apply_chat_template(template, messages, system_message=None, tokeniz...
  class SeparatorStyle (line 190) | class SeparatorStyle(IntEnum):
  class Conversation (line 222) | class Conversation:
    method get_prompt (line 247) | def get_prompt(self) -> str:
    method get_images (line 496) | def get_images(self):
    method set_system_message (line 506) | def set_system_message(self, system_message: str):
    method get_system_message (line 510) | def get_system_message(self):
    method append_message (line 514) | def append_message(self, role: str, message: str):
    method update_last_message (line 518) | def update_last_message(self, message: str):
    method convert_image_to_base64 (line 526) | def convert_image_to_base64(self, image):
    method to_gradio_chatbot (line 561) | def to_gradio_chatbot(self):
    method to_openai_api_messages (line 577) | def to_openai_api_messages(self):
    method extract_text_from_messages (line 592) | def extract_text_from_messages(self):
    method copy (line 598) | def copy(self):
    method dict (line 613) | def dict(self):
  function register_conv_template (line 627) | def register_conv_template(template: Conversation, override: bool = False):
  function get_conv_template (line 637) | def get_conv_template(name: str) -> Conversation:

FILE: research/Long_LLM/longllm_qlora/src/data.py
  class Data (line 21) | class Data:
    method _process_language_modeling (line 22) | def _process_language_modeling(data, indices, tokenizer, min_length, m...
    method _process_instruction_tuning (line 46) | def _process_instruction_tuning(data, indices, tokenizer, chat_templat...
    method prepare_train_data (line 88) | def prepare_train_data(data_files=None, tokenizer=None, max_length=409...
    method prepare_eval_data (line 157) | def prepare_eval_data(data_files=None, tokenizer=None, max_length=4096...

FILE: research/Long_LLM/longllm_qlora/src/metrics.py
  class Metric (line 14) | class Metric:
    method get_metric_fn (line 17) | def get_metric_fn(cls, metrics, **kwds):
    method get_save_path (line 40) | def get_save_path(eval_data, output_dir=None, field="result", save_nam...
    method save_result (line 57) | def save_result(preds, labels, save_path, indices=None, **kwargs):
    method rouge (line 73) | def rouge(preds, labels, **kwargs):

FILE: research/Long_LLM/longllm_qlora/src/modeling_utils.py
  function optional_grad_ctx (line 12) | def optional_grad_ctx(with_grad=False):
  function move_to_device (line 18) | def move_to_device(data, device):
  function compute_loss (line 32) | def compute_loss(logits, labels, shift=False):
  function evaluate_perplexity (line 68) | def evaluate_perplexity(model, dataloader, accelerator:Optional[Accelera...
  function evaluate_generation (line 119) | def evaluate_generation(model, dataloader, accelerator:Optional[Accelera...
  function evaluate_nll (line 161) | def evaluate_nll(model, dataloader, accelerator:Optional[Accelerator]=No...
  class BeaconModelOutput (line 207) | class BeaconModelOutput(BaseModelOutputWithPast):

FILE: research/Long_LLM/longllm_qlora/src/trainer.py
  class LLMTrainer (line 14) | class LLMTrainer(Trainer):
    method __init__ (line 15) | def __init__(self, *args, model_args, file_logger, **kwargs):
    method _prepare_inputs (line 20) | def _prepare_inputs(self, inputs: Dict[str, Union[torch.Tensor, Any]])...
    method _save (line 34) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method evaluate (line 41) | def evaluate(self, eval_dataset: Dataset | None = None, ignore_keys: L...

FILE: research/Long_LLM/longllm_qlora/src/utils.py
  function do_nothing (line 24) | def do_nothing():
  function optional_grad_ctx (line 27) | def optional_grad_ctx(with_grad=False):
  function makedirs (line 33) | def makedirs(path):
  function clear_dir (line 38) | def clear_dir(directory):
  function split_file_dir_name_ext (line 51) | def split_file_dir_name_ext(path):
  function save_pickle (line 57) | def save_pickle(obj, path:str):
  function load_pickle (line 66) | def load_pickle(path):
  function save_json (line 70) | def save_json(obj, path:str):
  function load_json (line 76) | def load_json(path, lines=False):
  function format_numel_str (line 87) | def format_numel_str(numel: int) -> str:
  function batched_iter (line 103) | def batched_iter(iterable: Iterable, max_batch_size: int):
  function show_time (line 114) | def show_time(times):
  function filelock (line 121) | def filelock(path, process_index=0):
  function normalize_text (line 133) | def normalize_text(text, ignore_case=True, ignore_punctuation=True, igno...
  function wrap_text (line 156) | def wrap_text(s):
  function min_max_normalize (line 165) | def min_max_normalize(array):
  function softmax (line 168) | def softmax(x:np.ndarray, axis=-1):
  function get_max_length_in_nested_lists (line 175) | def get_max_length_in_nested_lists(lst):
  function pad_nested_lists (line 186) | def pad_nested_lists(lst, max_length, padding_value, padding_side="right"):
  function mask_nested_lists (line 205) | def mask_nested_lists(lst, mask_target, mask_value=0):
  function are_elements_of_same_length (line 213) | def are_elements_of_same_length(lst: List):
  function add_eos (line 220) | def add_eos(inputs: Mapping, eos_token_id: int):
  function remove_eos (line 238) | def remove_eos(inputs: Mapping, eos_token_ids: Union[List,int]):
  function mix_parameters (line 247) | def mix_parameters(models: List[torch.nn.Module], weights: Optional[List...
  class FileLogger (line 286) | class FileLogger:
    method __init__ (line 287) | def __init__(self, log_file) -> None:
    method log (line 290) | def log(self, metrics, **kwargs):
  class DefaultDataCollator (line 311) | class DefaultDataCollator:
    method __call__ (line 323) | def __call__(self, batch_elem: List) -> Dict[str, Any]:

FILE: research/MLVU/evaluation/generation_evaluation/calculate.py
  function extract_scores (line 8) | def extract_scores(text):

FILE: research/MLVU/evaluation/generation_evaluation/calculate_sum.py
  function extract_scores (line 7) | def extract_scores(text):

FILE: research/MLVU/evaluation/generation_evaluation/evaluate_ssc.py
  function parse_args (line 9) | def parse_args():
  function get_scoring_points (line 20) | def get_scoring_points(score_points="MLVU_all/json/8_sub_scene.json"):
  function annotate (line 30) | def annotate(prediction_set, caption_files, output_dir):
  function main (line 112) | def main():

FILE: research/MLVU/evaluation/generation_evaluation/evaluate_summary.py
  function parse_args (line 9) | def parse_args():
  function annotate (line 21) | def annotate(prediction_set, caption_files, output_dir):
  function main (line 102) | def main():

FILE: research/MLVU/evaluation/generation_evaluation/open_bench.py
  function get_prompt2 (line 10) | def get_prompt2(conv):
  class MLVU (line 24) | class MLVU(Dataset):
    method __init__ (line 25) | def __init__(self, data_dir, data_list):
    method __str__ (line 39) | def __str__(self):
    method __len__ (line 61) | def __len__(self):
    method get_index (line 64) | def get_index(self, bound, fps, max_frame, first_idx=0):
    method qa_template (line 79) | def qa_template(self, data):
    method __getitem__ (line 85) | def __getitem__(self, idx):
  function main (line 98) | def main():

FILE: research/MLVU/evaluation/models/videochat2/choice_bench.py
  function get_prompt (line 62) | def get_prompt(conv):
  function get_prompt2 (line 72) | def get_prompt2(conv):
  function get_context_emb (line 87) | def get_context_emb(conv, model, img_list, answer_prompt=None, print_res...
  function ask (line 115) | def ask(text, conv):
  class StoppingCriteriaSub (line 119) | class StoppingCriteriaSub(StoppingCriteria):
    method __init__ (line 120) | def __init__(self, stops=[], encounters=1):
    method __call__ (line 123) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  function answer (line 130) | def answer(conv, model, img_list, do_sample=True, max_new_tokens=200, nu...
  function get_index (line 163) | def get_index(num_frames, num_segments):
  function load_video (line 172) | def load_video(video_path, num_segments=8, return_msg=False, resolution=...
  function get_sinusoid_encoding_table (line 205) | def get_sinusoid_encoding_table(n_position=784, d_hid=1024, cur_frame=8,...
  class MLVU (line 272) | class MLVU(Dataset):
    method __init__ (line 273) | def __init__(self, data_dir, data_list, num_segments=8, resolution=224):
    method __str__ (line 305) | def __str__(self):
    method __len__ (line 327) | def __len__(self):
    method get_index (line 330) | def get_index(self, bound, fps, max_frame, first_idx=0):
    method read_video (line 344) | def read_video(self, video_path, bound=None):
    method qa_template (line 359) | def qa_template(self, data):
    method __getitem__ (line 372) | def __getitem__(self, idx):
  function infer_mvbench (line 395) | def infer_mvbench(
  function check_ans (line 445) | def check_ans(pred, gt):

FILE: research/MLVU/evaluation/models/videochat2/open_bench.py
  function get_prompt (line 62) | def get_prompt(conv):
  function get_prompt2 (line 72) | def get_prompt2(conv):
  function get_context_emb (line 87) | def get_context_emb(conv, model, img_list, answer_prompt=None, print_res...
  function ask (line 115) | def ask(text, conv):
  class StoppingCriteriaSub (line 119) | class StoppingCriteriaSub(StoppingCriteria):
    method __init__ (line 120) | def __init__(self, stops=[], encounters=1):
    method __call__ (line 123) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  function answer (line 130) | def answer(conv, model, img_list, do_sample=True, max_new_tokens=200, nu...
  function get_index (line 163) | def get_index(num_frames, num_segments):
  function load_video (line 172) | def load_video(video_path, num_segments=8, return_msg=False, resolution=...
  function get_sinusoid_encoding_table (line 205) | def get_sinusoid_encoding_table(n_position=784, d_hid=1024, cur_frame=8,...
  class MLVU (line 264) | class MLVU(Dataset):
    method __init__ (line 265) | def __init__(self, data_dir, data_list, num_segments=8, resolution=224):
    method __str__ (line 296) | def __str__(self):
    method __len__ (line 318) | def __len__(self):
    method get_index (line 321) | def get_index(self, bound, fps, max_frame, first_idx=0):
    method read_video (line 335) | def read_video(self, video_path, bound=None):
    method qa_template (line 349) | def qa_template(self, data):
    method __getitem__ (line 354) | def __getitem__(self, idx):
  function infer_mvbench (line 377) | def infer_mvbench(

FILE: research/MLVU/evaluation/models/videollava/choice_bench.py
  function get_prompt2 (line 22) | def get_prompt2(conv):
  class MLVU (line 36) | class MLVU(Dataset):
    method __init__ (line 37) | def __init__(self, data_dir, data_list):
    method __str__ (line 51) | def __str__(self):
    method __len__ (line 73) | def __len__(self):
    method get_index (line 76) | def get_index(self, bound, fps, max_frame, first_idx=0):
    method qa_template (line 88) | def qa_template(self, data):
    method __getitem__ (line 101) | def __getitem__(self, idx):
  function check_ans (line 114) | def check_ans(pred, gt):
  function main (line 132) | def main():

FILE: research/MLVU/evaluation/models/videollava/open_bench.py
  class MLVU (line 24) | class MLVU(Dataset):
    method __init__ (line 25) | def __init__(self, data_dir, data_list):
    method __str__ (line 39) | def __str__(self):
    method __len__ (line 61) | def __len__(self):
    method get_index (line 64) | def get_index(self, bound, fps, max_frame, first_idx=0):
    method qa_template (line 79) | def qa_template(self, data):
    method __getitem__ (line 86) | def __getitem__(self, idx):
  function main (line 99) | def main():

FILE: research/MLVU/evaluation/multiple_choice_evaluation/choice_bench.py
  function get_prompt2 (line 10) | def get_prompt2(conv):
  class MLVU (line 24) | class MLVU(Dataset):
    method __init__ (line 25) | def __init__(self, data_dir, data_list):
    method __str__ (line 39) | def __str__(self):
    method __len__ (line 61) | def __len__(self):
    method get_index (line 64) | def get_index(self, bound, fps, max_frame, first_idx=0):
    method qa_template (line 79) | def qa_template(self, data):
    method __getitem__ (line 92) | def __getitem__(self, idx):
  function check_ans (line 105) | def check_ans(pred, gt):
  function main (line 121) | def main():

FILE: research/Matroyshka_reranker/finetune/compensation/arguments.py
  function default_list (line 8) | def default_list() -> List[str]:
  class ModelArguments (line 13) | class ModelArguments:
  class DataArguments (line 112) | class DataArguments:
  class RetrieverTrainingArguments (line 184) | class RetrieverTrainingArguments(TrainingArguments):

FILE: research/Matroyshka_reranker/finetune/compensation/data.py
  function traverse_directory_using_os (line 18) | def traverse_directory_using_os(root_folder):
  class TrainDatasetForReranker (line 29) | class TrainDatasetForReranker(Dataset):
    method __init__ (line 30) | def __init__(
    method __len__ (line 70) | def __len__(self):
    method __getitem__ (line 73) | def __getitem__(self, item) -> tuple[List[BatchEncoding], List[int], L...
  class RerankCollator (line 168) | class RerankCollator(DataCollatorForSeq2Seq):
    method __call__ (line 177) | def __call__(self, features_lengths, return_tensors='pt'):

FILE: research/Matroyshka_reranker/finetune/compensation/load_model.py
  function get_model (line 11) | def get_model(model_args, training_args, output_token_id):

FILE: research/Matroyshka_reranker/finetune/compensation/mistral_config.py
  class CostWiseMistralConfig (line 23) | class CostWiseMistralConfig(MistralConfig):
    method __init__ (line 98) | def __init__(

FILE: research/Matroyshka_reranker/finetune/compensation/mistral_model.py
  class CostWiseModelOutputWithPast (line 72) | class CostWiseModelOutputWithPast(ModelOutput):
  class CostWiseCausalLMOutputWithPast (line 80) | class CostWiseCausalLMOutputWithPast(ModelOutput):
  function token_compress (line 88) | def token_compress(compress_ratio,
  class CostWiseMistralModel (line 197) | class CostWiseMistralModel(MistralPreTrainedModel):
    method __init__ (line 205) | def __init__(self, config: CostWiseMistralConfig):
    method get_input_embeddings (line 221) | def get_input_embeddings(self):
    method set_input_embeddings (line 224) | def set_input_embeddings(self, value):
    method forward (line 228) | def forward(
  class CostWiseHead (line 468) | class CostWiseHead(nn.Module):
    method __init__ (line 471) | def __init__(self, input_size, output_size):
    method forward (line 475) | def forward(self, **kwargs):
  class CostWiseMistralForCausalLM (line 478) | class CostWiseMistralForCausalLM(MistralPreTrainedModel):
    method __init__ (line 481) | def __init__(self, config):
    method get_input_embeddings (line 497) | def get_input_embeddings(self):
    method set_input_embeddings (line 500) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 503) | def get_output_embeddings(self):
    method set_output_embeddings (line 506) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 509) | def set_decoder(self, decoder):
    method get_decoder (line 512) | def get_decoder(self):
    method forward (line 517) | def forward(
    method prepare_inputs_for_generation (line 642) | def prepare_inputs_for_generation(
    method _reorder_cache (line 700) | def _reorder_cache(past_key_values, beam_idx):

FILE: research/Matroyshka_reranker/finetune/compensation/modeling.py
  class RerankerOutput (line 19) | class RerankerOutput(ModelOutput):
  function last_logit_pool (line 24) | def last_logit_pool(logits: Tensor,
  function set_nested_attr (line 35) | def set_nested_attr(obj, attr, value):
  function get_nested_attr (line 42) | def get_nested_attr(obj, attr):
  class BiEncoderModel (line 49) | class BiEncoderModel(nn.Module):
    method __init__ (line 50) | def __init__(self,
    method gradient_checkpointing_enable (line 85) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 88) | def enable_input_require_grads(self, **kwargs):
    method encode (line 91) | def encode(self, features, query_lengths, prompt_lengths):
    method forward (line 118) | def forward(self,
    method compute_loss (line 159) | def compute_loss(self, scores, target):
    method save (line 162) | def save(self, output_dir: str):
    method save_pretrained (line 176) | def save_pretrained(self, **kwargs):

FILE: research/Matroyshka_reranker/finetune/compensation/run.py
  function main (line 20) | def main():

FILE: research/Matroyshka_reranker/finetune/compensation/trainer.py
  class BiTrainer (line 6) | class BiTrainer(Trainer):
    method _save (line 9) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 41) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: research/Matroyshka_reranker/finetune/self_distillation/arguments.py
  function default_list (line 8) | def default_list() -> List[str]:
  class ModelArguments (line 13) | class ModelArguments:
  class DataArguments (line 109) | class DataArguments:
  class RetrieverTrainingArguments (line 181) | class RetrieverTrainingArguments(TrainingArguments):

FILE: research/Matroyshka_reranker/finetune/self_distillation/data.py
  function traverse_directory_using_os (line 18) | def traverse_directory_using_os(root_folder):
  class TrainDatasetForReranker (line 29) | class TrainDatasetForReranker(Dataset):
    method __init__ (line 30) | def __init__(
    method __len__ (line 70) | def __len__(self):
    method __getitem__ (line 73) | def __getitem__(self, item) -> tuple[List[BatchEncoding], List[int], L...
  class RerankCollator (line 168) | class RerankCollator(DataCollatorForSeq2Seq):
    method __call__ (line 177) | def __call__(self, features_lengths, return_tensors='pt'):

FILE: research/Matroyshka_reranker/finetune/self_distillation/load_model.py
  function get_model (line 9) | def get_model(model_args, training_args, output_token_id):

FILE: research/Matroyshka_reranker/finetune/self_distillation/mistral_config.py
  class CostWiseMistralConfig (line 23) | class CostWiseMistralConfig(MistralConfig):
    method __init__ (line 98) | def __init__(

FILE: research/Matroyshka_reranker/finetune/self_distillation/mistral_model.py
  class CostWiseModelOutputWithPast (line 72) | class CostWiseModelOutputWithPast(ModelOutput):
  class CostWiseCausalLMOutputWithPast (line 80) | class CostWiseCausalLMOutputWithPast(ModelOutput):
  function token_compress (line 88) | def token_compress(compress_ratio,
  class CostWiseMistralModel (line 197) | class CostWiseMistralModel(MistralPreTrainedModel):
    method __init__ (line 205) | def __init__(self, config: CostWiseMistralConfig):
    method get_input_embeddings (line 221) | def get_input_embeddings(self):
    method set_input_embeddings (line 224) | def set_input_embeddings(self, value):
    method forward (line 228) | def forward(
  class CostWiseHead (line 468) | class CostWiseHead(nn.Module):
    method __init__ (line 471) | def __init__(self, input_size, output_size):
    method forward (line 475) | def forward(self, **kwargs):
  class CostWiseMistralForCausalLM (line 478) | class CostWiseMistralForCausalLM(MistralPreTrainedModel):
    method __init__ (line 481) | def __init__(self, config):
    method get_input_embeddings (line 497) | def get_input_embeddings(self):
    method set_input_embeddings (line 500) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 503) | def get_output_embeddings(self):
    method set_output_embeddings (line 506) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 509) | def set_decoder(self, decoder):
    method get_decoder (line 512) | def get_decoder(self):
    method forward (line 517) | def forward(
    method prepare_inputs_for_generation (line 642) | def prepare_inputs_for_generation(
    method _reorder_cache (line 700) | def _reorder_cache(past_key_values, beam_idx):

FILE: research/Matroyshka_reranker/finetune/self_distillation/modeling.py
  class RerankerOutput (line 15) | class RerankerOutput(ModelOutput):
  function last_logit_pool (line 20) | def last_logit_pool(logits: Tensor,
  class BiEncoderModel (line 31) | class BiEncoderModel(nn.Module):
    method __init__ (line 32) | def __init__(self,
    method gradient_checkpointing_enable (line 61) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 64) | def enable_input_require_grads(self, **kwargs):
    method encode (line 67) | def encode(self, features, query_lengths, prompt_lengths):
    method encode_full (line 91) | def encode_full(self, features, query_lengths, prompt_lengths):
    method forward (line 115) | def forward(self,
    method compute_loss (line 203) | def compute_loss(self, scores, target):
    method save (line 206) | def save(self, output_dir: str):
    method save_pretrained (line 215) | def save_pretrained(self, **kwargs):

FILE: research/Matroyshka_reranker/finetune/self_distillation/run.py
  function main (line 20) | def main():

FILE: research/Matroyshka_reranker/finetune/self_distillation/trainer.py
  class BiTrainer (line 6) | class BiTrainer(Trainer):
    method _save (line 9) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 41) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: research/Matroyshka_reranker/inference/mistral_config.py
  class CostWiseMistralConfig (line 23) | class CostWiseMistralConfig(MistralConfig):
    method __init__ (line 98) | def __init__(

FILE: research/Matroyshka_reranker/inference/mistral_model.py
  class CostWiseModelOutputWithPast (line 72) | class CostWiseModelOutputWithPast(ModelOutput):
  class CostWiseCausalLMOutputWithPast (line 80) | class CostWiseCausalLMOutputWithPast(ModelOutput):
  function token_compress (line 88) | def token_compress(compress_ratio,
  class CostWiseMistralModel (line 197) | class CostWiseMistralModel(MistralPreTrainedModel):
    method __init__ (line 205) | def __init__(self, config: CostWiseMistralConfig):
    method get_input_embeddings (line 221) | def get_input_embeddings(self):
    method set_input_embeddings (line 224) | def set_input_embeddings(self, value):
    method forward (line 228) | def forward(
  class CostWiseHead (line 468) | class CostWiseHead(nn.Module):
    method __init__ (line 471) | def __init__(self, input_size, output_size):
    method forward (line 475) | def forward(self, **kwargs):
  class CostWiseMistralForCausalLM (line 478) | class CostWiseMistralForCausalLM(MistralPreTrainedModel):
    method __init__ (line 481) | def __init__(self, config):
    method get_input_embeddings (line 497) | def get_input_embeddings(self):
    method set_input_embeddings (line 500) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 503) | def get_output_embeddings(self):
    method set_output_embeddings (line 506) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 509) | def set_decoder(self, decoder):
    method get_decoder (line 512) | def get_decoder(self):
    method forward (line 517) | def forward(
    method prepare_inputs_for_generation (line 642) | def prepare_inputs_for_generation(
    method _reorder_cache (line 700) | def _reorder_cache(past_key_values, beam_idx):

FILE: research/Matroyshka_reranker/inference/rank_model.py
  class MatroyshkaReranker (line 17) | class MatroyshkaReranker(AbsReranker):
    method __init__ (line 49) | def __init__(
    method compute_score_single_gpu (line 151) | def compute_score_single_gpu(

FILE: research/Reinforced_IR/data_generation/agent/gpt.py
  class GPTAgent (line 13) | class GPTAgent():
    method __init__ (line 14) | def __init__(
    method generate_single (line 31) | def generate_single(
    method generate (line 77) | def generate(
    method generate_single_direct (line 107) | def generate_single_direct(
    method generate_direct (line 127) | def generate_direct(

FILE: research/Reinforced_IR/data_generation/agent/vllm.py
  class LLMAgent (line 8) | class LLMAgent():
    method __init__ (line 9) | def __init__(
    method generate (line 21) | def generate(

FILE: research/Reinforced_IR/data_generation/agent/vllm_instruct.py
  class LLMInstructAgent (line 9) | class LLMInstructAgent():
    method __init__ (line 10) | def __init__(
    method generate (line 23) | def generate(
    method generate_direct (line 61) | def generate_direct(

FILE: research/Reinforced_IR/data_generation/generate_generator_data.py
  function parse_option (line 15) | def parse_option():
  function main (line 47) | def main(opt):

FILE: research/Reinforced_IR/data_generation/generate_retriever_data.py
  function parse_option (line 11) | def parse_option():
  function main (line 46) | def main(opt):

FILE: research/Reinforced_IR/data_generation/generate_retriever_distill_data.py
  function parse_option (line 13) | def parse_option():
  function main (line 36) | def main(opt):

FILE: research/Reinforced_IR/data_generation/generate_universal_query.py
  function parse_option (line 10) | def parse_option():
  function main (line 33) | def main(opt):

FILE: research/Reinforced_IR/data_generation/prompts/get_prompts.py
  function get_query_generation_prompt (line 324) | def get_query_generation_prompt(dataset_name: str, passage: str, use_exa...
  function get_additional_info_generation_prompt (line 465) | def get_additional_info_generation_prompt(dataset_name: str, query: str)...
  function get_additional_info_generation_long_prompt (line 488) | def get_additional_info_generation_long_prompt(dataset_name: str, query:...
  function get_additional_info_generation_long_air_prompt (line 511) | def get_additional_info_generation_long_air_prompt(dataset_name: str, qu...
  function get_additional_info_generation_train_prompt (line 535) | def get_additional_info_generation_train_prompt(dataset_name: str, query...
  function get_quality_control_prompt (line 670) | def get_quality_control_prompt(dataset_name: str, query: str, passage: s...
  function get_reranker_prompt (line 718) | def get_reranker_prompt(dataset_name: str, query: str, passage: str) -> ...

FILE: research/Reinforced_IR/data_generation/prompts/hyde_prompts.py
  function get_additional_info_generation_prompt (line 42) | def get_additional_info_generation_prompt(dataset_name: str, query: str)...

FILE: research/Reinforced_IR/data_generation/prompts/teacher_prompts.py
  function get_yes_prompt (line 95) | def get_yes_prompt(dataset_name: str, query: str, passage: str) -> str:
  function get_rank_prompt (line 151) | def get_rank_prompt(dataset_name, num, query, passages):

FILE: research/Reinforced_IR/data_generation/utils.py
  function extract_numbers (line 17) | def extract_numbers(s):
  function get_distill_data (line 22) | def get_distill_data(
  function generate_bge_train_data (line 65) | def generate_bge_train_data(
  function generate_llm_dpo_train_data (line 229) | def generate_llm_dpo_train_data(
  function evaluate_mrr (line 302) | def evaluate_mrr(qrels: Dict[str, Dict[str, int]],
  function search (line 329) | def search(queries_emb, doc_emb, topk: int = 100):
  function evaluate (line 358) | def evaluate(metrics: List[str] = ['recall', 'mrr', 'ndcg'],
  function evaluate_better (line 422) | def evaluate_better(metrics: List[str] = ['recall', 'mrr', 'ndcg'],

FILE: research/Reinforced_IR/finetune/generator/save_tokenizer.py
  function parse_option (line 9) | def parse_option():
  function main (line 20) | def main(opt):

FILE: research/Reinforced_IR/finetune/generator/update_file.py
  function parse_option (line 7) | def parse_option():
  function main (line 20) | def main(opt):

FILE: research/Reinforced_IR/finetune/retriever/arguments.py
  class IREmbedderTrainingArguments (line 11) | class IREmbedderTrainingArguments(AbsEmbedderTrainingArguments):
  class IREmbedderDataArguments (line 20) | class IREmbedderDataArguments(AbsEmbedderDataArguments):

FILE: research/Reinforced_IR/finetune/retriever/dataset.py
  class IREmbedderTrainDataset (line 24) | class IREmbedderTrainDataset(AbsEmbedderTrainDataset):
    method __init__ (line 31) | def __init__(
    method __getitem__ (line 41) | def __getitem__(self, item):
  class IREmbedderCollator (line 91) | class IREmbedderCollator(AbsEmbedderCollator):
    method __call__ (line 99) | def __call__(self, features):
  class IREmbedderSameDatasetTrainDataset (line 224) | class IREmbedderSameDatasetTrainDataset(AbsEmbedderSameDatasetTrainDatas...
    method __init__ (line 235) | def __init__(
    method _shuffle_answer (line 253) | def _shuffle_answer(self, text):
    method __getitem__ (line 269) | def __getitem__(self, _):
    method _create_batch_data (line 277) | def _create_batch_data(self, batch_raw_data):
  class IREmbedderSameDatasetCollator (line 401) | class IREmbedderSameDatasetCollator(AbsEmbedderSameDatasetCollator):
    method __call__ (line 414) | def __call__(self, features):

FILE: research/Reinforced_IR/finetune/retriever/modeling.py
  class BiIREmbedderModel (line 24) | class BiIREmbedderModel(BiEncoderOnlyEmbedderModel):
    method __init__ (line 40) | def __init__(
    method forward (line 74) | def forward(
    method distill_loss (line 159) | def distill_loss(kd_loss_type, teacher_targets, student_scores, group_...
    method save (line 204) | def save(self, output_dir: str):

FILE: research/Reinforced_IR/finetune/retriever/runner.py
  class IREmbedderRunner (line 19) | class IREmbedderRunner(AbsEmbedderRunner):
    method load_train_dataset (line 24) | def load_train_dataset(self):
    method load_data_collator (line 44) | def load_data_collator(self):
    method load_tokenizer_and_model (line 61) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm...
    method load_trainer (line 114) | def load_trainer(self) -> IREmbedderTrainer:

FILE: research/Reinforced_IR/finetune/retriever/trainer.py
  class IREmbedderTrainer (line 11) | class IREmbedderTrainer(AbsEmbedderTrainer):
    method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None):

FILE: research/Reinforced_IR/inference/agent/gpt.py
  class GPTAgent (line 13) | class GPTAgent():
    method __init__ (line 14) | def __init__(
    method generate_single (line 31) | def generate_single(
    method generate (line 77) | def generate(
    method generate_single_direct (line 107) | def generate_single_direct(
    method generate_direct (line 127) | def generate_direct(

FILE: research/Reinforced_IR/inference/agent/vllm.py
  class LLMAgent (line 8) | class LLMAgent():
    method __init__ (line 9) | def __init__(
    method generate (line 21) | def generate(

FILE: research/Reinforced_IR/inference/agent/vllm_instruct.py
  class LLMInstructAgent (line 9) | class LLMInstructAgent():
    method __init__ (line 10) | def __init__(
    method generate (line 23) | def generate(
    method generate_direct (line 61) | def generate_direct(

FILE: research/Reinforced_IR/inference/ir_model.py
  class Reinforced_IR_Model (line 23) | class Reinforced_IR_Model():
    method __init__ (line 24) | def __init__(
    method load_retriever (line 69) | def load_retriever(self):
    method load_generator (line 85) | def load_generator(self):
    method offload_retriever (line 102) | def offload_retriever(self):
    method offload_generator (line 107) | def offload_generator(self):
    method encode_queries (line 112) | def encode_queries(self, task_instruction, answer_type, queries, **kwa...
    method encode_corpus (line 129) | def encode_corpus(self, corpus, **kwargs):
    method encode (line 133) | def encode(self, corpus, **kwargs):

FILE: research/Reinforced_IR/inference/multi.py
  class Args (line 10) | class Args():
  function worker_function (line 60) | def worker_function(device):
  function merge (line 122) | def merge(args: Args):

FILE: research/baai_general_embedding/finetune/arguments.py
  class ModelArguments (line 9) | class ModelArguments:
  class DataArguments (line 30) | class DataArguments:
    method __post_init__ (line 63) | def __post_init__(self):
  class RetrieverTrainingArguments (line 68) | class RetrieverTrainingArguments(TrainingArguments):

FILE: research/baai_general_embedding/finetune/data.py
  class TrainDatasetForEmbedding (line 14) | class TrainDatasetForEmbedding(Dataset):
    method __init__ (line 15) | def __init__(
    method __len__ (line 37) | def __len__(self):
    method __getitem__ (line 40) | def __getitem__(self, item) -> Tuple[str, List[str]]:
  class EmbedCollator (line 64) | class EmbedCollator(DataCollatorWithPadding):
    method padding_score (line 73) | def padding_score(self, teacher_score):
    method __call__ (line 91) | def __call__(self, features):

FILE: research/baai_general_embedding/finetune/eval_msmarco.py
  class Args (line 16) | class Args:
  function index (line 74) | def index(model: FlagModel, corpus: datasets.Dataset, batch_size: int = ...
  function search (line 132) | def search(model: FlagModel, queries: datasets, faiss_index: faiss.Index...
  function evaluate (line 155) | def evaluate(preds,
  function main (line 212) | def main():

FILE: research/baai_general_embedding/finetune/hn_mine.py
  function get_args (line 11) | def get_args():
  function create_index (line 25) | def create_index(embeddings, use_gpu):
  function batch_search (line 37) | def batch_search(index,
  function get_corpus (line 50) | def get_corpus(candidate_pool):
  function find_knn_neg (line 58) | def find_knn_neg(model, input_file, candidate_pool, output_file, sample_...

FILE: research/baai_general_embedding/finetune/modeling.py
  class EncoderOutput (line 15) | class EncoderOutput(ModelOutput):
  class BiEncoderModel (line 22) | class BiEncoderModel(nn.Module):
    method __init__ (line 25) | def __init__(self,
    method gradient_checkpointing_enable (line 60) | def gradient_checkpointing_enable(self, **kwargs):
    method sentence_embedding (line 63) | def sentence_embedding(self, hidden_state, mask):
    method encode (line 71) | def encode(self, features):
    method compute_similarity (line 80) | def compute_similarity(self, q_reps, p_reps):
    method forward (line 85) | def forward(self, query: Dict[str, Tensor] = None, passage: Dict[str, ...
    method compute_loss (line 119) | def compute_loss(self, scores, target):
    method _dist_gather_tensor (line 122) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]):
    method save (line 135) | def save(self, output_dir: str):

FILE: research/baai_general_embedding/finetune/run.py
  function main (line 20) | def main():

FILE: research/baai_general_embedding/finetune/trainer.py
  function save_ckpt_for_sentence_transformers (line 5) | def save_ckpt_for_sentence_transformers(ckpt_dir, pooling_mode: str = 'c...
  class BiTrainer (line 16) | class BiTrainer(Trainer):
    method _save (line 17) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 40) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: research/baai_general_embedding/retromae_pretrain/arguments.py
  class DataTrainingArguments (line 7) | class DataTrainingArguments:
    method __post_init__ (line 24) | def __post_init__(self):
  class ModelArguments (line 30) | class ModelArguments:

FILE: research/baai_general_embedding/retromae_pretrain/data.py
  class DatasetForPretraining (line 13) | class DatasetForPretraining(torch.utils.data.Dataset):
    method __init__ (line 14) | def __init__(self, data_dir):
    method load_dataset (line 26) | def load_dataset(self, file):
    method __getitem__ (line 34) | def __getitem__(self, item):
    method __len__ (line 37) | def __len__(self):
  class RetroMAECollator (line 42) | class RetroMAECollator(DataCollatorForWholeWordMask):
    method __call__ (line 47) | def __call__(self, examples):

FILE: research/baai_general_embedding/retromae_pretrain/enhancedDecoder.py
  class BertSelfAttention (line 24) | class BertSelfAttention(nn.Module):
    method __init__ (line 25) | def __init__(self, config, position_embedding_type=None):
    method transpose_for_scores (line 51) | def transpose_for_scores(self, x):
    method forward (line 56) | def forward(
  class BertAttention (line 153) | class BertAttention(nn.Module):
    method __init__ (line 154) | def __init__(self, config, position_embedding_type=None):
    method prune_heads (line 160) | def prune_heads(self, heads):
    method forward (line 178) | def forward(
  class BertLayerForDecoder (line 204) | class BertLayerForDecoder(nn.Module):
    method __init__ (line 205) | def __init__(self, config):
    method forward (line 219) | def forward(
    method feed_forward_chunk (line 285) | def feed_forward_chunk(self, attention_output):

FILE: research/baai_general_embedding/retromae_pretrain/modeling.py
  class RetroMAEForPretraining (line 15) | class RetroMAEForPretraining(nn.Module):
    method __init__ (line 16) | def __init__(
    method gradient_checkpointing_enable (line 38) | def gradient_checkpointing_enable(self, **kwargs):
    method forward (line 41) | def forward(self,
    method mlm_loss (line 77) | def mlm_loss(self, hiddens, labels):
    method save_pretrained (line 91) | def save_pretrained(self, output_dir: str):
    method from_pretrained (line 96) | def from_pretrained(

FILE: research/baai_general_embedding/retromae_pretrain/run.py
  class TrainerCallbackForSaving (line 27) | class TrainerCallbackForSaving(TrainerCallback):
    method on_epoch_end (line 28) | def on_epoch_end(self, args: TrainingArguments, state: TrainerState, c...
  function main (line 35) | def main():

FILE: research/baai_general_embedding/retromae_pretrain/trainer.py
  class PreTrainer (line 11) | class PreTrainer(Trainer):
    method log (line 12) | def log(self, logs: Dict[str, float]) -> None:
    method _save (line 30) | def _save(self, output_dir: Optional[str] = None, state_dict=None):

FILE: research/baai_general_embedding/retromae_pretrain/utils.py
  function tensorize_batch (line 6) | def tensorize_batch(sequences: List[torch.Tensor], padding_value, align_...

FILE: research/llm_dense_retriever/finetune/arguments.py
  function default_list (line 8) | def default_list() -> List[int]:
  class ModelArguments (line 13) | class ModelArguments:
  class DataArguments (line 75) | class DataArguments:
  class RetrieverTrainingArguments (line 155) | class RetrieverTrainingArguments(TrainingArguments):

FILE: research/llm_dense_retriever/finetune/data.py
  function get_query_prompt (line 22) | def get_query_prompt(query, prompt, use_special_tokens):
  function add_prompt (line 29) | def add_prompt(example, prompt):
  function traverse_directory_using_os (line 33) | def traverse_directory_using_os(root_folder):
  class SameDatasetTrainDataset (line 45) | class SameDatasetTrainDataset(Dataset):
    method __init__ (line 52) | def __init__(self, args: DataArguments, batch_size, seed, tokenizer, p...
    method refresh_epoch (line 157) | def refresh_epoch(self):
    method __getitem__ (line 180) | def __getitem__(self, idx):
    method create_batch_data (line 192) | def create_batch_data(self, batch_raw_data):
    method __len__ (line 341) | def __len__(self):
  class SameEmbedCollator (line 346) | class SameEmbedCollator(DataCollatorForSeq2Seq):
    method __call__ (line 357) | def __call__(self, features, return_tensors='pt'):

FILE: research/llm_dense_retriever/finetune/load_model.py
  function find_largest_checkpoint (line 8) | def find_largest_checkpoint(checkpoint_dir):
  function get_model (line 24) | def get_model(model_args, output_dir, resize, resize_tokens):
  function save_merged_model (line 93) | def save_merged_model(model_args, output_dir):

FILE: research/llm_dense_retriever/finetune/modeling.py
  class EncoderOutput (line 19) | class EncoderOutput(ModelOutput):
  class BiEncoderModel (line 26) | class BiEncoderModel(nn.Module):
    method __init__ (line 29) | def __init__(self,
    method gradient_checkpointing_enable (line 57) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 60) | def enable_input_require_grads(self, **kwargs):
    method encode (line 63) | def encode(self, features):
    method compute_similarity (line 99) | def compute_similarity(self, q_reps, p_reps):
    method get_local_similarity (line 104) | def get_local_similarity(self, q_reps, p_reps, all_scores):
    method compute_local_similarity (line 113) | def compute_local_similarity(self, q_reps, p_reps):
    method forward (line 118) | def forward(self,
    method compute_cross_entropy_loss (line 182) | def compute_cross_entropy_loss(self, scores, target):
    method _dist_gather_tensor (line 185) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]):
    method save (line 197) | def save(self, output_dir: str):

FILE: research/llm_dense_retriever/finetune/run.py
  function main (line 22) | def main():
  function save_model (line 146) | def save_model():

FILE: research/llm_dense_retriever/finetune/trainer.py
  class BiTrainer (line 4) | class BiTrainer(Trainer):
    method _save (line 5) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 28) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: research/llm_embedder/evaluation/eval_icl.py
  class ICLArgs (line 96) | class ICLArgs(LMArgs, RetrievalArgs):
  class GenerationArgs (line 142) | class GenerationArgs(GenerationArgs):
  function remove_double_space (line 149) | def remove_double_space(string):
  function load_test_data (line 153) | def load_test_data(knn_inxs,
  function main (line 220) | def main():

FILE: research/llm_embedder/evaluation/eval_lrlm.py
  class LRLMArgs (line 23) | class LRLMArgs(RetrievalArgs, SRLMArgs):
    method __post_init__ (line 63) | def __post_init__(self):
  function process_lrlm (line 70) | def process_lrlm(tokenizer, context_max_length=4096, target_length=1024,...
  function main (line 100) | def main():

FILE: research/llm_embedder/evaluation/eval_mmlu.py
  class MMLUArgs (line 32) | class MMLUArgs(LMArgs, RetrievalArgs):
  function process_mmlu (line 88) | def process_mmlu(tokenizer, context_max_length=2048, key_num=3, few_shot...
  function evaluate_mmlu (line 204) | def evaluate_mmlu(eval_data, save_path, **kwds):
  function main (line 256) | def main():

FILE: research/llm_embedder/evaluation/eval_msc.py
  class LRLMArgs (line 23) | class LRLMArgs(RetrievalArgs, SRLMArgs):
  class HistoryCollator (line 51) | class HistoryCollator:
    method __call__ (line 53) | def __call__(self, batch_elem):
  function main (line 78) | def main():

FILE: research/llm_embedder/evaluation/eval_popqa.py
  class PopQAArgs (line 47) | class PopQAArgs(LMArgs, RetrievalArgs):
  class GenerationArgs (line 96) | class GenerationArgs(GenerationArgs):
  function process_popqa (line 104) | def process_popqa(tokenizer, context_max_length=2048, key_num=3, few_sho...
  function evaluate_popqa (line 185) | def evaluate_popqa(eval_data, save_path, **kwds):
  function main (line 217) | def main():

FILE: research/llm_embedder/evaluation/eval_qa.py
  class QAArgs (line 28) | class QAArgs(LMArgs, RetrievalArgs):
  class GenerationArgs (line 89) | class GenerationArgs(GenerationArgs):
  function process_qa (line 97) | def process_qa(tokenizer, context_max_length=2048, key_num=3, few_shot=0...
  function evaluate_qa (line 172) | def evaluate_qa(eval_data, save_path, **kwds):
  function main (line 197) | def main():

FILE: research/llm_embedder/evaluation/eval_qrecc.py
  class QRECCArgs (line 29) | class QRECCArgs(LMArgs, RetrievalArgs):
  class GenerationArgs (line 81) | class GenerationArgs(GenerationArgs):
  function process_qrecc (line 89) | def process_qrecc(tokenizer, context_max_length=2048, key_num=3, is_enco...
  function evaluate_qrecc (line 140) | def evaluate_qrecc(eval_data, save_path, **kwds):
  function main (line 166) | def main():

FILE: research/llm_embedder/evaluation/eval_retrieval.py
  class Args (line 23) | class Args(RetrievalArgs):
  function main (line 45) | def main(args, accelerator=None, log=True):

FILE: research/llm_embedder/evaluation/eval_tool.py
  class ToolArgs (line 15) | class ToolArgs(RetrievalArgs):

FILE: research/llm_embedder/evaluation/icl_utils.py
  function _normalize_answer (line 15) | def _normalize_answer(text, punc_chars, punc_repl):
  function normalize_squad (line 35) | def normalize_squad(answer):
  function _metric_max_over_ground_truths (line 40) | def _metric_max_over_ground_truths(metric_fn, ground_truths, prediction):
  function _exact_match_score (line 47) | def _exact_match_score(target, prediction):
  function _f1_score (line 51) | def _f1_score(target, prediction):
  function qa_metrics (line 65) | def qa_metrics(targets, predictions, return_list=False):
  class App (line 94) | class App:
    method __init__ (line 95) | def __init__(self):
    method add (line 98) | def add(self, key):
    method __getitem__ (line 105) | def __getitem__(self, __name: str):
  function rouge (line 113) | def rouge(preds, labels, return_list=False):
  function squad (line 135) | def squad(labels, preds, return_list=False):
  function simple_accuracy (line 154) | def simple_accuracy(preds, labels, return_list=False):
  function compute_metrics (line 165) | def compute_metrics(metric, labels, preds):
  function compute_scores (line 181) | def compute_scores(metric, preds, labels):
  function flat_options (line 188) | def flat_options(data):
  function perplexity_to_choice (line 195) | def perplexity_to_choice(data, perplexity):
  function get_length (line 208) | def get_length(tokenizer, text):
  function get_prompt_length (line 217) | def get_prompt_length(tokenizer, prompts_list, question, n_tokens_in_pro...
  function _llm_generation_func (line 224) | def _llm_generation_func(examples: Dict[str, List],
  function _llm_perplexity_func (line 257) | def _llm_perplexity_func(examples: Dict[str, List],

FILE: research/llm_embedder/run_dense.py
  function main (line 19) | def main():

FILE: research/llm_embedder/run_lm_score.py
  class ScoreArgs (line 20) | class ScoreArgs(LMArgs):
  function process_lm_scoring (line 47) | def process_lm_scoring(tokenizer, key_max_length=512):
  function collate_scores (line 123) | def collate_scores(eval_data, save_name):
  function main (line 186) | def main():

FILE: research/llm_embedder/run_ranker.py
  function main (line 18) | def main():

FILE: research/llm_embedder/scripts/ours2st.py
  function convert_ours_ckpt_to_sentence_transformer (line 8) | def convert_ours_ckpt_to_sentence_transformer(src_dir, dest_dir, pooling...
  class Args (line 46) | class Args:
    method __post_init__ (line 68) | def __post_init__(self):

FILE: research/llm_embedder/src/lm/args.py
  class LMArgs (line 7) | class LMArgs(BaseArgs):
  class SRLMArgs (line 53) | class SRLMArgs(LMArgs):
  class GenerationArgs (line 101) | class GenerationArgs:
    method __post_init__ (line 130) | def __post_init__(self):

FILE: research/llm_embedder/src/lm/modeling_lm.py
  class LM (line 11) | class LM(torch.nn.Module):
    method __init__ (line 12) | def __init__(self, model_name_or_path=None, padding_side="left", dtype...
    method device (line 59) | def device(self):
    method _move_to_device (line 65) | def _move_to_device(self, inputs):
    method compute_nlls (line 72) | def compute_nlls(self, dataloader):
    method generate (line 124) | def generate(self, dataloader, return_new_tokens_only=True, decode=Tru...

FILE: research/llm_embedder/src/lm/modeling_srlm.py
  class SRLMOutput (line 18) | class SRLMOutput(ModelOutput):
  class SelfRetrievalLM (line 24) | class SelfRetrievalLM(LM):
    method __init__ (line 25) | def __init__(self, retriever=None, context_window_size:int=2048, chunk...
    method _get_retrieved_chunks (line 47) | def _get_retrieved_chunks(self, value_chunks, retrieved_indices):
    method _get_retrieved_history (line 71) | def _get_retrieved_history(self, history, retrieved_indices):
    method forward (line 101) | def forward(self, **kwds):
    method forward_with_history_retrieval (line 107) | def forward_with_history_retrieval(self, query:np.ndarray, history:np....
    method forward_with_chunk_retrieval (line 227) | def forward_with_chunk_retrieval(self, input_ids, attention_mask, labe...
    method compute_perplexity (line 464) | def compute_perplexity(self, dataloader):

FILE: research/llm_embedder/src/retrieval/args.py
  class BaseArgs (line 8) | class BaseArgs:
    method resolve_path (line 71) | def resolve_path(self, path):
    method __post_init__ (line 85) | def __post_init__(self):
  class DenseRetrievalArgs (line 97) | class DenseRetrievalArgs(BaseArgs):
  class BM25Args (line 184) | class BM25Args(BaseArgs):
  class RankerArgs (line 225) | class RankerArgs(BaseArgs):
  class RetrievalArgs (line 271) | class RetrievalArgs(DenseRetrievalArgs, BM25Args):
  class RetrievalTrainingArgs (line 279) | class RetrievalTrainingArgs(TrainingArguments):
    method __setattr__ (line 406) | def __setattr__(self, name, value):
    method __post_init__ (line 409) | def __post_init__(self):

FILE: research/llm_embedder/src/retrieval/data.py
  class RetrievalDataset (line 16) | class RetrievalDataset:
    method get_train_process_fn (line 17) | def get_train_process_fn(train_group_size=8, select_positive="first", ...
    method prepare_train_dataset (line 122) | def prepare_train_dataset(data_file=None, cache_dir=None, config=None,...
    method prepare_eval_dataset (line 183) | def prepare_eval_dataset(data_file=None, cache_dir=None, instruction=N...
    method prepare_corpus (line 236) | def prepare_corpus(data_file, key_template:str, cache_dir=None, instru...
  class SameDatasetTrainDataset (line 254) | class SameDatasetTrainDataset(torch.utils.data.Dataset):
    method __init__ (line 264) | def __init__(self, dataset, dataset_indices_range, batch_size, seed, o...
    method create_epoch (line 284) | def create_epoch(self):
    method __getitem__ (line 305) | def __getitem__(self, idx):
    method __len__ (line 363) | def __len__(self):
  class RetrievalDataCollator (line 368) | class RetrievalDataCollator:
    method __call__ (line 377) | def __call__(self, batch_elem):

FILE: research/llm_embedder/src/retrieval/evalnq.py
  class SimpleTokenizer (line 12) | class SimpleTokenizer:
    method __init__ (line 16) | def __init__(self, **kwargs):
    method tokenize (line 26) | def tokenize(self, text, uncase=False):
  function _normalize (line 40) | def _normalize(text):
  function has_answer (line 44) | def has_answer(answers, text, tokenizer) -> bool:
  class EvalDataset (line 62) | class EvalDataset(Dataset):
    method __init__ (line 63) | def __init__(self, retrieval_result, eval_dataset, corpus):
    method __getitem__ (line 69) | def __getitem__(self, qidx):
    method __len__ (line 79) | def __len__(self):
  function evaluate_nq (line 83) | def evaluate_nq(retrieval_result: dict, eval_data: datasets.Dataset, cor...

FILE: research/llm_embedder/src/retrieval/metrics.py
  class RetrievalMetric (line 13) | class RetrievalMetric:
    method get_metric_fn (line 16) | def get_metric_fn(cls, metric_names, **kwds):
    method _get_save_path (line 37) | def _get_save_path(eval_data, output_dir=None, field="result", save_na...
    method _save_result (line 55) | def _save_result(query_ids, preds, result_path, scores=None):
    method _load_result (line 71) | def _load_result(result_path):
    method _clean_pred (line 92) | def _clean_pred(pred, score=None):
    method _prepare_label (line 109) | def _prepare_label(eval_data):
    method mrr (line 124) | def mrr(eval_data=None, cutoffs=[10], **kwds):
    method recall (line 163) | def recall(eval_data=None, cutoffs=[10], **kwds):
    method ndcg (line 194) | def ndcg(eval_data=None, cutoffs=[10], **kwds):
    method nq (line 234) | def nq(eval_data, corpus, cache_dir=None, **kwds):
    method collate_key (line 246) | def collate_key(eval_data, save_name, corpus, output_dir=None, save_to...
    method collate_neg (line 296) | def collate_neg(eval_data, save_name, corpus, max_neg_num=100, filter_...
    method collate_score (line 363) | def collate_score(eval_data, save_name, output_dir=None, save_to_outpu...

FILE: research/llm_embedder/src/retrieval/modeling_bm25.py
  class BM25Retriever (line 12) | class BM25Retriever:
    method __init__ (line 13) | def __init__(self, anserini_dir, k1=0.9, b=0.4, **kwds) -> None:
    method _prepare_collection (line 18) | def _prepare_collection(self, corpus:datasets.Dataset, collection_dir,...
    method _prepare_query (line 34) | def _prepare_query(self, eval_data:Union[str, datasets.Dataset], query...
    method _prepare_result (line 81) | def _prepare_result(self, result_path):
    method index (line 91) | def index(self, corpus:Optional[datasets.Dataset]=None, output_dir:str...
    method search (line 109) | def search(self, eval_data:Union[str, datasets.Dataset], output_dir:Op...
  class NaiveBM25Retriever (line 143) | class NaiveBM25Retriever:
    method __init__ (line 144) | def __init__(self, k1:float=0.9, b:float=0.4, **kwds) -> None:
    method index (line 148) | def index(self, corpus: List[Union[str, List[int]]], verbose: bool=Fal...
    method search (line 188) | def search(self, queries: Union[str, List[int], List[str], List[List[i...

FILE: research/llm_embedder/src/retrieval/modeling_dense.py
  class DenseRetriever (line 20) | class DenseRetriever(torch.nn.Module):
    method __init__ (line 21) | def __init__(self, query_encoder:str='BAAI/bge-base-en', key_encoder:s...
    method _post_init (line 64) | def _post_init(self):
    method gradient_checkpointing_enable (line 81) | def gradient_checkpointing_enable(self):
    method device (line 86) | def device(self):
    method _gather_tensors (line 92) | def _gather_tensors(self, local_tensor):
    method _save_to_memmap (line 110) | def _save_to_memmap(self, path: str, shape: tuple, array: np.ndarray, ...
    method _prepare (line 149) | def _prepare(self, inputs: Union[str, List[str], Mapping], field="key"):
    method _pool (line 175) | def _pool(self, embeddings, attention_mask):
    method encode (line 193) | def encode(self, inputs: Union[str, List[str], Mapping], field:str="ke...
    method _compute_loss (line 237) | def _compute_loss(self, query_embedding, key_embedding, teacher_scores):
    method _refresh_config (line 312) | def _refresh_config(self, task):
    method forward (line 330) | def forward(self, query, key, task, teacher_scores=None, **kwds):
    method index (line 345) | def index(self, corpus: Dataset, output_dir="data/outputs", embedding_...
    method search (line 414) | def search(self, inputs: Union[str, List[str], Mapping], hits:int=10, ...
    method rerank (line 449) | def rerank(self, query, key, key_mask=None, **kwds):
    method save_pretrained (line 463) | def save_pretrained(self, output_dir: str, *args, **kwargs):
  class FaissIndex (line 485) | class FaissIndex:
    method __init__ (line 486) | def __init__(self, device) -> None:
    method build (line 494) | def build(self, encoded_corpus, index_factory, metric):
    method load (line 517) | def load(self, index_path):
    method save (line 527) | def save(self, index_path):
    method search (line 535) | def search(self, query, hits):

FILE: research/llm_embedder/src/retrieval/modeling_ranker.py
  class CrossEncoder (line 11) | class CrossEncoder(torch.nn.Module):
    method __init__ (line 12) | def __init__(self, ranker, dtype:str="fp16", cache_dir=None, accelerat...
    method gradient_checkpointing_enable (line 31) | def gradient_checkpointing_enable(self):
    method forward (line 34) | def forward(self, cross, batch_size, **kwds):
    method rerank (line 41) | def rerank(self, cross, batch_size, key_mask=None, hits=None, **kwds):
    method save_pretrained (line 57) | def save_pretrained(self, output_dir: str, *args, **kwargs):

FILE: research/llm_embedder/src/retrieval/modeling_unified.py
  class Retriever (line 11) | class Retriever:
    method __init__ (line 13) | def __init__(self, retrieval_method: str="dense", **kwds) -> None:
    method to (line 30) | def to(self, *args, **kwds):
    method encode (line 35) | def encode(self, *args, **kwds):
    method index (line 41) | def index(self, corpus, **kwds):
    method search (line 47) | def search(self, eval_dataset, **kwds):

FILE: research/llm_embedder/src/retrieval/trainer.py
  class RetrievalTrainer (line 19) | class RetrievalTrainer(Trainer):
    method __init__ (line 20) | def __init__(self, *args, corpus:Dataset, model_args, file_logger, **k...
    method _save (line 29) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method evaluate (line 50) | def evaluate(self, eval_dataset: Optional[Dataset] = None, ignore_keys...
  class EarlyExitCallBack (line 193) | class EarlyExitCallBack(TrainerCallback):
    method __init__ (line 194) | def __init__(self, early_exit_steps=None):
    method on_step_end (line 197) | def on_step_end(self, args: TrainingArguments, state: TrainerState, co...

FILE: research/llm_embedder/src/utils/llama_patch.py
  function forward (line 23) | def forward(
  function _prepare_decoder_attention_mask (line 121) | def _prepare_decoder_attention_mask(self, attention_mask, input_shape, i...
  function enable_flash_attention (line 125) | def enable_flash_attention(model=None):
  function disable_flash_attention (line 152) | def disable_flash_attention(model=None):
  function upcast_layer_for_flash_attention (line 173) | def upcast_layer_for_flash_attention(model, torch_dtype):

FILE: research/llm_embedder/src/utils/util.py
  function do_nothing (line 25) | def do_nothing():
  function makedirs (line 28) | def makedirs(path):
  function clear_dir (line 33) | def clear_dir(directory):
  function split_file_dir_name_ext (line 46) | def split_file_dir_name_ext(path):
  function save_pickle (line 52) | def save_pickle(obj, path:str):
  function load_pickle (line 61) | def load_pickle(path):
  function save_json (line 65) | def save_json(obj, path:str):
  function load_json (line 71) | def load_json(path, lines=False):
  function filelock (line 83) | def filelock(path, process_index=0):
  function normalize_text (line 95) | def normalize_text(text, ignore_case=True, ignore_punctuation=True, igno...
  function min_max_normalize (line 118) | def min_max_normalize(array):
  function get_max_length_in_nested_lists (line 121) | def get_max_length_in_nested_lists(lst):
  function pad_nested_lists (line 132) | def pad_nested_lists(lst, max_length, padding_value, padding_side="right"):
  function mask_nested_lists (line 151) | def mask_nested_lists(lst, mask_target, mask_value=0):
  function are_elements_of_same_length (line 159) | def are_elements_of_same_length(lst: List):
  function add_eos (line 166) | def add_eos(inputs: Mapping, eos_token_id: int):
  function remove_eos (line 180) | def remove_eos(inputs: Mapping, eos_token_id: int):
  function mix_parameters (line 187) | def mix_parameters(models: List[torch.nn.Module], weights: Optional[List...
  class FileLogger (line 226) | class FileLogger:
    method __init__ (line 227) | def __init__(self, log_file) -> None:
    method log (line 230) | def log(self, metrics, **kwargs):
  class Sequential_Sampler (line 251) | class Sequential_Sampler:
    method __init__ (line 255) | def __init__(self, dataset_length:int, num_replicas:int, rank:int) -> ...
    method __iter__ (line 275) | def __iter__(self):
    method __len__ (line 280) | def __len__(self):
  class DatasetProcessFn (line 284) | class DatasetProcessFn:
    method __init__ (line 293) | def __init__(self, augment=False):
    method __call__ (line 296) | def __call__(self, _process_fn):
  class DefaultDataCollator (line 334) | class DefaultDataCollator:
    method __call__ (line 345) | def __call__(self, batch_elem: List) -> Dict[str, Any]:

FILE: research/llm_reranker/evaluate.py
  class Args (line 11) | class Args():
  function evaluate_mrr (line 45) | def evaluate_mrr(predicts, labels, cutoffs):
  function main (line 70) | def main():

FILE: research/llm_reranker/finetune_for_instruction/arguments.py
  function default_list (line 8) | def default_list() -> List[str]:
  class ModelArguments (line 13) | class ModelArguments:
  class DataArguments (line 83) | class DataArguments:
    method __post_init__ (line 150) | def __post_init__(self):
  class RetrieverTrainingArguments (line 155) | class RetrieverTrainingArguments(TrainingArguments):

FILE: research/llm_reranker/finetune_for_instruction/data.py
  class TrainDatasetForReranker (line 19) | class TrainDatasetForReranker(Dataset):
    method __init__ (line 20) | def __init__(
    method __len__ (line 57) | def __len__(self):
    method is_chinese (line 60) | def is_chinese(self, text):
    method __getitem__ (line 64) | def __getitem__(self, item) -> List[BatchEncoding]:
  class RerankCollator (line 140) | class RerankCollator(DataCollatorForSeq2Seq):
    method __call__ (line 149) | def __call__(self, features, return_tensors='pt'):

FILE: research/llm_reranker/finetune_for_instruction/load_model.py
  function get_model (line 6) | def get_model(model_args, training_args):

FILE: research/llm_reranker/finetune_for_instruction/modeling.py
  class RerankerOutput (line 13) | class RerankerOutput(ModelOutput):
  class BiEncoderModel (line 18) | class BiEncoderModel(nn.Module):
    method __init__ (line 19) | def __init__(self,
    method gradient_checkpointing_enable (line 38) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 41) | def enable_input_require_grads(self, **kwargs):
    method encode (line 44) | def encode(self, features):
    method forward (line 59) | def forward(self, pair: Union[Dict[str, Tensor], List[Dict[str, Tensor...
    method compute_loss (line 75) | def compute_loss(self, scores, target):
    method save (line 78) | def save(self, output_dir: str):
    method save_pretrained (line 87) | def save_pretrained(self, **kwargs):

FILE: research/llm_reranker/finetune_for_instruction/run.py
  function main (line 21) | def main():

FILE: research/llm_reranker/finetune_for_instruction/trainer.py
  class BiTrainer (line 6) | class BiTrainer(Trainer):
    method _save (line 9) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 41) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: research/llm_reranker/finetune_for_layerwise/arguments.py
  function default_list (line 8) | def default_list() -> List[str]:
  class ModelArguments (line 13) | class ModelArguments:
  class DataArguments (line 97) | class DataArguments:
    method __post_init__ (line 168) | def __post_init__(self):
  class RetrieverTrainingArguments (line 173) | class RetrieverTrainingArguments(TrainingArguments):

FILE: research/llm_reranker/finetune_for_layerwise/configuration_minicpm_reranker.py
  class LayerWiseMiniCPMConfig (line 30) | class LayerWiseMiniCPMConfig(PretrainedConfig):
    method __init__ (line 116) | def __init__(
    method _rope_scaling_validation (line 189) | def _rope_scaling_validation(self):

FILE: research/llm_reranker/finetune_for_layerwise/data.py
  class TrainDatasetForReranker (line 19) | class TrainDatasetForReranker(Dataset):
    method __init__ (line 20) | def __init__(
    method __len__ (line 57) | def __len__(self):
    method __getitem__ (line 60) | def __getitem__(self, item) -> List[BatchEncoding]:
  class RerankCollator (line 135) | class RerankCollator(DataCollatorForSeq2Seq):
    method __call__ (line 144) | def __call__(self, features, return_tensors='pt'):

FILE: research/llm_reranker/finetune_for_layerwise/load_model.py
  function get_model (line 8) | def get_model(model_args, training_args, only_for_one_logit: int = None):

FILE: research/llm_reranker/finetune_for_layerwise/modeling.py
  class RerankerOutput (line 14) | class RerankerOutput(ModelOutput):
  class BiEncoderModel (line 18) | class BiEncoderModel(nn.Module):
    method __init__ (line 19) | def __init__(self,
    method gradient_checkpointing_enable (line 40) | def gradient_checkpointing_enable(self, **kwargs):
    method enable_input_require_grads (line 43) | def enable_input_require_grads(self, **kwargs):
    method encode (line 46) | def encode(self, features):
    method forward (line 64) | def forward(self, pair: Union[Dict[str, Tensor], List[Dict[str, Tensor...
    method compute_loss (line 94) | def compute_loss(self, scores, target):
    method save (line 97) | def save(self, output_dir: str):
    method save_pretrained (line 106) | def save_pretrained(self, **kwargs):

FILE: research/llm_reranker/finetune_for_layerwise/modeling_minicpm_reranker.py
  function _get_unpad_data (line 77) | def _get_unpad_data(attention_mask):
  function _expand_mask (line 89) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option...
  function _make_causal_mask (line 96) | def _make_causal_mask(
  function rms_layernorm (line 108) | def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float):
  class MiniCPMRMSNorm (line 115) | class MiniCPMRMSNorm(nn.Module):
    method __init__ (line 116) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 124) | def forward(self, hidden_states):
  class MiniCPMRotaryEmbedding (line 131) | class MiniCPMRotaryEmbedding(nn.Module):
    method __init__ (line 132) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 147) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 157) | def forward(self, x, seq_len=None):
  class MiniCPMLinearScalingRotaryEmbedding (line 168) | class MiniCPMLinearScalingRotaryEmbedding(MiniCPMRotaryEmbedding):
    method __init__ (line 171) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 175) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class MiniCPMDynamicNTKScalingRotaryEmbedding (line 187) | class MiniCPMDynamicNTKScalingRotaryEmbedding(MiniCPMRotaryEmbedding):
    method __init__ (line 190) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 194) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function rotate_half (line 214) | def rotate_half(x):
  function apply_rotary_pos_emb (line 221) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  class MiniCPMMLP (line 256) | class MiniCPMMLP(nn.Module):
    method __init__ (line 257) | def __init__(self, config):
    method forward (line 267) | def forward(self, x):
  function repeat_kv (line 290) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class MiniCPMAttention (line 302) | class MiniCPMAttention(nn.Module):
    method __init__ (line 305) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional...
    method _init_rope (line 338) | def _init_rope(self):
    method _shape (line 365) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 368) | def forward(
  class MiniCPMFlashAttention2 (line 473) | class MiniCPMFlashAttention2(MiniCPMAttention):
    method __init__ (line 480) | def __init__(self, *args, **kwargs):
    method forward (line 488) | def forward(
    method _flash_attention_forward (line 576) | def _flash_attention_forward(
    method _upad_input (line 633) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class MiniCPMSdpaAttention (line 672) | class MiniCPMSdpaAttention(MiniCPMAttention):
    method forward (line 680) | def forward(
  class MiniCPMDecoderLayer (line 766) | class MiniCPMDecoderLayer(nn.Module):
    method __init__ (line 767) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int):
    method forward (line 779) | def forward(
  class MiniCPMPreTrainedModel (line 862) | class MiniCPMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 872) | def _init_weights(self, module):
  class LayerWiseMiniCPMModel (line 958) | class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel):
    method __init__ (line 966) | def __init__(self, config: LayerWiseMiniCPMConfig):
    method get_input_embeddings (line 984) | def get_input_embeddings(self):
    method set_input_embeddings (line 987) | def set_input_embeddings(self, value):
    method forward (line 991) | def forward(
  class LayerWiseHead (line 1135) | class LayerWiseHead(nn.Module):
    method __init__ (line 1138) | def __init__(self, input_size, output_size):
    method forward (line 1142) | def forward(self, **kwargs):
  class LayerWiseMiniCPMForCausalLM (line 1145) | class LayerWiseMiniCPMForCausalLM(MiniCPMPreTrainedModel):
    method __init__ (line 1148) | def __init__(self, config):
    method get_input_embeddings (line 1191) | def get_input_embeddings(self):
    method set_input_embeddings (line 1194) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1197) | def get_output_embeddings(self):
    method set_output_embeddings (line 1200) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1203) | def set_decoder(self, decoder):
    method get_decoder (line 1206) | def get_decoder(self):
    method forward (line 1211) | def forward(
    method prepare_inputs_for_generation (line 1408) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1465) | def _reorder_cache(past_key_values, beam_idx):
    method chat (line 1474) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role...

FILE: research/llm_reranker/finetune_for_layerwise/run.py
  function main (line 21) | def main():

FILE: research/llm_reranker/finetune_for_layerwise/trainer.py
  class BiTrainer (line 5) | class BiTrainer(Trainer):
    method _save (line 8) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 40) | def compute_loss(self, model, inputs, return_outputs=False):

FILE: research/llm_reranker/merge/configuration_minicpm_reranker.py
  class LayerWiseMiniCPMConfig (line 29) | class LayerWiseMiniCPMConfig(PretrainedConfig):
    method __init__ (line 115) | def __init__(
    method _rope_scaling_validation (line 188) | def _rope_scaling_validation(self):

FILE: research/llm_reranker/merge/merge_base_model.py
  function merge_llm (line 5) | def merge_llm(model_name_or_path, lora_name_or_path, save_path, cache_di...

FILE: research/llm_reranker/merge/merge_layerwise_model_from_finetuned_model.py
  function merge_layerwise_finetuned_llm (line 5) | def merge_layerwise_finetuned_llm(model_name_or_path, lora_name_or_path,...

FILE: research/llm_reranker/merge/merge_layerwise_model_from_raw_model.py
  function merge_layerwise_raw_llm (line 6) | def merge_layerwise_raw_llm(model_name_or_path, lora_name_or_path, save_...

FILE: research/llm_reranker/merge/modeling_minicpm_reranker.py
  function _get_unpad_data (line 76) | def _get_unpad_data(attention_mask):
  function _expand_mask (line 88) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option...
  function _make_causal_mask (line 95) | def _make_causal_mask(
  function rms_layernorm (line 107) | def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float):
  class MiniCPMRMSNorm (line 114) | class MiniCPMRMSNorm(nn.Module):
    method __init__ (line 115) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 123) | def forward(self, hidden_states):
  class MiniCPMRotaryEmbedding (line 130) | class MiniCPMRotaryEmbedding(nn.Module):
    method __init__ (line 131) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 146) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 156) | def forward(self, x, seq_len=None):
  class MiniCPMLinearScalingRotaryEmbedding (line 167) | class MiniCPMLinearScalingRotaryEmbedding(MiniCPMRotaryEmbedding):
    method __init__ (line 170) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 174) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class MiniCPMDynamicNTKScalingRotaryEmbedding (line 186) | class MiniCPMDynamicNTKScalingRotaryEmbedding(MiniCPMRotaryEmbedding):
    method __init__ (line 189) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 193) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function rotate_half (line 213) | def rotate_half(x):
  function apply_rotary_pos_emb (line 220) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  class MiniCPMMLP (line 255) | class MiniCPMMLP(nn.Module):
    method __init__ (line 256) | def __init__(self, config):
    method forward (line 266) | def forward(self, x):
  function repeat_kv (line 289) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class MiniCPMAttention (line 301) | class MiniCPMAttention(nn.Module):
    method __init__ (line 304) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional...
    method _init_rope (line 337) | def _init_rope(self):
    method _shape (line 364) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 367) | def forward(
  class MiniCPMFlashAttention2 (line 472) | class MiniCPMFlashAttention2(MiniCPMAttention):
    method __init__ (line 479) | def __init__(self, *args, **kwargs):
    method forward (line 487) | def forward(
    method _flash_attention_forward (line 575) | def _flash_attention_forward(
    method _upad_input (line 632) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class MiniCPMSdpaAttention (line 671) | class MiniCPMSdpaAttention(MiniCPMAttention):
    method forward (line 679) | def forward(
  class MiniCPMDecoderLayer (line 765) | class MiniCPMDecoderLayer(nn.Module):
    method __init__ (line 766) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int):
    method forward (line 778) | def forward(
  class MiniCPMPreTrainedModel (line 861) | class MiniCPMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 871) | def _init_weights(self, module):
  class LayerWiseMiniCPMModel (line 957) | class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel):
    method __init__ (line 965) | def __init__(self, config: LayerWiseMiniCPMConfig):
    method get_input_embeddings (line 983) | def get_input_embeddings(self):
    method set_input_embeddings (line 986) | def set_input_embeddings(self, value):
    method forward (line 990) | def forward(
  class LayerWiseHead (line 1134) | class LayerWiseHead(nn.Module):
    method __init__ (line 1137) | def __init__(self, input_size, output_size):
    method forward (line 1141) | def forward(self, **kwargs):
  class LayerWiseMiniCPMForCausalLM (line 1144) | class LayerWiseMiniCPMForCausalLM(MiniCPMPreTrainedModel):
    method __init__ (line 1147) | def __init__(self, config):
    method get_input_embeddings (line 1190) | def get_input_embeddings(self):
    method set_input_embeddings (line 1193) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1196) | def get_output_embeddings(self):
    method set_output_embeddings (line 1199) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1202) | def set_decoder(self, decoder):
    method get_decoder (line 1205) | def get_decoder(self):
    method forward (line 1210) | def forward(
    method prepare_inputs_for_generation (line 1407) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1464) | def _reorder_cache(past_key_values, beam_idx):
    method chat (line 1473) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role...

FILE: research/old-examples/pretrain/retromae_pretrain/arguments.py
  class DataTrainingArguments (line 7) | class DataTrainingArguments:
    method __post_init__ (line 24) | def __post_init__(self):
  class ModelArguments (line 30) | class ModelArguments:

FILE: research/old-examples/pretrain/retromae_pretrain/data.py
  class DatasetForPretraining (line 13) | class DatasetForPretraining(torch.utils.data.Dataset):
    method __init__ (line 14) | def __init__(self, data_dir):
    method load_dataset (line 26) | def load_dataset(self, file):
    method __getitem__ (line 34) | def __getitem__(self, item):
    method __len__ (line 37) | def __len__(self):
  class RetroMAECollator (line 42) | class RetroMAECollator(DataCollatorForWholeWordMask):
    method __call__ (line 47) | def __call__(self, examples):

FILE: research/old-examples/pretrain/retromae_pretrain/enhancedDecoder.py
  class BertSelfAttention (line 24) | class BertSelfAttention(nn.Module):
    method __init__ (line 25) | def __init__(self, config, position_embedding_type=None):
    method transpose_for_scores (line 51) | def transpose_for_scores(self, x):
    method forward (line 56) | def forward(
  class BertAttention (line 153) | class BertAttention(nn.Module):
    method __init__ (line 154) | def __init__(self, config, position_embedding_type=None):
    method prune_heads (line 160) | def prune_heads(self, heads):
    method forward (line 178) | def forward(
  class BertLayerForDecoder (line 204) | class BertLayerForDecoder(nn.Module):
    method __init__ (line 205) | def __init__(self, config):
    method forward (line 219) | def forward(
    method feed_forward_chunk (line 285) | def feed_forward_chunk(self, attention_output):

FILE: research/old-examples/pretrain/retromae_pretrain/modeling.py
  class RetroMAEForPretraining (line 15) | class RetroMAEForPretraining(nn.Module):
    method __init__ (line 16) | def __init__(
    method gradient_checkpointing_enable (line 38) | def gradient_checkpointing_enable(self, **kwargs):
    method forward (line 41) | def forward(self,
    method mlm_loss (line 77) | def mlm_loss(self, hiddens, labels):
    method save_pretrained (line 91) | def save_pretrained(self, output_dir: str):
    method from_pretrained (line 96) | def from_pretrained(

FILE: research/old-examples/pretrain/retromae_pretrain/run.py
  class TrainerCallbackForSaving (line 27) | class TrainerCallbackForSaving(TrainerCallback):
    method on_epoch_end (line 28) | def on_epoch_end(self, args: TrainingArguments, state: TrainerState, c...
  function main (line 35) | def main():

FILE: research/old-examples/pretrain/retromae_pretrain/trainer.py
  class PreTrainer (line 11) | class PreTrainer(Trainer):
    method log (line 12) | def log(self, logs: Dict[str, float]) -> None:
    method _save (line 30) | def _save(self, output_dir: Optional[str] = None, state_dict=None):

FILE: research/old-examples/pretrain/retromae_pretrain/utils.py
  function tensorize_batch (line 6) | def tensorize_batch(sequences: List[torch.Tensor], padding_value, align_...

FILE: research/old-examples/search_demo/arguments.py
  class ModelArguments (line 5) | class ModelArguments:
  class DataArguments (line 13) | class DataArguments:

FILE: research/old-examples/search_demo/pre_process.py
  class EmbDataset (line 16) | class EmbDataset(Dataset):
    method __init__ (line 17) | def __init__(
    method __len__ (line 26) | def __len__(self):
    method __getitem__ (line 29) | def __getitem__(self, item):
  function inference (line 39) | def inference(json_path, emb_path, model_path):
  function build_bm25_index (line 74) | def build_bm25_index(dataset, collection_path, index_path):

FILE: research/old-examples/search_demo/tool.py
  class LocalDatasetLoader (line 13) | class LocalDatasetLoader:
    method __init__ (line 17) | def __init__(self,
  class QueryGenerator (line 27) | class QueryGenerator:
    method __init__ (line 28) | def __init__(self):
    method run (line 38) | def run(self, history, question):
  class AnswerGenerator (line 42) | class AnswerGenerator:
    method __init__ (line 43) | def __init__(self):
    method run (line 53) | def run(self, history, question, references):
  class BMVectorIndex (line 57) | class BMVectorIndex:
    method __init__ (line 58) | def __init__(self,
    method search_for_doc (line 75) | def search_for_doc(self, query: str, RANKING: int = 1000, TOP_N: int =...
  class Agent (line 96) | class Agent:
    method __init__ (line 97) | def __init__(self, index):
    method empty_memory (line 103) | def empty_memory(self):
    method update_memory (line 106) | def update_memory(self, question, answer):
    method generate_query (line 113) | def generate_query(self, question):
    method generate_answer (line 119) | def generate_answer(self, query, references):
    method answer (line 122) | def answer(self, question, RANKING=1000, TOP_N=5, verbose=True):

FILE: research/reranker/arguments.py
  class ModelArguments (line 7) | class ModelArguments:
  class DataArguments (line 27) | class DataArguments:
    method __post_init__ (line 40) | def __post_init__(self):

FILE: research/reranker/data.py
  class TrainDatasetForCE (line 16) | class TrainDatasetForCE(Dataset):
    method __init__ (line 17) | def __init__(
    method create_one_example (line 36) | def create_one_example(self, qry_encoding: str, doc_encoding: str):
    method __len__ (line 46) | def __len__(self):
    method __getitem__ (line 49) | def __getitem__(self, item) -> List[BatchEncoding]:
  class GroupCollator (line 68) | class GroupCollator(DataCollatorWithPadding):
    method __call__ (line 69) | def __call__(

FILE: research/reranker/modeling.py
  class CrossEncoder (line 13) | class CrossEncoder(nn.Module):
    method __init__ (line 14) | def __init__(self, hf_model: PreTrainedModel, model_args: ModelArgumen...
    method gradient_checkpointing_enable (line 30) | def gradient_checkpointing_enable(self, **kwargs):
    method forward (line 33) | def forward(self, batch):
    method from_pretrained (line 52) | def from_pretrained(
    method save_pretrained (line 60) | def save_pretrained(self, output_dir: str):

FILE: research/reranker/run.py
  function main (line 19) | def main():

FILE: research/reranker/trainer.py
  class CETrainer (line 13) | class CETrainer(Trainer):
    method _save (line 14) | def _save(self, output_dir: Optional[str] = None, state_dict=None):
    method compute_loss (line 30) | def compute_loss(self, model: CrossEncoder, inputs):

FILE: research/visual_bge/visual_bge/eva_clip/eva_vit_model.py
  class DropPath (line 33) | class DropPath(nn.Module):
    method __init__ (line 36) | def __init__(self, drop_prob=None):
    method forward (line 40) | def forward(self, x):
    method extra_repr (line 43) | def extra_repr(self) -> str:
  class Mlp (line 47) | class Mlp(nn.Module):
    method __init__ (line 48) | def __init__(
    method forward (line 70) | def forward(self, x):
  class SwiGLU (line 81) | class SwiGLU(nn.Module):
    method __init__ (line 82) | def __init__(self, in_features, hidden_features=None, out_features=Non...
    method forward (line 97) | def forward(self, x):
  class Attention (line 106) | class Attention(nn.Module):
    method __init__ (line 107) | def __init__(
    method forward (line 173) | def forward(self, x, rel_pos_bias=None, attn_mask=None):
  class Block (line 246) | class Block(nn.Module):
    method __init__ (line 248) | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_sc...
    method forward (line 287) | def forward(self, x, rel_pos_bias=None, attn_mask=None):
  class PatchEmbed (line 305) | class PatchEmbed(nn.Module):
    method __init__ (line 308) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=...
    method forward (line 320) | def forward(self, x, **kwargs):
  class RelativePositionBias (line 329) | class RelativePositionBias(nn.Module):
    method __init__ (line 331) | def __init__(self, window_size, num_heads):
    method forward (line 358) | def forward(self):
  class EVAVisionTransformer (line 366) | class EVAVisionTransformer(nn.Module):
    method __init__ (line 369) | def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classe...
    method fix_init_weight (line 443) | def fix_init_weight(self):
    method get_cast_dtype (line 454) | def get_cast_dtype(self) -> torch.dtype:
    method _init_weights (line 457) | def _init_weights(self, m):
    method get_num_layers (line 466) | def get_num_layers(self):
    method lock (line 469) | def lock(self, unlocked_groups=0, freeze_bn_stats=False):
    method set_grad_checkpointing (line 475) | def set_grad_checkpointing(self, enable=True):
    method no_weight_decay (line 479) | def no_weight_decay(self):
    method get_classifier (line 482) | def get_classifier(self):
    method reset_classifier (line 485) | def reset_classifier(self, num_classes, global_pool=''):
    method forward_features (line 489) | def forward_features(self, x, return_all_features=False):
    method forward (line 527) | def forward(self, x, return_all_features=True):

FILE: research/visual_bge/visual_bge/eva_clip/factory.py
  function _natural_key (line 25) | def _natural_key(string_):
  function _rescan_model_configs (line 29) | def _rescan_model_configs():
  function list_models (line 53) | def list_models():
  function add_model_config (line 58) | def add_model_config(path):
  function get_model_config (line 66) | def get_model_config(model_name):
  function get_tokenizer (line 73) | def get_tokenizer(model_name):
  function load_state_dict (line 80) | def load_state_dict(checkpoint_path: str, map_location: str='cpu', model...
  function load_checkpoint (line 110) | def load_checkpoint(model, checkpoint_path, model_key="model|module|stat...
  function load_clip_visual_state_dict (line 131) | def load_clip_visual_state_dict(checkpoint_path: str, map_location: str=...
  function load_clip_text_state_dict (line 144) | def load_clip_text_state_dict(checkpoint_path: str, map_location: str='c...
  function get_pretrained_tag (line 152) | def get_pretrained_tag(pretrained_model):
  function load_pretrained_checkpoint (line 163) | def load_pretrained_checkpoint(
  function create_model (line 211) | def create_model(
  function create_model_and_transforms (line 362) | def create_model_and_transforms(
  function create_eva_vision_and_transforms (line 416) | def create_eva_vision_and_transforms(
  function create_model_from_pretrained (line 471) | def create_model_from_pretrained(

FILE: research/visual_bge/visual_bge/eva_clip/hf_model.py
  class BaseModelOutput (line 21) | class BaseModelOutput:
  class PretrainedConfig (line 25) | class PretrainedConfig:
  function _camel2snake (line 31) | def _camel2snake(s):
  function register_pooler (line 37) | def register_pooler(cls):
  class MeanPooler (line 44) | class MeanPooler(nn.Module):
    method forward (line 46) | def forward(self, x:BaseModelOutput, attention_mask:TensorType):
  class MaxPooler (line 51) | class MaxPooler(nn.Module):
    method forward (line 53) | def forward(self, x:BaseModelOutput, attention_mask:TensorType):
  class ClsPooler (line 58) | class ClsPooler(nn.Module):
    method __init__ (line 60) | def __init__(self, use_pooler_output=True):
    method forward (line 65) | def forward(self, x:BaseModelOutput, attention_mask:TensorType):
  class HFTextEncoder (line 75) | class HFTextEncoder(nn.Module):
    method __init__ (line 77) | def __init__(
    method mask (line 152) | def mask(self, input_ids, vocab_size, device, targets=None, masked_ind...
    method forward_mlm (line 177) | def forward_mlm(self, input_ids, image_embeds, mlm_probability=0.25):
    method forward (line 213) | def forward(self, x:TensorType) -> TensorType:
    method lock (line 220) | def lock(self, unlocked_layers:int=0, freeze_layer_norm:bool=True):
    method set_grad_checkpointing (line 239) | def set_grad_checkpointing(self, enable=True):
    method get_num_layers (line 242) | def get_num_layers(self):
    method init_parameters (line 247) | def init_parameters(self):

FILE: research/visual_bge/visual_bge/eva_clip/loss.py
  function gather_features (line 21) | def gather_features(
  class ClipLoss (line 70) | class ClipLoss(nn.Module):
    method __init__ (line 72) | def __init__(
    method forward (line 95) | def forward(self, image_features, text_features, logit_scale=1.):

FILE: research/visual_bge/visual_bge/eva_clip/model.py
  class CLIPVisionCfg (line 37) | class CLIPVisionCfg:
  class CLIPTextCfg (line 66) | class CLIPTextCfg:
  function get_cast_dtype (line 83) | def get_cast_dtype(precision: str):
  function _build_vision_tower (line 92) | def _build_vision_tower(
  function _build_text_tower (line 173) | def _build_text_tower(
  class CLIP (line 210) | class CLIP(nn.Module):
    method __init__ (line 211) | def __init__(
    method lock_image_tower (line 233) | def lock_image_tower(self, unlocked_groups=0, freeze_bn_stats=False):
    method set_grad_checkpointing (line 238) | def set_grad_checkpointing(self, enable=True):
    method no_weight_decay (line 243) | def no_weight_decay(self):
    method encode_image (line 246) | def encode_image(self, image, normalize: bool = False):
    method encode_text (line 250) | def encode_text(self, text, normalize: bool = False):
    method forward (line 264) | def forward(self, image, text):
  class CustomCLIP (line 270) | class CustomCLIP(nn.Module):
    method __init__ (line 271) | def __init__(
    method lock_image_tower (line 291) | def lock_image_tower(self, unlocked_groups=0, freeze_bn_stats=False):
    method lock_text_tower (line 295) | def lock_text_tower(self, unlocked_layers:int=0, freeze_layer_norm:boo...
    method set_grad_checkpointing (line 299) | def set_grad_checkpointing(self, enable=True):
    method no_weight_decay (line 305) | def no_weight_decay(self):
    method encode_image (line 308) | def encode_image(self, image, normalize: bool = False):
    method encode_text (line 312) | def encode_text(self, text, normalize: bool = False):
    method forward (line 316) | def forward(self, image, text):
  function convert_weights_to_lp (line 328) | def convert_weights_to_lp(model: nn.Module, dtype=torch.float16):
  function convert_to_custom_text_state_dict (line 360) | def convert_to_custom_text_state_dict(state_dict: dict):
  function build_model_from_openai_state_dict (line 379) | def build_model_from_openai_state_dict(
  function trace_model (line 439) | def trace_model(model, batch_size=256, device=torch.device('cpu')):

FILE: research/visual_bge/visual_bge/eva_clip/modified_resnet.py
  class Bottleneck (line 10) | class Bottleneck(nn.Module):
    method __init__ (line 13) | def __init__(self, inplanes, planes, stride=1):
    method forward (line 42) | def forward(self, x: torch.Tensor):
  class AttentionPool2d (line 58) | class AttentionPool2d(nn.Module):
    method __init__ (line 59) | def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, o...
    method forward (line 68) | def forward(self, x):
  class ModifiedResNet (line 95) | class ModifiedResNet(nn.Module):
    method __init__ (line 103) | def __init__(self, layers, output_dim, heads, image_size=224, width=64):
    method _make_layer (line 132) | def _make_layer(self, planes, blocks, stride=1):
    method init_parameters (line 141) | def init_parameters(self):
    method lock (line 154) | def lock(self, unlocked_groups=0, freeze_bn_stats=False):
    method set_grad_checkpointing (line 162) | def set_grad_checkpointing(self, enable=True):
    method stem (line 166) | def stem(self, x):
    method forward (line 173) | def forward(self, x):

FILE: research/visual_bge/visual_bge/eva_clip/openai.py
  function list_openai_models (line 18) | def list_openai_models() -> List[str]:
  function load_openai_model (line 23) | def load_openai_model(

FILE: research/visual_bge/visual_bge/eva_clip/pretrained.py
  function _pcfg (line 18) | def _pcfg(url='', hf_hub='', filename='', mean=None, std=None):
  function _clean_tag (line 191) | def _clean_tag(tag: str):
  function list_pretrained (line 196) | def list_pretrained(as_str: bool = False):
  function list_pretrained_models_by_tag (line 203) | def list_pretrained_models_by_tag(tag: str):
  function list_pretrained_tags_by_model (line 213) | def list_pretrained_tags_by_model(model: str):
  function is_pretrained_cfg (line 221) | def is_pretrained_cfg(model: str, tag: str):
  function get_pretrained_cfg (line 227) | def get_pretrained_cfg(model: str, tag: str):
  function get_pretrained_url (line 234) | def get_pretrained_url(model: str, tag: str):
  function download_pretrained_from_url (line 239) | def download_pretrained_from_url(
  function has_hf_hub (line 285) | def has_hf_hub(necessary=False):
  function download_pretrained_from_hf (line 293) | def download_pretrained_from_hf(
  function download_pretrained (line 304) | def download_pretrained(

FILE: research/visual_bge/visual_bge/eva_clip/rope.py
  function broadcat (line 7) | def broadcat(tensors, dim = -1):
  function rotate_half (line 23) | def rotate_half(x):
  class VisionRotaryEmbedding (line 30) | class VisionRotaryEmbedding(nn.Module):
    method __init__ (line 31) | def __init__(
    method forward (line 70) | def forward(self, t, start_index = 0):
  class VisionRotaryEmbeddingFast (line 79) | class VisionRotaryEmbeddingFast(nn.Module):
    method __init__ (line 80) | def __init__(
    method forward (line 121) | def forward(self, t, patch_indices_keep=None):

FILE: research/visual_bge/visual_bge/eva_clip/timm_model.py
  class TimmModel (line 28) | class TimmModel(nn.Module):
    method __init__ (line 33) | def __init__(
    method lock (line 80) | def lock(self, unlocked_groups=0, freeze_bn_stats=False):
    method set_grad_checkpointing (line 113) | def set_grad_checkpointing(self, enable=True):
    method forward (line 119) | def forward(self, x):

FILE: research/visual_bge/visual_bge/eva_clip/tokenizer.py
  function default_bpe (line 21) | def default_bpe():
  function bytes_to_unicode (line 26) | def bytes_to_unicode():
  function get_pairs (line 48) | def get_pairs(word):
  function basic_clean (line 60) | def basic_clean(text):
  function whitespace_clean (line 66) | def whitespace_clean(text):
  class SimpleTokenizer (line 72) | class SimpleTokenizer(object):
    method __init__ (line 73) | def __init__(self, bpe_path: str = default_bpe(), special_tokens=None):
    method bpe (line 98) | def bpe(self, token):
    method encode (line 139) | def encode(self, text):
    method decode (line 147) | def decode(self, tokens):
  function tokenize (line 156) | def tokenize(texts: Union[str, List[str]], context_length: int = 77) -> ...
  class HFTokenizer (line 188) | class HFTokenizer:
    method __init__ (line 190) | def __init__(self, tokenizer_name:str):
    method __call__ (line 194) | def __call__(self, texts:Union[str, List[str]], context_length:int=77)...

FILE: research/visual_bge/visual_bge/eva_clip/transform.py
  class ResizeMaxSize (line 13) | class ResizeMaxSize(nn.Module):
    method __init__ (line 15) | def __init__(self, max_size, interpolation=InterpolationMode.BICUBIC, ...
    method forward (line 24) | def forward(self, img):
  function _convert_to_rgb (line 39) | def _convert_to_rgb(image):
  function image_transform (line 60) | def image_transform(

FILE: research/visual_bge/visual_bge/eva_clip/transformer.py
  class LayerNormFp32 (line 36) | class LayerNormFp32(nn.LayerNorm):
    method __init__ (line 38) | def __init__(self, *args, **kwargs):
    method forward (line 41) | def forward(self, x: torch.Tensor):
  class LayerNorm (line 52) | class LayerNorm(nn.LayerNorm):
    method forward (line 55) | def forward(self, x: torch.Tensor):
  class QuickGELU (line 60) | class QuickGELU(nn.Module):
    method forward (line 62) | def forward(self, x: torch.Tensor):
  class LayerScale (line 66) | class LayerScale(nn.Module):
    method __init__ (line 67) | def __init__(self, dim, init_values=1e-5, inplace=False):
    method forward (line 72) | def forward(self, x):
  class PatchDropout (line 75) | class PatchDropout(nn.Module):
    method __init__ (line 80) | def __init__(self, prob, exclude_first_token=True):
    method forward (line 87) | def forward(self, x):
  function _in_projection_packed (line 119) | def _in_projection_packed(
  class Attention (line 150) | class Attention(nn.Module):
    method __init__ (line 151) | def __init__(
    method forward (line 195) | def forward(self, x, attn_mask: Optional[torch.Tensor] = None):
  class CustomAttention (line 243) | class CustomAttention(nn.Module):
    method __init__ (line 244) | def __init__(
    method forward (line 286) | def forward(self, query: torch.Tensor, key: torch.Tensor, value: torch...
  class CustomResidualAttentionBlock (line 339) | class CustomResidualAttentionBlock(nn.Module):
    method __init__ (line 340) | def __init__(
    method forward (line 384) | def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, a...
  class CustomTransformer (line 389) | class CustomTransformer(nn.Module):
    method __init__ (line 390) | def __init__(
    method get_cast_dtype (line 429) | def get_cast_dtype(self) -> torch.dtype:
    method forward (line 432) | def forward(self, q: torch.Tensor, k: torch.Tensor = None, v: torch.Te...
  class ResidualAttentionBlock (line 443) | class ResidualAttentionBlock(nn.Module):
    method __init__ (line 444) | def __init__(
    method attention (line 474) | def attention(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor]...
    method forward (line 480) | def forward(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor] =...
  class Transformer (line 485) | class Transformer(nn.Module):
    method __init__ (line 486) | def __init__(
    method get_cast_dtype (line 508) | def get_cast_dtype(self) -> torch.dtype:
    method forward (line 511) | def forward(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor] =...
  class VisionTransformer (line 520) | class VisionTransformer(nn.Module):
    method __init__ (line 521) | def __init__(
    method lock (line 567) | def lock(self, unlocked_groups=0, freeze_bn_stats=False):
    method get_num_layers (line 600) | def get_num_layers(self):
    method set_grad_checkpointing (line 604) | def set_grad_checkpointing(self, enable=True):
    method no_weight_decay (line 608) | def no_weight_decay(self):
    method forward (line 611) | def forward(self, x: torch.Tensor, return_all_features: bool=False):
  class TextTransformer (line 642) | class TextTransformer(nn.Module):
    method __init__ (line 643) | def __init__(
    method init_parameters (line 686) | def init_parameters(self):
    method set_grad_checkpointing (line 703) | def set_grad_checkpointing(self, enable=True):
    method no_weight_decay (line 707) | def no_weight_decay(self):
    method get_num_layers (line 711) | def get_num_layers(self):
    method build_attention_mask (line 714) | def build_attention_mask(self):
    method forward (line 722) | def forward(self, text, return_all_features: bool=False):

FILE: research/visual_bge/visual_bge/eva_clip/utils.py
  function resize_clip_pos_embed (line 13) | def resize_clip_pos_embed(state_dict, model, interpolation: str = 'bicub...
  function resize_visual_pos_embed (line 46) | def resize_visual_pos_embed(state_dict, model, interpolation: str = 'bic...
  function resize_evaclip_pos_embed (line 78) | def resize_evaclip_pos_embed(state_dict, model, interpolation: str = 'bi...
  function resize_eva_pos_embed (line 109) | def resize_eva_pos_embed(state_dict, model, interpolation: str = 'bicubi...
  function resize_rel_pos_embed (line 140) | def resize_rel_pos_embed(state_dict, model, interpolation: str = 'bicubi...
  function freeze_batch_norm_2d (line 237) | def freeze_batch_norm_2d(module, module_match={}, name=''):
  function _ntuple (line 277) | def _ntuple(n):
  function is_logging (line 292) | def is_logging(args):
  class AllGather (line 304) | class AllGather(torch.autograd.Function):
    method forward (line 311) | def forward(ctx, tensor, rank, world_size):
    method backward (line 319) | def backward(ctx, grad_output):

FILE: research/visual_bge/visual_bge/modeling.py
  class EncoderOutput (line 19) | class EncoderOutput(ModelOutput):
  class Visualized_BGE (line 26) | class Visualized_BGE(nn.Module):
    method __init__ (line 27) | def __init__(self,
    method load_model (line 105) | def load_model(self, model_weight):
    method gradient_checkpointing_enable (line 108) | def gradient_checkpointing_enable(self, **kwargs):
    method encode (line 114) | def encode(self, image=None, text=None):
    method get_extended_attention_mask (line 132) | def get_extended_attention_mask(
    method sentence_embedding (line 173) | def sentence_embedding(self, hidden_state, mask):
    method encode_text (line 182) | def encode_text(self, texts):
    method encode_mm (line 224) | def encode_mm(self, images:torch.Tensor, texts):
    method compute_similarity (line 296) | def compute_similarity(self, q_reps, p_reps):
    method img_token_embedding (line 301) | def img_token_embedding(self, images):
    method encode_image (line 308) | def encode_image(self, images):
    method forward (line 320) | def forward(self, mm_it_query=None, image_candidate=None, text_candida...
    method compute_loss (line 361) | def compute_loss(self, scores, target):
    method _dist_gather_tensor (line 364) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]):
    method save (line 377) | def save(self, output_dir: str):

FILE: scripts/add_reranker_score.py
  class ScoreArgs (line 10) | class ScoreArgs:
  class ModelArgs (line 20) | class ModelArgs:
  function main (line 84) | def main(score_args: ScoreArgs, model_args: ModelArgs):

FILE: scripts/hn_mine.py
  class DataArgs (line 15) | class DataArgs:
  class ModelArgs (line 43) | class ModelArgs:
    method __post_init__ (line 94) | def __post_init__(self):
  function create_index (line 102) | def create_index(embeddings: np.ndarray, use_gpu: bool = False):
  function batch_search (line 114) | def batch_search(
  function get_corpus (line 129) | def get_corpus(candidate_pool: str):
  function find_knn_neg (line 138) | def find_knn_neg(
  function load_model (line 203) | def load_model(model_args: ModelArgs):
  function main (line 224) | def main(data_args: DataArgs, model_args: ModelArgs):

FILE: scripts/split_data_by_length.py
  function get_args (line 24) | def get_args():
  class SplitByLengthHandler (line 38) | class SplitByLengthHandler:
    method __init__ (line 39) | def __init__(self,
    method _get_length_ranges_list (line 76) | def _get_length_ranges_list(length_list: list):
    method _process_dir (line 90) | def _process_dir(self, dir_path: str, output_dir: str):
    method _process_file (line 104) | def _process_file(self, file_path: str, output_path: str):
    method run (line 165) | def run(self, input_path: str, output_dir: str, log_name: str=None):
  function main (line 190) | def main(args):

FILE: tests/conftest.py
  function device (line 17) | def device():
  function transformers_version (line 23) | def transformers_version():

FILE: tests/test_imports_v5.py
  function test_import_modeling_minicpm_reranker_inference (line 21) | def test_import_modeling_minicpm_reranker_inference():
  function test_import_modeling_minicpm_reranker_finetune (line 30) | def test_import_modeling_minicpm_reranker_finetune():
  function test_is_torch_fx_available_v5 (line 40) | def test_is_torch_fx_available_v5():
  function test_transformers_version (line 48) | def test_transformers_version(transformers_version):

FILE: tests/test_infer_embedder_basic.py
  function cosine_similarity (line 12) | def cosine_similarity(a, b):
  function test_bge_embedder_basic (line 16) | def test_bge_embedder_basic(device):
  function test_bge_embedder_batch (line 45) | def test_bge_embedder_batch(device):

FILE: tests/test_infer_reranker_basic.py
  function test_reranker_basic (line 14) | def test_reranker_basic(device):
  function test_reranker_batch (line 35) | def test_reranker_batch(device):