SYMBOL INDEX (3104 symbols across 447 files) FILE: FlagEmbedding/abc/evaluation/arguments.py class AbsEvalArgs (line 10) | class AbsEvalArgs: class AbsEvalModelArgs (line 82) | class AbsEvalModelArgs: method __post_init__ (line 181) | def __post_init__(self): FILE: FlagEmbedding/abc/evaluation/data_loader.py class AbsEvalDataLoader (line 14) | class AbsEvalDataLoader(ABC): method __init__ (line 25) | def __init__( method available_dataset_names (line 42) | def available_dataset_names(self) -> List[str]: method available_splits (line 49) | def available_splits(self, dataset_name: Optional[str] = None) -> List... method check_dataset_names (line 55) | def check_dataset_names(self, dataset_names: Union[str, List[str]]) ->... method check_splits (line 76) | def check_splits(self, splits: Union[str, List[str]], dataset_name: Op... method load_corpus (line 97) | def load_corpus(self, dataset_name: Optional[str] = None) -> datasets.... method load_qrels (line 115) | def load_qrels(self, dataset_name: Optional[str] = None, split: str = ... method load_queries (line 143) | def load_queries(self, dataset_name: Optional[str] = None, split: str ... method _load_remote_corpus (line 171) | def _load_remote_corpus( method _load_remote_qrels (line 190) | def _load_remote_qrels( method _load_remote_queries (line 211) | def _load_remote_queries( method _load_local_corpus (line 232) | def _load_local_corpus(self, save_dir: str, dataset_name: Optional[str... method _load_local_qrels (line 255) | def _load_local_qrels(self, save_dir: str, dataset_name: Optional[str]... method _load_local_queries (line 290) | def _load_local_queries(self, save_dir: str, dataset_name: Optional[st... method _download_file (line 319) | def _download_file(self, download_url: str, save_dir: str): method _get_fpath_size (line 350) | def _get_fpath_size(self, fpath: str) -> int: method _download_gz_file (line 369) | def _download_gz_file(self, download_url: str, save_dir: str): method _download_zip_file (line 395) | def _download_zip_file(self, download_url: str, save_dir: str): FILE: FlagEmbedding/abc/evaluation/evaluator.py class AbsEvaluator (line 18) | class AbsEvaluator: method __init__ (line 27) | def __init__( method check_data_info (line 37) | def check_data_info( method get_corpus_embd_save_dir (line 80) | def get_corpus_embd_save_dir( method __call__ (line 102) | def __call__( method save_search_results (line 267) | def save_search_results( method load_search_results (line 302) | def load_search_results(input_path: str): method compute_metrics (line 318) | def compute_metrics( method evaluate_results (line 358) | def evaluate_results( method output_eval_results_to_json (line 403) | def output_eval_results_to_json(eval_results_dict: dict, output_path: ... method get_results_df (line 417) | def get_results_df(metric: str, eval_results_dict: dict): method output_eval_results_to_markdown (line 467) | def output_eval_results_to_markdown(eval_results_dict: dict, output_pa... FILE: FlagEmbedding/abc/evaluation/runner.py class AbsEvalRunner (line 16) | class AbsEvalRunner: method __init__ (line 24) | def __init__( method get_models (line 37) | def get_models(model_args: AbsEvalModelArgs) -> Tuple[AbsEmbedder, Uni... method load_retriever_and_reranker (line 92) | def load_retriever_and_reranker(self) -> Tuple[EvalDenseRetriever, Uni... method load_data_loader (line 109) | def load_data_loader(self) -> AbsEvalDataLoader: method load_evaluator (line 124) | def load_evaluator(self) -> AbsEvaluator: method evaluate_metrics (line 138) | def evaluate_metrics( method run (line 183) | def run(self): FILE: FlagEmbedding/abc/evaluation/searcher.py class EvalRetriever (line 18) | class EvalRetriever(ABC): method __init__ (line 22) | def __init__(self, embedder: AbsEmbedder, search_top_k: int = 1000, ov... method __str__ (line 27) | def __str__(self) -> str: method stop_multi_process_pool (line 33) | def stop_multi_process_pool(self): method __call__ (line 43) | def __call__( class EvalDenseRetriever (line 71) | class EvalDenseRetriever(EvalRetriever): method __call__ (line 75) | def __call__( class EvalReranker (line 160) | class EvalReranker: method __init__ (line 164) | def __init__(self, reranker: AbsReranker, rerank_top_k: int = 100): method __str__ (line 168) | def __str__(self) -> str: method stop_multi_process_pool (line 174) | def stop_multi_process_pool(self): method __call__ (line 183) | def __call__( FILE: FlagEmbedding/abc/evaluation/utils.py function evaluate_mrr (line 14) | def evaluate_mrr( function evaluate_recall_cap (line 56) | def evaluate_recall_cap( function evaluate_metrics (line 95) | def evaluate_metrics( function index (line 150) | def index( function search (line 192) | def search( FILE: FlagEmbedding/abc/finetune/embedder/AbsArguments.py class AbsEmbedderModelArguments (line 9) | class AbsEmbedderModelArguments: class AbsEmbedderDataArguments (line 44) | class AbsEmbedderDataArguments: method __post_init__ (line 120) | def __post_init__(self): class AbsEmbedderTrainingArguments (line 134) | class AbsEmbedderTrainingArguments(TrainingArguments): FILE: FlagEmbedding/abc/finetune/embedder/AbsDataset.py class AbsEmbedderTrainDataset (line 23) | class AbsEmbedderTrainDataset(Dataset): method __init__ (line 30) | def __init__( method _load_dataset (line 54) | def _load_dataset(self, file_path: str): method _shuffle_text (line 83) | def _shuffle_text(self, text): method __len__ (line 102) | def __len__(self): method __getitem__ (line 105) | def __getitem__(self, item): class AbsEmbedderCollator (line 154) | class AbsEmbedderCollator(DataCollatorWithPadding): method __call__ (line 162) | def __call__(self, features): class AbsEmbedderSameDatasetTrainDataset (line 245) | class AbsEmbedderSameDatasetTrainDataset(AbsEmbedderTrainDataset): method __init__ (line 256) | def __init__( method _load_dataset (line 337) | def _load_dataset(self, file_path: str): method _get_file_batch_size (line 361) | def _get_file_batch_size(temp_dataset: datasets.Dataset, default_batch... method refresh_epoch (line 379) | def refresh_epoch(self): method __len__ (line 403) | def __len__(self): method __getitem__ (line 406) | def __getitem__(self, _): method _get_train_group_size (line 415) | def _get_train_group_size(self, batch_raw_data): method _create_batch_data (line 441) | def _create_batch_data(self, batch_raw_data): class AbsEmbedderSameDatasetCollator (line 514) | class AbsEmbedderSameDatasetCollator(DataCollatorWithPadding): method __call__ (line 527) | def __call__(self, features): class EmbedderTrainerCallbackForDataRefresh (line 607) | class EmbedderTrainerCallbackForDataRefresh(TrainerCallback): method __init__ (line 611) | def __init__(self, train_dataset: AbsEmbedderSameDatasetTrainDataset): method on_epoch_end (line 614) | def on_epoch_end( FILE: FlagEmbedding/abc/finetune/embedder/AbsModeling.py class EmbedderOutput (line 17) | class EmbedderOutput(ModelOutput): class AbsEmbedderModel (line 27) | class AbsEmbedderModel(ABC, nn.Module): method __init__ (line 39) | def __init__( method encode (line 64) | def encode(self, features): method compute_loss (line 73) | def compute_loss(self, scores, target): method compute_score (line 83) | def compute_score(self, q_reps, p_reps): method save (line 93) | def save(self, output_dir: str): method get_local_score (line 101) | def get_local_score(self, q_reps, p_reps, all_scores): method compute_local_score (line 121) | def compute_local_score(self, q_reps, p_reps, compute_score_func=None,... method _compute_no_in_batch_neg_loss (line 140) | def _compute_no_in_batch_neg_loss(self, q_reps, p_reps, teacher_target... method _compute_in_batch_neg_loss (line 162) | def _compute_in_batch_neg_loss(self, q_reps, p_reps, teacher_targets=N... method _compute_cross_device_neg_loss (line 194) | def _compute_cross_device_neg_loss(self, q_reps, p_reps, teacher_targe... method forward (line 234) | def forward( method distill_loss (line 280) | def distill_loss(kd_loss_type, teacher_targets, student_scores, group_... method _dist_gather_tensor (line 320) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]): FILE: FlagEmbedding/abc/finetune/embedder/AbsRunner.py class AbsEmbedderRunner (line 24) | class AbsEmbedderRunner(ABC): method __init__ (line 32) | def __init__( method load_tokenizer_and_model (line 79) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm... method load_trainer (line 88) | def load_trainer(self) -> AbsEmbedderTrainer: method load_train_dataset (line 96) | def load_train_dataset(self) -> AbsEmbedderTrainDataset: method load_data_collator (line 120) | def load_data_collator(self) -> AbsEmbedderCollator: method run (line 142) | def run(self): FILE: FlagEmbedding/abc/finetune/embedder/AbsTrainer.py class AbsEmbedderTrainer (line 9) | class AbsEmbedderTrainer(ABC, Trainer): method _save (line 14) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 17) | def compute_loss(self, model, inputs, return_outputs=False, **kwargs): FILE: FlagEmbedding/abc/finetune/reranker/AbsArguments.py class AbsRerankerModelArguments (line 9) | class AbsRerankerModelArguments: class AbsRerankerDataArguments (line 52) | class AbsRerankerDataArguments: method __post_init__ (line 126) | def __post_init__(self): class AbsRerankerTrainingArguments (line 140) | class AbsRerankerTrainingArguments(TrainingArguments): FILE: FlagEmbedding/abc/finetune/reranker/AbsDataset.py class AbsRerankerTrainDataset (line 23) | class AbsRerankerTrainDataset(Dataset): method __init__ (line 30) | def __init__( method _load_dataset (line 55) | def _load_dataset(self, file_path: str): method _shuffle_text (line 84) | def _shuffle_text(self, text): method __len__ (line 103) | def __len__(self): method create_one_example (line 106) | def create_one_example(self, qry_encoding: str, doc_encoding: str): method __getitem__ (line 127) | def __getitem__(self, item): class AbsRerankerCollator (line 180) | class AbsRerankerCollator(DataCollatorWithPadding): method __call__ (line 187) | def __call__(self, features) -> List[BatchEncoding]: class AbsLLMRerankerTrainDataset (line 211) | class AbsLLMRerankerTrainDataset(AbsRerankerTrainDataset): method __init__ (line 218) | def __init__( method __getitem__ (line 231) | def __getitem__(self, item) -> List[BatchEncoding]: class AbsLLMRerankerCollator (line 341) | class AbsLLMRerankerCollator(DataCollatorForSeq2Seq): method __call__ (line 350) | def __call__(self, features, return_tensors='pt'): FILE: FlagEmbedding/abc/finetune/reranker/AbsModeling.py class RerankerOutput (line 15) | class RerankerOutput(ModelOutput): class AbsRerankerModel (line 20) | class AbsRerankerModel(ABC, nn.Module): method __init__ (line 28) | def __init__( method gradient_checkpointing_enable (line 47) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 53) | def enable_input_require_grads(self, **kwargs): method encode (line 60) | def encode(self, features): method forward (line 68) | def forward(self, pair: Union[Dict[str, Tensor], List[Dict[str, Tensor... method compute_loss (line 101) | def compute_loss(self, scores, target): method save (line 113) | def save(self, output_dir: str): method save_pretrained (line 127) | def save_pretrained(self, *args, **kwargs): FILE: FlagEmbedding/abc/finetune/reranker/AbsRunner.py class AbsRerankerRunner (line 24) | class AbsRerankerRunner(ABC): method __init__ (line 32) | def __init__( method load_tokenizer_and_model (line 79) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsRe... method load_trainer (line 88) | def load_trainer(self) -> AbsRerankerTrainer: method load_train_dataset (line 96) | def load_train_dataset(self) -> AbsRerankerTrainDataset: method load_data_collator (line 114) | def load_data_collator(self) -> AbsRerankerCollator: method run (line 135) | def run(self): FILE: FlagEmbedding/abc/finetune/reranker/AbsTrainer.py class AbsRerankerTrainer (line 9) | class AbsRerankerTrainer(ABC, Trainer): method _save (line 14) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 17) | def compute_loss(self, model, inputs, return_outputs=False, **kwargs): FILE: FlagEmbedding/abc/inference/AbsEmbedder.py class AbsEmbedder (line 24) | class AbsEmbedder(ABC): method __init__ (line 47) | def __init__( method stop_self_pool (line 84) | def stop_self_pool(self): method get_target_devices (line 97) | def get_target_devices(devices: Union[str, int, List[str], List[int]])... method get_detailed_instruct (line 144) | def get_detailed_instruct(instruction_format: str, instruction: str, s... method encode_queries (line 159) | def encode_queries( method encode_corpus (line 193) | def encode_corpus( method encode (line 230) | def encode( method __del__ (line 287) | def __del__(self): method encode_single_device (line 291) | def encode_single_device( method start_multi_process_pool (line 306) | def start_multi_process_pool( method _encode_multi_process_worker (line 346) | def _encode_multi_process_worker( method stop_multi_process_pool (line 369) | def stop_multi_process_pool(pool: Dict[Literal["input", "output", "pro... method encode_multi_process (line 391) | def encode_multi_process( method _concatenate_results_from_multi_process (line 424) | def _concatenate_results_from_multi_process(self, results_list: List[U... FILE: FlagEmbedding/abc/inference/AbsReranker.py class AbsReranker (line 23) | class AbsReranker(ABC): method __init__ (line 47) | def __init__( method stop_self_pool (line 86) | def stop_self_pool(self): method get_target_devices (line 99) | def get_target_devices(devices: Union[str, int, List[str], List[int]])... method get_detailed_instruct (line 142) | def get_detailed_instruct(self, instruction_format: str, instruction: ... method get_detailed_inputs (line 157) | def get_detailed_inputs(self, sentence_pairs: Union[str, List[str]]): method compute_score (line 200) | def compute_score( method __del__ (line 231) | def __del__(self): method compute_score_single_gpu (line 235) | def compute_score_single_gpu( method start_multi_process_pool (line 251) | def start_multi_process_pool(self) -> Dict[Literal["input", "output", ... method encode_multi_process (line 284) | def encode_multi_process( method _encode_multi_process_worker (line 319) | def _encode_multi_process_worker( method stop_multi_process_pool (line 342) | def stop_multi_process_pool(pool: Dict[Literal["input", "output", "pro... FILE: FlagEmbedding/evaluation/air_bench/__main__.py function main (line 9) | def main(): FILE: FlagEmbedding/evaluation/air_bench/arguments.py class AIRBenchEvalModelArgs (line 7) | class AIRBenchEvalModelArgs: method __post_init__ (line 106) | def __post_init__(self): FILE: FlagEmbedding/evaluation/air_bench/runner.py class AIRBenchEvalRunner (line 12) | class AIRBenchEvalRunner: method __init__ (line 20) | def __init__( method load_retriever_and_reranker (line 31) | def load_retriever_and_reranker(self) -> Tuple[EvalDenseRetriever, Uni... method run (line 48) | def run(self): FILE: FlagEmbedding/evaluation/beir/__main__.py function main (line 9) | def main(): FILE: FlagEmbedding/evaluation/beir/arguments.py class BEIREvalArgs (line 7) | class BEIREvalArgs(AbsEvalArgs): FILE: FlagEmbedding/evaluation/beir/data_loader.py class BEIREvalDataLoader (line 15) | class BEIREvalDataLoader(AbsEvalDataLoader): method available_dataset_names (line 19) | def available_dataset_names(self) -> List[str]: method available_sub_dataset_names (line 28) | def available_sub_dataset_names(self, dataset_name: Optional[str] = No... method available_splits (line 42) | def available_splits(self, dataset_name: Optional[str] = None) -> List... method _load_remote_corpus (line 56) | def _load_remote_corpus( method _load_remote_qrels (line 127) | def _load_remote_qrels( method _load_remote_queries (line 212) | def _load_remote_queries( method load_corpus (line 291) | def load_corpus(self, dataset_name: Optional[str] = None, sub_dataset_... method load_qrels (line 310) | def load_qrels(self, dataset_name: Optional[str] = None, sub_dataset_n... method load_queries (line 339) | def load_queries(self, dataset_name: Optional[str] = None, sub_dataset... method _load_local_corpus (line 368) | def _load_local_corpus(self, save_dir: str, dataset_name: Optional[str... method _load_local_qrels (line 397) | def _load_local_qrels(self, save_dir: str, dataset_name: Optional[str]... method _load_local_queries (line 438) | def _load_local_queries(self, save_dir: str, dataset_name: Optional[st... FILE: FlagEmbedding/evaluation/beir/evaluator.py class BEIREvaluator (line 12) | class BEIREvaluator(AbsEvaluator): method check_data_info (line 16) | def check_data_info( method __call__ (line 66) | def __call__( method evaluate_results (line 351) | def evaluate_results( method save_search_results (line 418) | def save_search_results( FILE: FlagEmbedding/evaluation/beir/runner.py class BEIREvalRunner (line 11) | class BEIREvalRunner(AbsEvalRunner): method run (line 15) | def run(self): method load_data_loader (line 63) | def load_data_loader(self) -> BEIREvalDataLoader: method load_evaluator (line 78) | def load_evaluator(self) -> BEIREvaluator: FILE: FlagEmbedding/evaluation/bright/__main__.py function main (line 9) | def main(): FILE: FlagEmbedding/evaluation/bright/arguments.py class BrightEvalArgs (line 7) | class BrightEvalArgs(AbsEvalArgs): FILE: FlagEmbedding/evaluation/bright/data_loader.py class BrightShortEvalDataLoader (line 14) | class BrightShortEvalDataLoader(AbsEvalDataLoader): method available_dataset_names (line 18) | def available_dataset_names(self) -> List[str]: method available_splits (line 34) | def available_splits(self, dataset_name: str) -> List[str]: method _load_remote_corpus (line 51) | def _load_remote_corpus( method _load_remote_qrels (line 89) | def _load_remote_qrels( method _load_remote_queries (line 165) | def _load_remote_queries( class BrightLongEvalDataLoader (line 209) | class BrightLongEvalDataLoader(AbsEvalDataLoader): method available_dataset_names (line 213) | def available_dataset_names(self) -> List[str]: method available_splits (line 227) | def available_splits(self, dataset_name: str) -> List[str]: method _load_remote_corpus (line 244) | def _load_remote_corpus( method _load_remote_qrels (line 282) | def _load_remote_qrels( method _load_remote_queries (line 358) | def _load_remote_queries( FILE: FlagEmbedding/evaluation/bright/runner.py class BrightEvalRunner (line 14) | class BrightEvalRunner(AbsEvalRunner): method __init__ (line 18) | def __init__(self, eval_args: BrightEvalArgs, model_args: BrightEvalMo... method load_data_loader (line 23) | def load_data_loader(self) -> Union[BrightShortEvalDataLoader, BrightL... method load_retriever_and_reranker (line 45) | def load_retriever_and_reranker(self) -> Tuple[BrightEvalDenseRetrieve... method run (line 62) | def run(self): FILE: FlagEmbedding/evaluation/bright/searcher.py class BrightEvalDenseRetriever (line 15) | class BrightEvalDenseRetriever(EvalRetriever): method __call__ (line 19) | def __call__( FILE: FlagEmbedding/evaluation/custom/__main__.py function main (line 9) | def main(): FILE: FlagEmbedding/evaluation/custom/data_loader.py class CustomEvalDataLoader (line 10) | class CustomEvalDataLoader(AbsEvalDataLoader): method available_dataset_names (line 11) | def available_dataset_names(self) -> List[str]: method available_splits (line 14) | def available_splits(self, dataset_name: Optional[str] = None) -> List... FILE: FlagEmbedding/evaluation/custom/runner.py class CustomEvalRunner (line 6) | class CustomEvalRunner(AbsEvalRunner): method load_data_loader (line 7) | def load_data_loader(self) -> CustomEvalDataLoader: FILE: FlagEmbedding/evaluation/miracl/__main__.py function main (line 9) | def main(): FILE: FlagEmbedding/evaluation/miracl/data_loader.py class MIRACLEvalDataLoader (line 13) | class MIRACLEvalDataLoader(AbsEvalDataLoader): method available_dataset_names (line 17) | def available_dataset_names(self) -> List[str]: method available_splits (line 26) | def available_splits(self, dataset_name: str) -> List[str]: method _load_remote_corpus (line 41) | def _load_remote_corpus( method _load_remote_qrels (line 84) | def _load_remote_qrels( method _load_remote_queries (line 135) | def _load_remote_queries( FILE: FlagEmbedding/evaluation/miracl/runner.py class MIRACLEvalRunner (line 6) | class MIRACLEvalRunner(AbsEvalRunner): method load_data_loader (line 10) | def load_data_loader(self) -> MIRACLEvalDataLoader: FILE: FlagEmbedding/evaluation/mkqa/__main__.py function main (line 9) | def main(): FILE: FlagEmbedding/evaluation/mkqa/data_loader.py class MKQAEvalDataLoader (line 15) | class MKQAEvalDataLoader(AbsEvalDataLoader): method available_dataset_names (line 19) | def available_dataset_names(self) -> List[str]: method available_splits (line 28) | def available_splits(self, dataset_name: Optional[str] = None) -> List... method load_corpus (line 40) | def load_corpus(self, dataset_name: Optional[str] = None) -> datasets.... method _load_local_qrels (line 56) | def _load_local_qrels(self, save_dir: str, dataset_name: Optional[str]... method _load_remote_corpus (line 89) | def _load_remote_corpus( method _load_remote_qrels (line 132) | def _load_remote_qrels( method _load_remote_queries (line 183) | def _load_remote_queries( FILE: FlagEmbedding/evaluation/mkqa/evaluator.py class MKQAEvaluator (line 10) | class MKQAEvaluator(AbsEvaluator): method get_corpus_embd_save_dir (line 14) | def get_corpus_embd_save_dir( method evaluate_results (line 35) | def evaluate_results( method compute_metrics (line 87) | def compute_metrics( FILE: FlagEmbedding/evaluation/mkqa/runner.py class MKQAEvalRunner (line 7) | class MKQAEvalRunner(AbsEvalRunner): method load_data_loader (line 11) | def load_data_loader(self) -> MKQAEvalDataLoader: method load_evaluator (line 26) | def load_evaluator(self) -> MKQAEvaluator: FILE: FlagEmbedding/evaluation/mkqa/utils/compute_metrics.py class SimpleTokenizer (line 10) | class SimpleTokenizer: method __init__ (line 14) | def __init__(self): method tokenize (line 24) | def tokenize(self, text, uncased=False): function _normalize (line 33) | def _normalize(text): function has_answer (line 37) | def has_answer(answers, text, tokenizer) -> bool: function check_answer (line 51) | def check_answer(example, tokenizer) -> List[bool]: function evaluate_qa_recall (line 65) | def evaluate_qa_recall(ctxs, answers, k_values: Union[int, List[int]]=100): FILE: FlagEmbedding/evaluation/mkqa/utils/normalize_text.py function normalize_text (line 133) | def normalize_text(text: str): FILE: FlagEmbedding/evaluation/mldr/__main__.py function main (line 9) | def main(): FILE: FlagEmbedding/evaluation/mldr/data_loader.py class MLDREvalDataLoader (line 13) | class MLDREvalDataLoader(AbsEvalDataLoader): method available_dataset_names (line 17) | def available_dataset_names(self) -> List[str]: method available_splits (line 26) | def available_splits(self, dataset_name: Optional[str] = None) -> List... method _load_remote_corpus (line 38) | def _load_remote_corpus( method _load_remote_qrels (line 77) | def _load_remote_qrels( method _load_remote_queries (line 142) | def _load_remote_queries( FILE: FlagEmbedding/evaluation/mldr/runner.py class MLDREvalRunner (line 6) | class MLDREvalRunner(AbsEvalRunner): method load_data_loader (line 10) | def load_data_loader(self) -> MLDREvalDataLoader: FILE: FlagEmbedding/evaluation/msmarco/__main__.py function main (line 9) | def main(): FILE: FlagEmbedding/evaluation/msmarco/data_loader.py class MSMARCOEvalDataLoader (line 13) | class MSMARCOEvalDataLoader(AbsEvalDataLoader): method available_dataset_names (line 17) | def available_dataset_names(self) -> List[str]: method available_splits (line 26) | def available_splits(self, dataset_name: Optional[str] = None) -> List... method _load_remote_corpus (line 38) | def _load_remote_corpus( method _load_remote_qrels (line 104) | def _load_remote_qrels( method _load_remote_queries (line 198) | def _load_remote_queries( FILE: FlagEmbedding/evaluation/msmarco/runner.py class MSMARCOEvalRunner (line 6) | class MSMARCOEvalRunner(AbsEvalRunner): method load_data_loader (line 10) | def load_data_loader(self) -> MSMARCOEvalDataLoader: FILE: FlagEmbedding/evaluation/mteb/__main__.py function main (line 9) | def main(): FILE: FlagEmbedding/evaluation/mteb/arguments.py class MTEBEvalArgs (line 8) | class MTEBEvalArgs(AbsEvalArgs): FILE: FlagEmbedding/evaluation/mteb/prompts.py function get_task_def_by_task_name_and_type (line 4) | def get_task_def_by_task_name_and_type(task_name: str, task_type: str) -... FILE: FlagEmbedding/evaluation/mteb/runner.py function ensure_dir (line 17) | def ensure_dir(file_path): class MTEBEvalRunner (line 22) | class MTEBEvalRunner(AbsEvalRunner): method __init__ (line 26) | def __init__( method load_retriever_and_reranker (line 36) | def load_retriever_and_reranker(self) -> Tuple[MTEBEvalDenseRetriever,... method read_results (line 52) | def read_results(self, output_folder, tasks): method output_json (line 97) | def output_json(self, tasks_results, save_file): method run (line 136) | def run(self): FILE: FlagEmbedding/evaluation/mteb/searcher.py class MTEBEvalDenseRetriever (line 7) | class MTEBEvalDenseRetriever(EvalDenseRetriever): method __init__ (line 11) | def __init__(self, embedder, **kwargs): method set_examples (line 14) | def set_examples(self, examples_for_task: Optional[List[dict]] = None): method set_instruction (line 22) | def set_instruction(self, instruction: Optional[str] = None): method get_instruction (line 30) | def get_instruction(self): method set_normalize_embeddings (line 38) | def set_normalize_embeddings(self, normalize_embeddings: bool = True): method stop_pool (line 46) | def stop_pool(self): method encode_queries (line 53) | def encode_queries(self, queries: List[str], **kwargs): method encode_corpus (line 67) | def encode_corpus(self, corpus: List[Dict[str, str]], **kwargs): method encode (line 85) | def encode(self, corpus: List[Dict[str, str]], **kwargs): class MTEBEvalReranker (line 103) | class MTEBEvalReranker(EvalReranker): method __init__ (line 107) | def __init__(self, reranker, **kwargs): FILE: FlagEmbedding/finetune/embedder/decoder_only/base/__main__.py function main (line 11) | def main(): FILE: FlagEmbedding/finetune/embedder/decoder_only/base/arguments.py function default_target_modules (line 7) | def default_target_modules() -> List[int]: class DecoderOnlyEmbedderModelArguments (line 12) | class DecoderOnlyEmbedderModelArguments(AbsEmbedderModelArguments): FILE: FlagEmbedding/finetune/embedder/decoder_only/base/load_model.py function find_largest_checkpoint (line 13) | def find_largest_checkpoint(checkpoint_dir): function get_model (line 38) | def get_model(model_args: DecoderOnlyEmbedderModelArguments, output_dir:... function save_merged_model (line 123) | def save_merged_model(model_args: DecoderOnlyEmbedderModelArguments, out... FILE: FlagEmbedding/finetune/embedder/decoder_only/base/modeling.py class BiDecoderOnlyEmbedderModel (line 11) | class BiDecoderOnlyEmbedderModel(AbsEmbedderModel): method __init__ (line 27) | def __init__( method encode (line 50) | def encode(self, features): method _sentence_embedding (line 94) | def _sentence_embedding(self, last_hidden_state, attention_mask): method compute_score (line 129) | def compute_score(self, q_reps, p_reps): method _compute_similarity (line 143) | def _compute_similarity(self, q_reps, p_reps): method compute_loss (line 157) | def compute_loss(self, scores, target): method gradient_checkpointing_enable (line 169) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 175) | def enable_input_require_grads(self, **kwargs): method save (line 181) | def save(self, output_dir: str): FILE: FlagEmbedding/finetune/embedder/decoder_only/base/runner.py class DecoderOnlyEmbedderRunner (line 17) | class DecoderOnlyEmbedderRunner(AbsEmbedderRunner): method __init__ (line 25) | def __init__( method load_tokenizer_and_model (line 36) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm... method load_trainer (line 102) | def load_trainer(self) -> DecoderOnlyEmbedderTrainer: method run (line 119) | def run(self): FILE: FlagEmbedding/finetune/embedder/decoder_only/base/trainer.py class DecoderOnlyEmbedderTrainer (line 11) | class DecoderOnlyEmbedderTrainer(AbsEmbedderTrainer): method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None): FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/__main__.py function main (line 11) | def main(): FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/arguments.py function default_target_modules (line 10) | def default_target_modules() -> List[int]: class DecoderOnlyEmbedderICLModelArguments (line 15) | class DecoderOnlyEmbedderICLModelArguments(AbsEmbedderModelArguments): class DecoderOnlyEmbedderICLDataArguments (line 83) | class DecoderOnlyEmbedderICLDataArguments(AbsEmbedderDataArguments): FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/dataset.py class DecoderOnlyEmbedderICLSameDatasetTrainDataset (line 17) | class DecoderOnlyEmbedderICLSameDatasetTrainDataset(AbsEmbedderSameDatas... method __init__ (line 28) | def __init__( method _create_batch_data (line 51) | def _create_batch_data(self, batch_raw_data): class AbsEmbedderSameDatasetCollator (line 201) | class AbsEmbedderSameDatasetCollator(DataCollatorWithPadding): method __call__ (line 214) | def __call__(self, features): FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/load_model.py function find_largest_checkpoint (line 13) | def find_largest_checkpoint(checkpoint_dir): function get_model (line 38) | def get_model(model_args: DecoderOnlyEmbedderICLModelArguments, output_d... function save_merged_model (line 123) | def save_merged_model(model_args: DecoderOnlyEmbedderICLModelArguments, ... FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/modeling.py class BiDecoderOnlyEmbedderICLModel (line 11) | class BiDecoderOnlyEmbedderICLModel(AbsEmbedderModel): method __init__ (line 27) | def __init__( method encode (line 50) | def encode(self, features): method _sentence_embedding (line 94) | def _sentence_embedding(self, last_hidden_state, attention_mask): method compute_score (line 129) | def compute_score(self, q_reps, p_reps): method _compute_similarity (line 143) | def _compute_similarity(self, q_reps, p_reps): method compute_loss (line 157) | def compute_loss(self, scores, target): method gradient_checkpointing_enable (line 169) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 175) | def enable_input_require_grads(self, **kwargs): method save (line 181) | def save(self, output_dir: str): FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/runner.py class DecoderOnlyEmbedderICLRunner (line 18) | class DecoderOnlyEmbedderICLRunner(AbsEmbedderRunner): method __init__ (line 26) | def __init__( method load_tokenizer_and_model (line 37) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm... method load_trainer (line 103) | def load_trainer(self) -> DecoderOnlyEmbedderICLTrainer: method load_train_dataset (line 120) | def load_train_dataset(self) -> DecoderOnlyEmbedderICLSameDatasetTrain... method run (line 144) | def run(self): FILE: FlagEmbedding/finetune/embedder/decoder_only/icl/trainer.py class DecoderOnlyEmbedderICLTrainer (line 11) | class DecoderOnlyEmbedderICLTrainer(AbsEmbedderTrainer): method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None): FILE: FlagEmbedding/finetune/embedder/encoder_only/base/__main__.py function main (line 11) | def main(): FILE: FlagEmbedding/finetune/embedder/encoder_only/base/modeling.py class BiEncoderOnlyEmbedderModel (line 11) | class BiEncoderOnlyEmbedderModel(AbsEmbedderModel): method __init__ (line 27) | def __init__( method encode (line 50) | def encode(self, features): method _sentence_embedding (line 93) | def _sentence_embedding(self, last_hidden_state, attention_mask): method compute_score (line 128) | def compute_score(self, q_reps, p_reps): method _compute_similarity (line 142) | def _compute_similarity(self, q_reps, p_reps): method compute_loss (line 156) | def compute_loss(self, scores, target): method gradient_checkpointing_enable (line 168) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 174) | def enable_input_require_grads(self, **kwargs): method save (line 180) | def save(self, output_dir: str): FILE: FlagEmbedding/finetune/embedder/encoder_only/base/runner.py class EncoderOnlyEmbedderRunner (line 15) | class EncoderOnlyEmbedderRunner(AbsEmbedderRunner): method load_tokenizer_and_model (line 19) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm... method load_trainer (line 70) | def load_trainer(self) -> EncoderOnlyEmbedderTrainer: FILE: FlagEmbedding/finetune/embedder/encoder_only/base/trainer.py class EncoderOnlyEmbedderTrainer (line 11) | class EncoderOnlyEmbedderTrainer(AbsEmbedderTrainer): method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None): FILE: FlagEmbedding/finetune/embedder/encoder_only/m3/__main__.py function main (line 11) | def main(): FILE: FlagEmbedding/finetune/embedder/encoder_only/m3/arguments.py class EncoderOnlyEmbedderM3ModelArguments (line 10) | class EncoderOnlyEmbedderM3ModelArguments(AbsEmbedderModelArguments): class EncoderOnlyEmbedderM3TrainingArguments (line 18) | class EncoderOnlyEmbedderM3TrainingArguments(AbsEmbedderTrainingArguments): FILE: FlagEmbedding/finetune/embedder/encoder_only/m3/modeling.py class EncoderOnlyEmbedderM3Model (line 15) | class EncoderOnlyEmbedderM3Model(AbsEmbedderModel): method __init__ (line 32) | def __init__( method _dense_embedding (line 75) | def _dense_embedding(self, last_hidden_state, attention_mask): method _sparse_embedding (line 110) | def _sparse_embedding(self, hidden_state, input_ids, return_embedding:... method _colbert_embedding (line 153) | def _colbert_embedding(self, last_hidden_state, mask): method compute_score (line 167) | def compute_score( method compute_dense_score (line 189) | def compute_dense_score(self, q_reps, p_reps): method compute_sparse_score (line 203) | def compute_sparse_score(self, q_reps, p_reps): method compute_colbert_score (line 217) | def compute_colbert_score(self, q_reps, p_reps, q_mask: torch.Tensor=N... method ensemble_score (line 233) | def ensemble_score(self, q_reps, p_reps, dense_scores=None, sparse_sco... method _encode (line 253) | def _encode(self, features): method encode (line 276) | def encode(self, features): method _compute_similarity (line 328) | def _compute_similarity(self, q_reps, p_reps): method _get_queries_attention_mask (line 342) | def _get_queries_attention_mask(self, queries: Union[Dict[str, Tensor]... method forward (line 368) | def forward( method compute_loss (line 472) | def compute_loss(self, scores, target): method gradient_checkpointing_enable (line 484) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 490) | def enable_input_require_grads(self, **kwargs): method save (line 496) | def save(self, output_dir: str): class EncoderOnlyEmbedderM3ModelForInference (line 518) | class EncoderOnlyEmbedderM3ModelForInference(EncoderOnlyEmbedderM3Model): method forward (line 522) | def forward(self, FILE: FlagEmbedding/finetune/embedder/encoder_only/m3/runner.py class EncoderOnlyEmbedderM3Runner (line 22) | class EncoderOnlyEmbedderM3Runner(AbsEmbedderRunner): method __init__ (line 31) | def __init__( method get_model (line 43) | def get_model( method load_tokenizer_and_model (line 100) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm... method load_trainer (line 156) | def load_trainer(self) -> EncoderOnlyEmbedderM3Trainer: FILE: FlagEmbedding/finetune/embedder/encoder_only/m3/trainer.py class EncoderOnlyEmbedderM3Trainer (line 11) | class EncoderOnlyEmbedderM3Trainer(AbsEmbedderTrainer): method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None): FILE: FlagEmbedding/finetune/reranker/decoder_only/base/__main__.py function main (line 14) | def main(): FILE: FlagEmbedding/finetune/reranker/decoder_only/base/arguments.py function default_target_modules (line 7) | def default_target_modules() -> List[int]: class RerankerModelArguments (line 12) | class RerankerModelArguments(AbsRerankerModelArguments): FILE: FlagEmbedding/finetune/reranker/decoder_only/base/load_model.py function find_largest_checkpoint (line 12) | def find_largest_checkpoint(checkpoint_dir): function get_model (line 37) | def get_model(model_args: RerankerModelArguments): function save_merged_model (line 106) | def save_merged_model(model_args: RerankerModelArguments, output_dir: str): FILE: FlagEmbedding/finetune/reranker/decoder_only/base/modeling.py class CrossDecoderModel (line 10) | class CrossDecoderModel(AbsRerankerModel): method __init__ (line 19) | def __init__( method encode (line 31) | def encode(self, features): FILE: FlagEmbedding/finetune/reranker/decoder_only/base/runner.py class DecoderOnlyRerankerRunner (line 19) | class DecoderOnlyRerankerRunner(AbsRerankerRunner): method __init__ (line 28) | def __init__( method load_tokenizer_and_model (line 36) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsRe... method load_trainer (line 81) | def load_trainer(self) -> DecoderOnlyRerankerTrainer: method run (line 96) | def run(self): FILE: FlagEmbedding/finetune/reranker/decoder_only/base/trainer.py class DecoderOnlyRerankerTrainer (line 13) | class DecoderOnlyRerankerTrainer(AbsRerankerTrainer): method _save (line 17) | def _save(self, output_dir: Optional[str] = None, state_dict=None): FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/__main__.py function main (line 14) | def main(): FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/arguments.py function default_target_modules (line 7) | def default_target_modules() -> List[int]: class RerankerModelArguments (line 12) | class RerankerModelArguments(AbsRerankerModelArguments): FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/configuration_minicpm_reranker.py class LayerWiseMiniCPMConfig (line 30) | class LayerWiseMiniCPMConfig(PretrainedConfig): method __init__ (line 116) | def __init__( method _rope_scaling_validation (line 189) | def _rope_scaling_validation(self): FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/load_model.py function find_largest_checkpoint (line 16) | def find_largest_checkpoint(checkpoint_dir): function get_model (line 41) | def get_model(model_args: RerankerModelArguments, only_for_one_logit): function save_merged_model (line 170) | def save_merged_model(model_args: RerankerModelArguments, output_dir: str): FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/modeling.py class CrossDecoderModel (line 12) | class CrossDecoderModel(AbsRerankerModel): method __init__ (line 22) | def __init__( method encode (line 37) | def encode(self, features): method forward (line 50) | def forward(self, pair: Union[Dict[str, Tensor], List[Dict[str, Tensor... FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/modeling_minicpm_reranker.py function _get_unpad_data (line 77) | def _get_unpad_data(attention_mask): function _expand_mask (line 89) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option... function _make_causal_mask (line 96) | def _make_causal_mask( function rms_layernorm (line 108) | def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float): class MiniCPMRMSNorm (line 115) | class MiniCPMRMSNorm(nn.Module): method __init__ (line 116) | def __init__(self, hidden_size, eps=1e-6): method forward (line 124) | def forward(self, hidden_states): class MiniCPMRotaryEmbedding (line 131) | class MiniCPMRotaryEmbedding(nn.Module): method __init__ (line 132) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 147) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 157) | def forward(self, x, seq_len=None): class MiniCPMLinearScalingRotaryEmbedding (line 168) | class MiniCPMLinearScalingRotaryEmbedding(MiniCPMRotaryEmbedding): method __init__ (line 171) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 175) | def _set_cos_sin_cache(self, seq_len, device, dtype): class MiniCPMDynamicNTKScalingRotaryEmbedding (line 187) | class MiniCPMDynamicNTKScalingRotaryEmbedding(MiniCPMRotaryEmbedding): method __init__ (line 190) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 194) | def _set_cos_sin_cache(self, seq_len, device, dtype): function rotate_half (line 214) | def rotate_half(x): function apply_rotary_pos_emb (line 221) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): class MiniCPMMLP (line 256) | class MiniCPMMLP(nn.Module): method __init__ (line 257) | def __init__(self, config): method forward (line 267) | def forward(self, x): function repeat_kv (line 290) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class MiniCPMAttention (line 302) | class MiniCPMAttention(nn.Module): method __init__ (line 305) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional... method _init_rope (line 338) | def _init_rope(self): method _shape (line 365) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 368) | def forward( class MiniCPMFlashAttention2 (line 473) | class MiniCPMFlashAttention2(MiniCPMAttention): method __init__ (line 480) | def __init__(self, *args, **kwargs): method forward (line 488) | def forward( method _flash_attention_forward (line 576) | def _flash_attention_forward( method _upad_input (line 633) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class MiniCPMSdpaAttention (line 672) | class MiniCPMSdpaAttention(MiniCPMAttention): method forward (line 680) | def forward( class MiniCPMDecoderLayer (line 766) | class MiniCPMDecoderLayer(nn.Module): method __init__ (line 767) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int): method forward (line 779) | def forward( class MiniCPMPreTrainedModel (line 862) | class MiniCPMPreTrainedModel(PreTrainedModel): method _init_weights (line 872) | def _init_weights(self, module): class LayerWiseMiniCPMModel (line 958) | class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel): method __init__ (line 966) | def __init__(self, config: LayerWiseMiniCPMConfig): method get_input_embeddings (line 984) | def get_input_embeddings(self): method set_input_embeddings (line 987) | def set_input_embeddings(self, value): method forward (line 991) | def forward( class LayerWiseHead (line 1135) | class LayerWiseHead(nn.Module): method __init__ (line 1138) | def __init__(self, input_size, output_size): method forward (line 1142) | def forward(self, **kwargs): class LayerWiseMiniCPMForCausalLM (line 1145) | class LayerWiseMiniCPMForCausalLM(MiniCPMPreTrainedModel): method __init__ (line 1148) | def __init__(self, config): method get_input_embeddings (line 1191) | def get_input_embeddings(self): method set_input_embeddings (line 1194) | def set_input_embeddings(self, value): method get_output_embeddings (line 1197) | def get_output_embeddings(self): method set_output_embeddings (line 1200) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1203) | def set_decoder(self, decoder): method get_decoder (line 1206) | def get_decoder(self): method forward (line 1211) | def forward( method prepare_inputs_for_generation (line 1408) | def prepare_inputs_for_generation( method _reorder_cache (line 1465) | def _reorder_cache(past_key_values, beam_idx): method chat (line 1474) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role... FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/runner.py class DecoderOnlyRerankerRunner (line 18) | class DecoderOnlyRerankerRunner(AbsRerankerRunner): method __init__ (line 27) | def __init__( method load_tokenizer_and_model (line 35) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsRe... method load_trainer (line 82) | def load_trainer(self) -> DecoderOnlyRerankerTrainer: method run (line 97) | def run(self): FILE: FlagEmbedding/finetune/reranker/decoder_only/layerwise/trainer.py class DecoderOnlyRerankerTrainer (line 13) | class DecoderOnlyRerankerTrainer(AbsRerankerTrainer): method _save (line 17) | def _save(self, output_dir: Optional[str] = None, state_dict=None): FILE: FlagEmbedding/finetune/reranker/encoder_only/base/__main__.py function main (line 11) | def main(): FILE: FlagEmbedding/finetune/reranker/encoder_only/base/modeling.py class CrossEncoderModel (line 9) | class CrossEncoderModel(AbsRerankerModel): method __init__ (line 17) | def __init__( method encode (line 29) | def encode(self, features): FILE: FlagEmbedding/finetune/reranker/encoder_only/base/runner.py class EncoderOnlyRerankerRunner (line 15) | class EncoderOnlyRerankerRunner(AbsRerankerRunner): method load_tokenizer_and_model (line 19) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsRe... method load_trainer (line 63) | def load_trainer(self) -> EncoderOnlyRerankerTrainer: FILE: FlagEmbedding/finetune/reranker/encoder_only/base/trainer.py class EncoderOnlyRerankerTrainer (line 11) | class EncoderOnlyRerankerTrainer(AbsRerankerTrainer): method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None): FILE: FlagEmbedding/inference/auto_embedder.py class FlagAutoModel (line 13) | class FlagAutoModel: method __init__ (line 17) | def __init__(self): method from_finetuned (line 23) | def from_finetuned( FILE: FlagEmbedding/inference/auto_reranker.py class FlagAutoReranker (line 14) | class FlagAutoReranker: method __init__ (line 18) | def __init__(self): method from_finetuned (line 24) | def from_finetuned( FILE: FlagEmbedding/inference/embedder/decoder_only/base.py function last_token_pool (line 12) | def last_token_pool(last_hidden_states: torch.Tensor, class BaseLLMEmbedder (line 32) | class BaseLLMEmbedder(AbsEmbedder): method __init__ (line 58) | def __init__( method encode_queries (line 104) | def encode_queries( method encode_corpus (line 132) | def encode_corpus( method encode (line 160) | def encode( method encode_single_device (line 189) | def encode_single_device( FILE: FlagEmbedding/inference/embedder/decoder_only/icl.py function last_token_pool (line 16) | def last_token_pool(last_hidden_states: torch.Tensor, class ICLLLMEmbedder (line 36) | class ICLLLMEmbedder(AbsEmbedder): method __init__ (line 66) | def __init__( method __del__ (line 124) | def __del__(self): method set_examples (line 128) | def set_examples(self, examples_for_task: Optional[List[dict]] = None): method get_detailed_example (line 163) | def get_detailed_example(instruction_format: str, instruction: str, qu... method stop_self_query_pool (line 179) | def stop_self_query_pool(self): method encode_queries (line 190) | def encode_queries( method encode_corpus (line 237) | def encode_corpus( method encode (line 266) | def encode( method _encode_queries_multi_process_worker (line 296) | def _encode_queries_multi_process_worker( method encode_queries_single_device (line 318) | def encode_queries_single_device( method encode_single_device (line 454) | def encode_single_device( FILE: FlagEmbedding/inference/embedder/encoder_only/base.py class BaseEmbedder (line 11) | class BaseEmbedder(AbsEmbedder): method __init__ (line 40) | def __init__( method encode_queries (line 85) | def encode_queries( method encode_corpus (line 113) | def encode_corpus( method encode (line 141) | def encode( method encode_single_device (line 170) | def encode_single_device( method pooling (line 278) | def pooling( FILE: FlagEmbedding/inference/embedder/encoder_only/m3.py class M3Embedder (line 20) | class M3Embedder(AbsEmbedder): method __init__ (line 50) | def __init__( method convert_id_to_token (line 106) | def convert_id_to_token(self, lexical_weights: List[Dict]): method compute_lexical_matching_score (line 129) | def compute_lexical_matching_score( method colbert_score (line 163) | def colbert_score(self, q_reps, p_reps): method encode_queries (line 179) | def encode_queries( method encode_corpus (line 221) | def encode_corpus( method encode (line 263) | def encode( method encode_single_device (line 306) | def encode_single_device( method compute_score (line 482) | def compute_score( method compute_score_multi_process (line 535) | def compute_score_multi_process( method _compute_score_multi_process_worker (line 571) | def _compute_score_multi_process_worker( method compute_score_single_device (line 593) | def compute_score_single_device( method _concatenate_results_from_multi_process (line 719) | def _concatenate_results_from_multi_process( method _concatenate_compute_score_results_from_multi_process (line 753) | def _concatenate_compute_score_results_from_multi_process( FILE: FlagEmbedding/inference/embedder/model_mapping.py class EmbedderModelClass (line 10) | class EmbedderModelClass(Enum): class PoolingMethod (line 25) | class PoolingMethod(Enum): class EmbedderConfig (line 32) | class EmbedderConfig: function support_native_bge_model_list (line 268) | def support_native_bge_model_list()->List[str]: function support_model_list (line 271) | def support_model_list()->List[str]: FILE: FlagEmbedding/inference/reranker/decoder_only/base.py function last_logit_pool (line 15) | def last_logit_pool(logits: Tensor, class DatasetForReranker (line 35) | class DatasetForReranker(Dataset): method __init__ (line 47) | def __init__( method __len__ (line 85) | def __len__(self): method __getitem__ (line 88) | def __getitem__(self, item): class Collater (line 122) | class Collater: method __init__ (line 130) | def __init__(self, tokenizer, max_len): method __call__ (line 138) | def __call__(self, data): class BaseLLMReranker (line 171) | class BaseLLMReranker(AbsReranker): method __init__ (line 199) | def __init__( method compute_score_single_gpu (line 257) | def compute_score_single_gpu( FILE: FlagEmbedding/inference/reranker/decoder_only/layerwise.py function last_logit_pool_layerwise (line 18) | def last_logit_pool_layerwise(logits: Tensor, class LayerWiseLLMReranker (line 38) | class LayerWiseLLMReranker(AbsReranker): method __init__ (line 67) | def __init__( method compute_score_single_gpu (line 136) | def compute_score_single_gpu( FILE: FlagEmbedding/inference/reranker/decoder_only/lightweight.py function last_logit_pool_lightweight (line 15) | def last_logit_pool_lightweight(logits: Tensor, class Collater_for_lightweight (line 35) | class Collater_for_lightweight: method __init__ (line 43) | def __init__(self, tokenizer, max_len): method __call__ (line 51) | def __call__(self, data): class LightweightLLMReranker (line 90) | class LightweightLLMReranker(AbsReranker): method __init__ (line 122) | def __init__( method compute_score_single_gpu (line 206) | def compute_score_single_gpu( FILE: FlagEmbedding/inference/reranker/decoder_only/models/configuration_minicpm_reranker.py class LayerWiseMiniCPMConfig (line 30) | class LayerWiseMiniCPMConfig(PretrainedConfig): method __init__ (line 116) | def __init__( method _rope_scaling_validation (line 189) | def _rope_scaling_validation(self): FILE: FlagEmbedding/inference/reranker/decoder_only/models/gemma_config.py class CostWiseGemmaConfig (line 26) | class CostWiseGemmaConfig(Gemma2Config): method __init__ (line 54) | def __init__( FILE: FlagEmbedding/inference/reranker/decoder_only/models/gemma_model.py function _get_unpad_data (line 69) | def _get_unpad_data(attention_mask): class CostWiseGemma2PreTrainedModel (line 84) | class CostWiseGemma2PreTrainedModel(PreTrainedModel): method _init_weights (line 97) | def _init_weights(self, module): class CostWiseModelOutputWithPast (line 112) | class CostWiseModelOutputWithPast(ModelOutput): class CostWiseCausalLMOutputWithPast (line 120) | class CostWiseCausalLMOutputWithPast(ModelOutput): function token_compress (line 128) | def token_compress(compress_ratio, class CostWiseGemmaModel (line 237) | class CostWiseGemmaModel(CostWiseGemma2PreTrainedModel): method __init__ (line 245) | def __init__(self, config: CostWiseGemmaConfig): method get_input_embeddings (line 260) | def get_input_embeddings(self): method set_input_embeddings (line 263) | def set_input_embeddings(self, value): method forward (line 267) | def forward( method _update_causal_mask (line 434) | def _update_causal_mask( class CostWiseHead (line 479) | class CostWiseHead(nn.Module): method __init__ (line 482) | def __init__(self, input_size, output_size): method forward (line 486) | def forward(self, **kwargs): class CostWiseGemmaForCausalLM (line 490) | class CostWiseGemmaForCausalLM(CostWiseGemma2PreTrainedModel): method __init__ (line 493) | def __init__(self, config: CostWiseGemmaConfig): method get_input_embeddings (line 510) | def get_input_embeddings(self): method set_input_embeddings (line 513) | def set_input_embeddings(self, value): method get_output_embeddings (line 516) | def get_output_embeddings(self): method set_output_embeddings (line 519) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 522) | def set_decoder(self, decoder): method get_decoder (line 525) | def get_decoder(self): method forward (line 530) | def forward( method prepare_inputs_for_generation (line 664) | def prepare_inputs_for_generation( method _reorder_cache (line 739) | def _reorder_cache(past_key_values, beam_idx): FILE: FlagEmbedding/inference/reranker/decoder_only/models/modeling_minicpm_reranker.py function _get_unpad_data (line 80) | def _get_unpad_data(attention_mask): function _expand_mask (line 92) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option... function _make_causal_mask (line 99) | def _make_causal_mask( function rms_layernorm (line 111) | def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float): class MiniCPMRMSNorm (line 118) | class MiniCPMRMSNorm(nn.Module): method __init__ (line 119) | def __init__(self, hidden_size, eps=1e-6): method forward (line 127) | def forward(self, hidden_states): class MiniCPMRotaryEmbedding (line 134) | class MiniCPMRotaryEmbedding(nn.Module): method __init__ (line 135) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 150) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 160) | def forward(self, x, seq_len=None): class MiniCPMLinearScalingRotaryEmbedding (line 171) | class MiniCPMLinearScalingRotaryEmbedding(MiniCPMRotaryEmbedding): method __init__ (line 174) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 178) | def _set_cos_sin_cache(self, seq_len, device, dtype): class MiniCPMDynamicNTKScalingRotaryEmbedding (line 190) | class MiniCPMDynamicNTKScalingRotaryEmbedding(MiniCPMRotaryEmbedding): method __init__ (line 193) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 197) | def _set_cos_sin_cache(self, seq_len, device, dtype): function rotate_half (line 217) | def rotate_half(x): function apply_rotary_pos_emb (line 224) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): class MiniCPMMLP (line 259) | class MiniCPMMLP(nn.Module): method __init__ (line 260) | def __init__(self, config): method forward (line 270) | def forward(self, x): function repeat_kv (line 293) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class MiniCPMAttention (line 305) | class MiniCPMAttention(nn.Module): method __init__ (line 308) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional... method _init_rope (line 341) | def _init_rope(self): method _shape (line 368) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 371) | def forward( class MiniCPMFlashAttention2 (line 476) | class MiniCPMFlashAttention2(MiniCPMAttention): method __init__ (line 483) | def __init__(self, *args, **kwargs): method forward (line 491) | def forward( method _flash_attention_forward (line 579) | def _flash_attention_forward( method _upad_input (line 636) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class MiniCPMSdpaAttention (line 675) | class MiniCPMSdpaAttention(MiniCPMAttention): method forward (line 683) | def forward( class MiniCPMDecoderLayer (line 769) | class MiniCPMDecoderLayer(nn.Module): method __init__ (line 770) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int): method forward (line 782) | def forward( class MiniCPMPreTrainedModel (line 865) | class MiniCPMPreTrainedModel(PreTrainedModel): method _init_weights (line 875) | def _init_weights(self, module): class LayerWiseMiniCPMModel (line 961) | class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel): method __init__ (line 969) | def __init__(self, config: LayerWiseMiniCPMConfig): method get_input_embeddings (line 987) | def get_input_embeddings(self): method set_input_embeddings (line 990) | def set_input_embeddings(self, value): method forward (line 994) | def forward( class LayerWiseHead (line 1138) | class LayerWiseHead(nn.Module): method __init__ (line 1141) | def __init__(self, input_size, output_size): method forward (line 1145) | def forward(self, **kwargs): class LayerWiseMiniCPMForCausalLM (line 1148) | class LayerWiseMiniCPMForCausalLM(MiniCPMPreTrainedModel): method __init__ (line 1151) | def __init__(self, config): method get_input_embeddings (line 1194) | def get_input_embeddings(self): method set_input_embeddings (line 1197) | def set_input_embeddings(self, value): method get_output_embeddings (line 1200) | def get_output_embeddings(self): method set_output_embeddings (line 1203) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1206) | def set_decoder(self, decoder): method get_decoder (line 1209) | def get_decoder(self): method forward (line 1214) | def forward( method prepare_inputs_for_generation (line 1411) | def prepare_inputs_for_generation( method _reorder_cache (line 1468) | def _reorder_cache(past_key_values, beam_idx): method chat (line 1477) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role... FILE: FlagEmbedding/inference/reranker/encoder_only/base.py function sigmoid (line 10) | def sigmoid(x): class BaseReranker (line 14) | class BaseReranker(AbsReranker): method __init__ (line 34) | def __init__( method compute_score_single_gpu (line 78) | def compute_score_single_gpu( FILE: FlagEmbedding/inference/reranker/model_mapping.py class RerankerModelClass (line 10) | class RerankerModelClass(Enum): class RerankerConfig (line 26) | class RerankerConfig: FILE: FlagEmbedding/utils/transformers_compat.py function is_torch_fx_available (line 10) | def is_torch_fx_available(): function import_from_candidates (line 22) | def import_from_candidates(candidates, default=None): FILE: Tutorials/4_Evaluation/utils/compute_metrics.py class SimpleTokenizer (line 10) | class SimpleTokenizer: method __init__ (line 14) | def __init__(self): method tokenize (line 24) | def tokenize(self, text, uncased=False): function _normalize (line 33) | def _normalize(text): function has_answer (line 37) | def has_answer(answers, text, tokenizer) -> bool: function check_answer (line 51) | def check_answer(example, tokenizer) -> List[bool]: function evaluate_qa_recall (line 65) | def evaluate_qa_recall(ctxs, answers, k_values: Union[int, List[int]]=100): FILE: Tutorials/4_Evaluation/utils/normalize_text.py function normalize_text (line 133) | def normalize_text(text: str): FILE: examples/inference/embedder/decoder_only/auto_base_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/embedder/decoder_only/auto_base_single_device.py function test_base_single_device (line 5) | def test_base_single_device(): FILE: examples/inference/embedder/decoder_only/auto_icl_multi_devices.py function test_icl_multi_devices (line 5) | def test_icl_multi_devices(): FILE: examples/inference/embedder/decoder_only/auto_icl_single_device.py function test_icl_single_device (line 5) | def test_icl_single_device(): FILE: examples/inference/embedder/decoder_only/base_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/embedder/decoder_only/base_single_device.py function test_base_single_device (line 5) | def test_base_single_device(): FILE: examples/inference/embedder/decoder_only/icl_multi_devices.py function test_icl_multi_devices (line 5) | def test_icl_multi_devices(): FILE: examples/inference/embedder/decoder_only/icl_single_device.py function test_icl_single_device (line 5) | def test_icl_single_device(): FILE: examples/inference/embedder/encoder_only/auto_base_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/embedder/encoder_only/auto_base_single_device.py function test_base_single_device (line 5) | def test_base_single_device(): FILE: examples/inference/embedder/encoder_only/auto_m3_multi_devices.py function test_m3_multi_devices (line 5) | def test_m3_multi_devices(): FILE: examples/inference/embedder/encoder_only/auto_m3_single_device.py function test_m3_single_device (line 5) | def test_m3_single_device(): FILE: examples/inference/embedder/encoder_only/base_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/embedder/encoder_only/base_single_device.py function test_base_single_device (line 5) | def test_base_single_device(): FILE: examples/inference/embedder/encoder_only/m3_multi_devices.py function test_m3_multi_devices (line 5) | def test_m3_multi_devices(): FILE: examples/inference/embedder/encoder_only/m3_multi_devices_compute_score.py function test_m3_multi_devices (line 5) | def test_m3_multi_devices(): FILE: examples/inference/embedder/encoder_only/m3_single_device.py function test_m3_single_device (line 5) | def test_m3_single_device(): FILE: examples/inference/embedder/encoder_only/m3_single_device_compute_score.py function test_m3_single_device (line 5) | def test_m3_single_device(): FILE: examples/inference/reranker/decoder_only/auto_base_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/auto_base_single_device.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/auto_layerwise_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/auto_layerwise_single_device.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/auto_lightweight_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/auto_lightweight_single_device.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/base_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/base_single_device.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/layerwise_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/layerwise_single_device.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/lightweight_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/decoder_only/lightweight_single_device.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/encoder_only/auto_base_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/encoder_only/auto_base_single_device.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/encoder_only/base_multi_devices.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: examples/inference/reranker/encoder_only/base_single_device.py function test_base_multi_devices (line 5) | def test_base_multi_devices(): FILE: research/BGE_Coder/data_generation/constant.py class TaskType (line 6) | class TaskType(Enum): function get_task_def_by_task_type (line 65) | def get_task_def_by_task_type(task_type: Union[str, TaskType]) -> Tuple[... class Language (line 206) | class Language(Enum): class CodeLanguage (line 270) | class CodeLanguage(Enum): class Task (line 337) | class Task: function get_task (line 346) | def get_task( function get_pos_as_input_by_task_type (line 391) | def get_pos_as_input_by_task_type(task_type: TaskType) -> bool: function get_generation_prompt (line 423) | def get_generation_prompt( function get_quality_control_prompt (line 759) | def get_quality_control_prompt( class DocLength (line 1256) | class DocLength(Enum): function get_gen_hard_neg_prompt (line 1267) | def get_gen_hard_neg_prompt(task: Task, query: str, pos: str) -> str: FILE: research/BGE_Coder/data_generation/corpus_generator.py class CorpusGenerator (line 11) | class CorpusGenerator: method __init__ (line 12) | def __init__( method _load_corpus (line 18) | def _load_corpus(self, corpus_dir: str, doc_length: List[str], externa... method run (line 80) | def run( FILE: research/BGE_Coder/data_generation/format_generated_examples.py function format_generated_examples (line 7) | def format_generated_examples( function main (line 56) | def main(): FILE: research/BGE_Coder/data_generation/llm.py class LLM (line 11) | class LLM: method __init__ (line 12) | def __init__( method split_text (line 41) | def split_text(self, text: str, anchor_points: Tuple[float, float] = (... method chat (line 47) | def chat( FILE: research/BGE_Coder/data_generation/run_generation.py function compute_md5 (line 18) | def compute_md5(text: str): function get_args (line 22) | def get_args(): function gen_triplets (line 161) | def gen_triplets( function get_save_path (line 196) | def get_save_path( function save_triplets (line 213) | def save_triplets( function main (line 250) | def main(args): FILE: research/BGE_Coder/data_generation/search.py function create_index (line 8) | def create_index(embeddings: np.ndarray, use_gpu: bool = False): function search (line 20) | def search( function get_top1 (line 45) | def get_top1( FILE: research/BGE_Coder/data_generation/triplet_generator.py function compute_md5 (line 17) | def compute_md5(text: str): class TripletGenerator (line 21) | class TripletGenerator(LLM): method __init__ (line 22) | def __init__( method _gen_for_code_modification_retrieval (line 34) | def _gen_for_code_modification_retrieval( method _gen_for_code_comparison_retrieval (line 81) | def _gen_for_code_comparison_retrieval( method _gen_for_code_context_retrieval (line 126) | def _gen_for_code_context_retrieval( method _arrange_query_and_pos (line 146) | def _arrange_query_and_pos(task: Task, input_text: str, response: str): method _gen_for_normal_task (line 168) | def _gen_for_normal_task( method _gen_for_bug_desc_retrieval (line 208) | def _gen_for_bug_desc_retrieval( method _gen_for_two_step_not_use_last (line 253) | def _gen_for_two_step_not_use_last( method _gen_for_two_step_use_last (line 297) | def _gen_for_two_step_use_last( method generate_triplets (line 341) | def generate_triplets( method gen_hard_negatives (line 470) | def gen_hard_negatives(self, result: dict, task: Task, num_negatives: ... method run_single (line 485) | def run_single( method run (line 552) | def run( method run_for_gen_neg (line 605) | def run_for_gen_neg( FILE: research/BGE_Coder/data_generation/utils.py function clean_content (line 4) | def clean_content(content: str): function clean_code (line 19) | def clean_code(code: str, lang: str, length_threshold: int = 30) -> str: FILE: research/BGE_Coder/evaluation/coderag_eval/test/arguments.py class CodeRAGEvalArgs (line 9) | class CodeRAGEvalArgs: FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/code_search_net.py function document2code (line 8) | def document2code(data, split="train"): function main (line 25) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/ds1000.py function download_source (line 16) | def download_source(source_dir): function download_dataset (line 32) | def download_dataset(source_dir): function get_dataset (line 47) | def get_dataset(source_dir, mode: str = "Completion", key: str = "All"): function document2code (line 65) | def document2code(data: list): function main (line 95) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/general_programming.py function save_file_jsonl (line 25) | def save_file_jsonl(data, path): function main (line 31) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/humaneval.py function document2code (line 8) | def document2code(data, split="test"): function main (line 25) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/live_code_bench.py function get_queries (line 9) | def get_queries(data, split="test") -> list[dict]: function get_corpus (line 17) | def get_corpus(hf_name: str, cache_dir: str) -> list[dict]: function main (line 26) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/mbpp.py function get_function_name (line 8) | def get_function_name(code: str) -> str: function document2code (line 19) | def document2code(data, split="test"): function main (line 36) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/odex.py function document2code (line 12) | def document2code(data, split="test"): function main (line 43) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/repoeval.py function iterate_repository (line 38) | def iterate_repository(base_dir: str, repo: str) -> dict: function window_overlap (line 61) | def window_overlap(span: tuple, target_span: tuple) -> bool: class RepoWindowMaker (line 67) | class RepoWindowMaker: method __init__ (line 68) | def __init__(self, base_dir, repo, tasks, window_size, slice_size): method _buid_windows_for_a_file (line 77) | def _buid_windows_for_a_file(self, fpath_tuple, code): method _merge_windows_with_same_context (line 102) | def _merge_windows_with_same_context(self, code_windows): method build_windows (line 116) | def build_windows(self): function download_data (line 144) | def download_data(directory: str = "repoeval"): function repo2code (line 177) | def repo2code( function main (line 221) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/repoeval_repo.py function iterate_repository (line 38) | def iterate_repository(base_dir: str, repo: str) -> dict: function window_overlap (line 61) | def window_overlap(span: tuple, target_span: tuple) -> bool: class RepoWindowMaker (line 67) | class RepoWindowMaker: method __init__ (line 68) | def __init__(self, base_dir, repo, tasks, window_size, slice_size): method _buid_windows_for_a_file (line 77) | def _buid_windows_for_a_file(self, fpath_tuple, code): method _merge_windows_with_same_context (line 102) | def _merge_windows_with_same_context(self, code_windows): method build_windows (line 116) | def build_windows(self): function download_data (line 147) | def download_data(directory: str = "repoeval"): function repo2code (line 174) | def repo2code( function main (line 210) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/swebench.py class ContextManager (line 18) | class ContextManager: method __init__ (line 19) | def __init__(self, repo_path, base_commit, verbose=False): method __enter__ (line 25) | def __enter__(self): method get_environment (line 40) | def get_environment(self): method get_readme_files (line 43) | def get_readme_files(self): method __exit__ (line 49) | def __exit__(self, exc_type, exc_val, exc_tb): class AutoContextManager (line 53) | class AutoContextManager(ContextManager): method __init__ (line 56) | def __init__(self, instance, root_dir=None, verbose=False, token=None): method __exit__ (line 77) | def __exit__(self, exc_type, exc_val, exc_tb): function ingest_files (line 83) | def ingest_files(filenames): function get_oracle_filenames (line 91) | def get_oracle_filenames(instance): function is_test (line 106) | def is_test(name, test_phrases=None): function list_files (line 112) | def list_files(root_dir, include_tests=False): function detect_encoding (line 120) | def detect_encoding(filename): function ingest_directory_contents (line 128) | def ingest_directory_contents(root_dir, include_tests=False): function get_file_contents (line 144) | def get_file_contents(input_instances, verbose: bool = False, tmp_dir: s... function document2code (line 173) | def document2code(data, split: str = "test"): function main (line 219) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/swebench_repo.py class ContextManager (line 18) | class ContextManager: method __init__ (line 19) | def __init__(self, repo_path, base_commit, verbose=False): method __enter__ (line 25) | def __enter__(self): method get_environment (line 40) | def get_environment(self): method get_readme_files (line 43) | def get_readme_files(self): method __exit__ (line 49) | def __exit__(self, exc_type, exc_val, exc_tb): class AutoContextManager (line 53) | class AutoContextManager(ContextManager): method __init__ (line 56) | def __init__(self, instance, root_dir=None, verbose=False, token=None): method __exit__ (line 77) | def __exit__(self, exc_type, exc_val, exc_tb): function ingest_files (line 83) | def ingest_files(filenames): function get_oracle_filenames (line 91) | def get_oracle_filenames(instance): function is_test (line 106) | def is_test(name, test_phrases=None): function list_files (line 112) | def list_files(root_dir, include_tests=False): function detect_encoding (line 120) | def detect_encoding(filename): function ingest_directory_contents (line 128) | def ingest_directory_contents(root_dir, include_tests=False): function get_file_contents (line 144) | def get_file_contents(input_instances, verbose: bool = False, tmp_dir: s... function process_single_item (line 174) | def process_single_item(item, args): function main (line 226) | def main(): FILE: research/BGE_Coder/evaluation/coderag_eval/test/create/utils.py function load_jsonlines (line 5) | def load_jsonlines(file): function save_file_jsonl (line 10) | def save_file_jsonl(data, fp): function save_tsv_dict (line 14) | def save_tsv_dict(data, fp, fields): function cost_esitmate (line 25) | def cost_esitmate(path): FILE: research/BGE_Coder/evaluation/coderag_eval/test/main.py function get_model (line 23) | def get_model(model_args: CodeRAGEvalModelArgs): function get_top_docs (line 102) | def get_top_docs(results: dict, corpus: dict, task_id: str, topk: int = ... function main (line 111) | def main( FILE: research/BGE_Coder/evaluation/coderag_eval/test/prompts.py function get_task_def_by_task_name (line 4) | def get_task_def_by_task_name(task_name: str) -> str: FILE: research/BGE_Coder/evaluation/coir_eval/arguments.py function coir_tasks (line 9) | def coir_tasks(): class COIREvalArgs (line 35) | class COIREvalArgs: FILE: research/BGE_Coder/evaluation/coir_eval/main.py function get_model (line 11) | def get_model(model_args: COIREvalModelArgs): function main (line 84) | def main( FILE: research/BGE_Coder/evaluation/coir_eval/prompts.py function get_task_def_by_task_name (line 4) | def get_task_def_by_task_name(task_name: str) -> str: FILE: research/BGE_M3/arguments.py class ModelArguments (line 9) | class ModelArguments: class DataArguments (line 29) | class DataArguments: method __post_init__ (line 82) | def __post_init__(self): class RetrieverTrainingArguments (line 88) | class RetrieverTrainingArguments(TrainingArguments): FILE: research/BGE_M3/data.py class SameDatasetTrainDataset (line 16) | class SameDatasetTrainDataset(Dataset): method __init__ (line 19) | def __init__(self, args: DataArguments, batch_size: int, seed: int, pr... method print_batch_size (line 108) | def print_batch_size(self, batch_size: int, train_group_size: int): method get_file_batch_size (line 122) | def get_file_batch_size(file: str, batch_size: int, train_group_size: ... method refresh_epoch (line 170) | def refresh_epoch(self): method __getitem__ (line 188) | def __getitem__(self, _): method shuffle_text (line 198) | def shuffle_text(self, text): method create_batch_data (line 209) | def create_batch_data(self, batch_raw_data): method __len__ (line 248) | def __len__(self): class EmbedCollator (line 253) | class EmbedCollator(DataCollatorWithPadding): method __call__ (line 262) | def __call__(self, features): FILE: research/BGE_M3/modeling.py class EncoderOutput (line 18) | class EncoderOutput(ModelOutput): class BGEM3Model (line 25) | class BGEM3Model(nn.Module): method __init__ (line 27) | def __init__(self, method load_model (line 69) | def load_model(self, model_name, colbert_dim: int = -1): method gradient_checkpointing_enable (line 91) | def gradient_checkpointing_enable(self, **kwargs): method dense_embedding (line 94) | def dense_embedding(self, hidden_state, mask): method sparse_embedding (line 102) | def sparse_embedding(self, hidden_state, input_ids, return_embedding: ... method colbert_embedding (line 132) | def colbert_embedding(self, last_hidden_state, mask): method dense_score (line 137) | def dense_score(self, q_reps, p_reps): method sparse_score (line 142) | def sparse_score(self, q_reps, p_reps): method colbert_score (line 147) | def colbert_score(self, q_reps, p_reps, q_mask: torch.Tensor): method _encode (line 154) | def _encode(self, features): method encode (line 167) | def encode(self, features, sub_batch_size=None): method compute_sub_batch_size (line 196) | def compute_sub_batch_size(self, features): method compute_similarity (line 203) | def compute_similarity(self, q_reps, p_reps): method distill_loss (line 208) | def distill_loss(self, teacher_targets, student_scores, group_size): method forward (line 223) | def forward(self, query: Dict[str, Tensor] = None, passage: Dict[str, ... method compute_loss (line 317) | def compute_loss(self, scores, target): method _dist_gather_tensor (line 320) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]): method save (line 333) | def save(self, output_dir: str): method load_pooler (line 349) | def load_pooler(self, model_dir): class BGEM3ForInference (line 356) | class BGEM3ForInference(BGEM3Model): method forward (line 358) | def forward(self, FILE: research/BGE_M3/run.py class TrainerCallbackForDataRefresh (line 28) | class TrainerCallbackForDataRefresh(TrainerCallback): method __init__ (line 29) | def __init__(self, train_dataset): method on_epoch_end (line 32) | def on_epoch_end(self, args: TrainingArguments, state: TrainerState, c... function main (line 39) | def main(): FILE: research/BGE_M3/split_data_by_length.py function get_args (line 24) | def get_args(): class SplitByLengthHandler (line 38) | class SplitByLengthHandler: method __init__ (line 39) | def __init__(self, method _get_length_ranges_list (line 76) | def _get_length_ranges_list(length_list: list): method _process_dir (line 90) | def _process_dir(self, dir_path: str, output_dir: str): method _process_file (line 104) | def _process_file(self, file_path: str, output_path: str): method run (line 165) | def run(self, input_path: str, output_dir: str, log_name: str=None): FILE: research/BGE_M3/trainer.py function save_ckpt_for_sentence_transformers (line 5) | def save_ckpt_for_sentence_transformers(ckpt_dir, pooling_mode: str = 'c... class BiTrainer (line 16) | class BiTrainer(Trainer): method _save (line 17) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 41) | def compute_loss(self, model, inputs, return_outputs=False): FILE: research/BGE_VL/eval/eval_Circo.py class Args (line 18) | class Args: function index (line 70) | def index(model: Flag_mmret, corpus: datasets.Dataset, batch_size: int =... function search (line 134) | def search(model: Flag_mmret, queries: datasets, faiss_index: faiss.Inde... function main (line 162) | def main(): FILE: research/BGE_VL/eval/eval_fashioniq.py class Args (line 23) | class Args: function index (line 71) | def index(model: Flag_mmret, corpus: datasets.Dataset, batch_size: int =... function search (line 135) | def search(model: Flag_mmret, queries: datasets, faiss_index: faiss.Inde... function evaluate (line 162) | def evaluate(preds, labels, cutoffs=[1,5,10,20,50,100]): function main (line 200) | def main(): FILE: research/BGE_VL/eval/flag_dataset.py class MMIT_Dataset (line 22) | class MMIT_Dataset(Dataset): method __init__ (line 23) | def __init__(self, captions, image_ids, image_dir, image_processor) ->... method __getitem__ (line 34) | def __getitem__(self, item): method __len__ (line 46) | def __len__(self): class MMIT_Collator (line 50) | class MMIT_Collator: method __init__ (line 51) | def __init__(self, tokenizer, caption_max_len): method __call__ (line 57) | def __call__(self, features): class Image_Dataset (line 79) | class Image_Dataset(Dataset): method __init__ (line 80) | def __init__(self, image_ids, image_dir, image_processor) -> None: method __getitem__ (line 85) | def __getitem__(self, item): method __len__ (line 91) | def __len__(self): class Image_Collator (line 94) | class Image_Collator: method __init__ (line 95) | def __init__(self, tokenizer, caption_max_len): method __call__ (line 100) | def __call__(self, features): FILE: research/BGE_VL/eval/flag_mmret.py class Flag_mmret (line 11) | class Flag_mmret(nn.Module): method __init__ (line 12) | def __init__( method encode_queries (line 47) | def encode_queries(self, queries: Union[List[str], str], method encode_corpus (line 72) | def encode_corpus(self, method encode_text (line 90) | def encode_text(self, sentences: Union[List[str], str], batch_size: in... method encode_mm_it (line 123) | def encode_mm_it(self, captions: Union[List[str], str], image_ids: Uni... method encode_image (line 172) | def encode_image(self, image_ids: Union[List[str], str], batch_size: ... FILE: research/BGE_VL/modeling_MMRet_CLIP.py function contrastive_loss (line 59) | def contrastive_loss(logits: torch.Tensor) -> torch.Tensor: function clip_loss (line 63) | def clip_loss(similarity: torch.Tensor) -> torch.Tensor: function _get_vector_norm (line 69) | def _get_vector_norm(tensor: torch.Tensor) -> torch.Tensor: class CLIPVisionModelOutput (line 81) | class CLIPVisionModelOutput(ModelOutput): class CLIPTextModelOutput (line 110) | class CLIPTextModelOutput(ModelOutput): class CLIPOutput (line 139) | class CLIPOutput(ModelOutput): method to_tuple (line 168) | def to_tuple(self) -> Tuple[Any]: class CLIPVisionEmbeddings (line 175) | class CLIPVisionEmbeddings(nn.Module): method __init__ (line 176) | def __init__(self, config: CLIPVisionConfig): method forward (line 198) | def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor: class CLIPTextEmbeddings (line 210) | class CLIPTextEmbeddings(nn.Module): method __init__ (line 211) | def __init__(self, config: CLIPTextConfig): method forward (line 223) | def forward( class CLIPAttention (line 243) | class CLIPAttention(nn.Module): method __init__ (line 246) | def __init__(self, config): method _shape (line 265) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 268) | def forward( class CLIPFlashAttention2 (line 347) | class CLIPFlashAttention2(CLIPAttention): method __init__ (line 355) | def __init__(self, *args, **kwargs): method forward (line 364) | def forward( class CLIPSdpaAttention (line 434) | class CLIPSdpaAttention(CLIPAttention): method forward (line 442) | def forward( class CLIPMLP (line 514) | class CLIPMLP(nn.Module): method __init__ (line 515) | def __init__(self, config): method forward (line 522) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class CLIPEncoderLayer (line 529) | class CLIPEncoderLayer(nn.Module): method __init__ (line 530) | def __init__(self, config: CLIPConfig): method forward (line 538) | def forward( class CLIPPreTrainedModel (line 579) | class CLIPPreTrainedModel(PreTrainedModel): method _init_weights (line 591) | def _init_weights(self, module): class CLIPEncoder (line 748) | class CLIPEncoder(nn.Module): method __init__ (line 757) | def __init__(self, config: CLIPConfig): method forward (line 763) | def forward( class CLIPTextTransformer (line 845) | class CLIPTextTransformer(nn.Module): method __init__ (line 846) | def __init__(self, config: CLIPTextConfig): method forward (line 862) | def forward( class CLIPTextModel (line 949) | class CLIPTextModel(CLIPPreTrainedModel): method __init__ (line 954) | def __init__(self, config: CLIPTextConfig): method get_input_embeddings (line 960) | def get_input_embeddings(self) -> nn.Module: method set_input_embeddings (line 963) | def set_input_embeddings(self, value): method forward (line 968) | def forward( class CLIPVisionTransformer (line 1006) | class CLIPVisionTransformer(nn.Module): method __init__ (line 1007) | def __init__(self, config: CLIPVisionConfig): method forward (line 1019) | def forward( class CLIPVisionModel (line 1068) | class CLIPVisionModel(CLIPPreTrainedModel): method __init__ (line 1073) | def __init__(self, config: CLIPVisionConfig): method get_input_embeddings (line 1079) | def get_input_embeddings(self) -> nn.Module: method forward (line 1084) | def forward( class CLIPModel (line 1124) | class CLIPModel(CLIPPreTrainedModel): method __init__ (line 1128) | def __init__(self, config: CLIPConfig): method set_processor (line 1163) | def set_processor(self, model_name): method get_text_features (line 1167) | def get_text_features( method get_image_features (line 1214) | def get_image_features( method encode_image (line 1263) | def encode_image(self, images): method encode_text (line 1268) | def encode_text(self, text): method encode_multimodal (line 1273) | def encode_multimodal(self, images, text): method data_process (line 1282) | def data_process(self, images=None, text=None): method encode (line 1309) | def encode(self, images=None, text=None): method forward (line 1321) | def forward( class CLIPTextModelWithProjection (line 1421) | class CLIPTextModelWithProjection(CLIPPreTrainedModel): method __init__ (line 1426) | def __init__(self, config: CLIPTextConfig): method get_input_embeddings (line 1437) | def get_input_embeddings(self) -> nn.Module: method set_input_embeddings (line 1440) | def set_input_embeddings(self, value): method forward (line 1445) | def forward( class CLIPVisionModelWithProjection (line 1503) | class CLIPVisionModelWithProjection(CLIPPreTrainedModel): method __init__ (line 1507) | def __init__(self, config: CLIPVisionConfig): method get_input_embeddings (line 1518) | def get_input_embeddings(self) -> nn.Module: method forward (line 1523) | def forward( class CLIPForImageClassification (line 1583) | class CLIPForImageClassification(CLIPPreTrainedModel): method __init__ (line 1586) | def __init__(self, config: CLIPConfig) -> None: method forward (line 1610) | def forward( FILE: research/C_MTEB/C_MTEB/tasks/Classification.py class TNews (line 7) | class TNews(AbsTaskClassification): method metadata_dict (line 76) | def metadata_dict(self) -> dict[str, str]: class IFlyTek (line 82) | class IFlyTek(AbsTaskClassification): method metadata_dict (line 152) | def metadata_dict(self) -> dict[str, str]: class MultilingualSentiment (line 159) | class MultilingualSentiment(AbsTaskClassification): method metadata_dict (line 186) | def metadata_dict(self) -> dict[str, str]: class JDReview (line 192) | class JDReview(AbsTaskClassification): method metadata_dict (line 224) | def metadata_dict(self) -> dict[str, str]: class OnlineShopping (line 230) | class OnlineShopping(AbsTaskClassification): method metadata_dict (line 262) | def metadata_dict(self) -> dict[str, str]: class Waimai (line 268) | class Waimai(AbsTaskClassification): method metadata_dict (line 300) | def metadata_dict(self) -> dict[str, str]: FILE: research/C_MTEB/C_MTEB/tasks/Clustering.py class CLSClusteringFastS2S (line 17) | class CLSClusteringFastS2S(AbsTaskClusteringFast): method dataset_transform (line 56) | def dataset_transform(self): class CLSClusteringFastP2P (line 77) | class CLSClusteringFastP2P(AbsTaskClusteringFast): method dataset_transform (line 116) | def dataset_transform(self): class CLSClusteringS2S (line 137) | class CLSClusteringS2S(AbsTaskClustering): class CLSClusteringP2P (line 173) | class CLSClusteringP2P(AbsTaskClustering): class ThuNewsClusteringFastS2S (line 207) | class ThuNewsClusteringFastS2S(AbsTaskClusteringFast): method dataset_transform (line 246) | def dataset_transform(self): class ThuNewsClusteringFastP2P (line 267) | class ThuNewsClusteringFastP2P(AbsTaskClusteringFast): method dataset_transform (line 306) | def dataset_transform(self): class ThuNewsClusteringS2S (line 327) | class ThuNewsClusteringS2S(AbsTaskClustering): class ThuNewsClusteringP2P (line 370) | class ThuNewsClusteringP2P(AbsTaskClustering): FILE: research/C_MTEB/C_MTEB/tasks/MultiLongDocRetrieval.py function load_mldr_data (line 11) | def load_mldr_data(path: str, langs: list, eval_splits: list, cache_dir:... class MultiLongDocRetrieval (line 31) | class MultiLongDocRetrieval(MultilingualTask, AbsTaskRetrieval): method description (line 33) | def description(self): method load_data (line 46) | def load_data(self, **kwargs): method evaluate (line 58) | def evaluate( FILE: research/C_MTEB/C_MTEB/tasks/PairClassification.py class Ocnli (line 7) | class Ocnli(AbsTaskPairClassification): method dataset_transform (line 40) | def dataset_transform(self): class Cmnli (line 45) | class Cmnli(AbsTaskPairClassification): method dataset_transform (line 113) | def dataset_transform(self): FILE: research/C_MTEB/C_MTEB/tasks/Reranking.py class T2Reranking (line 7) | class T2Reranking(AbsTaskReranking): class MMarcoReranking (line 42) | class MMarcoReranking(AbsTaskReranking): class CMedQAv1 (line 77) | class CMedQAv1(AbsTaskReranking): class CMedQAv2 (line 116) | class CMedQAv2(AbsTaskReranking): FILE: research/C_MTEB/C_MTEB/tasks/Retrieval.py function load_retrieval_data (line 11) | def load_retrieval_data(dataset_path, dataset_revision, qrel_revision, e... class T2Retrieval (line 28) | class T2Retrieval(AbsTaskRetrieval): method load_data (line 75) | def load_data(self, **kwargs): class MMarcoRetrieval (line 88) | class MMarcoRetrieval(AbsTaskRetrieval): method load_data (line 135) | def load_data(self, **kwargs): class DuRetrieval (line 148) | class DuRetrieval(AbsTaskRetrieval): method load_data (line 193) | def load_data(self, **kwargs): class CovidRetrieval (line 206) | class CovidRetrieval(AbsTaskRetrieval): method load_data (line 244) | def load_data(self, **kwargs): class CmedqaRetrieval (line 257) | class CmedqaRetrieval(AbsTaskRetrieval): method load_data (line 295) | def load_data(self, **kwargs): class EcomRetrieval (line 308) | class EcomRetrieval(AbsTaskRetrieval): method load_data (line 348) | def load_data(self, **kwargs): class MedicalRetrieval (line 361) | class MedicalRetrieval(AbsTaskRetrieval): method load_data (line 401) | def load_data(self, **kwargs): class VideoRetrieval (line 414) | class VideoRetrieval(AbsTaskRetrieval): method load_data (line 454) | def load_data(self, **kwargs): FILE: research/C_MTEB/C_MTEB/tasks/STS.py class ATEC (line 8) | class ATEC(AbsTaskSTS): method metadata_dict (line 54) | def metadata_dict(self) -> dict[str, str]: class BQ (line 61) | class BQ(AbsTaskSTS): method metadata_dict (line 96) | def metadata_dict(self) -> dict[str, str]: class LCQMC (line 103) | class LCQMC(AbsTaskSTS): method metadata_dict (line 138) | def metadata_dict(self) -> dict[str, str]: class PAWSX (line 145) | class PAWSX(AbsTaskSTS): method metadata_dict (line 180) | def metadata_dict(self) -> dict[str, str]: class STSB (line 187) | class STSB(AbsTaskSTS): method metadata_dict (line 222) | def metadata_dict(self) -> dict[str, str]: class AFQMC (line 229) | class AFQMC(AbsTaskSTS): method metadata_dict (line 275) | def metadata_dict(self) -> dict[str, str]: class QBQTC (line 282) | class QBQTC(AbsTaskSTS): method metadata_dict (line 309) | def metadata_dict(self) -> dict[str, str]: FILE: research/C_MTEB/MKQA/dense_retrieval/step0-generate_embedding.py class ModelArgs (line 28) | class ModelArgs: class EvalArgs (line 48) | class EvalArgs: function get_model (line 67) | def get_model(model_args: ModelArgs): function parse_corpus (line 77) | def parse_corpus(corpus: datasets.Dataset): function generate_index (line 89) | def generate_index(model: FlagModel, corpus: datasets.Dataset, max_passa... function save_result (line 100) | def save_result(index: faiss.Index, docid: list, index_save_dir: str): function main (line 109) | def main(): FILE: research/C_MTEB/MKQA/dense_retrieval/step1-search_results.py class ModelArgs (line 28) | class ModelArgs: class EvalArgs (line 52) | class EvalArgs: function get_query_encoder (line 88) | def get_query_encoder(model_args: ModelArgs): function check_languages (line 104) | def check_languages(languages): function get_queries_and_qids (line 114) | def get_queries_and_qids(qa_data_dir: str, lang: str, add_instruction: b... function save_result (line 131) | def save_result(search_results, result_save_path: str, qids: list, max_h... function main (line 142) | def main(): FILE: research/C_MTEB/MKQA/dense_retrieval/step2-eval_dense_mkqa.py class EvalArgs (line 58) | class EvalArgs: function check_languages (line 99) | def check_languages(languages): function compute_average (line 109) | def compute_average(results: dict): function save_results (line 121) | def save_results(model_name: str, pooling_method: str, normalize_embeddi... function get_corpus_dict (line 141) | def get_corpus_dict(): function get_qa_dict (line 152) | def get_qa_dict(qa_path: str): function get_search_result_dict (line 162) | def get_search_result_dict(search_result_path: str, top_k: int=100): function evaluate (line 181) | def evaluate(corpus_dict: dict, qa_dict: dict, search_result_path: str, ... function main (line 204) | def main(): FILE: research/C_MTEB/MKQA/hybrid_retrieval/step0-hybrid_search_results.py class EvalArgs (line 21) | class EvalArgs: function check_languages (line 61) | def check_languages(languages): function get_search_result_dict (line 71) | def get_search_result_dict(search_result_path: str, top_k: int=1000): function get_queries_dict (line 91) | def get_queries_dict(queries_path: str): function save_hybrid_results (line 100) | def save_hybrid_results(sparse_search_result_path: str, dense_search_res... function main (line 134) | def main(): FILE: research/C_MTEB/MKQA/hybrid_retrieval/step1-eval_hybrid_mkqa.py class EvalArgs (line 50) | class EvalArgs: function check_languages (line 91) | def check_languages(languages): function compute_average (line 101) | def compute_average(results: dict): function save_results (line 113) | def save_results(model_name: str, pooling_method: str, normalize_embeddi... function get_corpus_dict (line 133) | def get_corpus_dict(): function get_qa_dict (line 144) | def get_qa_dict(qa_path: str): function get_search_result_dict (line 154) | def get_search_result_dict(search_result_path: str, top_k: int=100): function evaluate (line 173) | def evaluate(corpus_dict: dict, qa_dict: dict, search_result_path: str, ... function main (line 196) | def main(): FILE: research/C_MTEB/MKQA/multi_vector_rerank/hybrid_all_results.py class EvalArgs (line 23) | class EvalArgs: function check_languages (line 75) | def check_languages(languages): function get_search_result_dict (line 85) | def get_search_result_dict(search_result_path: str, top_k: int=1000): function get_queries_dict (line 105) | def get_queries_dict(queries_path: str): function save_hybrid_results (line 114) | def save_hybrid_results(sparse_search_result_dict: dict, dense_search_re... function main (line 151) | def main(): FILE: research/C_MTEB/MKQA/multi_vector_rerank/step0-rerank_results.py class ModelArgs (line 33) | class ModelArgs: class EvalArgs (line 53) | class EvalArgs: function check_languages (line 113) | def check_languages(languages): function get_reranker (line 123) | def get_reranker(model_args: ModelArgs, device: str=None): function get_search_result_dict (line 133) | def get_search_result_dict(search_result_path: str, top_k: int=100): function get_queries_dict (line 152) | def get_queries_dict(queries_path: str): function get_corpus_dict (line 162) | def get_corpus_dict(corpus: datasets.Dataset): function save_rerank_results (line 172) | def save_rerank_results(queries_dict: dict, corpus_dict: dict, reranker:... function get_shard (line 211) | def get_shard(search_result_dict: dict, num_shards: int, shard_id: int): function rerank_results (line 225) | def rerank_results(corpus_dict: dict, languages: list, eval_args: EvalAr... function main (line 277) | def main(): FILE: research/C_MTEB/MKQA/multi_vector_rerank/step1-eval_rerank_mkqa.py class EvalArgs (line 48) | class EvalArgs: function check_languages (line 85) | def check_languages(languages): function compute_average (line 95) | def compute_average(results: dict): function save_results (line 107) | def save_results(model_name: str, reranker_name: str, results: dict, sav... function get_corpus_dict (line 126) | def get_corpus_dict(): function get_qa_dict (line 137) | def get_qa_dict(qa_path: str): function get_search_result_dict (line 147) | def get_search_result_dict(search_result_path: str, top_k: int=100): function evaluate (line 166) | def evaluate(corpus_dict: dict, qa_dict: dict, search_result_path: str, ... function main (line 189) | def main(): FILE: research/C_MTEB/MKQA/sparse_retrieval/bm25_baseline.py function generate_corpus (line 25) | def generate_corpus(corpus_save_path: str): function generate_queries (line 42) | def generate_queries(qa_data_dir: str, lang: str, queries_save_dir: str): function index (line 65) | def index(corpus_save_dir: str, index_save_dir: str): function search (line 77) | def search(index_save_dir: str, queries_save_dir: str, lang: str, result... function main (line 92) | def main(): FILE: research/C_MTEB/MKQA/sparse_retrieval/bm25_baseline_same_tokenizer.py function _map_func_corpus (line 32) | def _map_func_corpus(examples): function _map_func_query (line 51) | def _map_func_query(examples): function generate_corpus (line 70) | def generate_corpus(corpus_save_path: str): function generate_queries (line 90) | def generate_queries(qa_data_dir: str, lang: str, queries_save_dir: str): function index (line 115) | def index(corpus_save_dir: str, index_save_dir: str): function search (line 127) | def search(index_save_dir: str, queries_save_dir: str, lang: str, result... function main (line 141) | def main(): FILE: research/C_MTEB/MKQA/sparse_retrieval/step0-encode_query-and-corpus.py class ModelArgs (line 31) | class ModelArgs: class EvalArgs (line 51) | class EvalArgs: function get_model (line 82) | def get_model(model_args: ModelArgs): function check_languages (line 92) | def check_languages(languages): function parse_corpus (line 102) | def parse_corpus(corpus: datasets.Dataset): function get_queries (line 114) | def get_queries(qa_data_dir: str, lang: str): function encode_and_save_corpus (line 134) | def encode_and_save_corpus(corpus_save_path: str, model: BGEM3FlagModel,... function encode_and_save_queries (line 161) | def encode_and_save_queries(queries_save_path: str, model: BGEM3FlagMode... function main (line 191) | def main(): FILE: research/C_MTEB/MKQA/sparse_retrieval/step1-search_results.py class ModelArgs (line 20) | class ModelArgs: class EvalArgs (line 28) | class EvalArgs: function check_languages (line 64) | def check_languages(languages): function generate_index (line 74) | def generate_index(corpus_embd_dir: str, index_save_dir: str, threads: i... function search_and_save_results (line 87) | def search_and_save_results(index_save_dir: str, query_embd_path: str, r... function parse_corpus (line 101) | def parse_corpus(corpus: datasets.Dataset): function main (line 107) | def main(): FILE: research/C_MTEB/MKQA/sparse_retrieval/step2-eval_sparse_mkqa.py class EvalArgs (line 56) | class EvalArgs: function check_languages (line 97) | def check_languages(languages): function compute_average (line 107) | def compute_average(results: dict): function save_results (line 119) | def save_results(model_name: str, pooling_method: str, normalize_embeddi... function get_corpus_dict (line 139) | def get_corpus_dict(): function get_qa_dict (line 150) | def get_qa_dict(qa_path: str): function get_search_result_dict (line 160) | def get_search_result_dict(search_result_path: str, top_k: int=100): function evaluate (line 179) | def evaluate(corpus_dict: dict, qa_dict: dict, search_result_path: str, ... function main (line 202) | def main(): FILE: research/C_MTEB/MKQA/utils/evaluation.py class SimpleTokenizer (line 8) | class SimpleTokenizer: method __init__ (line 12) | def __init__(self): method tokenize (line 22) | def tokenize(self, text, uncased=False): function _normalize (line 31) | def _normalize(text): function has_answer (line 35) | def has_answer(answers, text, tokenizer) -> bool: function check_answer (line 49) | def check_answer(example, tokenizer) -> List[bool]: function evaluate_recall_qa (line 63) | def evaluate_recall_qa(ctxs, answers, k=100): FILE: research/C_MTEB/MKQA/utils/normalize_text.py function normalize (line 133) | def normalize(text): FILE: research/C_MTEB/MLDR/dense_retrieval/step0-generate_embedding.py class ModelArgs (line 23) | class ModelArgs: class EvalArgs (line 43) | class EvalArgs: function get_model (line 67) | def get_model(model_args: ModelArgs): function check_languages (line 77) | def check_languages(languages): function load_corpus (line 87) | def load_corpus(lang: str): function generate_index (line 95) | def generate_index(model: FlagModel, corpus: datasets.Dataset, max_passa... function save_result (line 106) | def save_result(index: faiss.Index, docid: list, index_save_dir: str): function main (line 115) | def main(): FILE: research/C_MTEB/MLDR/dense_retrieval/step1-search_results.py class ModelArgs (line 24) | class ModelArgs: class EvalArgs (line 48) | class EvalArgs: function get_query_encoder (line 76) | def get_query_encoder(model_args: ModelArgs): function check_languages (line 92) | def check_languages(languages): function get_queries_and_qids (line 102) | def get_queries_and_qids(lang: str, split: str='test', add_instruction: ... function save_result (line 115) | def save_result(search_results, result_save_path: str, qids: list, max_h... function main (line 126) | def main(): FILE: research/C_MTEB/MLDR/dense_retrieval/step2-eval_dense_mldr.py class EvalArgs (line 49) | class EvalArgs: function check_languages (line 86) | def check_languages(languages): function compute_average (line 96) | def compute_average(results: dict): function save_results (line 108) | def save_results(model_name: str, pooling_method: str, normalize_embeddi... function map_metric (line 128) | def map_metric(metric: str): function evaluate (line 138) | def evaluate(script_path, qrels_path, search_result_path, metrics: list): function main (line 164) | def main(): FILE: research/C_MTEB/MLDR/hybrid_retrieval/step0-hybrid_search_results.py class EvalArgs (line 19) | class EvalArgs: function check_languages (line 55) | def check_languages(languages): function get_search_result_dict (line 65) | def get_search_result_dict(search_result_path: str, top_k: int=1000): function save_hybrid_results (line 85) | def save_hybrid_results(sparse_search_result_dict: dict, dense_search_re... function main (line 116) | def main(): FILE: research/C_MTEB/MLDR/hybrid_retrieval/step1-eval_hybrid_mldr.py class EvalArgs (line 39) | class EvalArgs: function check_languages (line 76) | def check_languages(languages): function compute_average (line 86) | def compute_average(results: dict): function save_results (line 98) | def save_results(model_name: str, pooling_method: str, normalize_embeddi... function map_metric (line 118) | def map_metric(metric: str): function evaluate (line 128) | def evaluate(script_path, qrels_path, search_result_path, metrics: list): function main (line 154) | def main(): FILE: research/C_MTEB/MLDR/mteb_dense_eval/eval_MLDR.py class EvalArgs (line 26) | class EvalArgs: class ModelArgs (line 43) | class ModelArgs: function check_languages (line 86) | def check_languages(languages): function main (line 98) | def main(): FILE: research/C_MTEB/MLDR/mteb_dense_eval/flag_dres_model.py function _transform_func (line 13) | def _transform_func(examples: Dict[str, List], function _transform_func_v2 (line 24) | def _transform_func_v2(examples: Dict[str, List], class FlagDRESModel (line 39) | class FlagDRESModel(DRESModel): method __init__ (line 40) | def __init__( method encode_queries (line 81) | def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray: method encode_corpus (line 98) | def encode_corpus(self, corpus: List[Union[Dict[str, str], str]], **kw... method encode (line 116) | def encode(self, sentences: List[str], max_length: int, batch_size: in... method pooling (line 161) | def pooling(self, FILE: research/C_MTEB/MLDR/multi_vector_rerank/hybrid_all_results.py class EvalArgs (line 21) | class EvalArgs: function check_languages (line 69) | def check_languages(languages): function get_search_result_dict (line 79) | def get_search_result_dict(search_result_path: str, top_k: int=1000): function save_hybrid_results (line 99) | def save_hybrid_results(sparse_search_result_dict: dict, dense_search_re... function main (line 136) | def main(): FILE: research/C_MTEB/MLDR/multi_vector_rerank/step0-rerank_results.py class ModelArgs (line 28) | class ModelArgs: class EvalArgs (line 48) | class EvalArgs: function check_languages (line 108) | def check_languages(languages): function get_reranker (line 118) | def get_reranker(model_args: ModelArgs, device: str=None): function get_search_result_dict (line 128) | def get_search_result_dict(search_result_path: str, top_k: int=200): function get_queries_dict (line 147) | def get_queries_dict(lang: str, split: str='test'): function get_corpus_dict (line 158) | def get_corpus_dict(lang: str): function save_rerank_results (line 169) | def save_rerank_results(queries_dict: dict, corpus_dict: dict, reranker:... function get_shard (line 207) | def get_shard(search_result_dict: dict, num_shards: int, shard_id: int): function rerank_results (line 221) | def rerank_results(languages: list, eval_args: EvalArgs, model_args: Mod... function main (line 278) | def main(): FILE: research/C_MTEB/MLDR/multi_vector_rerank/step1-eval_rerank_mldr.py class EvalArgs (line 41) | class EvalArgs: function check_languages (line 74) | def check_languages(languages): function compute_average (line 84) | def compute_average(results: dict): function save_results (line 96) | def save_results(model_name: str, reranker_name: str, results: dict, sav... function map_metric (line 115) | def map_metric(metric: str): function evaluate (line 125) | def evaluate(script_path: str, qrels_path, search_result_path, metrics: ... function merge_search_result (line 151) | def merge_search_result(search_result_save_dir: str, lang: str): function main (line 175) | def main(): FILE: research/C_MTEB/MLDR/sparse_retrieval/bm25_baseline.py function generate_corpus (line 19) | def generate_corpus(lang: str, corpus_save_dir: str): function generate_queries (line 32) | def generate_queries(lang: str, queries_save_dir: str, split: str='test'): function index (line 52) | def index(lang: str, corpus_save_dir: str, index_save_dir: str): function search (line 64) | def search(index_save_dir: str, queries_save_dir: str, lang: str, result... function main (line 79) | def main(): FILE: research/C_MTEB/MLDR/sparse_retrieval/bm25_baseline_same_tokenizer.py function _map_func_corpus (line 26) | def _map_func_corpus(examples): function _map_func_query (line 46) | def _map_func_query(examples): function generate_corpus (line 67) | def generate_corpus(lang: str, corpus_save_dir: str): function generate_queries (line 82) | def generate_queries(lang: str, queries_save_dir: str, split: str='test'): function index (line 104) | def index(corpus_save_dir: str, index_save_dir: str): function search (line 115) | def search(index_save_dir: str, queries_save_dir: str, lang: str, result... function main (line 129) | def main(): FILE: research/C_MTEB/MLDR/sparse_retrieval/step0-encode_query-and-corpus.py class ModelArgs (line 25) | class ModelArgs: class EvalArgs (line 45) | class EvalArgs: function get_model (line 77) | def get_model(model_args: ModelArgs): function check_languages (line 87) | def check_languages(languages): function load_corpus (line 97) | def load_corpus(lang: str): function get_queries (line 105) | def get_queries(lang: str, split: str='test'): function encode_corpus (line 119) | def encode_corpus(model: BGEM3FlagModel, corpus: datasets.Dataset, max_p... function encode_queries (line 143) | def encode_queries(model: BGEM3FlagModel, queries: datasets.Dataset, max... function save_result (line 170) | def save_result(encoded_queries_list: list, encoded_corpus_list: list, s... function main (line 185) | def main(): FILE: research/C_MTEB/MLDR/sparse_retrieval/step1-search_results.py class ModelArgs (line 16) | class ModelArgs: class EvalArgs (line 24) | class EvalArgs: function check_languages (line 56) | def check_languages(languages): function generate_index (line 66) | def generate_index(lang: str, corpus_embd_dir: str, index_save_dir: str,... function search_and_save_results (line 79) | def search_and_save_results(index_save_dir: str, query_embd_path: str, r... function main (line 93) | def main(): FILE: research/C_MTEB/MLDR/sparse_retrieval/step2-eval_sparse_mldr.py class EvalArgs (line 47) | class EvalArgs: function check_languages (line 84) | def check_languages(languages): function compute_average (line 94) | def compute_average(results: dict): function save_results (line 106) | def save_results(model_name: str, pooling_method: str, normalize_embeddi... function map_metric (line 130) | def map_metric(metric: str): function evaluate (line 140) | def evaluate(script_path, qrels_path, search_result_path, metrics: list): function main (line 166) | def main(): FILE: research/C_MTEB/eval_C-MTEB.py function get_args (line 18) | def get_args(): FILE: research/C_MTEB/eval_MTEB.py function get_args (line 16) | def get_args(): FILE: research/C_MTEB/eval_cross_encoder.py function get_args (line 9) | def get_args(): FILE: research/C_MTEB/flag_dres_model.py class FlagDRESModel (line 9) | class FlagDRESModel: method __init__ (line 10) | def __init__( method encode_queries (line 40) | def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray: method encode_corpus (line 52) | def encode_corpus(self, corpus: List[Union[Dict[str, str], str]], **kw... method encode (line 65) | def encode(self, sentences: List[str], **kwargs) -> np.ndarray: method pooling (line 87) | def pooling(self, FILE: research/C_MTEB/summarize_results.py function read_results (line 21) | def read_results(task_types, args): function output_markdown (line 47) | def output_markdown(tasks_results, model, save_file): function get_args (line 119) | def get_args(): FILE: research/LLARA/finetune/arguments.py function default_list (line 8) | def default_list() -> List[int]: class ModelArguments (line 13) | class ModelArguments: class DataArguments (line 83) | class DataArguments: method __post_init__ (line 153) | def __post_init__(self): class RetrieverTrainingArguments (line 158) | class RetrieverTrainingArguments(TrainingArguments): FILE: research/LLARA/finetune/data.py class TrainDatasetForEmbedding (line 19) | class TrainDatasetForEmbedding(Dataset): method __init__ (line 20) | def __init__( method __len__ (line 52) | def __len__(self): method __getitem__ (line 56) | def __getitem__(self, item) -> Tuple[BatchEncoding, List[BatchEncoding]]: class EmbedCollator (line 96) | class EmbedCollator(DataCollatorForSeq2Seq): method __call__ (line 106) | def __call__(self, features, return_tensors='pt'): FILE: research/LLARA/finetune/load_model.py function get_model (line 8) | def get_model(model_args): FILE: research/LLARA/finetune/modeling.py class EncoderOutput (line 17) | class EncoderOutput(ModelOutput): class BiEncoderModel (line 24) | class BiEncoderModel(nn.Module): method __init__ (line 27) | def __init__(self, method gradient_checkpointing_enable (line 59) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 62) | def enable_input_require_grads(self, **kwargs): method encode (line 65) | def encode(self, features): method compute_similarity (line 107) | def compute_similarity(self, q_reps, p_reps): method forward (line 112) | def forward(self, query: Union[Dict[str, Tensor], List[Dict[str, Tenso... method compute_loss (line 144) | def compute_loss(self, scores, target): method _dist_gather_tensor (line 147) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]): method save (line 159) | def save(self, output_dir: str): FILE: research/LLARA/finetune/run.py function main (line 21) | def main(): FILE: research/LLARA/finetune/trainer.py class BiTrainer (line 4) | class BiTrainer(Trainer): method _save (line 5) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 28) | def compute_loss(self, model, inputs, return_outputs=False): FILE: research/LLARA/pretrain/arguments.py function default_list (line 8) | def default_list() -> List[int]: class ModelArguments (line 13) | class ModelArguments: class DataArguments (line 70) | class DataArguments: method __post_init__ (line 95) | def __post_init__(self): class PretrainTrainingArguments (line 100) | class PretrainTrainingArguments(TrainingArguments): FILE: research/LLARA/pretrain/data.py class TrainDatasetForEmbedding (line 16) | class TrainDatasetForEmbedding(Dataset): method __init__ (line 17) | def __init__( method __len__ (line 52) | def __len__(self): method __getitem__ (line 55) | def __getitem__(self, item): class EmbedCollator (line 90) | class EmbedCollator(DataCollatorForSeq2Seq): method __call__ (line 98) | def __call__(self, features, return_tensors='pt'): FILE: research/LLARA/pretrain/load_model.py function get_model (line 6) | def get_model(model_args, use_gradient_checkpointing: bool = False): FILE: research/LLARA/pretrain/modeling.py class NewLlamaModel (line 20) | class NewLlamaModel(LlamaModel): method forward (line 23) | def forward( method _update_causal_mask (line 156) | def _update_causal_mask( class PreLlamaModel (line 236) | class PreLlamaModel(LlamaForCausalLM): method __init__ (line 237) | def __init__(self, config): method forward (line 266) | def forward( class PreModel (line 419) | class PreModel(nn.Module): method __init__ (line 420) | def __init__(self, method gradient_checkpointing_enable (line 426) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 429) | def enable_input_require_grads(self, **kwargs): method forward (line 432) | def forward(self, *args, **kwargs): method save (line 435) | def save(self, output_dir: str): FILE: research/LLARA/pretrain/run.py function main (line 21) | def main(): FILE: research/LLARA/pretrain/trainer.py class PreTrainer (line 3) | class PreTrainer(Trainer): method _save (line 4) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 21) | def compute_loss(self, model, inputs, return_outputs=False): FILE: research/LM_Cocktail/LM_Cocktail/cocktail.py function save_ckpt_for_sentence_transformers (line 17) | def save_ckpt_for_sentence_transformers(ckpt_dir, pooling_mode: str = 'c... function mix_models (line 30) | def mix_models(model_names_or_paths: List[str], function mix_models_with_data (line 72) | def mix_models_with_data(model_names_or_paths: List[str], function mix_models_by_layers (line 125) | def mix_models_by_layers(model_names_or_paths: List[str], FILE: research/LM_Cocktail/LM_Cocktail/utils.py function load_llm (line 14) | def load_llm(model_name:str, trust_remote_code:bool): function load_embedder (line 19) | def load_embedder(model_name:str, trust_remote_code:bool): function load_reranker (line 24) | def load_reranker(model_name:str, trust_remote_code:bool): function load_seq2seq_model (line 29) | def load_seq2seq_model(model_name:str, trust_remote_code:bool): function load_model (line 34) | def load_model(model_name:str, model_type:str, trust_remote_code:bool=Tr... function get_model_param_list (line 48) | def get_model_param_list(model_names: List[str], model_type:str): function merge_param (line 57) | def merge_param(model_param_list: List[Dict], weights: List[float]): function get_model_param_dirs (line 70) | def get_model_param_dirs(model_names: List[str], model_type:str): function merge_param_by_layer (line 95) | def merge_param_by_layer(model_param_dirs: List[str], weights: List[floa... function compute_weights (line 127) | def compute_weights(base_model, tokenizer, param_list: List[Dict], model... function preprocess_data_for_seq2seq (line 158) | def preprocess_data_for_seq2seq(example_data, tokenizer, device, batch_s... function preprocess_data_for_embedder (line 181) | def preprocess_data_for_embedder(example_data, tokenizer, device, batch_... function seq2seq_loss (line 207) | def seq2seq_loss(base_model, input_data): function embedder_loss (line 219) | def embedder_loss(base_model, input_data): function preprocess_data_for_llm (line 239) | def preprocess_data_for_llm(example_data, tokenizer, device, batch_size:... function llm_loss (line 275) | def llm_loss(base_model, input_data): FILE: research/Long_LLM/activation_beacon/main/eval_generation.py class Args (line 18) | class Args(ModelArgs): function main (line 55) | def main(): FILE: research/Long_LLM/activation_beacon/main/eval_infbench.py class Args (line 23) | class Args(ModelArgs): function process_infbench (line 62) | def process_infbench(data, indices, tokenizer, chat_template, task:str, ... function main (line 97) | def main(): FILE: research/Long_LLM/activation_beacon/main/eval_lm.py class Args (line 16) | class Args(ModelArgs): function process_lm_pre (line 50) | def process_lm_pre(tokenizer, tokenize_max_char=None): function process_lm (line 62) | def process_lm(tokenizer, max_length=4096, stride=1024, min_length=None): function main (line 124) | def main(): FILE: research/Long_LLM/activation_beacon/main/eval_longbench.py class Args (line 22) | class Args(ModelArgs): function process_longbench (line 61) | def process_longbench(data, indices, tokenizer, chat_template, task, max... function main (line 96) | def main(): FILE: research/Long_LLM/activation_beacon/main/eval_mmlu.py class Args (line 22) | class Args(ModelArgs): function remove_eos (line 51) | def remove_eos(inputs: Mapping, eos_token_ids: Union[List,int]): function process_mmlu (line 64) | def process_mmlu(tokenizer, chat_template, eos_token_id, few_shot=0, tra... function evaluate_mmlu (line 138) | def evaluate_mmlu(eval_data, save_path, eval_preds): function main (line 182) | def main(): FILE: research/Long_LLM/activation_beacon/main/eval_msc.py class Args (line 23) | class Args(ModelArgs): function process_msc (line 48) | def process_msc(data, tokenizer, max_length, chat_template): function main (line 66) | def main(): FILE: research/Long_LLM/activation_beacon/main/eval_multiturn.py class Args (line 21) | class Args(ModelArgs): function process_multiturn (line 50) | def process_multiturn(data, indices, tokenizer, chat_template, min_lengt... function main (line 114) | def main(): FILE: research/Long_LLM/activation_beacon/main/eval_needle.py class Args (line 28) | class Args(ModelArgs): method __post_init__ (line 98) | def __post_init__(self): class OpenAIEvaluator (line 103) | class OpenAIEvaluator: method __init__ (line 113) | def __init__(self, method evaluate_response (line 147) | def evaluate_response(self, response: str) -> int: function generate_sample (line 170) | def generate_sample( function main (line 212) | def main(): FILE: research/Long_LLM/activation_beacon/main/eval_passkey.py class Args (line 28) | class Args(ModelArgs): function generate_sample (line 85) | def generate_sample(tokenizer, chat_template, context_length, passkey_de... function main (line 126) | def main(): FILE: research/Long_LLM/activation_beacon/main/eval_topic.py class Args (line 25) | class Args(ModelArgs): function process_topic_retrieval (line 54) | def process_topic_retrieval(data, tokenizer, chat_template, num_topic, t... function main (line 93) | def main(): FILE: research/Long_LLM/activation_beacon/main/infbench_utils.py function normalize_answer (line 12) | def normalize_answer(s: str) -> str: function normalize_zh_answer (line 31) | def normalize_zh_answer(s: str) -> str: function f1_score (line 48) | def f1_score(prediction, ground_truth) -> tuple[float, float, float]: function qa_f1_score (line 59) | def qa_f1_score(pred: str, ground_truths) -> float: function qa_f1_score_zh (line 78) | def qa_f1_score_zh(pred: str, ground_truths: list[str]) -> float: function load_json (line 100) | def load_json(fname): function iter_jsonl (line 104) | def iter_jsonl(fname, cnt=None): function first_int_match (line 117) | def first_int_match(prediction): function split_retrieval_answer (line 127) | def split_retrieval_answer(pred: str): function get_score_one_kv_retrieval (line 134) | def get_score_one_kv_retrieval(pred, label, model_name: str) -> bool: function get_score_one_passkey (line 141) | def get_score_one_passkey(pred, label, model_name: str) -> bool: function get_score_one_number_string (line 147) | def get_score_one_number_string(pred, label, model_name: str) -> bool: function get_score_one_code_run (line 153) | def get_score_one_code_run(pred, label, model_name: str) -> bool: function get_score_one_code_debug (line 172) | def get_score_one_code_debug(pred, label, model_name: str) -> bool: function get_score_one_math_find (line 208) | def get_score_one_math_find(pred, label, model_name: str) -> bool: function get_score_one_longdialogue_qa_eng (line 230) | def get_score_one_longdialogue_qa_eng(pred, label, model_name: str) -> b... function get_score_one_longbook_choice_eng (line 239) | def get_score_one_longbook_choice_eng(pred, label, model_name: str) -> b... function get_score_one_longbook_qa_eng (line 280) | def get_score_one_longbook_qa_eng(pred, label, model_name: str) -> float: function get_score_one_longbook_sum_eng (line 284) | def get_score_one_longbook_sum_eng( function get_score_one_longbook_qa_chn (line 297) | def get_score_one_longbook_qa_chn(pred, label, model_name: str) -> float: function get_score_one_math_calc (line 301) | def get_score_one_math_calc(pred, label, model_name: str) -> float: function get_score_one (line 326) | def get_score_one( function get_labels (line 359) | def get_labels(preds: list) -> list[str]: function get_preds (line 367) | def get_preds(preds: list, data_name: str) -> list[str]: function get_score (line 382) | def get_score( function compute_scores (line 396) | def compute_scores(preds_path, data_name: str, model_name: str): function create_prompt (line 406) | def create_prompt(eg: dict, data_name: str, prompt_template: str) -> str: function get_answer (line 516) | def get_answer(eg: dict, data_name: str): FILE: research/Long_LLM/activation_beacon/main/longbench_utils.py function normalize_answer (line 12) | def normalize_answer(s): function normalize_zh_answer (line 31) | def normalize_zh_answer(s): function count_score (line 47) | def count_score(prediction, ground_truth, **kwargs): function retrieval_score (line 56) | def retrieval_score(prediction, ground_truth, **kwargs): function retrieval_zh_score (line 68) | def retrieval_zh_score(prediction, ground_truth, **kwargs): function code_sim_score (line 80) | def code_sim_score(prediction, ground_truth, **kwargs): function classification_score (line 89) | def classification_score(prediction, ground_truth, **kwargs): function rouge_score (line 114) | def rouge_score(prediction, ground_truth, **kwargs): function rouge_score_zh (line 122) | def rouge_score_zh(prediction, ground_truth, **kwargs): function f1_score (line 128) | def f1_score(prediction, ground_truth, **kwargs): function qa_f1_score (line 138) | def qa_f1_score(prediction, ground_truth, **kwargs): function qa_f1_score_zh (line 147) | def qa_f1_score_zh(prediction, ground_truth, **kwargs): function scorer (line 156) | def scorer(dataset, predictions, answers, all_classes): FILE: research/Long_LLM/activation_beacon/main/pretrain_data.py class Args (line 22) | class Args(ModelArgs): function prepare_pretrain_data (line 54) | def prepare_pretrain_data(data_files, tokenizer: PreTrainedTokenizer, co... FILE: research/Long_LLM/activation_beacon/main/train.py function main (line 20) | def main(): FILE: research/Long_LLM/activation_beacon/src/__init__.py function get_model_and_tokenizer (line 15) | def get_model_and_tokenizer(model_args, device="cpu", evaluation_mode=Tr... FILE: research/Long_LLM/activation_beacon/src/args.py class ModelArgs (line 9) | class ModelArgs: method resolve_path (line 204) | def resolve_path(self, path): method get_generation_config (line 218) | def get_generation_config(self): method to_dict (line 230) | def to_dict(self): method save (line 233) | def save(self, path): method __post_init__ (line 237) | def __post_init__(self): class TrainingArgs (line 259) | class TrainingArgs(TrainingArguments): method __post_init__ (line 374) | def __post_init__(self): FILE: research/Long_LLM/activation_beacon/src/chat.py class ChatTemplateOutput (line 17) | class ChatTemplateOutput: function mask_nested_lists (line 22) | def mask_nested_lists(lst, mask_target, mask_value=0): function apply_chat_template (line 31) | def apply_chat_template(template, messages, system_message=None, tokeniz... class SeparatorStyle (line 223) | class SeparatorStyle(IntEnum): class Conversation (line 255) | class Conversation: method get_prompt (line 280) | def get_prompt(self) -> str: method get_images (line 529) | def get_images(self): method set_system_message (line 539) | def set_system_message(self, system_message: str): method get_system_message (line 543) | def get_system_message(self): method append_message (line 547) | def append_message(self, role: str, message: str): method update_last_message (line 551) | def update_last_message(self, message: str): method convert_image_to_base64 (line 559) | def convert_image_to_base64(self, image): method to_gradio_chatbot (line 594) | def to_gradio_chatbot(self): method to_openai_api_messages (line 610) | def to_openai_api_messages(self): method extract_text_from_messages (line 625) | def extract_text_from_messages(self): method copy (line 631) | def copy(self): method dict (line 646) | def dict(self): function register_conv_template (line 660) | def register_conv_template(template: Conversation, override: bool = False): function get_conv_template (line 670) | def get_conv_template(name: str) -> Conversation: FILE: research/Long_LLM/activation_beacon/src/data.py class Data (line 18) | class Data: method _process_pretrain_data (line 19) | def _process_pretrain_data(data, indices): method _process_language_modeling (line 28) | def _process_language_modeling(data, indices, tokenizer, min_length, m... method _process_instruction_tuning (line 54) | def _process_instruction_tuning(data, indices, tokenizer, chat_templat... method prepare_train_data (line 102) | def prepare_train_data(data_files=None, tokenizer=None, max_length=409... method prepare_eval_data (line 174) | def prepare_eval_data(data_files=None, tokenizer=None, max_length=4096... FILE: research/Long_LLM/activation_beacon/src/llama/configuration_llama.py class LlamaConfig (line 31) | class LlamaConfig(PretrainedConfig): method __init__ (line 117) | def __init__( method _rope_scaling_validation (line 196) | def _rope_scaling_validation(self): FILE: research/Long_LLM/activation_beacon/src/llama/modeling_llama.py function _get_unpad_data (line 63) | def _get_unpad_data(attention_mask): class LlamaRMSNorm (line 76) | class LlamaRMSNorm(nn.Module): method __init__ (line 77) | def __init__(self, hidden_size, eps=1e-6): method forward (line 85) | def forward(self, hidden_states): class LlamaMLP (line 94) | class LlamaMLP(nn.Module): method __init__ (line 95) | def __init__(self, config): method forward (line 105) | def forward(self, x): function repeat_kv (line 110) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class LlamaAttention (line 122) | class LlamaAttention(nn.Module): method __init__ (line 125) | def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None): method _init_beacon_proj (line 179) | def _init_beacon_proj(self, missing_keys): method _shape (line 253) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method qkv_proj_with_beacon (line 329) | def qkv_proj_with_beacon(self, hidden_states, beacon_size, beacon_indi... method o_proj_with_beacon (line 375) | def o_proj_with_beacon(self, attn_output, beacon_size, beacon_indices): method forward (line 390) | def forward( class LlamaSdpaAttention (line 473) | class LlamaSdpaAttention(LlamaAttention): method forward (line 481) | def forward( class LlamaFlashAttention2 (line 563) | class LlamaFlashAttention2(LlamaAttention): method __init__ (line 570) | def __init__(self, *args, **kwargs): method forward (line 578) | def forward( method _flash_attention_forward (line 667) | def _flash_attention_forward( method _upad_input (line 726) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class LlamaDecoderLayer (line 772) | class LlamaDecoderLayer(nn.Module): method __init__ (line 773) | def __init__(self, config: LlamaConfig, layer_idx: int): method forward (line 783) | def forward( class LlamaPreTrainedModel (line 865) | class LlamaPreTrainedModel(PreTrainedModel): method _init_weights (line 875) | def _init_weights(self, module): class LlamaModel (line 961) | class LlamaModel(LlamaPreTrainedModel): method __init__ (line 969) | def __init__(self, config: LlamaConfig): method _init_beacon_embed (line 990) | def _init_beacon_embed(self, missing_keys): method get_input_embeddings (line 1021) | def get_input_embeddings(self): method set_input_embeddings (line 1024) | def set_input_embeddings(self, value): method forward (line 1028) | def forward( class LlamaForCausalLM (line 1148) | class LlamaForCausalLM(LlamaPreTrainedModel): method __init__ (line 1151) | def __init__(self, config): method get_input_embeddings (line 1159) | def get_input_embeddings(self): method set_input_embeddings (line 1162) | def set_input_embeddings(self, value): method get_output_embeddings (line 1165) | def get_output_embeddings(self): method set_output_embeddings (line 1168) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1171) | def set_decoder(self, decoder): method get_decoder (line 1174) | def get_decoder(self): method from_pretrained (line 1178) | def from_pretrained(cls, *args, **kwargs): method _native_forward (line 1201) | def _native_forward( method _beacon_forward (line 1263) | def _beacon_forward(self, method forward (line 1335) | def forward(self, **kwargs): method prepare_inputs_for_generation (line 1346) | def prepare_inputs_for_generation( method _reorder_cache (line 1377) | def _reorder_cache(past_key_values, beam_idx): FILE: research/Long_LLM/activation_beacon/src/metrics.py class Metric (line 14) | class Metric: method get_metric_fn (line 17) | def get_metric_fn(cls, metrics, **kwds): method get_save_path (line 40) | def get_save_path(eval_data, output_dir=None, field="result", save_nam... method save_result (line 57) | def save_result(preds, labels, save_path, indices=None, **kwargs): method rouge (line 73) | def rouge(preds, labels, **kwargs): FILE: research/Long_LLM/activation_beacon/src/mistral/configuration_mistral.py class MistralConfig (line 29) | class MistralConfig(PretrainedConfig): method __init__ (line 104) | def __init__( method _rope_scaling_validation (line 181) | def _rope_scaling_validation(self): FILE: research/Long_LLM/activation_beacon/src/mistral/modeling_mistral.py function _get_unpad_data (line 63) | def _get_unpad_data(attention_mask): class MistralRMSNorm (line 76) | class MistralRMSNorm(nn.Module): method __init__ (line 77) | def __init__(self, hidden_size, eps=1e-6): method forward (line 85) | def forward(self, hidden_states): class MistralMLP (line 94) | class MistralMLP(nn.Module): method __init__ (line 95) | def __init__(self, config): method forward (line 105) | def forward(self, x): function repeat_kv (line 110) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class MistralAttention (line 122) | class MistralAttention(nn.Module): method __init__ (line 125) | def __init__(self, config: MistralConfig, layer_idx: Optional[int] = N... method _init_beacon_proj (line 178) | def _init_beacon_proj(self, missing_keys): method _shape (line 252) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method qkv_proj_with_beacon (line 255) | def qkv_proj_with_beacon(self, hidden_states, beacon_size, beacon_indi... method o_proj_with_beacon (line 301) | def o_proj_with_beacon(self, attn_output, beacon_size, beacon_indices): method forward (line 316) | def forward( class MistralSdpaAttention (line 399) | class MistralSdpaAttention(MistralAttention): method forward (line 407) | def forward( class MistralFlashAttention2 (line 489) | class MistralFlashAttention2(MistralAttention): method __init__ (line 496) | def __init__(self, *args, **kwargs): method forward (line 504) | def forward( method _flash_attention_forward (line 596) | def _flash_attention_forward( method _upad_input (line 655) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class MistralDecoderLayer (line 701) | class MistralDecoderLayer(nn.Module): method __init__ (line 702) | def __init__(self, config: MistralConfig, layer_idx: int): method forward (line 717) | def forward( class MistralPreTrainedModel (line 798) | class MistralPreTrainedModel(PreTrainedModel): method _init_weights (line 808) | def _init_weights(self, module): class MistralModel (line 894) | class MistralModel(MistralPreTrainedModel): method __init__ (line 902) | def __init__(self, config: MistralConfig): method _init_beacon_embed (line 923) | def _init_beacon_embed(self, missing_keys): method get_input_embeddings (line 954) | def get_input_embeddings(self): method set_input_embeddings (line 957) | def set_input_embeddings(self, value): method forward (line 961) | def forward( class MistralForCausalLM (line 1081) | class MistralForCausalLM(MistralPreTrainedModel): method __init__ (line 1084) | def __init__(self, config): method get_input_embeddings (line 1092) | def get_input_embeddings(self): method set_input_embeddings (line 1095) | def set_input_embeddings(self, value): method get_output_embeddings (line 1098) | def get_output_embeddings(self): method set_output_embeddings (line 1101) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1104) | def set_decoder(self, decoder): method get_decoder (line 1107) | def get_decoder(self): method from_pretrained (line 1111) | def from_pretrained(cls, *args, **kwargs): method _native_forward (line 1134) | def _native_forward( method _beacon_forward (line 1196) | def _beacon_forward(self, method forward (line 1268) | def forward(self, **kwargs): method prepare_inputs_for_generation (line 1279) | def prepare_inputs_for_generation( method _reorder_cache (line 1310) | def _reorder_cache(past_key_values, beam_idx): FILE: research/Long_LLM/activation_beacon/src/modeling_beacon.py class Memory (line 14) | class Memory(torch.nn.Module): method __init__ (line 15) | def __init__( method _post_validation (line 36) | def _post_validation(self, verbose=True): method set (line 54) | def set(self, verbose=True, **kwargs): method reset (line 62) | def reset(self): method all_sequence_length (line 105) | def all_sequence_length(self): method batch_size (line 112) | def batch_size(self): method finish (line 119) | def finish(self): method dtype (line 124) | def dtype(self): method min_value (line 128) | def min_value(self): method max_position_embeddings (line 132) | def max_position_embeddings(self): method get_memory_size (line 139) | def get_memory_size(self): method prepare (line 154) | def prepare(self, input_ids, attention_mask, labels, skip_first=None, ... method set_compression_ratio (line 200) | def set_compression_ratio(self, start_idx, end_idx): method step (line 273) | def step(self): method _step (line 394) | def _step(self, ignore_memory=False): method update_memory (line 659) | def update_memory(self, past_key_values): method update_loss (line 716) | def update_loss(self, batch_loss, valid_token_num): method output (line 729) | def output(self, model_outputs): method _make_4d_attention_mask_and_position_ids (line 757) | def _make_4d_attention_mask_and_position_ids( method _extract_beacon_and_raw_memory (line 890) | def _extract_beacon_and_raw_memory( function slice_tensor (line 930) | def slice_tensor(x, start=None, end=None, step=None, index=None, dim=2): function cat_tensor (line 978) | def cat_tensor(list_of_tensors, dim=-1): function slice_activations (line 988) | def slice_activations(activations, start=None, end=None, k_seq_dim=2, v_... function cat_activations (line 996) | def cat_activations(list_of_activations, k_seq_dim=2, v_seq_dim=2): function interleave_activations (line 1009) | def interleave_activations(main_activations, augment_activations, main_s... function softmax (line 1068) | def softmax(x:np.ndarray, axis=-1, temperature=1): function l1_norm (line 1076) | def l1_norm(x): FILE: research/Long_LLM/activation_beacon/src/modeling_utils.py function optional_grad_ctx (line 12) | def optional_grad_ctx(with_grad=False): function move_to_device (line 18) | def move_to_device(data, device): function get_shifted_labels (line 32) | def get_shifted_labels(input_ids): function compute_loss (line 47) | def compute_loss(logits, labels, shift=False): function evaluate_perplexity (line 84) | def evaluate_perplexity(model, dataloader, accelerator:Optional[Accelera... function evaluate_generation (line 141) | def evaluate_generation(model, dataloader, accelerator:Optional[Accelera... function evaluate_nll (line 190) | def evaluate_nll(model, dataloader, accelerator:Optional[Accelerator]=No... class ModelOutput (line 236) | class ModelOutput(BaseModelOutputWithPast): function get_rope (line 249) | def get_rope(head_dim, base, max_position_embeddings, rope_scaling=None): function rotate_half (line 313) | def rotate_half(x): class RotaryEmbedding (line 320) | class RotaryEmbedding(torch.nn.Module): method __init__ (line 321) | def __init__(self, dim, max_position_embeddings=32768, base=10000, dev... method _set_cos_sin_cache (line 335) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 344) | def forward(self, q, k, position_ids): class LinearScalingRotaryEmbedding (line 363) | class LinearScalingRotaryEmbedding(RotaryEmbedding): method __init__ (line 366) | def __init__(self, dim, max_position_embeddings=32768, base=10000, dev... method _set_cos_sin_cache (line 370) | def _set_cos_sin_cache(self, seq_len, device, dtype): class DynamicNTKScalingRotaryEmbedding (line 382) | class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding): method __init__ (line 385) | def __init__(self, dim, max_position_embeddings=32768, base=10000, dev... method _set_cos_sin_cache (line 389) | def _set_cos_sin_cache(self, seq_len, device, dtype): class YarnRotaryEmbedding (line 408) | class YarnRotaryEmbedding(torch.nn.Module): method __init__ (line 409) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _get_factor (line 423) | def _get_factor(self): method _get_temperature (line 442) | def _get_temperature(self): method _set_cos_sin_cache (line 447) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 469) | def forward(self, q, k, position_ids): class YarnDynamicTemperatureRotaryEmbedding (line 488) | class YarnDynamicTemperatureRotaryEmbedding(torch.nn.Module): method __init__ (line 489) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _get_factor (line 503) | def _get_factor(self): method _set_cos_sin_cache (line 522) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 547) | def forward(self, q, k, position_ids): class YarnDynamicTemperatureLogNRotaryEmbedding (line 572) | class YarnDynamicTemperatureLogNRotaryEmbedding(torch.nn.Module): method __init__ (line 573) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _get_factor (line 587) | def _get_factor(self): method _set_cos_sin_cache (line 606) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 631) | def forward(self, q, k, position_ids): class Llama3RotaryEmbedding (line 656) | class Llama3RotaryEmbedding(torch.nn.Module): method __init__ (line 657) | def __init__(self, dim, max_position_embeddings=8192, base=10000, devi... method _set_cos_sin_cache (line 687) | def _set_cos_sin_cache(self, seq_len, device): method forward (line 696) | def forward(self, q, k, position_ids): FILE: research/Long_LLM/activation_beacon/src/qwen2/configuration_qwen2.py class Qwen2Config (line 28) | class Qwen2Config(PretrainedConfig): method __init__ (line 98) | def __init__( FILE: research/Long_LLM/activation_beacon/src/qwen2/modeling_qwen2.py function _get_unpad_data (line 71) | def _get_unpad_data(attention_mask): class Qwen2RMSNorm (line 84) | class Qwen2RMSNorm(nn.Module): method __init__ (line 85) | def __init__(self, hidden_size, eps=1e-6): method forward (line 93) | def forward(self, hidden_states): class Qwen2MLP (line 102) | class Qwen2MLP(nn.Module): method __init__ (line 103) | def __init__(self, config): method forward (line 113) | def forward(self, x): function repeat_kv (line 119) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class Qwen2Attention (line 131) | class Qwen2Attention(nn.Module): method __init__ (line 134) | def __init__(self, config: Qwen2Config, layer_idx: Optional[int] = None): method _init_beacon_proj (line 187) | def _init_beacon_proj(self, missing_keys): method _shape (line 261) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method qkv_proj_with_beacon (line 264) | def qkv_proj_with_beacon(self, hidden_states, beacon_size, beacon_indi... method o_proj_with_beacon (line 310) | def o_proj_with_beacon(self, attn_output, beacon_size, beacon_indices): method forward (line 325) | def forward( class Qwen2SdpaAttention (line 408) | class Qwen2SdpaAttention(Qwen2Attention): method forward (line 416) | def forward( class Qwen2FlashAttention2 (line 498) | class Qwen2FlashAttention2(Qwen2Attention): method __init__ (line 505) | def __init__(self, *args, **kwargs): method forward (line 513) | def forward( method _flash_attention_forward (line 606) | def _flash_attention_forward( method _upad_input (line 665) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class Qwen2DecoderLayer (line 711) | class Qwen2DecoderLayer(nn.Module): method __init__ (line 712) | def __init__(self, config: Qwen2Config, layer_idx: int): method forward (line 727) | def forward( class Qwen2PreTrainedModel (line 808) | class Qwen2PreTrainedModel(PreTrainedModel): method _init_weights (line 818) | def _init_weights(self, module): class Qwen2Model (line 904) | class Qwen2Model(Qwen2PreTrainedModel): method __init__ (line 912) | def __init__(self, config: Qwen2Config): method _init_beacon_embed (line 933) | def _init_beacon_embed(self, missing_keys): method get_input_embeddings (line 964) | def get_input_embeddings(self): method set_input_embeddings (line 967) | def set_input_embeddings(self, value): method forward (line 971) | def forward( class Qwen2ForCausalLM (line 1091) | class Qwen2ForCausalLM(Qwen2PreTrainedModel): method __init__ (line 1094) | def __init__(self, config): method get_input_embeddings (line 1102) | def get_input_embeddings(self): method set_input_embeddings (line 1105) | def set_input_embeddings(self, value): method get_output_embeddings (line 1108) | def get_output_embeddings(self): method set_output_embeddings (line 1111) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1114) | def set_decoder(self, decoder): method get_decoder (line 1117) | def get_decoder(self): method from_pretrained (line 1121) | def from_pretrained(cls, *args, **kwargs): method _native_forward (line 1144) | def _native_forward( method _beacon_forward (line 1206) | def _beacon_forward(self, method forward (line 1282) | def forward(self, **kwargs): method prepare_inputs_for_generation (line 1293) | def prepare_inputs_for_generation( method _reorder_cache (line 1324) | def _reorder_cache(past_key_values, beam_idx): FILE: research/Long_LLM/activation_beacon/src/trainer.py class ActivationBeaconTrainer (line 18) | class ActivationBeaconTrainer(Trainer): method __init__ (line 19) | def __init__(self, *args, model_args, file_logger, **kwargs): method compute_loss (line 24) | def compute_loss(self, model, inputs, return_outputs=False): method _get_train_sampler (line 47) | def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]: method _save (line 72) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method evaluate (line 79) | def evaluate(self, eval_dataset: Dataset | None = None, ignore_keys: L... class StrideGroupedSampler (line 144) | class StrideGroupedSampler(Sampler): method __init__ (line 147) | def __init__( method __len__ (line 232) | def __len__(self): method __iter__ (line 235) | def __iter__(self): FILE: research/Long_LLM/activation_beacon/src/utils.py function do_nothing (line 24) | def do_nothing(): function optional_grad_ctx (line 27) | def optional_grad_ctx(with_grad=False): function makedirs (line 33) | def makedirs(path): function clear_dir (line 38) | def clear_dir(directory): function split_file_dir_name_ext (line 51) | def split_file_dir_name_ext(path): function save_pickle (line 57) | def save_pickle(obj, path:str): function load_pickle (line 66) | def load_pickle(path): function save_json (line 70) | def save_json(obj, path:str): function load_json (line 76) | def load_json(path, lines=False): function format_numel_str (line 87) | def format_numel_str(numel: int) -> str: function batched_iter (line 103) | def batched_iter(iterable: Iterable, max_batch_size: int): function show_time (line 114) | def show_time(times): function filelock (line 121) | def filelock(path, process_index=0): function normalize_text (line 133) | def normalize_text(text, ignore_case=True, ignore_punctuation=True, igno... function wrap_text (line 156) | def wrap_text(s): function min_max_normalize (line 165) | def min_max_normalize(array): function softmax (line 168) | def softmax(x:np.ndarray, axis=-1): function get_max_length_in_nested_lists (line 175) | def get_max_length_in_nested_lists(lst): function pad_nested_lists (line 186) | def pad_nested_lists(lst, max_length, padding_value, padding_side="right"): function mask_nested_lists (line 205) | def mask_nested_lists(lst, mask_target, mask_value=0): function are_elements_of_same_length (line 213) | def are_elements_of_same_length(lst: List): function add_eos (line 220) | def add_eos(inputs: Mapping, eos_token_id: int): function remove_eos (line 238) | def remove_eos(inputs: Mapping, eos_token_ids: Union[List,int]): class FileLogger (line 249) | class FileLogger: method __init__ (line 250) | def __init__(self, log_file) -> None: method log (line 253) | def log(self, metrics, **kwargs): class DefaultDataCollator (line 274) | class DefaultDataCollator: method __call__ (line 286) | def __call__(self, batch_elem: List) -> Dict[str, Any]: FILE: research/Long_LLM/activation_beacon/src/vllm_utils.py class HFStyleVllmModel (line 16) | class HFStyleVllmModel: method __init__ (line 17) | def __init__( method device (line 26) | def device(self): method parse_generation_config (line 29) | def parse_generation_config(self, generation_config:Union[dict,Generat... method generate (line 47) | def generate( method __call__ (line 77) | def __call__(self, input_ids, attention_mask, labels, **kwargs): FILE: research/Long_LLM/longllm_qlora/data_pipeline/_openai.py function process_api_requests_from_file (line 110) | async def process_api_requests_from_file( class StatusTracker (line 280) | class StatusTracker: class APIRequest (line 294) | class APIRequest: method call_api (line 304) | async def call_api( function api_endpoint_from_url (line 376) | def api_endpoint_from_url(request_url): function append_to_jsonl (line 387) | def append_to_jsonl(data, filename: str) -> None: function num_tokens_consumed_from_request (line 394) | def num_tokens_consumed_from_request( function task_id_generator_function (line 453) | def task_id_generator_function(): FILE: research/Long_LLM/longllm_qlora/main/eval_generation.py class Args (line 18) | class Args(ModelArgs): function main (line 55) | def main(): FILE: research/Long_LLM/longllm_qlora/main/eval_infbench.py class Args (line 23) | class Args(ModelArgs): function process_infbench (line 62) | def process_infbench(data, indices, tokenizer, chat_template, task:str, ... function main (line 97) | def main(): FILE: research/Long_LLM/longllm_qlora/main/eval_lm.py class Args (line 16) | class Args(ModelArgs): function process_lm_pre (line 50) | def process_lm_pre(tokenizer, tokenize_max_char=None): function process_lm (line 62) | def process_lm(tokenizer, max_length=4096, stride=1024, min_length=None): function main (line 124) | def main(): FILE: research/Long_LLM/longllm_qlora/main/eval_longbench.py class Args (line 22) | class Args(ModelArgs): function process_longbench (line 61) | def process_longbench(data, indices, tokenizer, chat_template, task, max... function main (line 96) | def main(): FILE: research/Long_LLM/longllm_qlora/main/eval_mmlu.py class Args (line 22) | class Args(ModelArgs): function remove_eos (line 51) | def remove_eos(inputs: Mapping, eos_token_ids: Union[List,int]): function process_mmlu (line 64) | def process_mmlu(tokenizer, chat_template, eos_token_id, few_shot=0, tra... function evaluate_mmlu (line 138) | def evaluate_mmlu(eval_data, save_path, eval_preds): function main (line 182) | def main(): FILE: research/Long_LLM/longllm_qlora/main/eval_needle.py class Args (line 28) | class Args(ModelArgs): method __post_init__ (line 102) | def __post_init__(self): class OpenAIEvaluator (line 107) | class OpenAIEvaluator: method __init__ (line 117) | def __init__(self, method evaluate_response (line 152) | def evaluate_response(self, response: str) -> int: function generate_sample (line 175) | def generate_sample( function main (line 217) | def main(): FILE: research/Long_LLM/longllm_qlora/main/eval_passkey.py class Args (line 28) | class Args(ModelArgs): function generate_sample (line 84) | def generate_sample(tokenizer, chat_template, context_length, passkey_de... function main (line 125) | def main(): FILE: research/Long_LLM/longllm_qlora/main/eval_topic.py class Args (line 23) | class Args(ModelArgs): function process_topic_retrieval (line 42) | def process_topic_retrieval(tokenizer, chat_template, num_topic): function main (line 69) | def main(): FILE: research/Long_LLM/longllm_qlora/main/infbench_utils.py function normalize_answer (line 12) | def normalize_answer(s: str) -> str: function normalize_zh_answer (line 31) | def normalize_zh_answer(s: str) -> str: function f1_score (line 48) | def f1_score(prediction, ground_truth) -> tuple[float, float, float]: function qa_f1_score (line 59) | def qa_f1_score(pred: str, ground_truths) -> float: function qa_f1_score_zh (line 78) | def qa_f1_score_zh(pred: str, ground_truths: list[str]) -> float: function load_json (line 100) | def load_json(fname): function iter_jsonl (line 104) | def iter_jsonl(fname, cnt=None): function first_int_match (line 117) | def first_int_match(prediction): function split_retrieval_answer (line 127) | def split_retrieval_answer(pred: str): function get_score_one_kv_retrieval (line 134) | def get_score_one_kv_retrieval(pred, label, model_name: str) -> bool: function get_score_one_passkey (line 141) | def get_score_one_passkey(pred, label, model_name: str) -> bool: function get_score_one_number_string (line 147) | def get_score_one_number_string(pred, label, model_name: str) -> bool: function get_score_one_code_run (line 153) | def get_score_one_code_run(pred, label, model_name: str) -> bool: function get_score_one_code_debug (line 172) | def get_score_one_code_debug(pred, label, model_name: str) -> bool: function get_score_one_math_find (line 208) | def get_score_one_math_find(pred, label, model_name: str) -> bool: function get_score_one_longdialogue_qa_eng (line 230) | def get_score_one_longdialogue_qa_eng(pred, label, model_name: str) -> b... function get_score_one_longbook_choice_eng (line 239) | def get_score_one_longbook_choice_eng(pred, label, model_name: str) -> b... function get_score_one_longbook_qa_eng (line 280) | def get_score_one_longbook_qa_eng(pred, label, model_name: str) -> float: function get_score_one_longbook_sum_eng (line 284) | def get_score_one_longbook_sum_eng( function get_score_one_longbook_qa_chn (line 295) | def get_score_one_longbook_qa_chn(pred, label, model_name: str) -> float: function get_score_one_math_calc (line 299) | def get_score_one_math_calc(pred, label, model_name: str) -> float: function get_score_one (line 324) | def get_score_one( function get_labels (line 357) | def get_labels(preds: list) -> list[str]: function get_preds (line 365) | def get_preds(preds: list, data_name: str) -> list[str]: function get_score (line 380) | def get_score( function compute_scores (line 394) | def compute_scores(preds_path, data_name: str, model_name: str): function create_prompt (line 404) | def create_prompt(eg: dict, data_name: str, prompt_template: str) -> str: function get_answer (line 514) | def get_answer(eg: dict, data_name: str): FILE: research/Long_LLM/longllm_qlora/main/longbench_utils.py function normalize_answer (line 12) | def normalize_answer(s): function normalize_zh_answer (line 31) | def normalize_zh_answer(s): function count_score (line 47) | def count_score(prediction, ground_truth, **kwargs): function retrieval_score (line 56) | def retrieval_score(prediction, ground_truth, **kwargs): function retrieval_zh_score (line 68) | def retrieval_zh_score(prediction, ground_truth, **kwargs): function code_sim_score (line 80) | def code_sim_score(prediction, ground_truth, **kwargs): function classification_score (line 89) | def classification_score(prediction, ground_truth, **kwargs): function rouge_score (line 114) | def rouge_score(prediction, ground_truth, **kwargs): function rouge_score_zh (line 122) | def rouge_score_zh(prediction, ground_truth, **kwargs): function f1_score (line 128) | def f1_score(prediction, ground_truth, **kwargs): function qa_f1_score (line 138) | def qa_f1_score(prediction, ground_truth, **kwargs): function qa_f1_score_zh (line 147) | def qa_f1_score_zh(prediction, ground_truth, **kwargs): function scorer (line 156) | def scorer(dataset, predictions, answers, all_classes): FILE: research/Long_LLM/longllm_qlora/main/train.py function main (line 21) | def main(): FILE: research/Long_LLM/longllm_qlora/src/__init__.py function get_model_and_tokenizer (line 15) | def get_model_and_tokenizer(model_args, device="cpu", evaluation_mode=Tr... FILE: research/Long_LLM/longllm_qlora/src/args.py class ModelArgs (line 9) | class ModelArgs: method resolve_path (line 187) | def resolve_path(self, path): method get_generation_config (line 201) | def get_generation_config(self): method to_dict (line 213) | def to_dict(self): method save (line 216) | def save(self, path): method __post_init__ (line 220) | def __post_init__(self): class TrainingArgs (line 242) | class TrainingArgs(TrainingArguments): method __post_init__ (line 373) | def __post_init__(self): FILE: research/Long_LLM/longllm_qlora/src/chat.py class ChatTemplateOutput (line 17) | class ChatTemplateOutput: function mask_nested_lists (line 22) | def mask_nested_lists(lst, mask_target, mask_value=0): function apply_chat_template (line 31) | def apply_chat_template(template, messages, system_message=None, tokeniz... class SeparatorStyle (line 190) | class SeparatorStyle(IntEnum): class Conversation (line 222) | class Conversation: method get_prompt (line 247) | def get_prompt(self) -> str: method get_images (line 496) | def get_images(self): method set_system_message (line 506) | def set_system_message(self, system_message: str): method get_system_message (line 510) | def get_system_message(self): method append_message (line 514) | def append_message(self, role: str, message: str): method update_last_message (line 518) | def update_last_message(self, message: str): method convert_image_to_base64 (line 526) | def convert_image_to_base64(self, image): method to_gradio_chatbot (line 561) | def to_gradio_chatbot(self): method to_openai_api_messages (line 577) | def to_openai_api_messages(self): method extract_text_from_messages (line 592) | def extract_text_from_messages(self): method copy (line 598) | def copy(self): method dict (line 613) | def dict(self): function register_conv_template (line 627) | def register_conv_template(template: Conversation, override: bool = False): function get_conv_template (line 637) | def get_conv_template(name: str) -> Conversation: FILE: research/Long_LLM/longllm_qlora/src/data.py class Data (line 21) | class Data: method _process_language_modeling (line 22) | def _process_language_modeling(data, indices, tokenizer, min_length, m... method _process_instruction_tuning (line 46) | def _process_instruction_tuning(data, indices, tokenizer, chat_templat... method prepare_train_data (line 88) | def prepare_train_data(data_files=None, tokenizer=None, max_length=409... method prepare_eval_data (line 157) | def prepare_eval_data(data_files=None, tokenizer=None, max_length=4096... FILE: research/Long_LLM/longllm_qlora/src/metrics.py class Metric (line 14) | class Metric: method get_metric_fn (line 17) | def get_metric_fn(cls, metrics, **kwds): method get_save_path (line 40) | def get_save_path(eval_data, output_dir=None, field="result", save_nam... method save_result (line 57) | def save_result(preds, labels, save_path, indices=None, **kwargs): method rouge (line 73) | def rouge(preds, labels, **kwargs): FILE: research/Long_LLM/longllm_qlora/src/modeling_utils.py function optional_grad_ctx (line 12) | def optional_grad_ctx(with_grad=False): function move_to_device (line 18) | def move_to_device(data, device): function compute_loss (line 32) | def compute_loss(logits, labels, shift=False): function evaluate_perplexity (line 68) | def evaluate_perplexity(model, dataloader, accelerator:Optional[Accelera... function evaluate_generation (line 119) | def evaluate_generation(model, dataloader, accelerator:Optional[Accelera... function evaluate_nll (line 161) | def evaluate_nll(model, dataloader, accelerator:Optional[Accelerator]=No... class BeaconModelOutput (line 207) | class BeaconModelOutput(BaseModelOutputWithPast): FILE: research/Long_LLM/longllm_qlora/src/trainer.py class LLMTrainer (line 14) | class LLMTrainer(Trainer): method __init__ (line 15) | def __init__(self, *args, model_args, file_logger, **kwargs): method _prepare_inputs (line 20) | def _prepare_inputs(self, inputs: Dict[str, Union[torch.Tensor, Any]])... method _save (line 34) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method evaluate (line 41) | def evaluate(self, eval_dataset: Dataset | None = None, ignore_keys: L... FILE: research/Long_LLM/longllm_qlora/src/utils.py function do_nothing (line 24) | def do_nothing(): function optional_grad_ctx (line 27) | def optional_grad_ctx(with_grad=False): function makedirs (line 33) | def makedirs(path): function clear_dir (line 38) | def clear_dir(directory): function split_file_dir_name_ext (line 51) | def split_file_dir_name_ext(path): function save_pickle (line 57) | def save_pickle(obj, path:str): function load_pickle (line 66) | def load_pickle(path): function save_json (line 70) | def save_json(obj, path:str): function load_json (line 76) | def load_json(path, lines=False): function format_numel_str (line 87) | def format_numel_str(numel: int) -> str: function batched_iter (line 103) | def batched_iter(iterable: Iterable, max_batch_size: int): function show_time (line 114) | def show_time(times): function filelock (line 121) | def filelock(path, process_index=0): function normalize_text (line 133) | def normalize_text(text, ignore_case=True, ignore_punctuation=True, igno... function wrap_text (line 156) | def wrap_text(s): function min_max_normalize (line 165) | def min_max_normalize(array): function softmax (line 168) | def softmax(x:np.ndarray, axis=-1): function get_max_length_in_nested_lists (line 175) | def get_max_length_in_nested_lists(lst): function pad_nested_lists (line 186) | def pad_nested_lists(lst, max_length, padding_value, padding_side="right"): function mask_nested_lists (line 205) | def mask_nested_lists(lst, mask_target, mask_value=0): function are_elements_of_same_length (line 213) | def are_elements_of_same_length(lst: List): function add_eos (line 220) | def add_eos(inputs: Mapping, eos_token_id: int): function remove_eos (line 238) | def remove_eos(inputs: Mapping, eos_token_ids: Union[List,int]): function mix_parameters (line 247) | def mix_parameters(models: List[torch.nn.Module], weights: Optional[List... class FileLogger (line 286) | class FileLogger: method __init__ (line 287) | def __init__(self, log_file) -> None: method log (line 290) | def log(self, metrics, **kwargs): class DefaultDataCollator (line 311) | class DefaultDataCollator: method __call__ (line 323) | def __call__(self, batch_elem: List) -> Dict[str, Any]: FILE: research/MLVU/evaluation/generation_evaluation/calculate.py function extract_scores (line 8) | def extract_scores(text): FILE: research/MLVU/evaluation/generation_evaluation/calculate_sum.py function extract_scores (line 7) | def extract_scores(text): FILE: research/MLVU/evaluation/generation_evaluation/evaluate_ssc.py function parse_args (line 9) | def parse_args(): function get_scoring_points (line 20) | def get_scoring_points(score_points="MLVU_all/json/8_sub_scene.json"): function annotate (line 30) | def annotate(prediction_set, caption_files, output_dir): function main (line 112) | def main(): FILE: research/MLVU/evaluation/generation_evaluation/evaluate_summary.py function parse_args (line 9) | def parse_args(): function annotate (line 21) | def annotate(prediction_set, caption_files, output_dir): function main (line 102) | def main(): FILE: research/MLVU/evaluation/generation_evaluation/open_bench.py function get_prompt2 (line 10) | def get_prompt2(conv): class MLVU (line 24) | class MLVU(Dataset): method __init__ (line 25) | def __init__(self, data_dir, data_list): method __str__ (line 39) | def __str__(self): method __len__ (line 61) | def __len__(self): method get_index (line 64) | def get_index(self, bound, fps, max_frame, first_idx=0): method qa_template (line 79) | def qa_template(self, data): method __getitem__ (line 85) | def __getitem__(self, idx): function main (line 98) | def main(): FILE: research/MLVU/evaluation/models/videochat2/choice_bench.py function get_prompt (line 62) | def get_prompt(conv): function get_prompt2 (line 72) | def get_prompt2(conv): function get_context_emb (line 87) | def get_context_emb(conv, model, img_list, answer_prompt=None, print_res... function ask (line 115) | def ask(text, conv): class StoppingCriteriaSub (line 119) | class StoppingCriteriaSub(StoppingCriteria): method __init__ (line 120) | def __init__(self, stops=[], encounters=1): method __call__ (line 123) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... function answer (line 130) | def answer(conv, model, img_list, do_sample=True, max_new_tokens=200, nu... function get_index (line 163) | def get_index(num_frames, num_segments): function load_video (line 172) | def load_video(video_path, num_segments=8, return_msg=False, resolution=... function get_sinusoid_encoding_table (line 205) | def get_sinusoid_encoding_table(n_position=784, d_hid=1024, cur_frame=8,... class MLVU (line 272) | class MLVU(Dataset): method __init__ (line 273) | def __init__(self, data_dir, data_list, num_segments=8, resolution=224): method __str__ (line 305) | def __str__(self): method __len__ (line 327) | def __len__(self): method get_index (line 330) | def get_index(self, bound, fps, max_frame, first_idx=0): method read_video (line 344) | def read_video(self, video_path, bound=None): method qa_template (line 359) | def qa_template(self, data): method __getitem__ (line 372) | def __getitem__(self, idx): function infer_mvbench (line 395) | def infer_mvbench( function check_ans (line 445) | def check_ans(pred, gt): FILE: research/MLVU/evaluation/models/videochat2/open_bench.py function get_prompt (line 62) | def get_prompt(conv): function get_prompt2 (line 72) | def get_prompt2(conv): function get_context_emb (line 87) | def get_context_emb(conv, model, img_list, answer_prompt=None, print_res... function ask (line 115) | def ask(text, conv): class StoppingCriteriaSub (line 119) | class StoppingCriteriaSub(StoppingCriteria): method __init__ (line 120) | def __init__(self, stops=[], encounters=1): method __call__ (line 123) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... function answer (line 130) | def answer(conv, model, img_list, do_sample=True, max_new_tokens=200, nu... function get_index (line 163) | def get_index(num_frames, num_segments): function load_video (line 172) | def load_video(video_path, num_segments=8, return_msg=False, resolution=... function get_sinusoid_encoding_table (line 205) | def get_sinusoid_encoding_table(n_position=784, d_hid=1024, cur_frame=8,... class MLVU (line 264) | class MLVU(Dataset): method __init__ (line 265) | def __init__(self, data_dir, data_list, num_segments=8, resolution=224): method __str__ (line 296) | def __str__(self): method __len__ (line 318) | def __len__(self): method get_index (line 321) | def get_index(self, bound, fps, max_frame, first_idx=0): method read_video (line 335) | def read_video(self, video_path, bound=None): method qa_template (line 349) | def qa_template(self, data): method __getitem__ (line 354) | def __getitem__(self, idx): function infer_mvbench (line 377) | def infer_mvbench( FILE: research/MLVU/evaluation/models/videollava/choice_bench.py function get_prompt2 (line 22) | def get_prompt2(conv): class MLVU (line 36) | class MLVU(Dataset): method __init__ (line 37) | def __init__(self, data_dir, data_list): method __str__ (line 51) | def __str__(self): method __len__ (line 73) | def __len__(self): method get_index (line 76) | def get_index(self, bound, fps, max_frame, first_idx=0): method qa_template (line 88) | def qa_template(self, data): method __getitem__ (line 101) | def __getitem__(self, idx): function check_ans (line 114) | def check_ans(pred, gt): function main (line 132) | def main(): FILE: research/MLVU/evaluation/models/videollava/open_bench.py class MLVU (line 24) | class MLVU(Dataset): method __init__ (line 25) | def __init__(self, data_dir, data_list): method __str__ (line 39) | def __str__(self): method __len__ (line 61) | def __len__(self): method get_index (line 64) | def get_index(self, bound, fps, max_frame, first_idx=0): method qa_template (line 79) | def qa_template(self, data): method __getitem__ (line 86) | def __getitem__(self, idx): function main (line 99) | def main(): FILE: research/MLVU/evaluation/multiple_choice_evaluation/choice_bench.py function get_prompt2 (line 10) | def get_prompt2(conv): class MLVU (line 24) | class MLVU(Dataset): method __init__ (line 25) | def __init__(self, data_dir, data_list): method __str__ (line 39) | def __str__(self): method __len__ (line 61) | def __len__(self): method get_index (line 64) | def get_index(self, bound, fps, max_frame, first_idx=0): method qa_template (line 79) | def qa_template(self, data): method __getitem__ (line 92) | def __getitem__(self, idx): function check_ans (line 105) | def check_ans(pred, gt): function main (line 121) | def main(): FILE: research/Matroyshka_reranker/finetune/compensation/arguments.py function default_list (line 8) | def default_list() -> List[str]: class ModelArguments (line 13) | class ModelArguments: class DataArguments (line 112) | class DataArguments: class RetrieverTrainingArguments (line 184) | class RetrieverTrainingArguments(TrainingArguments): FILE: research/Matroyshka_reranker/finetune/compensation/data.py function traverse_directory_using_os (line 18) | def traverse_directory_using_os(root_folder): class TrainDatasetForReranker (line 29) | class TrainDatasetForReranker(Dataset): method __init__ (line 30) | def __init__( method __len__ (line 70) | def __len__(self): method __getitem__ (line 73) | def __getitem__(self, item) -> tuple[List[BatchEncoding], List[int], L... class RerankCollator (line 168) | class RerankCollator(DataCollatorForSeq2Seq): method __call__ (line 177) | def __call__(self, features_lengths, return_tensors='pt'): FILE: research/Matroyshka_reranker/finetune/compensation/load_model.py function get_model (line 11) | def get_model(model_args, training_args, output_token_id): FILE: research/Matroyshka_reranker/finetune/compensation/mistral_config.py class CostWiseMistralConfig (line 23) | class CostWiseMistralConfig(MistralConfig): method __init__ (line 98) | def __init__( FILE: research/Matroyshka_reranker/finetune/compensation/mistral_model.py class CostWiseModelOutputWithPast (line 72) | class CostWiseModelOutputWithPast(ModelOutput): class CostWiseCausalLMOutputWithPast (line 80) | class CostWiseCausalLMOutputWithPast(ModelOutput): function token_compress (line 88) | def token_compress(compress_ratio, class CostWiseMistralModel (line 197) | class CostWiseMistralModel(MistralPreTrainedModel): method __init__ (line 205) | def __init__(self, config: CostWiseMistralConfig): method get_input_embeddings (line 221) | def get_input_embeddings(self): method set_input_embeddings (line 224) | def set_input_embeddings(self, value): method forward (line 228) | def forward( class CostWiseHead (line 468) | class CostWiseHead(nn.Module): method __init__ (line 471) | def __init__(self, input_size, output_size): method forward (line 475) | def forward(self, **kwargs): class CostWiseMistralForCausalLM (line 478) | class CostWiseMistralForCausalLM(MistralPreTrainedModel): method __init__ (line 481) | def __init__(self, config): method get_input_embeddings (line 497) | def get_input_embeddings(self): method set_input_embeddings (line 500) | def set_input_embeddings(self, value): method get_output_embeddings (line 503) | def get_output_embeddings(self): method set_output_embeddings (line 506) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 509) | def set_decoder(self, decoder): method get_decoder (line 512) | def get_decoder(self): method forward (line 517) | def forward( method prepare_inputs_for_generation (line 642) | def prepare_inputs_for_generation( method _reorder_cache (line 700) | def _reorder_cache(past_key_values, beam_idx): FILE: research/Matroyshka_reranker/finetune/compensation/modeling.py class RerankerOutput (line 19) | class RerankerOutput(ModelOutput): function last_logit_pool (line 24) | def last_logit_pool(logits: Tensor, function set_nested_attr (line 35) | def set_nested_attr(obj, attr, value): function get_nested_attr (line 42) | def get_nested_attr(obj, attr): class BiEncoderModel (line 49) | class BiEncoderModel(nn.Module): method __init__ (line 50) | def __init__(self, method gradient_checkpointing_enable (line 85) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 88) | def enable_input_require_grads(self, **kwargs): method encode (line 91) | def encode(self, features, query_lengths, prompt_lengths): method forward (line 118) | def forward(self, method compute_loss (line 159) | def compute_loss(self, scores, target): method save (line 162) | def save(self, output_dir: str): method save_pretrained (line 176) | def save_pretrained(self, **kwargs): FILE: research/Matroyshka_reranker/finetune/compensation/run.py function main (line 20) | def main(): FILE: research/Matroyshka_reranker/finetune/compensation/trainer.py class BiTrainer (line 6) | class BiTrainer(Trainer): method _save (line 9) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 41) | def compute_loss(self, model, inputs, return_outputs=False): FILE: research/Matroyshka_reranker/finetune/self_distillation/arguments.py function default_list (line 8) | def default_list() -> List[str]: class ModelArguments (line 13) | class ModelArguments: class DataArguments (line 109) | class DataArguments: class RetrieverTrainingArguments (line 181) | class RetrieverTrainingArguments(TrainingArguments): FILE: research/Matroyshka_reranker/finetune/self_distillation/data.py function traverse_directory_using_os (line 18) | def traverse_directory_using_os(root_folder): class TrainDatasetForReranker (line 29) | class TrainDatasetForReranker(Dataset): method __init__ (line 30) | def __init__( method __len__ (line 70) | def __len__(self): method __getitem__ (line 73) | def __getitem__(self, item) -> tuple[List[BatchEncoding], List[int], L... class RerankCollator (line 168) | class RerankCollator(DataCollatorForSeq2Seq): method __call__ (line 177) | def __call__(self, features_lengths, return_tensors='pt'): FILE: research/Matroyshka_reranker/finetune/self_distillation/load_model.py function get_model (line 9) | def get_model(model_args, training_args, output_token_id): FILE: research/Matroyshka_reranker/finetune/self_distillation/mistral_config.py class CostWiseMistralConfig (line 23) | class CostWiseMistralConfig(MistralConfig): method __init__ (line 98) | def __init__( FILE: research/Matroyshka_reranker/finetune/self_distillation/mistral_model.py class CostWiseModelOutputWithPast (line 72) | class CostWiseModelOutputWithPast(ModelOutput): class CostWiseCausalLMOutputWithPast (line 80) | class CostWiseCausalLMOutputWithPast(ModelOutput): function token_compress (line 88) | def token_compress(compress_ratio, class CostWiseMistralModel (line 197) | class CostWiseMistralModel(MistralPreTrainedModel): method __init__ (line 205) | def __init__(self, config: CostWiseMistralConfig): method get_input_embeddings (line 221) | def get_input_embeddings(self): method set_input_embeddings (line 224) | def set_input_embeddings(self, value): method forward (line 228) | def forward( class CostWiseHead (line 468) | class CostWiseHead(nn.Module): method __init__ (line 471) | def __init__(self, input_size, output_size): method forward (line 475) | def forward(self, **kwargs): class CostWiseMistralForCausalLM (line 478) | class CostWiseMistralForCausalLM(MistralPreTrainedModel): method __init__ (line 481) | def __init__(self, config): method get_input_embeddings (line 497) | def get_input_embeddings(self): method set_input_embeddings (line 500) | def set_input_embeddings(self, value): method get_output_embeddings (line 503) | def get_output_embeddings(self): method set_output_embeddings (line 506) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 509) | def set_decoder(self, decoder): method get_decoder (line 512) | def get_decoder(self): method forward (line 517) | def forward( method prepare_inputs_for_generation (line 642) | def prepare_inputs_for_generation( method _reorder_cache (line 700) | def _reorder_cache(past_key_values, beam_idx): FILE: research/Matroyshka_reranker/finetune/self_distillation/modeling.py class RerankerOutput (line 15) | class RerankerOutput(ModelOutput): function last_logit_pool (line 20) | def last_logit_pool(logits: Tensor, class BiEncoderModel (line 31) | class BiEncoderModel(nn.Module): method __init__ (line 32) | def __init__(self, method gradient_checkpointing_enable (line 61) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 64) | def enable_input_require_grads(self, **kwargs): method encode (line 67) | def encode(self, features, query_lengths, prompt_lengths): method encode_full (line 91) | def encode_full(self, features, query_lengths, prompt_lengths): method forward (line 115) | def forward(self, method compute_loss (line 203) | def compute_loss(self, scores, target): method save (line 206) | def save(self, output_dir: str): method save_pretrained (line 215) | def save_pretrained(self, **kwargs): FILE: research/Matroyshka_reranker/finetune/self_distillation/run.py function main (line 20) | def main(): FILE: research/Matroyshka_reranker/finetune/self_distillation/trainer.py class BiTrainer (line 6) | class BiTrainer(Trainer): method _save (line 9) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 41) | def compute_loss(self, model, inputs, return_outputs=False): FILE: research/Matroyshka_reranker/inference/mistral_config.py class CostWiseMistralConfig (line 23) | class CostWiseMistralConfig(MistralConfig): method __init__ (line 98) | def __init__( FILE: research/Matroyshka_reranker/inference/mistral_model.py class CostWiseModelOutputWithPast (line 72) | class CostWiseModelOutputWithPast(ModelOutput): class CostWiseCausalLMOutputWithPast (line 80) | class CostWiseCausalLMOutputWithPast(ModelOutput): function token_compress (line 88) | def token_compress(compress_ratio, class CostWiseMistralModel (line 197) | class CostWiseMistralModel(MistralPreTrainedModel): method __init__ (line 205) | def __init__(self, config: CostWiseMistralConfig): method get_input_embeddings (line 221) | def get_input_embeddings(self): method set_input_embeddings (line 224) | def set_input_embeddings(self, value): method forward (line 228) | def forward( class CostWiseHead (line 468) | class CostWiseHead(nn.Module): method __init__ (line 471) | def __init__(self, input_size, output_size): method forward (line 475) | def forward(self, **kwargs): class CostWiseMistralForCausalLM (line 478) | class CostWiseMistralForCausalLM(MistralPreTrainedModel): method __init__ (line 481) | def __init__(self, config): method get_input_embeddings (line 497) | def get_input_embeddings(self): method set_input_embeddings (line 500) | def set_input_embeddings(self, value): method get_output_embeddings (line 503) | def get_output_embeddings(self): method set_output_embeddings (line 506) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 509) | def set_decoder(self, decoder): method get_decoder (line 512) | def get_decoder(self): method forward (line 517) | def forward( method prepare_inputs_for_generation (line 642) | def prepare_inputs_for_generation( method _reorder_cache (line 700) | def _reorder_cache(past_key_values, beam_idx): FILE: research/Matroyshka_reranker/inference/rank_model.py class MatroyshkaReranker (line 17) | class MatroyshkaReranker(AbsReranker): method __init__ (line 49) | def __init__( method compute_score_single_gpu (line 151) | def compute_score_single_gpu( FILE: research/Reinforced_IR/data_generation/agent/gpt.py class GPTAgent (line 13) | class GPTAgent(): method __init__ (line 14) | def __init__( method generate_single (line 31) | def generate_single( method generate (line 77) | def generate( method generate_single_direct (line 107) | def generate_single_direct( method generate_direct (line 127) | def generate_direct( FILE: research/Reinforced_IR/data_generation/agent/vllm.py class LLMAgent (line 8) | class LLMAgent(): method __init__ (line 9) | def __init__( method generate (line 21) | def generate( FILE: research/Reinforced_IR/data_generation/agent/vllm_instruct.py class LLMInstructAgent (line 9) | class LLMInstructAgent(): method __init__ (line 10) | def __init__( method generate (line 23) | def generate( method generate_direct (line 61) | def generate_direct( FILE: research/Reinforced_IR/data_generation/generate_generator_data.py function parse_option (line 15) | def parse_option(): function main (line 47) | def main(opt): FILE: research/Reinforced_IR/data_generation/generate_retriever_data.py function parse_option (line 11) | def parse_option(): function main (line 46) | def main(opt): FILE: research/Reinforced_IR/data_generation/generate_retriever_distill_data.py function parse_option (line 13) | def parse_option(): function main (line 36) | def main(opt): FILE: research/Reinforced_IR/data_generation/generate_universal_query.py function parse_option (line 10) | def parse_option(): function main (line 33) | def main(opt): FILE: research/Reinforced_IR/data_generation/prompts/get_prompts.py function get_query_generation_prompt (line 324) | def get_query_generation_prompt(dataset_name: str, passage: str, use_exa... function get_additional_info_generation_prompt (line 465) | def get_additional_info_generation_prompt(dataset_name: str, query: str)... function get_additional_info_generation_long_prompt (line 488) | def get_additional_info_generation_long_prompt(dataset_name: str, query:... function get_additional_info_generation_long_air_prompt (line 511) | def get_additional_info_generation_long_air_prompt(dataset_name: str, qu... function get_additional_info_generation_train_prompt (line 535) | def get_additional_info_generation_train_prompt(dataset_name: str, query... function get_quality_control_prompt (line 670) | def get_quality_control_prompt(dataset_name: str, query: str, passage: s... function get_reranker_prompt (line 718) | def get_reranker_prompt(dataset_name: str, query: str, passage: str) -> ... FILE: research/Reinforced_IR/data_generation/prompts/hyde_prompts.py function get_additional_info_generation_prompt (line 42) | def get_additional_info_generation_prompt(dataset_name: str, query: str)... FILE: research/Reinforced_IR/data_generation/prompts/teacher_prompts.py function get_yes_prompt (line 95) | def get_yes_prompt(dataset_name: str, query: str, passage: str) -> str: function get_rank_prompt (line 151) | def get_rank_prompt(dataset_name, num, query, passages): FILE: research/Reinforced_IR/data_generation/utils.py function extract_numbers (line 17) | def extract_numbers(s): function get_distill_data (line 22) | def get_distill_data( function generate_bge_train_data (line 65) | def generate_bge_train_data( function generate_llm_dpo_train_data (line 229) | def generate_llm_dpo_train_data( function evaluate_mrr (line 302) | def evaluate_mrr(qrels: Dict[str, Dict[str, int]], function search (line 329) | def search(queries_emb, doc_emb, topk: int = 100): function evaluate (line 358) | def evaluate(metrics: List[str] = ['recall', 'mrr', 'ndcg'], function evaluate_better (line 422) | def evaluate_better(metrics: List[str] = ['recall', 'mrr', 'ndcg'], FILE: research/Reinforced_IR/finetune/generator/save_tokenizer.py function parse_option (line 9) | def parse_option(): function main (line 20) | def main(opt): FILE: research/Reinforced_IR/finetune/generator/update_file.py function parse_option (line 7) | def parse_option(): function main (line 20) | def main(opt): FILE: research/Reinforced_IR/finetune/retriever/arguments.py class IREmbedderTrainingArguments (line 11) | class IREmbedderTrainingArguments(AbsEmbedderTrainingArguments): class IREmbedderDataArguments (line 20) | class IREmbedderDataArguments(AbsEmbedderDataArguments): FILE: research/Reinforced_IR/finetune/retriever/dataset.py class IREmbedderTrainDataset (line 24) | class IREmbedderTrainDataset(AbsEmbedderTrainDataset): method __init__ (line 31) | def __init__( method __getitem__ (line 41) | def __getitem__(self, item): class IREmbedderCollator (line 91) | class IREmbedderCollator(AbsEmbedderCollator): method __call__ (line 99) | def __call__(self, features): class IREmbedderSameDatasetTrainDataset (line 224) | class IREmbedderSameDatasetTrainDataset(AbsEmbedderSameDatasetTrainDatas... method __init__ (line 235) | def __init__( method _shuffle_answer (line 253) | def _shuffle_answer(self, text): method __getitem__ (line 269) | def __getitem__(self, _): method _create_batch_data (line 277) | def _create_batch_data(self, batch_raw_data): class IREmbedderSameDatasetCollator (line 401) | class IREmbedderSameDatasetCollator(AbsEmbedderSameDatasetCollator): method __call__ (line 414) | def __call__(self, features): FILE: research/Reinforced_IR/finetune/retriever/modeling.py class BiIREmbedderModel (line 24) | class BiIREmbedderModel(BiEncoderOnlyEmbedderModel): method __init__ (line 40) | def __init__( method forward (line 74) | def forward( method distill_loss (line 159) | def distill_loss(kd_loss_type, teacher_targets, student_scores, group_... method save (line 204) | def save(self, output_dir: str): FILE: research/Reinforced_IR/finetune/retriever/runner.py class IREmbedderRunner (line 19) | class IREmbedderRunner(AbsEmbedderRunner): method load_train_dataset (line 24) | def load_train_dataset(self): method load_data_collator (line 44) | def load_data_collator(self): method load_tokenizer_and_model (line 61) | def load_tokenizer_and_model(self) -> Tuple[PreTrainedTokenizer, AbsEm... method load_trainer (line 114) | def load_trainer(self) -> IREmbedderTrainer: FILE: research/Reinforced_IR/finetune/retriever/trainer.py class IREmbedderTrainer (line 11) | class IREmbedderTrainer(AbsEmbedderTrainer): method _save (line 15) | def _save(self, output_dir: Optional[str] = None, state_dict=None): FILE: research/Reinforced_IR/inference/agent/gpt.py class GPTAgent (line 13) | class GPTAgent(): method __init__ (line 14) | def __init__( method generate_single (line 31) | def generate_single( method generate (line 77) | def generate( method generate_single_direct (line 107) | def generate_single_direct( method generate_direct (line 127) | def generate_direct( FILE: research/Reinforced_IR/inference/agent/vllm.py class LLMAgent (line 8) | class LLMAgent(): method __init__ (line 9) | def __init__( method generate (line 21) | def generate( FILE: research/Reinforced_IR/inference/agent/vllm_instruct.py class LLMInstructAgent (line 9) | class LLMInstructAgent(): method __init__ (line 10) | def __init__( method generate (line 23) | def generate( method generate_direct (line 61) | def generate_direct( FILE: research/Reinforced_IR/inference/ir_model.py class Reinforced_IR_Model (line 23) | class Reinforced_IR_Model(): method __init__ (line 24) | def __init__( method load_retriever (line 69) | def load_retriever(self): method load_generator (line 85) | def load_generator(self): method offload_retriever (line 102) | def offload_retriever(self): method offload_generator (line 107) | def offload_generator(self): method encode_queries (line 112) | def encode_queries(self, task_instruction, answer_type, queries, **kwa... method encode_corpus (line 129) | def encode_corpus(self, corpus, **kwargs): method encode (line 133) | def encode(self, corpus, **kwargs): FILE: research/Reinforced_IR/inference/multi.py class Args (line 10) | class Args(): function worker_function (line 60) | def worker_function(device): function merge (line 122) | def merge(args: Args): FILE: research/baai_general_embedding/finetune/arguments.py class ModelArguments (line 9) | class ModelArguments: class DataArguments (line 30) | class DataArguments: method __post_init__ (line 63) | def __post_init__(self): class RetrieverTrainingArguments (line 68) | class RetrieverTrainingArguments(TrainingArguments): FILE: research/baai_general_embedding/finetune/data.py class TrainDatasetForEmbedding (line 14) | class TrainDatasetForEmbedding(Dataset): method __init__ (line 15) | def __init__( method __len__ (line 37) | def __len__(self): method __getitem__ (line 40) | def __getitem__(self, item) -> Tuple[str, List[str]]: class EmbedCollator (line 64) | class EmbedCollator(DataCollatorWithPadding): method padding_score (line 73) | def padding_score(self, teacher_score): method __call__ (line 91) | def __call__(self, features): FILE: research/baai_general_embedding/finetune/eval_msmarco.py class Args (line 16) | class Args: function index (line 74) | def index(model: FlagModel, corpus: datasets.Dataset, batch_size: int = ... function search (line 132) | def search(model: FlagModel, queries: datasets, faiss_index: faiss.Index... function evaluate (line 155) | def evaluate(preds, function main (line 212) | def main(): FILE: research/baai_general_embedding/finetune/hn_mine.py function get_args (line 11) | def get_args(): function create_index (line 25) | def create_index(embeddings, use_gpu): function batch_search (line 37) | def batch_search(index, function get_corpus (line 50) | def get_corpus(candidate_pool): function find_knn_neg (line 58) | def find_knn_neg(model, input_file, candidate_pool, output_file, sample_... FILE: research/baai_general_embedding/finetune/modeling.py class EncoderOutput (line 15) | class EncoderOutput(ModelOutput): class BiEncoderModel (line 22) | class BiEncoderModel(nn.Module): method __init__ (line 25) | def __init__(self, method gradient_checkpointing_enable (line 60) | def gradient_checkpointing_enable(self, **kwargs): method sentence_embedding (line 63) | def sentence_embedding(self, hidden_state, mask): method encode (line 71) | def encode(self, features): method compute_similarity (line 80) | def compute_similarity(self, q_reps, p_reps): method forward (line 85) | def forward(self, query: Dict[str, Tensor] = None, passage: Dict[str, ... method compute_loss (line 119) | def compute_loss(self, scores, target): method _dist_gather_tensor (line 122) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]): method save (line 135) | def save(self, output_dir: str): FILE: research/baai_general_embedding/finetune/run.py function main (line 20) | def main(): FILE: research/baai_general_embedding/finetune/trainer.py function save_ckpt_for_sentence_transformers (line 5) | def save_ckpt_for_sentence_transformers(ckpt_dir, pooling_mode: str = 'c... class BiTrainer (line 16) | class BiTrainer(Trainer): method _save (line 17) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 40) | def compute_loss(self, model, inputs, return_outputs=False): FILE: research/baai_general_embedding/retromae_pretrain/arguments.py class DataTrainingArguments (line 7) | class DataTrainingArguments: method __post_init__ (line 24) | def __post_init__(self): class ModelArguments (line 30) | class ModelArguments: FILE: research/baai_general_embedding/retromae_pretrain/data.py class DatasetForPretraining (line 13) | class DatasetForPretraining(torch.utils.data.Dataset): method __init__ (line 14) | def __init__(self, data_dir): method load_dataset (line 26) | def load_dataset(self, file): method __getitem__ (line 34) | def __getitem__(self, item): method __len__ (line 37) | def __len__(self): class RetroMAECollator (line 42) | class RetroMAECollator(DataCollatorForWholeWordMask): method __call__ (line 47) | def __call__(self, examples): FILE: research/baai_general_embedding/retromae_pretrain/enhancedDecoder.py class BertSelfAttention (line 24) | class BertSelfAttention(nn.Module): method __init__ (line 25) | def __init__(self, config, position_embedding_type=None): method transpose_for_scores (line 51) | def transpose_for_scores(self, x): method forward (line 56) | def forward( class BertAttention (line 153) | class BertAttention(nn.Module): method __init__ (line 154) | def __init__(self, config, position_embedding_type=None): method prune_heads (line 160) | def prune_heads(self, heads): method forward (line 178) | def forward( class BertLayerForDecoder (line 204) | class BertLayerForDecoder(nn.Module): method __init__ (line 205) | def __init__(self, config): method forward (line 219) | def forward( method feed_forward_chunk (line 285) | def feed_forward_chunk(self, attention_output): FILE: research/baai_general_embedding/retromae_pretrain/modeling.py class RetroMAEForPretraining (line 15) | class RetroMAEForPretraining(nn.Module): method __init__ (line 16) | def __init__( method gradient_checkpointing_enable (line 38) | def gradient_checkpointing_enable(self, **kwargs): method forward (line 41) | def forward(self, method mlm_loss (line 77) | def mlm_loss(self, hiddens, labels): method save_pretrained (line 91) | def save_pretrained(self, output_dir: str): method from_pretrained (line 96) | def from_pretrained( FILE: research/baai_general_embedding/retromae_pretrain/run.py class TrainerCallbackForSaving (line 27) | class TrainerCallbackForSaving(TrainerCallback): method on_epoch_end (line 28) | def on_epoch_end(self, args: TrainingArguments, state: TrainerState, c... function main (line 35) | def main(): FILE: research/baai_general_embedding/retromae_pretrain/trainer.py class PreTrainer (line 11) | class PreTrainer(Trainer): method log (line 12) | def log(self, logs: Dict[str, float]) -> None: method _save (line 30) | def _save(self, output_dir: Optional[str] = None, state_dict=None): FILE: research/baai_general_embedding/retromae_pretrain/utils.py function tensorize_batch (line 6) | def tensorize_batch(sequences: List[torch.Tensor], padding_value, align_... FILE: research/llm_dense_retriever/finetune/arguments.py function default_list (line 8) | def default_list() -> List[int]: class ModelArguments (line 13) | class ModelArguments: class DataArguments (line 75) | class DataArguments: class RetrieverTrainingArguments (line 155) | class RetrieverTrainingArguments(TrainingArguments): FILE: research/llm_dense_retriever/finetune/data.py function get_query_prompt (line 22) | def get_query_prompt(query, prompt, use_special_tokens): function add_prompt (line 29) | def add_prompt(example, prompt): function traverse_directory_using_os (line 33) | def traverse_directory_using_os(root_folder): class SameDatasetTrainDataset (line 45) | class SameDatasetTrainDataset(Dataset): method __init__ (line 52) | def __init__(self, args: DataArguments, batch_size, seed, tokenizer, p... method refresh_epoch (line 157) | def refresh_epoch(self): method __getitem__ (line 180) | def __getitem__(self, idx): method create_batch_data (line 192) | def create_batch_data(self, batch_raw_data): method __len__ (line 341) | def __len__(self): class SameEmbedCollator (line 346) | class SameEmbedCollator(DataCollatorForSeq2Seq): method __call__ (line 357) | def __call__(self, features, return_tensors='pt'): FILE: research/llm_dense_retriever/finetune/load_model.py function find_largest_checkpoint (line 8) | def find_largest_checkpoint(checkpoint_dir): function get_model (line 24) | def get_model(model_args, output_dir, resize, resize_tokens): function save_merged_model (line 93) | def save_merged_model(model_args, output_dir): FILE: research/llm_dense_retriever/finetune/modeling.py class EncoderOutput (line 19) | class EncoderOutput(ModelOutput): class BiEncoderModel (line 26) | class BiEncoderModel(nn.Module): method __init__ (line 29) | def __init__(self, method gradient_checkpointing_enable (line 57) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 60) | def enable_input_require_grads(self, **kwargs): method encode (line 63) | def encode(self, features): method compute_similarity (line 99) | def compute_similarity(self, q_reps, p_reps): method get_local_similarity (line 104) | def get_local_similarity(self, q_reps, p_reps, all_scores): method compute_local_similarity (line 113) | def compute_local_similarity(self, q_reps, p_reps): method forward (line 118) | def forward(self, method compute_cross_entropy_loss (line 182) | def compute_cross_entropy_loss(self, scores, target): method _dist_gather_tensor (line 185) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]): method save (line 197) | def save(self, output_dir: str): FILE: research/llm_dense_retriever/finetune/run.py function main (line 22) | def main(): function save_model (line 146) | def save_model(): FILE: research/llm_dense_retriever/finetune/trainer.py class BiTrainer (line 4) | class BiTrainer(Trainer): method _save (line 5) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 28) | def compute_loss(self, model, inputs, return_outputs=False): FILE: research/llm_embedder/evaluation/eval_icl.py class ICLArgs (line 96) | class ICLArgs(LMArgs, RetrievalArgs): class GenerationArgs (line 142) | class GenerationArgs(GenerationArgs): function remove_double_space (line 149) | def remove_double_space(string): function load_test_data (line 153) | def load_test_data(knn_inxs, function main (line 220) | def main(): FILE: research/llm_embedder/evaluation/eval_lrlm.py class LRLMArgs (line 23) | class LRLMArgs(RetrievalArgs, SRLMArgs): method __post_init__ (line 63) | def __post_init__(self): function process_lrlm (line 70) | def process_lrlm(tokenizer, context_max_length=4096, target_length=1024,... function main (line 100) | def main(): FILE: research/llm_embedder/evaluation/eval_mmlu.py class MMLUArgs (line 32) | class MMLUArgs(LMArgs, RetrievalArgs): function process_mmlu (line 88) | def process_mmlu(tokenizer, context_max_length=2048, key_num=3, few_shot... function evaluate_mmlu (line 204) | def evaluate_mmlu(eval_data, save_path, **kwds): function main (line 256) | def main(): FILE: research/llm_embedder/evaluation/eval_msc.py class LRLMArgs (line 23) | class LRLMArgs(RetrievalArgs, SRLMArgs): class HistoryCollator (line 51) | class HistoryCollator: method __call__ (line 53) | def __call__(self, batch_elem): function main (line 78) | def main(): FILE: research/llm_embedder/evaluation/eval_popqa.py class PopQAArgs (line 47) | class PopQAArgs(LMArgs, RetrievalArgs): class GenerationArgs (line 96) | class GenerationArgs(GenerationArgs): function process_popqa (line 104) | def process_popqa(tokenizer, context_max_length=2048, key_num=3, few_sho... function evaluate_popqa (line 185) | def evaluate_popqa(eval_data, save_path, **kwds): function main (line 217) | def main(): FILE: research/llm_embedder/evaluation/eval_qa.py class QAArgs (line 28) | class QAArgs(LMArgs, RetrievalArgs): class GenerationArgs (line 89) | class GenerationArgs(GenerationArgs): function process_qa (line 97) | def process_qa(tokenizer, context_max_length=2048, key_num=3, few_shot=0... function evaluate_qa (line 172) | def evaluate_qa(eval_data, save_path, **kwds): function main (line 197) | def main(): FILE: research/llm_embedder/evaluation/eval_qrecc.py class QRECCArgs (line 29) | class QRECCArgs(LMArgs, RetrievalArgs): class GenerationArgs (line 81) | class GenerationArgs(GenerationArgs): function process_qrecc (line 89) | def process_qrecc(tokenizer, context_max_length=2048, key_num=3, is_enco... function evaluate_qrecc (line 140) | def evaluate_qrecc(eval_data, save_path, **kwds): function main (line 166) | def main(): FILE: research/llm_embedder/evaluation/eval_retrieval.py class Args (line 23) | class Args(RetrievalArgs): function main (line 45) | def main(args, accelerator=None, log=True): FILE: research/llm_embedder/evaluation/eval_tool.py class ToolArgs (line 15) | class ToolArgs(RetrievalArgs): FILE: research/llm_embedder/evaluation/icl_utils.py function _normalize_answer (line 15) | def _normalize_answer(text, punc_chars, punc_repl): function normalize_squad (line 35) | def normalize_squad(answer): function _metric_max_over_ground_truths (line 40) | def _metric_max_over_ground_truths(metric_fn, ground_truths, prediction): function _exact_match_score (line 47) | def _exact_match_score(target, prediction): function _f1_score (line 51) | def _f1_score(target, prediction): function qa_metrics (line 65) | def qa_metrics(targets, predictions, return_list=False): class App (line 94) | class App: method __init__ (line 95) | def __init__(self): method add (line 98) | def add(self, key): method __getitem__ (line 105) | def __getitem__(self, __name: str): function rouge (line 113) | def rouge(preds, labels, return_list=False): function squad (line 135) | def squad(labels, preds, return_list=False): function simple_accuracy (line 154) | def simple_accuracy(preds, labels, return_list=False): function compute_metrics (line 165) | def compute_metrics(metric, labels, preds): function compute_scores (line 181) | def compute_scores(metric, preds, labels): function flat_options (line 188) | def flat_options(data): function perplexity_to_choice (line 195) | def perplexity_to_choice(data, perplexity): function get_length (line 208) | def get_length(tokenizer, text): function get_prompt_length (line 217) | def get_prompt_length(tokenizer, prompts_list, question, n_tokens_in_pro... function _llm_generation_func (line 224) | def _llm_generation_func(examples: Dict[str, List], function _llm_perplexity_func (line 257) | def _llm_perplexity_func(examples: Dict[str, List], FILE: research/llm_embedder/run_dense.py function main (line 19) | def main(): FILE: research/llm_embedder/run_lm_score.py class ScoreArgs (line 20) | class ScoreArgs(LMArgs): function process_lm_scoring (line 47) | def process_lm_scoring(tokenizer, key_max_length=512): function collate_scores (line 123) | def collate_scores(eval_data, save_name): function main (line 186) | def main(): FILE: research/llm_embedder/run_ranker.py function main (line 18) | def main(): FILE: research/llm_embedder/scripts/ours2st.py function convert_ours_ckpt_to_sentence_transformer (line 8) | def convert_ours_ckpt_to_sentence_transformer(src_dir, dest_dir, pooling... class Args (line 46) | class Args: method __post_init__ (line 68) | def __post_init__(self): FILE: research/llm_embedder/src/lm/args.py class LMArgs (line 7) | class LMArgs(BaseArgs): class SRLMArgs (line 53) | class SRLMArgs(LMArgs): class GenerationArgs (line 101) | class GenerationArgs: method __post_init__ (line 130) | def __post_init__(self): FILE: research/llm_embedder/src/lm/modeling_lm.py class LM (line 11) | class LM(torch.nn.Module): method __init__ (line 12) | def __init__(self, model_name_or_path=None, padding_side="left", dtype... method device (line 59) | def device(self): method _move_to_device (line 65) | def _move_to_device(self, inputs): method compute_nlls (line 72) | def compute_nlls(self, dataloader): method generate (line 124) | def generate(self, dataloader, return_new_tokens_only=True, decode=Tru... FILE: research/llm_embedder/src/lm/modeling_srlm.py class SRLMOutput (line 18) | class SRLMOutput(ModelOutput): class SelfRetrievalLM (line 24) | class SelfRetrievalLM(LM): method __init__ (line 25) | def __init__(self, retriever=None, context_window_size:int=2048, chunk... method _get_retrieved_chunks (line 47) | def _get_retrieved_chunks(self, value_chunks, retrieved_indices): method _get_retrieved_history (line 71) | def _get_retrieved_history(self, history, retrieved_indices): method forward (line 101) | def forward(self, **kwds): method forward_with_history_retrieval (line 107) | def forward_with_history_retrieval(self, query:np.ndarray, history:np.... method forward_with_chunk_retrieval (line 227) | def forward_with_chunk_retrieval(self, input_ids, attention_mask, labe... method compute_perplexity (line 464) | def compute_perplexity(self, dataloader): FILE: research/llm_embedder/src/retrieval/args.py class BaseArgs (line 8) | class BaseArgs: method resolve_path (line 71) | def resolve_path(self, path): method __post_init__ (line 85) | def __post_init__(self): class DenseRetrievalArgs (line 97) | class DenseRetrievalArgs(BaseArgs): class BM25Args (line 184) | class BM25Args(BaseArgs): class RankerArgs (line 225) | class RankerArgs(BaseArgs): class RetrievalArgs (line 271) | class RetrievalArgs(DenseRetrievalArgs, BM25Args): class RetrievalTrainingArgs (line 279) | class RetrievalTrainingArgs(TrainingArguments): method __setattr__ (line 406) | def __setattr__(self, name, value): method __post_init__ (line 409) | def __post_init__(self): FILE: research/llm_embedder/src/retrieval/data.py class RetrievalDataset (line 16) | class RetrievalDataset: method get_train_process_fn (line 17) | def get_train_process_fn(train_group_size=8, select_positive="first", ... method prepare_train_dataset (line 122) | def prepare_train_dataset(data_file=None, cache_dir=None, config=None,... method prepare_eval_dataset (line 183) | def prepare_eval_dataset(data_file=None, cache_dir=None, instruction=N... method prepare_corpus (line 236) | def prepare_corpus(data_file, key_template:str, cache_dir=None, instru... class SameDatasetTrainDataset (line 254) | class SameDatasetTrainDataset(torch.utils.data.Dataset): method __init__ (line 264) | def __init__(self, dataset, dataset_indices_range, batch_size, seed, o... method create_epoch (line 284) | def create_epoch(self): method __getitem__ (line 305) | def __getitem__(self, idx): method __len__ (line 363) | def __len__(self): class RetrievalDataCollator (line 368) | class RetrievalDataCollator: method __call__ (line 377) | def __call__(self, batch_elem): FILE: research/llm_embedder/src/retrieval/evalnq.py class SimpleTokenizer (line 12) | class SimpleTokenizer: method __init__ (line 16) | def __init__(self, **kwargs): method tokenize (line 26) | def tokenize(self, text, uncase=False): function _normalize (line 40) | def _normalize(text): function has_answer (line 44) | def has_answer(answers, text, tokenizer) -> bool: class EvalDataset (line 62) | class EvalDataset(Dataset): method __init__ (line 63) | def __init__(self, retrieval_result, eval_dataset, corpus): method __getitem__ (line 69) | def __getitem__(self, qidx): method __len__ (line 79) | def __len__(self): function evaluate_nq (line 83) | def evaluate_nq(retrieval_result: dict, eval_data: datasets.Dataset, cor... FILE: research/llm_embedder/src/retrieval/metrics.py class RetrievalMetric (line 13) | class RetrievalMetric: method get_metric_fn (line 16) | def get_metric_fn(cls, metric_names, **kwds): method _get_save_path (line 37) | def _get_save_path(eval_data, output_dir=None, field="result", save_na... method _save_result (line 55) | def _save_result(query_ids, preds, result_path, scores=None): method _load_result (line 71) | def _load_result(result_path): method _clean_pred (line 92) | def _clean_pred(pred, score=None): method _prepare_label (line 109) | def _prepare_label(eval_data): method mrr (line 124) | def mrr(eval_data=None, cutoffs=[10], **kwds): method recall (line 163) | def recall(eval_data=None, cutoffs=[10], **kwds): method ndcg (line 194) | def ndcg(eval_data=None, cutoffs=[10], **kwds): method nq (line 234) | def nq(eval_data, corpus, cache_dir=None, **kwds): method collate_key (line 246) | def collate_key(eval_data, save_name, corpus, output_dir=None, save_to... method collate_neg (line 296) | def collate_neg(eval_data, save_name, corpus, max_neg_num=100, filter_... method collate_score (line 363) | def collate_score(eval_data, save_name, output_dir=None, save_to_outpu... FILE: research/llm_embedder/src/retrieval/modeling_bm25.py class BM25Retriever (line 12) | class BM25Retriever: method __init__ (line 13) | def __init__(self, anserini_dir, k1=0.9, b=0.4, **kwds) -> None: method _prepare_collection (line 18) | def _prepare_collection(self, corpus:datasets.Dataset, collection_dir,... method _prepare_query (line 34) | def _prepare_query(self, eval_data:Union[str, datasets.Dataset], query... method _prepare_result (line 81) | def _prepare_result(self, result_path): method index (line 91) | def index(self, corpus:Optional[datasets.Dataset]=None, output_dir:str... method search (line 109) | def search(self, eval_data:Union[str, datasets.Dataset], output_dir:Op... class NaiveBM25Retriever (line 143) | class NaiveBM25Retriever: method __init__ (line 144) | def __init__(self, k1:float=0.9, b:float=0.4, **kwds) -> None: method index (line 148) | def index(self, corpus: List[Union[str, List[int]]], verbose: bool=Fal... method search (line 188) | def search(self, queries: Union[str, List[int], List[str], List[List[i... FILE: research/llm_embedder/src/retrieval/modeling_dense.py class DenseRetriever (line 20) | class DenseRetriever(torch.nn.Module): method __init__ (line 21) | def __init__(self, query_encoder:str='BAAI/bge-base-en', key_encoder:s... method _post_init (line 64) | def _post_init(self): method gradient_checkpointing_enable (line 81) | def gradient_checkpointing_enable(self): method device (line 86) | def device(self): method _gather_tensors (line 92) | def _gather_tensors(self, local_tensor): method _save_to_memmap (line 110) | def _save_to_memmap(self, path: str, shape: tuple, array: np.ndarray, ... method _prepare (line 149) | def _prepare(self, inputs: Union[str, List[str], Mapping], field="key"): method _pool (line 175) | def _pool(self, embeddings, attention_mask): method encode (line 193) | def encode(self, inputs: Union[str, List[str], Mapping], field:str="ke... method _compute_loss (line 237) | def _compute_loss(self, query_embedding, key_embedding, teacher_scores): method _refresh_config (line 312) | def _refresh_config(self, task): method forward (line 330) | def forward(self, query, key, task, teacher_scores=None, **kwds): method index (line 345) | def index(self, corpus: Dataset, output_dir="data/outputs", embedding_... method search (line 414) | def search(self, inputs: Union[str, List[str], Mapping], hits:int=10, ... method rerank (line 449) | def rerank(self, query, key, key_mask=None, **kwds): method save_pretrained (line 463) | def save_pretrained(self, output_dir: str, *args, **kwargs): class FaissIndex (line 485) | class FaissIndex: method __init__ (line 486) | def __init__(self, device) -> None: method build (line 494) | def build(self, encoded_corpus, index_factory, metric): method load (line 517) | def load(self, index_path): method save (line 527) | def save(self, index_path): method search (line 535) | def search(self, query, hits): FILE: research/llm_embedder/src/retrieval/modeling_ranker.py class CrossEncoder (line 11) | class CrossEncoder(torch.nn.Module): method __init__ (line 12) | def __init__(self, ranker, dtype:str="fp16", cache_dir=None, accelerat... method gradient_checkpointing_enable (line 31) | def gradient_checkpointing_enable(self): method forward (line 34) | def forward(self, cross, batch_size, **kwds): method rerank (line 41) | def rerank(self, cross, batch_size, key_mask=None, hits=None, **kwds): method save_pretrained (line 57) | def save_pretrained(self, output_dir: str, *args, **kwargs): FILE: research/llm_embedder/src/retrieval/modeling_unified.py class Retriever (line 11) | class Retriever: method __init__ (line 13) | def __init__(self, retrieval_method: str="dense", **kwds) -> None: method to (line 30) | def to(self, *args, **kwds): method encode (line 35) | def encode(self, *args, **kwds): method index (line 41) | def index(self, corpus, **kwds): method search (line 47) | def search(self, eval_dataset, **kwds): FILE: research/llm_embedder/src/retrieval/trainer.py class RetrievalTrainer (line 19) | class RetrievalTrainer(Trainer): method __init__ (line 20) | def __init__(self, *args, corpus:Dataset, model_args, file_logger, **k... method _save (line 29) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method evaluate (line 50) | def evaluate(self, eval_dataset: Optional[Dataset] = None, ignore_keys... class EarlyExitCallBack (line 193) | class EarlyExitCallBack(TrainerCallback): method __init__ (line 194) | def __init__(self, early_exit_steps=None): method on_step_end (line 197) | def on_step_end(self, args: TrainingArguments, state: TrainerState, co... FILE: research/llm_embedder/src/utils/llama_patch.py function forward (line 23) | def forward( function _prepare_decoder_attention_mask (line 121) | def _prepare_decoder_attention_mask(self, attention_mask, input_shape, i... function enable_flash_attention (line 125) | def enable_flash_attention(model=None): function disable_flash_attention (line 152) | def disable_flash_attention(model=None): function upcast_layer_for_flash_attention (line 173) | def upcast_layer_for_flash_attention(model, torch_dtype): FILE: research/llm_embedder/src/utils/util.py function do_nothing (line 25) | def do_nothing(): function makedirs (line 28) | def makedirs(path): function clear_dir (line 33) | def clear_dir(directory): function split_file_dir_name_ext (line 46) | def split_file_dir_name_ext(path): function save_pickle (line 52) | def save_pickle(obj, path:str): function load_pickle (line 61) | def load_pickle(path): function save_json (line 65) | def save_json(obj, path:str): function load_json (line 71) | def load_json(path, lines=False): function filelock (line 83) | def filelock(path, process_index=0): function normalize_text (line 95) | def normalize_text(text, ignore_case=True, ignore_punctuation=True, igno... function min_max_normalize (line 118) | def min_max_normalize(array): function get_max_length_in_nested_lists (line 121) | def get_max_length_in_nested_lists(lst): function pad_nested_lists (line 132) | def pad_nested_lists(lst, max_length, padding_value, padding_side="right"): function mask_nested_lists (line 151) | def mask_nested_lists(lst, mask_target, mask_value=0): function are_elements_of_same_length (line 159) | def are_elements_of_same_length(lst: List): function add_eos (line 166) | def add_eos(inputs: Mapping, eos_token_id: int): function remove_eos (line 180) | def remove_eos(inputs: Mapping, eos_token_id: int): function mix_parameters (line 187) | def mix_parameters(models: List[torch.nn.Module], weights: Optional[List... class FileLogger (line 226) | class FileLogger: method __init__ (line 227) | def __init__(self, log_file) -> None: method log (line 230) | def log(self, metrics, **kwargs): class Sequential_Sampler (line 251) | class Sequential_Sampler: method __init__ (line 255) | def __init__(self, dataset_length:int, num_replicas:int, rank:int) -> ... method __iter__ (line 275) | def __iter__(self): method __len__ (line 280) | def __len__(self): class DatasetProcessFn (line 284) | class DatasetProcessFn: method __init__ (line 293) | def __init__(self, augment=False): method __call__ (line 296) | def __call__(self, _process_fn): class DefaultDataCollator (line 334) | class DefaultDataCollator: method __call__ (line 345) | def __call__(self, batch_elem: List) -> Dict[str, Any]: FILE: research/llm_reranker/evaluate.py class Args (line 11) | class Args(): function evaluate_mrr (line 45) | def evaluate_mrr(predicts, labels, cutoffs): function main (line 70) | def main(): FILE: research/llm_reranker/finetune_for_instruction/arguments.py function default_list (line 8) | def default_list() -> List[str]: class ModelArguments (line 13) | class ModelArguments: class DataArguments (line 83) | class DataArguments: method __post_init__ (line 150) | def __post_init__(self): class RetrieverTrainingArguments (line 155) | class RetrieverTrainingArguments(TrainingArguments): FILE: research/llm_reranker/finetune_for_instruction/data.py class TrainDatasetForReranker (line 19) | class TrainDatasetForReranker(Dataset): method __init__ (line 20) | def __init__( method __len__ (line 57) | def __len__(self): method is_chinese (line 60) | def is_chinese(self, text): method __getitem__ (line 64) | def __getitem__(self, item) -> List[BatchEncoding]: class RerankCollator (line 140) | class RerankCollator(DataCollatorForSeq2Seq): method __call__ (line 149) | def __call__(self, features, return_tensors='pt'): FILE: research/llm_reranker/finetune_for_instruction/load_model.py function get_model (line 6) | def get_model(model_args, training_args): FILE: research/llm_reranker/finetune_for_instruction/modeling.py class RerankerOutput (line 13) | class RerankerOutput(ModelOutput): class BiEncoderModel (line 18) | class BiEncoderModel(nn.Module): method __init__ (line 19) | def __init__(self, method gradient_checkpointing_enable (line 38) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 41) | def enable_input_require_grads(self, **kwargs): method encode (line 44) | def encode(self, features): method forward (line 59) | def forward(self, pair: Union[Dict[str, Tensor], List[Dict[str, Tensor... method compute_loss (line 75) | def compute_loss(self, scores, target): method save (line 78) | def save(self, output_dir: str): method save_pretrained (line 87) | def save_pretrained(self, **kwargs): FILE: research/llm_reranker/finetune_for_instruction/run.py function main (line 21) | def main(): FILE: research/llm_reranker/finetune_for_instruction/trainer.py class BiTrainer (line 6) | class BiTrainer(Trainer): method _save (line 9) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 41) | def compute_loss(self, model, inputs, return_outputs=False): FILE: research/llm_reranker/finetune_for_layerwise/arguments.py function default_list (line 8) | def default_list() -> List[str]: class ModelArguments (line 13) | class ModelArguments: class DataArguments (line 97) | class DataArguments: method __post_init__ (line 168) | def __post_init__(self): class RetrieverTrainingArguments (line 173) | class RetrieverTrainingArguments(TrainingArguments): FILE: research/llm_reranker/finetune_for_layerwise/configuration_minicpm_reranker.py class LayerWiseMiniCPMConfig (line 30) | class LayerWiseMiniCPMConfig(PretrainedConfig): method __init__ (line 116) | def __init__( method _rope_scaling_validation (line 189) | def _rope_scaling_validation(self): FILE: research/llm_reranker/finetune_for_layerwise/data.py class TrainDatasetForReranker (line 19) | class TrainDatasetForReranker(Dataset): method __init__ (line 20) | def __init__( method __len__ (line 57) | def __len__(self): method __getitem__ (line 60) | def __getitem__(self, item) -> List[BatchEncoding]: class RerankCollator (line 135) | class RerankCollator(DataCollatorForSeq2Seq): method __call__ (line 144) | def __call__(self, features, return_tensors='pt'): FILE: research/llm_reranker/finetune_for_layerwise/load_model.py function get_model (line 8) | def get_model(model_args, training_args, only_for_one_logit: int = None): FILE: research/llm_reranker/finetune_for_layerwise/modeling.py class RerankerOutput (line 14) | class RerankerOutput(ModelOutput): class BiEncoderModel (line 18) | class BiEncoderModel(nn.Module): method __init__ (line 19) | def __init__(self, method gradient_checkpointing_enable (line 40) | def gradient_checkpointing_enable(self, **kwargs): method enable_input_require_grads (line 43) | def enable_input_require_grads(self, **kwargs): method encode (line 46) | def encode(self, features): method forward (line 64) | def forward(self, pair: Union[Dict[str, Tensor], List[Dict[str, Tensor... method compute_loss (line 94) | def compute_loss(self, scores, target): method save (line 97) | def save(self, output_dir: str): method save_pretrained (line 106) | def save_pretrained(self, **kwargs): FILE: research/llm_reranker/finetune_for_layerwise/modeling_minicpm_reranker.py function _get_unpad_data (line 77) | def _get_unpad_data(attention_mask): function _expand_mask (line 89) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option... function _make_causal_mask (line 96) | def _make_causal_mask( function rms_layernorm (line 108) | def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float): class MiniCPMRMSNorm (line 115) | class MiniCPMRMSNorm(nn.Module): method __init__ (line 116) | def __init__(self, hidden_size, eps=1e-6): method forward (line 124) | def forward(self, hidden_states): class MiniCPMRotaryEmbedding (line 131) | class MiniCPMRotaryEmbedding(nn.Module): method __init__ (line 132) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 147) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 157) | def forward(self, x, seq_len=None): class MiniCPMLinearScalingRotaryEmbedding (line 168) | class MiniCPMLinearScalingRotaryEmbedding(MiniCPMRotaryEmbedding): method __init__ (line 171) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 175) | def _set_cos_sin_cache(self, seq_len, device, dtype): class MiniCPMDynamicNTKScalingRotaryEmbedding (line 187) | class MiniCPMDynamicNTKScalingRotaryEmbedding(MiniCPMRotaryEmbedding): method __init__ (line 190) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 194) | def _set_cos_sin_cache(self, seq_len, device, dtype): function rotate_half (line 214) | def rotate_half(x): function apply_rotary_pos_emb (line 221) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): class MiniCPMMLP (line 256) | class MiniCPMMLP(nn.Module): method __init__ (line 257) | def __init__(self, config): method forward (line 267) | def forward(self, x): function repeat_kv (line 290) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class MiniCPMAttention (line 302) | class MiniCPMAttention(nn.Module): method __init__ (line 305) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional... method _init_rope (line 338) | def _init_rope(self): method _shape (line 365) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 368) | def forward( class MiniCPMFlashAttention2 (line 473) | class MiniCPMFlashAttention2(MiniCPMAttention): method __init__ (line 480) | def __init__(self, *args, **kwargs): method forward (line 488) | def forward( method _flash_attention_forward (line 576) | def _flash_attention_forward( method _upad_input (line 633) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class MiniCPMSdpaAttention (line 672) | class MiniCPMSdpaAttention(MiniCPMAttention): method forward (line 680) | def forward( class MiniCPMDecoderLayer (line 766) | class MiniCPMDecoderLayer(nn.Module): method __init__ (line 767) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int): method forward (line 779) | def forward( class MiniCPMPreTrainedModel (line 862) | class MiniCPMPreTrainedModel(PreTrainedModel): method _init_weights (line 872) | def _init_weights(self, module): class LayerWiseMiniCPMModel (line 958) | class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel): method __init__ (line 966) | def __init__(self, config: LayerWiseMiniCPMConfig): method get_input_embeddings (line 984) | def get_input_embeddings(self): method set_input_embeddings (line 987) | def set_input_embeddings(self, value): method forward (line 991) | def forward( class LayerWiseHead (line 1135) | class LayerWiseHead(nn.Module): method __init__ (line 1138) | def __init__(self, input_size, output_size): method forward (line 1142) | def forward(self, **kwargs): class LayerWiseMiniCPMForCausalLM (line 1145) | class LayerWiseMiniCPMForCausalLM(MiniCPMPreTrainedModel): method __init__ (line 1148) | def __init__(self, config): method get_input_embeddings (line 1191) | def get_input_embeddings(self): method set_input_embeddings (line 1194) | def set_input_embeddings(self, value): method get_output_embeddings (line 1197) | def get_output_embeddings(self): method set_output_embeddings (line 1200) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1203) | def set_decoder(self, decoder): method get_decoder (line 1206) | def get_decoder(self): method forward (line 1211) | def forward( method prepare_inputs_for_generation (line 1408) | def prepare_inputs_for_generation( method _reorder_cache (line 1465) | def _reorder_cache(past_key_values, beam_idx): method chat (line 1474) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role... FILE: research/llm_reranker/finetune_for_layerwise/run.py function main (line 21) | def main(): FILE: research/llm_reranker/finetune_for_layerwise/trainer.py class BiTrainer (line 5) | class BiTrainer(Trainer): method _save (line 8) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 40) | def compute_loss(self, model, inputs, return_outputs=False): FILE: research/llm_reranker/merge/configuration_minicpm_reranker.py class LayerWiseMiniCPMConfig (line 29) | class LayerWiseMiniCPMConfig(PretrainedConfig): method __init__ (line 115) | def __init__( method _rope_scaling_validation (line 188) | def _rope_scaling_validation(self): FILE: research/llm_reranker/merge/merge_base_model.py function merge_llm (line 5) | def merge_llm(model_name_or_path, lora_name_or_path, save_path, cache_di... FILE: research/llm_reranker/merge/merge_layerwise_model_from_finetuned_model.py function merge_layerwise_finetuned_llm (line 5) | def merge_layerwise_finetuned_llm(model_name_or_path, lora_name_or_path,... FILE: research/llm_reranker/merge/merge_layerwise_model_from_raw_model.py function merge_layerwise_raw_llm (line 6) | def merge_layerwise_raw_llm(model_name_or_path, lora_name_or_path, save_... FILE: research/llm_reranker/merge/modeling_minicpm_reranker.py function _get_unpad_data (line 76) | def _get_unpad_data(attention_mask): function _expand_mask (line 88) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option... function _make_causal_mask (line 95) | def _make_causal_mask( function rms_layernorm (line 107) | def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float): class MiniCPMRMSNorm (line 114) | class MiniCPMRMSNorm(nn.Module): method __init__ (line 115) | def __init__(self, hidden_size, eps=1e-6): method forward (line 123) | def forward(self, hidden_states): class MiniCPMRotaryEmbedding (line 130) | class MiniCPMRotaryEmbedding(nn.Module): method __init__ (line 131) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 146) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 156) | def forward(self, x, seq_len=None): class MiniCPMLinearScalingRotaryEmbedding (line 167) | class MiniCPMLinearScalingRotaryEmbedding(MiniCPMRotaryEmbedding): method __init__ (line 170) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 174) | def _set_cos_sin_cache(self, seq_len, device, dtype): class MiniCPMDynamicNTKScalingRotaryEmbedding (line 186) | class MiniCPMDynamicNTKScalingRotaryEmbedding(MiniCPMRotaryEmbedding): method __init__ (line 189) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 193) | def _set_cos_sin_cache(self, seq_len, device, dtype): function rotate_half (line 213) | def rotate_half(x): function apply_rotary_pos_emb (line 220) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): class MiniCPMMLP (line 255) | class MiniCPMMLP(nn.Module): method __init__ (line 256) | def __init__(self, config): method forward (line 266) | def forward(self, x): function repeat_kv (line 289) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class MiniCPMAttention (line 301) | class MiniCPMAttention(nn.Module): method __init__ (line 304) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: Optional... method _init_rope (line 337) | def _init_rope(self): method _shape (line 364) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 367) | def forward( class MiniCPMFlashAttention2 (line 472) | class MiniCPMFlashAttention2(MiniCPMAttention): method __init__ (line 479) | def __init__(self, *args, **kwargs): method forward (line 487) | def forward( method _flash_attention_forward (line 575) | def _flash_attention_forward( method _upad_input (line 632) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class MiniCPMSdpaAttention (line 671) | class MiniCPMSdpaAttention(MiniCPMAttention): method forward (line 679) | def forward( class MiniCPMDecoderLayer (line 765) | class MiniCPMDecoderLayer(nn.Module): method __init__ (line 766) | def __init__(self, config: LayerWiseMiniCPMConfig, layer_idx: int): method forward (line 778) | def forward( class MiniCPMPreTrainedModel (line 861) | class MiniCPMPreTrainedModel(PreTrainedModel): method _init_weights (line 871) | def _init_weights(self, module): class LayerWiseMiniCPMModel (line 957) | class LayerWiseMiniCPMModel(MiniCPMPreTrainedModel): method __init__ (line 965) | def __init__(self, config: LayerWiseMiniCPMConfig): method get_input_embeddings (line 983) | def get_input_embeddings(self): method set_input_embeddings (line 986) | def set_input_embeddings(self, value): method forward (line 990) | def forward( class LayerWiseHead (line 1134) | class LayerWiseHead(nn.Module): method __init__ (line 1137) | def __init__(self, input_size, output_size): method forward (line 1141) | def forward(self, **kwargs): class LayerWiseMiniCPMForCausalLM (line 1144) | class LayerWiseMiniCPMForCausalLM(MiniCPMPreTrainedModel): method __init__ (line 1147) | def __init__(self, config): method get_input_embeddings (line 1190) | def get_input_embeddings(self): method set_input_embeddings (line 1193) | def set_input_embeddings(self, value): method get_output_embeddings (line 1196) | def get_output_embeddings(self): method set_output_embeddings (line 1199) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1202) | def set_decoder(self, decoder): method get_decoder (line 1205) | def get_decoder(self): method forward (line 1210) | def forward( method prepare_inputs_for_generation (line 1407) | def prepare_inputs_for_generation( method _reorder_cache (line 1464) | def _reorder_cache(past_key_values, beam_idx): method chat (line 1473) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role... FILE: research/old-examples/pretrain/retromae_pretrain/arguments.py class DataTrainingArguments (line 7) | class DataTrainingArguments: method __post_init__ (line 24) | def __post_init__(self): class ModelArguments (line 30) | class ModelArguments: FILE: research/old-examples/pretrain/retromae_pretrain/data.py class DatasetForPretraining (line 13) | class DatasetForPretraining(torch.utils.data.Dataset): method __init__ (line 14) | def __init__(self, data_dir): method load_dataset (line 26) | def load_dataset(self, file): method __getitem__ (line 34) | def __getitem__(self, item): method __len__ (line 37) | def __len__(self): class RetroMAECollator (line 42) | class RetroMAECollator(DataCollatorForWholeWordMask): method __call__ (line 47) | def __call__(self, examples): FILE: research/old-examples/pretrain/retromae_pretrain/enhancedDecoder.py class BertSelfAttention (line 24) | class BertSelfAttention(nn.Module): method __init__ (line 25) | def __init__(self, config, position_embedding_type=None): method transpose_for_scores (line 51) | def transpose_for_scores(self, x): method forward (line 56) | def forward( class BertAttention (line 153) | class BertAttention(nn.Module): method __init__ (line 154) | def __init__(self, config, position_embedding_type=None): method prune_heads (line 160) | def prune_heads(self, heads): method forward (line 178) | def forward( class BertLayerForDecoder (line 204) | class BertLayerForDecoder(nn.Module): method __init__ (line 205) | def __init__(self, config): method forward (line 219) | def forward( method feed_forward_chunk (line 285) | def feed_forward_chunk(self, attention_output): FILE: research/old-examples/pretrain/retromae_pretrain/modeling.py class RetroMAEForPretraining (line 15) | class RetroMAEForPretraining(nn.Module): method __init__ (line 16) | def __init__( method gradient_checkpointing_enable (line 38) | def gradient_checkpointing_enable(self, **kwargs): method forward (line 41) | def forward(self, method mlm_loss (line 77) | def mlm_loss(self, hiddens, labels): method save_pretrained (line 91) | def save_pretrained(self, output_dir: str): method from_pretrained (line 96) | def from_pretrained( FILE: research/old-examples/pretrain/retromae_pretrain/run.py class TrainerCallbackForSaving (line 27) | class TrainerCallbackForSaving(TrainerCallback): method on_epoch_end (line 28) | def on_epoch_end(self, args: TrainingArguments, state: TrainerState, c... function main (line 35) | def main(): FILE: research/old-examples/pretrain/retromae_pretrain/trainer.py class PreTrainer (line 11) | class PreTrainer(Trainer): method log (line 12) | def log(self, logs: Dict[str, float]) -> None: method _save (line 30) | def _save(self, output_dir: Optional[str] = None, state_dict=None): FILE: research/old-examples/pretrain/retromae_pretrain/utils.py function tensorize_batch (line 6) | def tensorize_batch(sequences: List[torch.Tensor], padding_value, align_... FILE: research/old-examples/search_demo/arguments.py class ModelArguments (line 5) | class ModelArguments: class DataArguments (line 13) | class DataArguments: FILE: research/old-examples/search_demo/pre_process.py class EmbDataset (line 16) | class EmbDataset(Dataset): method __init__ (line 17) | def __init__( method __len__ (line 26) | def __len__(self): method __getitem__ (line 29) | def __getitem__(self, item): function inference (line 39) | def inference(json_path, emb_path, model_path): function build_bm25_index (line 74) | def build_bm25_index(dataset, collection_path, index_path): FILE: research/old-examples/search_demo/tool.py class LocalDatasetLoader (line 13) | class LocalDatasetLoader: method __init__ (line 17) | def __init__(self, class QueryGenerator (line 27) | class QueryGenerator: method __init__ (line 28) | def __init__(self): method run (line 38) | def run(self, history, question): class AnswerGenerator (line 42) | class AnswerGenerator: method __init__ (line 43) | def __init__(self): method run (line 53) | def run(self, history, question, references): class BMVectorIndex (line 57) | class BMVectorIndex: method __init__ (line 58) | def __init__(self, method search_for_doc (line 75) | def search_for_doc(self, query: str, RANKING: int = 1000, TOP_N: int =... class Agent (line 96) | class Agent: method __init__ (line 97) | def __init__(self, index): method empty_memory (line 103) | def empty_memory(self): method update_memory (line 106) | def update_memory(self, question, answer): method generate_query (line 113) | def generate_query(self, question): method generate_answer (line 119) | def generate_answer(self, query, references): method answer (line 122) | def answer(self, question, RANKING=1000, TOP_N=5, verbose=True): FILE: research/reranker/arguments.py class ModelArguments (line 7) | class ModelArguments: class DataArguments (line 27) | class DataArguments: method __post_init__ (line 40) | def __post_init__(self): FILE: research/reranker/data.py class TrainDatasetForCE (line 16) | class TrainDatasetForCE(Dataset): method __init__ (line 17) | def __init__( method create_one_example (line 36) | def create_one_example(self, qry_encoding: str, doc_encoding: str): method __len__ (line 46) | def __len__(self): method __getitem__ (line 49) | def __getitem__(self, item) -> List[BatchEncoding]: class GroupCollator (line 68) | class GroupCollator(DataCollatorWithPadding): method __call__ (line 69) | def __call__( FILE: research/reranker/modeling.py class CrossEncoder (line 13) | class CrossEncoder(nn.Module): method __init__ (line 14) | def __init__(self, hf_model: PreTrainedModel, model_args: ModelArgumen... method gradient_checkpointing_enable (line 30) | def gradient_checkpointing_enable(self, **kwargs): method forward (line 33) | def forward(self, batch): method from_pretrained (line 52) | def from_pretrained( method save_pretrained (line 60) | def save_pretrained(self, output_dir: str): FILE: research/reranker/run.py function main (line 19) | def main(): FILE: research/reranker/trainer.py class CETrainer (line 13) | class CETrainer(Trainer): method _save (line 14) | def _save(self, output_dir: Optional[str] = None, state_dict=None): method compute_loss (line 30) | def compute_loss(self, model: CrossEncoder, inputs): FILE: research/visual_bge/visual_bge/eva_clip/eva_vit_model.py class DropPath (line 33) | class DropPath(nn.Module): method __init__ (line 36) | def __init__(self, drop_prob=None): method forward (line 40) | def forward(self, x): method extra_repr (line 43) | def extra_repr(self) -> str: class Mlp (line 47) | class Mlp(nn.Module): method __init__ (line 48) | def __init__( method forward (line 70) | def forward(self, x): class SwiGLU (line 81) | class SwiGLU(nn.Module): method __init__ (line 82) | def __init__(self, in_features, hidden_features=None, out_features=Non... method forward (line 97) | def forward(self, x): class Attention (line 106) | class Attention(nn.Module): method __init__ (line 107) | def __init__( method forward (line 173) | def forward(self, x, rel_pos_bias=None, attn_mask=None): class Block (line 246) | class Block(nn.Module): method __init__ (line 248) | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_sc... method forward (line 287) | def forward(self, x, rel_pos_bias=None, attn_mask=None): class PatchEmbed (line 305) | class PatchEmbed(nn.Module): method __init__ (line 308) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=... method forward (line 320) | def forward(self, x, **kwargs): class RelativePositionBias (line 329) | class RelativePositionBias(nn.Module): method __init__ (line 331) | def __init__(self, window_size, num_heads): method forward (line 358) | def forward(self): class EVAVisionTransformer (line 366) | class EVAVisionTransformer(nn.Module): method __init__ (line 369) | def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classe... method fix_init_weight (line 443) | def fix_init_weight(self): method get_cast_dtype (line 454) | def get_cast_dtype(self) -> torch.dtype: method _init_weights (line 457) | def _init_weights(self, m): method get_num_layers (line 466) | def get_num_layers(self): method lock (line 469) | def lock(self, unlocked_groups=0, freeze_bn_stats=False): method set_grad_checkpointing (line 475) | def set_grad_checkpointing(self, enable=True): method no_weight_decay (line 479) | def no_weight_decay(self): method get_classifier (line 482) | def get_classifier(self): method reset_classifier (line 485) | def reset_classifier(self, num_classes, global_pool=''): method forward_features (line 489) | def forward_features(self, x, return_all_features=False): method forward (line 527) | def forward(self, x, return_all_features=True): FILE: research/visual_bge/visual_bge/eva_clip/factory.py function _natural_key (line 25) | def _natural_key(string_): function _rescan_model_configs (line 29) | def _rescan_model_configs(): function list_models (line 53) | def list_models(): function add_model_config (line 58) | def add_model_config(path): function get_model_config (line 66) | def get_model_config(model_name): function get_tokenizer (line 73) | def get_tokenizer(model_name): function load_state_dict (line 80) | def load_state_dict(checkpoint_path: str, map_location: str='cpu', model... function load_checkpoint (line 110) | def load_checkpoint(model, checkpoint_path, model_key="model|module|stat... function load_clip_visual_state_dict (line 131) | def load_clip_visual_state_dict(checkpoint_path: str, map_location: str=... function load_clip_text_state_dict (line 144) | def load_clip_text_state_dict(checkpoint_path: str, map_location: str='c... function get_pretrained_tag (line 152) | def get_pretrained_tag(pretrained_model): function load_pretrained_checkpoint (line 163) | def load_pretrained_checkpoint( function create_model (line 211) | def create_model( function create_model_and_transforms (line 362) | def create_model_and_transforms( function create_eva_vision_and_transforms (line 416) | def create_eva_vision_and_transforms( function create_model_from_pretrained (line 471) | def create_model_from_pretrained( FILE: research/visual_bge/visual_bge/eva_clip/hf_model.py class BaseModelOutput (line 21) | class BaseModelOutput: class PretrainedConfig (line 25) | class PretrainedConfig: function _camel2snake (line 31) | def _camel2snake(s): function register_pooler (line 37) | def register_pooler(cls): class MeanPooler (line 44) | class MeanPooler(nn.Module): method forward (line 46) | def forward(self, x:BaseModelOutput, attention_mask:TensorType): class MaxPooler (line 51) | class MaxPooler(nn.Module): method forward (line 53) | def forward(self, x:BaseModelOutput, attention_mask:TensorType): class ClsPooler (line 58) | class ClsPooler(nn.Module): method __init__ (line 60) | def __init__(self, use_pooler_output=True): method forward (line 65) | def forward(self, x:BaseModelOutput, attention_mask:TensorType): class HFTextEncoder (line 75) | class HFTextEncoder(nn.Module): method __init__ (line 77) | def __init__( method mask (line 152) | def mask(self, input_ids, vocab_size, device, targets=None, masked_ind... method forward_mlm (line 177) | def forward_mlm(self, input_ids, image_embeds, mlm_probability=0.25): method forward (line 213) | def forward(self, x:TensorType) -> TensorType: method lock (line 220) | def lock(self, unlocked_layers:int=0, freeze_layer_norm:bool=True): method set_grad_checkpointing (line 239) | def set_grad_checkpointing(self, enable=True): method get_num_layers (line 242) | def get_num_layers(self): method init_parameters (line 247) | def init_parameters(self): FILE: research/visual_bge/visual_bge/eva_clip/loss.py function gather_features (line 21) | def gather_features( class ClipLoss (line 70) | class ClipLoss(nn.Module): method __init__ (line 72) | def __init__( method forward (line 95) | def forward(self, image_features, text_features, logit_scale=1.): FILE: research/visual_bge/visual_bge/eva_clip/model.py class CLIPVisionCfg (line 37) | class CLIPVisionCfg: class CLIPTextCfg (line 66) | class CLIPTextCfg: function get_cast_dtype (line 83) | def get_cast_dtype(precision: str): function _build_vision_tower (line 92) | def _build_vision_tower( function _build_text_tower (line 173) | def _build_text_tower( class CLIP (line 210) | class CLIP(nn.Module): method __init__ (line 211) | def __init__( method lock_image_tower (line 233) | def lock_image_tower(self, unlocked_groups=0, freeze_bn_stats=False): method set_grad_checkpointing (line 238) | def set_grad_checkpointing(self, enable=True): method no_weight_decay (line 243) | def no_weight_decay(self): method encode_image (line 246) | def encode_image(self, image, normalize: bool = False): method encode_text (line 250) | def encode_text(self, text, normalize: bool = False): method forward (line 264) | def forward(self, image, text): class CustomCLIP (line 270) | class CustomCLIP(nn.Module): method __init__ (line 271) | def __init__( method lock_image_tower (line 291) | def lock_image_tower(self, unlocked_groups=0, freeze_bn_stats=False): method lock_text_tower (line 295) | def lock_text_tower(self, unlocked_layers:int=0, freeze_layer_norm:boo... method set_grad_checkpointing (line 299) | def set_grad_checkpointing(self, enable=True): method no_weight_decay (line 305) | def no_weight_decay(self): method encode_image (line 308) | def encode_image(self, image, normalize: bool = False): method encode_text (line 312) | def encode_text(self, text, normalize: bool = False): method forward (line 316) | def forward(self, image, text): function convert_weights_to_lp (line 328) | def convert_weights_to_lp(model: nn.Module, dtype=torch.float16): function convert_to_custom_text_state_dict (line 360) | def convert_to_custom_text_state_dict(state_dict: dict): function build_model_from_openai_state_dict (line 379) | def build_model_from_openai_state_dict( function trace_model (line 439) | def trace_model(model, batch_size=256, device=torch.device('cpu')): FILE: research/visual_bge/visual_bge/eva_clip/modified_resnet.py class Bottleneck (line 10) | class Bottleneck(nn.Module): method __init__ (line 13) | def __init__(self, inplanes, planes, stride=1): method forward (line 42) | def forward(self, x: torch.Tensor): class AttentionPool2d (line 58) | class AttentionPool2d(nn.Module): method __init__ (line 59) | def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, o... method forward (line 68) | def forward(self, x): class ModifiedResNet (line 95) | class ModifiedResNet(nn.Module): method __init__ (line 103) | def __init__(self, layers, output_dim, heads, image_size=224, width=64): method _make_layer (line 132) | def _make_layer(self, planes, blocks, stride=1): method init_parameters (line 141) | def init_parameters(self): method lock (line 154) | def lock(self, unlocked_groups=0, freeze_bn_stats=False): method set_grad_checkpointing (line 162) | def set_grad_checkpointing(self, enable=True): method stem (line 166) | def stem(self, x): method forward (line 173) | def forward(self, x): FILE: research/visual_bge/visual_bge/eva_clip/openai.py function list_openai_models (line 18) | def list_openai_models() -> List[str]: function load_openai_model (line 23) | def load_openai_model( FILE: research/visual_bge/visual_bge/eva_clip/pretrained.py function _pcfg (line 18) | def _pcfg(url='', hf_hub='', filename='', mean=None, std=None): function _clean_tag (line 191) | def _clean_tag(tag: str): function list_pretrained (line 196) | def list_pretrained(as_str: bool = False): function list_pretrained_models_by_tag (line 203) | def list_pretrained_models_by_tag(tag: str): function list_pretrained_tags_by_model (line 213) | def list_pretrained_tags_by_model(model: str): function is_pretrained_cfg (line 221) | def is_pretrained_cfg(model: str, tag: str): function get_pretrained_cfg (line 227) | def get_pretrained_cfg(model: str, tag: str): function get_pretrained_url (line 234) | def get_pretrained_url(model: str, tag: str): function download_pretrained_from_url (line 239) | def download_pretrained_from_url( function has_hf_hub (line 285) | def has_hf_hub(necessary=False): function download_pretrained_from_hf (line 293) | def download_pretrained_from_hf( function download_pretrained (line 304) | def download_pretrained( FILE: research/visual_bge/visual_bge/eva_clip/rope.py function broadcat (line 7) | def broadcat(tensors, dim = -1): function rotate_half (line 23) | def rotate_half(x): class VisionRotaryEmbedding (line 30) | class VisionRotaryEmbedding(nn.Module): method __init__ (line 31) | def __init__( method forward (line 70) | def forward(self, t, start_index = 0): class VisionRotaryEmbeddingFast (line 79) | class VisionRotaryEmbeddingFast(nn.Module): method __init__ (line 80) | def __init__( method forward (line 121) | def forward(self, t, patch_indices_keep=None): FILE: research/visual_bge/visual_bge/eva_clip/timm_model.py class TimmModel (line 28) | class TimmModel(nn.Module): method __init__ (line 33) | def __init__( method lock (line 80) | def lock(self, unlocked_groups=0, freeze_bn_stats=False): method set_grad_checkpointing (line 113) | def set_grad_checkpointing(self, enable=True): method forward (line 119) | def forward(self, x): FILE: research/visual_bge/visual_bge/eva_clip/tokenizer.py function default_bpe (line 21) | def default_bpe(): function bytes_to_unicode (line 26) | def bytes_to_unicode(): function get_pairs (line 48) | def get_pairs(word): function basic_clean (line 60) | def basic_clean(text): function whitespace_clean (line 66) | def whitespace_clean(text): class SimpleTokenizer (line 72) | class SimpleTokenizer(object): method __init__ (line 73) | def __init__(self, bpe_path: str = default_bpe(), special_tokens=None): method bpe (line 98) | def bpe(self, token): method encode (line 139) | def encode(self, text): method decode (line 147) | def decode(self, tokens): function tokenize (line 156) | def tokenize(texts: Union[str, List[str]], context_length: int = 77) -> ... class HFTokenizer (line 188) | class HFTokenizer: method __init__ (line 190) | def __init__(self, tokenizer_name:str): method __call__ (line 194) | def __call__(self, texts:Union[str, List[str]], context_length:int=77)... FILE: research/visual_bge/visual_bge/eva_clip/transform.py class ResizeMaxSize (line 13) | class ResizeMaxSize(nn.Module): method __init__ (line 15) | def __init__(self, max_size, interpolation=InterpolationMode.BICUBIC, ... method forward (line 24) | def forward(self, img): function _convert_to_rgb (line 39) | def _convert_to_rgb(image): function image_transform (line 60) | def image_transform( FILE: research/visual_bge/visual_bge/eva_clip/transformer.py class LayerNormFp32 (line 36) | class LayerNormFp32(nn.LayerNorm): method __init__ (line 38) | def __init__(self, *args, **kwargs): method forward (line 41) | def forward(self, x: torch.Tensor): class LayerNorm (line 52) | class LayerNorm(nn.LayerNorm): method forward (line 55) | def forward(self, x: torch.Tensor): class QuickGELU (line 60) | class QuickGELU(nn.Module): method forward (line 62) | def forward(self, x: torch.Tensor): class LayerScale (line 66) | class LayerScale(nn.Module): method __init__ (line 67) | def __init__(self, dim, init_values=1e-5, inplace=False): method forward (line 72) | def forward(self, x): class PatchDropout (line 75) | class PatchDropout(nn.Module): method __init__ (line 80) | def __init__(self, prob, exclude_first_token=True): method forward (line 87) | def forward(self, x): function _in_projection_packed (line 119) | def _in_projection_packed( class Attention (line 150) | class Attention(nn.Module): method __init__ (line 151) | def __init__( method forward (line 195) | def forward(self, x, attn_mask: Optional[torch.Tensor] = None): class CustomAttention (line 243) | class CustomAttention(nn.Module): method __init__ (line 244) | def __init__( method forward (line 286) | def forward(self, query: torch.Tensor, key: torch.Tensor, value: torch... class CustomResidualAttentionBlock (line 339) | class CustomResidualAttentionBlock(nn.Module): method __init__ (line 340) | def __init__( method forward (line 384) | def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, a... class CustomTransformer (line 389) | class CustomTransformer(nn.Module): method __init__ (line 390) | def __init__( method get_cast_dtype (line 429) | def get_cast_dtype(self) -> torch.dtype: method forward (line 432) | def forward(self, q: torch.Tensor, k: torch.Tensor = None, v: torch.Te... class ResidualAttentionBlock (line 443) | class ResidualAttentionBlock(nn.Module): method __init__ (line 444) | def __init__( method attention (line 474) | def attention(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor]... method forward (line 480) | def forward(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor] =... class Transformer (line 485) | class Transformer(nn.Module): method __init__ (line 486) | def __init__( method get_cast_dtype (line 508) | def get_cast_dtype(self) -> torch.dtype: method forward (line 511) | def forward(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor] =... class VisionTransformer (line 520) | class VisionTransformer(nn.Module): method __init__ (line 521) | def __init__( method lock (line 567) | def lock(self, unlocked_groups=0, freeze_bn_stats=False): method get_num_layers (line 600) | def get_num_layers(self): method set_grad_checkpointing (line 604) | def set_grad_checkpointing(self, enable=True): method no_weight_decay (line 608) | def no_weight_decay(self): method forward (line 611) | def forward(self, x: torch.Tensor, return_all_features: bool=False): class TextTransformer (line 642) | class TextTransformer(nn.Module): method __init__ (line 643) | def __init__( method init_parameters (line 686) | def init_parameters(self): method set_grad_checkpointing (line 703) | def set_grad_checkpointing(self, enable=True): method no_weight_decay (line 707) | def no_weight_decay(self): method get_num_layers (line 711) | def get_num_layers(self): method build_attention_mask (line 714) | def build_attention_mask(self): method forward (line 722) | def forward(self, text, return_all_features: bool=False): FILE: research/visual_bge/visual_bge/eva_clip/utils.py function resize_clip_pos_embed (line 13) | def resize_clip_pos_embed(state_dict, model, interpolation: str = 'bicub... function resize_visual_pos_embed (line 46) | def resize_visual_pos_embed(state_dict, model, interpolation: str = 'bic... function resize_evaclip_pos_embed (line 78) | def resize_evaclip_pos_embed(state_dict, model, interpolation: str = 'bi... function resize_eva_pos_embed (line 109) | def resize_eva_pos_embed(state_dict, model, interpolation: str = 'bicubi... function resize_rel_pos_embed (line 140) | def resize_rel_pos_embed(state_dict, model, interpolation: str = 'bicubi... function freeze_batch_norm_2d (line 237) | def freeze_batch_norm_2d(module, module_match={}, name=''): function _ntuple (line 277) | def _ntuple(n): function is_logging (line 292) | def is_logging(args): class AllGather (line 304) | class AllGather(torch.autograd.Function): method forward (line 311) | def forward(ctx, tensor, rank, world_size): method backward (line 319) | def backward(ctx, grad_output): FILE: research/visual_bge/visual_bge/modeling.py class EncoderOutput (line 19) | class EncoderOutput(ModelOutput): class Visualized_BGE (line 26) | class Visualized_BGE(nn.Module): method __init__ (line 27) | def __init__(self, method load_model (line 105) | def load_model(self, model_weight): method gradient_checkpointing_enable (line 108) | def gradient_checkpointing_enable(self, **kwargs): method encode (line 114) | def encode(self, image=None, text=None): method get_extended_attention_mask (line 132) | def get_extended_attention_mask( method sentence_embedding (line 173) | def sentence_embedding(self, hidden_state, mask): method encode_text (line 182) | def encode_text(self, texts): method encode_mm (line 224) | def encode_mm(self, images:torch.Tensor, texts): method compute_similarity (line 296) | def compute_similarity(self, q_reps, p_reps): method img_token_embedding (line 301) | def img_token_embedding(self, images): method encode_image (line 308) | def encode_image(self, images): method forward (line 320) | def forward(self, mm_it_query=None, image_candidate=None, text_candida... method compute_loss (line 361) | def compute_loss(self, scores, target): method _dist_gather_tensor (line 364) | def _dist_gather_tensor(self, t: Optional[torch.Tensor]): method save (line 377) | def save(self, output_dir: str): FILE: scripts/add_reranker_score.py class ScoreArgs (line 10) | class ScoreArgs: class ModelArgs (line 20) | class ModelArgs: function main (line 84) | def main(score_args: ScoreArgs, model_args: ModelArgs): FILE: scripts/hn_mine.py class DataArgs (line 15) | class DataArgs: class ModelArgs (line 43) | class ModelArgs: method __post_init__ (line 94) | def __post_init__(self): function create_index (line 102) | def create_index(embeddings: np.ndarray, use_gpu: bool = False): function batch_search (line 114) | def batch_search( function get_corpus (line 129) | def get_corpus(candidate_pool: str): function find_knn_neg (line 138) | def find_knn_neg( function load_model (line 203) | def load_model(model_args: ModelArgs): function main (line 224) | def main(data_args: DataArgs, model_args: ModelArgs): FILE: scripts/split_data_by_length.py function get_args (line 24) | def get_args(): class SplitByLengthHandler (line 38) | class SplitByLengthHandler: method __init__ (line 39) | def __init__(self, method _get_length_ranges_list (line 76) | def _get_length_ranges_list(length_list: list): method _process_dir (line 90) | def _process_dir(self, dir_path: str, output_dir: str): method _process_file (line 104) | def _process_file(self, file_path: str, output_path: str): method run (line 165) | def run(self, input_path: str, output_dir: str, log_name: str=None): function main (line 190) | def main(args): FILE: tests/conftest.py function device (line 17) | def device(): function transformers_version (line 23) | def transformers_version(): FILE: tests/test_imports_v5.py function test_import_modeling_minicpm_reranker_inference (line 21) | def test_import_modeling_minicpm_reranker_inference(): function test_import_modeling_minicpm_reranker_finetune (line 30) | def test_import_modeling_minicpm_reranker_finetune(): function test_is_torch_fx_available_v5 (line 40) | def test_is_torch_fx_available_v5(): function test_transformers_version (line 48) | def test_transformers_version(transformers_version): FILE: tests/test_infer_embedder_basic.py function cosine_similarity (line 12) | def cosine_similarity(a, b): function test_bge_embedder_basic (line 16) | def test_bge_embedder_basic(device): function test_bge_embedder_batch (line 45) | def test_bge_embedder_batch(device): FILE: tests/test_infer_reranker_basic.py function test_reranker_basic (line 14) | def test_reranker_basic(device): function test_reranker_batch (line 35) | def test_reranker_batch(device):