SYMBOL INDEX (184 symbols across 19 files) FILE: data_gen/generate_data.py class GPT4 (line 24) | class GPT4: method __init__ (line 25) | def __init__(self, model_name='gpt-4-turbo') -> None: method init_api_keys (line 37) | def init_api_keys(self): method get_api_key (line 52) | def get_api_key(self): method call (line 56) | def call(self, content, args={}, showkeys=False): method test (line 91) | def test(self): method retry_call (line 98) | def retry_call(self, content, args={"max_tokens": 4096}): function select_from_data (line 101) | def select_from_data(file_path, sample_size=4200): function call_gpt4_with_retry (line 135) | def call_gpt4_with_retry(task, gpt_instance, content, args={}, showkeys=... function generate_select_data (line 158) | def generate_select_data(item, gpt, task, prefix, ii, save_dir): function find_bracket_content (line 194) | def find_bracket_content(s): function main (line 203) | def main(args): FILE: eval/eval_sglang.py function postprocess_output (line 15) | def postprocess_output(pred): function load_file (line 22) | def load_file(input_fp): function main (line 32) | def main(): FILE: eval/scorer.py function str_similarity (line 6) | def str_similarity(str1, str2): function find_most_similar_index (line 10) | def find_most_similar_index(str_list, target_str): function match_choice3 (line 33) | def match_choice3(text,options): function match (line 50) | def match(prediction, ground_truth): function score (line 57) | def score(data): FILE: eval/utils.py function postprocess_output (line 14) | def postprocess_output(input_instance, prediction, task, intermediate_re... function process_arc_instruction (line 43) | def process_arc_instruction(item, instruction): function postprocess_answers_closed (line 69) | def postprocess_answers_closed(output, task, choices=None): FILE: retrieval_lm/passage_retrieval.py class Retriever (line 30) | class Retriever: method __init__ (line 31) | def __init__(self, args, model=None, tokenizer=None) : method embed_queries (line 36) | def embed_queries(self, args, queries): method embed_queries_demo (line 67) | def embed_queries_demo(self, queries): method index_encoded_data (line 93) | def index_encoded_data(self, index, embedding_files, indexing_batch_si... method add_embeddings (line 112) | def add_embeddings(self, index, embeddings, ids, indexing_batch_size): method add_passages (line 122) | def add_passages(self, passages, top_passages_and_scores): method add_id (line 136) | def add_id(self): method setup_retriever (line 141) | def setup_retriever(self): method search_document (line 178) | def search_document(self, query, top_n=10): method search_document_demo (line 188) | def search_document_demo(self, query, n_docs=10): method setup_retriever_demo (line 198) | def setup_retriever_demo(self, model_name_or_path, passages, passages_... function add_hasanswer (line 229) | def add_hasanswer(data, hasanswer): function load_data (line 236) | def load_data(data_path): function process_item (line 249) | def process_item(item, retriever, n_docs): function load_items (line 282) | def load_items(file_path): function main (line 295) | def main(args): FILE: retrieval_lm/src/beir_utils.py class DenseEncoderModel (line 22) | class DenseEncoderModel: method __init__ (line 23) | def __init__( method encode_queries (line 46) | def encode_queries(self, queries: List[str], batch_size: int, **kwargs... method encode_corpus (line 85) | def encode_corpus(self, corpus: List[Dict[str, str]], batch_size: int,... function evaluate_model (line 125) | def evaluate_model( FILE: retrieval_lm/src/contriever.py class Contriever (line 11) | class Contriever(BertModel): method __init__ (line 12) | def __init__(self, config, pooling="average", **kwargs): method forward (line 17) | def forward( class XLMRetriever (line 58) | class XLMRetriever(XLMRobertaModel): method __init__ (line 59) | def __init__(self, config, pooling="average", **kwargs): method forward (line 64) | def forward( function load_retriever (line 103) | def load_retriever(model_path, pooling="average", random_init=False): FILE: retrieval_lm/src/data.py function load_data (line 20) | def load_data(opt, tokenizer): function load_dataset (line 31) | def load_dataset(data_path, loading_mode): class MultiDataset (line 53) | class MultiDataset(torch.utils.data.Dataset): method __init__ (line 54) | def __init__(self, datasets): method __len__ (line 60) | def __len__(self): method __getitem__ (line 63) | def __getitem__(self, index): method generate_offset (line 71) | def generate_offset(self): method set_prob (line 75) | def set_prob(self, coeff=0.0): class Dataset (line 84) | class Dataset(torch.utils.data.Dataset): method __init__ (line 87) | def __init__(self, data, chunk_length, tokenizer, opt): method __len__ (line 95) | def __len__(self): method __getitem__ (line 98) | def __getitem__(self, index): method generate_offset (line 111) | def generate_offset(self): class Collator (line 115) | class Collator(object): method __init__ (line 116) | def __init__(self, opt): method __call__ (line 119) | def __call__(self, batch_examples): function randomcrop (line 137) | def randomcrop(x, ratio_min, ratio_max): function build_mask (line 147) | def build_mask(tensors): function add_token (line 160) | def add_token(x, token): function deleteword (line 165) | def deleteword(x, p=0.1): function replaceword (line 171) | def replaceword(x, min_random, max_random, p=0.1): function maskword (line 177) | def maskword(x, mask_id, p=0.1): function shuffleword (line 183) | def shuffleword(x, p=0.1): function apply_augmentation (line 195) | def apply_augmentation(x, opt): function add_bos_eos (line 212) | def add_bos_eos(x, bos_token_id, eos_token_id): function load_passages (line 227) | def load_passages(path): FILE: retrieval_lm/src/dist_utils.py class Gather (line 7) | class Gather(torch.autograd.Function): method forward (line 9) | def forward(ctx, x: torch.tensor): method backward (line 15) | def backward(ctx, *grads): function gather (line 21) | def gather(x: torch.tensor): function gather_nograd (line 30) | def gather_nograd(x: torch.tensor): function varsize_gather_nograd (line 41) | def varsize_gather_nograd(x: torch.Tensor): function get_varsize (line 64) | def get_varsize(x: torch.Tensor): function get_rank (line 77) | def get_rank(): function is_main (line 85) | def is_main(): function get_world_size (line 89) | def get_world_size(): function barrier (line 96) | def barrier(): function average_main (line 101) | def average_main(x): function sum_main (line 111) | def sum_main(x): function weighted_average (line 119) | def weighted_average(x, count): FILE: retrieval_lm/src/evaluation.py class SimpleTokenizer (line 22) | class SimpleTokenizer(object): method __init__ (line 26) | def __init__(self): method tokenize (line 36) | def tokenize(self, text, uncased=False): function calculate_matches (line 48) | def calculate_matches(data: List, workers_num: int): function check_answer (line 82) | def check_answer(example, tokenizer) -> List[bool]: function has_answer (line 101) | def has_answer(answers, text, tokenizer) -> bool: function _normalize (line 118) | def _normalize(text): function normalize_answer (line 121) | def normalize_answer(s): function em (line 137) | def em(prediction, ground_truth): function f1 (line 140) | def f1(prediction, ground_truth): function f1_score (line 152) | def f1_score(prediction, ground_truths): function exact_match_score (line 155) | def exact_match_score(prediction, ground_truths): function eval_batch (line 162) | def eval_batch(scores, inversions, avg_topk, idx_topk): function count_inversions (line 168) | def count_inversions(arr): function score (line 177) | def score(x, inversions, avg_topk, idx_topk): FILE: retrieval_lm/src/finetuning_data.py class Dataset (line 11) | class Dataset(torch.utils.data.Dataset): method __init__ (line 12) | def __init__( method __len__ (line 31) | def __len__(self): method __getitem__ (line 34) | def __getitem__(self, index): method _load_data (line 71) | def _load_data(self, datapaths, global_rank, world_size, maxload): method _load_data_json (line 84) | def _load_data_json(self, path, global_rank, world_size, counter, maxl... method _load_data_jsonl (line 98) | def _load_data_jsonl(self, path, global_rank, world_size, counter, max... method sample_n_hard_negatives (line 112) | def sample_n_hard_negatives(self, ex): class Collator (line 127) | class Collator(object): method __init__ (line 128) | def __init__(self, tokenizer, passage_maxlength=200): method __call__ (line 132) | def __call__(self, batch): FILE: retrieval_lm/src/inbatch.py class InBatch (line 17) | class InBatch(nn.Module): method __init__ (line 18) | def __init__(self, opt, retriever=None, tokenizer=None): method _load_retriever (line 32) | def _load_retriever(self, model_id, pooling, random_init): method get_encoder (line 56) | def get_encoder(self): method forward (line 59) | def forward(self, q_tokens, q_mask, k_tokens, k_mask, stats_prefix="",... FILE: retrieval_lm/src/index.py class Indexer (line 17) | class Indexer(object): method __init__ (line 19) | def __init__(self, vector_sz, n_subquantizers=0, n_bits=8): method index_data (line 28) | def index_data(self, ids, embeddings): method convert_to_gpu (line 37) | def convert_to_gpu(self, faiss_gpu_index, useFloat16=False): method search_knn (line 45) | def search_knn(self, query_vectors: np.array, top_docs: int, index_bat... method serialize (line 59) | def serialize(self, dir_path): method deserialize_from (line 68) | def deserialize_from(self, dir_path): method _update_id_mapping (line 81) | def _update_id_mapping(self, db_ids: List): FILE: retrieval_lm/src/moco.py class MoCo (line 14) | class MoCo(nn.Module): method __init__ (line 15) | def __init__(self, opt): method _load_retriever (line 44) | def _load_retriever(self, model_id, pooling, random_init): method get_encoder (line 68) | def get_encoder(self, return_encoder_k=False): method _momentum_update_key_encoder (line 74) | def _momentum_update_key_encoder(self): method _dequeue_and_enqueue (line 82) | def _dequeue_and_enqueue(self, keys): method _compute_logits (line 97) | def _compute_logits(self, q, k): method forward (line 104) | def forward(self, q_tokens, q_mask, k_tokens, k_mask, stats_prefix="",... FILE: retrieval_lm/src/normalize_text.py function normalize (line 133) | def normalize(text): FILE: retrieval_lm/src/options.py class Options (line 7) | class Options: method __init__ (line 8) | def __init__(self): method initialize (line 12) | def initialize(self): method print_options (line 112) | def print_options(self, opt): method parse (line 129) | def parse(self): FILE: retrieval_lm/src/slurm.py function sig_handler (line 18) | def sig_handler(signum, frame): function term_handler (line 30) | def term_handler(signum, frame): function init_signal_handler (line 35) | def init_signal_handler(): function init_distributed_mode (line 43) | def init_distributed_mode(params): FILE: retrieval_lm/src/utils.py function init_logger (line 18) | def init_logger(args, stdout_only=False): function symlink_force (line 35) | def symlink_force(target, link_name): function save (line 46) | def save(model, optimizer, scheduler, step, opt, dir_path, name): function load (line 66) | def load(model_class, dir_path, opt, reset_params=False): class WarmupLinearScheduler (line 91) | class WarmupLinearScheduler(torch.optim.lr_scheduler.LambdaLR): method __init__ (line 92) | def __init__(self, optimizer, warmup, total, ratio, last_epoch=-1): method lr_lambda (line 98) | def lr_lambda(self, step): class CosineScheduler (line 108) | class CosineScheduler(torch.optim.lr_scheduler.LambdaLR): method __init__ (line 109) | def __init__(self, optimizer, warmup, total, ratio=0.1, last_epoch=-1): method lr_lambda (line 115) | def lr_lambda(self, step): function set_optim (line 122) | def set_optim(opt, model): function get_parameters (line 145) | def get_parameters(net, verbose=False): class WeightedAvgStats (line 153) | class WeightedAvgStats: method __init__ (line 156) | def __init__(self): method update (line 160) | def update(self, vals: Dict[str, Tuple[Number, Number]]) -> None: method stats (line 166) | def stats(self) -> Dict[str, float]: method tuple_stats (line 170) | def tuple_stats(self) -> Dict[str, Tuple[float, float]]: method reset (line 173) | def reset(self) -> None: method average_stats (line 178) | def average_stats(self) -> Dict[str, float]: function load_hf (line 193) | def load_hf(object_class, model_name): function init_tb_logger (line 201) | def init_tb_logger(output_dir): FILE: train_rag_sft.py class Train_dataset (line 27) | class Train_dataset(torch.utils.data.Dataset): method __init__ (line 28) | def __init__(self, config, tokenizer): method __getitem__ (line 62) | def __getitem__(self, index): method get_ctxs (line 65) | def get_ctxs(self,documents): method get_prompt (line 80) | def get_prompt(self,da): method collate_fn (line 100) | def collate_fn(self, batch): method __len__ (line 117) | def __len__(self): class SFTMetric (line 120) | class SFTMetric: method __init__ (line 121) | def __init__(self, device): method __call__ (line 128) | def __call__(self, logits, labels, loss): method update (line 131) | def update(self, logits, labels, loss): method get_metric (line 140) | def get_metric(self, reset=True): function table_to_csv_string (line 156) | def table_to_csv_string(table): function train (line 163) | def train(args):