SYMBOL INDEX (262 symbols across 34 files) FILE: examples/MyopicTrap/appendix_exp_cosine_sim.py function add_eos (line 212) | def add_eos(input_examples): FILE: examples/MyopicTrap/commercial_embedding_api.py class CommercialEncoder (line 14) | class CommercialEncoder(ABC): method encode (line 15) | def encode( method _generate_cache_key (line 24) | def _generate_cache_key(self, model_name: str, text: str, **kwargs): class OpenAIEncoder (line 29) | class OpenAIEncoder(CommercialEncoder): method __init__ (line 30) | def __init__(self): method encode (line 37) | def encode( method test (line 85) | def test(): class CohereEncoder (line 94) | class CohereEncoder(CommercialEncoder): method __init__ (line 95) | def __init__(self): method encode (line 101) | def encode( method test (line 146) | def test(): class VoyageEncoder (line 157) | class VoyageEncoder(CommercialEncoder): method __init__ (line 158) | def __init__(self): method encode (line 164) | def encode( method test (line 210) | def test(): class JinaEncoder (line 222) | class JinaEncoder(CommercialEncoder): method __init__ (line 223) | def __init__(self): method encode (line 227) | def encode( method test (line 288) | def test(): FILE: examples/MyopicTrap/utils.py function compute_colbert_score (line 22) | def compute_colbert_score(q_reps, p_reps, q_mask=None): function convert_numpy_to_tensor (line 37) | def convert_numpy_to_tensor(output_1): function find_topk_via_faiss (line 59) | def find_topk_via_faiss(source_vecs: np.ndarray, target_vecs: np.ndarray... function find_topk_by_bm25 (line 66) | def find_topk_by_bm25( function find_topk_by_single_vecs (line 86) | def find_topk_by_single_vecs( function find_topk_by_multi_vecs (line 319) | def find_topk_by_multi_vecs( function find_topk_by_reranker (line 517) | def find_topk_by_reranker( FILE: examples/distill_llm_to_bert_reranker/create_distill_data.py function get_train_data (line 11) | def get_train_data(train_data_path): FILE: examples/distill_llm_to_bert_reranker/model_llm_generate.py class LLMGenerateDecoder (line 7) | class LLMGenerateDecoder(nn.Module): method __init__ (line 8) | def __init__( method forward (line 27) | def forward(self, batch, labels = None): method compute_score (line 41) | def compute_score( method preprocess (line 61) | def preprocess(self, sentences,max_len): method from_pretrained (line 81) | def from_pretrained( function test_relecance (line 103) | def test_relecance(): FILE: examples/stella_embedding_distill/create_distill_data.py function get_train_data (line 14) | def get_train_data(train_data_path): FILE: examples/synthetic_data_embedding/get_lm_probs_dataset.py class LMProb (line 16) | class LMProb: method __init__ (line 23) | def __init__(self, config): method run (line 29) | def run(self, dataset): method calculate_prob (line 54) | def calculate_prob(self, prompt, answer): function main (line 59) | def main( FILE: rag_retrieval/infer/reranker_models/cross_encoder_ranker.py class CorssEncoderRanker (line 13) | class CorssEncoderRanker(BaseRanker): method __init__ (line 14) | def __init__(self, method compute_score (line 43) | def compute_score(self, method rerank (line 77) | def rerank(self, method __max_length_truncation_rerank (line 99) | def __max_length_truncation_rerank(self, method __max_score_slice_rerank (line 119) | def __max_score_slice_rerank(self, method __reranker_tokenize_preproc (line 170) | def __reranker_tokenize_preproc(self, FILE: rag_retrieval/infer/reranker_models/llm_rankers.py function sigmoid (line 11) | def sigmoid(x): class LLMRanker (line 14) | class LLMRanker(BaseRanker): method __init__ (line 16) | def __init__(self, method compute_score (line 54) | def compute_score(self, method rerank (line 91) | def rerank(self, method __max_length_truncation_rerank (line 114) | def __max_length_truncation_rerank(self, method __max_score_slice_rerank (line 144) | def __max_score_slice_rerank(self, method __reranker_tokenize_preproc (line 200) | def __reranker_tokenize_preproc(self, method get_inputs (line 289) | def get_inputs(self, FILE: rag_retrieval/infer/reranker_models/ranker.py class BaseRanker (line 4) | class BaseRanker(ABC): method __init__ (line 6) | def __init__(self, method rerank (line 13) | def rerank( method compute_score (line 24) | def compute_score( FILE: rag_retrieval/infer/reranker_models/result.py class Result (line 5) | class Result(BaseModel): method check_score_or_rank_exists (line 12) | def check_score_or_rank_exists(cls, v, values): class RankedResults (line 18) | class RankedResults(BaseModel): method results_count (line 23) | def results_count(self) -> int: method top_k (line 27) | def top_k(self, k: int) -> List[Result]: method get_score_by_docid (line 41) | def get_score_by_docid(self, doc_id: Union[int, str]) -> Optional[float]: FILE: rag_retrieval/infer/reranker_models/utils.py function vprint (line 4) | def vprint(txt: str, verbose: int) -> None: function get_dtype (line 13) | def get_dtype( function get_device (line 35) | def get_device( FILE: rag_retrieval/reranker.py function _get_model_type (line 28) | def _get_model_type( function Reranker (line 60) | def Reranker( FILE: rag_retrieval/train/colbert/data.py class ColBERTDTripletataset (line 14) | class ColBERTDTripletataset(Dataset): method __init__ (line 15) | def __init__(self, method read_train_data (line 29) | def read_train_data(self,train_data_path): method __len__ (line 51) | def __len__(self): method __getitem__ (line 54) | def __getitem__(self,idx): method collate_fn (line 57) | def collate_fn(self,batch): function test_ColBERTDTripletataset (line 93) | def test_ColBERTDTripletataset(): FILE: rag_retrieval/train/colbert/model.py class ColBERT (line 12) | class ColBERT(nn.Module): method __init__ (line 13) | def __init__(self, method get_embedding (line 36) | def get_embedding(self,input_ids,attention_mask): method punctuation_padding_mask (line 52) | def punctuation_padding_mask(self,input_ids): method score (line 58) | def score(self,query_embedding,doc_embedding,query_attention_mask): method forward (line 69) | def forward( method compute_score (line 105) | def compute_score(self, method preprocess (line 128) | def preprocess(self, method save_pretrained (line 154) | def save_pretrained(self, method from_pretrained (line 168) | def from_pretrained( function test_relecance (line 192) | def test_relecance(): FILE: rag_retrieval/train/colbert/train_colbert.py function create_adamw_optimizer (line 15) | def create_adamw_optimizer( function parse_args (line 36) | def parse_args(): function main (line 68) | def main(): FILE: rag_retrieval/train/colbert/trainer.py class Trainer (line 23) | class Trainer: method __init__ (line 24) | def __init__( method train (line 59) | def train(self): method log_metrics (line 114) | def log_metrics(self, metrics: dict[str, float], step: int): method add_prefix (line 119) | def add_prefix(values: dict[str, Any], prefix: str): method get_checkpoint_dir (line 122) | def get_checkpoint_dir(self, current_epoch): function evaluate (line 146) | def evaluate( class DummyProgressBar (line 162) | class DummyProgressBar: method update (line 163) | def update(self, n: int = 1) -> None: method close (line 166) | def close(self) -> None: method set_description (line 169) | def set_description(self, description: str) -> None: class DistributedTqdmProgressBar (line 173) | class DistributedTqdmProgressBar: method __init__ (line 174) | def __init__(self, epochs: int, num_steps_per_epoch: int | None, **kwa... method on_epoch_start (line 181) | def on_epoch_start(self): method update (line 187) | def update(self, n: int = 1) -> None: method close (line 190) | def close(self) -> None: method on_epoch_end (line 193) | def on_epoch_end(self) -> None: method show_metrics (line 197) | def show_metrics(self, metrics: dict[str, float]) -> None: class LossTracker (line 204) | class LossTracker: method __init__ (line 205) | def __init__( method update (line 214) | def update(self, loss_tensor: torch.Tensor): method reset (line 219) | def reset(self): method on_epoch_end (line 223) | def on_epoch_end(self, reset: bool = True): method loss (line 229) | def loss(self) -> float: FILE: rag_retrieval/train/embedding/data.py class EmbeddingDataset (line 13) | class EmbeddingDataset(Dataset): method __init__ (line 14) | def __init__( method read_train_data (line 44) | def read_train_data(self, train_data_path): method __len__ (line 74) | def __len__(self): method __getitem__ (line 77) | def __getitem__(self, idx): method triplet_collate_fn (line 80) | def triplet_collate_fn(self, batch): method pair_collate_fn (line 103) | def pair_collate_fn(self, batch): method pair_score_collate_fn (line 121) | def pair_score_collate_fn(self, batch): class EmbeddingDistillDataset (line 142) | class EmbeddingDistillDataset(Dataset): method __init__ (line 143) | def __init__( method read_train_data (line 165) | def read_train_data(self, train_data_path): method __len__ (line 178) | def __len__(self): method __getitem__ (line 181) | def __getitem__(self, idx): method collate_fn (line 186) | def collate_fn(self, batch): function test_EmbeddingDataset (line 205) | def test_EmbeddingDataset(): function test_EmbeddingDistillDataset (line 222) | def test_EmbeddingDistillDataset(): FILE: rag_retrieval/train/embedding/model.py class Embedding (line 14) | class Embedding(nn.Module): method __init__ (line 15) | def __init__( method get_embedding (line 33) | def get_embedding(self, input_ids, attention_mask): method forward (line 41) | def forward( method pair_inbatch_softmax_loss (line 205) | def pair_inbatch_softmax_loss( method triplet_inbatch_softmax_loss (line 231) | def triplet_inbatch_softmax_loss( method pair_kl_loss (line 270) | def pair_kl_loss( method encode (line 296) | def encode( method preprocess (line 329) | def preprocess( method _text_length (line 342) | def _text_length(self, text): method save_pretrained (line 358) | def save_pretrained( method from_pretrained (line 366) | def from_pretrained( function test_model_embedding (line 408) | def test_model_embedding(): FILE: rag_retrieval/train/embedding/model_distill.py class DistillEmbedding (line 13) | class DistillEmbedding(nn.Module): method __init__ (line 14) | def __init__( method get_embedding (line 26) | def get_embedding(self, input_ids, attention_mask): method forward (line 33) | def forward( method cosine_embedding_loss (line 71) | def cosine_embedding_loss( method pair_inbatch_similarity_loss (line 84) | def pair_inbatch_similarity_loss( method pair_inbatch_triplet_loss (line 97) | def pair_inbatch_triplet_loss( method get_score_diff (line 108) | def get_score_diff( method encode (line 119) | def encode( method preprocess (line 152) | def preprocess( method _text_length (line 165) | def _text_length(self, text): method save_pretrained (line 181) | def save_pretrained( method from_pretrained (line 190) | def from_pretrained( function test_model_embedding (line 215) | def test_model_embedding(): FILE: rag_retrieval/train/embedding/train_embedding.py function create_adamw_optimizer (line 16) | def create_adamw_optimizer( function parse_args (line 37) | def parse_args(): function main (line 94) | def main(): FILE: rag_retrieval/train/embedding/trainer.py class Trainer (line 20) | class Trainer: method __init__ (line 21) | def __init__( method train (line 59) | def train(self): method log_metrics (line 154) | def log_metrics(self, metrics: dict[str, float], step: int): method add_prefix (line 159) | def add_prefix(values: dict[str, Any], prefix: str): method get_checkpoint_dir (line 162) | def get_checkpoint_dir(self, current_epoch, is_step=False): function evaluate (line 186) | def evaluate( class DummyProgressBar (line 214) | class DummyProgressBar: method update (line 215) | def update(self, n: int = 1) -> None: method close (line 218) | def close(self) -> None: method set_description (line 221) | def set_description(self, description: str) -> None: class DistributedTqdmProgressBar (line 225) | class DistributedTqdmProgressBar: method __init__ (line 226) | def __init__(self, accelerator, epochs: int, num_steps_per_epoch: int ... method on_epoch_start (line 233) | def on_epoch_start(self): method update (line 239) | def update(self, n: int = 1) -> None: method close (line 242) | def close(self) -> None: method on_epoch_end (line 245) | def on_epoch_end(self) -> None: method show_metrics (line 249) | def show_metrics(self, metrics: dict[str, float]) -> None: class LossTracker (line 256) | class LossTracker: method __init__ (line 257) | def __init__( method update (line 266) | def update(self, loss_tensor: torch.Tensor): method reset (line 271) | def reset(self): method on_epoch_end (line 275) | def on_epoch_end(self, reset: bool = True): method loss (line 281) | def loss(self) -> float: FILE: rag_retrieval/train/reranker/data.py class PointwiseRankerDataset (line 10) | class PointwiseRankerDataset(Dataset): method __init__ (line 11) | def __init__(self, data_path=None, label_key="label", target_model=Non... method read_data (line 26) | def read_data(self, data_path): method __len__ (line 51) | def __len__(self): method __getitem__ (line 54) | def __getitem__(self, idx): method collate_fn (line 57) | def collate_fn(self, batch): class GroupedRankerDataset (line 73) | class GroupedRankerDataset(Dataset): method __init__ (line 74) | def __init__(self, data_path=None, label_key=None, target_model=None, ... method read_data (line 89) | def read_data(self, data_path): method __len__ (line 155) | def __len__(self): method __getitem__ (line 158) | def __getitem__(self, idx): method collate_fn (line 161) | def collate_fn(self, batch): function test_PointwiseRankerDataset (line 179) | def test_PointwiseRankerDataset(): function test_GroupedRankerDataset (line 208) | def test_GroupedRankerDataset(): FILE: rag_retrieval/train/reranker/model_bert.py class CrossEncoder (line 9) | class CrossEncoder(nn.Module): method __init__ (line 10) | def __init__( method forward (line 27) | def forward(self, batch, labels=None): method compute_score (line 50) | def compute_score( method preprocess (line 71) | def preprocess(self, sentences_pairs, max_len): method from_pretrained (line 90) | def from_pretrained( method save_pretrained (line 112) | def save_pretrained(self, save_dir, safe_serialization=False): function test_CrossEncoder (line 127) | def test_CrossEncoder(): FILE: rag_retrieval/train/reranker/model_llm.py class LLMDecoder (line 8) | class LLMDecoder(nn.Module): method __init__ (line 9) | def __init__( method forward (line 36) | def forward(self, batch, labels=None): method compute_score (line 58) | def compute_score( method preprocess (line 80) | def preprocess(self, sentences_pairs, max_len): method from_pretrained (line 119) | def from_pretrained( method save_pretrained (line 160) | def save_pretrained(self, save_dir, safe_serialization=False): function test_LLMDecoder (line 175) | def test_LLMDecoder(): FILE: rag_retrieval/train/reranker/ranking_loss.py function pointwise_mse (line 6) | def pointwise_mse(logits, labels): function pointwise_bce (line 12) | def pointwise_bce(logits, labels): function pairwise_ranknet (line 16) | def pairwise_ranknet(logits, labels, group_size): function listwise_ce (line 51) | def listwise_ce(logits, labels, group_size): FILE: rag_retrieval/train/reranker/train_reranker.py function create_adamw_optimizer (line 16) | def create_adamw_optimizer( function parse_args (line 37) | def parse_args(): function main (line 104) | def main(): FILE: rag_retrieval/train/reranker/trainer.py class Trainer (line 21) | class Trainer: method __init__ (line 22) | def __init__( method train (line 59) | def train(self): method log_metrics (line 143) | def log_metrics(self, metrics: dict[str, float], step: int): method add_prefix (line 148) | def add_prefix(values: dict[str, Any], prefix: str): method get_checkpoint_dir (line 151) | def get_checkpoint_dir(self, current_epoch): function evaluate (line 183) | def evaluate( class DummyProgressBar (line 198) | class DummyProgressBar: method update (line 199) | def update(self, n: int = 1) -> None: method close (line 202) | def close(self) -> None: method set_description (line 205) | def set_description(self, description: str) -> None: class DistributedTqdmProgressBar (line 209) | class DistributedTqdmProgressBar: method __init__ (line 210) | def __init__( method on_epoch_start (line 219) | def on_epoch_start(self): method update (line 225) | def update(self, n: int = 1) -> None: method close (line 228) | def close(self) -> None: method on_epoch_end (line 231) | def on_epoch_end(self) -> None: method show_metrics (line 235) | def show_metrics(self, metrics: dict[str, float]) -> None: class LossTracker (line 242) | class LossTracker: method __init__ (line 243) | def __init__( method update (line 252) | def update(self, loss_tensor: torch.Tensor): method reset (line 257) | def reset(self): method on_epoch_end (line 261) | def on_epoch_end(self, reset: bool = True): method loss (line 267) | def loss(self) -> float: FILE: rag_retrieval/train/reranker/utils.py function map_label_to_continuous (line 4) | def map_label_to_continuous(label, min_label, max_label): function visualize_label_distribution (line 22) | def visualize_label_distribution(label_distribution): function shuffle_text (line 65) | def shuffle_text(text, shuffle_ratio=0.15): function create_adamw_optimizer_with_special_lr_groups (line 85) | def create_adamw_optimizer_with_special_lr_groups( FILE: tests/test_cross_encoder_reranker_bce.py function test_rag_retrieval_cross_encode (line 10) | def test_rag_retrieval_cross_encode(query,docs): function test_bce_cross_encoder (line 29) | def test_bce_cross_encoder(query,docs): FILE: tests/test_cross_encoder_reranker_bge.py function test_rag_retrieval_cross_encode (line 10) | def test_rag_retrieval_cross_encode(query,docs): function test_bge_cross_encode (line 29) | def test_bge_cross_encode(query,docs): FILE: tests/test_cross_encoder_reranker_bge_m3.py function test_rag_retrieval_cross_encode (line 10) | def test_rag_retrieval_cross_encode(query,docs): function test_bge_cross_encode (line 29) | def test_bge_cross_encode(query,docs): FILE: tests/test_llm_reranker_bge_cpm.py function test_rag_retrieval_cpm (line 10) | def test_rag_retrieval_cpm(query,docs): function test_bge_cpm (line 28) | def test_bge_cpm(query,docs): FILE: tests/test_llm_reranker_bge_gemma.py function test_rag_retrieval_gemma (line 10) | def test_rag_retrieval_gemma(query,docs): function test_bge_gemma (line 28) | def test_bge_gemma(query,docs):