SYMBOL INDEX (179 symbols across 29 files) FILE: src/Args/base_args.py function get_args (line 17) | def get_args(args: Optional[Dict[str, Any]] = None): function save_args (line 74) | def save_args(args, file_format: str = "json") -> None: FILE: src/Args/dpo_args.py class DPODatasetArguments (line 15) | class DPODatasetArguments(DatasetArguments): class DPOTrainingArguments (line 25) | class DPOTrainingArguments(TrainingArguments): class DPOArguments (line 40) | class DPOArguments: method __post_init__ (line 50) | def __post_init__(self): method table_beauty (line 68) | def table_beauty(self): function to_dict (line 116) | def to_dict(obj: Any) -> Dict[str, Any]: function save_args (line 143) | def save_args(args: DPOArguments, file_format: str = "json") -> None: FILE: src/Args/sft_args.py class ExperimentArguments (line 18) | class ExperimentArguments: method generate_experiment_name (line 36) | def generate_experiment_name(self) -> str: method __post_init__ (line 39) | def __post_init__(self): class DatasetArguments (line 72) | class DatasetArguments: method __post_init__ (line 96) | def __post_init__(self): class ModelArguments (line 103) | class ModelArguments: class TrainingArguments (line 121) | class TrainingArguments: method __post_init__ (line 163) | def __post_init__(self): class InferenceArguments (line 169) | class InferenceArguments: method __post_init__ (line 200) | def __post_init__(self): class EnvironmentArguments (line 217) | class EnvironmentArguments: class SFTArguments (line 248) | class SFTArguments: method __post_init__ (line 258) | def __post_init__(self): method table_beauty (line 276) | def table_beauty(self): function to_dict (line 324) | def to_dict(obj: Any) -> Dict[str, Any]: FILE: src/Dataset/data_util.py function batch_padding (line 19) | def batch_padding( function preprocess_conversation (line 53) | def preprocess_conversation(args: Any, df: pd.DataFrame) -> List[Dict]: function parse_system (line 129) | def parse_system(args: Any, system: str): function parse_prompt (line 141) | def parse_prompt(args: Any, prompt: str): function parse_response (line 148) | def parse_response(args: Any, response: str): function nested_dicts_to_dataframe (line 155) | def nested_dicts_to_dataframe(data, args): function worker_init_fn (line 220) | def worker_init_fn(worker_id: int) -> None: class OrderedDistributedSampler (line 235) | class OrderedDistributedSampler(Sampler): method __init__ (line 245) | def __init__( method __iter__ (line 272) | def __iter__(self): method __len__ (line 288) | def __len__(self): FILE: src/Dataset/dataset.py function load_data (line 19) | def load_data(args): function read_data (line 65) | def read_data(file_path: str, args: Any) -> pd.DataFrame: function get_train_dataloader (line 110) | def get_train_dataloader(train_dataset: Any, args: Any): function get_valid_dataloader (line 166) | def get_valid_dataloader(valid_dataset: Any, args: Any): FILE: src/Dataset/dpo_dataset.py class DPO_Dataset (line 11) | class DPO_Dataset(Dataset): method __init__ (line 16) | def __init__(self, conversations: pd.DataFrame, args: Any, mode: str =... method __len__ (line 34) | def __len__(self) -> int: method __getitem__ (line 37) | def __getitem__(self, idx: int) -> Dict: method get_encodings (line 116) | def get_encodings(self, input_text_dict: Dict[str, List[str]]): method _get_sample_encoding (line 149) | def _get_sample_encoding(self, system: str, prompt: str, chosen_respon... method encode (line 182) | def encode(tokenizer, text: str, max_length: int, truncation_side: str... method get_labels (line 194) | def get_labels(self, prompt_encodings, answer_encodings, prefix: str =... method pad_tokens (line 247) | def pad_tokens( FILE: src/Dataset/sft_dataset.py class LLM_Dataset (line 11) | class LLM_Dataset(Dataset): method __init__ (line 16) | def __init__(self, conversations: pd.DataFrame, args: Any, mode: str =... method __len__ (line 33) | def __len__(self) -> int: method __getitem__ (line 36) | def __getitem__(self, idx: int) -> Dict: method get_encodings (line 103) | def get_encodings(self, input_text_dict: Dict[str, List[str]]): method _get_sample_encoding (line 133) | def _get_sample_encoding(self, system: str, prompt: str, answer: str) ... method encode (line 158) | def encode(tokenizer, text: str, max_length: int, truncation_side: str... method get_labels (line 170) | def get_labels(self, prompt_encodings, answer_encodings): method pad_tokens (line 206) | def pad_tokens( FILE: src/Enviroment/env.py function Prepare_environment (line 23) | def Prepare_environment(args: Any) -> None: function wrap_model_distributed (line 90) | def wrap_model_distributed( function check_disk_space (line 165) | def check_disk_space( FILE: src/Enviroment/env_deepspeed.py function get_ds_config (line 7) | def get_ds_config(args): function get_ds_config_from_file (line 65) | def get_ds_config_from_file(config_file: str): FILE: src/Enviroment/env_utils.py function seed_everything (line 17) | def seed_everything(seed: int = 42) -> None: function sync_across_processes (line 28) | def sync_across_processes( FILE: src/Evaluation/AI_utils.py function get_ai_template (line 43) | def get_ai_template(template_name: str) -> str: class AIEvaluator (line 61) | class AIEvaluator: method __init__ (line 62) | def __init__(self, args): method evaluate_response (line 78) | def evaluate_response( FILE: src/Evaluation/eval.py function LLM_eval (line 16) | def LLM_eval( FILE: src/Evaluation/eval_utils.py function eval_infer_result (line 14) | def eval_infer_result( function clean_output (line 62) | def clean_output(infer_result: Dict[str, Any], args: Any) -> Dict[str, A... function save_predictions (line 84) | def save_predictions( function get_end_conversation_ids (line 106) | def get_end_conversation_ids(conversations: List[Dict[str, List[Any]]]) ... function format_output (line 128) | def format_output( FILE: src/Evaluation/infer.py function LLM_infer (line 17) | def LLM_infer( FILE: src/Evaluation/infer_utils.py function batch_decode (line 8) | def batch_decode(args, output: Dict) -> Dict: function contains_nan (line 21) | def contains_nan(output: Dict): function no_type_check (line 35) | def no_type_check(arg): function cat_batches (line 62) | def cat_batches( FILE: src/Main.py function main (line 32) | def main(): FILE: src/Model/dpo_model.py class DPO_LLM (line 16) | class DPO_LLM(nn.Module): method __init__ (line 17) | def __init__(self, args: Any): method init_deepspeed (line 38) | def init_deepspeed(self): method generate (line 53) | def generate(self, batch: Dict, args: Any, streamer=None): method get_position_ids (line 59) | def get_position_ids(attention_mask): method forward (line 64) | def forward(self, batch: Dict, padding: bool = True) -> Dict: function get_batch_logps (line 176) | def get_batch_logps( FILE: src/Model/model_utils.py function get_llm_backbone (line 22) | def get_llm_backbone(args: Any) -> nn.Module: function update_backbone_config (line 175) | def update_backbone_config(config: Any, args: Any): function prepare_lora (line 219) | def prepare_lora(args, backbone): class TokenStoppingCriteria (line 276) | class TokenStoppingCriteria(StoppingCriteria): method __init__ (line 283) | def __init__(self, stop_word_ids, prompt_input_ids_len): method should_stop (line 290) | def should_stop(self, generated_ids: torch.Tensor, stop_word_id: torch... method get_num_vector_found_in_matrix_rows (line 302) | def get_num_vector_found_in_matrix_rows(vector, matrix): method __call__ (line 321) | def __call__(self, input_ids: torch.Tensor, scores: torch.FloatTensor,... class EnvVariableStoppingCriteria (line 334) | class EnvVariableStoppingCriteria(StoppingCriteria): method __call__ (line 342) | def __call__(self, input_ids: torch.Tensor, scores: torch.FloatTensor,... function contains_nan (line 349) | def contains_nan(output: Dict): function unwrap_model (line 363) | def unwrap_model(model: torch.nn.Module): function save_checkpoint (line 372) | def save_checkpoint(model: torch.nn.Module, path: str, args) -> None: function generate (line 425) | def generate(backbone, batch, args, streamer, remove_prompt=True): function set_generation_config (line 480) | def set_generation_config(backbone: torch.nn.Module, args: Any): FILE: src/Model/sft_model.py class LLM (line 14) | class LLM(nn.Module): method __init__ (line 15) | def __init__(self, args: Any): method init_deepspeed (line 26) | def init_deepspeed(self): method generate (line 41) | def generate(self, batch: Dict, args: Any, streamer=None): method get_position_ids (line 47) | def get_position_ids(attention_mask): method forward (line 52) | def forward(self, batch: Dict, padding: bool = True) -> Dict: FILE: src/Model/tokenizer.py function get_tokenizer (line 11) | def get_tokenizer(args: Any) -> PreTrainedTokenizer: function _add_missing_special_tokens (line 54) | def _add_missing_special_tokens(tokenizer: PreTrainedTokenizer) -> None: FILE: src/Others/exceptions.py class CustomException (line 1) | class CustomException(Exception): method __init__ (line 10) | def __init__(self, message: str, exception_type: str = "Error"): class ArgumentException (line 21) | class ArgumentException(CustomException): method __init__ (line 22) | def __init__(self, message): class DataException (line 26) | class DataException(CustomException): method __init__ (line 27) | def __init__(self, message): class ModelException (line 31) | class ModelException(CustomException): method __init__ (line 32) | def __init__(self, message): class MetricException (line 36) | class MetricException(CustomException): method __init__ (line 37) | def __init__(self, message): class TrainingException (line 41) | class TrainingException(CustomException): method __init__ (line 42) | def __init__(self, message): class EnviromentException (line 46) | class EnviromentException(CustomException): method __init__ (line 47) | def __init__(self, message): FILE: src/Train/dpo_loss_func.py class DPOLoss (line 14) | class DPOLoss(nn.Module): method __init__ (line 21) | def __init__(self, cfg: Any): method forward (line 27) | def forward( method get_losses (line 59) | def get_losses(self, logits: torch.FloatTensor) -> torch.Tensor: class LossFunction (line 77) | class LossFunction: method get_loss_function (line 84) | def get_loss_function(cls, name: str) -> nn.Module: function get_loss_func (line 103) | def get_loss_func(cfg: Any) -> nn.Module: FILE: src/Train/lr_scheduler.py class Scheduler (line 9) | class Scheduler: method get_scheduler (line 19) | def get_scheduler(cls, name: str) -> callable: function get_scheduler (line 40) | def get_scheduler( FILE: src/Train/metric.py function sacrebleu_score (line 18) | def sacrebleu_score(args: Any, results: Dict, valid_data: pd.DataFrame) ... function AI_eval_score (line 51) | def AI_eval_score( class Perplexity (line 129) | class Perplexity(nn.Module): method __init__ (line 138) | def __init__(self, args: Any, reduce: bool = True): method forward (line 144) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch... function perplexity (line 169) | def perplexity(args: Any, results: Dict, valid_df: pd.DataFrame) -> np.n... class Metrics (line 184) | class Metrics: method get_metric (line 196) | def get_metric(cls, name: str) -> tuple: function get_metric (line 215) | def get_metric(args: Any) -> Callable: FILE: src/Train/optimizer.py class Optimizers (line 7) | class Optimizers: method get_optimizer (line 19) | def get_optimizer(cls, name: str) -> torch.optim.Optimizer: function get_optimizer (line 38) | def get_optimizer(model: torch.nn.Module, args: Any) -> torch.optim.Opti... FILE: src/Train/sft_loss_func.py class TokenAveragedCrossEntropyLoss (line 10) | class TokenAveragedCrossEntropyLoss(nn.Module): method __init__ (line 20) | def __init__(self, args: Any): method forward (line 25) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch... class SampleAveragedCrossEntropyLoss (line 44) | class SampleAveragedCrossEntropyLoss(nn.Module): method __init__ (line 54) | def __init__(self, args: Any): method forward (line 59) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch... class CrossEntropyLoss (line 83) | class CrossEntropyLoss(nn.Module): method __init__ (line 91) | def __init__(self, args: Any): method forward (line 96) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch... class BinaryCrossEntropyLoss (line 110) | class BinaryCrossEntropyLoss(nn.Module): method __init__ (line 118) | def __init__(self, args: Any): method forward (line 123) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch... class LossFunction (line 137) | class LossFunction: method get_loss_function (line 148) | def get_loss_function(cls, name: str) -> nn.Module: function get_loss_func (line 167) | def get_loss_func(args: Any) -> nn.Module: FILE: src/Train/train.py function LLM_train (line 21) | def LLM_train( FILE: src/Train/train_utils.py function batch_to_device (line 9) | def batch_to_device( function calculate_steps (line 36) | def calculate_steps(args, train_dataloader, valid_dataloader): function compile_model (line 76) | def compile_model(model: torch.nn.Module, args) -> torch.nn.Module: function get_torch_dtype (line 101) | def get_torch_dtype(dtype: str) -> torch.dtype: FILE: src/Utils/utils.py function seed_everything (line 19) | def seed_everything(seed: int = 42) -> None: class LocalRankFilter (line 42) | class LocalRankFilter(logging.Filter): method filter (line 43) | def filter(self, record): function get_logger (line 57) | def get_logger(args): class WandbHandler (line 116) | class WandbHandler(logging.Handler): method __init__ (line 117) | def __init__(self, args): method setup_wandb (line 123) | def setup_wandb(self): method emit (line 134) | def emit(self, record): class TqdmTologger (line 139) | class TqdmTologger(io.StringIO): method __init__ (line 148) | def __init__(self, logger): method write (line 153) | def write(self, buf): method flush (line 156) | def flush(self): function flatten_dict (line 164) | def flatten_dict(nested_dict):