SYMBOL INDEX (179 symbols across 29 files)

FILE: src/Args/base_args.py
  function get_args (line 17) | def get_args(args: Optional[Dict[str, Any]] = None):
  function save_args (line 74) | def save_args(args, file_format: str = "json") -> None:

FILE: src/Args/dpo_args.py
  class DPODatasetArguments (line 15) | class DPODatasetArguments(DatasetArguments):
  class DPOTrainingArguments (line 25) | class DPOTrainingArguments(TrainingArguments):
  class DPOArguments (line 40) | class DPOArguments:
    method __post_init__ (line 50) | def __post_init__(self):
    method table_beauty (line 68) | def table_beauty(self):
  function to_dict (line 116) | def to_dict(obj: Any) -> Dict[str, Any]:
  function save_args (line 143) | def save_args(args: DPOArguments, file_format: str = "json") -> None:

FILE: src/Args/sft_args.py
  class ExperimentArguments (line 18) | class ExperimentArguments:
    method generate_experiment_name (line 36) | def generate_experiment_name(self) -> str:
    method __post_init__ (line 39) | def __post_init__(self):
  class DatasetArguments (line 72) | class DatasetArguments:
    method __post_init__ (line 96) | def __post_init__(self):
  class ModelArguments (line 103) | class ModelArguments:
  class TrainingArguments (line 121) | class TrainingArguments:
    method __post_init__ (line 163) | def __post_init__(self):
  class InferenceArguments (line 169) | class InferenceArguments:
    method __post_init__ (line 200) | def __post_init__(self):
  class EnvironmentArguments (line 217) | class EnvironmentArguments:
  class SFTArguments (line 248) | class SFTArguments:
    method __post_init__ (line 258) | def __post_init__(self):
    method table_beauty (line 276) | def table_beauty(self):
  function to_dict (line 324) | def to_dict(obj: Any) -> Dict[str, Any]:

FILE: src/Dataset/data_util.py
  function batch_padding (line 19) | def batch_padding(
  function preprocess_conversation (line 53) | def preprocess_conversation(args: Any, df: pd.DataFrame) -> List[Dict]:
  function parse_system (line 129) | def parse_system(args: Any, system: str):
  function parse_prompt (line 141) | def parse_prompt(args: Any, prompt: str):
  function parse_response (line 148) | def parse_response(args: Any, response: str):
  function nested_dicts_to_dataframe (line 155) | def nested_dicts_to_dataframe(data, args):
  function worker_init_fn (line 220) | def worker_init_fn(worker_id: int) -> None:
  class OrderedDistributedSampler (line 235) | class OrderedDistributedSampler(Sampler):
    method __init__ (line 245) | def __init__(
    method __iter__ (line 272) | def __iter__(self):
    method __len__ (line 288) | def __len__(self):

FILE: src/Dataset/dataset.py
  function load_data (line 19) | def load_data(args):
  function read_data (line 65) | def read_data(file_path: str, args: Any) -> pd.DataFrame:
  function get_train_dataloader (line 110) | def get_train_dataloader(train_dataset: Any, args: Any):
  function get_valid_dataloader (line 166) | def get_valid_dataloader(valid_dataset: Any, args: Any):

FILE: src/Dataset/dpo_dataset.py
  class DPO_Dataset (line 11) | class DPO_Dataset(Dataset):
    method __init__ (line 16) | def __init__(self, conversations: pd.DataFrame, args: Any, mode: str =...
    method __len__ (line 34) | def __len__(self) -> int:
    method __getitem__ (line 37) | def __getitem__(self, idx: int) -> Dict:
    method get_encodings (line 116) | def get_encodings(self, input_text_dict: Dict[str, List[str]]):
    method _get_sample_encoding (line 149) | def _get_sample_encoding(self, system: str, prompt: str, chosen_respon...
    method encode (line 182) | def encode(tokenizer, text: str, max_length: int, truncation_side: str...
    method get_labels (line 194) | def get_labels(self, prompt_encodings, answer_encodings, prefix: str =...
    method pad_tokens (line 247) | def pad_tokens(

FILE: src/Dataset/sft_dataset.py
  class LLM_Dataset (line 11) | class LLM_Dataset(Dataset):
    method __init__ (line 16) | def __init__(self, conversations: pd.DataFrame, args: Any, mode: str =...
    method __len__ (line 33) | def __len__(self) -> int:
    method __getitem__ (line 36) | def __getitem__(self, idx: int) -> Dict:
    method get_encodings (line 103) | def get_encodings(self, input_text_dict: Dict[str, List[str]]):
    method _get_sample_encoding (line 133) | def _get_sample_encoding(self, system: str, prompt: str, answer: str) ...
    method encode (line 158) | def encode(tokenizer, text: str, max_length: int, truncation_side: str...
    method get_labels (line 170) | def get_labels(self, prompt_encodings, answer_encodings):
    method pad_tokens (line 206) | def pad_tokens(

FILE: src/Enviroment/env.py
  function Prepare_environment (line 23) | def Prepare_environment(args: Any) -> None:
  function wrap_model_distributed (line 90) | def wrap_model_distributed(
  function check_disk_space (line 165) | def check_disk_space(

FILE: src/Enviroment/env_deepspeed.py
  function get_ds_config (line 7) | def get_ds_config(args):
  function get_ds_config_from_file (line 65) | def get_ds_config_from_file(config_file: str):

FILE: src/Enviroment/env_utils.py
  function seed_everything (line 17) | def seed_everything(seed: int = 42) -> None:
  function sync_across_processes (line 28) | def sync_across_processes(

FILE: src/Evaluation/AI_utils.py
  function get_ai_template (line 43) | def get_ai_template(template_name: str) -> str:
  class AIEvaluator (line 61) | class AIEvaluator:
    method __init__ (line 62) | def __init__(self, args):
    method evaluate_response (line 78) | def evaluate_response(

FILE: src/Evaluation/eval.py
  function LLM_eval (line 16) | def LLM_eval(

FILE: src/Evaluation/eval_utils.py
  function eval_infer_result (line 14) | def eval_infer_result(
  function clean_output (line 62) | def clean_output(infer_result: Dict[str, Any], args: Any) -> Dict[str, A...
  function save_predictions (line 84) | def save_predictions(
  function get_end_conversation_ids (line 106) | def get_end_conversation_ids(conversations: List[Dict[str, List[Any]]]) ...
  function format_output (line 128) | def format_output(

FILE: src/Evaluation/infer.py
  function LLM_infer (line 17) | def LLM_infer(

FILE: src/Evaluation/infer_utils.py
  function batch_decode (line 8) | def batch_decode(args, output: Dict) -> Dict:
  function contains_nan (line 21) | def contains_nan(output: Dict):
  function no_type_check (line 35) | def no_type_check(arg):
  function cat_batches (line 62) | def cat_batches(

FILE: src/Main.py
  function main (line 32) | def main():

FILE: src/Model/dpo_model.py
  class DPO_LLM (line 16) | class DPO_LLM(nn.Module):
    method __init__ (line 17) | def __init__(self, args: Any):
    method init_deepspeed (line 38) | def init_deepspeed(self):
    method generate (line 53) | def generate(self, batch: Dict, args: Any, streamer=None):
    method get_position_ids (line 59) | def get_position_ids(attention_mask):
    method forward (line 64) | def forward(self, batch: Dict, padding: bool = True) -> Dict:
  function get_batch_logps (line 176) | def get_batch_logps(

FILE: src/Model/model_utils.py
  function get_llm_backbone (line 22) | def get_llm_backbone(args: Any) -> nn.Module:
  function update_backbone_config (line 175) | def update_backbone_config(config: Any, args: Any):
  function prepare_lora (line 219) | def prepare_lora(args, backbone):
  class TokenStoppingCriteria (line 276) | class TokenStoppingCriteria(StoppingCriteria):
    method __init__ (line 283) | def __init__(self, stop_word_ids, prompt_input_ids_len):
    method should_stop (line 290) | def should_stop(self, generated_ids: torch.Tensor, stop_word_id: torch...
    method get_num_vector_found_in_matrix_rows (line 302) | def get_num_vector_found_in_matrix_rows(vector, matrix):
    method __call__ (line 321) | def __call__(self, input_ids: torch.Tensor, scores: torch.FloatTensor,...
  class EnvVariableStoppingCriteria (line 334) | class EnvVariableStoppingCriteria(StoppingCriteria):
    method __call__ (line 342) | def __call__(self, input_ids: torch.Tensor, scores: torch.FloatTensor,...
  function contains_nan (line 349) | def contains_nan(output: Dict):
  function unwrap_model (line 363) | def unwrap_model(model: torch.nn.Module):
  function save_checkpoint (line 372) | def save_checkpoint(model: torch.nn.Module, path: str, args) -> None:
  function generate (line 425) | def generate(backbone, batch, args, streamer, remove_prompt=True):
  function set_generation_config (line 480) | def set_generation_config(backbone: torch.nn.Module, args: Any):

FILE: src/Model/sft_model.py
  class LLM (line 14) | class LLM(nn.Module):
    method __init__ (line 15) | def __init__(self, args: Any):
    method init_deepspeed (line 26) | def init_deepspeed(self):
    method generate (line 41) | def generate(self, batch: Dict, args: Any, streamer=None):
    method get_position_ids (line 47) | def get_position_ids(attention_mask):
    method forward (line 52) | def forward(self, batch: Dict, padding: bool = True) -> Dict:

FILE: src/Model/tokenizer.py
  function get_tokenizer (line 11) | def get_tokenizer(args: Any) -> PreTrainedTokenizer:
  function _add_missing_special_tokens (line 54) | def _add_missing_special_tokens(tokenizer: PreTrainedTokenizer) -> None:

FILE: src/Others/exceptions.py
  class CustomException (line 1) | class CustomException(Exception):
    method __init__ (line 10) | def __init__(self, message: str, exception_type: str = "Error"):
  class ArgumentException (line 21) | class ArgumentException(CustomException):
    method __init__ (line 22) | def __init__(self, message):
  class DataException (line 26) | class DataException(CustomException):
    method __init__ (line 27) | def __init__(self, message):
  class ModelException (line 31) | class ModelException(CustomException):
    method __init__ (line 32) | def __init__(self, message):
  class MetricException (line 36) | class MetricException(CustomException):
    method __init__ (line 37) | def __init__(self, message):
  class TrainingException (line 41) | class TrainingException(CustomException):
    method __init__ (line 42) | def __init__(self, message):
  class EnviromentException (line 46) | class EnviromentException(CustomException):
    method __init__ (line 47) | def __init__(self, message):

FILE: src/Train/dpo_loss_func.py
  class DPOLoss (line 14) | class DPOLoss(nn.Module):
    method __init__ (line 21) | def __init__(self, cfg: Any):
    method forward (line 27) | def forward(
    method get_losses (line 59) | def get_losses(self, logits: torch.FloatTensor) -> torch.Tensor:
  class LossFunction (line 77) | class LossFunction:
    method get_loss_function (line 84) | def get_loss_function(cls, name: str) -> nn.Module:
  function get_loss_func (line 103) | def get_loss_func(cfg: Any) -> nn.Module:

FILE: src/Train/lr_scheduler.py
  class Scheduler (line 9) | class Scheduler:
    method get_scheduler (line 19) | def get_scheduler(cls, name: str) -> callable:
  function get_scheduler (line 40) | def get_scheduler(

FILE: src/Train/metric.py
  function sacrebleu_score (line 18) | def sacrebleu_score(args: Any, results: Dict, valid_data: pd.DataFrame) ...
  function AI_eval_score (line 51) | def AI_eval_score(
  class Perplexity (line 129) | class Perplexity(nn.Module):
    method __init__ (line 138) | def __init__(self, args: Any, reduce: bool = True):
    method forward (line 144) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch...
  function perplexity (line 169) | def perplexity(args: Any, results: Dict, valid_df: pd.DataFrame) -> np.n...
  class Metrics (line 184) | class Metrics:
    method get_metric (line 196) | def get_metric(cls, name: str) -> tuple:
  function get_metric (line 215) | def get_metric(args: Any) -> Callable:

FILE: src/Train/optimizer.py
  class Optimizers (line 7) | class Optimizers:
    method get_optimizer (line 19) | def get_optimizer(cls, name: str) -> torch.optim.Optimizer:
  function get_optimizer (line 38) | def get_optimizer(model: torch.nn.Module, args: Any) -> torch.optim.Opti...

FILE: src/Train/sft_loss_func.py
  class TokenAveragedCrossEntropyLoss (line 10) | class TokenAveragedCrossEntropyLoss(nn.Module):
    method __init__ (line 20) | def __init__(self, args: Any):
    method forward (line 25) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch...
  class SampleAveragedCrossEntropyLoss (line 44) | class SampleAveragedCrossEntropyLoss(nn.Module):
    method __init__ (line 54) | def __init__(self, args: Any):
    method forward (line 59) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch...
  class CrossEntropyLoss (line 83) | class CrossEntropyLoss(nn.Module):
    method __init__ (line 91) | def __init__(self, args: Any):
    method forward (line 96) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch...
  class BinaryCrossEntropyLoss (line 110) | class BinaryCrossEntropyLoss(nn.Module):
    method __init__ (line 118) | def __init__(self, args: Any):
    method forward (line 123) | def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch...
  class LossFunction (line 137) | class LossFunction:
    method get_loss_function (line 148) | def get_loss_function(cls, name: str) -> nn.Module:
  function get_loss_func (line 167) | def get_loss_func(args: Any) -> nn.Module:

FILE: src/Train/train.py
  function LLM_train (line 21) | def LLM_train(

FILE: src/Train/train_utils.py
  function batch_to_device (line 9) | def batch_to_device(
  function calculate_steps (line 36) | def calculate_steps(args, train_dataloader, valid_dataloader):
  function compile_model (line 76) | def compile_model(model: torch.nn.Module, args) -> torch.nn.Module:
  function get_torch_dtype (line 101) | def get_torch_dtype(dtype: str) -> torch.dtype:

FILE: src/Utils/utils.py
  function seed_everything (line 19) | def seed_everything(seed: int = 42) -> None:
  class LocalRankFilter (line 42) | class LocalRankFilter(logging.Filter):
    method filter (line 43) | def filter(self, record):
  function get_logger (line 57) | def get_logger(args):
  class WandbHandler (line 116) | class WandbHandler(logging.Handler):
    method __init__ (line 117) | def __init__(self, args):
    method setup_wandb (line 123) | def setup_wandb(self):
    method emit (line 134) | def emit(self, record):
  class TqdmTologger (line 139) | class TqdmTologger(io.StringIO):
    method __init__ (line 148) | def __init__(self, logger):
    method write (line 153) | def write(self, buf):
    method flush (line 156) | def flush(self):
  function flatten_dict (line 164) | def flatten_dict(nested_dict):