SYMBOL INDEX (1225 symbols across 64 files)

FILE: src/data/data.py
  function chatglm3_encode (line 21) | def chatglm3_encode(tokenizer: PreTrainedTokenizerBase,
  function chatglm2_encode (line 83) | def chatglm2_encode(tokenizer: PreTrainedTokenizerBase,
  class DataCollatorReward (line 147) | class DataCollatorReward:
    method __call__ (line 148) | def __call__(self, data):
  class DataCollatorRLHF (line 160) | class DataCollatorRLHF:
    method __init__ (line 162) | def __init__(self, max_token_len, inference_tp_size):
    method __call__ (line 166) | def __call__(self, data):
  class PretrainDataset (line 197) | class PretrainDataset(Dataset):
    method __init__ (line 198) | def __init__(self, args, filename, tokenizer, concat_samples=True):
    method __len__ (line 209) | def __len__(self):
    method __getitem__ (line 212) | def __getitem__(self, idx):
    method load_dataset (line 306) | def load_dataset(self, filename):
  class SFTDataset (line 351) | class SFTDataset(Dataset):
    method __init__ (line 352) | def __init__(self, args, filename, tokenizer, concat_samples=True):
    method __len__ (line 363) | def __len__(self):
    method __getitem__ (line 366) | def __getitem__(self, idx):
    method load_dataset (line 509) | def load_dataset(self, filename):
  class PairwiseDataset (line 563) | class PairwiseDataset(Dataset):
    method __init__ (line 564) | def __init__(self, args, filename, tokenizer):
    method __len__ (line 572) | def __len__(self):
    method __getitem__ (line 575) | def __getitem__(self, idx):
    method load_dataset (line 683) | def load_dataset(filename):
  class RLHFDataset (line 721) | class RLHFDataset(Dataset):
    method __init__ (line 722) | def __init__(self, args, filename, tokenizer):
    method __len__ (line 731) | def __len__(self):
    method __getitem__ (line 734) | def __getitem__(self, idx):
    method load_dataset (line 781) | def load_dataset(filename):
  class PPODataset (line 804) | class PPODataset:
    method __init__ (line 805) | def __init__(self, max_size, small_batch_size):
    method separate (line 810) | def separate(self):
    method add (line 834) | def add(self, data):
    method free (line 846) | def free(self):
  class DPODataset (line 850) | class DPODataset(Dataset):
    method __init__ (line 851) | def __init__(self, args, filename, tokenizer):
    method __len__ (line 859) | def __len__(self):
    method __getitem__ (line 862) | def __getitem__(self, idx):
    method load_dataset (line 928) | def load_dataset(filename):
  class OCNLIDataset (line 969) | class OCNLIDataset(Dataset):
    method __init__ (line 970) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 983) | def __len__(self):
    method __getitem__ (line 986) | def __getitem__(self, idx):
    method load_dataset (line 1017) | def load_dataset(self, filename):
  class CMNLIDataset (line 1040) | class CMNLIDataset(Dataset):
    method __init__ (line 1041) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1054) | def __len__(self):
    method __getitem__ (line 1057) | def __getitem__(self, idx):
    method load_dataset (line 1090) | def load_dataset(self, filename):
  class CHIDDataset (line 1113) | class CHIDDataset(Dataset):
    method __init__ (line 1114) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1127) | def __len__(self):
    method __getitem__ (line 1130) | def __getitem__(self, idx):
    method load_dataset (line 1165) | def load_dataset(self, filename):
    method load_idiom_dict (line 1186) | def load_idiom_dict(self):
  class CMRCDataset (line 1195) | class CMRCDataset(Dataset):
    method __init__ (line 1196) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1208) | def __len__(self):
    method __getitem__ (line 1211) | def __getitem__(self, idx):
    method load_dataset (line 1242) | def load_dataset(self, filename):
  class CLUEWSCDataset (line 1267) | class CLUEWSCDataset(Dataset):
    method __init__ (line 1268) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1281) | def __len__(self):
    method __getitem__ (line 1284) | def __getitem__(self, idx):
    method load_dataset (line 1317) | def load_dataset(self, filename):
  class C3Dataset (line 1338) | class C3Dataset(Dataset):
    method __init__ (line 1339) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1351) | def __len__(self):
    method __getitem__ (line 1354) | def __getitem__(self, idx):
    method load_dataset (line 1389) | def load_dataset(self, filename):
  class AFQMCDataset (line 1412) | class AFQMCDataset(Dataset):
    method __init__ (line 1413) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1426) | def __len__(self):
    method __getitem__ (line 1429) | def __getitem__(self, idx):
    method load_dataset (line 1462) | def load_dataset(self, filename):
  class CSLDataset (line 1482) | class CSLDataset(Dataset):
    method __init__ (line 1483) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1496) | def __len__(self):
    method __getitem__ (line 1499) | def __getitem__(self, idx):
    method load_dataset (line 1532) | def load_dataset(self, filename):
  class IFLYTEKDataset (line 1552) | class IFLYTEKDataset(Dataset):
    method __init__ (line 1553) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1587) | def __len__(self):
    method __getitem__ (line 1590) | def __getitem__(self, idx):
    method load_dataset (line 1625) | def load_dataset(self, filename):
  class TNEWSDataset (line 1648) | class TNEWSDataset(Dataset):
    method __init__ (line 1649) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1676) | def __len__(self):
    method __getitem__ (line 1679) | def __getitem__(self, idx):
    method load_dataset (line 1714) | def load_dataset(self, filename):
  class CEvalDataset (line 1737) | class CEvalDataset(Dataset):
    method __init__ (line 1738) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1754) | def __len__(self):
    method format_example (line 1757) | def format_example(self, line, include_answer=True, cot=False):
    method __getitem__ (line 1785) | def __getitem__(self, idx):
    method load_dataset (line 1863) | def load_dataset(self, filename, return_format="list"):
  class MMLUDataset (line 1887) | class MMLUDataset(Dataset):
    method __init__ (line 1888) | def __init__(self, args, eval_filename, tokenizer, train_filename=None):
    method __len__ (line 1904) | def __len__(self):
    method format_example (line 1907) | def format_example(self, line, include_answer=True):
    method __getitem__ (line 1926) | def __getitem__(self, idx):
    method load_dataset (line 2003) | def load_dataset(self, filename, return_format="list"):

FILE: src/data/data_types.py
  class PromptElement (line 9) | class PromptElement:
  class PromptBatch (line 25) | class PromptBatch:
  class AccelerateRLElement (line 41) | class AccelerateRLElement:
  class AccelerateRLBatchElement (line 57) | class AccelerateRLBatchElement:
  class PPORLElement (line 73) | class PPORLElement:
  class PPORLBatch (line 107) | class PPORLBatch:

FILE: src/data/pipeline.py
  class GeneralElement (line 25) | class GeneralElement:
  class RLElement (line 34) | class RLElement:
  class BatchElement (line 45) | class BatchElement:
  class GLMDataCollator (line 55) | class GLMDataCollator:
    method __call__ (line 63) | def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
  function register_datapipeline (line 102) | def register_datapipeline(name):
  class BasePipeline (line 125) | class BasePipeline(Dataset):
    method __init__ (line 126) | def __init__(self, path: str = "dataset"):
    method __getitem__ (line 130) | def __getitem__(self, index: int) -> GeneralElement:
    method __len__ (line 134) | def __len__(self) -> int:
    method create_loader (line 138) | def create_loader(
  class BaseRolloutStore (line 153) | class BaseRolloutStore(Dataset):
    method __init__ (line 154) | def __init__(self, capacity=-1):
    method push (line 159) | def push(self, exps: Iterable[Any]):
    method __getitem__ (line 165) | def __getitem__(self, index: int) -> RLElement:
    method __len__ (line 168) | def __len__(self) -> int:
    method create_loader (line 172) | def create_loader(
  class PanguPipeline (line 189) | class PanguPipeline(BasePipeline):
    method __init__ (line 190) | def __init__(self, prompts: List[dict], config: TRLConfig, tokenizer: ...
    method __len__ (line 199) | def __len__(self):
    method __getitem__ (line 202) | def __getitem__(self, idx):
    method create_loader (line 219) | def create_loader(self, batch_size: int, shuffle=False) -> DataLoader:
  class GLMPipeline (line 224) | class GLMPipeline(BasePipeline):
    method __init__ (line 225) | def __init__(self, prompts: List[dict], config: TRLConfig, tokenizer: ...
    method __len__ (line 235) | def __len__(self):
    method __getitem__ (line 238) | def __getitem__(self, idx):
    method create_loader (line 257) | def create_loader(self, batch_size: int, shuffle=False) -> DataLoader:
  class ChatGLMPipeline (line 263) | class ChatGLMPipeline(BasePipeline):
    method __init__ (line 264) | def __init__(self, prompts: List[dict], config: TRLConfig, tokenizer: ...
    method __len__ (line 273) | def __len__(self):
    method __getitem__ (line 276) | def __getitem__(self, idx):
    method create_loader (line 287) | def create_loader(self, batch_size: int, shuffle=False) -> DataLoader:
  class PPORolloutStorage (line 291) | class PPORolloutStorage(BaseRolloutStore):
    method __init__ (line 296) | def __init__(self, pad_token_id):
    method push (line 302) | def push(self, exps: Iterable[PPORLElement]):
    method clear_history (line 305) | def clear_history(self):
    method export_history (line 308) | def export_history(self, location: str):
    method __getitem__ (line 320) | def __getitem__(self, index: int) -> PPORLElement:
    method __len__ (line 323) | def __len__(self) -> int:
    method create_loader (line 326) | def create_loader(

FILE: src/data_prepare.py
  function weibo_summary_comment (line 24) | def weibo_summary_comment(args, tokenizer):
  function couplets (line 61) | def couplets(args, tokenizer):
  function zhidao (line 117) | def zhidao(args, tokenizer):
  function chinese_classical (line 159) | def chinese_classical(args, tokenizer):
  function chinese_poetry (line 221) | def chinese_poetry(args, tokenizer):
  function baike_qa_2019 (line 372) | def baike_qa_2019(args, tokenizer):
  function get_parser (line 404) | def get_parser():
  function main (line 418) | def main():

FILE: src/eval_pretrain.py
  function get_parser (line 60) | def get_parser():
  function extract_cot_answer (line 101) | def extract_cot_answer(line, response):
  function main (line 106) | def main():

FILE: src/models/loss.py
  class PairWiseLoss (line 6) | class PairWiseLoss(nn.Module):
    method forward (line 11) | def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Te...

FILE: src/models/ppo.py
  class PreTrainedModelWrapper (line 51) | class PreTrainedModelWrapper(nn.Module, transformers.utils.PushToHubMixin):
    method __init__ (line 76) | def __init__(self, base_model: Optional[transformers.PreTrainedModel] ...
    method _split_kwargs (line 83) | def _split_kwargs(cls, kwargs: Dict[str, Any]):
    method from_config (line 97) | def from_config(cls, config: transformers.PretrainedConfig, **kwargs):
    method from_pretrained (line 118) | def from_pretrained(  # noqa: max-complexity
    method save_pretrained (line 210) | def save_pretrained(self, *args, **kwargs):
    method state_dict (line 231) | def state_dict(self, *args, **kwargs):
    method post_init (line 235) | def post_init(self, *args, **kwargs):
    method get_compatible_forward_kwargs (line 242) | def get_compatible_forward_kwargs(self, **kwargs) -> Dict[str, Any]:
  class AdaptiveKLController (line 252) | class AdaptiveKLController:
    method __init__ (line 258) | def __init__(self, init_kl_coef: float, target: float, horizon: int):
    method update (line 263) | def update(self, current: float, n_steps: int):
  class FixedKLController (line 273) | class FixedKLController:
    method __init__ (line 276) | def __init__(self, kl_coef):
    method update (line 279) | def update(self, current: float, n_steps: int):
  class CausalLMOutputWithValue (line 289) | class CausalLMOutputWithValue(ModelOutput):
  class AutoModelForCausalLMWithValueHead (line 299) | class AutoModelForCausalLMWithValueHead(PreTrainedModelWrapper):
    method __init__ (line 308) | def __init__(
    method forward (line 316) | def forward(
    method generate (line 353) | def generate(self, *args, **kwargs) -> Union[ModelOutput, torch.LongTe...
    method state_dict (line 356) | def state_dict(self, *args, **kwargs):
    method post_init (line 367) | def post_init(self, state_dict):
  class AutoModelForCausalLMWithHydraValueHead (line 381) | class AutoModelForCausalLMWithHydraValueHead(AutoModelForCausalLMWithVal...
    method __init__ (line 385) | def __init__(
    method forward_hydra (line 414) | def forward_hydra(
    method from_pretrained (line 457) | def from_pretrained(  # noqa: max-complexity
  class ModelBranch (line 564) | class ModelBranch(transformers.PreTrainedModel):
    method __init__ (line 569) | def __init__(
  class GPTModelBranch (line 599) | class GPTModelBranch(ModelBranch):
    method forward (line 600) | def forward(  # noqa: max-complexity
  class OPTModelBranch (line 733) | class OPTModelBranch(ModelBranch):
    method forward (line 734) | def forward(  # noqa: max-complexity
  class BloomModelBranch (line 853) | class BloomModelBranch(ModelBranch):
    method forward (line 854) | def forward(  # noqa: max-complexity
  class Seq2SeqLMOutputWithValue (line 971) | class Seq2SeqLMOutputWithValue(ModelOutput):
  class AutoModelForSeq2SeqLMWithValueHead (line 984) | class AutoModelForSeq2SeqLMWithValueHead(PreTrainedModelWrapper):
    method __init__ (line 993) | def __init__(
    method forward (line 1001) | def forward(
    method generate (line 1053) | def generate(self, *args, **kwargs) -> Union[ModelOutput, torch.LongTe...
    method state_dict (line 1056) | def state_dict(self, *args, **kwargs):
    method post_init (line 1067) | def post_init(self, state_dict):
  class AutoModelForSeq2SeqLMWithHydraValueHead (line 1081) | class AutoModelForSeq2SeqLMWithHydraValueHead(AutoModelForSeq2SeqLMWithV...
    method __init__ (line 1085) | def __init__(
    method forward_hydra (line 1103) | def forward_hydra(
    method from_pretrained (line 1161) | def from_pretrained(  # noqa: max-complexity
  class T5Branch (line 1268) | class T5Branch(ModelBranch):
    method __init__ (line 1271) | def __init__(
    method forward (line 1281) | def forward(  # noqa: max-complexity
  function hf_get_branch_class (line 1381) | def hf_get_branch_class(

FILE: src/models/reward.py
  class RewardModel (line 7) | class RewardModel(PreTrainedModel):
    method __init__ (line 10) | def __init__(self, config, model, tokenizer):
    method gradient_checkpointing_enable (line 24) | def gradient_checkpointing_enable(self):
    method gradient_checkpointing_disable (line 27) | def gradient_checkpointing_disable(self):
    method _set_gradient_checkpointing (line 30) | def _set_gradient_checkpointing(self, module, value=False):
    method reward (line 34) | def reward(
    method forward (line 71) | def forward(

FILE: src/models/rlhf_engine.py
  function log_init (line 37) | def log_init(model_name, rank, stime=None):
  function create_hf_model (line 49) | def create_hf_model(model_class,
  function create_critic_model (line 93) | def create_critic_model(model_name_or_path,
  class DeepSpeedRLHFEngine (line 130) | class DeepSpeedRLHFEngine:
    method __init__ (line 132) | def __init__(self, actor_model_name_or_path, critic_model_name_or_path,
    method _init_actor (line 164) | def _init_actor(self, actor_model_name_or_path):
    method _init_ref (line 232) | def _init_ref(self, actor_model_name_or_path):
    method _init_ema (line 255) | def _init_ema(self, actor_model_name_or_path):
    method _init_critic (line 291) | def _init_critic(self, critic_model_name_or_path):
    method _init_reward (line 362) | def _init_reward(self, critic_model_name_or_path):

FILE: src/models/trainer.py
  function register_trainer (line 75) | def register_trainer(name):
  class BaseRLTrainer (line 98) | class BaseRLTrainer:
    method __init__ (line 99) | def __init__(
    method push_to_store (line 116) | def push_to_store(self, data):
    method add_eval_pipeline (line 119) | def add_eval_pipeline(self, eval_pipeline):
    method sample (line 124) | def sample(self, prompts: Iterable[str], length: int, n_samples: int) ...
    method learn (line 138) | def learn(
    method save (line 159) | def save(self, directory: Optional[str] = None):
    method load (line 164) | def load(self, directory=None):
  class AccelerateRLTrainer (line 170) | class AccelerateRLTrainer(BaseRLTrainer):
    method __init__ (line 175) | def __init__(self, config, **kwargs):  # noqa: C901
    method setup_model (line 252) | def setup_model(self):
    method setup_optimizer (line 278) | def setup_optimizer(self):
    method setup_scheduler (line 300) | def setup_scheduler(self):
    method decode (line 308) | def decode(
    method generate (line 374) | def generate(self, input_ids, attention_mask=None, **kwargs):
    method generate_eval (line 389) | def generate_eval(self, input_ids, attention_mask=None, **kwargs):
    method save_pretrained (line 402) | def save_pretrained(self, directory: Optional[str] = None, **kwargs):
    method save (line 420) | def save(self, directory: Optional[str] = None, **kwargs):
    method load (line 424) | def load(self, directory: Optional[str] = None, **kwargs):
    method add_eval_pipeline (line 428) | def add_eval_pipeline(self, eval_pipeline):
    method evaluate (line 432) | def evaluate(self):  # noqa: C901
    method learn (line 579) | def learn(self):  # noqa: C901
    method get_arch (line 703) | def get_arch(self, config: TRLConfig):
    method loss (line 708) | def loss(self, batch) -> Tuple[float, Dict]:
    method post_backward_callback (line 713) | def post_backward_callback(self):
    method post_epoch_callback (line 718) | def post_epoch_callback(self):
  class AcceleratePPOTrainer (line 724) | class AcceleratePPOTrainer(AccelerateRLTrainer):
    method __init__ (line 730) | def __init__(self, config: TRLConfig, **kwargs):
    method get_arch (line 810) | def get_arch(self, config: TRLConfig):
    method loss (line 830) | def loss(self, batch: PPORLBatch):
    method setup_rollout_logging (line 913) | def setup_rollout_logging(self, config):
    method post_epoch_callback (line 926) | def post_epoch_callback(self):
    method post_backward_callback (line 937) | def post_backward_callback(self):
    method prepare_learning (line 940) | def prepare_learning(self):
    method add_prompt_pipeline (line 949) | def add_prompt_pipeline(self, pipeline: BasePipeline):
    method make_experience (line 955) | def make_experience(self, num_rollouts: int = 1024, iter_count: int = ...
  function get_model_norm (line 1259) | def get_model_norm(model):
  function gather_log_probs (line 1273) | def gather_log_probs(logits, labels):
  class DeepSpeedPPOTrainer (line 1279) | class DeepSpeedPPOTrainer():
    method __init__ (line 1281) | def __init__(self, rlhf_engine, args):
    method generate_sequence (line 1302) | def generate_sequence(self, inputs):
    method generate_experience (line 1416) | def generate_experience(self, output_sequences, answer_start_indices):
    method compute_rewards (line 1455) | def compute_rewards(self, starts, log_probs, ref_log_probs, reward_sco...
    method train_rlhf (line 1482) | def train_rlhf(self, inputs):
    method actor_loss_fn (line 1563) | def actor_loss_fn(self, logprobs, old_logprobs, advantages, mask):
    method critic_loss_fn (line 1573) | def critic_loss_fn(self, values, old_values, returns, mask):
    method get_advantages_and_returns (line 1588) | def get_advantages_and_returns(self, values, rewards, starts):
    method _validate_training_mode (line 1632) | def _validate_training_mode(self):
    method _validate_evaluation_mode (line 1637) | def _validate_evaluation_mode(self):
    method train (line 1644) | def train(self):
    method eval (line 1649) | def eval(self):
    method dump_model_norms (line 1656) | def dump_model_norms(self, tag):
  class DeepSpeedPPOPTXTrainer (line 1674) | class DeepSpeedPPOPTXTrainer(DeepSpeedPPOTrainer):
    method __init__ (line 1676) | def __init__(self, *args, **kwargs):
    method train_unsupervised (line 1679) | def train_unsupervised(self, inputs, unsup_coef):
  class DPOTrainer (line 1691) | class DPOTrainer(Trainer):
    method __init__ (line 1738) | def __init__(
    method concatenated_inputs (line 1863) | def concatenated_inputs(self, batch: Dict[str, Union[List, torch.LongT...
    method dpo_loss (line 1892) | def dpo_loss(
    method _get_batch_logps (line 1929) | def _get_batch_logps(
    method concatenated_forward (line 1962) | def concatenated_forward(
    method separate_forward (line 1986) | def separate_forward(
    method get_batch_metrics (line 2012) | def get_batch_metrics(
    method compute_loss (line 2077) | def compute_loss(
    method get_batch_samples (line 2093) | def get_batch_samples(self, model, batch: Dict[str, torch.LongTensor])...
    method prediction_step (line 2130) | def prediction_step(
    method store_metrics (line 2164) | def store_metrics(self, metrics: Dict[str, float], train_eval: Literal...
    method log (line 2168) | def log(self, logs: Dict[str, float]) -> None:

FILE: src/pretrain.py
  function preprocess_logits_for_metrics (line 28) | def preprocess_logits_for_metrics(logits, labels):
  function get_parser (line 35) | def get_parser():
  function main (line 109) | def main():

FILE: src/pretrain_wo_trainer.py
  function preprocess_logits_for_metrics (line 30) | def preprocess_logits_for_metrics(logits, labels):
  function get_parser (line 37) | def get_parser():
  function pred_single_sample (line 110) | def pred_single_sample(prompt, prefix, model, tokenizer, args, device, e...
  function pred (line 180) | def pred(args, model, tokenizer, device, eos_token_id, step=-1):
  function main (line 196) | def main():

FILE: src/resources/models/baichuan-13B-base/configuration_baichuan.py
  class BaichuanConfig (line 5) | class BaichuanConfig(PretrainedConfig):
    method __init__ (line 9) | def __init__(

FILE: src/resources/models/baichuan-13B-base/modeling_baichuan.py
  function _get_interleave (line 20) | def _get_interleave(n):
  function _fill_with_neg_inf (line 34) | def _fill_with_neg_inf(t):
  function _gen_alibi_mask (line 39) | def _gen_alibi_mask(n_head, max_pos, alibi_mask=None):
  class RMSNorm (line 52) | class RMSNorm(torch.nn.Module):
    method __init__ (line 53) | def __init__(self, hidden_size, epsilon=1e-6):
    method forward (line 58) | def forward(self, hidden_states):
  class MLP (line 69) | class MLP(torch.nn.Module):
    method __init__ (line 70) | def __init__(
    method forward (line 82) | def forward(self, x):
  class BaichuanAttention (line 86) | class BaichuanAttention(torch.nn.Module):
    method __init__ (line 88) | def __init__(self, config: BaichuanConfig):
    method _shape (line 103) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 106) | def forward(
  class BaichuanLayer (line 155) | class BaichuanLayer(torch.nn.Module):
    method __init__ (line 156) | def __init__(self, config: BaichuanConfig):
    method forward (line 168) | def forward(
  class BaichuanPreTrainedModel (line 205) | class BaichuanPreTrainedModel(PreTrainedModel):
    method _init_weights (line 212) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 223) | def _set_gradient_checkpointing(self, module, value=False):
  class BaichuanModel (line 228) | class BaichuanModel(BaichuanPreTrainedModel):
    method __init__ (line 229) | def __init__(self, config: BaichuanConfig):
    method get_input_embeddings (line 243) | def get_input_embeddings(self):
    method set_input_embeddings (line 246) | def set_input_embeddings(self, value):
    method get_alibi_mask (line 249) | def get_alibi_mask(self, tensor, seq_length_with_past, attention_mask):
    method forward (line 267) | def forward(
  class BaichuanForCausalLM (line 368) | class BaichuanForCausalLM(BaichuanPreTrainedModel):
    method __init__ (line 369) | def __init__(self, config):
    method forward (line 377) | def forward(
    method prepare_inputs_for_generation (line 430) | def prepare_inputs_for_generation(
    method _reorder_cache (line 451) | def _reorder_cache(past_key_values, beam_idx):
    method quantize (line 457) | def quantize(self, bits: int):
    method _build_chat_input (line 493) | def _build_chat_input(self, tokenizer, messages: List[dict], max_new_t...
    method chat (line 524) | def chat(self, tokenizer, messages: List[dict], stream=False,

FILE: src/resources/models/baichuan-13B-base/tokenization_baichuan.py
  class BaichuanTokenizer (line 23) | class BaichuanTokenizer(PreTrainedTokenizer):
    method __init__ (line 37) | def __init__(
    method __getstate__ (line 72) | def __getstate__(self):
    method __setstate__ (line 77) | def __setstate__(self, d):
    method vocab_size (line 83) | def vocab_size(self):
    method get_vocab (line 87) | def get_vocab(self):
    method _tokenize (line 93) | def _tokenize(self, text):
    method _convert_token_to_id (line 97) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 101) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 106) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 125) | def save_vocabulary(self, save_directory, filename_prefix: Optional[st...
    method build_inputs_with_special_tokens (line 152) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method get_special_tokens_mask (line 163) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 200) | def create_token_type_ids_from_sequences(

FILE: src/resources/models/baichuan-13B-chat/configuration_baichuan.py
  class BaichuanConfig (line 5) | class BaichuanConfig(PretrainedConfig):
    method __init__ (line 9) | def __init__(

FILE: src/resources/models/baichuan-13B-chat/modeling_baichuan.py
  function _get_interleave (line 20) | def _get_interleave(n):
  function _fill_with_neg_inf (line 34) | def _fill_with_neg_inf(t):
  function _gen_alibi_mask (line 39) | def _gen_alibi_mask(n_head, max_pos, alibi_mask=None):
  class RMSNorm (line 52) | class RMSNorm(torch.nn.Module):
    method __init__ (line 53) | def __init__(self, hidden_size, epsilon=1e-6):
    method forward (line 58) | def forward(self, hidden_states):
  class MLP (line 69) | class MLP(torch.nn.Module):
    method __init__ (line 70) | def __init__(
    method forward (line 82) | def forward(self, x):
  class BaichuanAttention (line 86) | class BaichuanAttention(torch.nn.Module):
    method __init__ (line 88) | def __init__(self, config: BaichuanConfig):
    method _shape (line 103) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 106) | def forward(
  class BaichuanLayer (line 155) | class BaichuanLayer(torch.nn.Module):
    method __init__ (line 156) | def __init__(self, config: BaichuanConfig):
    method forward (line 168) | def forward(
  class BaichuanPreTrainedModel (line 205) | class BaichuanPreTrainedModel(PreTrainedModel):
    method _init_weights (line 212) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 223) | def _set_gradient_checkpointing(self, module, value=False):
  class BaichuanModel (line 228) | class BaichuanModel(BaichuanPreTrainedModel):
    method __init__ (line 229) | def __init__(self, config: BaichuanConfig):
    method get_input_embeddings (line 243) | def get_input_embeddings(self):
    method set_input_embeddings (line 246) | def set_input_embeddings(self, value):
    method get_alibi_mask (line 249) | def get_alibi_mask(self, tensor, seq_length_with_past, attention_mask):
    method forward (line 267) | def forward(
  class BaichuanForCausalLM (line 368) | class BaichuanForCausalLM(BaichuanPreTrainedModel):
    method __init__ (line 369) | def __init__(self, config):
    method forward (line 377) | def forward(
    method prepare_inputs_for_generation (line 430) | def prepare_inputs_for_generation(
    method _reorder_cache (line 451) | def _reorder_cache(past_key_values, beam_idx):
    method quantize (line 458) | def quantize(self, bits: int):
    method _build_chat_input (line 494) | def _build_chat_input(self, tokenizer, messages: List[dict], max_new_t...
    method chat (line 525) | def chat(self, tokenizer, messages: List[dict], stream=False,

FILE: src/resources/models/baichuan-13B-chat/tokenization_baichuan.py
  class BaichuanTokenizer (line 23) | class BaichuanTokenizer(PreTrainedTokenizer):
    method __init__ (line 37) | def __init__(
    method __getstate__ (line 72) | def __getstate__(self):
    method __setstate__ (line 77) | def __setstate__(self, d):
    method vocab_size (line 83) | def vocab_size(self):
    method get_vocab (line 87) | def get_vocab(self):
    method _tokenize (line 93) | def _tokenize(self, text):
    method _convert_token_to_id (line 97) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 101) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 106) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 125) | def save_vocabulary(self, save_directory, filename_prefix: Optional[st...
    method build_inputs_with_special_tokens (line 152) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method get_special_tokens_mask (line 163) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 200) | def create_token_type_ids_from_sequences(

FILE: src/resources/models/baichuan-7B/configuration_baichuan.py
  class BaiChuanConfig (line 28) | class BaiChuanConfig(PretrainedConfig):
    method __init__ (line 32) | def __init__(

FILE: src/resources/models/baichuan-7B/modeling_baichuan.py
  function _make_causal_mask (line 39) | def _make_causal_mask(
  function _expand_mask (line 57) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option...
  class RMSNorm (line 71) | class RMSNorm(nn.Module):
    method __init__ (line 72) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 80) | def forward(self, hidden_states):
  class RotaryEmbedding (line 91) | class RotaryEmbedding(torch.nn.Module):
    method __init__ (line 92) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method forward (line 106) | def forward(self, x, seq_len=None):
  function rotate_half (line 123) | def rotate_half(x):
  function apply_rotary_pos_emb (line 130) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
  class MLP (line 141) | class MLP(nn.Module):
    method __init__ (line 142) | def __init__(
    method forward (line 154) | def forward(self, x):
  class Attention (line 158) | class Attention(nn.Module):
    method __init__ (line 161) | def __init__(self, config: BaiChuanConfig):
    method _shape (line 181) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 184) | def forward(
  class DecoderLayer (line 259) | class DecoderLayer(nn.Module):
    method __init__ (line 260) | def __init__(self, config: BaiChuanConfig):
    method forward (line 272) | def forward(
  class PreTrainedModel (line 327) | class PreTrainedModel(PreTrainedModel):
    method _init_weights (line 334) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 345) | def _set_gradient_checkpointing(self, module, value=False):
  class Model (line 350) | class Model(PreTrainedModel):
    method __init__ (line 358) | def __init__(self, config: BaiChuanConfig):
    method get_input_embeddings (line 371) | def get_input_embeddings(self):
    method set_input_embeddings (line 374) | def set_input_embeddings(self, value):
    method _prepare_decoder_attention_mask (line 378) | def _prepare_decoder_attention_mask(self, attention_mask, input_shape,...
    method forward (line 401) | def forward(
  class BaiChuanForCausalLM (line 529) | class BaiChuanForCausalLM(PreTrainedModel):
    method __init__ (line 530) | def __init__(self, config):
    method get_input_embeddings (line 539) | def get_input_embeddings(self):
    method set_input_embeddings (line 542) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 545) | def get_output_embeddings(self):
    method set_output_embeddings (line 548) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 551) | def set_decoder(self, decoder):
    method get_decoder (line 554) | def get_decoder(self):
    method forward (line 557) | def forward(
    method prepare_inputs_for_generation (line 643) | def prepare_inputs_for_generation(
    method _reorder_cache (line 674) | def _reorder_cache(past_key_values, beam_idx):

FILE: src/resources/models/baichuan-7B/tokenization_baichuan.py
  class BaiChuanTokenizer (line 42) | class BaiChuanTokenizer(PreTrainedTokenizer):
    method __init__ (line 56) | def __init__(
    method __getstate__ (line 91) | def __getstate__(self):
    method __setstate__ (line 96) | def __setstate__(self, d):
    method vocab_size (line 102) | def vocab_size(self):
    method get_vocab (line 106) | def get_vocab(self):
    method _tokenize (line 112) | def _tokenize(self, text):
    method _convert_token_to_id (line 116) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 120) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 125) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 144) | def save_vocabulary(self, save_directory, filename_prefix: Optional[st...
    method build_inputs_with_special_tokens (line 171) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method get_special_tokens_mask (line 182) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 219) | def create_token_type_ids_from_sequences(

FILE: src/resources/models/chatglm-6B/configuration_chatglm.py
  class ChatGLMConfig (line 9) | class ChatGLMConfig(PretrainedConfig):
    method __init__ (line 59) | def __init__(

FILE: src/resources/models/chatglm-6B/modeling_chatglm.py
  class InvalidScoreLogitsProcessor (line 54) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 55) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  function load_tf_weights_in_chatglm_6b (line 62) | def load_tf_weights_in_chatglm_6b(model, config, tf_checkpoint_path):
  class PrefixEncoder (line 136) | class PrefixEncoder(torch.nn.Module):
    method __init__ (line 143) | def __init__(self, config):
    method forward (line 157) | def forward(self, prefix: torch.Tensor):
  function gelu_impl (line 167) | def gelu_impl(x):
  function gelu (line 173) | def gelu(x):
  class RotaryEmbedding (line 177) | class RotaryEmbedding(torch.nn.Module):
    method __init__ (line 178) | def __init__(self, dim, base=10000, precision=torch.half, learnable=Fa...
    method _load_from_state_dict (line 193) | def _load_from_state_dict(self, state_dict, prefix, local_metadata, st...
    method forward (line 197) | def forward(self, x, seq_dim=1, seq_len=None):
    method _apply (line 220) | def _apply(self, fn):
  function rotate_half (line 228) | def rotate_half(x):
  function apply_rotary_pos_emb_index (line 234) | def apply_rotary_pos_emb_index(q, k, cos, sin, position_id):
  function attention_fn (line 242) | def attention_fn(
  class SelfAttention (line 351) | class SelfAttention(torch.nn.Module):
    method __init__ (line 352) | def __init__(self, hidden_size, num_attention_heads,
    method attention_mask_func (line 399) | def attention_mask_func(attention_scores, attention_mask):
    method split_tensor_along_last_dim (line 403) | def split_tensor_along_last_dim(self, tensor, num_partitions,
    method forward (line 423) | def forward(
  class GEGLU (line 490) | class GEGLU(torch.nn.Module):
    method __init__ (line 491) | def __init__(self):
    method forward (line 495) | def forward(self, x):
  class GLU (line 501) | class GLU(torch.nn.Module):
    method __init__ (line 502) | def __init__(self, hidden_size, inner_hidden_size=None,
    method forward (line 529) | def forward(self, hidden_states):
  class GLMBlock (line 544) | class GLMBlock(torch.nn.Module):
    method __init__ (line 545) | def __init__(
    method forward (line 594) | def forward(
  class ChatGLMPreTrainedModel (line 648) | class ChatGLMPreTrainedModel(PreTrainedModel):
    method __init__ (line 660) | def __init__(self, *inputs, **kwargs):
    method _init_weights (line 663) | def _init_weights(self, module: nn.Module):
    method _set_gradient_checkpointing (line 667) | def _set_gradient_checkpointing(self, module, value=False):
  class ChatGLMModel (line 737) | class ChatGLMModel(ChatGLMPreTrainedModel):
    method __init__ (line 753) | def __init__(self, config: ChatGLMConfig):
    method get_input_embeddings (line 809) | def get_input_embeddings(self):
    method set_input_embeddings (line 812) | def set_input_embeddings(self, new_embeddings: torch.Tensor):
    method get_prompt (line 815) | def get_prompt(self, batch_size, device, dtype=torch.half):
    method get_masks (line 831) | def get_masks(self, input_ids, device):
    method get_position_ids (line 843) | def get_position_ids(self, input_ids, mask_positions, device, gmask=Fa...
    method forward (line 871) | def forward(
  class ChatGLMForConditionalGeneration (line 1008) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
    method __init__ (line 1009) | def __init__(self, config: ChatGLMConfig):
    method get_output_embeddings (line 1036) | def get_output_embeddings(self):
    method set_output_embeddings (line 1039) | def set_output_embeddings(self, new_embeddings):
    method get_masks_and_position_ids (line 1042) | def get_masks_and_position_ids(self, input_ids, mask_positions, device...
    method prepare_inputs_for_generation (line 1073) | def prepare_inputs_for_generation(
    method forward (line 1122) | def forward(
    method _reorder_cache (line 1181) | def _reorder_cache(
    method process_response (line 1199) | def process_response(self, response):
    method chat (line 1215) | def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] =...
    method stream_chat (line 1241) | def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, ...
    method stream_generate (line 1267) | def stream_generate(
    method quantize (line 1368) | def quantize(self, bits: int, empty_init=False, **kwargs):

FILE: src/resources/models/chatglm-6B/quantization.py
  class Kernel (line 18) | class Kernel:
    method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]):
  class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function):
    method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to...
    method backward (line 58) | def backward(ctx, grad_output: torch.Tensor):
  function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor):  # (n, m)
  function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso...
  class QuantizedLinear (line 120) | class QuantizedLinear(Linear):
    method __init__ (line 121) | def __init__(self, weight_bit_width: int, weight_tensor=None, bias_ten...
    method forward (line 146) | def forward(self, input):
  function quantize (line 153) | def quantize(model, weight_bit_width, empty_init=False, **kwargs):

FILE: src/resources/models/chatglm-6B/tokenization_chatglm.py
  class SPTokenizer (line 23) | class SPTokenizer:
    method __init__ (line 24) | def __init__(
    method _configure_tokenizer (line 39) | def _configure_tokenizer(
    method _build_text_tokenizer (line 66) | def _build_text_tokenizer(self, encode_special_tokens=False):
    method _get_text_tokenizer (line 73) | def _get_text_tokenizer(self, encode_special_tokens=False):
    method get_blank_token (line 80) | def get_blank_token(length: int):
    method get_tab_token (line 85) | def get_tab_token():
    method num_image_tokens (line 89) | def num_image_tokens(self):
    method num_text_tokens (line 93) | def num_text_tokens(self):
    method num_tokens (line 97) | def num_tokens(self):
    method _encode_whitespaces (line 101) | def _encode_whitespaces(text: str, max_len: int = 80):
    method _preprocess (line 107) | def _preprocess(self, text: str, linebreak=True, whitespaces=True):
    method encode (line 114) | def encode(
    method decode (line 131) | def decode(self, text_ids: List[int], special_tokens=False) -> str:
    method tokenize (line 145) | def tokenize(
    method __getitem__ (line 161) | def __getitem__(self, x: Union[int, str]):
  class ChatGLMTokenizer (line 176) | class ChatGLMTokenizer(PreTrainedTokenizer):
    method __init__ (line 189) | def __init__(
    method eop_token_id (line 224) | def eop_token_id(self) -> Optional[int]:
    method gmask_token_id (line 234) | def gmask_token_id(self) -> Optional[int]:
    method vocab_size (line 244) | def vocab_size(self):
    method get_vocab (line 248) | def get_vocab(self):
    method preprocess_text (line 254) | def preprocess_text(self, inputs):
    method _tokenize (line 265) | def _tokenize(self, text, **kwargs):
    method decode (line 273) | def decode(
    method _convert_token_to_id (line 293) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 297) | def _convert_id_to_token(self, index):
    method save_vocabulary (line 301) | def save_vocabulary(self, save_directory, filename_prefix=None):
    method build_inputs_with_special_tokens (line 353) | def build_inputs_with_special_tokens(

FILE: src/resources/models/chatglm2-6B/configuration_chatglm.py
  class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig):
    method __init__ (line 5) | def __init__(

FILE: src/resources/models/chatglm2-6B/modeling_chatglm.py
  function default_init (line 47) | def default_init(cls, *args, **kwargs):
  class InvalidScoreLogitsProcessor (line 51) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 52) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  function split_tensor_along_last_dim (line 59) | def split_tensor_along_last_dim(
  class RotaryEmbedding (line 87) | class RotaryEmbedding(nn.Module):
    method __init__ (line 88) | def __init__(self, dim, original_impl=False, device=None, dtype=None):
    method forward_impl (line 95) | def forward_impl(
    method forward (line 120) | def forward(self, max_seq_len, offset=0):
  function apply_rotary_pos_emb (line 127) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t...
  class RMSNorm (line 147) | class RMSNorm(torch.nn.Module):
    method __init__ (line 148) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None...
    method forward (line 153) | def forward(self, hidden_states: torch.Tensor):
  class CoreAttention (line 161) | class CoreAttention(torch.nn.Module):
    method __init__ (line 162) | def __init__(self, config: ChatGLMConfig, layer_number):
    method forward (line 187) | def forward(self, query_layer, key_layer, value_layer, attention_mask):
  class SelfAttention (line 282) | class SelfAttention(torch.nn.Module):
    method __init__ (line 289) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method _allocate_memory (line 318) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev...
    method forward (line 332) | def forward(
  function _config_to_kwargs (line 421) | def _config_to_kwargs(args):
  class MLP (line 428) | class MLP(torch.nn.Module):
    method __init__ (line 436) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 465) | def forward(self, hidden_states):
  class GLMBlock (line 474) | class GLMBlock(torch.nn.Module):
    method __init__ (line 481) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method forward (line 505) | def forward(
  class GLMTransformer (line 548) | class GLMTransformer(torch.nn.Module):
    method __init__ (line 551) | def __init__(self, config: ChatGLMConfig, device=None):
    method _get_layer (line 574) | def _get_layer(self, layer_number):
    method forward (line 577) | def forward(
  class ChatGLMPreTrainedModel (line 624) | class ChatGLMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 636) | def _init_weights(self, module: nn.Module):
    method get_masks (line 640) | def get_masks(self, input_ids, past_key_values, padding_mask=None):
    method get_position_ids (line 658) | def get_position_ids(self, input_ids, device):
    method _set_gradient_checkpointing (line 663) | def _set_gradient_checkpointing(self, module, value=False):
  class Embedding (line 671) | class Embedding(torch.nn.Module):
    method __init__ (line 674) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 687) | def forward(self, input_ids):
  class ChatGLMModel (line 699) | class ChatGLMModel(ChatGLMPreTrainedModel):
    method __init__ (line 700) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True):
    method get_input_embeddings (line 724) | def get_input_embeddings(self):
    method forward (line 727) | def forward(
    method quantize (line 785) | def quantize(self, weight_bit_width: int):
  class ChatGLMForConditionalGeneration (line 791) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
    method __init__ (line 792) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
    method _update_model_kwargs_for_generation (line 803) | def _update_model_kwargs_for_generation(
    method forward (line 834) | def forward(
    method prepare_inputs_for_generation (line 896) | def prepare_inputs_for_generation(
    method _reorder_cache (line 920) | def _reorder_cache(
    method process_response (line 938) | def process_response(self, response):
    method build_inputs (line 943) | def build_inputs(self, tokenizer, query: str, history: List[Tuple[str,...
    method build_stream_inputs (line 952) | def build_stream_inputs(self, tokenizer, query: str, history: List[Tup...
    method chat (line 966) | def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] =...
    method stream_chat (line 984) | def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, ...
    method stream_generate (line 1018) | def stream_generate(
    method quantize (line 1122) | def quantize(self, bits: int, empty_init=False, device=None, **kwargs):

FILE: src/resources/models/chatglm2-6B/quantization.py
  class Kernel (line 18) | class Kernel:
    method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]):
  class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function):
    method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to...
    method backward (line 58) | def backward(ctx, grad_output: torch.Tensor):
  function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor):  # (n, m)
  function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso...
  class QuantizedLinear (line 124) | class QuantizedLinear(torch.nn.Module):
    method __init__ (line 125) | def __init__(self, weight_bit_width: int, weight, bias=None, device="c...
    method forward (line 145) | def forward(self, input):
  function quantize (line 152) | def quantize(model, weight_bit_width, empty_init=False, device=None):

FILE: src/resources/models/chatglm2-6B/tokenization_chatglm.py
  class SPTokenizer (line 10) | class SPTokenizer:
    method __init__ (line 11) | def __init__(self, model_path: str):
    method tokenize (line 31) | def tokenize(self, s: str):
    method encode (line 34) | def encode(self, s: str, bos: bool = False, eos: bool = False) -> List...
    method decode (line 43) | def decode(self, t: List[int]) -> str:
    method decode_tokens (line 46) | def decode_tokens(self, tokens: List[str]) -> str:
    method convert_token_to_id (line 50) | def convert_token_to_id(self, token):
    method convert_id_to_token (line 56) | def convert_id_to_token(self, index):
  class ChatGLMTokenizer (line 63) | class ChatGLMTokenizer(PreTrainedTokenizer):
    method __init__ (line 68) | def __init__(self, vocab_file, padding_side="right", **kwargs):
    method get_command (line 80) | def get_command(self, token):
    method pad_token (line 87) | def pad_token(self) -> str:
    method pad_token_id (line 91) | def pad_token_id(self):
    method vocab_size (line 95) | def vocab_size(self):
    method get_vocab (line 98) | def get_vocab(self):
    method _tokenize (line 104) | def _tokenize(self, text, **kwargs):
    method _convert_token_to_id (line 107) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 111) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 115) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method save_vocabulary (line 133) | def save_vocabulary(self, save_directory, filename_prefix=None):
    method get_prefix_tokens (line 161) | def get_prefix_tokens(self):
    method build_inputs_with_special_tokens (line 165) | def build_inputs_with_special_tokens(
    method _pad (line 191) | def _pad(

FILE: src/resources/models/chatglm3-6B/configuration_chatglm.py
  class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig):
    method __init__ (line 6) | def __init__(

FILE: src/resources/models/chatglm3-6B/modeling_chatglm.py
  function default_init (line 49) | def default_init(cls, *args, **kwargs):
  class InvalidScoreLogitsProcessor (line 53) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 54) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  class PrefixEncoder (line 61) | class PrefixEncoder(torch.nn.Module):
    method __init__ (line 68) | def __init__(self, config: ChatGLMConfig):
    method forward (line 84) | def forward(self, prefix: torch.Tensor):
  function split_tensor_along_last_dim (line 93) | def split_tensor_along_last_dim(
  class RotaryEmbedding (line 121) | class RotaryEmbedding(nn.Module):
    method __init__ (line 122) | def __init__(self, dim, original_impl=False, device=None, dtype=None):
    method forward_impl (line 129) | def forward_impl(
    method forward (line 154) | def forward(self, max_seq_len, offset=0):
  function apply_rotary_pos_emb (line 161) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t...
  class RMSNorm (line 181) | class RMSNorm(torch.nn.Module):
    method __init__ (line 182) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None...
    method forward (line 187) | def forward(self, hidden_states: torch.Tensor):
  class CoreAttention (line 195) | class CoreAttention(torch.nn.Module):
    method __init__ (line 196) | def __init__(self, config: ChatGLMConfig, layer_number):
    method forward (line 221) | def forward(self, query_layer, key_layer, value_layer, attention_mask):
  class SelfAttention (line 313) | class SelfAttention(torch.nn.Module):
    method __init__ (line 320) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method _allocate_memory (line 349) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev...
    method forward (line 363) | def forward(
  function _config_to_kwargs (line 452) | def _config_to_kwargs(args):
  class MLP (line 459) | class MLP(torch.nn.Module):
    method __init__ (line 467) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 496) | def forward(self, hidden_states):
  class GLMBlock (line 505) | class GLMBlock(torch.nn.Module):
    method __init__ (line 512) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method forward (line 536) | def forward(
  class GLMTransformer (line 579) | class GLMTransformer(torch.nn.Module):
    method __init__ (line 582) | def __init__(self, config: ChatGLMConfig, device=None):
    method _get_layer (line 605) | def _get_layer(self, layer_number):
    method forward (line 608) | def forward(
  class ChatGLMPreTrainedModel (line 661) | class ChatGLMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 673) | def _init_weights(self, module: nn.Module):
    method get_masks (line 677) | def get_masks(self, input_ids, past_key_values, padding_mask=None):
    method get_position_ids (line 695) | def get_position_ids(self, input_ids, device):
    method _set_gradient_checkpointing (line 700) | def _set_gradient_checkpointing(self, module, value=False):
  class Embedding (line 705) | class Embedding(torch.nn.Module):
    method __init__ (line 708) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 721) | def forward(self, input_ids):
  class ChatGLMModel (line 733) | class ChatGLMModel(ChatGLMPreTrainedModel):
    method __init__ (line 734) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True):
    method get_input_embeddings (line 768) | def get_input_embeddings(self):
    method get_prompt (line 771) | def get_prompt(self, batch_size, device, dtype=torch.half):
    method forward (line 786) | def forward(
    method quantize (line 845) | def quantize(self, weight_bit_width: int):
  class ChatGLMForConditionalGeneration (line 851) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
    method __init__ (line 852) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
    method _update_model_kwargs_for_generation (line 863) | def _update_model_kwargs_for_generation(
    method prepare_inputs_for_generation (line 894) | def prepare_inputs_for_generation(
    method forward (line 920) | def forward(
    method _reorder_cache (line 981) | def _reorder_cache(
    method process_response (line 999) | def process_response(self, output, history):
    method chat (line 1021) | def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] =...
    method stream_chat (line 1043) | def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, ...
    method stream_generate (line 1084) | def stream_generate(
    method quantize (line 1191) | def quantize(self, bits: int, empty_init=False, device=None, **kwargs):
  class ChatGLMForSequenceClassification (line 1210) | class ChatGLMForSequenceClassification(ChatGLMPreTrainedModel):
    method __init__ (line 1211) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
    method forward (line 1227) | def forward(

FILE: src/resources/models/chatglm3-6B/quantization.py
  class Kernel (line 18) | class Kernel:
    method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]):
  class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function):
    method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to...
    method backward (line 58) | def backward(ctx, grad_output: torch.Tensor):
  function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor):  # (n, m)
  function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso...
  class QuantizedLinear (line 124) | class QuantizedLinear(torch.nn.Module):
    method __init__ (line 125) | def __init__(self, weight_bit_width: int, weight, bias=None, device="c...
    method forward (line 145) | def forward(self, input):
  function quantize (line 152) | def quantize(model, weight_bit_width, empty_init=False, device=None):

FILE: src/resources/models/chatglm3-6B/tokenization_chatglm.py
  class SPTokenizer (line 11) | class SPTokenizer:
    method __init__ (line 12) | def __init__(self, model_path: str):
    method tokenize (line 34) | def tokenize(self, s: str):
    method encode (line 37) | def encode(self, s: str, bos: bool = False, eos: bool = False) -> List...
    method decode (line 46) | def decode(self, t: List[int]) -> str:
    method decode_tokens (line 60) | def decode_tokens(self, tokens: List[str]) -> str:
    method convert_token_to_id (line 64) | def convert_token_to_id(self, token):
    method convert_id_to_token (line 70) | def convert_id_to_token(self, index):
  class ChatGLMTokenizer (line 79) | class ChatGLMTokenizer(PreTrainedTokenizer):
    method __init__ (line 84) | def __init__(self, vocab_file, padding_side="left", clean_up_tokenizat...
    method get_command (line 96) | def get_command(self, token):
    method unk_token (line 103) | def unk_token(self) -> str:
    method pad_token (line 107) | def pad_token(self) -> str:
    method pad_token_id (line 111) | def pad_token_id(self):
    method eos_token (line 115) | def eos_token(self) -> str:
    method eos_token_id (line 119) | def eos_token_id(self):
    method vocab_size (line 123) | def vocab_size(self):
    method get_vocab (line 126) | def get_vocab(self):
    method _tokenize (line 132) | def _tokenize(self, text, **kwargs):
    method _convert_token_to_id (line 135) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 139) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 143) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method save_vocabulary (line 146) | def save_vocabulary(self, save_directory, filename_prefix=None):
    method get_prefix_tokens (line 174) | def get_prefix_tokens(self):
    method build_single_message (line 178) | def build_single_message(self, role, metadata, message):
    method build_chat_input (line 185) | def build_chat_input(self, query, history=None, role="user"):
    method build_inputs_with_special_tokens (line 198) | def build_inputs_with_special_tokens(
    method _pad (line 223) | def _pad(

FILE: src/resources/models/glm-10B-chinese/configuration_glm.py
  class GLMConfig (line 28) | class GLMConfig(PretrainedConfig):
    method __init__ (line 93) | def __init__(

FILE: src/resources/models/glm-10B-chinese/modeling_glm.py
  function unscaled_init_method (line 52) | def unscaled_init_method(sigma):
  function scaled_init_method (line 61) | def scaled_init_method(mean, std, num_layers):
  function ensure_divisibility (line 71) | def ensure_divisibility(numerator, denominator):
  function divide (line 77) | def divide(numerator, denominator):
  function split_tensor_along_last_dim (line 84) | def split_tensor_along_last_dim(tensor, num_partitions,
  class MLP (line 105) | class MLP(torch.nn.Module):
    method __init__ (line 124) | def __init__(self, hidden_size, output_dropout_prob, init_method,
    method forward (line 140) | def forward(self, hidden_states):
  class VocabEmbedding (line 151) | class VocabEmbedding(torch.nn.Module):
    method __init__ (line 162) | def __init__(self, config):
    method forward (line 185) | def forward(self, input_):
  class PositionalEmbedding (line 194) | class PositionalEmbedding(torch.nn.Module):
    method __init__ (line 196) | def __init__(self, hidden_size):
    method forward (line 204) | def forward(self, pos_seq, bsz=None):
  class SelfAttention (line 214) | class SelfAttention(torch.nn.Module):
    method __init__ (line 241) | def __init__(self, hidden_size, num_attention_heads,
    method _transpose_for_scores (line 269) | def _transpose_for_scores(self, tensor):
    method forward (line 279) | def forward(self, hidden_states, ltor_mask, mem=None):
  class GLMBlock (line 346) | class GLMBlock(torch.nn.Module):
    method __init__ (line 375) | def __init__(self,
    method forward (line 413) | def forward(self, hidden_states, ltor_mask, mem=None):
  class GLMStack (line 434) | class GLMStack(torch.nn.Module):
    method __init__ (line 469) | def __init__(self,
    method forward (line 528) | def forward(self, hidden_states, position_ids, attention_mask, memory_...
    method update_mems (line 604) | def update_mems(self, hiddens, mems):
  class GLMPreTrainedModel (line 619) | class GLMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 630) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 646) | def _set_gradient_checkpointing(self, module, value=False):
  class GLMModel (line 716) | class GLMModel(GLMPreTrainedModel):
    method __init__ (line 732) | def __init__(self, config):
    method forward (line 762) | def forward(
  class GLMForMultipleChoice (line 801) | class GLMForMultipleChoice(GLMPreTrainedModel):
    method __init__ (line 802) | def __init__(self, config):
    method forward (line 807) | def forward(
  class GLMForConditionalGeneration (line 843) | class GLMForConditionalGeneration(GLMPreTrainedModel):
    method __init__ (line 844) | def __init__(self, config):
    method _reorder_cache (line 849) | def _reorder_cache(self, past, beam_idx):
    method prepare_inputs_for_generation (line 861) | def prepare_inputs_for_generation(self, input_ids, past=None, position...
    method forward (line 894) | def forward(
  class GLMForSequenceClassification (line 921) | class GLMForSequenceClassification(GLMPreTrainedModel):
    method __init__ (line 922) | def __init__(self, config: GLMConfig, hidden_dropout=None, num_class=1):
    method forward (line 946) | def forward(self,

FILE: src/resources/models/glm-10B-chinese/tokenization_glm.py
  class GLMBatchEncoding (line 17) | class GLMBatchEncoding(BatchEncoding):
    method to (line 19) | def to(self, device: Union[str, "torch.device"]) -> "BatchEncoding":
  class GLMTokenizerMixin (line 38) | class GLMTokenizerMixin:
    method sop_token (line 40) | def sop_token(self) -> Optional[str]:
    method sop_token_id (line 44) | def sop_token_id(self) -> Optional[int]:
    method eop_token (line 51) | def eop_token(self) -> Optional[str]:
    method eop_token_id (line 55) | def eop_token_id(self) -> Optional[int]:
    method gmask_token_id (line 62) | def gmask_token_id(self) -> int:
    method smask_token_id (line 66) | def smask_token_id(self) -> int:
    method mask_token_ids (line 70) | def mask_token_ids(self):
    method _build_input_for_multiple_choice (line 73) | def _build_input_for_multiple_choice(self, context, choices):
    method _pad_batch (line 110) | def _pad_batch(self, tokens, position_ids, attention_mask, max_seq_len...
    method _collate (line 122) | def _collate(self, samples):
    method build_inputs_for_multiple_choice (line 146) | def build_inputs_for_multiple_choice(self, model_input: BatchEncoding,...
    method build_inputs_for_generation (line 153) | def build_inputs_for_generation(self, model_input: BatchEncoding, max_...
  class GLMRobertaTokenizer (line 209) | class GLMRobertaTokenizer(RobertaTokenizer, GLMTokenizerMixin):
    method gmask_token_id (line 214) | def gmask_token_id(self) -> int:
    method smask_token_id (line 218) | def smask_token_id(self) -> int:
    method mask_token_ids (line 222) | def mask_token_ids(self):
  class GLMChineseTokenizer (line 226) | class GLMChineseTokenizer(PreTrainedTokenizer, GLMTokenizerMixin):
    method __init__ (line 230) | def __init__(self, vocab_file, **kwargs):
    method vocab_size (line 237) | def vocab_size(self):
    method get_vocab (line 240) | def get_vocab(self):
    method _tokenize (line 245) | def _tokenize(self, text, **kwargs):
    method _convert_token_to_id (line 248) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 252) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 256) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 259) | def save_vocabulary(self, save_directory: str, filename_prefix: Option...
    method build_inputs_with_special_tokens (line 276) | def build_inputs_with_special_tokens(
  class GLMGPT2Tokenizer (line 308) | class GLMGPT2Tokenizer(GPT2Tokenizer, GLMTokenizerMixin):
    method build_inputs_with_special_tokens (line 312) | def build_inputs_with_special_tokens(
  class GLMBertTokenizer (line 334) | class GLMBertTokenizer(BertTokenizer, GLMTokenizerMixin):
    method gmask_token_id (line 339) | def gmask_token_id(self) -> int:
    method smask_token_id (line 343) | def smask_token_id(self) -> int:
    method mask_token_ids (line 347) | def mask_token_ids(self):
  class GLMTokenizer (line 351) | class GLMTokenizer:
    method from_pretrained (line 353) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa...

FILE: src/resources/models/glm-350M-chinese/configuration_glm.py
  class GLMConfig (line 28) | class GLMConfig(PretrainedConfig):
    method __init__ (line 86) | def __init__(

FILE: src/resources/models/glm-350M-chinese/modeling_glm.py
  function unscaled_init_method (line 51) | def unscaled_init_method(sigma):
  function scaled_init_method (line 60) | def scaled_init_method(mean, std, num_layers):
  function ensure_divisibility (line 70) | def ensure_divisibility(numerator, denominator):
  function divide (line 76) | def divide(numerator, denominator):
  function split_tensor_along_last_dim (line 83) | def split_tensor_along_last_dim(tensor, num_partitions,
  class MLP (line 104) | class MLP(torch.nn.Module):
    method __init__ (line 121) | def __init__(self, hidden_size, output_dropout_prob, init_method,
    method forward (line 137) | def forward(self, hidden_states):
  class VocabEmbedding (line 148) | class VocabEmbedding(torch.nn.Module):
    method __init__ (line 158) | def __init__(self, config):
    method forward (line 181) | def forward(self, input_):
  class PositionalEmbedding (line 190) | class PositionalEmbedding(torch.nn.Module):
    method __init__ (line 192) | def __init__(self, hidden_size):
    method forward (line 200) | def forward(self, pos_seq, bsz=None):
  class SelfAttention (line 210) | class SelfAttention(torch.nn.Module):
    method __init__ (line 236) | def __init__(self, hidden_size, num_attention_heads,
    method _transpose_for_scores (line 264) | def _transpose_for_scores(self, tensor):
    method forward (line 274) | def forward(self, hidden_states, ltor_mask, mem=None):
  class GLMBlock (line 341) | class GLMBlock(torch.nn.Module):
    method __init__ (line 368) | def __init__(self,
    method forward (line 406) | def forward(self, hidden_states, ltor_mask, mem=None):
  class GLMStack (line 427) | class GLMStack(torch.nn.Module):
    method __init__ (line 460) | def __init__(self,
    method forward (line 519) | def forward(self, hidden_states, position_ids, attention_mask, memory_...
    method update_mems (line 595) | def update_mems(self, hiddens, mems):
  class GLMPreTrainedModel (line 610) | class GLMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 621) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 637) | def _set_gradient_checkpointing(self, module, value=False):
  class GLMModel (line 697) | class GLMModel(GLMPreTrainedModel):
    method __init__ (line 711) | def __init__(self, config):
    method forward (line 741) | def forward(
  class GLMForMultipleChoice (line 782) | class GLMForMultipleChoice(GLMPreTrainedModel):
    method __init__ (line 783) | def __init__(self, config):
    method forward (line 788) | def forward(
  class GLMForConditionalGeneration (line 824) | class GLMForConditionalGeneration(GLMPreTrainedModel):
    method __init__ (line 825) | def __init__(self, config):
    method _reorder_cache (line 830) | def _reorder_cache(self, past, beam_idx):
    method prepare_inputs_for_generation (line 842) | def prepare_inputs_for_generation(self, input_ids, past=None, position...
    method forward (line 865) | def forward(

FILE: src/resources/models/glm-350M-chinese/tokenization_glm.py
  class GLMBatchEncoding (line 17) | class GLMBatchEncoding(BatchEncoding):
    method to (line 19) | def to(self, device: Union[str, "torch.device"]) -> "BatchEncoding":
  class GLMTokenizerMixin (line 38) | class GLMTokenizerMixin:
    method sop_token (line 40) | def sop_token(self) -> Optional[str]:
    method sop_token_id (line 44) | def sop_token_id(self) -> Optional[int]:
    method eop_token (line 51) | def eop_token(self) -> Optional[str]:
    method eop_token_id (line 55) | def eop_token_id(self) -> Optional[int]:
    method gmask_token_id (line 62) | def gmask_token_id(self) -> int:
    method smask_token_id (line 66) | def smask_token_id(self) -> int:
    method mask_token_ids (line 70) | def mask_token_ids(self):
    method _build_input_for_multiple_choice (line 73) | def _build_input_for_multiple_choice(self, context, choices):
    method _pad_batch (line 110) | def _pad_batch(self, tokens, position_ids, attention_mask, max_seq_len...
    method _collate (line 122) | def _collate(self, samples):
    method build_inputs_for_multiple_choice (line 146) | def build_inputs_for_multiple_choice(self, model_input: BatchEncoding,...
    method build_inputs_for_generation (line 153) | def build_inputs_for_generation(self, model_input: BatchEncoding, max_...
  class GLMRobertaTokenizer (line 216) | class GLMRobertaTokenizer(RobertaTokenizer, GLMTokenizerMixin):
    method gmask_token_id (line 221) | def gmask_token_id(self) -> int:
    method smask_token_id (line 225) | def smask_token_id(self) -> int:
    method mask_token_ids (line 229) | def mask_token_ids(self):
  class GLMChineseTokenizer (line 233) | class GLMChineseTokenizer(PreTrainedTokenizer, GLMTokenizerMixin):
    method __init__ (line 237) | def __init__(self, vocab_file, **kwargs):
    method vocab_size (line 244) | def vocab_size(self):
    method get_vocab (line 247) | def get_vocab(self):
    method _tokenize (line 252) | def _tokenize(self, text, **kwargs):
    method _convert_token_to_id (line 255) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 259) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 263) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 266) | def save_vocabulary(self, save_directory: str, filename_prefix: Option...
    method build_inputs_with_special_tokens (line 283) | def build_inputs_with_special_tokens(
  class GLMGPT2Tokenizer (line 315) | class GLMGPT2Tokenizer(GPT2Tokenizer, GLMTokenizerMixin):
    method build_inputs_with_special_tokens (line 319) | def build_inputs_with_special_tokens(
  class GLMBertTokenizer (line 341) | class GLMBertTokenizer(BertTokenizer, GLMTokenizerMixin):
    method gmask_token_id (line 346) | def gmask_token_id(self) -> int:
    method smask_token_id (line 350) | def smask_token_id(self) -> int:
    method mask_token_ids (line 354) | def mask_token_ids(self):
  class GLMTokenizer (line 358) | class GLMTokenizer:
    method from_pretrained (line 360) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa...

FILE: src/resources/models/pangu-13B/configuration_gptpangu.py
  class GPTPanguConfig (line 4) | class GPTPanguConfig(PretrainedConfig):
    method __init__ (line 8) | def __init__(

FILE: src/resources/models/pangu-13B/modeling_gptpangu.py
  class GPTPanguAttention (line 19) | class GPTPanguAttention(nn.Module):
    method __init__ (line 20) | def __init__(self, config):
    method _attn (line 51) | def _attn(self, query, key, value, attention_mask=None, head_mask=None):
    method _split_heads (line 79) | def _split_heads(self, tensor, num_heads, attn_head_size):
    method _merge_heads (line 87) | def _merge_heads(self, tensor, num_heads, attn_head_size):
    method forward (line 95) | def forward(
  class GPTPanguMLP (line 136) | class GPTPanguMLP(nn.Module):
    method __init__ (line 137) | def __init__(self, intermediate_size, config):  # in MLP: intermediate...
    method forward (line 145) | def forward(self, hidden_states):
  class GPTPanguBlock (line 153) | class GPTPanguBlock(nn.Module):
    method __init__ (line 154) | def __init__(self, config):
    method forward (line 164) | def forward(
  class GPTPanguPreTrainedModel (line 204) | class GPTPanguPreTrainedModel(PreTrainedModel):
    method __init__ (line 214) | def __init__(self, *inputs, **kwargs):
    method _init_weights (line 217) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 244) | def _set_gradient_checkpointing(self, module, value=False):
  class GPTPanguModel (line 249) | class GPTPanguModel(GPTPanguPreTrainedModel):
    method __init__ (line 250) | def __init__(self, config):
    method get_input_embeddings (line 267) | def get_input_embeddings(self):
    method set_input_embeddings (line 270) | def set_input_embeddings(self, new_embeddings):
    method forward (line 273) | def forward(
  class GPTPanguForCausalLM (line 438) | class GPTPanguForCausalLM(GPTPanguPreTrainedModel):
    method __init__ (line 439) | def __init__(self, config):
    method get_output_embeddings (line 447) | def get_output_embeddings(self):
    method set_output_embeddings (line 450) | def set_output_embeddings(self, new_embeddings):
    method prepare_inputs_for_generation (line 453) | def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs):
    method forward (line 481) | def forward(
    method _reorder_cache (line 543) | def _reorder_cache(past: Tuple[Tuple[torch.Tensor]], beam_idx: torch.T...

FILE: src/resources/models/pangu-13B/tokenization_gptpangu.py
  class GPTPanguTokenizer (line 17) | class GPTPanguTokenizer(PreTrainedTokenizer):
    method __init__ (line 23) | def __init__(
    method build_inputs_with_special_tokens (line 37) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method tokenize (line 68) | def tokenize(self, text, **kwargs):
    method convert_tokens_to_ids (line 73) | def convert_tokens_to_ids(self, tokens):
    method _convert_token_to_id (line 99) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 102) | def _convert_id_to_token(self, index):
    method convert_ids_to_tokens (line 105) | def convert_ids_to_tokens(self, ids):
    method decode (line 108) | def decode(self, ids, **kwargs):
    method vocab_size (line 121) | def vocab_size(self) -> int:
    method save_vocabulary (line 127) | def save_vocabulary(self, save_directory, filename_prefix=None):

FILE: src/resources/models/pangu-2.6B/configuration_gptpangu.py
  class GPTPanguConfig (line 4) | class GPTPanguConfig(PretrainedConfig):
    method __init__ (line 8) | def __init__(

FILE: src/resources/models/pangu-2.6B/modeling_gptpangu.py
  class GPTPanguAttention (line 19) | class GPTPanguAttention(nn.Module):
    method __init__ (line 20) | def __init__(self, config):
    method _attn (line 51) | def _attn(self, query, key, value, attention_mask=None, head_mask=None):
    method _split_heads (line 79) | def _split_heads(self, tensor, num_heads, attn_head_size):
    method _merge_heads (line 87) | def _merge_heads(self, tensor, num_heads, attn_head_size):
    method forward (line 95) | def forward(
  class GPTPanguMLP (line 136) | class GPTPanguMLP(nn.Module):
    method __init__ (line 137) | def __init__(self, intermediate_size, config):  # in MLP: intermediate...
    method forward (line 145) | def forward(self, hidden_states):
  class GPTPanguBlock (line 153) | class GPTPanguBlock(nn.Module):
    method __init__ (line 154) | def __init__(self, config):
    method forward (line 164) | def forward(
  class GPTPanguPreTrainedModel (line 204) | class GPTPanguPreTrainedModel(PreTrainedModel):
    method __init__ (line 214) | def __init__(self, *inputs, **kwargs):
    method _init_weights (line 217) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 244) | def _set_gradient_checkpointing(self, module, value=False):
  class GPTPanguModel (line 249) | class GPTPanguModel(GPTPanguPreTrainedModel):
    method __init__ (line 250) | def __init__(self, config):
    method get_input_embeddings (line 267) | def get_input_embeddings(self):
    method set_input_embeddings (line 270) | def set_input_embeddings(self, new_embeddings):
    method forward (line 273) | def forward(
  class GPTPanguForCausalLM (line 438) | class GPTPanguForCausalLM(GPTPanguPreTrainedModel):
    method __init__ (line 439) | def __init__(self, config):
    method get_output_embeddings (line 447) | def get_output_embeddings(self):
    method set_output_embeddings (line 450) | def set_output_embeddings(self, new_embeddings):
    method prepare_inputs_for_generation (line 453) | def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs):
    method forward (line 481) | def forward(
    method _reorder_cache (line 543) | def _reorder_cache(past: Tuple[Tuple[torch.Tensor]], beam_idx: torch.T...

FILE: src/resources/models/pangu-2.6B/tokenization_gptpangu.py
  class GPTPanguTokenizer (line 17) | class GPTPanguTokenizer(PreTrainedTokenizer):
    method __init__ (line 23) | def __init__(
    method build_inputs_with_special_tokens (line 37) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method tokenize (line 68) | def tokenize(self, text, **kwargs):
    method convert_tokens_to_ids (line 73) | def convert_tokens_to_ids(self, tokens):
    method _convert_token_to_id (line 99) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 102) | def _convert_id_to_token(self, index):
    method convert_ids_to_tokens (line 105) | def convert_ids_to_tokens(self, ids):
    method decode (line 108) | def decode(self, ids, **kwargs):
    method vocab_size (line 121) | def vocab_size(self) -> int:
    method save_vocabulary (line 127) | def save_vocabulary(self, save_directory, filename_prefix=None):

FILE: src/resources/models/pangu-350M/configuration_gptpangu.py
  class GPTPanguConfig (line 4) | class GPTPanguConfig(PretrainedConfig):
    method __init__ (line 8) | def __init__(

FILE: src/resources/models/pangu-350M/modeling_gptpangu.py
  class GPTPanguAttention (line 19) | class GPTPanguAttention(nn.Module):
    method __init__ (line 20) | def __init__(self, config):
    method _attn (line 51) | def _attn(self, query, key, value, attention_mask=None, head_mask=None):
    method _split_heads (line 79) | def _split_heads(self, tensor, num_heads, attn_head_size):
    method _merge_heads (line 87) | def _merge_heads(self, tensor, num_heads, attn_head_size):
    method forward (line 95) | def forward(
  class GPTPanguMLP (line 136) | class GPTPanguMLP(nn.Module):
    method __init__ (line 137) | def __init__(self, intermediate_size, config):  # in MLP: intermediate...
    method forward (line 145) | def forward(self, hidden_states):
  class GPTPanguBlock (line 153) | class GPTPanguBlock(nn.Module):
    method __init__ (line 154) | def __init__(self, config):
    method forward (line 164) | def forward(
  class GPTPanguPreTrainedModel (line 204) | class GPTPanguPreTrainedModel(PreTrainedModel):
    method __init__ (line 214) | def __init__(self, *inputs, **kwargs):
    method _init_weights (line 217) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 244) | def _set_gradient_checkpointing(self, module, value=False):
  class GPTPanguModel (line 249) | class GPTPanguModel(GPTPanguPreTrainedModel):
    method __init__ (line 250) | def __init__(self, config):
    method get_input_embeddings (line 267) | def get_input_embeddings(self):
    method set_input_embeddings (line 270) | def set_input_embeddings(self, new_embeddings):
    method forward (line 273) | def forward(
  class GPTPanguForCausalLM (line 438) | class GPTPanguForCausalLM(GPTPanguPreTrainedModel):
    method __init__ (line 439) | def __init__(self, config):
    method get_output_embeddings (line 447) | def get_output_embeddings(self):
    method set_output_embeddings (line 450) | def set_output_embeddings(self, new_embeddings):
    method prepare_inputs_for_generation (line 453) | def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs):
    method forward (line 481) | def forward(
    method _reorder_cache (line 543) | def _reorder_cache(past: Tuple[Tuple[torch.Tensor]], beam_idx: torch.T...

FILE: src/resources/models/pangu-350M/tokenization_gptpangu.py
  class GPTPanguTokenizer (line 17) | class GPTPanguTokenizer(PreTrainedTokenizer):
    method __init__ (line 23) | def __init__(
    method build_inputs_with_special_tokens (line 37) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method tokenize (line 68) | def tokenize(self, text, **kwargs):
    method convert_tokens_to_ids (line 73) | def convert_tokens_to_ids(self, tokens):
    method _convert_token_to_id (line 99) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 102) | def _convert_id_to_token(self, index):
    method convert_ids_to_tokens (line 105) | def convert_ids_to_tokens(self, ids):
    method decode (line 108) | def decode(self, ids, **kwargs):
    method vocab_size (line 121) | def vocab_size(self) -> int:
    method save_vocabulary (line 127) | def save_vocabulary(self, save_directory, filename_prefix=None):

FILE: src/resources/models/qwen-7B-chat/configuration_qwen.py
  class QWenConfig (line 9) | class QWenConfig(PretrainedConfig):
    method __init__ (line 19) | def __init__(

FILE: src/resources/models/qwen-7B-chat/modeling_qwen.py
  class FlashSelfAttention (line 76) | class FlashSelfAttention(torch.nn.Module):
    method __init__ (line 77) | def __init__(
    method forward (line 94) | def forward(self, q, k, v):
  class QWenAttention (line 140) | class QWenAttention(nn.Module):
    method __init__ (line 141) | def __init__(self, config, layer_number=None):
    method _attn (line 213) | def _attn(self, query, key, value, attention_mask=None, head_mask=None):
    method _upcast_and_reordered_attn (line 249) | def _upcast_and_reordered_attn(
    method _split_heads (line 305) | def _split_heads(self, tensor, num_heads, attn_head_size):
    method _merge_heads (line 310) | def _merge_heads(self, tensor, num_heads, attn_head_size):
    method forward (line 315) | def forward(
  class QWenMLP (line 409) | class QWenMLP(nn.Module):
    method __init__ (line 410) | def __init__(self, config):
    method forward (line 421) | def forward(self, hidden_states):
  class QWenBlock (line 429) | class QWenBlock(nn.Module):
    method __init__ (line 430) | def __init__(self, config, layer_idx=None, num_expert=1):
    method forward (line 455) | def forward(
  class QWenPreTrainedModel (line 504) | class QWenPreTrainedModel(PreTrainedModel):
    method __init__ (line 511) | def __init__(self, *inputs, **kwargs):
    method _init_weights (line 514) | def _init_weights(self, module):
    method _set_gradient_checkpointing (line 537) | def _set_gradient_checkpointing(self, module, value=False):
  class QWenModel (line 542) | class QWenModel(QWenPreTrainedModel):
    method __init__ (line 545) | def __init__(self, config):
    method get_input_embeddings (line 583) | def get_input_embeddings(self):
    method set_input_embeddings (line 586) | def set_input_embeddings(self, new_embeddings):
    method forward (line 589) | def forward(
  class QWenLMHeadModel (line 745) | class QWenLMHeadModel(QWenPreTrainedModel):
    method __init__ (line 749) | def __init__(self, config):
    method get_output_embeddings (line 762) | def get_output_embeddings(self):
    method set_output_embeddings (line 765) | def set_output_embeddings(self, new_embeddings):
    method prepare_inputs_for_generation (line 768) | def prepare_inputs_for_generation(
    method forward (line 804) | def forward(
    method _reorder_cache (line 868) | def _reorder_cache(
    method chat (line 880) | def chat(
    method generate (line 926) | def generate(
  class RotaryEmbedding (line 964) | class RotaryEmbedding(torch.nn.Module):
    method __init__ (line 965) | def __init__(self, dim, base=10000):
    method update_rotary_pos_emb_cache (line 977) | def update_rotary_pos_emb_cache(self, max_seq_len, offset=0, ntk_alpha...
    method forward (line 991) | def forward(self, max_seq_len, offset=0, ntk_alpha=1.0):
  function _rotate_half (line 996) | def _rotate_half(x):
  function apply_rotary_pos_emb (line 1004) | def apply_rotary_pos_emb(t, freqs, use_flash_rotary=False):
  class RMSNorm (line 1021) | class RMSNorm(torch.nn.Module):
    method __init__ (line 1022) | def __init__(self, dim: int, eps: float = 1e-6):
    method _norm (line 1027) | def _norm(self, x):
    method forward (line 1030) | def forward(self, x):

FILE: src/resources/models/qwen-7B-chat/qwen_generation_utils.py
  function pad_batch (line 25) | def pad_batch(batch: BatchTokensType, pad_id: int, seq_length: int) -> B...
  function get_ltor_masks_and_position_ids (line 33) | def get_ltor_masks_and_position_ids(
  function get_batch (line 94) | def get_batch(context_tokens: torch.LongTensor, eod_id: int):
  function get_stop_words_ids (line 109) | def get_stop_words_ids(chat_format, tokenizer):
  function make_context (line 119) | def make_context(
  function _decode_default (line 192) | def _decode_default(
  function _decode_chatml (line 224) | def _decode_chatml(
  function decode_tokens (line 259) | def decode_tokens(
  class StopWordsLogitsProcessor (line 296) | class StopWordsLogitsProcessor(LogitsProcessor):
    method __init__ (line 309) | def __init__(self, stop_words_ids: Iterable[Iterable[int]], eos_token_...
    method __call__ (line 343) | def __call__(
    method _tokens_match (line 352) | def _tokens_match(self, prev_tokens: torch.LongTensor, tokens: List[in...
    method _calc_stopped_samples (line 365) | def _calc_stopped_samples(self, prev_input_ids: Iterable[int]) -> Iter...
  function top_k_logits (line 379) | def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")):
  function switch (line 409) | def switch(val1, val2, boolean):

FILE: src/resources/models/qwen-7B-chat/tokenization_qwen.py
  class QWenTokenizer (line 26) | class QWenTokenizer(PreTrainedTokenizer):
    method __init__ (line 33) | def __init__(
    method __len__ (line 134) | def __len__(self):
    method get_vocab (line 137) | def get_vocab(self):
    method convert_tokens_to_ids (line 140) | def convert_tokens_to_ids(self, tokens):
    method save_vocabulary (line 163) | def save_vocabulary(self, save_directory: str, **kwargs) -> Tuple[str]:
    method tokenize (line 177) | def tokenize(self, text: str, **kwargs) -> List[str]:
    method convert_tokens_to_string (line 197) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method vocab_size (line 209) | def vocab_size(self):
    method _convert_id_to_token (line 212) | def _convert_id_to_token(self, index: int) -> str:
    method _convert_token_to_id (line 217) | def _convert_token_to_id(self, token: str) -> int:
    method all_special_tokens (line 222) | def all_special_tokens(self) -> List[str]:
    method all_special_ids (line 232) | def all_special_ids(self) -> List[int]:
    method _tokenize (line 239) | def _tokenize(self, text, **kwargs):
    method _decode (line 248) | def _decode(
    method build_inputs_with_special_tokens (line 260) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...

FILE: src/train_dpo.py
  function preprocess_logits_for_metrics (line 27) | def preprocess_logits_for_metrics(logits, labels):
  function get_parser (line 34) | def get_parser():
  function main (line 110) | def main():

FILE: src/train_reward.py
  function get_parser (line 24) | def get_parser():
  function main (line 92) | def main():

FILE: src/train_rlhf.py
  function get_parser (line 26) | def get_parser():
  function create_datasets (line 144) | def create_datasets(args, tokenizer_padding_from_left, ppo_ptx_enabled, ...
  function create_dataloader (line 165) | def create_dataloader(args, train_dataset, pretrain_dataset=None):
  function main (line 203) | def main():

FILE: src/train_rlhf_trlx.py
  function get_parser (line 24) | def get_parser():
  function train (line 86) | def train(model_path: Optional[str] = None,
  function main (line 166) | def main():

FILE: src/train_sft.py
  function preprocess_logits_for_metrics (line 26) | def preprocess_logits_for_metrics(logits, labels):
  function get_parser (line 33) | def get_parser():
  function main (line 107) | def main():

FILE: src/train_sft_wo_trainer.py
  function preprocess_logits_for_metrics (line 27) | def preprocess_logits_for_metrics(logits, labels):
  function get_parser (line 34) | def get_parser():
  function main (line 106) | def main():

FILE: src/utils/config.py
  function merge (line 18) | def merge(base: Dict, update: Dict, updated: Set) -> Dict:
  function _merge_dicts (line 31) | def _merge_dicts(base: Dict, update: Dict) -> Dict:
  class ModelConfig (line 46) | class ModelConfig:
    method from_dict (line 81) | def from_dict(cls, config: Dict[str, Any]):
  class TokenizerConfig (line 86) | class TokenizerConfig:
    method from_dict (line 105) | def from_dict(cls, config: Dict[str, Any]):
  class OptimizerConfig (line 110) | class OptimizerConfig:
    method from_dict (line 125) | def from_dict(cls, config: Dict[str, Any]):
  class SchedulerConfig (line 130) | class SchedulerConfig:
    method from_dict (line 145) | def from_dict(cls, config: Dict[str, Any]):
  class TrainConfig (line 150) | class TrainConfig:
    method from_dict (line 239) | def from_dict(cls, config: Dict[str, Any]):
  class TRLConfig (line 244) | class TRLConfig:
    method load_yaml (line 257) | def load_yaml(cls, yml_fp: str):
    method to_dict (line 268) | def to_dict(self):
    method evolve (line 283) | def evolve(self, **kwargs) -> "TRLConfig":
    method from_dict (line 294) | def from_dict(cls, config: Dict):
    method update (line 308) | def update(cls, baseconfig: Dict, config: Dict):
    method __str__ (line 321) | def __str__(self):
  function default_ppo_config (line 328) | def default_ppo_config():
  function default_ilql_config (line 373) | def default_ilql_config():
  function default_sft_config (line 408) | def default_sft_config():
  function get_train_ds_config (line 435) | def get_train_ds_config(global_batch_size=32,
  function get_eval_ds_config (line 485) | def get_eval_ds_config(global_batch_size=32, micro_batch_size=4, gradien...

FILE: src/utils/file_utils.py
  function set_seed (line 14) | def set_seed(seed_val=42):
  function print_rank_0 (line 21) | def print_rank_0(*message):
  function significant (line 29) | def significant(x: Number, ndigits=2) -> Number:
  function print_gpu_utilization (line 53) | def print_gpu_utilization(prefix: str = "", index: int = 0, only_rank_0:...
  function print_gpu_utilization_torch (line 65) | def print_gpu_utilization_torch(prefix: str = "", index: int = 0, only_r...
  function print_trainable_parameters (line 85) | def print_trainable_parameters(model):

FILE: src/utils/loading.py
  function _trainer_unavailble (line 38) | def _trainer_unavailble(name):
  function prepare_decoder_attention_mask (line 47) | def prepare_decoder_attention_mask(self, *args, **kwargs):
  function chatglm_auto_configure_device_map (line 51) | def chatglm_auto_configure_device_map(num_gpus: int, model_name: str, lo...
  function llama_and_baichuan_auto_configure_device_map (line 98) | def llama_and_baichuan_auto_configure_device_map(num_gpus: int, model_na...
  function load_params_8bit_or_4bit (line 132) | def load_params_8bit_or_4bit(args, model: PreTrainedModel) -> Dict:
  function load_tokenizer_and_model (line 166) | def load_tokenizer_and_model(args, with_trainer: bool = True) -> Tuple[P...
  function to_peft_model (line 285) | def to_peft_model(args, model: PreTrainedModel) -> PreTrainedModel:
  function load_checkpoint (line 320) | def load_checkpoint(args, model: PreTrainedModel, strict: bool = True) -...
  function get_trainer (line 329) | def get_trainer(name: str) -> Callable:
  function get_pipeline (line 340) | def get_pipeline(name: str) -> Callable:

FILE: src/utils/method_configs.py
  function register_method (line 20) | def register_method(name):
  class MethodConfig (line 44) | class MethodConfig:
    method from_dict (line 55) | def from_dict(cls, config: Dict[str, Any]):
  function get_method (line 59) | def get_method(name: str) -> MethodConfig:
  class PPOConfig (line 73) | class PPOConfig(MethodConfig):
    method get_advantages_and_returns (line 133) | def get_advantages_and_returns(
    method loss (line 171) | def loss(
  class SFTConfig (line 238) | class SFTConfig(MethodConfig):
  class ILQLConfig (line 251) | class ILQLConfig(MethodConfig):
    method loss (line 262) | def loss(self, outputs, labels):

FILE: src/utils/modeling_utils.py
  function _make_causal_mask (line 41) | def _make_causal_mask(
  function _expand_mask (line 59) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option...
  function _prepare_decoder_attention_mask (line 73) | def _prepare_decoder_attention_mask(attention_mask, input_shape, input_e...
  function get_distributed_config (line 102) | def get_distributed_config(accelerator: Accelerator):
  class OptimizerName (line 127) | class OptimizerName(str, Enum):
  function get_optimizer_class (line 137) | def get_optimizer_class(name: OptimizerName):
  class SchedulerName (line 174) | class SchedulerName(str, Enum):
  function get_scheduler_class (line 181) | def get_scheduler_class(name: SchedulerName):
  class Clock (line 193) | class Clock:
    method __init__ (line 198) | def __init__(self):
    method tick (line 203) | def tick(self, samples: int = 0) -> float:
    method get_stat (line 219) | def get_stat(self, n_samp: int = 1000, reset: bool = False):
  function tree_map (line 234) | def tree_map(f, tree: Any) -> Any:
  function to_device (line 248) | def to_device(tree, device, non_blocking=False):
  function filter_non_scalars (line 255) | def filter_non_scalars(xs: Dict) -> Dict:
  function get_git_tag (line 269) | def get_git_tag() -> Tuple[str, str]:
  function make_head (line 281) | def make_head(n_embd: int, out: int, dtype: type = torch.float32) -> nn....
  function freeze_bottom_causal_layers (line 290) | def freeze_bottom_causal_layers(model: nn.Module, num_layers_unfrozen: i...
  function freeze_bottom_seq2seq_layers (line 303) | def freeze_bottom_seq2seq_layers(model: nn.Module, num_layers_unfrozen: ...
  function rhasattr (line 325) | def rhasattr(obj, attr):
  function rgetattr (line 341) | def rgetattr(obj, attr: str, *args) -> object:
  function findattr (line 354) | def findattr(obj, attrs: Tuple[str]) -> Union[object, None]:
  function hf_get_decoder (line 361) | def hf_get_decoder(model: nn.Module) -> nn.Module:
  function hf_get_decoder_final_norm (line 374) | def hf_get_decoder_final_norm(model: nn.Module) -> float:
  function hf_get_decoder_blocks (line 390) | def hf_get_decoder_blocks(model: nn.Module) -> Tuple[nn.Module]:
  function hf_get_lm_head (line 411) | def hf_get_lm_head(model: nn.Module) -> nn.Module:
  function hf_get_hidden_size (line 421) | def hf_get_hidden_size(config: transformers.PretrainedConfig) -> int:
  function hf_get_num_hidden_layers (line 433) | def hf_get_num_hidden_layers(config: transformers.PretrainedConfig) -> int:
  function get_global_statistics (line 445) | def get_global_statistics(xs: torch.Tensor) -> Tuple[float, float, int]:
  function whiten (line 460) | def whiten(xs: torch.Tensor, shift_mean=True, distributed=True) -> torch...
  function logprobs_of_labels (line 473) | def logprobs_of_labels(logits, labels):
  function flatten_dict (line 482) | def flatten_dict(
  function get_tensor_stats (line 498) | def get_tensor_stats(xs: torch.Tensor, mask: torch.Tensor, n: int):
  class RunningMoments (line 508) | class RunningMoments:
    method __init__ (line 509) | def __init__(self):
    method update (line 519) | def update(self, xs: torch.Tensor) -> Tuple[float, float]:
  function generate_layer_regex (line 629) | def generate_layer_regex(config: transformers.PretrainedConfig, num_laye...
  function get_delta_modified_modules (line 641) | def get_delta_modified_modules(
  function get_delta_model_class (line 658) | def get_delta_model_class(model_type: str):
  function parse_delta_kwargs (line 671) | def parse_delta_kwargs(
  function regex_for_range (line 705) | def regex_for_range(min_: int, max_: int) -> str:  # noqa
  function get_optimizer_grouped_parameters (line 782) | def get_optimizer_grouped_parameters(model,
  function get_all_reduce_mean (line 808) | def get_all_reduce_mean(tensor):
  function moving_average (line 814) | def moving_average(model, model_ema, beta=0.992, device=None, zero_stage...
  function save_hf_format (line 831) | def save_hf_format(model, tokenizer, args, sub_folder=""):
  function _z3_params_to_fetch (line 852) | def _z3_params_to_fetch(param_list):
  function save_zero_three_model (line 859) | def save_zero_three_model(model_ema, global_rank, save_dir, zero_stage=0):
  function sorted_checkpoints (line 886) | def sorted_checkpoints(output_dir=None, checkpoint_prefix="checkpoint", ...
  function rotate_checkpoints (line 905) | def rotate_checkpoints(save_total_limit, use_mtime=False, output_dir=Non...
  function qwen_make_context (line 931) | def qwen_make_context(

FILE: src/utils/nlp_utils.py
  function clean_text (line 7) | def clean_text(text):