SYMBOL INDEX (1225 symbols across 64 files) FILE: src/data/data.py function chatglm3_encode (line 21) | def chatglm3_encode(tokenizer: PreTrainedTokenizerBase, function chatglm2_encode (line 83) | def chatglm2_encode(tokenizer: PreTrainedTokenizerBase, class DataCollatorReward (line 147) | class DataCollatorReward: method __call__ (line 148) | def __call__(self, data): class DataCollatorRLHF (line 160) | class DataCollatorRLHF: method __init__ (line 162) | def __init__(self, max_token_len, inference_tp_size): method __call__ (line 166) | def __call__(self, data): class PretrainDataset (line 197) | class PretrainDataset(Dataset): method __init__ (line 198) | def __init__(self, args, filename, tokenizer, concat_samples=True): method __len__ (line 209) | def __len__(self): method __getitem__ (line 212) | def __getitem__(self, idx): method load_dataset (line 306) | def load_dataset(self, filename): class SFTDataset (line 351) | class SFTDataset(Dataset): method __init__ (line 352) | def __init__(self, args, filename, tokenizer, concat_samples=True): method __len__ (line 363) | def __len__(self): method __getitem__ (line 366) | def __getitem__(self, idx): method load_dataset (line 509) | def load_dataset(self, filename): class PairwiseDataset (line 563) | class PairwiseDataset(Dataset): method __init__ (line 564) | def __init__(self, args, filename, tokenizer): method __len__ (line 572) | def __len__(self): method __getitem__ (line 575) | def __getitem__(self, idx): method load_dataset (line 683) | def load_dataset(filename): class RLHFDataset (line 721) | class RLHFDataset(Dataset): method __init__ (line 722) | def __init__(self, args, filename, tokenizer): method __len__ (line 731) | def __len__(self): method __getitem__ (line 734) | def __getitem__(self, idx): method load_dataset (line 781) | def load_dataset(filename): class PPODataset (line 804) | class PPODataset: method __init__ (line 805) | def __init__(self, max_size, small_batch_size): method separate (line 810) | def separate(self): method add (line 834) | def add(self, data): method free (line 846) | def free(self): class DPODataset (line 850) | class DPODataset(Dataset): method __init__ (line 851) | def __init__(self, args, filename, tokenizer): method __len__ (line 859) | def __len__(self): method __getitem__ (line 862) | def __getitem__(self, idx): method load_dataset (line 928) | def load_dataset(filename): class OCNLIDataset (line 969) | class OCNLIDataset(Dataset): method __init__ (line 970) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 983) | def __len__(self): method __getitem__ (line 986) | def __getitem__(self, idx): method load_dataset (line 1017) | def load_dataset(self, filename): class CMNLIDataset (line 1040) | class CMNLIDataset(Dataset): method __init__ (line 1041) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1054) | def __len__(self): method __getitem__ (line 1057) | def __getitem__(self, idx): method load_dataset (line 1090) | def load_dataset(self, filename): class CHIDDataset (line 1113) | class CHIDDataset(Dataset): method __init__ (line 1114) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1127) | def __len__(self): method __getitem__ (line 1130) | def __getitem__(self, idx): method load_dataset (line 1165) | def load_dataset(self, filename): method load_idiom_dict (line 1186) | def load_idiom_dict(self): class CMRCDataset (line 1195) | class CMRCDataset(Dataset): method __init__ (line 1196) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1208) | def __len__(self): method __getitem__ (line 1211) | def __getitem__(self, idx): method load_dataset (line 1242) | def load_dataset(self, filename): class CLUEWSCDataset (line 1267) | class CLUEWSCDataset(Dataset): method __init__ (line 1268) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1281) | def __len__(self): method __getitem__ (line 1284) | def __getitem__(self, idx): method load_dataset (line 1317) | def load_dataset(self, filename): class C3Dataset (line 1338) | class C3Dataset(Dataset): method __init__ (line 1339) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1351) | def __len__(self): method __getitem__ (line 1354) | def __getitem__(self, idx): method load_dataset (line 1389) | def load_dataset(self, filename): class AFQMCDataset (line 1412) | class AFQMCDataset(Dataset): method __init__ (line 1413) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1426) | def __len__(self): method __getitem__ (line 1429) | def __getitem__(self, idx): method load_dataset (line 1462) | def load_dataset(self, filename): class CSLDataset (line 1482) | class CSLDataset(Dataset): method __init__ (line 1483) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1496) | def __len__(self): method __getitem__ (line 1499) | def __getitem__(self, idx): method load_dataset (line 1532) | def load_dataset(self, filename): class IFLYTEKDataset (line 1552) | class IFLYTEKDataset(Dataset): method __init__ (line 1553) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1587) | def __len__(self): method __getitem__ (line 1590) | def __getitem__(self, idx): method load_dataset (line 1625) | def load_dataset(self, filename): class TNEWSDataset (line 1648) | class TNEWSDataset(Dataset): method __init__ (line 1649) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1676) | def __len__(self): method __getitem__ (line 1679) | def __getitem__(self, idx): method load_dataset (line 1714) | def load_dataset(self, filename): class CEvalDataset (line 1737) | class CEvalDataset(Dataset): method __init__ (line 1738) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1754) | def __len__(self): method format_example (line 1757) | def format_example(self, line, include_answer=True, cot=False): method __getitem__ (line 1785) | def __getitem__(self, idx): method load_dataset (line 1863) | def load_dataset(self, filename, return_format="list"): class MMLUDataset (line 1887) | class MMLUDataset(Dataset): method __init__ (line 1888) | def __init__(self, args, eval_filename, tokenizer, train_filename=None): method __len__ (line 1904) | def __len__(self): method format_example (line 1907) | def format_example(self, line, include_answer=True): method __getitem__ (line 1926) | def __getitem__(self, idx): method load_dataset (line 2003) | def load_dataset(self, filename, return_format="list"): FILE: src/data/data_types.py class PromptElement (line 9) | class PromptElement: class PromptBatch (line 25) | class PromptBatch: class AccelerateRLElement (line 41) | class AccelerateRLElement: class AccelerateRLBatchElement (line 57) | class AccelerateRLBatchElement: class PPORLElement (line 73) | class PPORLElement: class PPORLBatch (line 107) | class PPORLBatch: FILE: src/data/pipeline.py class GeneralElement (line 25) | class GeneralElement: class RLElement (line 34) | class RLElement: class BatchElement (line 45) | class BatchElement: class GLMDataCollator (line 55) | class GLMDataCollator: method __call__ (line 63) | def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: function register_datapipeline (line 102) | def register_datapipeline(name): class BasePipeline (line 125) | class BasePipeline(Dataset): method __init__ (line 126) | def __init__(self, path: str = "dataset"): method __getitem__ (line 130) | def __getitem__(self, index: int) -> GeneralElement: method __len__ (line 134) | def __len__(self) -> int: method create_loader (line 138) | def create_loader( class BaseRolloutStore (line 153) | class BaseRolloutStore(Dataset): method __init__ (line 154) | def __init__(self, capacity=-1): method push (line 159) | def push(self, exps: Iterable[Any]): method __getitem__ (line 165) | def __getitem__(self, index: int) -> RLElement: method __len__ (line 168) | def __len__(self) -> int: method create_loader (line 172) | def create_loader( class PanguPipeline (line 189) | class PanguPipeline(BasePipeline): method __init__ (line 190) | def __init__(self, prompts: List[dict], config: TRLConfig, tokenizer: ... method __len__ (line 199) | def __len__(self): method __getitem__ (line 202) | def __getitem__(self, idx): method create_loader (line 219) | def create_loader(self, batch_size: int, shuffle=False) -> DataLoader: class GLMPipeline (line 224) | class GLMPipeline(BasePipeline): method __init__ (line 225) | def __init__(self, prompts: List[dict], config: TRLConfig, tokenizer: ... method __len__ (line 235) | def __len__(self): method __getitem__ (line 238) | def __getitem__(self, idx): method create_loader (line 257) | def create_loader(self, batch_size: int, shuffle=False) -> DataLoader: class ChatGLMPipeline (line 263) | class ChatGLMPipeline(BasePipeline): method __init__ (line 264) | def __init__(self, prompts: List[dict], config: TRLConfig, tokenizer: ... method __len__ (line 273) | def __len__(self): method __getitem__ (line 276) | def __getitem__(self, idx): method create_loader (line 287) | def create_loader(self, batch_size: int, shuffle=False) -> DataLoader: class PPORolloutStorage (line 291) | class PPORolloutStorage(BaseRolloutStore): method __init__ (line 296) | def __init__(self, pad_token_id): method push (line 302) | def push(self, exps: Iterable[PPORLElement]): method clear_history (line 305) | def clear_history(self): method export_history (line 308) | def export_history(self, location: str): method __getitem__ (line 320) | def __getitem__(self, index: int) -> PPORLElement: method __len__ (line 323) | def __len__(self) -> int: method create_loader (line 326) | def create_loader( FILE: src/data_prepare.py function weibo_summary_comment (line 24) | def weibo_summary_comment(args, tokenizer): function couplets (line 61) | def couplets(args, tokenizer): function zhidao (line 117) | def zhidao(args, tokenizer): function chinese_classical (line 159) | def chinese_classical(args, tokenizer): function chinese_poetry (line 221) | def chinese_poetry(args, tokenizer): function baike_qa_2019 (line 372) | def baike_qa_2019(args, tokenizer): function get_parser (line 404) | def get_parser(): function main (line 418) | def main(): FILE: src/eval_pretrain.py function get_parser (line 60) | def get_parser(): function extract_cot_answer (line 101) | def extract_cot_answer(line, response): function main (line 106) | def main(): FILE: src/models/loss.py class PairWiseLoss (line 6) | class PairWiseLoss(nn.Module): method forward (line 11) | def forward(self, chosen_reward: torch.Tensor, reject_reward: torch.Te... FILE: src/models/ppo.py class PreTrainedModelWrapper (line 51) | class PreTrainedModelWrapper(nn.Module, transformers.utils.PushToHubMixin): method __init__ (line 76) | def __init__(self, base_model: Optional[transformers.PreTrainedModel] ... method _split_kwargs (line 83) | def _split_kwargs(cls, kwargs: Dict[str, Any]): method from_config (line 97) | def from_config(cls, config: transformers.PretrainedConfig, **kwargs): method from_pretrained (line 118) | def from_pretrained( # noqa: max-complexity method save_pretrained (line 210) | def save_pretrained(self, *args, **kwargs): method state_dict (line 231) | def state_dict(self, *args, **kwargs): method post_init (line 235) | def post_init(self, *args, **kwargs): method get_compatible_forward_kwargs (line 242) | def get_compatible_forward_kwargs(self, **kwargs) -> Dict[str, Any]: class AdaptiveKLController (line 252) | class AdaptiveKLController: method __init__ (line 258) | def __init__(self, init_kl_coef: float, target: float, horizon: int): method update (line 263) | def update(self, current: float, n_steps: int): class FixedKLController (line 273) | class FixedKLController: method __init__ (line 276) | def __init__(self, kl_coef): method update (line 279) | def update(self, current: float, n_steps: int): class CausalLMOutputWithValue (line 289) | class CausalLMOutputWithValue(ModelOutput): class AutoModelForCausalLMWithValueHead (line 299) | class AutoModelForCausalLMWithValueHead(PreTrainedModelWrapper): method __init__ (line 308) | def __init__( method forward (line 316) | def forward( method generate (line 353) | def generate(self, *args, **kwargs) -> Union[ModelOutput, torch.LongTe... method state_dict (line 356) | def state_dict(self, *args, **kwargs): method post_init (line 367) | def post_init(self, state_dict): class AutoModelForCausalLMWithHydraValueHead (line 381) | class AutoModelForCausalLMWithHydraValueHead(AutoModelForCausalLMWithVal... method __init__ (line 385) | def __init__( method forward_hydra (line 414) | def forward_hydra( method from_pretrained (line 457) | def from_pretrained( # noqa: max-complexity class ModelBranch (line 564) | class ModelBranch(transformers.PreTrainedModel): method __init__ (line 569) | def __init__( class GPTModelBranch (line 599) | class GPTModelBranch(ModelBranch): method forward (line 600) | def forward( # noqa: max-complexity class OPTModelBranch (line 733) | class OPTModelBranch(ModelBranch): method forward (line 734) | def forward( # noqa: max-complexity class BloomModelBranch (line 853) | class BloomModelBranch(ModelBranch): method forward (line 854) | def forward( # noqa: max-complexity class Seq2SeqLMOutputWithValue (line 971) | class Seq2SeqLMOutputWithValue(ModelOutput): class AutoModelForSeq2SeqLMWithValueHead (line 984) | class AutoModelForSeq2SeqLMWithValueHead(PreTrainedModelWrapper): method __init__ (line 993) | def __init__( method forward (line 1001) | def forward( method generate (line 1053) | def generate(self, *args, **kwargs) -> Union[ModelOutput, torch.LongTe... method state_dict (line 1056) | def state_dict(self, *args, **kwargs): method post_init (line 1067) | def post_init(self, state_dict): class AutoModelForSeq2SeqLMWithHydraValueHead (line 1081) | class AutoModelForSeq2SeqLMWithHydraValueHead(AutoModelForSeq2SeqLMWithV... method __init__ (line 1085) | def __init__( method forward_hydra (line 1103) | def forward_hydra( method from_pretrained (line 1161) | def from_pretrained( # noqa: max-complexity class T5Branch (line 1268) | class T5Branch(ModelBranch): method __init__ (line 1271) | def __init__( method forward (line 1281) | def forward( # noqa: max-complexity function hf_get_branch_class (line 1381) | def hf_get_branch_class( FILE: src/models/reward.py class RewardModel (line 7) | class RewardModel(PreTrainedModel): method __init__ (line 10) | def __init__(self, config, model, tokenizer): method gradient_checkpointing_enable (line 24) | def gradient_checkpointing_enable(self): method gradient_checkpointing_disable (line 27) | def gradient_checkpointing_disable(self): method _set_gradient_checkpointing (line 30) | def _set_gradient_checkpointing(self, module, value=False): method reward (line 34) | def reward( method forward (line 71) | def forward( FILE: src/models/rlhf_engine.py function log_init (line 37) | def log_init(model_name, rank, stime=None): function create_hf_model (line 49) | def create_hf_model(model_class, function create_critic_model (line 93) | def create_critic_model(model_name_or_path, class DeepSpeedRLHFEngine (line 130) | class DeepSpeedRLHFEngine: method __init__ (line 132) | def __init__(self, actor_model_name_or_path, critic_model_name_or_path, method _init_actor (line 164) | def _init_actor(self, actor_model_name_or_path): method _init_ref (line 232) | def _init_ref(self, actor_model_name_or_path): method _init_ema (line 255) | def _init_ema(self, actor_model_name_or_path): method _init_critic (line 291) | def _init_critic(self, critic_model_name_or_path): method _init_reward (line 362) | def _init_reward(self, critic_model_name_or_path): FILE: src/models/trainer.py function register_trainer (line 75) | def register_trainer(name): class BaseRLTrainer (line 98) | class BaseRLTrainer: method __init__ (line 99) | def __init__( method push_to_store (line 116) | def push_to_store(self, data): method add_eval_pipeline (line 119) | def add_eval_pipeline(self, eval_pipeline): method sample (line 124) | def sample(self, prompts: Iterable[str], length: int, n_samples: int) ... method learn (line 138) | def learn( method save (line 159) | def save(self, directory: Optional[str] = None): method load (line 164) | def load(self, directory=None): class AccelerateRLTrainer (line 170) | class AccelerateRLTrainer(BaseRLTrainer): method __init__ (line 175) | def __init__(self, config, **kwargs): # noqa: C901 method setup_model (line 252) | def setup_model(self): method setup_optimizer (line 278) | def setup_optimizer(self): method setup_scheduler (line 300) | def setup_scheduler(self): method decode (line 308) | def decode( method generate (line 374) | def generate(self, input_ids, attention_mask=None, **kwargs): method generate_eval (line 389) | def generate_eval(self, input_ids, attention_mask=None, **kwargs): method save_pretrained (line 402) | def save_pretrained(self, directory: Optional[str] = None, **kwargs): method save (line 420) | def save(self, directory: Optional[str] = None, **kwargs): method load (line 424) | def load(self, directory: Optional[str] = None, **kwargs): method add_eval_pipeline (line 428) | def add_eval_pipeline(self, eval_pipeline): method evaluate (line 432) | def evaluate(self): # noqa: C901 method learn (line 579) | def learn(self): # noqa: C901 method get_arch (line 703) | def get_arch(self, config: TRLConfig): method loss (line 708) | def loss(self, batch) -> Tuple[float, Dict]: method post_backward_callback (line 713) | def post_backward_callback(self): method post_epoch_callback (line 718) | def post_epoch_callback(self): class AcceleratePPOTrainer (line 724) | class AcceleratePPOTrainer(AccelerateRLTrainer): method __init__ (line 730) | def __init__(self, config: TRLConfig, **kwargs): method get_arch (line 810) | def get_arch(self, config: TRLConfig): method loss (line 830) | def loss(self, batch: PPORLBatch): method setup_rollout_logging (line 913) | def setup_rollout_logging(self, config): method post_epoch_callback (line 926) | def post_epoch_callback(self): method post_backward_callback (line 937) | def post_backward_callback(self): method prepare_learning (line 940) | def prepare_learning(self): method add_prompt_pipeline (line 949) | def add_prompt_pipeline(self, pipeline: BasePipeline): method make_experience (line 955) | def make_experience(self, num_rollouts: int = 1024, iter_count: int = ... function get_model_norm (line 1259) | def get_model_norm(model): function gather_log_probs (line 1273) | def gather_log_probs(logits, labels): class DeepSpeedPPOTrainer (line 1279) | class DeepSpeedPPOTrainer(): method __init__ (line 1281) | def __init__(self, rlhf_engine, args): method generate_sequence (line 1302) | def generate_sequence(self, inputs): method generate_experience (line 1416) | def generate_experience(self, output_sequences, answer_start_indices): method compute_rewards (line 1455) | def compute_rewards(self, starts, log_probs, ref_log_probs, reward_sco... method train_rlhf (line 1482) | def train_rlhf(self, inputs): method actor_loss_fn (line 1563) | def actor_loss_fn(self, logprobs, old_logprobs, advantages, mask): method critic_loss_fn (line 1573) | def critic_loss_fn(self, values, old_values, returns, mask): method get_advantages_and_returns (line 1588) | def get_advantages_and_returns(self, values, rewards, starts): method _validate_training_mode (line 1632) | def _validate_training_mode(self): method _validate_evaluation_mode (line 1637) | def _validate_evaluation_mode(self): method train (line 1644) | def train(self): method eval (line 1649) | def eval(self): method dump_model_norms (line 1656) | def dump_model_norms(self, tag): class DeepSpeedPPOPTXTrainer (line 1674) | class DeepSpeedPPOPTXTrainer(DeepSpeedPPOTrainer): method __init__ (line 1676) | def __init__(self, *args, **kwargs): method train_unsupervised (line 1679) | def train_unsupervised(self, inputs, unsup_coef): class DPOTrainer (line 1691) | class DPOTrainer(Trainer): method __init__ (line 1738) | def __init__( method concatenated_inputs (line 1863) | def concatenated_inputs(self, batch: Dict[str, Union[List, torch.LongT... method dpo_loss (line 1892) | def dpo_loss( method _get_batch_logps (line 1929) | def _get_batch_logps( method concatenated_forward (line 1962) | def concatenated_forward( method separate_forward (line 1986) | def separate_forward( method get_batch_metrics (line 2012) | def get_batch_metrics( method compute_loss (line 2077) | def compute_loss( method get_batch_samples (line 2093) | def get_batch_samples(self, model, batch: Dict[str, torch.LongTensor])... method prediction_step (line 2130) | def prediction_step( method store_metrics (line 2164) | def store_metrics(self, metrics: Dict[str, float], train_eval: Literal... method log (line 2168) | def log(self, logs: Dict[str, float]) -> None: FILE: src/pretrain.py function preprocess_logits_for_metrics (line 28) | def preprocess_logits_for_metrics(logits, labels): function get_parser (line 35) | def get_parser(): function main (line 109) | def main(): FILE: src/pretrain_wo_trainer.py function preprocess_logits_for_metrics (line 30) | def preprocess_logits_for_metrics(logits, labels): function get_parser (line 37) | def get_parser(): function pred_single_sample (line 110) | def pred_single_sample(prompt, prefix, model, tokenizer, args, device, e... function pred (line 180) | def pred(args, model, tokenizer, device, eos_token_id, step=-1): function main (line 196) | def main(): FILE: src/resources/models/baichuan-13B-base/configuration_baichuan.py class BaichuanConfig (line 5) | class BaichuanConfig(PretrainedConfig): method __init__ (line 9) | def __init__( FILE: src/resources/models/baichuan-13B-base/modeling_baichuan.py function _get_interleave (line 20) | def _get_interleave(n): function _fill_with_neg_inf (line 34) | def _fill_with_neg_inf(t): function _gen_alibi_mask (line 39) | def _gen_alibi_mask(n_head, max_pos, alibi_mask=None): class RMSNorm (line 52) | class RMSNorm(torch.nn.Module): method __init__ (line 53) | def __init__(self, hidden_size, epsilon=1e-6): method forward (line 58) | def forward(self, hidden_states): class MLP (line 69) | class MLP(torch.nn.Module): method __init__ (line 70) | def __init__( method forward (line 82) | def forward(self, x): class BaichuanAttention (line 86) | class BaichuanAttention(torch.nn.Module): method __init__ (line 88) | def __init__(self, config: BaichuanConfig): method _shape (line 103) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 106) | def forward( class BaichuanLayer (line 155) | class BaichuanLayer(torch.nn.Module): method __init__ (line 156) | def __init__(self, config: BaichuanConfig): method forward (line 168) | def forward( class BaichuanPreTrainedModel (line 205) | class BaichuanPreTrainedModel(PreTrainedModel): method _init_weights (line 212) | def _init_weights(self, module): method _set_gradient_checkpointing (line 223) | def _set_gradient_checkpointing(self, module, value=False): class BaichuanModel (line 228) | class BaichuanModel(BaichuanPreTrainedModel): method __init__ (line 229) | def __init__(self, config: BaichuanConfig): method get_input_embeddings (line 243) | def get_input_embeddings(self): method set_input_embeddings (line 246) | def set_input_embeddings(self, value): method get_alibi_mask (line 249) | def get_alibi_mask(self, tensor, seq_length_with_past, attention_mask): method forward (line 267) | def forward( class BaichuanForCausalLM (line 368) | class BaichuanForCausalLM(BaichuanPreTrainedModel): method __init__ (line 369) | def __init__(self, config): method forward (line 377) | def forward( method prepare_inputs_for_generation (line 430) | def prepare_inputs_for_generation( method _reorder_cache (line 451) | def _reorder_cache(past_key_values, beam_idx): method quantize (line 457) | def quantize(self, bits: int): method _build_chat_input (line 493) | def _build_chat_input(self, tokenizer, messages: List[dict], max_new_t... method chat (line 524) | def chat(self, tokenizer, messages: List[dict], stream=False, FILE: src/resources/models/baichuan-13B-base/tokenization_baichuan.py class BaichuanTokenizer (line 23) | class BaichuanTokenizer(PreTrainedTokenizer): method __init__ (line 37) | def __init__( method __getstate__ (line 72) | def __getstate__(self): method __setstate__ (line 77) | def __setstate__(self, d): method vocab_size (line 83) | def vocab_size(self): method get_vocab (line 87) | def get_vocab(self): method _tokenize (line 93) | def _tokenize(self, text): method _convert_token_to_id (line 97) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 101) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 106) | def convert_tokens_to_string(self, tokens): method save_vocabulary (line 125) | def save_vocabulary(self, save_directory, filename_prefix: Optional[st... method build_inputs_with_special_tokens (line 152) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No... method get_special_tokens_mask (line 163) | def get_special_tokens_mask( method create_token_type_ids_from_sequences (line 200) | def create_token_type_ids_from_sequences( FILE: src/resources/models/baichuan-13B-chat/configuration_baichuan.py class BaichuanConfig (line 5) | class BaichuanConfig(PretrainedConfig): method __init__ (line 9) | def __init__( FILE: src/resources/models/baichuan-13B-chat/modeling_baichuan.py function _get_interleave (line 20) | def _get_interleave(n): function _fill_with_neg_inf (line 34) | def _fill_with_neg_inf(t): function _gen_alibi_mask (line 39) | def _gen_alibi_mask(n_head, max_pos, alibi_mask=None): class RMSNorm (line 52) | class RMSNorm(torch.nn.Module): method __init__ (line 53) | def __init__(self, hidden_size, epsilon=1e-6): method forward (line 58) | def forward(self, hidden_states): class MLP (line 69) | class MLP(torch.nn.Module): method __init__ (line 70) | def __init__( method forward (line 82) | def forward(self, x): class BaichuanAttention (line 86) | class BaichuanAttention(torch.nn.Module): method __init__ (line 88) | def __init__(self, config: BaichuanConfig): method _shape (line 103) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 106) | def forward( class BaichuanLayer (line 155) | class BaichuanLayer(torch.nn.Module): method __init__ (line 156) | def __init__(self, config: BaichuanConfig): method forward (line 168) | def forward( class BaichuanPreTrainedModel (line 205) | class BaichuanPreTrainedModel(PreTrainedModel): method _init_weights (line 212) | def _init_weights(self, module): method _set_gradient_checkpointing (line 223) | def _set_gradient_checkpointing(self, module, value=False): class BaichuanModel (line 228) | class BaichuanModel(BaichuanPreTrainedModel): method __init__ (line 229) | def __init__(self, config: BaichuanConfig): method get_input_embeddings (line 243) | def get_input_embeddings(self): method set_input_embeddings (line 246) | def set_input_embeddings(self, value): method get_alibi_mask (line 249) | def get_alibi_mask(self, tensor, seq_length_with_past, attention_mask): method forward (line 267) | def forward( class BaichuanForCausalLM (line 368) | class BaichuanForCausalLM(BaichuanPreTrainedModel): method __init__ (line 369) | def __init__(self, config): method forward (line 377) | def forward( method prepare_inputs_for_generation (line 430) | def prepare_inputs_for_generation( method _reorder_cache (line 451) | def _reorder_cache(past_key_values, beam_idx): method quantize (line 458) | def quantize(self, bits: int): method _build_chat_input (line 494) | def _build_chat_input(self, tokenizer, messages: List[dict], max_new_t... method chat (line 525) | def chat(self, tokenizer, messages: List[dict], stream=False, FILE: src/resources/models/baichuan-13B-chat/tokenization_baichuan.py class BaichuanTokenizer (line 23) | class BaichuanTokenizer(PreTrainedTokenizer): method __init__ (line 37) | def __init__( method __getstate__ (line 72) | def __getstate__(self): method __setstate__ (line 77) | def __setstate__(self, d): method vocab_size (line 83) | def vocab_size(self): method get_vocab (line 87) | def get_vocab(self): method _tokenize (line 93) | def _tokenize(self, text): method _convert_token_to_id (line 97) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 101) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 106) | def convert_tokens_to_string(self, tokens): method save_vocabulary (line 125) | def save_vocabulary(self, save_directory, filename_prefix: Optional[st... method build_inputs_with_special_tokens (line 152) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No... method get_special_tokens_mask (line 163) | def get_special_tokens_mask( method create_token_type_ids_from_sequences (line 200) | def create_token_type_ids_from_sequences( FILE: src/resources/models/baichuan-7B/configuration_baichuan.py class BaiChuanConfig (line 28) | class BaiChuanConfig(PretrainedConfig): method __init__ (line 32) | def __init__( FILE: src/resources/models/baichuan-7B/modeling_baichuan.py function _make_causal_mask (line 39) | def _make_causal_mask( function _expand_mask (line 57) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option... class RMSNorm (line 71) | class RMSNorm(nn.Module): method __init__ (line 72) | def __init__(self, hidden_size, eps=1e-6): method forward (line 80) | def forward(self, hidden_states): class RotaryEmbedding (line 91) | class RotaryEmbedding(torch.nn.Module): method __init__ (line 92) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method forward (line 106) | def forward(self, x, seq_len=None): function rotate_half (line 123) | def rotate_half(x): function apply_rotary_pos_emb (line 130) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids): class MLP (line 141) | class MLP(nn.Module): method __init__ (line 142) | def __init__( method forward (line 154) | def forward(self, x): class Attention (line 158) | class Attention(nn.Module): method __init__ (line 161) | def __init__(self, config: BaiChuanConfig): method _shape (line 181) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 184) | def forward( class DecoderLayer (line 259) | class DecoderLayer(nn.Module): method __init__ (line 260) | def __init__(self, config: BaiChuanConfig): method forward (line 272) | def forward( class PreTrainedModel (line 327) | class PreTrainedModel(PreTrainedModel): method _init_weights (line 334) | def _init_weights(self, module): method _set_gradient_checkpointing (line 345) | def _set_gradient_checkpointing(self, module, value=False): class Model (line 350) | class Model(PreTrainedModel): method __init__ (line 358) | def __init__(self, config: BaiChuanConfig): method get_input_embeddings (line 371) | def get_input_embeddings(self): method set_input_embeddings (line 374) | def set_input_embeddings(self, value): method _prepare_decoder_attention_mask (line 378) | def _prepare_decoder_attention_mask(self, attention_mask, input_shape,... method forward (line 401) | def forward( class BaiChuanForCausalLM (line 529) | class BaiChuanForCausalLM(PreTrainedModel): method __init__ (line 530) | def __init__(self, config): method get_input_embeddings (line 539) | def get_input_embeddings(self): method set_input_embeddings (line 542) | def set_input_embeddings(self, value): method get_output_embeddings (line 545) | def get_output_embeddings(self): method set_output_embeddings (line 548) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 551) | def set_decoder(self, decoder): method get_decoder (line 554) | def get_decoder(self): method forward (line 557) | def forward( method prepare_inputs_for_generation (line 643) | def prepare_inputs_for_generation( method _reorder_cache (line 674) | def _reorder_cache(past_key_values, beam_idx): FILE: src/resources/models/baichuan-7B/tokenization_baichuan.py class BaiChuanTokenizer (line 42) | class BaiChuanTokenizer(PreTrainedTokenizer): method __init__ (line 56) | def __init__( method __getstate__ (line 91) | def __getstate__(self): method __setstate__ (line 96) | def __setstate__(self, d): method vocab_size (line 102) | def vocab_size(self): method get_vocab (line 106) | def get_vocab(self): method _tokenize (line 112) | def _tokenize(self, text): method _convert_token_to_id (line 116) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 120) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 125) | def convert_tokens_to_string(self, tokens): method save_vocabulary (line 144) | def save_vocabulary(self, save_directory, filename_prefix: Optional[st... method build_inputs_with_special_tokens (line 171) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No... method get_special_tokens_mask (line 182) | def get_special_tokens_mask( method create_token_type_ids_from_sequences (line 219) | def create_token_type_ids_from_sequences( FILE: src/resources/models/chatglm-6B/configuration_chatglm.py class ChatGLMConfig (line 9) | class ChatGLMConfig(PretrainedConfig): method __init__ (line 59) | def __init__( FILE: src/resources/models/chatglm-6B/modeling_chatglm.py class InvalidScoreLogitsProcessor (line 54) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 55) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... function load_tf_weights_in_chatglm_6b (line 62) | def load_tf_weights_in_chatglm_6b(model, config, tf_checkpoint_path): class PrefixEncoder (line 136) | class PrefixEncoder(torch.nn.Module): method __init__ (line 143) | def __init__(self, config): method forward (line 157) | def forward(self, prefix: torch.Tensor): function gelu_impl (line 167) | def gelu_impl(x): function gelu (line 173) | def gelu(x): class RotaryEmbedding (line 177) | class RotaryEmbedding(torch.nn.Module): method __init__ (line 178) | def __init__(self, dim, base=10000, precision=torch.half, learnable=Fa... method _load_from_state_dict (line 193) | def _load_from_state_dict(self, state_dict, prefix, local_metadata, st... method forward (line 197) | def forward(self, x, seq_dim=1, seq_len=None): method _apply (line 220) | def _apply(self, fn): function rotate_half (line 228) | def rotate_half(x): function apply_rotary_pos_emb_index (line 234) | def apply_rotary_pos_emb_index(q, k, cos, sin, position_id): function attention_fn (line 242) | def attention_fn( class SelfAttention (line 351) | class SelfAttention(torch.nn.Module): method __init__ (line 352) | def __init__(self, hidden_size, num_attention_heads, method attention_mask_func (line 399) | def attention_mask_func(attention_scores, attention_mask): method split_tensor_along_last_dim (line 403) | def split_tensor_along_last_dim(self, tensor, num_partitions, method forward (line 423) | def forward( class GEGLU (line 490) | class GEGLU(torch.nn.Module): method __init__ (line 491) | def __init__(self): method forward (line 495) | def forward(self, x): class GLU (line 501) | class GLU(torch.nn.Module): method __init__ (line 502) | def __init__(self, hidden_size, inner_hidden_size=None, method forward (line 529) | def forward(self, hidden_states): class GLMBlock (line 544) | class GLMBlock(torch.nn.Module): method __init__ (line 545) | def __init__( method forward (line 594) | def forward( class ChatGLMPreTrainedModel (line 648) | class ChatGLMPreTrainedModel(PreTrainedModel): method __init__ (line 660) | def __init__(self, *inputs, **kwargs): method _init_weights (line 663) | def _init_weights(self, module: nn.Module): method _set_gradient_checkpointing (line 667) | def _set_gradient_checkpointing(self, module, value=False): class ChatGLMModel (line 737) | class ChatGLMModel(ChatGLMPreTrainedModel): method __init__ (line 753) | def __init__(self, config: ChatGLMConfig): method get_input_embeddings (line 809) | def get_input_embeddings(self): method set_input_embeddings (line 812) | def set_input_embeddings(self, new_embeddings: torch.Tensor): method get_prompt (line 815) | def get_prompt(self, batch_size, device, dtype=torch.half): method get_masks (line 831) | def get_masks(self, input_ids, device): method get_position_ids (line 843) | def get_position_ids(self, input_ids, mask_positions, device, gmask=Fa... method forward (line 871) | def forward( class ChatGLMForConditionalGeneration (line 1008) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel): method __init__ (line 1009) | def __init__(self, config: ChatGLMConfig): method get_output_embeddings (line 1036) | def get_output_embeddings(self): method set_output_embeddings (line 1039) | def set_output_embeddings(self, new_embeddings): method get_masks_and_position_ids (line 1042) | def get_masks_and_position_ids(self, input_ids, mask_positions, device... method prepare_inputs_for_generation (line 1073) | def prepare_inputs_for_generation( method forward (line 1122) | def forward( method _reorder_cache (line 1181) | def _reorder_cache( method process_response (line 1199) | def process_response(self, response): method chat (line 1215) | def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] =... method stream_chat (line 1241) | def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, ... method stream_generate (line 1267) | def stream_generate( method quantize (line 1368) | def quantize(self, bits: int, empty_init=False, **kwargs): FILE: src/resources/models/chatglm-6B/quantization.py class Kernel (line 18) | class Kernel: method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]): class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function): method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to... method backward (line 58) | def backward(ctx, grad_output: torch.Tensor): function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor): # (n, m) function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso... class QuantizedLinear (line 120) | class QuantizedLinear(Linear): method __init__ (line 121) | def __init__(self, weight_bit_width: int, weight_tensor=None, bias_ten... method forward (line 146) | def forward(self, input): function quantize (line 153) | def quantize(model, weight_bit_width, empty_init=False, **kwargs): FILE: src/resources/models/chatglm-6B/tokenization_chatglm.py class SPTokenizer (line 23) | class SPTokenizer: method __init__ (line 24) | def __init__( method _configure_tokenizer (line 39) | def _configure_tokenizer( method _build_text_tokenizer (line 66) | def _build_text_tokenizer(self, encode_special_tokens=False): method _get_text_tokenizer (line 73) | def _get_text_tokenizer(self, encode_special_tokens=False): method get_blank_token (line 80) | def get_blank_token(length: int): method get_tab_token (line 85) | def get_tab_token(): method num_image_tokens (line 89) | def num_image_tokens(self): method num_text_tokens (line 93) | def num_text_tokens(self): method num_tokens (line 97) | def num_tokens(self): method _encode_whitespaces (line 101) | def _encode_whitespaces(text: str, max_len: int = 80): method _preprocess (line 107) | def _preprocess(self, text: str, linebreak=True, whitespaces=True): method encode (line 114) | def encode( method decode (line 131) | def decode(self, text_ids: List[int], special_tokens=False) -> str: method tokenize (line 145) | def tokenize( method __getitem__ (line 161) | def __getitem__(self, x: Union[int, str]): class ChatGLMTokenizer (line 176) | class ChatGLMTokenizer(PreTrainedTokenizer): method __init__ (line 189) | def __init__( method eop_token_id (line 224) | def eop_token_id(self) -> Optional[int]: method gmask_token_id (line 234) | def gmask_token_id(self) -> Optional[int]: method vocab_size (line 244) | def vocab_size(self): method get_vocab (line 248) | def get_vocab(self): method preprocess_text (line 254) | def preprocess_text(self, inputs): method _tokenize (line 265) | def _tokenize(self, text, **kwargs): method decode (line 273) | def decode( method _convert_token_to_id (line 293) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 297) | def _convert_id_to_token(self, index): method save_vocabulary (line 301) | def save_vocabulary(self, save_directory, filename_prefix=None): method build_inputs_with_special_tokens (line 353) | def build_inputs_with_special_tokens( FILE: src/resources/models/chatglm2-6B/configuration_chatglm.py class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig): method __init__ (line 5) | def __init__( FILE: src/resources/models/chatglm2-6B/modeling_chatglm.py function default_init (line 47) | def default_init(cls, *args, **kwargs): class InvalidScoreLogitsProcessor (line 51) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 52) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... function split_tensor_along_last_dim (line 59) | def split_tensor_along_last_dim( class RotaryEmbedding (line 87) | class RotaryEmbedding(nn.Module): method __init__ (line 88) | def __init__(self, dim, original_impl=False, device=None, dtype=None): method forward_impl (line 95) | def forward_impl( method forward (line 120) | def forward(self, max_seq_len, offset=0): function apply_rotary_pos_emb (line 127) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t... class RMSNorm (line 147) | class RMSNorm(torch.nn.Module): method __init__ (line 148) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None... method forward (line 153) | def forward(self, hidden_states: torch.Tensor): class CoreAttention (line 161) | class CoreAttention(torch.nn.Module): method __init__ (line 162) | def __init__(self, config: ChatGLMConfig, layer_number): method forward (line 187) | def forward(self, query_layer, key_layer, value_layer, attention_mask): class SelfAttention (line 282) | class SelfAttention(torch.nn.Module): method __init__ (line 289) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method _allocate_memory (line 318) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev... method forward (line 332) | def forward( function _config_to_kwargs (line 421) | def _config_to_kwargs(args): class MLP (line 428) | class MLP(torch.nn.Module): method __init__ (line 436) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 465) | def forward(self, hidden_states): class GLMBlock (line 474) | class GLMBlock(torch.nn.Module): method __init__ (line 481) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method forward (line 505) | def forward( class GLMTransformer (line 548) | class GLMTransformer(torch.nn.Module): method __init__ (line 551) | def __init__(self, config: ChatGLMConfig, device=None): method _get_layer (line 574) | def _get_layer(self, layer_number): method forward (line 577) | def forward( class ChatGLMPreTrainedModel (line 624) | class ChatGLMPreTrainedModel(PreTrainedModel): method _init_weights (line 636) | def _init_weights(self, module: nn.Module): method get_masks (line 640) | def get_masks(self, input_ids, past_key_values, padding_mask=None): method get_position_ids (line 658) | def get_position_ids(self, input_ids, device): method _set_gradient_checkpointing (line 663) | def _set_gradient_checkpointing(self, module, value=False): class Embedding (line 671) | class Embedding(torch.nn.Module): method __init__ (line 674) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 687) | def forward(self, input_ids): class ChatGLMModel (line 699) | class ChatGLMModel(ChatGLMPreTrainedModel): method __init__ (line 700) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True): method get_input_embeddings (line 724) | def get_input_embeddings(self): method forward (line 727) | def forward( method quantize (line 785) | def quantize(self, weight_bit_width: int): class ChatGLMForConditionalGeneration (line 791) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel): method __init__ (line 792) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None): method _update_model_kwargs_for_generation (line 803) | def _update_model_kwargs_for_generation( method forward (line 834) | def forward( method prepare_inputs_for_generation (line 896) | def prepare_inputs_for_generation( method _reorder_cache (line 920) | def _reorder_cache( method process_response (line 938) | def process_response(self, response): method build_inputs (line 943) | def build_inputs(self, tokenizer, query: str, history: List[Tuple[str,... method build_stream_inputs (line 952) | def build_stream_inputs(self, tokenizer, query: str, history: List[Tup... method chat (line 966) | def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] =... method stream_chat (line 984) | def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, ... method stream_generate (line 1018) | def stream_generate( method quantize (line 1122) | def quantize(self, bits: int, empty_init=False, device=None, **kwargs): FILE: src/resources/models/chatglm2-6B/quantization.py class Kernel (line 18) | class Kernel: method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]): class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function): method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to... method backward (line 58) | def backward(ctx, grad_output: torch.Tensor): function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor): # (n, m) function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso... class QuantizedLinear (line 124) | class QuantizedLinear(torch.nn.Module): method __init__ (line 125) | def __init__(self, weight_bit_width: int, weight, bias=None, device="c... method forward (line 145) | def forward(self, input): function quantize (line 152) | def quantize(model, weight_bit_width, empty_init=False, device=None): FILE: src/resources/models/chatglm2-6B/tokenization_chatglm.py class SPTokenizer (line 10) | class SPTokenizer: method __init__ (line 11) | def __init__(self, model_path: str): method tokenize (line 31) | def tokenize(self, s: str): method encode (line 34) | def encode(self, s: str, bos: bool = False, eos: bool = False) -> List... method decode (line 43) | def decode(self, t: List[int]) -> str: method decode_tokens (line 46) | def decode_tokens(self, tokens: List[str]) -> str: method convert_token_to_id (line 50) | def convert_token_to_id(self, token): method convert_id_to_token (line 56) | def convert_id_to_token(self, index): class ChatGLMTokenizer (line 63) | class ChatGLMTokenizer(PreTrainedTokenizer): method __init__ (line 68) | def __init__(self, vocab_file, padding_side="right", **kwargs): method get_command (line 80) | def get_command(self, token): method pad_token (line 87) | def pad_token(self) -> str: method pad_token_id (line 91) | def pad_token_id(self): method vocab_size (line 95) | def vocab_size(self): method get_vocab (line 98) | def get_vocab(self): method _tokenize (line 104) | def _tokenize(self, text, **kwargs): method _convert_token_to_id (line 107) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 111) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 115) | def convert_tokens_to_string(self, tokens: List[str]) -> str: method save_vocabulary (line 133) | def save_vocabulary(self, save_directory, filename_prefix=None): method get_prefix_tokens (line 161) | def get_prefix_tokens(self): method build_inputs_with_special_tokens (line 165) | def build_inputs_with_special_tokens( method _pad (line 191) | def _pad( FILE: src/resources/models/chatglm3-6B/configuration_chatglm.py class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig): method __init__ (line 6) | def __init__( FILE: src/resources/models/chatglm3-6B/modeling_chatglm.py function default_init (line 49) | def default_init(cls, *args, **kwargs): class InvalidScoreLogitsProcessor (line 53) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 54) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... class PrefixEncoder (line 61) | class PrefixEncoder(torch.nn.Module): method __init__ (line 68) | def __init__(self, config: ChatGLMConfig): method forward (line 84) | def forward(self, prefix: torch.Tensor): function split_tensor_along_last_dim (line 93) | def split_tensor_along_last_dim( class RotaryEmbedding (line 121) | class RotaryEmbedding(nn.Module): method __init__ (line 122) | def __init__(self, dim, original_impl=False, device=None, dtype=None): method forward_impl (line 129) | def forward_impl( method forward (line 154) | def forward(self, max_seq_len, offset=0): function apply_rotary_pos_emb (line 161) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t... class RMSNorm (line 181) | class RMSNorm(torch.nn.Module): method __init__ (line 182) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None... method forward (line 187) | def forward(self, hidden_states: torch.Tensor): class CoreAttention (line 195) | class CoreAttention(torch.nn.Module): method __init__ (line 196) | def __init__(self, config: ChatGLMConfig, layer_number): method forward (line 221) | def forward(self, query_layer, key_layer, value_layer, attention_mask): class SelfAttention (line 313) | class SelfAttention(torch.nn.Module): method __init__ (line 320) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method _allocate_memory (line 349) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev... method forward (line 363) | def forward( function _config_to_kwargs (line 452) | def _config_to_kwargs(args): class MLP (line 459) | class MLP(torch.nn.Module): method __init__ (line 467) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 496) | def forward(self, hidden_states): class GLMBlock (line 505) | class GLMBlock(torch.nn.Module): method __init__ (line 512) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method forward (line 536) | def forward( class GLMTransformer (line 579) | class GLMTransformer(torch.nn.Module): method __init__ (line 582) | def __init__(self, config: ChatGLMConfig, device=None): method _get_layer (line 605) | def _get_layer(self, layer_number): method forward (line 608) | def forward( class ChatGLMPreTrainedModel (line 661) | class ChatGLMPreTrainedModel(PreTrainedModel): method _init_weights (line 673) | def _init_weights(self, module: nn.Module): method get_masks (line 677) | def get_masks(self, input_ids, past_key_values, padding_mask=None): method get_position_ids (line 695) | def get_position_ids(self, input_ids, device): method _set_gradient_checkpointing (line 700) | def _set_gradient_checkpointing(self, module, value=False): class Embedding (line 705) | class Embedding(torch.nn.Module): method __init__ (line 708) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 721) | def forward(self, input_ids): class ChatGLMModel (line 733) | class ChatGLMModel(ChatGLMPreTrainedModel): method __init__ (line 734) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True): method get_input_embeddings (line 768) | def get_input_embeddings(self): method get_prompt (line 771) | def get_prompt(self, batch_size, device, dtype=torch.half): method forward (line 786) | def forward( method quantize (line 845) | def quantize(self, weight_bit_width: int): class ChatGLMForConditionalGeneration (line 851) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel): method __init__ (line 852) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None): method _update_model_kwargs_for_generation (line 863) | def _update_model_kwargs_for_generation( method prepare_inputs_for_generation (line 894) | def prepare_inputs_for_generation( method forward (line 920) | def forward( method _reorder_cache (line 981) | def _reorder_cache( method process_response (line 999) | def process_response(self, output, history): method chat (line 1021) | def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] =... method stream_chat (line 1043) | def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, ... method stream_generate (line 1084) | def stream_generate( method quantize (line 1191) | def quantize(self, bits: int, empty_init=False, device=None, **kwargs): class ChatGLMForSequenceClassification (line 1210) | class ChatGLMForSequenceClassification(ChatGLMPreTrainedModel): method __init__ (line 1211) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None): method forward (line 1227) | def forward( FILE: src/resources/models/chatglm3-6B/quantization.py class Kernel (line 18) | class Kernel: method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]): class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function): method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to... method backward (line 58) | def backward(ctx, grad_output: torch.Tensor): function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor): # (n, m) function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso... class QuantizedLinear (line 124) | class QuantizedLinear(torch.nn.Module): method __init__ (line 125) | def __init__(self, weight_bit_width: int, weight, bias=None, device="c... method forward (line 145) | def forward(self, input): function quantize (line 152) | def quantize(model, weight_bit_width, empty_init=False, device=None): FILE: src/resources/models/chatglm3-6B/tokenization_chatglm.py class SPTokenizer (line 11) | class SPTokenizer: method __init__ (line 12) | def __init__(self, model_path: str): method tokenize (line 34) | def tokenize(self, s: str): method encode (line 37) | def encode(self, s: str, bos: bool = False, eos: bool = False) -> List... method decode (line 46) | def decode(self, t: List[int]) -> str: method decode_tokens (line 60) | def decode_tokens(self, tokens: List[str]) -> str: method convert_token_to_id (line 64) | def convert_token_to_id(self, token): method convert_id_to_token (line 70) | def convert_id_to_token(self, index): class ChatGLMTokenizer (line 79) | class ChatGLMTokenizer(PreTrainedTokenizer): method __init__ (line 84) | def __init__(self, vocab_file, padding_side="left", clean_up_tokenizat... method get_command (line 96) | def get_command(self, token): method unk_token (line 103) | def unk_token(self) -> str: method pad_token (line 107) | def pad_token(self) -> str: method pad_token_id (line 111) | def pad_token_id(self): method eos_token (line 115) | def eos_token(self) -> str: method eos_token_id (line 119) | def eos_token_id(self): method vocab_size (line 123) | def vocab_size(self): method get_vocab (line 126) | def get_vocab(self): method _tokenize (line 132) | def _tokenize(self, text, **kwargs): method _convert_token_to_id (line 135) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 139) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 143) | def convert_tokens_to_string(self, tokens: List[str]) -> str: method save_vocabulary (line 146) | def save_vocabulary(self, save_directory, filename_prefix=None): method get_prefix_tokens (line 174) | def get_prefix_tokens(self): method build_single_message (line 178) | def build_single_message(self, role, metadata, message): method build_chat_input (line 185) | def build_chat_input(self, query, history=None, role="user"): method build_inputs_with_special_tokens (line 198) | def build_inputs_with_special_tokens( method _pad (line 223) | def _pad( FILE: src/resources/models/glm-10B-chinese/configuration_glm.py class GLMConfig (line 28) | class GLMConfig(PretrainedConfig): method __init__ (line 93) | def __init__( FILE: src/resources/models/glm-10B-chinese/modeling_glm.py function unscaled_init_method (line 52) | def unscaled_init_method(sigma): function scaled_init_method (line 61) | def scaled_init_method(mean, std, num_layers): function ensure_divisibility (line 71) | def ensure_divisibility(numerator, denominator): function divide (line 77) | def divide(numerator, denominator): function split_tensor_along_last_dim (line 84) | def split_tensor_along_last_dim(tensor, num_partitions, class MLP (line 105) | class MLP(torch.nn.Module): method __init__ (line 124) | def __init__(self, hidden_size, output_dropout_prob, init_method, method forward (line 140) | def forward(self, hidden_states): class VocabEmbedding (line 151) | class VocabEmbedding(torch.nn.Module): method __init__ (line 162) | def __init__(self, config): method forward (line 185) | def forward(self, input_): class PositionalEmbedding (line 194) | class PositionalEmbedding(torch.nn.Module): method __init__ (line 196) | def __init__(self, hidden_size): method forward (line 204) | def forward(self, pos_seq, bsz=None): class SelfAttention (line 214) | class SelfAttention(torch.nn.Module): method __init__ (line 241) | def __init__(self, hidden_size, num_attention_heads, method _transpose_for_scores (line 269) | def _transpose_for_scores(self, tensor): method forward (line 279) | def forward(self, hidden_states, ltor_mask, mem=None): class GLMBlock (line 346) | class GLMBlock(torch.nn.Module): method __init__ (line 375) | def __init__(self, method forward (line 413) | def forward(self, hidden_states, ltor_mask, mem=None): class GLMStack (line 434) | class GLMStack(torch.nn.Module): method __init__ (line 469) | def __init__(self, method forward (line 528) | def forward(self, hidden_states, position_ids, attention_mask, memory_... method update_mems (line 604) | def update_mems(self, hiddens, mems): class GLMPreTrainedModel (line 619) | class GLMPreTrainedModel(PreTrainedModel): method _init_weights (line 630) | def _init_weights(self, module): method _set_gradient_checkpointing (line 646) | def _set_gradient_checkpointing(self, module, value=False): class GLMModel (line 716) | class GLMModel(GLMPreTrainedModel): method __init__ (line 732) | def __init__(self, config): method forward (line 762) | def forward( class GLMForMultipleChoice (line 801) | class GLMForMultipleChoice(GLMPreTrainedModel): method __init__ (line 802) | def __init__(self, config): method forward (line 807) | def forward( class GLMForConditionalGeneration (line 843) | class GLMForConditionalGeneration(GLMPreTrainedModel): method __init__ (line 844) | def __init__(self, config): method _reorder_cache (line 849) | def _reorder_cache(self, past, beam_idx): method prepare_inputs_for_generation (line 861) | def prepare_inputs_for_generation(self, input_ids, past=None, position... method forward (line 894) | def forward( class GLMForSequenceClassification (line 921) | class GLMForSequenceClassification(GLMPreTrainedModel): method __init__ (line 922) | def __init__(self, config: GLMConfig, hidden_dropout=None, num_class=1): method forward (line 946) | def forward(self, FILE: src/resources/models/glm-10B-chinese/tokenization_glm.py class GLMBatchEncoding (line 17) | class GLMBatchEncoding(BatchEncoding): method to (line 19) | def to(self, device: Union[str, "torch.device"]) -> "BatchEncoding": class GLMTokenizerMixin (line 38) | class GLMTokenizerMixin: method sop_token (line 40) | def sop_token(self) -> Optional[str]: method sop_token_id (line 44) | def sop_token_id(self) -> Optional[int]: method eop_token (line 51) | def eop_token(self) -> Optional[str]: method eop_token_id (line 55) | def eop_token_id(self) -> Optional[int]: method gmask_token_id (line 62) | def gmask_token_id(self) -> int: method smask_token_id (line 66) | def smask_token_id(self) -> int: method mask_token_ids (line 70) | def mask_token_ids(self): method _build_input_for_multiple_choice (line 73) | def _build_input_for_multiple_choice(self, context, choices): method _pad_batch (line 110) | def _pad_batch(self, tokens, position_ids, attention_mask, max_seq_len... method _collate (line 122) | def _collate(self, samples): method build_inputs_for_multiple_choice (line 146) | def build_inputs_for_multiple_choice(self, model_input: BatchEncoding,... method build_inputs_for_generation (line 153) | def build_inputs_for_generation(self, model_input: BatchEncoding, max_... class GLMRobertaTokenizer (line 209) | class GLMRobertaTokenizer(RobertaTokenizer, GLMTokenizerMixin): method gmask_token_id (line 214) | def gmask_token_id(self) -> int: method smask_token_id (line 218) | def smask_token_id(self) -> int: method mask_token_ids (line 222) | def mask_token_ids(self): class GLMChineseTokenizer (line 226) | class GLMChineseTokenizer(PreTrainedTokenizer, GLMTokenizerMixin): method __init__ (line 230) | def __init__(self, vocab_file, **kwargs): method vocab_size (line 237) | def vocab_size(self): method get_vocab (line 240) | def get_vocab(self): method _tokenize (line 245) | def _tokenize(self, text, **kwargs): method _convert_token_to_id (line 248) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 252) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 256) | def convert_tokens_to_string(self, tokens): method save_vocabulary (line 259) | def save_vocabulary(self, save_directory: str, filename_prefix: Option... method build_inputs_with_special_tokens (line 276) | def build_inputs_with_special_tokens( class GLMGPT2Tokenizer (line 308) | class GLMGPT2Tokenizer(GPT2Tokenizer, GLMTokenizerMixin): method build_inputs_with_special_tokens (line 312) | def build_inputs_with_special_tokens( class GLMBertTokenizer (line 334) | class GLMBertTokenizer(BertTokenizer, GLMTokenizerMixin): method gmask_token_id (line 339) | def gmask_token_id(self) -> int: method smask_token_id (line 343) | def smask_token_id(self) -> int: method mask_token_ids (line 347) | def mask_token_ids(self): class GLMTokenizer (line 351) | class GLMTokenizer: method from_pretrained (line 353) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa... FILE: src/resources/models/glm-350M-chinese/configuration_glm.py class GLMConfig (line 28) | class GLMConfig(PretrainedConfig): method __init__ (line 86) | def __init__( FILE: src/resources/models/glm-350M-chinese/modeling_glm.py function unscaled_init_method (line 51) | def unscaled_init_method(sigma): function scaled_init_method (line 60) | def scaled_init_method(mean, std, num_layers): function ensure_divisibility (line 70) | def ensure_divisibility(numerator, denominator): function divide (line 76) | def divide(numerator, denominator): function split_tensor_along_last_dim (line 83) | def split_tensor_along_last_dim(tensor, num_partitions, class MLP (line 104) | class MLP(torch.nn.Module): method __init__ (line 121) | def __init__(self, hidden_size, output_dropout_prob, init_method, method forward (line 137) | def forward(self, hidden_states): class VocabEmbedding (line 148) | class VocabEmbedding(torch.nn.Module): method __init__ (line 158) | def __init__(self, config): method forward (line 181) | def forward(self, input_): class PositionalEmbedding (line 190) | class PositionalEmbedding(torch.nn.Module): method __init__ (line 192) | def __init__(self, hidden_size): method forward (line 200) | def forward(self, pos_seq, bsz=None): class SelfAttention (line 210) | class SelfAttention(torch.nn.Module): method __init__ (line 236) | def __init__(self, hidden_size, num_attention_heads, method _transpose_for_scores (line 264) | def _transpose_for_scores(self, tensor): method forward (line 274) | def forward(self, hidden_states, ltor_mask, mem=None): class GLMBlock (line 341) | class GLMBlock(torch.nn.Module): method __init__ (line 368) | def __init__(self, method forward (line 406) | def forward(self, hidden_states, ltor_mask, mem=None): class GLMStack (line 427) | class GLMStack(torch.nn.Module): method __init__ (line 460) | def __init__(self, method forward (line 519) | def forward(self, hidden_states, position_ids, attention_mask, memory_... method update_mems (line 595) | def update_mems(self, hiddens, mems): class GLMPreTrainedModel (line 610) | class GLMPreTrainedModel(PreTrainedModel): method _init_weights (line 621) | def _init_weights(self, module): method _set_gradient_checkpointing (line 637) | def _set_gradient_checkpointing(self, module, value=False): class GLMModel (line 697) | class GLMModel(GLMPreTrainedModel): method __init__ (line 711) | def __init__(self, config): method forward (line 741) | def forward( class GLMForMultipleChoice (line 782) | class GLMForMultipleChoice(GLMPreTrainedModel): method __init__ (line 783) | def __init__(self, config): method forward (line 788) | def forward( class GLMForConditionalGeneration (line 824) | class GLMForConditionalGeneration(GLMPreTrainedModel): method __init__ (line 825) | def __init__(self, config): method _reorder_cache (line 830) | def _reorder_cache(self, past, beam_idx): method prepare_inputs_for_generation (line 842) | def prepare_inputs_for_generation(self, input_ids, past=None, position... method forward (line 865) | def forward( FILE: src/resources/models/glm-350M-chinese/tokenization_glm.py class GLMBatchEncoding (line 17) | class GLMBatchEncoding(BatchEncoding): method to (line 19) | def to(self, device: Union[str, "torch.device"]) -> "BatchEncoding": class GLMTokenizerMixin (line 38) | class GLMTokenizerMixin: method sop_token (line 40) | def sop_token(self) -> Optional[str]: method sop_token_id (line 44) | def sop_token_id(self) -> Optional[int]: method eop_token (line 51) | def eop_token(self) -> Optional[str]: method eop_token_id (line 55) | def eop_token_id(self) -> Optional[int]: method gmask_token_id (line 62) | def gmask_token_id(self) -> int: method smask_token_id (line 66) | def smask_token_id(self) -> int: method mask_token_ids (line 70) | def mask_token_ids(self): method _build_input_for_multiple_choice (line 73) | def _build_input_for_multiple_choice(self, context, choices): method _pad_batch (line 110) | def _pad_batch(self, tokens, position_ids, attention_mask, max_seq_len... method _collate (line 122) | def _collate(self, samples): method build_inputs_for_multiple_choice (line 146) | def build_inputs_for_multiple_choice(self, model_input: BatchEncoding,... method build_inputs_for_generation (line 153) | def build_inputs_for_generation(self, model_input: BatchEncoding, max_... class GLMRobertaTokenizer (line 216) | class GLMRobertaTokenizer(RobertaTokenizer, GLMTokenizerMixin): method gmask_token_id (line 221) | def gmask_token_id(self) -> int: method smask_token_id (line 225) | def smask_token_id(self) -> int: method mask_token_ids (line 229) | def mask_token_ids(self): class GLMChineseTokenizer (line 233) | class GLMChineseTokenizer(PreTrainedTokenizer, GLMTokenizerMixin): method __init__ (line 237) | def __init__(self, vocab_file, **kwargs): method vocab_size (line 244) | def vocab_size(self): method get_vocab (line 247) | def get_vocab(self): method _tokenize (line 252) | def _tokenize(self, text, **kwargs): method _convert_token_to_id (line 255) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 259) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 263) | def convert_tokens_to_string(self, tokens): method save_vocabulary (line 266) | def save_vocabulary(self, save_directory: str, filename_prefix: Option... method build_inputs_with_special_tokens (line 283) | def build_inputs_with_special_tokens( class GLMGPT2Tokenizer (line 315) | class GLMGPT2Tokenizer(GPT2Tokenizer, GLMTokenizerMixin): method build_inputs_with_special_tokens (line 319) | def build_inputs_with_special_tokens( class GLMBertTokenizer (line 341) | class GLMBertTokenizer(BertTokenizer, GLMTokenizerMixin): method gmask_token_id (line 346) | def gmask_token_id(self) -> int: method smask_token_id (line 350) | def smask_token_id(self) -> int: method mask_token_ids (line 354) | def mask_token_ids(self): class GLMTokenizer (line 358) | class GLMTokenizer: method from_pretrained (line 360) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa... FILE: src/resources/models/pangu-13B/configuration_gptpangu.py class GPTPanguConfig (line 4) | class GPTPanguConfig(PretrainedConfig): method __init__ (line 8) | def __init__( FILE: src/resources/models/pangu-13B/modeling_gptpangu.py class GPTPanguAttention (line 19) | class GPTPanguAttention(nn.Module): method __init__ (line 20) | def __init__(self, config): method _attn (line 51) | def _attn(self, query, key, value, attention_mask=None, head_mask=None): method _split_heads (line 79) | def _split_heads(self, tensor, num_heads, attn_head_size): method _merge_heads (line 87) | def _merge_heads(self, tensor, num_heads, attn_head_size): method forward (line 95) | def forward( class GPTPanguMLP (line 136) | class GPTPanguMLP(nn.Module): method __init__ (line 137) | def __init__(self, intermediate_size, config): # in MLP: intermediate... method forward (line 145) | def forward(self, hidden_states): class GPTPanguBlock (line 153) | class GPTPanguBlock(nn.Module): method __init__ (line 154) | def __init__(self, config): method forward (line 164) | def forward( class GPTPanguPreTrainedModel (line 204) | class GPTPanguPreTrainedModel(PreTrainedModel): method __init__ (line 214) | def __init__(self, *inputs, **kwargs): method _init_weights (line 217) | def _init_weights(self, module): method _set_gradient_checkpointing (line 244) | def _set_gradient_checkpointing(self, module, value=False): class GPTPanguModel (line 249) | class GPTPanguModel(GPTPanguPreTrainedModel): method __init__ (line 250) | def __init__(self, config): method get_input_embeddings (line 267) | def get_input_embeddings(self): method set_input_embeddings (line 270) | def set_input_embeddings(self, new_embeddings): method forward (line 273) | def forward( class GPTPanguForCausalLM (line 438) | class GPTPanguForCausalLM(GPTPanguPreTrainedModel): method __init__ (line 439) | def __init__(self, config): method get_output_embeddings (line 447) | def get_output_embeddings(self): method set_output_embeddings (line 450) | def set_output_embeddings(self, new_embeddings): method prepare_inputs_for_generation (line 453) | def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs): method forward (line 481) | def forward( method _reorder_cache (line 543) | def _reorder_cache(past: Tuple[Tuple[torch.Tensor]], beam_idx: torch.T... FILE: src/resources/models/pangu-13B/tokenization_gptpangu.py class GPTPanguTokenizer (line 17) | class GPTPanguTokenizer(PreTrainedTokenizer): method __init__ (line 23) | def __init__( method build_inputs_with_special_tokens (line 37) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No... method tokenize (line 68) | def tokenize(self, text, **kwargs): method convert_tokens_to_ids (line 73) | def convert_tokens_to_ids(self, tokens): method _convert_token_to_id (line 99) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 102) | def _convert_id_to_token(self, index): method convert_ids_to_tokens (line 105) | def convert_ids_to_tokens(self, ids): method decode (line 108) | def decode(self, ids, **kwargs): method vocab_size (line 121) | def vocab_size(self) -> int: method save_vocabulary (line 127) | def save_vocabulary(self, save_directory, filename_prefix=None): FILE: src/resources/models/pangu-2.6B/configuration_gptpangu.py class GPTPanguConfig (line 4) | class GPTPanguConfig(PretrainedConfig): method __init__ (line 8) | def __init__( FILE: src/resources/models/pangu-2.6B/modeling_gptpangu.py class GPTPanguAttention (line 19) | class GPTPanguAttention(nn.Module): method __init__ (line 20) | def __init__(self, config): method _attn (line 51) | def _attn(self, query, key, value, attention_mask=None, head_mask=None): method _split_heads (line 79) | def _split_heads(self, tensor, num_heads, attn_head_size): method _merge_heads (line 87) | def _merge_heads(self, tensor, num_heads, attn_head_size): method forward (line 95) | def forward( class GPTPanguMLP (line 136) | class GPTPanguMLP(nn.Module): method __init__ (line 137) | def __init__(self, intermediate_size, config): # in MLP: intermediate... method forward (line 145) | def forward(self, hidden_states): class GPTPanguBlock (line 153) | class GPTPanguBlock(nn.Module): method __init__ (line 154) | def __init__(self, config): method forward (line 164) | def forward( class GPTPanguPreTrainedModel (line 204) | class GPTPanguPreTrainedModel(PreTrainedModel): method __init__ (line 214) | def __init__(self, *inputs, **kwargs): method _init_weights (line 217) | def _init_weights(self, module): method _set_gradient_checkpointing (line 244) | def _set_gradient_checkpointing(self, module, value=False): class GPTPanguModel (line 249) | class GPTPanguModel(GPTPanguPreTrainedModel): method __init__ (line 250) | def __init__(self, config): method get_input_embeddings (line 267) | def get_input_embeddings(self): method set_input_embeddings (line 270) | def set_input_embeddings(self, new_embeddings): method forward (line 273) | def forward( class GPTPanguForCausalLM (line 438) | class GPTPanguForCausalLM(GPTPanguPreTrainedModel): method __init__ (line 439) | def __init__(self, config): method get_output_embeddings (line 447) | def get_output_embeddings(self): method set_output_embeddings (line 450) | def set_output_embeddings(self, new_embeddings): method prepare_inputs_for_generation (line 453) | def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs): method forward (line 481) | def forward( method _reorder_cache (line 543) | def _reorder_cache(past: Tuple[Tuple[torch.Tensor]], beam_idx: torch.T... FILE: src/resources/models/pangu-2.6B/tokenization_gptpangu.py class GPTPanguTokenizer (line 17) | class GPTPanguTokenizer(PreTrainedTokenizer): method __init__ (line 23) | def __init__( method build_inputs_with_special_tokens (line 37) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No... method tokenize (line 68) | def tokenize(self, text, **kwargs): method convert_tokens_to_ids (line 73) | def convert_tokens_to_ids(self, tokens): method _convert_token_to_id (line 99) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 102) | def _convert_id_to_token(self, index): method convert_ids_to_tokens (line 105) | def convert_ids_to_tokens(self, ids): method decode (line 108) | def decode(self, ids, **kwargs): method vocab_size (line 121) | def vocab_size(self) -> int: method save_vocabulary (line 127) | def save_vocabulary(self, save_directory, filename_prefix=None): FILE: src/resources/models/pangu-350M/configuration_gptpangu.py class GPTPanguConfig (line 4) | class GPTPanguConfig(PretrainedConfig): method __init__ (line 8) | def __init__( FILE: src/resources/models/pangu-350M/modeling_gptpangu.py class GPTPanguAttention (line 19) | class GPTPanguAttention(nn.Module): method __init__ (line 20) | def __init__(self, config): method _attn (line 51) | def _attn(self, query, key, value, attention_mask=None, head_mask=None): method _split_heads (line 79) | def _split_heads(self, tensor, num_heads, attn_head_size): method _merge_heads (line 87) | def _merge_heads(self, tensor, num_heads, attn_head_size): method forward (line 95) | def forward( class GPTPanguMLP (line 136) | class GPTPanguMLP(nn.Module): method __init__ (line 137) | def __init__(self, intermediate_size, config): # in MLP: intermediate... method forward (line 145) | def forward(self, hidden_states): class GPTPanguBlock (line 153) | class GPTPanguBlock(nn.Module): method __init__ (line 154) | def __init__(self, config): method forward (line 164) | def forward( class GPTPanguPreTrainedModel (line 204) | class GPTPanguPreTrainedModel(PreTrainedModel): method __init__ (line 214) | def __init__(self, *inputs, **kwargs): method _init_weights (line 217) | def _init_weights(self, module): method _set_gradient_checkpointing (line 244) | def _set_gradient_checkpointing(self, module, value=False): class GPTPanguModel (line 249) | class GPTPanguModel(GPTPanguPreTrainedModel): method __init__ (line 250) | def __init__(self, config): method get_input_embeddings (line 267) | def get_input_embeddings(self): method set_input_embeddings (line 270) | def set_input_embeddings(self, new_embeddings): method forward (line 273) | def forward( class GPTPanguForCausalLM (line 438) | class GPTPanguForCausalLM(GPTPanguPreTrainedModel): method __init__ (line 439) | def __init__(self, config): method get_output_embeddings (line 447) | def get_output_embeddings(self): method set_output_embeddings (line 450) | def set_output_embeddings(self, new_embeddings): method prepare_inputs_for_generation (line 453) | def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs): method forward (line 481) | def forward( method _reorder_cache (line 543) | def _reorder_cache(past: Tuple[Tuple[torch.Tensor]], beam_idx: torch.T... FILE: src/resources/models/pangu-350M/tokenization_gptpangu.py class GPTPanguTokenizer (line 17) | class GPTPanguTokenizer(PreTrainedTokenizer): method __init__ (line 23) | def __init__( method build_inputs_with_special_tokens (line 37) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No... method tokenize (line 68) | def tokenize(self, text, **kwargs): method convert_tokens_to_ids (line 73) | def convert_tokens_to_ids(self, tokens): method _convert_token_to_id (line 99) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 102) | def _convert_id_to_token(self, index): method convert_ids_to_tokens (line 105) | def convert_ids_to_tokens(self, ids): method decode (line 108) | def decode(self, ids, **kwargs): method vocab_size (line 121) | def vocab_size(self) -> int: method save_vocabulary (line 127) | def save_vocabulary(self, save_directory, filename_prefix=None): FILE: src/resources/models/qwen-7B-chat/configuration_qwen.py class QWenConfig (line 9) | class QWenConfig(PretrainedConfig): method __init__ (line 19) | def __init__( FILE: src/resources/models/qwen-7B-chat/modeling_qwen.py class FlashSelfAttention (line 76) | class FlashSelfAttention(torch.nn.Module): method __init__ (line 77) | def __init__( method forward (line 94) | def forward(self, q, k, v): class QWenAttention (line 140) | class QWenAttention(nn.Module): method __init__ (line 141) | def __init__(self, config, layer_number=None): method _attn (line 213) | def _attn(self, query, key, value, attention_mask=None, head_mask=None): method _upcast_and_reordered_attn (line 249) | def _upcast_and_reordered_attn( method _split_heads (line 305) | def _split_heads(self, tensor, num_heads, attn_head_size): method _merge_heads (line 310) | def _merge_heads(self, tensor, num_heads, attn_head_size): method forward (line 315) | def forward( class QWenMLP (line 409) | class QWenMLP(nn.Module): method __init__ (line 410) | def __init__(self, config): method forward (line 421) | def forward(self, hidden_states): class QWenBlock (line 429) | class QWenBlock(nn.Module): method __init__ (line 430) | def __init__(self, config, layer_idx=None, num_expert=1): method forward (line 455) | def forward( class QWenPreTrainedModel (line 504) | class QWenPreTrainedModel(PreTrainedModel): method __init__ (line 511) | def __init__(self, *inputs, **kwargs): method _init_weights (line 514) | def _init_weights(self, module): method _set_gradient_checkpointing (line 537) | def _set_gradient_checkpointing(self, module, value=False): class QWenModel (line 542) | class QWenModel(QWenPreTrainedModel): method __init__ (line 545) | def __init__(self, config): method get_input_embeddings (line 583) | def get_input_embeddings(self): method set_input_embeddings (line 586) | def set_input_embeddings(self, new_embeddings): method forward (line 589) | def forward( class QWenLMHeadModel (line 745) | class QWenLMHeadModel(QWenPreTrainedModel): method __init__ (line 749) | def __init__(self, config): method get_output_embeddings (line 762) | def get_output_embeddings(self): method set_output_embeddings (line 765) | def set_output_embeddings(self, new_embeddings): method prepare_inputs_for_generation (line 768) | def prepare_inputs_for_generation( method forward (line 804) | def forward( method _reorder_cache (line 868) | def _reorder_cache( method chat (line 880) | def chat( method generate (line 926) | def generate( class RotaryEmbedding (line 964) | class RotaryEmbedding(torch.nn.Module): method __init__ (line 965) | def __init__(self, dim, base=10000): method update_rotary_pos_emb_cache (line 977) | def update_rotary_pos_emb_cache(self, max_seq_len, offset=0, ntk_alpha... method forward (line 991) | def forward(self, max_seq_len, offset=0, ntk_alpha=1.0): function _rotate_half (line 996) | def _rotate_half(x): function apply_rotary_pos_emb (line 1004) | def apply_rotary_pos_emb(t, freqs, use_flash_rotary=False): class RMSNorm (line 1021) | class RMSNorm(torch.nn.Module): method __init__ (line 1022) | def __init__(self, dim: int, eps: float = 1e-6): method _norm (line 1027) | def _norm(self, x): method forward (line 1030) | def forward(self, x): FILE: src/resources/models/qwen-7B-chat/qwen_generation_utils.py function pad_batch (line 25) | def pad_batch(batch: BatchTokensType, pad_id: int, seq_length: int) -> B... function get_ltor_masks_and_position_ids (line 33) | def get_ltor_masks_and_position_ids( function get_batch (line 94) | def get_batch(context_tokens: torch.LongTensor, eod_id: int): function get_stop_words_ids (line 109) | def get_stop_words_ids(chat_format, tokenizer): function make_context (line 119) | def make_context( function _decode_default (line 192) | def _decode_default( function _decode_chatml (line 224) | def _decode_chatml( function decode_tokens (line 259) | def decode_tokens( class StopWordsLogitsProcessor (line 296) | class StopWordsLogitsProcessor(LogitsProcessor): method __init__ (line 309) | def __init__(self, stop_words_ids: Iterable[Iterable[int]], eos_token_... method __call__ (line 343) | def __call__( method _tokens_match (line 352) | def _tokens_match(self, prev_tokens: torch.LongTensor, tokens: List[in... method _calc_stopped_samples (line 365) | def _calc_stopped_samples(self, prev_input_ids: Iterable[int]) -> Iter... function top_k_logits (line 379) | def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")): function switch (line 409) | def switch(val1, val2, boolean): FILE: src/resources/models/qwen-7B-chat/tokenization_qwen.py class QWenTokenizer (line 26) | class QWenTokenizer(PreTrainedTokenizer): method __init__ (line 33) | def __init__( method __len__ (line 134) | def __len__(self): method get_vocab (line 137) | def get_vocab(self): method convert_tokens_to_ids (line 140) | def convert_tokens_to_ids(self, tokens): method save_vocabulary (line 163) | def save_vocabulary(self, save_directory: str, **kwargs) -> Tuple[str]: method tokenize (line 177) | def tokenize(self, text: str, **kwargs) -> List[str]: method convert_tokens_to_string (line 197) | def convert_tokens_to_string(self, tokens: List[str]) -> str: method vocab_size (line 209) | def vocab_size(self): method _convert_id_to_token (line 212) | def _convert_id_to_token(self, index: int) -> str: method _convert_token_to_id (line 217) | def _convert_token_to_id(self, token: str) -> int: method all_special_tokens (line 222) | def all_special_tokens(self) -> List[str]: method all_special_ids (line 232) | def all_special_ids(self) -> List[int]: method _tokenize (line 239) | def _tokenize(self, text, **kwargs): method _decode (line 248) | def _decode( method build_inputs_with_special_tokens (line 260) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No... FILE: src/train_dpo.py function preprocess_logits_for_metrics (line 27) | def preprocess_logits_for_metrics(logits, labels): function get_parser (line 34) | def get_parser(): function main (line 110) | def main(): FILE: src/train_reward.py function get_parser (line 24) | def get_parser(): function main (line 92) | def main(): FILE: src/train_rlhf.py function get_parser (line 26) | def get_parser(): function create_datasets (line 144) | def create_datasets(args, tokenizer_padding_from_left, ppo_ptx_enabled, ... function create_dataloader (line 165) | def create_dataloader(args, train_dataset, pretrain_dataset=None): function main (line 203) | def main(): FILE: src/train_rlhf_trlx.py function get_parser (line 24) | def get_parser(): function train (line 86) | def train(model_path: Optional[str] = None, function main (line 166) | def main(): FILE: src/train_sft.py function preprocess_logits_for_metrics (line 26) | def preprocess_logits_for_metrics(logits, labels): function get_parser (line 33) | def get_parser(): function main (line 107) | def main(): FILE: src/train_sft_wo_trainer.py function preprocess_logits_for_metrics (line 27) | def preprocess_logits_for_metrics(logits, labels): function get_parser (line 34) | def get_parser(): function main (line 106) | def main(): FILE: src/utils/config.py function merge (line 18) | def merge(base: Dict, update: Dict, updated: Set) -> Dict: function _merge_dicts (line 31) | def _merge_dicts(base: Dict, update: Dict) -> Dict: class ModelConfig (line 46) | class ModelConfig: method from_dict (line 81) | def from_dict(cls, config: Dict[str, Any]): class TokenizerConfig (line 86) | class TokenizerConfig: method from_dict (line 105) | def from_dict(cls, config: Dict[str, Any]): class OptimizerConfig (line 110) | class OptimizerConfig: method from_dict (line 125) | def from_dict(cls, config: Dict[str, Any]): class SchedulerConfig (line 130) | class SchedulerConfig: method from_dict (line 145) | def from_dict(cls, config: Dict[str, Any]): class TrainConfig (line 150) | class TrainConfig: method from_dict (line 239) | def from_dict(cls, config: Dict[str, Any]): class TRLConfig (line 244) | class TRLConfig: method load_yaml (line 257) | def load_yaml(cls, yml_fp: str): method to_dict (line 268) | def to_dict(self): method evolve (line 283) | def evolve(self, **kwargs) -> "TRLConfig": method from_dict (line 294) | def from_dict(cls, config: Dict): method update (line 308) | def update(cls, baseconfig: Dict, config: Dict): method __str__ (line 321) | def __str__(self): function default_ppo_config (line 328) | def default_ppo_config(): function default_ilql_config (line 373) | def default_ilql_config(): function default_sft_config (line 408) | def default_sft_config(): function get_train_ds_config (line 435) | def get_train_ds_config(global_batch_size=32, function get_eval_ds_config (line 485) | def get_eval_ds_config(global_batch_size=32, micro_batch_size=4, gradien... FILE: src/utils/file_utils.py function set_seed (line 14) | def set_seed(seed_val=42): function print_rank_0 (line 21) | def print_rank_0(*message): function significant (line 29) | def significant(x: Number, ndigits=2) -> Number: function print_gpu_utilization (line 53) | def print_gpu_utilization(prefix: str = "", index: int = 0, only_rank_0:... function print_gpu_utilization_torch (line 65) | def print_gpu_utilization_torch(prefix: str = "", index: int = 0, only_r... function print_trainable_parameters (line 85) | def print_trainable_parameters(model): FILE: src/utils/loading.py function _trainer_unavailble (line 38) | def _trainer_unavailble(name): function prepare_decoder_attention_mask (line 47) | def prepare_decoder_attention_mask(self, *args, **kwargs): function chatglm_auto_configure_device_map (line 51) | def chatglm_auto_configure_device_map(num_gpus: int, model_name: str, lo... function llama_and_baichuan_auto_configure_device_map (line 98) | def llama_and_baichuan_auto_configure_device_map(num_gpus: int, model_na... function load_params_8bit_or_4bit (line 132) | def load_params_8bit_or_4bit(args, model: PreTrainedModel) -> Dict: function load_tokenizer_and_model (line 166) | def load_tokenizer_and_model(args, with_trainer: bool = True) -> Tuple[P... function to_peft_model (line 285) | def to_peft_model(args, model: PreTrainedModel) -> PreTrainedModel: function load_checkpoint (line 320) | def load_checkpoint(args, model: PreTrainedModel, strict: bool = True) -... function get_trainer (line 329) | def get_trainer(name: str) -> Callable: function get_pipeline (line 340) | def get_pipeline(name: str) -> Callable: FILE: src/utils/method_configs.py function register_method (line 20) | def register_method(name): class MethodConfig (line 44) | class MethodConfig: method from_dict (line 55) | def from_dict(cls, config: Dict[str, Any]): function get_method (line 59) | def get_method(name: str) -> MethodConfig: class PPOConfig (line 73) | class PPOConfig(MethodConfig): method get_advantages_and_returns (line 133) | def get_advantages_and_returns( method loss (line 171) | def loss( class SFTConfig (line 238) | class SFTConfig(MethodConfig): class ILQLConfig (line 251) | class ILQLConfig(MethodConfig): method loss (line 262) | def loss(self, outputs, labels): FILE: src/utils/modeling_utils.py function _make_causal_mask (line 41) | def _make_causal_mask( function _expand_mask (line 59) | def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Option... function _prepare_decoder_attention_mask (line 73) | def _prepare_decoder_attention_mask(attention_mask, input_shape, input_e... function get_distributed_config (line 102) | def get_distributed_config(accelerator: Accelerator): class OptimizerName (line 127) | class OptimizerName(str, Enum): function get_optimizer_class (line 137) | def get_optimizer_class(name: OptimizerName): class SchedulerName (line 174) | class SchedulerName(str, Enum): function get_scheduler_class (line 181) | def get_scheduler_class(name: SchedulerName): class Clock (line 193) | class Clock: method __init__ (line 198) | def __init__(self): method tick (line 203) | def tick(self, samples: int = 0) -> float: method get_stat (line 219) | def get_stat(self, n_samp: int = 1000, reset: bool = False): function tree_map (line 234) | def tree_map(f, tree: Any) -> Any: function to_device (line 248) | def to_device(tree, device, non_blocking=False): function filter_non_scalars (line 255) | def filter_non_scalars(xs: Dict) -> Dict: function get_git_tag (line 269) | def get_git_tag() -> Tuple[str, str]: function make_head (line 281) | def make_head(n_embd: int, out: int, dtype: type = torch.float32) -> nn.... function freeze_bottom_causal_layers (line 290) | def freeze_bottom_causal_layers(model: nn.Module, num_layers_unfrozen: i... function freeze_bottom_seq2seq_layers (line 303) | def freeze_bottom_seq2seq_layers(model: nn.Module, num_layers_unfrozen: ... function rhasattr (line 325) | def rhasattr(obj, attr): function rgetattr (line 341) | def rgetattr(obj, attr: str, *args) -> object: function findattr (line 354) | def findattr(obj, attrs: Tuple[str]) -> Union[object, None]: function hf_get_decoder (line 361) | def hf_get_decoder(model: nn.Module) -> nn.Module: function hf_get_decoder_final_norm (line 374) | def hf_get_decoder_final_norm(model: nn.Module) -> float: function hf_get_decoder_blocks (line 390) | def hf_get_decoder_blocks(model: nn.Module) -> Tuple[nn.Module]: function hf_get_lm_head (line 411) | def hf_get_lm_head(model: nn.Module) -> nn.Module: function hf_get_hidden_size (line 421) | def hf_get_hidden_size(config: transformers.PretrainedConfig) -> int: function hf_get_num_hidden_layers (line 433) | def hf_get_num_hidden_layers(config: transformers.PretrainedConfig) -> int: function get_global_statistics (line 445) | def get_global_statistics(xs: torch.Tensor) -> Tuple[float, float, int]: function whiten (line 460) | def whiten(xs: torch.Tensor, shift_mean=True, distributed=True) -> torch... function logprobs_of_labels (line 473) | def logprobs_of_labels(logits, labels): function flatten_dict (line 482) | def flatten_dict( function get_tensor_stats (line 498) | def get_tensor_stats(xs: torch.Tensor, mask: torch.Tensor, n: int): class RunningMoments (line 508) | class RunningMoments: method __init__ (line 509) | def __init__(self): method update (line 519) | def update(self, xs: torch.Tensor) -> Tuple[float, float]: function generate_layer_regex (line 629) | def generate_layer_regex(config: transformers.PretrainedConfig, num_laye... function get_delta_modified_modules (line 641) | def get_delta_modified_modules( function get_delta_model_class (line 658) | def get_delta_model_class(model_type: str): function parse_delta_kwargs (line 671) | def parse_delta_kwargs( function regex_for_range (line 705) | def regex_for_range(min_: int, max_: int) -> str: # noqa function get_optimizer_grouped_parameters (line 782) | def get_optimizer_grouped_parameters(model, function get_all_reduce_mean (line 808) | def get_all_reduce_mean(tensor): function moving_average (line 814) | def moving_average(model, model_ema, beta=0.992, device=None, zero_stage... function save_hf_format (line 831) | def save_hf_format(model, tokenizer, args, sub_folder=""): function _z3_params_to_fetch (line 852) | def _z3_params_to_fetch(param_list): function save_zero_three_model (line 859) | def save_zero_three_model(model_ema, global_rank, save_dir, zero_stage=0): function sorted_checkpoints (line 886) | def sorted_checkpoints(output_dir=None, checkpoint_prefix="checkpoint", ... function rotate_checkpoints (line 905) | def rotate_checkpoints(save_total_limit, use_mtime=False, output_dir=Non... function qwen_make_context (line 931) | def qwen_make_context( FILE: src/utils/nlp_utils.py function clean_text (line 7) | def clean_text(text):