SYMBOL INDEX (271 symbols across 24 files) FILE: air_llm/airllm/airllm.py class AirLLMLlama2 (line 7) | class AirLLMLlama2(AirLLMBaseModel): method __init__ (line 8) | def __init__(self, *args, **kwargs): FILE: air_llm/airllm/airllm_baichuan.py class AirLLMBaichuan (line 10) | class AirLLMBaichuan(AirLLMBaseModel): method __init__ (line 13) | def __init__(self, *args, **kwargs): method get_use_better_transformer (line 18) | def get_use_better_transformer(self): method get_tokenizer (line 20) | def get_tokenizer(self, hf_token=None): method get_generation_config (line 24) | def get_generation_config(self): FILE: air_llm/airllm/airllm_base.py class AirLLMBaseModel (line 46) | class AirLLMBaseModel(GenerationMixin): method set_layer_names_dict (line 49) | def set_layer_names_dict(self): method __init__ (line 57) | def __init__(self, model_local_path_or_repo_id, device="cuda:0", dtype... method get_generation_config (line 163) | def get_generation_config(self): method get_tokenizer (line 172) | def get_tokenizer(self, hf_token=None): method get_use_better_transformer (line 178) | def get_use_better_transformer(self): method init_model (line 181) | def init_model(self): method set_layers_from_layer_names (line 240) | def set_layers_from_layer_names(self): method load_rotary_pos_emb_to_device (line 265) | def load_rotary_pos_emb_to_device(self): method load_layer_to_cpu (line 269) | def load_layer_to_cpu(self, layer_name): method move_layer_to_device (line 302) | def move_layer_to_device(self, state_dict): method can_generate (line 326) | def can_generate(self): method prepare_inputs_for_generation (line 329) | def prepare_inputs_for_generation( method __call__ (line 368) | def __call__(self, *args, **kwargs): method get_past_key_values_cache_seq_len (line 371) | def get_past_key_values_cache_seq_len(self, past_key_values): method get_sequence_len (line 373) | def get_sequence_len(self, seq): method get_pos_emb_args (line 376) | def get_pos_emb_args(self, len_p, len_s): method get_past_key_value_args (line 379) | def get_past_key_value_args(self, k_cache, v_cache): method get_attention_mask_args (line 382) | def get_attention_mask_args(self, full_attention_mask, len_p, len_s): method get_position_ids_args (line 385) | def get_position_ids_args(self, full_position_ids, len_p, len_s): method run_lm_head (line 390) | def run_lm_head(self, layer, seq): method run_norm (line 393) | def run_norm(self, layer, seq): method forward (line 396) | def forward( FILE: air_llm/airllm/airllm_chatglm.py class AirLLMChatGLM (line 8) | class AirLLMChatGLM(AirLLMBaseModel): method __init__ (line 11) | def __init__(self, *args, **kwargs): method get_use_better_transformer (line 16) | def get_use_better_transformer(self): method get_generation_config (line 19) | def get_generation_config(self): method get_sequence_len (line 22) | def get_sequence_len(self, seq): method get_past_key_values_cache_seq_len (line 25) | def get_past_key_values_cache_seq_len(self, past_key_values): method set_layer_names_dict (line 30) | def set_layer_names_dict(self): method get_pos_emb_args (line 37) | def get_pos_emb_args(self, len_p, len_s): method get_past_key_value_args (line 45) | def get_past_key_value_args(self, k_cache, v_cache): method get_attention_mask_args (line 48) | def get_attention_mask_args(self, full_attention_mask, len_p, len_s): method get_position_ids_args (line 51) | def get_position_ids_args(self, full_position_ids, len_p, len_s): FILE: air_llm/airllm/airllm_internlm.py class AirLLMInternLM (line 8) | class AirLLMInternLM(AirLLMBaseModel): method __init__ (line 11) | def __init__(self, *args, **kwargs): method get_use_better_transformer (line 16) | def get_use_better_transformer(self): method get_generation_config (line 18) | def get_generation_config(self): FILE: air_llm/airllm/airllm_llama_mlx.py class ModelArgs (line 23) | class ModelArgs: function sanitize_config (line 35) | def sanitize_config(config, weights=None): function get_model_args_from_config (line 53) | def get_model_args_from_config(config): class RMSNorm (line 71) | class RMSNorm(nn.Module): method __init__ (line 72) | def __init__(self, dims: int, eps: float = 1e-5): method _norm (line 77) | def _norm(self, x): method __call__ (line 80) | def __call__(self, x): class Attention (line 85) | class Attention(nn.Module): method __init__ (line 86) | def __init__(self, args: ModelArgs): method __call__ (line 105) | def __call__( class FeedForward (line 144) | class FeedForward(nn.Module): method __init__ (line 145) | def __init__(self, args: ModelArgs): method __call__ (line 152) | def __call__(self, x) -> mx.array: class TransformerBlock (line 156) | class TransformerBlock(nn.Module): method __init__ (line 157) | def __init__(self, args: ModelArgs): method __call__ (line 167) | def __call__( function sample (line 179) | def sample(logits, temperature=0): class AirLLMLlamaMlx (line 185) | class AirLLMLlamaMlx: method set_layer_names_dict (line 188) | def set_layer_names_dict(self): method record_memory (line 195) | def record_memory(self, msg=None): method __init__ (line 210) | def __init__(self, model_local_path_or_repo_id, device="cuda:0", dtype... method get_tokenizer (line 245) | def get_tokenizer(self, hf_token=None): method generate (line 252) | def generate(self, x, temperature=0, max_new_tokens=None, **kwargs): method model_generate (line 265) | def model_generate(self, x, temperature=0, max_new_tokens=None): FILE: air_llm/airllm/airllm_mistral.py class AirLLMMistral (line 8) | class AirLLMMistral(AirLLMBaseModel): method __init__ (line 11) | def __init__(self, *args, **kwargs): method get_use_better_transformer (line 16) | def get_use_better_transformer(self): method get_generation_config (line 18) | def get_generation_config(self): FILE: air_llm/airllm/airllm_mixtral.py class AirLLMMixtral (line 8) | class AirLLMMixtral(AirLLMBaseModel): method __init__ (line 11) | def __init__(self, *args, **kwargs): method get_use_better_transformer (line 16) | def get_use_better_transformer(self): method get_generation_config (line 19) | def get_generation_config(self): FILE: air_llm/airllm/airllm_qwen.py class AirLLMQWen (line 8) | class AirLLMQWen(AirLLMBaseModel): method __init__ (line 11) | def __init__(self, *args, **kwargs): method get_use_better_transformer (line 16) | def get_use_better_transformer(self): method get_generation_config (line 18) | def get_generation_config(self): method get_past_key_values_cache_seq_len (line 22) | def get_past_key_values_cache_seq_len(self, past_key_values): method set_layer_names_dict (line 27) | def set_layer_names_dict(self): method get_pos_emb_args (line 33) | def get_pos_emb_args(self, len_p, len_s): method get_past_key_value_args (line 49) | def get_past_key_value_args(self, k_cache, v_cache): method get_attention_mask_args (line 52) | def get_attention_mask_args(self, full_attention_mask, len_p, len_s): method get_position_ids_args (line 55) | def get_position_ids_args(self, full_position_ids, len_p, len_s): FILE: air_llm/airllm/airllm_qwen2.py class AirLLMQWen2 (line 9) | class AirLLMQWen2(AirLLMBaseModel): method __init__ (line 12) | def __init__(self, *args, **kwargs): method get_use_better_transformer (line 17) | def get_use_better_transformer(self): FILE: air_llm/airllm/auto_model.py class AutoModel (line 13) | class AutoModel: method __init__ (line 14) | def __init__(self): method get_module_class (line 20) | def get_module_class(cls, pretrained_model_name_or_path, *inputs, **kw... method from_pretrained (line 48) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa... FILE: air_llm/airllm/persist/mlx_model_persister.py function map_torch_to_mlx (line 16) | def map_torch_to_mlx(model): class MlxModelPersister (line 57) | class MlxModelPersister(ModelPersister): method __init__ (line 60) | def __init__(self, *args, **kwargs): method model_persist_exist (line 66) | def model_persist_exist(self, layer_name, saving_path): method persist_model (line 77) | def persist_model(self, state_dict, layer_name, saving_path): method load_model (line 91) | def load_model(self, layer_name, path): FILE: air_llm/airllm/persist/model_persister.py class ModelPersister (line 6) | class ModelPersister: method __init__ (line 7) | def __init__(self): method get_model_persister (line 11) | def get_model_persister(cls): method model_persist_exist (line 32) | def model_persist_exist(self, layer_name, saving_path): method persist_model (line 35) | def persist_model(self, state_dict, layer_name, path): method load_model (line 38) | def load_model(self, layer_name, path): FILE: air_llm/airllm/persist/safetensor_model_persister.py class SafetensorModelPersister (line 11) | class SafetensorModelPersister(ModelPersister): method __init__ (line 14) | def __init__(self, *args, **kwargs): method model_persist_exist (line 20) | def model_persist_exist(self, layer_name, saving_path): method persist_model (line 27) | def persist_model(self, state_dict, layer_name, saving_path): method load_model (line 36) | def load_model(self, layer_name, path): FILE: air_llm/airllm/profiler.py class LayeredProfiler (line 5) | class LayeredProfiler: method __init__ (line 6) | def __init__(self, print_memory=False): method add_profiling_time (line 12) | def add_profiling_time(self, item, time): method clear_profiling_time (line 24) | def clear_profiling_time(self): method print_profiling_time (line 28) | def print_profiling_time(self): FILE: air_llm/airllm/tokenization_baichuan.py class BaichuanTokenizer (line 43) | class BaichuanTokenizer(PreTrainedTokenizer): method __init__ (line 57) | def __init__( method __getstate__ (line 92) | def __getstate__(self): method __setstate__ (line 97) | def __setstate__(self, d): method vocab_size (line 103) | def vocab_size(self): method get_vocab (line 107) | def get_vocab(self): method _tokenize (line 113) | def _tokenize(self, text): method _convert_token_to_id (line 117) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 121) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 126) | def convert_tokens_to_string(self, tokens): method save_vocabulary (line 145) | def save_vocabulary(self, save_directory, filename_prefix: Optional[st... method build_inputs_with_special_tokens (line 172) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No... method get_special_tokens_mask (line 183) | def get_special_tokens_mask( method create_token_type_ids_from_sequences (line 220) | def create_token_type_ids_from_sequences( FILE: air_llm/airllm/utils.py function save_quant_state_to_dict (line 40) | def save_quant_state_to_dict(self, packed=True): class NotEnoughSpaceException (line 71) | class NotEnoughSpaceException(Exception): function clean_memory (line 75) | def clean_memory(): function uncompress_layer_state_dict (line 85) | def uncompress_layer_state_dict(layer_state_dict): function load_layer (line 115) | def load_layer(local_path, layer_name, profiling=False): function check_space (line 134) | def check_space(checkpoint_path, layer_shards_saving_path=None, compress... function compress_layer_state_dict (line 157) | def compress_layer_state_dict(layer_state_dict, compression=None): function remove_real_and_linked_file (line 178) | def remove_real_and_linked_file(to_delete): function split_and_save_layers (line 188) | def split_and_save_layers(checkpoint_path, layer_shards_saving_path=None... function find_or_create_local_splitted_path (line 341) | def find_or_create_local_splitted_path(model_local_path_or_repo_id, laye... FILE: air_llm/setup.py class PostInstallCommand (line 7) | class PostInstallCommand(install): method run (line 8) | def run(self): FILE: air_llm/tests/test_automodel.py class TestAutoModel (line 10) | class TestAutoModel(unittest.TestCase): method setUp (line 11) | def setUp(self): method tearDown (line 13) | def tearDown(self): method test_auto_model_should_return_correct_model (line 16) | def test_auto_model_should_return_correct_model(self): FILE: air_llm/tests/test_compression.py class TestCompression (line 12) | class TestCompression(unittest.TestCase): method setUp (line 13) | def setUp(self): method tearDown (line 15) | def tearDown(self): method test_should_compress_uncompress (line 18) | def test_should_compress_uncompress(self): FILE: anima_100k/longer_training.py function print_tensors (line 69) | def print_tensors(where_str=''): function _get_tensors (line 83) | def _get_tensors(gpu_only=True): function get_sample_gen_test_examples (line 99) | def get_sample_gen_test_examples(): class ModelArguments (line 119) | class ModelArguments: class DataArguments (line 129) | class DataArguments: class TrainingArguments (line 165) | class TrainingArguments(transformers.Seq2SeqTrainingArguments): class GenerationArguments (line 256) | class GenerationArguments: function find_all_linear_names (line 287) | def find_all_linear_names(args, model): class SampleGenerateCallback (line 301) | class SampleGenerateCallback(transformers.TrainerCallback): method on_substep_end (line 304) | def on_substep_end(self, args, state, control, **kwargs ): method on_evaluate (line 308) | def on_evaluate(self, args, state, control, **kwargs): class SavePeftModelCallback (line 341) | class SavePeftModelCallback(transformers.TrainerCallback): method save_model (line 342) | def save_model(self, args, state, kwargs): method on_save (line 356) | def on_save(self, args, state, control, **kwargs): method on_train_end (line 360) | def on_train_end(self, args, state, control, **kwargs): function get_accelerate_model (line 368) | def get_accelerate_model(args, checkpoint_dir): function print_trainable_parameters (line 454) | def print_trainable_parameters(args, model): function smart_tokenizer_and_embedding_resize (line 471) | def smart_tokenizer_and_embedding_resize( class DataCollatorForCausalLM (line 494) | class DataCollatorForCausalLM(object): method __call__ (line 501) | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: function extract_unnatural_instructions_data (line 546) | def extract_unnatural_instructions_data(examples, extract_reformulations... function extract_alpaca_dataset (line 576) | def extract_alpaca_dataset(example): function local_dataset (line 583) | def local_dataset(dataset_name): function make_data_module (line 598) | def make_data_module(tokenizer: transformers.PreTrainedTokenizer, args) ... function get_last_checkpoint (line 705) | def get_last_checkpoint(checkpoint_dir): function train (line 719) | def train(): FILE: anima_100k/modeling_flash_llama.py function rmsnorm_func (line 75) | def rmsnorm_func(hidden_states, weight, variance_epsilon): class LlamaRMSNorm (line 83) | class LlamaRMSNorm(nn.Module): method __init__ (line 84) | def __init__(self, hidden_size, eps=1e-6): method forward (line 96) | def forward(self, hidden_states): class FlashRotaryEmbedding (line 100) | class FlashRotaryEmbedding(torch.nn.Module): method __init__ (line 118) | def __init__(self, dim: int, base=10000.0, interleaved=False, scale_ba... method _compute_inv_freq (line 155) | def _compute_inv_freq(self, device=None): method _update_cos_sin_cache (line 160) | def _update_cos_sin_cache(self, seqlen, device=None, dtype=None): method forward (line 202) | def forward(self, q: torch.Tensor, k: torch.Tensor, seqlen_offset: int... class LlamaMLP (line 221) | class LlamaMLP(nn.Module): method __init__ (line 222) | def __init__(self, config): method forward (line 232) | def forward(self, x): function repeat_kv (line 255) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class LlamaAttention (line 267) | class LlamaAttention(nn.Module): method __init__ (line 270) | def __init__(self, config: LlamaConfig): method _shape (line 307) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 310) | def forward( class LlamaDecoderLayer (line 418) | class LlamaDecoderLayer(nn.Module): method __init__ (line 419) | def __init__(self, config: LlamaConfig): method forward (line 427) | def forward( class LlamaPreTrainedModel (line 505) | class LlamaPreTrainedModel(PreTrainedModel): method _init_weights (line 512) | def _init_weights(self, module): method _set_gradient_checkpointing (line 523) | def _set_gradient_checkpointing(self, module, value=False): class LlamaModel (line 596) | class LlamaModel(LlamaPreTrainedModel): method __init__ (line 604) | def __init__(self, config: LlamaConfig): method get_input_embeddings (line 617) | def get_input_embeddings(self): method set_input_embeddings (line 620) | def set_input_embeddings(self, value): method forward (line 624) | def forward( class LlamaForCausalLM (line 740) | class LlamaForCausalLM(LlamaPreTrainedModel): method __init__ (line 743) | def __init__(self, config): method get_input_embeddings (line 752) | def get_input_embeddings(self): method set_input_embeddings (line 755) | def set_input_embeddings(self, value): method get_output_embeddings (line 758) | def get_output_embeddings(self): method set_output_embeddings (line 761) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 764) | def set_decoder(self, decoder): method get_decoder (line 767) | def get_decoder(self): method forward (line 772) | def forward( method prepare_inputs_for_generation (line 878) | def prepare_inputs_for_generation( method _reorder_cache (line 907) | def _reorder_cache(past_key_values, beam_idx): class LlamaForSequenceClassification (line 931) | class LlamaForSequenceClassification(LlamaPreTrainedModel): method __init__ (line 932) | def __init__(self, config): method get_input_embeddings (line 941) | def get_input_embeddings(self): method set_input_embeddings (line 944) | def set_input_embeddings(self, value): method forward (line 948) | def forward( FILE: rlhf/qlora_dpo.py class ModelArguments (line 68) | class ModelArguments: class DataArguments (line 78) | class DataArguments: class TrainingArguments (line 114) | class TrainingArguments(transformers.Seq2SeqTrainingArguments): class GenerationArguments (line 207) | class GenerationArguments: function find_all_linear_names (line 238) | def find_all_linear_names(args, model): class SampleGenerateCallback (line 252) | class SampleGenerateCallback(transformers.TrainerCallback): method on_evaluate (line 255) | def on_evaluate(self, args, state, control, **kwargs): class SavePeftModelCallback (line 284) | class SavePeftModelCallback(transformers.TrainerCallback): method save_model (line 285) | def save_model(self, args, state, kwargs): method on_save (line 299) | def on_save(self, args, state, control, **kwargs): method on_train_end (line 303) | def on_train_end(self, args, state, control, **kwargs): function get_reference_model (line 311) | def get_reference_model(args, checkpoint_dir): function get_accelerate_model (line 354) | def get_accelerate_model(args, checkpoint_dir): function print_trainable_parameters (line 429) | def print_trainable_parameters(args, model): function smart_tokenizer_and_embedding_resize (line 446) | def smart_tokenizer_and_embedding_resize( class DataCollatorForCausalLM (line 469) | class DataCollatorForCausalLM(object): method __call__ (line 476) | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: function extract_unnatural_instructions_data (line 516) | def extract_unnatural_instructions_data(examples, extract_reformulations... function extract_alpaca_dataset (line 546) | def extract_alpaca_dataset(example): function local_dataset (line 553) | def local_dataset(dataset_name): function make_data_module (line 568) | def make_data_module(tokenizer: transformers.PreTrainedTokenizer, args) ... function get_last_checkpoint (line 652) | def get_last_checkpoint(checkpoint_dir): function _get_batch_logps (line 666) | def _get_batch_logps(logits: torch.FloatTensor, labels: torch.LongTensor... function dpo_loss (line 694) | def dpo_loss(policy_chosen_logps: torch.FloatTensor, class DPOSeq2SeqTrainer (line 742) | class DPOSeq2SeqTrainer(Seq2SeqTrainer): method __init__ (line 743) | def __init__(self, reference_model: torch.nn.Module, method compute_loss (line 753) | def compute_loss(self, model, inputs, return_outputs=False): function compute_metrics (line 781) | def compute_metrics(ep: EvalPrediction): function train (line 785) | def train(): FILE: training/qlora.py class ModelArguments (line 66) | class ModelArguments: class DataArguments (line 76) | class DataArguments: class TrainingArguments (line 112) | class TrainingArguments(transformers.Seq2SeqTrainingArguments): class GenerationArguments (line 202) | class GenerationArguments: function find_all_linear_names (line 233) | def find_all_linear_names(args, model): class SampleGenerateCallback (line 247) | class SampleGenerateCallback(transformers.TrainerCallback): method on_evaluate (line 250) | def on_evaluate(self, args, state, control, **kwargs): class SavePeftModelCallback (line 279) | class SavePeftModelCallback(transformers.TrainerCallback): method save_model (line 280) | def save_model(self, args, state, kwargs): method on_save (line 294) | def on_save(self, args, state, control, **kwargs): method on_train_end (line 298) | def on_train_end(self, args, state, control, **kwargs): function get_accelerate_model (line 306) | def get_accelerate_model(args, checkpoint_dir): function print_trainable_parameters (line 381) | def print_trainable_parameters(args, model): function smart_tokenizer_and_embedding_resize (line 398) | def smart_tokenizer_and_embedding_resize( class DataCollatorForCausalLM (line 421) | class DataCollatorForCausalLM(object): method __call__ (line 428) | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: function extract_unnatural_instructions_data (line 473) | def extract_unnatural_instructions_data(examples, extract_reformulations... function extract_alpaca_dataset (line 503) | def extract_alpaca_dataset(example): function local_dataset (line 510) | def local_dataset(dataset_name): function make_data_module (line 525) | def make_data_module(tokenizer: transformers.PreTrainedTokenizer, args) ... function get_last_checkpoint (line 651) | def get_last_checkpoint(checkpoint_dir): function train (line 665) | def train():