SYMBOL INDEX (296 symbols across 21 files) FILE: chat_gradio/app.py class StopOnTokens (line 17) | class StopOnTokens(StoppingCriteria): method __call__ (line 18) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... function predict (line 27) | def predict(message, history): FILE: lit_gpt/adapter.py class Config (line 22) | class Config(BaseConfig): class GPT (line 27) | class GPT(BaseModel): method __init__ (line 31) | def __init__(self, config: Config) -> None: method reset_cache (line 50) | def reset_cache(self) -> None: method forward (line 54) | def forward( method from_name (line 115) | def from_name(cls, name: str, **kwargs: Any) -> Self: method _init_weights (line 118) | def _init_weights(self, module: nn.Module) -> None: class Block (line 125) | class Block(nn.Module): method __init__ (line 129) | def __init__(self, config: Config, block_idx: int) -> None: method forward (line 139) | def forward( class CausalSelfAttention (line 167) | class CausalSelfAttention(BaseCausalSelfAttention): method __init__ (line 171) | def __init__(self, config: Config, block_idx: int) -> None: method forward (line 181) | def forward( method reset_parameters (line 266) | def reset_parameters(self) -> None: method _load_from_state_dict (line 269) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... function mark_only_adapter_as_trainable (line 276) | def mark_only_adapter_as_trainable(model: GPT) -> None: function adapter_filter (line 282) | def adapter_filter(key: str, value: Any) -> bool: FILE: lit_gpt/adapter_v2.py class Config (line 26) | class Config(BaseConfig): method mlp_class (line 28) | def mlp_class(self) -> Type: function adapter_filter (line 32) | def adapter_filter(key: str, value: Any) -> bool: class AdapterV2Linear (line 48) | class AdapterV2Linear(torch.nn.Module): method __init__ (line 49) | def __init__(self, in_features: int, out_features: int, **kwargs) -> N... method forward (line 56) | def forward(self, x: torch.Tensor) -> torch.Tensor: method reset_parameters (line 59) | def reset_parameters(self) -> None: class GPT (line 64) | class GPT(BaseModel): method __init__ (line 65) | def __init__(self, config: Config) -> None: method from_name (line 86) | def from_name(cls, name: str, **kwargs: Any) -> Self: method _init_weights (line 89) | def _init_weights(self, module: nn.Module) -> None: method _load_from_state_dict (line 97) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class Block (line 104) | class Block(BaseBlock): method __init__ (line 108) | def __init__(self, config: Config, block_idx: int) -> None: class CausalSelfAttention (line 120) | class CausalSelfAttention(BaseCausalSelfAttention): method __init__ (line 121) | def __init__(self, config: Config, block_idx: int) -> None: method forward (line 145) | def forward( method reset_parameters (line 230) | def reset_parameters(self) -> None: method _load_from_state_dict (line 233) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class GptNeoxMLP (line 248) | class GptNeoxMLP(lit_gpt.model.GptNeoxMLP): method __init__ (line 249) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 254) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class LLaMAMLP (line 266) | class LLaMAMLP(lit_gpt.model.LLaMAMLP): method __init__ (line 267) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 273) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... function mark_only_adapter_v2_as_trainable (line 287) | def mark_only_adapter_v2_as_trainable(model: GPT) -> None: FILE: lit_gpt/config.py class Config (line 12) | class Config: method __post_init__ (line 53) | def __post_init__(self): method head_size (line 71) | def head_size(self) -> int: method from_name (line 75) | def from_name(cls, name: str, **kwargs: Any) -> Self: method mlp_class (line 81) | def mlp_class(self) -> Type: method norm_class (line 86) | def norm_class(self) -> Type: FILE: lit_gpt/fused_cross_entropy.py class SoftmaxCrossEntropyLossFn (line 15) | class SoftmaxCrossEntropyLossFn(torch.autograd.Function): method forward (line 17) | def forward( method backward (line 103) | def backward(ctx, grad_loss): class FusedCrossEntropyLoss (line 113) | class FusedCrossEntropyLoss(nn.Module): method __init__ (line 114) | def __init__( method forward (line 131) | def forward(self, input, target): FILE: lit_gpt/fused_rotary_embedding.py class ApplyRotaryEmb (line 10) | class ApplyRotaryEmb(torch.autograd.Function): method forward (line 12) | def forward(ctx, x, cos, sin, interleaved=False, inplace=False): method backward (line 56) | def backward(ctx, do): FILE: lit_gpt/lora.py class LoRALayer (line 62) | class LoRALayer(nn.Module): method __init__ (line 63) | def __init__(self, r: int, lora_alpha: int, lora_dropout: float): class LoRALinear (line 87) | class LoRALinear(LoRALayer): method __init__ (line 89) | def __init__( method reset_parameters (line 128) | def reset_parameters(self): method merge (line 136) | def merge(self): method forward (line 143) | def forward(self, x: torch.Tensor): class LoRAQKVLinear (line 153) | class LoRAQKVLinear(LoRALinear): method __init__ (line 155) | def __init__( method zero_pad (line 256) | def zero_pad(self, x: torch.Tensor) -> torch.Tensor: method conv1d (line 298) | def conv1d(self, input: torch.Tensor, weight: torch.Tensor) -> torch.T... method merge (line 333) | def merge(self): method forward (line 351) | def forward(self, x: torch.Tensor) -> torch.Tensor: function mark_only_lora_as_trainable (line 389) | def mark_only_lora_as_trainable(model: nn.Module, bias: str = "none") ->... function lora_filter (line 422) | def lora_filter(key: str, value: Any) -> bool: class Config (line 427) | class Config(BaseConfig): method mlp_class (line 450) | def mlp_class(self) -> Type: class GPT (line 454) | class GPT(BaseModel): method __init__ (line 455) | def __init__(self, config: Config) -> None: method forward (line 481) | def forward( method from_name (line 539) | def from_name(cls, name: str, **kwargs: Any) -> Self: method _init_weights (line 542) | def _init_weights(self, module: nn.Module) -> None: method _load_from_state_dict (line 548) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class Block (line 555) | class Block(BaseBlock): method __init__ (line 556) | def __init__(self, config: Config) -> None: class CausalSelfAttention (line 567) | class CausalSelfAttention(BaseCausalSelfAttention): method __init__ (line 568) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 604) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class GptNeoxMLP (line 616) | class GptNeoxMLP(lit_gpt.model.GptNeoxMLP): method __init__ (line 617) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 636) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class LLaMAMLP (line 648) | class LLaMAMLP(lit_gpt.model.LLaMAMLP): method __init__ (line 649) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 676) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... function merge_lora_weights (line 690) | def merge_lora_weights(model: GPT) -> None: FILE: lit_gpt/model.py class GPT (line 22) | class GPT(nn.Module): method __init__ (line 23) | def __init__(self, config: Config) -> None: method _init_weights (line 40) | def _init_weights(self, module: nn.Module, n_layer) -> None: method reset_cache (line 57) | def reset_cache(self) -> None: method forward (line 64) | def forward( method from_name (line 116) | def from_name(cls, name: str, **kwargs: Any) -> Self: method build_rope_cache (line 119) | def build_rope_cache(self, idx: torch.Tensor) -> RoPECache: method build_mask_cache (line 128) | def build_mask_cache(self, idx: torch.Tensor) -> torch.Tensor: method build_kv_caches (line 132) | def build_kv_caches(self, idx: torch.Tensor, max_seq_length: int, rope... class Block (line 150) | class Block(nn.Module): method __init__ (line 151) | def __init__(self, config: Config) -> None: method forward (line 159) | def forward( class CausalSelfAttention (line 186) | class CausalSelfAttention(nn.Module): method __init__ (line 187) | def __init__(self, config: Config) -> None: method forward (line 197) | def forward( method scaled_dot_product_attention (line 268) | def scaled_dot_product_attention( class GptNeoxMLP (line 294) | class GptNeoxMLP(nn.Module): method __init__ (line 295) | def __init__(self, config: Config) -> None: method forward (line 300) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LLaMAMLP (line 306) | class LLaMAMLP(nn.Module): method __init__ (line 307) | def __init__(self, config: Config) -> None: method forward (line 313) | def forward(self, x: torch.Tensor) -> torch.Tensor: function build_rope_cache (line 321) | def build_rope_cache( function apply_rope (line 350) | def apply_rope(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) ->... FILE: lit_gpt/packed_dataset.py function code (line 16) | def code(dtype): class PackedDataset (line 27) | class PackedDataset(IterableDataset): method __init__ (line 28) | def __init__( method __iter__ (line 40) | def __iter__(self): class PackedDatasetBuilder (line 60) | class PackedDatasetBuilder(object): method __init__ (line 61) | def __init__(self, outdir, prefix, chunk_size, sep_token, dtype="auto"... method _write_chunk (line 82) | def _write_chunk(self): method dtype (line 99) | def dtype(self): method filenames (line 103) | def filenames(self): method add_array (line 106) | def add_array(self, arr): method write_reminder (line 117) | def write_reminder(self): class PackedDatasetIterator (line 121) | class PackedDatasetIterator: method __init__ (line 122) | def __init__(self, filenames, n_chunks, block_size, seed, shuffle, wrap): method _read_header (line 150) | def _read_header(self, path): method _close_mmaps (line 161) | def _close_mmaps(self): method _load_n_chunks (line 165) | def _load_n_chunks(self): method __del__ (line 192) | def __del__(self): method __iter__ (line 197) | def __iter__(self): method __next__ (line 200) | def __next__(self): class CombinedDataset (line 214) | class CombinedDataset(IterableDataset): method __init__ (line 215) | def __init__(self, datasets, seed, weights=None): method __iter__ (line 223) | def __iter__(self): class CombinedDatasetIterator (line 227) | class CombinedDatasetIterator: method __init__ (line 228) | def __init__(self, datasets, seed, weights): method __next__ (line 233) | def __next__(self): FILE: lit_gpt/rmsnorm.py function maybe_align (line 10) | def maybe_align(x, alignment_in_bytes=16): function _dropout_add_layer_norm_forward (line 17) | def _dropout_add_layer_norm_forward( function _dropout_add_layer_norm_backward (line 56) | def _dropout_add_layer_norm_backward( function _dropout_add_layer_norm_subset_forward (line 111) | def _dropout_add_layer_norm_subset_forward( function _dropout_add_layer_norm_subset_backward (line 154) | def _dropout_add_layer_norm_subset_backward( function _dropout_add_layer_norm_parallel_residual_forward (line 213) | def _dropout_add_layer_norm_parallel_residual_forward( function _dropout_add_layer_norm_parallel_residual_backward (line 258) | def _dropout_add_layer_norm_parallel_residual_backward( class DropoutAddLayerNormFn (line 312) | class DropoutAddLayerNormFn(torch.autograd.Function): method forward (line 314) | def forward( method backward (line 375) | def backward(ctx, dz, *args): class DropoutAddLayerNormSubsetFn (line 417) | class DropoutAddLayerNormSubsetFn(torch.autograd.Function): method forward (line 419) | def forward( method backward (line 484) | def backward(ctx, dz, *args): class DropoutAddLayerNormParallelResidualFn (line 532) | class DropoutAddLayerNormParallelResidualFn(torch.autograd.Function): method forward (line 534) | def forward( method backward (line 606) | def backward(ctx, dz0, dz1, *args): function layer_norm (line 658) | def layer_norm(x, weight, bias, epsilon): function dropout_add_layer_norm (line 662) | def dropout_add_layer_norm( function dropout_add_layer_norm_subset (line 694) | def dropout_add_layer_norm_subset( function dropout_add_layer_norm_parallel_residual (line 732) | def dropout_add_layer_norm_parallel_residual( class DropoutAddLayerNorm (line 766) | class DropoutAddLayerNorm(torch.nn.Module): method __init__ (line 767) | def __init__( method reset_parameters (line 787) | def reset_parameters(self): method forward (line 791) | def forward(self, x0, residual=None): function rms_norm (line 803) | def rms_norm(x, weight, epsilon): class FusedRMSNorm (line 807) | class FusedRMSNorm(torch.nn.Module): method __init__ (line 808) | def __init__(self, size: int, dim: int = -1, eps: float = 1e-5): method reset_parameters (line 815) | def reset_parameters(self): method forward (line 818) | def forward(self, x): class RMSNorm (line 822) | class RMSNorm(torch.nn.Module): method __init__ (line 829) | def __init__(self, size: int, dim: int = -1, eps: float = 1e-5) -> None: method forward (line 835) | def forward(self, x: torch.Tensor) -> torch.Tensor: method reset_parameters (line 841) | def reset_parameters(self): FILE: lit_gpt/speed_monitor.py function get_flops_available (line 74) | def get_flops_available(device: torch.device, precision: str) -> Optiona... class SpeedMonitorBase (line 122) | class SpeedMonitorBase: method __init__ (line 183) | def __init__( method on_train_batch_end (line 219) | def on_train_batch_end( method eval_end (line 297) | def eval_end(self, eval_elapsed: float): class SpeedMonitorFabric (line 301) | class SpeedMonitorFabric(SpeedMonitorBase): method __init__ (line 302) | def __init__(self, fabric: Fabric, *args: Any, **kwargs: Any) -> None: method on_train_batch_end (line 308) | def on_train_batch_end(self, *args: Any, **kwargs: Any): class SpeedMonitorCallback (line 312) | class SpeedMonitorCallback(Callback): method __init__ (line 313) | def __init__(self, length_fn: Callable[[Any], int], batch_size: int, *... method setup (line 323) | def setup(self, trainer: Trainer, pl_module: LightningModule, stage: s... method on_train_start (line 333) | def on_train_start(self, trainer: Trainer, pl_module: LightningModule)... method on_train_batch_end (line 340) | def on_train_batch_end( method on_validation_start (line 360) | def on_validation_start(self, trainer: Trainer, pl_module: LightningMo... method on_validation_end (line 364) | def on_validation_end(self, trainer: Trainer, pl_module: LightningModu... function flops_per_param (line 370) | def flops_per_param(config: Config, n_params: int) -> int: function estimate_flops (line 379) | def estimate_flops(model: GPT) -> int: function measure_flops (line 401) | def measure_flops(model: GPT, x: torch.Tensor) -> int: FILE: lit_gpt/tokenizer.py class Tokenizer (line 8) | class Tokenizer: method __init__ (line 9) | def __init__(self, checkpoint_dir: Path) -> None: method vocab_size (line 32) | def vocab_size(self) -> int: method token_to_id (line 39) | def token_to_id(self, token: str) -> int: method encode (line 50) | def encode( method decode (line 75) | def decode(self, tensor: torch.Tensor) -> str: FILE: lit_gpt/utils.py function find_multiple (line 20) | def find_multiple(n: int, k: int) -> int: function num_parameters (line 27) | def num_parameters(module: nn.Module, requires_grad: Optional[bool] = No... function quantization (line 32) | def quantization(mode: Optional[str] = None): class NotYetLoadedTensor (line 94) | class NotYetLoadedTensor: method __init__ (line 95) | def __init__(self, metatensor, archiveinfo, storageinfo, rebuild_args): method rebuild_from_type_v2 (line 102) | def rebuild_from_type_v2(cls, func, new_type, args, state, *, archivei... method rebuild_parameter (line 116) | def rebuild_parameter(cls, data, requires_grad, backward_hooks, *, arc... method rebuild_tensor_v2 (line 129) | def rebuild_tensor_v2( method _load_tensor (line 139) | def _load_tensor(self): method __torch_function__ (line 156) | def __torch_function__(cls, func, types, args=(), kwargs=None): method __getattr__ (line 163) | def __getattr__(self, name): method __repr__ (line 190) | def __repr__(self): class LazyLoadingUnpickler (line 194) | class LazyLoadingUnpickler(pickle.Unpickler): method __init__ (line 195) | def __init__(self, file, zipfile_context): method find_class (line 199) | def find_class(self, module, name): method persistent_load (line 209) | def persistent_load(self, pid): class lazy_load (line 218) | class lazy_load: method __init__ (line 219) | def __init__(self, fn): method __enter__ (line 225) | def __enter__(self): method __exit__ (line 228) | def __exit__(self, exc_type, exc_val, exc_tb): function check_valid_checkpoint_dir (line 233) | def check_valid_checkpoint_dir(checkpoint_dir: Path) -> None: class SavingProxyForStorage (line 267) | class SavingProxyForStorage: method __init__ (line 268) | def __init__(self, obj, saver, protocol_version=5): method __reduce_ex__ (line 291) | def __reduce_ex__(self, protocol_version): class SavingProxyForTensor (line 295) | class SavingProxyForTensor: method __init__ (line 296) | def __init__(self, tensor, saver, protocol_version=5): method __reduce_ex__ (line 303) | def __reduce_ex__(self, protocol_version): class IncrementalPyTorchPickler (line 309) | class IncrementalPyTorchPickler(pickle.Pickler): method __init__ (line 310) | def __init__(self, saver, *args, **kwargs): method persistent_id (line 317) | def persistent_id(self, obj): class incremental_save (line 365) | class incremental_save: method __init__ (line 366) | def __init__(self, name): method __enter__ (line 372) | def __enter__(self): method store_early (line 375) | def store_early(self, tensor): method save (line 380) | def save(self, obj): method _write_storage_and_return_key (line 391) | def _write_storage_and_return_key(self, storage): method __exit__ (line 403) | def __exit__(self, type, value, traceback): function step_csv_logger (line 410) | def step_csv_logger(*args: Any, cls: Type[T] = CSVLogger, **kwargs: Any)... function chunked_cross_entropy (line 440) | def chunked_cross_entropy( function map_old_state_dict_weights (line 482) | def map_old_state_dict_weights(state_dict: Dict, mapping: Mapping, prefi... function get_default_supported_precision (line 491) | def get_default_supported_precision(training: bool, tpu: bool = False) -... FILE: pretrain/tinyllama.py function setup (line 78) | def setup( function main (line 110) | def main(fabric, train_data_dir, val_data_dir, resume): function train (line 165) | def train(fabric, state, train_dataloader, val_dataloader, monitor, resu... function validate (line 278) | def validate(fabric: L.Fabric, model: torch.nn.Module, val_dataloader: D... function create_dataloader (line 301) | def create_dataloader( function create_dataloaders (line 339) | def create_dataloaders( function get_lr (line 375) | def get_lr(it): FILE: pretrain/tinyllama_code.py function setup (line 77) | def setup( function main (line 109) | def main(fabric, train_data_dir, val_data_dir, resume): function train (line 169) | def train(fabric, state, train_dataloader, val_dataloader, monitor, resu... function validate (line 282) | def validate(fabric: L.Fabric, model: torch.nn.Module, val_dataloader: D... function create_dataloader (line 305) | def create_dataloader( function create_dataloaders (line 343) | def create_dataloaders( function get_lr (line 379) | def get_lr(it): FILE: scripts/convert_hf_checkpoint.py function copy_weights_gpt_neox (line 19) | def copy_weights_gpt_neox( function copy_weights_falcon (line 62) | def copy_weights_falcon( function copy_weights_hf_llama (line 111) | def copy_weights_hf_llama( function layer_template (line 173) | def layer_template(layer_name: str, idx: int) -> Tuple[str, int]: function load_param (line 181) | def load_param(param: Union[torch.Tensor, NotYetLoadedTensor], name: str... function convert_hf_checkpoint (line 193) | def convert_hf_checkpoint( FILE: scripts/convert_lit_checkpoint.py function layer_template (line 20) | def layer_template(layer_name: str, idx: int) -> Tuple[str, int]: function load_param (line 28) | def load_param(param: Union[torch.Tensor, NotYetLoadedTensor], name: str... function copy_weights_falcon (line 37) | def copy_weights_falcon( function copy_weights_gpt_neox (line 85) | def copy_weights_gpt_neox( function copy_weights_llama (line 121) | def copy_weights_llama( function tensor_split (line 170) | def tensor_split( function maybe_unwrap_state_dict (line 215) | def maybe_unwrap_state_dict(lit_weights: Dict[str, torch.Tensor]) -> Dic... function check_conversion_supported (line 219) | def check_conversion_supported(lit_weights: Dict[str, torch.Tensor]) -> ... function get_tinyllama_init_hf_config (line 232) | def get_tinyllama_init_hf_config() -> dict: function convert_config_lit_to_hf (line 257) | def convert_config_lit_to_hf(lit_config_dict: dict) -> dict: function convert_lit_checkpoint (line 277) | def convert_lit_checkpoint(*, FILE: scripts/prepare_redpajama.py function prepare_sample (line 42) | def prepare_sample( function prepare_full (line 86) | def prepare_full( function prepare (line 142) | def prepare( FILE: scripts/prepare_slimpajama.py function prepare_full (line 26) | def prepare_full( function prepare (line 73) | def prepare( FILE: scripts/prepare_starcoder.py function prepare_full (line 21) | def prepare_full( function prepare (line 69) | def prepare( FILE: sft/finetune.py class ModelArguments (line 52) | class ModelArguments: class DataArguments (line 63) | class DataArguments: class TrainingArguments (line 99) | class TrainingArguments(transformers.Seq2SeqTrainingArguments): class GenerationArguments (line 133) | class GenerationArguments: function get_accelerate_model (line 168) | def get_accelerate_model(args, checkpoint_dir): function print_trainable_parameters (line 212) | def print_trainable_parameters(args, model): function smart_tokenizer_and_embedding_resize (line 227) | def smart_tokenizer_and_embedding_resize( class DataCollatorForCausalLM (line 252) | class DataCollatorForCausalLM(object): method __call__ (line 259) | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: function extract_unnatural_instructions_data (line 304) | def extract_unnatural_instructions_data(examples, extract_reformulations... function extract_alpaca_dataset (line 334) | def extract_alpaca_dataset(example): function local_dataset (line 341) | def local_dataset(dataset_name): function make_data_module (line 354) | def make_data_module(tokenizer: transformers.PreTrainedTokenizer, args) ... function get_last_checkpoint (line 478) | def get_last_checkpoint(checkpoint_dir): function train (line 492) | def train():