SYMBOL INDEX (276 symbols across 24 files) FILE: lit_gpt/adapter.py class Config (line 37) | class Config(BaseConfig): class GPT (line 42) | class GPT(BaseModel): method __init__ (line 46) | def __init__(self, config: Config) -> None: method reset_cache (line 65) | def reset_cache(self) -> None: method forward (line 69) | def forward( method from_name (line 130) | def from_name(cls, name: str, **kwargs: Any) -> Self: method _init_weights (line 133) | def _init_weights(self, module: nn.Module) -> None: class Block (line 140) | class Block(nn.Module): method __init__ (line 144) | def __init__(self, config: Config, block_idx: int) -> None: method forward (line 154) | def forward( class CausalSelfAttention (line 182) | class CausalSelfAttention(BaseCausalSelfAttention): method __init__ (line 186) | def __init__(self, config: Config, block_idx: int) -> None: method forward (line 196) | def forward( method reset_parameters (line 281) | def reset_parameters(self) -> None: method _load_from_state_dict (line 284) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... function mark_only_adapter_as_trainable (line 291) | def mark_only_adapter_as_trainable(model: GPT) -> None: function adapter_filter (line 297) | def adapter_filter(key: str, value: Any) -> bool: FILE: lit_gpt/adapter_v2.py class Config (line 41) | class Config(BaseConfig): method mlp_class (line 43) | def mlp_class(self) -> Type: function adapter_filter (line 47) | def adapter_filter(key: str, value: Any) -> bool: class AdapterV2Linear (line 63) | class AdapterV2Linear(torch.nn.Module): method __init__ (line 64) | def __init__(self, in_features: int, out_features: int, **kwargs) -> N... method forward (line 71) | def forward(self, x: torch.Tensor) -> torch.Tensor: method reset_parameters (line 74) | def reset_parameters(self) -> None: class GPT (line 79) | class GPT(BaseModel): method __init__ (line 80) | def __init__(self, config: Config) -> None: method from_name (line 101) | def from_name(cls, name: str, **kwargs: Any) -> Self: method _init_weights (line 104) | def _init_weights(self, module: nn.Module) -> None: method _load_from_state_dict (line 112) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class Block (line 119) | class Block(BaseBlock): method __init__ (line 123) | def __init__(self, config: Config, block_idx: int) -> None: class CausalSelfAttention (line 135) | class CausalSelfAttention(BaseCausalSelfAttention): method __init__ (line 136) | def __init__(self, config: Config, block_idx: int) -> None: method forward (line 160) | def forward( method reset_parameters (line 245) | def reset_parameters(self) -> None: method _load_from_state_dict (line 248) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class GptNeoxMLP (line 263) | class GptNeoxMLP(lit_gpt.model.GptNeoxMLP): method __init__ (line 264) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 269) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class LLaMAMLP (line 281) | class LLaMAMLP(lit_gpt.model.LLaMAMLP): method __init__ (line 282) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 288) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... function mark_only_adapter_v2_as_trainable (line 302) | def mark_only_adapter_v2_as_trainable(model: GPT) -> None: FILE: lit_gpt/config.py class Config (line 27) | class Config: method __post_init__ (line 72) | def __post_init__(self): method head_size (line 90) | def head_size(self) -> int: method from_name (line 94) | def from_name(cls, name: str, **kwargs: Any) -> Self: method mlp_class (line 100) | def mlp_class(self) -> Type: method norm_class (line 105) | def norm_class(self) -> Type: FILE: lit_gpt/fused_cross_entropy.py class SoftmaxCrossEntropyLossFn (line 30) | class SoftmaxCrossEntropyLossFn(torch.autograd.Function): method forward (line 32) | def forward( method backward (line 118) | def backward(ctx, grad_loss): class FusedCrossEntropyLoss (line 128) | class FusedCrossEntropyLoss(nn.Module): method __init__ (line 129) | def __init__( method forward (line 146) | def forward(self, input, target): FILE: lit_gpt/fused_rotary_embedding.py class ApplyRotaryEmb (line 25) | class ApplyRotaryEmb(torch.autograd.Function): method forward (line 27) | def forward(ctx, x, cos, sin, interleaved=False, inplace=False): method backward (line 71) | def backward(ctx, do): FILE: lit_gpt/lora.py class LoRALayer (line 77) | class LoRALayer(nn.Module): method __init__ (line 78) | def __init__(self, r: int, lora_alpha: int, lora_dropout: float): class LoRALinear (line 102) | class LoRALinear(LoRALayer): method __init__ (line 104) | def __init__( method reset_parameters (line 143) | def reset_parameters(self): method merge (line 151) | def merge(self): method forward (line 158) | def forward(self, x: torch.Tensor): class LoRAQKVLinear (line 168) | class LoRAQKVLinear(LoRALinear): method __init__ (line 170) | def __init__( method zero_pad (line 271) | def zero_pad(self, x: torch.Tensor) -> torch.Tensor: method conv1d (line 313) | def conv1d(self, input: torch.Tensor, weight: torch.Tensor) -> torch.T... method merge (line 348) | def merge(self): method forward (line 366) | def forward(self, x: torch.Tensor) -> torch.Tensor: function mark_only_lora_as_trainable (line 404) | def mark_only_lora_as_trainable(model: nn.Module, bias: str = "none") ->... function lora_filter (line 437) | def lora_filter(key: str, value: Any) -> bool: class Config (line 442) | class Config(BaseConfig): method mlp_class (line 465) | def mlp_class(self) -> Type: class GPT (line 469) | class GPT(BaseModel): method __init__ (line 470) | def __init__(self, config: Config) -> None: method forward (line 496) | def forward( method from_name (line 554) | def from_name(cls, name: str, **kwargs: Any) -> Self: method _init_weights (line 557) | def _init_weights(self, module: nn.Module) -> None: method _load_from_state_dict (line 563) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class Block (line 570) | class Block(BaseBlock): method __init__ (line 571) | def __init__(self, config: Config) -> None: class CausalSelfAttention (line 582) | class CausalSelfAttention(BaseCausalSelfAttention): method __init__ (line 583) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 619) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class GptNeoxMLP (line 631) | class GptNeoxMLP(lit_gpt.model.GptNeoxMLP): method __init__ (line 632) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 651) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class LLaMAMLP (line 663) | class LLaMAMLP(lit_gpt.model.LLaMAMLP): method __init__ (line 664) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 691) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... function merge_lora_weights (line 705) | def merge_lora_weights(model: GPT) -> None: FILE: lit_gpt/model.py class GPT (line 42) | class GPT(nn.Module): method __init__ (line 43) | def __init__(self, config: Config) -> None: method _init_weights (line 60) | def _init_weights(self, module: nn.Module, n_layer) -> None: method reset_cache (line 77) | def reset_cache(self) -> None: method forward (line 84) | def forward( method from_name (line 136) | def from_name(cls, name: str, **kwargs: Any) -> Self: method build_rope_cache (line 139) | def build_rope_cache(self, idx: torch.Tensor) -> RoPECache: method build_mask_cache (line 148) | def build_mask_cache(self, idx: torch.Tensor) -> torch.Tensor: method build_kv_caches (line 152) | def build_kv_caches(self, idx: torch.Tensor, max_seq_length: int, rope... class Block (line 170) | class Block(nn.Module): method __init__ (line 171) | def __init__(self, config: Config) -> None: method forward (line 179) | def forward( class CausalSelfAttention (line 206) | class CausalSelfAttention(nn.Module): method __init__ (line 207) | def __init__(self, config: Config) -> None: method forward (line 217) | def forward( method scaled_dot_product_attention (line 289) | def scaled_dot_product_attention( method raw_product_attention (line 317) | def raw_product_attention(self, query, key, value, attn_mask=None, dro... function test_attn (line 340) | def test_attn(query, key, value, attn_mask=None, dropout_p=0.0, is_causa... class GptNeoxMLP (line 362) | class GptNeoxMLP(nn.Module): method __init__ (line 363) | def __init__(self, config: Config) -> None: method forward (line 368) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LLaMAMLP (line 374) | class LLaMAMLP(nn.Module): ##NOTE: changed to use torch ativation versi... method __init__ (line 375) | def __init__(self, config: Config) -> None: method forward (line 381) | def forward(self, x: torch.Tensor) -> torch.Tensor: function build_rope_cache (line 389) | def build_rope_cache( function apply_rope (line 419) | def apply_rope(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) ->... FILE: lit_gpt/packed_dataset.py function code (line 27) | def code(dtype): class PackedDataset (line 38) | class PackedDataset(IterableDataset): method __init__ (line 39) | def __init__( method __iter__ (line 51) | def __iter__(self): class PackedDatasetBuilder (line 71) | class PackedDatasetBuilder(object): method __init__ (line 72) | def __init__(self, outdir, prefix, chunk_size, sep_token, dtype="auto"... method _write_chunk (line 94) | def _write_chunk(self): method dtype (line 111) | def dtype(self): method filenames (line 115) | def filenames(self): method add_array (line 118) | def add_array(self, arr): method write_reminder (line 129) | def write_reminder(self): class PackedDatasetIterator (line 133) | class PackedDatasetIterator: method __init__ (line 134) | def __init__(self, filenames, n_chunks, block_size, seed, shuffle, wrap): method _read_header (line 162) | def _read_header(self, path): method _close_mmaps (line 173) | def _close_mmaps(self): method _load_n_chunks (line 177) | def _load_n_chunks(self): method __del__ (line 204) | def __del__(self): method __iter__ (line 209) | def __iter__(self): method __next__ (line 212) | def __next__(self): class CombinedDataset (line 226) | class CombinedDataset(IterableDataset): method __init__ (line 227) | def __init__(self, datasets, seed, weights=None): method __iter__ (line 235) | def __iter__(self): class CombinedDatasetIterator (line 239) | class CombinedDatasetIterator: method __init__ (line 240) | def __init__(self, datasets, seed, weights): method __next__ (line 245) | def __next__(self): FILE: lit_gpt/rmsnorm.py class RMSNorm (line 24) | class RMSNorm(torch.nn.Module): method __init__ (line 31) | def __init__(self, size: int, dim: int = -1, eps: float = 1e-5) -> None: method forward (line 37) | def forward(self, x: torch.Tensor) -> torch.Tensor: method reset_parameters (line 43) | def reset_parameters(self): class FusedRMSNorm (line 48) | class FusedRMSNorm(apex.normalization.FusedRMSNorm): method __init__ (line 49) | def __init__(self, size: int, dim: int = -1, eps: float = 1e-5): method reset_parameters (line 56) | def reset_parameters(self): FILE: lit_gpt/rotary_ebm.py function apply_rotary_pos_emb (line 19) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): function rotate_half (line 47) | def rotate_half(x): FILE: lit_gpt/speed_monitor.py function get_flops_available (line 89) | def get_flops_available(device: torch.device, precision: str) -> Optiona... class SpeedMonitorBase (line 137) | class SpeedMonitorBase: method __init__ (line 198) | def __init__( method on_train_batch_end (line 234) | def on_train_batch_end( method eval_end (line 312) | def eval_end(self, eval_elapsed: float): class SpeedMonitorFabric (line 316) | class SpeedMonitorFabric(SpeedMonitorBase): method __init__ (line 317) | def __init__(self, fabric: Fabric, *args: Any, **kwargs: Any) -> None: method on_train_batch_end (line 323) | def on_train_batch_end(self, *args: Any, **kwargs: Any): class SpeedMonitorCallback (line 327) | class SpeedMonitorCallback(Callback): method __init__ (line 328) | def __init__(self, length_fn: Callable[[Any], int], batch_size: int, *... method setup (line 338) | def setup(self, trainer: Trainer, pl_module: LightningModule, stage: s... method on_train_start (line 348) | def on_train_start(self, trainer: Trainer, pl_module: LightningModule)... method on_train_batch_end (line 355) | def on_train_batch_end( method on_validation_start (line 375) | def on_validation_start(self, trainer: Trainer, pl_module: LightningMo... method on_validation_end (line 379) | def on_validation_end(self, trainer: Trainer, pl_module: LightningModu... function flops_per_param (line 385) | def flops_per_param(config: Config, n_params: int) -> int: function estimate_flops (line 394) | def estimate_flops(model: GPT) -> int: function measure_flops (line 416) | def measure_flops(model: GPT, x: torch.Tensor) -> int: FILE: lit_gpt/tokenizer.py class Tokenizer (line 23) | class Tokenizer: method __init__ (line 24) | def __init__(self, checkpoint_dir: Path) -> None: method vocab_size (line 47) | def vocab_size(self) -> int: method token_to_id (line 54) | def token_to_id(self, token: str) -> int: method encode (line 65) | def encode( method decode (line 90) | def decode(self, tensor: torch.Tensor) -> str: FILE: lit_gpt/utils.py function find_multiple (line 34) | def find_multiple(n: int, k: int) -> int: function num_parameters (line 41) | def num_parameters(module: nn.Module, requires_grad: Optional[bool] = No... function quantization (line 46) | def quantization(mode: Optional[str] = None): class NotYetLoadedTensor (line 108) | class NotYetLoadedTensor: method __init__ (line 109) | def __init__(self, metatensor, archiveinfo, storageinfo, rebuild_args): method rebuild_from_type_v2 (line 116) | def rebuild_from_type_v2(cls, func, new_type, args, state, *, archivei... method rebuild_parameter (line 130) | def rebuild_parameter(cls, data, requires_grad, backward_hooks, *, arc... method rebuild_tensor_v2 (line 143) | def rebuild_tensor_v2( method _load_tensor (line 153) | def _load_tensor(self): method __torch_function__ (line 170) | def __torch_function__(cls, func, types, args=(), kwargs=None): method __getattr__ (line 177) | def __getattr__(self, name): method __repr__ (line 204) | def __repr__(self): class LazyLoadingUnpickler (line 208) | class LazyLoadingUnpickler(pickle.Unpickler): method __init__ (line 209) | def __init__(self, file, zipfile_context): method find_class (line 213) | def find_class(self, module, name): method persistent_load (line 223) | def persistent_load(self, pid): class lazy_load (line 232) | class lazy_load: method __init__ (line 233) | def __init__(self, fn): method __enter__ (line 239) | def __enter__(self): method __exit__ (line 242) | def __exit__(self, exc_type, exc_val, exc_tb): function check_valid_checkpoint_dir (line 247) | def check_valid_checkpoint_dir(checkpoint_dir: Path) -> None: class SavingProxyForStorage (line 281) | class SavingProxyForStorage: method __init__ (line 282) | def __init__(self, obj, saver, protocol_version=5): method __reduce_ex__ (line 305) | def __reduce_ex__(self, protocol_version): class SavingProxyForTensor (line 309) | class SavingProxyForTensor: method __init__ (line 310) | def __init__(self, tensor, saver, protocol_version=5): method __reduce_ex__ (line 317) | def __reduce_ex__(self, protocol_version): class IncrementalPyTorchPickler (line 323) | class IncrementalPyTorchPickler(pickle.Pickler): method __init__ (line 324) | def __init__(self, saver, *args, **kwargs): method persistent_id (line 331) | def persistent_id(self, obj): class incremental_save (line 379) | class incremental_save: method __init__ (line 380) | def __init__(self, name): method __enter__ (line 386) | def __enter__(self): method store_early (line 389) | def store_early(self, tensor): method save (line 394) | def save(self, obj): method _write_storage_and_return_key (line 405) | def _write_storage_and_return_key(self, storage): method __exit__ (line 417) | def __exit__(self, type, value, traceback): function step_csv_logger (line 424) | def step_csv_logger(*args: Any, cls: Type[T] = CSVLogger, **kwargs: Any)... function chunked_cross_entropy (line 454) | def chunked_cross_entropy( function map_old_state_dict_weights (line 496) | def map_old_state_dict_weights(state_dict: Dict, mapping: Mapping, prefi... function get_default_supported_precision (line 505) | def get_default_supported_precision(training: bool, tpu: bool = False) -... FILE: pretrain/tinyllama.py function setup (line 102) | def setup( function main (line 135) | def main(fabric, train_data_dir, val_data_dir, resume, model_name=None): function train (line 190) | def train(fabric, state, train_dataloader, val_dataloader, monitor, resu... function validate (line 304) | def validate(fabric: L.Fabric, model: torch.nn.Module, val_dataloader: D... function create_dataloader (line 327) | def create_dataloader( function create_dataloaders (line 365) | def create_dataloaders( function get_lr (line 401) | def get_lr(it): FILE: pretrain/tinyllama_code.py function setup (line 102) | def setup( function main (line 136) | def main(fabric, train_data_dir, val_data_dir, resume, model_name=None, ... function train (line 192) | def train(fabric, state, train_dataloader, val_dataloader, monitor, resu... function validate (line 306) | def validate(fabric: L.Fabric, model: torch.nn.Module, val_dataloader: D... function create_dataloader (line 329) | def create_dataloader( function create_dataloaders (line 367) | def create_dataloaders( function get_lr (line 403) | def get_lr(it): FILE: scripts/convert_hf_checkpoint.py function copy_weights_gpt_neox (line 34) | def copy_weights_gpt_neox( function copy_weights_falcon (line 77) | def copy_weights_falcon( function copy_weights_hf_llama (line 126) | def copy_weights_hf_llama( function layer_template (line 188) | def layer_template(layer_name: str, idx: int) -> Tuple[str, int]: function load_param (line 196) | def load_param(param: Union[torch.Tensor, NotYetLoadedTensor], name: str... function convert_hf_checkpoint (line 208) | def convert_hf_checkpoint( FILE: scripts/convert_lit_checkpoint.py function layer_template (line 35) | def layer_template(layer_name: str, idx: int) -> Tuple[str, int]: function load_param (line 43) | def load_param(param: Union[torch.Tensor, NotYetLoadedTensor], name: str... function copy_weights_falcon (line 52) | def copy_weights_falcon( function copy_weights_gpt_neox (line 100) | def copy_weights_gpt_neox( function copy_weights_llama (line 136) | def copy_weights_llama( function tensor_split (line 185) | def tensor_split( function maybe_unwrap_state_dict (line 230) | def maybe_unwrap_state_dict(lit_weights: Dict[str, torch.Tensor]) -> Dic... function check_conversion_supported (line 234) | def check_conversion_supported(lit_weights: Dict[str, torch.Tensor]) -> ... function get_tinyllama_init_hf_config (line 247) | def get_tinyllama_init_hf_config() -> dict: function convert_config_lit_to_hf (line 272) | def convert_config_lit_to_hf(lit_config_dict: dict) -> dict: function convert_lit_checkpoint (line 292) | def convert_lit_checkpoint(*, FILE: scripts/prepare_mnbvc.py function prepare_full (line 36) | def prepare_full( function prepare (line 89) | def prepare( FILE: scripts/prepare_project_gutenberg.py function prepare_full (line 36) | def prepare_full( function prepare (line 86) | def prepare( FILE: scripts/prepare_skypile.py function prepare_full (line 36) | def prepare_full( function prepare (line 85) | def prepare( FILE: scripts/prepare_slimpajama.py function prepare_full (line 41) | def prepare_full( function prepare (line 101) | def prepare( FILE: scripts/prepare_starcoder.py function prepare_full (line 36) | def prepare_full( function prepare (line 84) | def prepare( FILE: scripts/prepare_starcoder_python.py function prepare_full (line 36) | def prepare_full( function prepare (line 85) | def prepare( FILE: speculative_decoding/codellama_spec.py class LlamaModelEval (line 32) | class LlamaModelEval(LlamaForCausalLM): method __init__ (line 33) | def __init__(self, *args, **kwargs): method forward (line 38) | def forward(self, *args, **kwargs): class LlamaModelEval_Draft (line 46) | class LlamaModelEval_Draft(LlamaForCausalLM): method __init__ (line 47) | def __init__(self, *args, **kwargs): method forward (line 52) | def forward(self, *args, **kwargs): class ProfileLLM (line 63) | class ProfileLLM: method clear_entries (line 75) | def clear_entries(cls): method collect_sections (line 87) | def collect_sections(cls, logf): method parse_section (line 112) | def parse_section( method parse_section_ag (line 202) | def parse_section_ag( method analyze_profiling (line 300) | def analyze_profiling(cls, in_file, out_file): function benchmark_code (line 385) | def benchmark_code(model, log_file, max_seqlen=512, assistant_model=None... function warmup (line 501) | def warmup(model, prompts, max_new_tokens=30): function main (line 509) | def main():