SYMBOL INDEX (276 symbols across 24 files)

FILE: lit_gpt/adapter.py
  class Config (line 37) | class Config(BaseConfig):
  class GPT (line 42) | class GPT(BaseModel):
    method __init__ (line 46) | def __init__(self, config: Config) -> None:
    method reset_cache (line 65) | def reset_cache(self) -> None:
    method forward (line 69) | def forward(
    method from_name (line 130) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method _init_weights (line 133) | def _init_weights(self, module: nn.Module) -> None:
  class Block (line 140) | class Block(nn.Module):
    method __init__ (line 144) | def __init__(self, config: Config, block_idx: int) -> None:
    method forward (line 154) | def forward(
  class CausalSelfAttention (line 182) | class CausalSelfAttention(BaseCausalSelfAttention):
    method __init__ (line 186) | def __init__(self, config: Config, block_idx: int) -> None:
    method forward (line 196) | def forward(
    method reset_parameters (line 281) | def reset_parameters(self) -> None:
    method _load_from_state_dict (line 284) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  function mark_only_adapter_as_trainable (line 291) | def mark_only_adapter_as_trainable(model: GPT) -> None:
  function adapter_filter (line 297) | def adapter_filter(key: str, value: Any) -> bool:

FILE: lit_gpt/adapter_v2.py
  class Config (line 41) | class Config(BaseConfig):
    method mlp_class (line 43) | def mlp_class(self) -> Type:
  function adapter_filter (line 47) | def adapter_filter(key: str, value: Any) -> bool:
  class AdapterV2Linear (line 63) | class AdapterV2Linear(torch.nn.Module):
    method __init__ (line 64) | def __init__(self, in_features: int, out_features: int, **kwargs) -> N...
    method forward (line 71) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method reset_parameters (line 74) | def reset_parameters(self) -> None:
  class GPT (line 79) | class GPT(BaseModel):
    method __init__ (line 80) | def __init__(self, config: Config) -> None:
    method from_name (line 101) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method _init_weights (line 104) | def _init_weights(self, module: nn.Module) -> None:
    method _load_from_state_dict (line 112) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class Block (line 119) | class Block(BaseBlock):
    method __init__ (line 123) | def __init__(self, config: Config, block_idx: int) -> None:
  class CausalSelfAttention (line 135) | class CausalSelfAttention(BaseCausalSelfAttention):
    method __init__ (line 136) | def __init__(self, config: Config, block_idx: int) -> None:
    method forward (line 160) | def forward(
    method reset_parameters (line 245) | def reset_parameters(self) -> None:
    method _load_from_state_dict (line 248) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class GptNeoxMLP (line 263) | class GptNeoxMLP(lit_gpt.model.GptNeoxMLP):
    method __init__ (line 264) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 269) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class LLaMAMLP (line 281) | class LLaMAMLP(lit_gpt.model.LLaMAMLP):
    method __init__ (line 282) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 288) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  function mark_only_adapter_v2_as_trainable (line 302) | def mark_only_adapter_v2_as_trainable(model: GPT) -> None:

FILE: lit_gpt/config.py
  class Config (line 27) | class Config:
    method __post_init__ (line 72) | def __post_init__(self):
    method head_size (line 90) | def head_size(self) -> int:
    method from_name (line 94) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method mlp_class (line 100) | def mlp_class(self) -> Type:
    method norm_class (line 105) | def norm_class(self) -> Type:

FILE: lit_gpt/fused_cross_entropy.py
  class SoftmaxCrossEntropyLossFn (line 30) | class SoftmaxCrossEntropyLossFn(torch.autograd.Function):
    method forward (line 32) | def forward(
    method backward (line 118) | def backward(ctx, grad_loss):
  class FusedCrossEntropyLoss (line 128) | class FusedCrossEntropyLoss(nn.Module):
    method __init__ (line 129) | def __init__(
    method forward (line 146) | def forward(self, input, target):

FILE: lit_gpt/fused_rotary_embedding.py
  class ApplyRotaryEmb (line 25) | class ApplyRotaryEmb(torch.autograd.Function):
    method forward (line 27) | def forward(ctx, x, cos, sin, interleaved=False, inplace=False):
    method backward (line 71) | def backward(ctx, do):

FILE: lit_gpt/lora.py
  class LoRALayer (line 77) | class LoRALayer(nn.Module):
    method __init__ (line 78) | def __init__(self, r: int, lora_alpha: int, lora_dropout: float):
  class LoRALinear (line 102) | class LoRALinear(LoRALayer):
    method __init__ (line 104) | def __init__(
    method reset_parameters (line 143) | def reset_parameters(self):
    method merge (line 151) | def merge(self):
    method forward (line 158) | def forward(self, x: torch.Tensor):
  class LoRAQKVLinear (line 168) | class LoRAQKVLinear(LoRALinear):
    method __init__ (line 170) | def __init__(
    method zero_pad (line 271) | def zero_pad(self, x: torch.Tensor) -> torch.Tensor:
    method conv1d (line 313) | def conv1d(self, input: torch.Tensor, weight: torch.Tensor) -> torch.T...
    method merge (line 348) | def merge(self):
    method forward (line 366) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function mark_only_lora_as_trainable (line 404) | def mark_only_lora_as_trainable(model: nn.Module, bias: str = "none") ->...
  function lora_filter (line 437) | def lora_filter(key: str, value: Any) -> bool:
  class Config (line 442) | class Config(BaseConfig):
    method mlp_class (line 465) | def mlp_class(self) -> Type:
  class GPT (line 469) | class GPT(BaseModel):
    method __init__ (line 470) | def __init__(self, config: Config) -> None:
    method forward (line 496) | def forward(
    method from_name (line 554) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method _init_weights (line 557) | def _init_weights(self, module: nn.Module) -> None:
    method _load_from_state_dict (line 563) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class Block (line 570) | class Block(BaseBlock):
    method __init__ (line 571) | def __init__(self, config: Config) -> None:
  class CausalSelfAttention (line 582) | class CausalSelfAttention(BaseCausalSelfAttention):
    method __init__ (line 583) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 619) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class GptNeoxMLP (line 631) | class GptNeoxMLP(lit_gpt.model.GptNeoxMLP):
    method __init__ (line 632) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 651) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class LLaMAMLP (line 663) | class LLaMAMLP(lit_gpt.model.LLaMAMLP):
    method __init__ (line 664) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 691) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  function merge_lora_weights (line 705) | def merge_lora_weights(model: GPT) -> None:

FILE: lit_gpt/model.py
  class GPT (line 42) | class GPT(nn.Module):
    method __init__ (line 43) | def __init__(self, config: Config) -> None:
    method _init_weights (line 60) | def _init_weights(self, module: nn.Module, n_layer) -> None:
    method reset_cache (line 77) | def reset_cache(self) -> None:
    method forward (line 84) | def forward(
    method from_name (line 136) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method build_rope_cache (line 139) | def build_rope_cache(self, idx: torch.Tensor) -> RoPECache:
    method build_mask_cache (line 148) | def build_mask_cache(self, idx: torch.Tensor) -> torch.Tensor:
    method build_kv_caches (line 152) | def build_kv_caches(self, idx: torch.Tensor, max_seq_length: int, rope...
  class Block (line 170) | class Block(nn.Module):
    method __init__ (line 171) | def __init__(self, config: Config) -> None:
    method forward (line 179) | def forward(
  class CausalSelfAttention (line 206) | class CausalSelfAttention(nn.Module):
    method __init__ (line 207) | def __init__(self, config: Config) -> None:
    method forward (line 217) | def forward(
    method scaled_dot_product_attention (line 289) | def scaled_dot_product_attention(
    method raw_product_attention (line 317) | def raw_product_attention(self, query, key, value, attn_mask=None, dro...
  function test_attn (line 340) | def test_attn(query, key, value, attn_mask=None, dropout_p=0.0, is_causa...
  class GptNeoxMLP (line 362) | class GptNeoxMLP(nn.Module):
    method __init__ (line 363) | def __init__(self, config: Config) -> None:
    method forward (line 368) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LLaMAMLP (line 374) | class LLaMAMLP(nn.Module):  ##NOTE: changed to use torch ativation versi...
    method __init__ (line 375) | def __init__(self, config: Config) -> None:
    method forward (line 381) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function build_rope_cache (line 389) | def build_rope_cache(
  function apply_rope (line 419) | def apply_rope(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) ->...

FILE: lit_gpt/packed_dataset.py
  function code (line 27) | def code(dtype):
  class PackedDataset (line 38) | class PackedDataset(IterableDataset):
    method __init__ (line 39) | def __init__(
    method __iter__ (line 51) | def __iter__(self):
  class PackedDatasetBuilder (line 71) | class PackedDatasetBuilder(object):
    method __init__ (line 72) | def __init__(self, outdir, prefix, chunk_size, sep_token, dtype="auto"...
    method _write_chunk (line 94) | def _write_chunk(self):
    method dtype (line 111) | def dtype(self):
    method filenames (line 115) | def filenames(self):
    method add_array (line 118) | def add_array(self, arr):
    method write_reminder (line 129) | def write_reminder(self):
  class PackedDatasetIterator (line 133) | class PackedDatasetIterator:
    method __init__ (line 134) | def __init__(self, filenames, n_chunks, block_size, seed, shuffle, wrap):
    method _read_header (line 162) | def _read_header(self, path):
    method _close_mmaps (line 173) | def _close_mmaps(self):
    method _load_n_chunks (line 177) | def _load_n_chunks(self):
    method __del__ (line 204) | def __del__(self):
    method __iter__ (line 209) | def __iter__(self):
    method __next__ (line 212) | def __next__(self):
  class CombinedDataset (line 226) | class CombinedDataset(IterableDataset):
    method __init__ (line 227) | def __init__(self, datasets, seed, weights=None):
    method __iter__ (line 235) | def __iter__(self):
  class CombinedDatasetIterator (line 239) | class CombinedDatasetIterator:
    method __init__ (line 240) | def __init__(self, datasets, seed, weights):
    method __next__ (line 245) | def __next__(self):

FILE: lit_gpt/rmsnorm.py
  class RMSNorm (line 24) | class RMSNorm(torch.nn.Module):
    method __init__ (line 31) | def __init__(self, size: int, dim: int = -1, eps: float = 1e-5) -> None:
    method forward (line 37) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method reset_parameters (line 43) | def reset_parameters(self):
  class FusedRMSNorm (line 48) | class FusedRMSNorm(apex.normalization.FusedRMSNorm):
    method __init__ (line 49) | def __init__(self, size: int, dim: int = -1, eps: float = 1e-5):
    method reset_parameters (line 56) | def reset_parameters(self):

FILE: lit_gpt/rotary_ebm.py
  function apply_rotary_pos_emb (line 19) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  function rotate_half (line 47) | def rotate_half(x):

FILE: lit_gpt/speed_monitor.py
  function get_flops_available (line 89) | def get_flops_available(device: torch.device, precision: str) -> Optiona...
  class SpeedMonitorBase (line 137) | class SpeedMonitorBase:
    method __init__ (line 198) | def __init__(
    method on_train_batch_end (line 234) | def on_train_batch_end(
    method eval_end (line 312) | def eval_end(self, eval_elapsed: float):
  class SpeedMonitorFabric (line 316) | class SpeedMonitorFabric(SpeedMonitorBase):
    method __init__ (line 317) | def __init__(self, fabric: Fabric, *args: Any, **kwargs: Any) -> None:
    method on_train_batch_end (line 323) | def on_train_batch_end(self, *args: Any, **kwargs: Any):
  class SpeedMonitorCallback (line 327) | class SpeedMonitorCallback(Callback):
    method __init__ (line 328) | def __init__(self, length_fn: Callable[[Any], int], batch_size: int, *...
    method setup (line 338) | def setup(self, trainer: Trainer, pl_module: LightningModule, stage: s...
    method on_train_start (line 348) | def on_train_start(self, trainer: Trainer, pl_module: LightningModule)...
    method on_train_batch_end (line 355) | def on_train_batch_end(
    method on_validation_start (line 375) | def on_validation_start(self, trainer: Trainer, pl_module: LightningMo...
    method on_validation_end (line 379) | def on_validation_end(self, trainer: Trainer, pl_module: LightningModu...
  function flops_per_param (line 385) | def flops_per_param(config: Config, n_params: int) -> int:
  function estimate_flops (line 394) | def estimate_flops(model: GPT) -> int:
  function measure_flops (line 416) | def measure_flops(model: GPT, x: torch.Tensor) -> int:

FILE: lit_gpt/tokenizer.py
  class Tokenizer (line 23) | class Tokenizer:
    method __init__ (line 24) | def __init__(self, checkpoint_dir: Path) -> None:
    method vocab_size (line 47) | def vocab_size(self) -> int:
    method token_to_id (line 54) | def token_to_id(self, token: str) -> int:
    method encode (line 65) | def encode(
    method decode (line 90) | def decode(self, tensor: torch.Tensor) -> str:

FILE: lit_gpt/utils.py
  function find_multiple (line 34) | def find_multiple(n: int, k: int) -> int:
  function num_parameters (line 41) | def num_parameters(module: nn.Module, requires_grad: Optional[bool] = No...
  function quantization (line 46) | def quantization(mode: Optional[str] = None):
  class NotYetLoadedTensor (line 108) | class NotYetLoadedTensor:
    method __init__ (line 109) | def __init__(self, metatensor, archiveinfo, storageinfo, rebuild_args):
    method rebuild_from_type_v2 (line 116) | def rebuild_from_type_v2(cls, func, new_type, args, state, *, archivei...
    method rebuild_parameter (line 130) | def rebuild_parameter(cls, data, requires_grad, backward_hooks, *, arc...
    method rebuild_tensor_v2 (line 143) | def rebuild_tensor_v2(
    method _load_tensor (line 153) | def _load_tensor(self):
    method __torch_function__ (line 170) | def __torch_function__(cls, func, types, args=(), kwargs=None):
    method __getattr__ (line 177) | def __getattr__(self, name):
    method __repr__ (line 204) | def __repr__(self):
  class LazyLoadingUnpickler (line 208) | class LazyLoadingUnpickler(pickle.Unpickler):
    method __init__ (line 209) | def __init__(self, file, zipfile_context):
    method find_class (line 213) | def find_class(self, module, name):
    method persistent_load (line 223) | def persistent_load(self, pid):
  class lazy_load (line 232) | class lazy_load:
    method __init__ (line 233) | def __init__(self, fn):
    method __enter__ (line 239) | def __enter__(self):
    method __exit__ (line 242) | def __exit__(self, exc_type, exc_val, exc_tb):
  function check_valid_checkpoint_dir (line 247) | def check_valid_checkpoint_dir(checkpoint_dir: Path) -> None:
  class SavingProxyForStorage (line 281) | class SavingProxyForStorage:
    method __init__ (line 282) | def __init__(self, obj, saver, protocol_version=5):
    method __reduce_ex__ (line 305) | def __reduce_ex__(self, protocol_version):
  class SavingProxyForTensor (line 309) | class SavingProxyForTensor:
    method __init__ (line 310) | def __init__(self, tensor, saver, protocol_version=5):
    method __reduce_ex__ (line 317) | def __reduce_ex__(self, protocol_version):
  class IncrementalPyTorchPickler (line 323) | class IncrementalPyTorchPickler(pickle.Pickler):
    method __init__ (line 324) | def __init__(self, saver, *args, **kwargs):
    method persistent_id (line 331) | def persistent_id(self, obj):
  class incremental_save (line 379) | class incremental_save:
    method __init__ (line 380) | def __init__(self, name):
    method __enter__ (line 386) | def __enter__(self):
    method store_early (line 389) | def store_early(self, tensor):
    method save (line 394) | def save(self, obj):
    method _write_storage_and_return_key (line 405) | def _write_storage_and_return_key(self, storage):
    method __exit__ (line 417) | def __exit__(self, type, value, traceback):
  function step_csv_logger (line 424) | def step_csv_logger(*args: Any, cls: Type[T] = CSVLogger, **kwargs: Any)...
  function chunked_cross_entropy (line 454) | def chunked_cross_entropy(
  function map_old_state_dict_weights (line 496) | def map_old_state_dict_weights(state_dict: Dict, mapping: Mapping, prefi...
  function get_default_supported_precision (line 505) | def get_default_supported_precision(training: bool, tpu: bool = False) -...

FILE: pretrain/tinyllama.py
  function setup (line 102) | def setup(
  function main (line 135) | def main(fabric, train_data_dir, val_data_dir, resume, model_name=None):
  function train (line 190) | def train(fabric, state, train_dataloader, val_dataloader, monitor, resu...
  function validate (line 304) | def validate(fabric: L.Fabric, model: torch.nn.Module, val_dataloader: D...
  function create_dataloader (line 327) | def create_dataloader(
  function create_dataloaders (line 365) | def create_dataloaders(
  function get_lr (line 401) | def get_lr(it):

FILE: pretrain/tinyllama_code.py
  function setup (line 102) | def setup(
  function main (line 136) | def main(fabric, train_data_dir, val_data_dir, resume, model_name=None, ...
  function train (line 192) | def train(fabric, state, train_dataloader, val_dataloader, monitor, resu...
  function validate (line 306) | def validate(fabric: L.Fabric, model: torch.nn.Module, val_dataloader: D...
  function create_dataloader (line 329) | def create_dataloader(
  function create_dataloaders (line 367) | def create_dataloaders(
  function get_lr (line 403) | def get_lr(it):

FILE: scripts/convert_hf_checkpoint.py
  function copy_weights_gpt_neox (line 34) | def copy_weights_gpt_neox(
  function copy_weights_falcon (line 77) | def copy_weights_falcon(
  function copy_weights_hf_llama (line 126) | def copy_weights_hf_llama(
  function layer_template (line 188) | def layer_template(layer_name: str, idx: int) -> Tuple[str, int]:
  function load_param (line 196) | def load_param(param: Union[torch.Tensor, NotYetLoadedTensor], name: str...
  function convert_hf_checkpoint (line 208) | def convert_hf_checkpoint(

FILE: scripts/convert_lit_checkpoint.py
  function layer_template (line 35) | def layer_template(layer_name: str, idx: int) -> Tuple[str, int]:
  function load_param (line 43) | def load_param(param: Union[torch.Tensor, NotYetLoadedTensor], name: str...
  function copy_weights_falcon (line 52) | def copy_weights_falcon(
  function copy_weights_gpt_neox (line 100) | def copy_weights_gpt_neox(
  function copy_weights_llama (line 136) | def copy_weights_llama(
  function tensor_split (line 185) | def tensor_split(
  function maybe_unwrap_state_dict (line 230) | def maybe_unwrap_state_dict(lit_weights: Dict[str, torch.Tensor]) -> Dic...
  function check_conversion_supported (line 234) | def check_conversion_supported(lit_weights: Dict[str, torch.Tensor]) -> ...
  function get_tinyllama_init_hf_config (line 247) | def get_tinyllama_init_hf_config() -> dict:
  function convert_config_lit_to_hf (line 272) | def convert_config_lit_to_hf(lit_config_dict: dict) -> dict:
  function convert_lit_checkpoint (line 292) | def convert_lit_checkpoint(*,

FILE: scripts/prepare_mnbvc.py
  function prepare_full (line 36) | def prepare_full(
  function prepare (line 89) | def prepare(

FILE: scripts/prepare_project_gutenberg.py
  function prepare_full (line 36) | def prepare_full(
  function prepare (line 86) | def prepare(

FILE: scripts/prepare_skypile.py
  function prepare_full (line 36) | def prepare_full(
  function prepare (line 85) | def prepare(

FILE: scripts/prepare_slimpajama.py
  function prepare_full (line 41) | def prepare_full(
  function prepare (line 101) | def prepare(

FILE: scripts/prepare_starcoder.py
  function prepare_full (line 36) | def prepare_full(
  function prepare (line 84) | def prepare(

FILE: scripts/prepare_starcoder_python.py
  function prepare_full (line 36) | def prepare_full(
  function prepare (line 85) | def prepare(

FILE: speculative_decoding/codellama_spec.py
  class LlamaModelEval (line 32) | class LlamaModelEval(LlamaForCausalLM):
    method __init__ (line 33) | def __init__(self, *args, **kwargs):
    method forward (line 38) | def forward(self, *args, **kwargs):
  class LlamaModelEval_Draft (line 46) | class LlamaModelEval_Draft(LlamaForCausalLM):
    method __init__ (line 47) | def __init__(self, *args, **kwargs):
    method forward (line 52) | def forward(self, *args, **kwargs):
  class ProfileLLM (line 63) | class ProfileLLM:
    method clear_entries (line 75) | def clear_entries(cls):
    method collect_sections (line 87) | def collect_sections(cls, logf):
    method parse_section (line 112) | def parse_section(
    method parse_section_ag (line 202) | def parse_section_ag(
    method analyze_profiling (line 300) | def analyze_profiling(cls, in_file, out_file):
  function benchmark_code (line 385) | def benchmark_code(model, log_file, max_seqlen=512, assistant_model=None...
  function warmup (line 501) | def warmup(model, prompts, max_new_tokens=30):
  function main (line 509) | def main():