SYMBOL INDEX (296 symbols across 21 files)

FILE: chat_gradio/app.py
  class StopOnTokens (line 17) | class StopOnTokens(StoppingCriteria):
    method __call__ (line 18) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  function predict (line 27) | def predict(message, history):

FILE: lit_gpt/adapter.py
  class Config (line 22) | class Config(BaseConfig):
  class GPT (line 27) | class GPT(BaseModel):
    method __init__ (line 31) | def __init__(self, config: Config) -> None:
    method reset_cache (line 50) | def reset_cache(self) -> None:
    method forward (line 54) | def forward(
    method from_name (line 115) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method _init_weights (line 118) | def _init_weights(self, module: nn.Module) -> None:
  class Block (line 125) | class Block(nn.Module):
    method __init__ (line 129) | def __init__(self, config: Config, block_idx: int) -> None:
    method forward (line 139) | def forward(
  class CausalSelfAttention (line 167) | class CausalSelfAttention(BaseCausalSelfAttention):
    method __init__ (line 171) | def __init__(self, config: Config, block_idx: int) -> None:
    method forward (line 181) | def forward(
    method reset_parameters (line 266) | def reset_parameters(self) -> None:
    method _load_from_state_dict (line 269) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  function mark_only_adapter_as_trainable (line 276) | def mark_only_adapter_as_trainable(model: GPT) -> None:
  function adapter_filter (line 282) | def adapter_filter(key: str, value: Any) -> bool:

FILE: lit_gpt/adapter_v2.py
  class Config (line 26) | class Config(BaseConfig):
    method mlp_class (line 28) | def mlp_class(self) -> Type:
  function adapter_filter (line 32) | def adapter_filter(key: str, value: Any) -> bool:
  class AdapterV2Linear (line 48) | class AdapterV2Linear(torch.nn.Module):
    method __init__ (line 49) | def __init__(self, in_features: int, out_features: int, **kwargs) -> N...
    method forward (line 56) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method reset_parameters (line 59) | def reset_parameters(self) -> None:
  class GPT (line 64) | class GPT(BaseModel):
    method __init__ (line 65) | def __init__(self, config: Config) -> None:
    method from_name (line 86) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method _init_weights (line 89) | def _init_weights(self, module: nn.Module) -> None:
    method _load_from_state_dict (line 97) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class Block (line 104) | class Block(BaseBlock):
    method __init__ (line 108) | def __init__(self, config: Config, block_idx: int) -> None:
  class CausalSelfAttention (line 120) | class CausalSelfAttention(BaseCausalSelfAttention):
    method __init__ (line 121) | def __init__(self, config: Config, block_idx: int) -> None:
    method forward (line 145) | def forward(
    method reset_parameters (line 230) | def reset_parameters(self) -> None:
    method _load_from_state_dict (line 233) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class GptNeoxMLP (line 248) | class GptNeoxMLP(lit_gpt.model.GptNeoxMLP):
    method __init__ (line 249) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 254) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class LLaMAMLP (line 266) | class LLaMAMLP(lit_gpt.model.LLaMAMLP):
    method __init__ (line 267) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 273) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  function mark_only_adapter_v2_as_trainable (line 287) | def mark_only_adapter_v2_as_trainable(model: GPT) -> None:

FILE: lit_gpt/config.py
  class Config (line 12) | class Config:
    method __post_init__ (line 53) | def __post_init__(self):
    method head_size (line 71) | def head_size(self) -> int:
    method from_name (line 75) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method mlp_class (line 81) | def mlp_class(self) -> Type:
    method norm_class (line 86) | def norm_class(self) -> Type:

FILE: lit_gpt/fused_cross_entropy.py
  class SoftmaxCrossEntropyLossFn (line 15) | class SoftmaxCrossEntropyLossFn(torch.autograd.Function):
    method forward (line 17) | def forward(
    method backward (line 103) | def backward(ctx, grad_loss):
  class FusedCrossEntropyLoss (line 113) | class FusedCrossEntropyLoss(nn.Module):
    method __init__ (line 114) | def __init__(
    method forward (line 131) | def forward(self, input, target):

FILE: lit_gpt/fused_rotary_embedding.py
  class ApplyRotaryEmb (line 10) | class ApplyRotaryEmb(torch.autograd.Function):
    method forward (line 12) | def forward(ctx, x, cos, sin, interleaved=False, inplace=False):
    method backward (line 56) | def backward(ctx, do):

FILE: lit_gpt/lora.py
  class LoRALayer (line 62) | class LoRALayer(nn.Module):
    method __init__ (line 63) | def __init__(self, r: int, lora_alpha: int, lora_dropout: float):
  class LoRALinear (line 87) | class LoRALinear(LoRALayer):
    method __init__ (line 89) | def __init__(
    method reset_parameters (line 128) | def reset_parameters(self):
    method merge (line 136) | def merge(self):
    method forward (line 143) | def forward(self, x: torch.Tensor):
  class LoRAQKVLinear (line 153) | class LoRAQKVLinear(LoRALinear):
    method __init__ (line 155) | def __init__(
    method zero_pad (line 256) | def zero_pad(self, x: torch.Tensor) -> torch.Tensor:
    method conv1d (line 298) | def conv1d(self, input: torch.Tensor, weight: torch.Tensor) -> torch.T...
    method merge (line 333) | def merge(self):
    method forward (line 351) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function mark_only_lora_as_trainable (line 389) | def mark_only_lora_as_trainable(model: nn.Module, bias: str = "none") ->...
  function lora_filter (line 422) | def lora_filter(key: str, value: Any) -> bool:
  class Config (line 427) | class Config(BaseConfig):
    method mlp_class (line 450) | def mlp_class(self) -> Type:
  class GPT (line 454) | class GPT(BaseModel):
    method __init__ (line 455) | def __init__(self, config: Config) -> None:
    method forward (line 481) | def forward(
    method from_name (line 539) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method _init_weights (line 542) | def _init_weights(self, module: nn.Module) -> None:
    method _load_from_state_dict (line 548) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class Block (line 555) | class Block(BaseBlock):
    method __init__ (line 556) | def __init__(self, config: Config) -> None:
  class CausalSelfAttention (line 567) | class CausalSelfAttention(BaseCausalSelfAttention):
    method __init__ (line 568) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 604) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class GptNeoxMLP (line 616) | class GptNeoxMLP(lit_gpt.model.GptNeoxMLP):
    method __init__ (line 617) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 636) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class LLaMAMLP (line 648) | class LLaMAMLP(lit_gpt.model.LLaMAMLP):
    method __init__ (line 649) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 676) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  function merge_lora_weights (line 690) | def merge_lora_weights(model: GPT) -> None:

FILE: lit_gpt/model.py
  class GPT (line 22) | class GPT(nn.Module):
    method __init__ (line 23) | def __init__(self, config: Config) -> None:
    method _init_weights (line 40) | def _init_weights(self, module: nn.Module, n_layer) -> None:
    method reset_cache (line 57) | def reset_cache(self) -> None:
    method forward (line 64) | def forward(
    method from_name (line 116) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method build_rope_cache (line 119) | def build_rope_cache(self, idx: torch.Tensor) -> RoPECache:
    method build_mask_cache (line 128) | def build_mask_cache(self, idx: torch.Tensor) -> torch.Tensor:
    method build_kv_caches (line 132) | def build_kv_caches(self, idx: torch.Tensor, max_seq_length: int, rope...
  class Block (line 150) | class Block(nn.Module):
    method __init__ (line 151) | def __init__(self, config: Config) -> None:
    method forward (line 159) | def forward(
  class CausalSelfAttention (line 186) | class CausalSelfAttention(nn.Module):
    method __init__ (line 187) | def __init__(self, config: Config) -> None:
    method forward (line 197) | def forward(
    method scaled_dot_product_attention (line 268) | def scaled_dot_product_attention(
  class GptNeoxMLP (line 294) | class GptNeoxMLP(nn.Module):
    method __init__ (line 295) | def __init__(self, config: Config) -> None:
    method forward (line 300) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LLaMAMLP (line 306) | class LLaMAMLP(nn.Module):
    method __init__ (line 307) | def __init__(self, config: Config) -> None:
    method forward (line 313) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function build_rope_cache (line 321) | def build_rope_cache(
  function apply_rope (line 350) | def apply_rope(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) ->...

FILE: lit_gpt/packed_dataset.py
  function code (line 16) | def code(dtype):
  class PackedDataset (line 27) | class PackedDataset(IterableDataset):
    method __init__ (line 28) | def __init__(
    method __iter__ (line 40) | def __iter__(self):
  class PackedDatasetBuilder (line 60) | class PackedDatasetBuilder(object):
    method __init__ (line 61) | def __init__(self, outdir, prefix, chunk_size, sep_token, dtype="auto"...
    method _write_chunk (line 82) | def _write_chunk(self):
    method dtype (line 99) | def dtype(self):
    method filenames (line 103) | def filenames(self):
    method add_array (line 106) | def add_array(self, arr):
    method write_reminder (line 117) | def write_reminder(self):
  class PackedDatasetIterator (line 121) | class PackedDatasetIterator:
    method __init__ (line 122) | def __init__(self, filenames, n_chunks, block_size, seed, shuffle, wrap):
    method _read_header (line 150) | def _read_header(self, path):
    method _close_mmaps (line 161) | def _close_mmaps(self):
    method _load_n_chunks (line 165) | def _load_n_chunks(self):
    method __del__ (line 192) | def __del__(self):
    method __iter__ (line 197) | def __iter__(self):
    method __next__ (line 200) | def __next__(self):
  class CombinedDataset (line 214) | class CombinedDataset(IterableDataset):
    method __init__ (line 215) | def __init__(self, datasets, seed, weights=None):
    method __iter__ (line 223) | def __iter__(self):
  class CombinedDatasetIterator (line 227) | class CombinedDatasetIterator:
    method __init__ (line 228) | def __init__(self, datasets, seed, weights):
    method __next__ (line 233) | def __next__(self):

FILE: lit_gpt/rmsnorm.py
  function maybe_align (line 10) | def maybe_align(x, alignment_in_bytes=16):
  function _dropout_add_layer_norm_forward (line 17) | def _dropout_add_layer_norm_forward(
  function _dropout_add_layer_norm_backward (line 56) | def _dropout_add_layer_norm_backward(
  function _dropout_add_layer_norm_subset_forward (line 111) | def _dropout_add_layer_norm_subset_forward(
  function _dropout_add_layer_norm_subset_backward (line 154) | def _dropout_add_layer_norm_subset_backward(
  function _dropout_add_layer_norm_parallel_residual_forward (line 213) | def _dropout_add_layer_norm_parallel_residual_forward(
  function _dropout_add_layer_norm_parallel_residual_backward (line 258) | def _dropout_add_layer_norm_parallel_residual_backward(
  class DropoutAddLayerNormFn (line 312) | class DropoutAddLayerNormFn(torch.autograd.Function):
    method forward (line 314) | def forward(
    method backward (line 375) | def backward(ctx, dz, *args):
  class DropoutAddLayerNormSubsetFn (line 417) | class DropoutAddLayerNormSubsetFn(torch.autograd.Function):
    method forward (line 419) | def forward(
    method backward (line 484) | def backward(ctx, dz, *args):
  class DropoutAddLayerNormParallelResidualFn (line 532) | class DropoutAddLayerNormParallelResidualFn(torch.autograd.Function):
    method forward (line 534) | def forward(
    method backward (line 606) | def backward(ctx, dz0, dz1, *args):
  function layer_norm (line 658) | def layer_norm(x, weight, bias, epsilon):
  function dropout_add_layer_norm (line 662) | def dropout_add_layer_norm(
  function dropout_add_layer_norm_subset (line 694) | def dropout_add_layer_norm_subset(
  function dropout_add_layer_norm_parallel_residual (line 732) | def dropout_add_layer_norm_parallel_residual(
  class DropoutAddLayerNorm (line 766) | class DropoutAddLayerNorm(torch.nn.Module):
    method __init__ (line 767) | def __init__(
    method reset_parameters (line 787) | def reset_parameters(self):
    method forward (line 791) | def forward(self, x0, residual=None):
  function rms_norm (line 803) | def rms_norm(x, weight, epsilon):
  class FusedRMSNorm (line 807) | class FusedRMSNorm(torch.nn.Module):
    method __init__ (line 808) | def __init__(self, size: int, dim: int = -1, eps: float = 1e-5):
    method reset_parameters (line 815) | def reset_parameters(self):
    method forward (line 818) | def forward(self, x):
  class RMSNorm (line 822) | class RMSNorm(torch.nn.Module):
    method __init__ (line 829) | def __init__(self, size: int, dim: int = -1, eps: float = 1e-5) -> None:
    method forward (line 835) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method reset_parameters (line 841) | def reset_parameters(self):

FILE: lit_gpt/speed_monitor.py
  function get_flops_available (line 74) | def get_flops_available(device: torch.device, precision: str) -> Optiona...
  class SpeedMonitorBase (line 122) | class SpeedMonitorBase:
    method __init__ (line 183) | def __init__(
    method on_train_batch_end (line 219) | def on_train_batch_end(
    method eval_end (line 297) | def eval_end(self, eval_elapsed: float):
  class SpeedMonitorFabric (line 301) | class SpeedMonitorFabric(SpeedMonitorBase):
    method __init__ (line 302) | def __init__(self, fabric: Fabric, *args: Any, **kwargs: Any) -> None:
    method on_train_batch_end (line 308) | def on_train_batch_end(self, *args: Any, **kwargs: Any):
  class SpeedMonitorCallback (line 312) | class SpeedMonitorCallback(Callback):
    method __init__ (line 313) | def __init__(self, length_fn: Callable[[Any], int], batch_size: int, *...
    method setup (line 323) | def setup(self, trainer: Trainer, pl_module: LightningModule, stage: s...
    method on_train_start (line 333) | def on_train_start(self, trainer: Trainer, pl_module: LightningModule)...
    method on_train_batch_end (line 340) | def on_train_batch_end(
    method on_validation_start (line 360) | def on_validation_start(self, trainer: Trainer, pl_module: LightningMo...
    method on_validation_end (line 364) | def on_validation_end(self, trainer: Trainer, pl_module: LightningModu...
  function flops_per_param (line 370) | def flops_per_param(config: Config, n_params: int) -> int:
  function estimate_flops (line 379) | def estimate_flops(model: GPT) -> int:
  function measure_flops (line 401) | def measure_flops(model: GPT, x: torch.Tensor) -> int:

FILE: lit_gpt/tokenizer.py
  class Tokenizer (line 8) | class Tokenizer:
    method __init__ (line 9) | def __init__(self, checkpoint_dir: Path) -> None:
    method vocab_size (line 32) | def vocab_size(self) -> int:
    method token_to_id (line 39) | def token_to_id(self, token: str) -> int:
    method encode (line 50) | def encode(
    method decode (line 75) | def decode(self, tensor: torch.Tensor) -> str:

FILE: lit_gpt/utils.py
  function find_multiple (line 20) | def find_multiple(n: int, k: int) -> int:
  function num_parameters (line 27) | def num_parameters(module: nn.Module, requires_grad: Optional[bool] = No...
  function quantization (line 32) | def quantization(mode: Optional[str] = None):
  class NotYetLoadedTensor (line 94) | class NotYetLoadedTensor:
    method __init__ (line 95) | def __init__(self, metatensor, archiveinfo, storageinfo, rebuild_args):
    method rebuild_from_type_v2 (line 102) | def rebuild_from_type_v2(cls, func, new_type, args, state, *, archivei...
    method rebuild_parameter (line 116) | def rebuild_parameter(cls, data, requires_grad, backward_hooks, *, arc...
    method rebuild_tensor_v2 (line 129) | def rebuild_tensor_v2(
    method _load_tensor (line 139) | def _load_tensor(self):
    method __torch_function__ (line 156) | def __torch_function__(cls, func, types, args=(), kwargs=None):
    method __getattr__ (line 163) | def __getattr__(self, name):
    method __repr__ (line 190) | def __repr__(self):
  class LazyLoadingUnpickler (line 194) | class LazyLoadingUnpickler(pickle.Unpickler):
    method __init__ (line 195) | def __init__(self, file, zipfile_context):
    method find_class (line 199) | def find_class(self, module, name):
    method persistent_load (line 209) | def persistent_load(self, pid):
  class lazy_load (line 218) | class lazy_load:
    method __init__ (line 219) | def __init__(self, fn):
    method __enter__ (line 225) | def __enter__(self):
    method __exit__ (line 228) | def __exit__(self, exc_type, exc_val, exc_tb):
  function check_valid_checkpoint_dir (line 233) | def check_valid_checkpoint_dir(checkpoint_dir: Path) -> None:
  class SavingProxyForStorage (line 267) | class SavingProxyForStorage:
    method __init__ (line 268) | def __init__(self, obj, saver, protocol_version=5):
    method __reduce_ex__ (line 291) | def __reduce_ex__(self, protocol_version):
  class SavingProxyForTensor (line 295) | class SavingProxyForTensor:
    method __init__ (line 296) | def __init__(self, tensor, saver, protocol_version=5):
    method __reduce_ex__ (line 303) | def __reduce_ex__(self, protocol_version):
  class IncrementalPyTorchPickler (line 309) | class IncrementalPyTorchPickler(pickle.Pickler):
    method __init__ (line 310) | def __init__(self, saver, *args, **kwargs):
    method persistent_id (line 317) | def persistent_id(self, obj):
  class incremental_save (line 365) | class incremental_save:
    method __init__ (line 366) | def __init__(self, name):
    method __enter__ (line 372) | def __enter__(self):
    method store_early (line 375) | def store_early(self, tensor):
    method save (line 380) | def save(self, obj):
    method _write_storage_and_return_key (line 391) | def _write_storage_and_return_key(self, storage):
    method __exit__ (line 403) | def __exit__(self, type, value, traceback):
  function step_csv_logger (line 410) | def step_csv_logger(*args: Any, cls: Type[T] = CSVLogger, **kwargs: Any)...
  function chunked_cross_entropy (line 440) | def chunked_cross_entropy(
  function map_old_state_dict_weights (line 482) | def map_old_state_dict_weights(state_dict: Dict, mapping: Mapping, prefi...
  function get_default_supported_precision (line 491) | def get_default_supported_precision(training: bool, tpu: bool = False) -...

FILE: pretrain/tinyllama.py
  function setup (line 78) | def setup(
  function main (line 110) | def main(fabric, train_data_dir, val_data_dir, resume):
  function train (line 165) | def train(fabric, state, train_dataloader, val_dataloader, monitor, resu...
  function validate (line 278) | def validate(fabric: L.Fabric, model: torch.nn.Module, val_dataloader: D...
  function create_dataloader (line 301) | def create_dataloader(
  function create_dataloaders (line 339) | def create_dataloaders(
  function get_lr (line 375) | def get_lr(it):

FILE: pretrain/tinyllama_code.py
  function setup (line 77) | def setup(
  function main (line 109) | def main(fabric, train_data_dir, val_data_dir, resume):
  function train (line 169) | def train(fabric, state, train_dataloader, val_dataloader, monitor, resu...
  function validate (line 282) | def validate(fabric: L.Fabric, model: torch.nn.Module, val_dataloader: D...
  function create_dataloader (line 305) | def create_dataloader(
  function create_dataloaders (line 343) | def create_dataloaders(
  function get_lr (line 379) | def get_lr(it):

FILE: scripts/convert_hf_checkpoint.py
  function copy_weights_gpt_neox (line 19) | def copy_weights_gpt_neox(
  function copy_weights_falcon (line 62) | def copy_weights_falcon(
  function copy_weights_hf_llama (line 111) | def copy_weights_hf_llama(
  function layer_template (line 173) | def layer_template(layer_name: str, idx: int) -> Tuple[str, int]:
  function load_param (line 181) | def load_param(param: Union[torch.Tensor, NotYetLoadedTensor], name: str...
  function convert_hf_checkpoint (line 193) | def convert_hf_checkpoint(

FILE: scripts/convert_lit_checkpoint.py
  function layer_template (line 20) | def layer_template(layer_name: str, idx: int) -> Tuple[str, int]:
  function load_param (line 28) | def load_param(param: Union[torch.Tensor, NotYetLoadedTensor], name: str...
  function copy_weights_falcon (line 37) | def copy_weights_falcon(
  function copy_weights_gpt_neox (line 85) | def copy_weights_gpt_neox(
  function copy_weights_llama (line 121) | def copy_weights_llama(
  function tensor_split (line 170) | def tensor_split(
  function maybe_unwrap_state_dict (line 215) | def maybe_unwrap_state_dict(lit_weights: Dict[str, torch.Tensor]) -> Dic...
  function check_conversion_supported (line 219) | def check_conversion_supported(lit_weights: Dict[str, torch.Tensor]) -> ...
  function get_tinyllama_init_hf_config (line 232) | def get_tinyllama_init_hf_config() -> dict:
  function convert_config_lit_to_hf (line 257) | def convert_config_lit_to_hf(lit_config_dict: dict) -> dict:
  function convert_lit_checkpoint (line 277) | def convert_lit_checkpoint(*,

FILE: scripts/prepare_redpajama.py
  function prepare_sample (line 42) | def prepare_sample(
  function prepare_full (line 86) | def prepare_full(
  function prepare (line 142) | def prepare(

FILE: scripts/prepare_slimpajama.py
  function prepare_full (line 26) | def prepare_full(
  function prepare (line 73) | def prepare(

FILE: scripts/prepare_starcoder.py
  function prepare_full (line 21) | def prepare_full(
  function prepare (line 69) | def prepare(

FILE: sft/finetune.py
  class ModelArguments (line 52) | class ModelArguments:
  class DataArguments (line 63) | class DataArguments:
  class TrainingArguments (line 99) | class TrainingArguments(transformers.Seq2SeqTrainingArguments):
  class GenerationArguments (line 133) | class GenerationArguments:
  function get_accelerate_model (line 168) | def get_accelerate_model(args, checkpoint_dir):
  function print_trainable_parameters (line 212) | def print_trainable_parameters(args, model):
  function smart_tokenizer_and_embedding_resize (line 227) | def smart_tokenizer_and_embedding_resize(
  class DataCollatorForCausalLM (line 252) | class DataCollatorForCausalLM(object):
    method __call__ (line 259) | def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
  function extract_unnatural_instructions_data (line 304) | def extract_unnatural_instructions_data(examples, extract_reformulations...
  function extract_alpaca_dataset (line 334) | def extract_alpaca_dataset(example):
  function local_dataset (line 341) | def local_dataset(dataset_name):
  function make_data_module (line 354) | def make_data_module(tokenizer: transformers.PreTrainedTokenizer, args) ...
  function get_last_checkpoint (line 478) | def get_last_checkpoint(checkpoint_dir):
  function train (line 492) | def train():