SYMBOL INDEX (1090 symbols across 117 files)

FILE: extensions/thunder/pretrain.py
  function forward_and_loss (line 49) | def forward_and_loss(model: nn.Module, input_ids: torch.Tensor, targets:...
  function setup (line 56) | def setup(
  function main (line 189) | def main(
  function fit (line 276) | def fit(
  function validate (line 403) | def validate(fabric: L.Fabric, model: nn.Module, val_dataloader: DataLoa...
  function get_dataloaders (line 423) | def get_dataloaders(
  function get_lr (line 436) | def get_lr(learning_rate: float, it: int, warmup_iters: int, max_iters: ...
  function initialize_weights (line 450) | def initialize_weights(fabric: L.Fabric, model: GPT, n_layer: int, n_emb...
  function init_out_dir (line 472) | def init_out_dir(out_dir: Path) -> Path:
  function save_checkpoint (line 478) | def save_checkpoint(fabric, state, tokenizer_dir, checkpoint_file):
  function validate_args (line 490) | def validate_args(train: TrainArgs, eval: EvalArgs, initial_checkpoint_d...
  function jit (line 508) | def jit(fn: Callable, executors: List[str]) -> Any:

FILE: extensions/thunder/strategies/thunder_ddp.py
  class ThunderDDPStrategy (line 36) | class ThunderDDPStrategy(ParallelStrategy):
    method __init__ (line 37) | def __init__(
    method root_device (line 82) | def root_device(self) -> torch.device:
    method num_nodes (line 87) | def num_nodes(self) -> int:
    method num_nodes (line 91) | def num_nodes(self, num_nodes: int) -> None:
    method num_processes (line 96) | def num_processes(self) -> int:
    method distributed_sampler_kwargs (line 101) | def distributed_sampler_kwargs(self) -> Dict[str, Any]:
    method _configure_launcher (line 105) | def _configure_launcher(self) -> None:
    method process_group_backend (line 111) | def process_group_backend(self) -> Optional[str]:
    method _configure_launcher (line 115) | def _configure_launcher(self) -> None:
    method setup_environment (line 120) | def setup_environment(self) -> None:
    method setup_module (line 125) | def setup_module(self, module: Module) -> Module:
    method module_to_device (line 148) | def module_to_device(self, module: Module) -> None:
    method all_reduce (line 152) | def all_reduce(
    method barrier (line 160) | def barrier(self, *args: Any, **kwargs: Any) -> None:
    method broadcast (line 169) | def broadcast(self, obj: TBroadcast, src: int = 0) -> TBroadcast:
    method _setup_distributed (line 177) | def _setup_distributed(self) -> None:
    method _get_process_group_backend (line 183) | def _get_process_group_backend(self) -> str:
    method _set_world_ranks (line 186) | def _set_world_ranks(self) -> None:
  class _ThunderDataParalellBackwardSyncControl (line 195) | class _ThunderDataParalellBackwardSyncControl(_BackwardSyncControl):
    method __init__ (line 196) | def __init__(self):
    method no_backward_sync (line 200) | def no_backward_sync(self, module: Module, enabled: bool) -> ContextMa...
  class _SyncGradsContextManager (line 246) | class _SyncGradsContextManager:
    method __init__ (line 247) | def __init__(self, module: Module) -> None:
    method __enter__ (line 251) | def __enter__(self) -> None:
    method __exit__ (line 257) | def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> N...

FILE: extensions/thunder/strategies/thunder_fsdp.py
  class ThunderFSDPStrategy (line 46) | class ThunderFSDPStrategy(ParallelStrategy, _Sharded):
    method __init__ (line 47) | def __init__(
    method root_device (line 129) | def root_device(self) -> torch.device:
    method num_nodes (line 134) | def num_nodes(self) -> int:
    method num_processes (line 138) | def num_processes(self) -> int:
    method distributed_sampler_kwargs (line 143) | def distributed_sampler_kwargs(self) -> Dict[str, Any]:
    method _configure_launcher (line 147) | def _configure_launcher(self) -> None:
    method setup_environment (line 153) | def setup_environment(self) -> None:
    method setup_module (line 158) | def setup_module(self, module: Module) -> Module:
    method module_to_device (line 193) | def module_to_device(self, module: Module) -> None:
    method module_init_context (line 197) | def module_init_context(self, empty_init: Optional[bool] = None) -> Co...
    method module_sharded_context (line 209) | def module_sharded_context(self) -> ContextManager:
    method all_reduce (line 213) | def all_reduce(
    method barrier (line 221) | def barrier(self, *args: Any, **kwargs: Any) -> None:
    method broadcast (line 230) | def broadcast(self, obj: TBroadcast, src: int = 0) -> TBroadcast:
    method clip_gradients_norm (line 239) | def clip_gradients_norm(
    method save_checkpoint (line 250) | def save_checkpoint(
    method load_checkpoint (line 310) | def load_checkpoint(
    method _setup_distributed (line 399) | def _setup_distributed(self) -> None:
    method _set_world_ranks (line 406) | def _set_world_ranks(self) -> None:
  function _is_sharded_checkpoint (line 415) | def _is_sharded_checkpoint(path: Path) -> bool:
  function _is_full_checkpoint (line 420) | def _is_full_checkpoint(path: Path) -> bool:
  function _get_state_dict (line 424) | def _get_state_dict(
  function _unwrap_tom (line 453) | def _unwrap_tom(obj: object) -> object:

FILE: extensions/thunder/unsloth/executor.py
  function unsloth_cross_entropy_meta (line 36) | def unsloth_cross_entropy_meta(logits: TensorProxy, labels: TensorProxy)...
  function unsloth_cross_entropy_backward_impl (line 54) | def unsloth_cross_entropy_backward_impl(dlosses: Tensor, logits: Tensor,...
  function unsloth_cross_entropy_backward_meta (line 59) | def unsloth_cross_entropy_backward_meta(
  function unsloth_cross_entropy_checker (line 70) | def unsloth_cross_entropy_checker(
  function cross_entropy_to_unsloth (line 92) | def cross_entropy_to_unsloth(
  function unsloth_cross_entropy_grad (line 113) | def unsloth_cross_entropy_grad(
  function swiglu (line 158) | def swiglu(e: torch.Tensor, g: torch.Tensor) -> torch.Tensor:
  class ThunderLLaMAMLP (line 162) | class ThunderLLaMAMLP(OriginalLLaMAMLP):
    method forward (line 163) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function swiglu_forward_meta (line 173) | def swiglu_forward_meta(e: TensorProxy, g: TensorProxy) -> TensorProxy:
  function unsloth_swiglu_backward_meta (line 185) | def unsloth_swiglu_backward_meta(DW: TensorProxy, e: TensorProxy, g: Ten...
  function unsloth_swiglu_backward_fn (line 189) | def unsloth_swiglu_backward_fn(DW: Tensor, e: Tensor, g: Tensor) -> Tupl...
  function swiglu_to_unsloth_checker (line 204) | def swiglu_to_unsloth_checker(e: TensorProxy, g: TensorProxy) -> bool:
  function unsloth_swiglu_grad (line 208) | def unsloth_swiglu_grad(e: TensorProxy, g: TensorProxy) -> TensorProxy:
  function apply_rope_meta (line 231) | def apply_rope_meta(x: TensorProxy, cos: TensorProxy, sin: TensorProxy) ...
  function unsloth_apply_rope_meta (line 240) | def unsloth_apply_rope_meta(
  function unsloth_apply_rope_backward_meta (line 256) | def unsloth_apply_rope_backward_meta(
  function apply_rope_to_unsloth_checker (line 267) | def apply_rope_to_unsloth_checker(x: TensorProxy, cos: TensorProxy, sin:...
  function unsloth_apply_rope_grad (line 271) | def unsloth_apply_rope_grad(x: TensorProxy, cos: TensorProxy, sin: Tenso...

FILE: extensions/thunder/unsloth/kernels/cross_entropy_loss.py
  function _cross_entropy_forward (line 27) | def _cross_entropy_forward(
  function _chunked_cross_entropy_forward (line 83) | def _chunked_cross_entropy_forward(
  function _cross_entropy_backward (line 149) | def _cross_entropy_backward(
  function _cross_entropy_forward_impl (line 204) | def _cross_entropy_forward_impl(logits, labels):
  function _cross_entropy_backward_impl (line 262) | def _cross_entropy_backward_impl(dlosses, logits, logsumexp, labels):

FILE: extensions/thunder/unsloth/kernels/rope_embedding.py
  function _rope_embedding (line 32) | def _rope_embedding(
  function _rope_embedding_forward_impl (line 86) | def _rope_embedding_forward_impl(Q, cos, sin):
  function _rope_embedding_backward_impl (line 126) | def _rope_embedding_backward_impl(dY, cos, sin, n_groups, BLOCK_SIZE, nu...

FILE: extensions/thunder/unsloth/kernels/swiglu.py
  function _fg_kernel (line 25) | def _fg_kernel(
  function swiglu_fg_kernel (line 52) | def swiglu_fg_kernel(e, g):
  function _DWf_DW_dfg_kernel (line 71) | def _DWf_DW_dfg_kernel(
  function swiglu_DWf_DW_dfg_kernel (line 120) | def swiglu_DWf_DW_dfg_kernel(DW, e, g):

FILE: extensions/thunder/unsloth/kernels/utils.py
  function calculate_settings (line 25) | def calculate_settings(n):

FILE: extensions/xla/finetune/adapter.py
  function setup (line 54) | def setup(
  function main (line 76) | def main(fabric: L.Fabric, data_dir: Path, checkpoint_dir: Path, out_dir...
  function train (line 122) | def train(
  function validate (line 222) | def validate(
  function get_batch (line 254) | def get_batch(fabric: L.Fabric, data: List[Dict], longest_seq_length: in...
  function get_longest_seq_length (line 272) | def get_longest_seq_length(data: List[Dict]) -> int:
  function save_adapter_checkpoint (line 277) | def save_adapter_checkpoint(fabric: L.Fabric, model: torch.nn.Module, fi...

FILE: extensions/xla/generate/adapter.py
  function setup (line 25) | def setup(
  function main (line 60) | def main(

FILE: extensions/xla/generate/base.py
  function generate (line 27) | def generate(
  function setup (line 97) | def setup(
  function main (line 125) | def main(

FILE: extensions/xla/scripts/prepare_alpaca.py
  function prepare (line 19) | def prepare(
  function download_if_missing (line 86) | def download_if_missing(file_path: Path, file_url: str) -> None:
  function prepare_sample (line 99) | def prepare_sample(example: dict, tokenizer: Tokenizer, max_length: int,...
  function generate_prompt (line 129) | def generate_prompt(example: dict) -> str:

FILE: extensions/xla/utils.py
  function rank_print (line 16) | def rank_print(fabric: L.Fabric, message: object, *, flush: bool = True,...
  function materialize_parameters (line 25) | def materialize_parameters(module: torch.nn.Module, device: torch.device...
  function sequential_load_and_fsdp_wrap (line 34) | def sequential_load_and_fsdp_wrap(

FILE: litgpt/__main__.py
  function _check_commands (line 57) | def _check_commands():
  function main (line 63) | def main() -> None:

FILE: litgpt/adapter.py
  class Config (line 25) | class Config(BaseConfig):
  class GPT (line 30) | class GPT(BaseModel):
    method __init__ (line 32) | def __init__(self, config: Config) -> None:
    method from_name (line 49) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method _init_weights (line 52) | def _init_weights(self, module: nn.Module) -> None:
  class Block (line 59) | class Block(BaseBlock):
    method __init__ (line 60) | def __init__(self, config: Config, block_idx: int) -> None:
  class CausalSelfAttention (line 65) | class CausalSelfAttention(BaseCausalSelfAttention):
    method __init__ (line 69) | def __init__(self, config: Config, block_idx: int) -> None:
    method scaled_dot_product_attention (line 79) | def scaled_dot_product_attention(
    method reset_parameters (line 111) | def reset_parameters(self) -> None:
    method _load_from_state_dict (line 115) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  function mark_only_adapter_as_trainable (line 122) | def mark_only_adapter_as_trainable(model: GPT) -> None:
  function adapter_filter (line 128) | def adapter_filter(key: str, value: Any) -> bool:

FILE: litgpt/adapter_v2.py
  class Config (line 28) | class Config(BaseConfig):
    method mlp_class (line 30) | def mlp_class(self) -> Type:
  function adapter_filter (line 34) | def adapter_filter(key: str, value: Any) -> bool:
  class AdapterV2Linear (line 50) | class AdapterV2Linear(torch.nn.Module):
    method __init__ (line 51) | def __init__(self, in_features: int, out_features: int, **kwargs) -> N...
    method forward (line 57) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method reset_parameters (line 60) | def reset_parameters(self) -> None:
  class GPT (line 65) | class GPT(BaseModel):
    method __init__ (line 67) | def __init__(self, config: Config) -> None:
    method from_name (line 84) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method _init_weights (line 87) | def _init_weights(self, module: nn.Module) -> None:
    method _load_from_state_dict (line 93) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class Block (line 100) | class Block(BaseBlock):
    method __init__ (line 101) | def __init__(self, config: Config, block_idx: int) -> None:
  class CausalSelfAttention (line 107) | class CausalSelfAttention(BaseCausalSelfAttention):
    method __init__ (line 111) | def __init__(self, config: Config, block_idx: int) -> None:
    method _load_from_state_dict (line 119) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class GptNeoxMLP (line 141) | class GptNeoxMLP(litgpt.model.GptNeoxMLP):
    method __init__ (line 142) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 148) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class LLaMAMLP (line 160) | class LLaMAMLP(litgpt.model.LLaMAMLP):
    method __init__ (line 161) | def __init__(self, config: Config, intermediate_size: Optional[int] = ...
    method _load_from_state_dict (line 169) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class GemmaMLP (line 183) | class GemmaMLP(LLaMAMLP):
    method forward (line 184) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LLaMAMoE (line 191) | class LLaMAMoE(litgpt.model.LLaMAMoE):
    method __init__ (line 192) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 200) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  function mark_only_adapter_v2_as_trainable (line 207) | def mark_only_adapter_v2_as_trainable(model: GPT) -> None:

FILE: litgpt/api.py
  class LLM (line 37) | class LLM(torch.nn.Module):
    method __init__ (line 38) | def __init__(
    method tokenizer (line 76) | def tokenizer(self):
    method state_dict (line 79) | def state_dict(self, destination=None, prefix="", keep_vars=False):
    method load_state_dict (line 82) | def load_state_dict(self, state_dict, strict=True):
    method forward (line 85) | def forward(
    method trainer_setup (line 100) | def trainer_setup(self, trainer_ckpt: Optional[Path] = None) -> None:
    method save (line 126) | def save(self, out_dir: Optional[Path] = None, prompt_style: Optional[...
    method load (line 148) | def load(
    method distribute (line 256) | def distribute(
    method generate (line 461) | def generate(
    method _text_to_token_ids (line 570) | def _text_to_token_ids(self, prompt: str, sys_prompt: Optional[str] = ...
    method benchmark (line 576) | def benchmark(self, num_iterations=1, **kwargs):
  class Preprocessor (line 619) | class Preprocessor:
    method __init__ (line 624) | def __init__(self, tokenizer: Tokenizer, device: str = "cpu") -> None:
    method encode (line 628) | def encode(self, text: str) -> torch.Tensor:
    method decode (line 631) | def decode(self, token_ids: torch.Tensor) -> str:
  function calculate_number_of_devices (line 635) | def calculate_number_of_devices(devices):
  function benchmark_dict_to_markdown_table (line 643) | def benchmark_dict_to_markdown_table(data):
  function pull_request_benchmark_util (line 666) | def pull_request_benchmark_util(model_name="microsoft/phi-2", num_iterat...

FILE: litgpt/args.py
  class TrainArgs (line 9) | class TrainArgs:
    method __post_init__ (line 42) | def __post_init__(self) -> None:
    method gradient_accumulation_iters (line 57) | def gradient_accumulation_iters(self, devices: int, num_nodes: int = 1...
    method batch_size (line 63) | def batch_size(self, devices: int, num_nodes: int = 1) -> int:
    method warmup_iters (line 69) | def warmup_iters(self, devices: int, num_nodes: int, max_iters: int, t...
  class EvalArgs (line 79) | class EvalArgs:
  class LogArgs (line 98) | class LogArgs:

FILE: litgpt/chat/base.py
  function generate (line 28) | def generate(
  function process_prompt (line 77) | def process_prompt(
  function interact (line 123) | def interact(multiline, model, tokenizer, prompt_style, fabric, temperat...
  function main (line 151) | def main(

FILE: litgpt/config.py
  function find_multiple (line 12) | def find_multiple(n: int, k: int) -> int:
  class Config (line 26) | class Config:
    method __post_init__ (line 118) | def __post_init__(self):
    method from_name (line 186) | def from_name(cls, name: str, **kwargs: Any) -> Optional[Self]:
    method from_file (line 206) | def from_file(cls, path: Union[str, Path], **kwargs: Any) -> Self:
    method from_checkpoint (line 215) | def from_checkpoint(cls, path: Path, **kwargs: Any) -> Self:
    method mlp_class (line 224) | def mlp_class(self) -> Type:
    method norm_class (line 231) | def norm_class(self) -> Type:
  function check_indicator_and_length (line 252) | def check_indicator_and_length(

FILE: litgpt/data/alpaca.py
  class Alpaca (line 21) | class Alpaca(DataModule):
    method __post_init__ (line 49) | def __post_init__(self) -> None:
    method connect (line 54) | def connect(
    method prepare_data (line 61) | def prepare_data(self) -> None:
    method setup (line 65) | def setup(self, stage: str = "") -> None:
    method train_dataloader (line 94) | def train_dataloader(self) -> DataLoader:
    method val_dataloader (line 104) | def val_dataloader(self) -> DataLoader:
  function download_if_missing (line 114) | def download_if_missing(file_path: Path, file_url: str, mode: str = "w",...

FILE: litgpt/data/alpaca_2k.py
  class Alpaca2k (line 12) | class Alpaca2k(Alpaca):
    method prepare_data (line 24) | def prepare_data(self) -> None:
    method setup (line 29) | def setup(self, stage: str = "") -> None:

FILE: litgpt/data/alpaca_gpt4.py
  class AlpacaGPT4 (line 13) | class AlpacaGPT4(Alpaca):

FILE: litgpt/data/base.py
  class DataModule (line 15) | class DataModule(LightningDataModule):
    method connect (line 19) | def connect(
    method setup (line 30) | def setup(self, stage: str = "") -> None:
    method __repr__ (line 34) | def __repr__(self) -> str:
  class SFTDataset (line 38) | class SFTDataset(Dataset):
    method __init__ (line 58) | def __init__(
    method __len__ (line 78) | def __len__(self) -> int:
    method __getitem__ (line 81) | def __getitem__(self, idx: int) -> Dict[str, Union[Tensor, Dict[str, i...
  function get_sft_collate_fn (line 111) | def get_sft_collate_fn(max_seq_length: int = -1, pad_id: int = 0, ignore...
  function _sft_collate_fn (line 121) | def _sft_collate_fn(

FILE: litgpt/data/deita.py
  class Deita (line 17) | class Deita(DataModule):
    method __post_init__ (line 43) | def __post_init__(self) -> None:
    method connect (line 48) | def connect(
    method prepare_data (line 55) | def prepare_data(self) -> None:
    method setup (line 60) | def setup(self, stage: str = "") -> None:
    method train_dataloader (line 84) | def train_dataloader(self) -> DataLoader:
    method val_dataloader (line 94) | def val_dataloader(self) -> DataLoader:
  function format_dataset (line 104) | def format_dataset(dataset: List[dict], include_multi_turn_conversations...

FILE: litgpt/data/flan.py
  class FLAN (line 22) | class FLAN(DataModule):
    method __post_init__ (line 48) | def __post_init__(self):
    method connect (line 62) | def connect(
    method prepare_data (line 69) | def prepare_data(self) -> None:
    method train_dataloader (line 77) | def train_dataloader(self):
    method val_dataloader (line 80) | def val_dataloader(self):
    method _dataloader (line 83) | def _dataloader(self, split: str) -> DataLoader:
  function load_jsonl (line 108) | def load_jsonl(filename: Path) -> List[Dict[str, str]]:
  function _transform (line 116) | def _transform(item: dict) -> dict:
  function _supported_subsets (line 122) | def _supported_subsets() -> Set[str]:

FILE: litgpt/data/json_data.py
  class JSON (line 18) | class JSON(DataModule):
    method __post_init__ (line 45) | def __post_init__(self):
    method connect (line 69) | def connect(
    method setup (line 76) | def setup(self, stage: str = "") -> None:
    method train_dataloader (line 96) | def train_dataloader(self) -> DataLoader:
    method val_dataloader (line 106) | def val_dataloader(self) -> DataLoader:
    method get_splits (line 115) | def get_splits(self) -> Tuple:
    method find_split (line 138) | def find_split(self, split_name: str) -> Optional[Path]:
  function load_split (line 145) | def load_split(json_path: Path) -> Any:

FILE: litgpt/data/lima.py
  class LIMA (line 17) | class LIMA(DataModule):
    method __post_init__ (line 46) | def __post_init__(self):
    method connect (line 57) | def connect(
    method prepare_data (line 64) | def prepare_data(self) -> None:
    method setup (line 69) | def setup(self, stage: str = "") -> None:
    method train_dataloader (line 100) | def train_dataloader(self) -> DataLoader:
    method val_dataloader (line 110) | def val_dataloader(self) -> DataLoader:
  function format_dataset (line 120) | def format_dataset(dataset_partition: dict, include_multi_turn_conversat...

FILE: litgpt/data/lit_data.py
  class LitData (line 14) | class LitData(DataModule):
    method __post_init__ (line 33) | def __post_init__(self) -> None:
    method connect (line 38) | def connect(
    method train_dataloader (line 44) | def train_dataloader(self) -> DataLoader:
    method val_dataloader (line 48) | def val_dataloader(self) -> DataLoader:
    method _dataloader (line 52) | def _dataloader(self, input_dir: str, train: bool):

FILE: litgpt/data/longform.py
  class LongForm (line 20) | class LongForm(DataModule):
    method __post_init__ (line 42) | def __post_init__(self) -> None:
    method connect (line 47) | def connect(
    method prepare_data (line 54) | def prepare_data(self) -> None:
    method train_dataloader (line 59) | def train_dataloader(self):
    method val_dataloader (line 62) | def val_dataloader(self):
    method _dataloader (line 65) | def _dataloader(self, split: str) -> DataLoader:
  function _transform (line 88) | def _transform(item: dict) -> dict:

FILE: litgpt/data/microllama.py
  class MicroLlama (line 10) | class MicroLlama(TinyLlama):
    method __init__ (line 13) | def __init__(self, data_path: Union[str, Path] = Path("data/"), seed: ...

FILE: litgpt/data/openwebtext.py
  class OpenWebText (line 15) | class OpenWebText(DataModule):
    method __post_init__ (line 32) | def __post_init__(self) -> None:
    method connect (line 38) | def connect(
    method prepare_data (line 45) | def prepare_data(self) -> None:
    method train_dataloader (line 83) | def train_dataloader(self) -> DataLoader:
    method val_dataloader (line 96) | def val_dataloader(self) -> DataLoader:

FILE: litgpt/data/prepare_slimpajama.py
  class SlimPajamaDataRecipe (line 13) | class SlimPajamaDataRecipe(DataChunkRecipe):
    method __init__ (line 16) | def __init__(self, tokenizer: Tokenizer, chunk_size: int):
    method prepare_structure (line 20) | def prepare_structure(self, input_dir):
    method prepare_item (line 24) | def prepare_item(self, filepath):
  function prepare (line 36) | def prepare(

FILE: litgpt/data/prepare_starcoder.py
  class StarcoderDataRecipe (line 18) | class StarcoderDataRecipe(DataChunkRecipe):
    method __init__ (line 21) | def __init__(self, tokenizer: Tokenizer, chunk_size: int):
    method prepare_structure (line 25) | def prepare_structure(self, input_dir):
    method prepare_item (line 29) | def prepare_item(self, item_metadata):
  function prepare (line 52) | def prepare(

FILE: litgpt/data/text_files.py
  class TextFiles (line 16) | class TextFiles(DataModule):
    method __post_init__ (line 39) | def __post_init__(self) -> None:
    method connect (line 47) | def connect(self, tokenizer: Optional[Tokenizer] = None, batch_size: i...
    method prepare_data (line 52) | def prepare_data(self) -> None:
    method train_dataloader (line 108) | def train_dataloader(self) -> DataLoader:
    method val_dataloader (line 122) | def val_dataloader(self) -> DataLoader:
  function tokenize (line 136) | def tokenize(filename: str, tokenizer: Tokenizer):
  function validate_tokenizer (line 143) | def validate_tokenizer(tokenizer: Tokenizer) -> None:

FILE: litgpt/data/tinyllama.py
  class TinyLlama (line 13) | class TinyLlama(DataModule):
    method __post_init__ (line 33) | def __post_init__(self):
    method connect (line 44) | def connect(
    method prepare_data (line 50) | def prepare_data(self) -> None:
    method train_dataloader (line 59) | def train_dataloader(self) -> DataLoader:
    method val_dataloader (line 92) | def val_dataloader(self) -> DataLoader:

FILE: litgpt/data/tinystories.py
  class TinyStories (line 20) | class TinyStories(DataModule):
    method __post_init__ (line 38) | def __post_init__(self) -> None:
    method connect (line 43) | def connect(self, tokenizer: Optional[Tokenizer] = None, batch_size: i...
    method prepare_data (line 48) | def prepare_data(self) -> None:
    method train_dataloader (line 81) | def train_dataloader(self) -> DataLoader:
    method val_dataloader (line 94) | def val_dataloader(self) -> DataLoader:
  function tokenize (line 108) | def tokenize(filename: str, tokenizer: Tokenizer):
  function download (line 124) | def download(data_dir: Path):

FILE: litgpt/deploy/serve.py
  class BaseLitAPI (line 21) | class BaseLitAPI(LitAPI):
    method __init__ (line 22) | def __init__(
    method setup (line 50) | def setup(self, device: str) -> None:
    method decode_request (line 71) | def decode_request(self, request: Dict[str, Any]) -> Any:
  class SimpleLitAPI (line 76) | class SimpleLitAPI(BaseLitAPI):
    method __init__ (line 77) | def __init__(
    method setup (line 103) | def setup(self, device: str):
    method predict (line 106) | def predict(self, inputs: str) -> Any:
    method encode_response (line 116) | def encode_response(self, output: str) -> Dict[str, Any]:
  class StreamLitAPI (line 121) | class StreamLitAPI(BaseLitAPI):
    method __init__ (line 122) | def __init__(
    method setup (line 148) | def setup(self, device: str):
    method predict (line 151) | def predict(self, inputs: torch.Tensor) -> Any:
    method encode_response (line 161) | def encode_response(self, output):
  class OpenAISpecLitAPI (line 166) | class OpenAISpecLitAPI(BaseLitAPI):
    method __init__ (line 167) | def __init__(
    method setup (line 193) | def setup(self, device: str):
    method decode_request (line 213) | def decode_request(self, request: "ChatCompletionRequest") -> Any:
    method predict (line 217) | def predict(self, inputs: str, context: dict) -> Any:
  function run_server (line 234) | def run_server(

FILE: litgpt/eval/evaluate.py
  function prepare_results (line 15) | def prepare_results(results, save_filepath, print_results=True):
  function convert_and_evaluate (line 27) | def convert_and_evaluate(

FILE: litgpt/finetune/adapter.py
  function setup (line 48) | def setup(
  function main (line 151) | def main(
  function fit (line 244) | def fit(
  function validate (line 391) | def validate(
  function generate_example (line 412) | def generate_example(fabric: L.Fabric, model: GPT, tokenizer: Tokenizer,...
  function get_lr_scheduler (line 444) | def get_lr_scheduler(optimizer, warmup_steps: int, max_steps: int):
  function get_dataloaders (line 451) | def get_dataloaders(
  function get_longest_seq_length (line 464) | def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]:
  function save_adapter_checkpoint (line 472) | def save_adapter_checkpoint(fabric: L.Fabric, model: torch.nn.Module, fi...
  function validate_args (line 477) | def validate_args(train: TrainArgs, eval: EvalArgs) -> None:

FILE: litgpt/finetune/adapter_v2.py
  function setup (line 49) | def setup(
  function main (line 153) | def main(
  function fit (line 261) | def fit(
  function validate (line 418) | def validate(
  function generate_example (line 439) | def generate_example(fabric: L.Fabric, model: GPT, tokenizer: Tokenizer,...
  function get_lr_scheduler (line 467) | def get_lr_scheduler(optimizer, warmup_steps: int, max_steps: int):
  function get_dataloaders (line 474) | def get_dataloaders(
  function get_longest_seq_length (line 487) | def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]:
  function save_adapter_v2_checkpoint (line 495) | def save_adapter_v2_checkpoint(fabric: L.Fabric, model: torch.nn.Module,...
  function validate_args (line 500) | def validate_args(train: TrainArgs, eval: EvalArgs) -> None:

FILE: litgpt/finetune/full.py
  function setup (line 44) | def setup(
  function main (line 126) | def main(
  function fit (line 209) | def fit(
  function validate (line 363) | def validate(
  function generate_example (line 383) | def generate_example(fabric: L.Fabric, model: GPT, tokenizer: Tokenizer,...
  function get_lr_scheduler (line 415) | def get_lr_scheduler(optimizer, warmup_steps: int, max_steps: int):
  function get_dataloaders (line 422) | def get_dataloaders(
  function get_longest_seq_length (line 435) | def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]:
  function validate_args (line 443) | def validate_args(train: TrainArgs, eval: EvalArgs) -> None:

FILE: litgpt/finetune/lora.py
  function setup (line 49) | def setup(
  function main (line 183) | def main(
  function fit (line 285) | def fit(
  function validate (line 440) | def validate(
  function generate_example (line 461) | def generate_example(fabric: L.Fabric, model: GPT, tokenizer: Tokenizer,...
  function get_lr_scheduler (line 490) | def get_lr_scheduler(optimizer, warmup_steps: int, max_steps: int):
  function get_dataloaders (line 497) | def get_dataloaders(
  function get_longest_seq_length (line 510) | def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]:
  function parallelize_fn (line 518) | def parallelize_fn(model, device_mesh, activation_checkpointing=True):
  function save_lora_checkpoint (line 542) | def save_lora_checkpoint(fabric: L.Fabric, model: torch.nn.Module, file_...
  function validate_args (line 559) | def validate_args(train: TrainArgs, eval: EvalArgs) -> None:

FILE: litgpt/finetune/lora_legacy.py
  function setup (line 49) | def setup(
  function main (line 183) | def main(
  function fit (line 278) | def fit(
  function validate (line 425) | def validate(
  function generate_example (line 446) | def generate_example(fabric: L.Fabric, model: GPT, tokenizer: Tokenizer,...
  function get_lr_scheduler (line 475) | def get_lr_scheduler(optimizer, warmup_steps: int, max_steps: int):
  function get_dataloaders (line 482) | def get_dataloaders(
  function get_longest_seq_length (line 495) | def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]:
  function save_lora_checkpoint (line 503) | def save_lora_checkpoint(fabric: L.Fabric, model: torch.nn.Module, file_...
  function validate_args (line 508) | def validate_args(train: TrainArgs, eval: EvalArgs) -> None:

FILE: litgpt/generate/adapter.py
  function main (line 28) | def main(

FILE: litgpt/generate/adapter_v2.py
  function main (line 28) | def main(

FILE: litgpt/generate/base.py
  function multinomial_num_samples_1 (line 30) | def multinomial_num_samples_1(probs: torch.Tensor) -> torch.Tensor:
  function sample_top_p (line 38) | def sample_top_p(logits: torch.Tensor, top_p: float) -> torch.Tensor:
  function sample (line 53) | def sample(
  function next_token (line 76) | def next_token(
  function batched_sample (line 88) | def batched_sample(logits: list[torch.Tensor], kwargs: list[dict]) -> to...
  function batched_next_token (line 95) | def batched_next_token(
  function generate_fn (line 130) | def generate_fn(
  function batched_generate_fn (line 241) | def batched_generate_fn(
  function generate (line 374) | def generate(
  function main (line 431) | def main(

FILE: litgpt/generate/full.py
  function main (line 27) | def main(

FILE: litgpt/generate/sequentially.py
  function sequential (line 36) | def sequential(model: GPT, root: torch.device, max_seq_length: int, devi...
  function chunk_sizes (line 96) | def chunk_sizes(num_units: int, devices: int) -> List[int]:
  function layer_to_device (line 102) | def layer_to_device(
  function move_block_input (line 117) | def move_block_input(device: torch.device, module: torch.nn.Module, ins):
  function move_block_output (line 123) | def move_block_output(device: torch.device, module: torch.nn.Module, ins...
  function replace_device (line 128) | def replace_device(module: torch.nn.Module, replace: torch.device, by: t...
  function main (line 146) | def main(

FILE: litgpt/generate/speculative_decoding.py
  function sample (line 32) | def sample(
  function speculative_decoding (line 60) | def speculative_decoding(
  function generate (line 172) | def generate(
  function setup_model (line 306) | def setup_model(config: Config, max_returned_tokens: int, fabric: L.Fabr...
  function load_model (line 319) | def load_model(checkpoint_dir: Path, fabric: L.Fabric) -> Tuple[Config, ...
  function main (line 329) | def main(

FILE: litgpt/generate/tp.py
  function tensor_parallel_linear (line 33) | def tensor_parallel_linear(fabric: L.Fabric, linear: torch.nn.Linear, st...
  function tensor_parallel_mlp (line 53) | def tensor_parallel_mlp(fabric: L.Fabric, mlp: Union[GptNeoxMLP, LLaMAML...
  function tensor_parallel_attn (line 72) | def tensor_parallel_attn(fabric: L.Fabric, attn: CausalSelfAttention) ->...
  function all_reduce_output (line 78) | def all_reduce_output(world_size: int, module: torch.nn.Module, ins, out...
  function tensor_parallel (line 84) | def tensor_parallel(fabric: L.Fabric, model: GPT) -> GPT:
  function main (line 103) | def main(

FILE: litgpt/lora.py
  class LoRALayer (line 64) | class LoRALayer(nn.Module):
    method __init__ (line 65) | def __init__(self, r: int, lora_alpha: int, lora_dropout: float):
  class LoRALinear (line 89) | class LoRALinear(LoRALayer):
    method __init__ (line 91) | def __init__(
    method reset_parameters (line 130) | def reset_parameters(self) -> None:
    method get_lora_AB (line 138) | def get_lora_AB(self) -> torch.Tensor:
    method merge (line 142) | def merge(self) -> None:
    method forward (line 165) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LoRAQKVLinear (line 175) | class LoRAQKVLinear(LoRALinear):
    method __init__ (line 177) | def __init__(
    method lora_ind (line 265) | def lora_ind(self) -> torch.Tensor:
    method zero_pad (line 285) | def zero_pad(self, x: torch.Tensor) -> torch.Tensor:
    method conv1d (line 325) | def conv1d(self, input: torch.Tensor, weight: torch.Tensor) -> torch.T...
    method get_lora_AB (line 361) | def get_lora_AB(self) -> torch.Tensor:
    method merge (line 373) | def merge(self) -> None:
    method forward (line 378) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function mark_only_lora_as_trainable (line 414) | def mark_only_lora_as_trainable(model: nn.Module, bias: str = "none") ->...
  function lora_filter (line 447) | def lora_filter(key: str, value: Any) -> bool:
  class Config (line 452) | class Config(BaseConfig):
    method mlp_class (line 475) | def mlp_class(self) -> Type:
  class GPT (line 479) | class GPT(BaseModel):
    method __init__ (line 481) | def __init__(self, config: Config) -> None:
    method from_name (line 504) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method _init_weights (line 507) | def _init_weights(self, module: nn.Module) -> None:
    method _load_from_state_dict (line 513) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class Block (line 520) | class Block(BaseBlock):
    method __init__ (line 521) | def __init__(self, config: Config, block_idx: int) -> None:
  class CausalSelfAttention (line 527) | class CausalSelfAttention(BaseCausalSelfAttention):
    method __init__ (line 528) | def __init__(self, config: Config, block_idx: int) -> None:
    method _load_from_state_dict (line 553) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  function create_lora_linear (line 572) | def create_lora_linear(
  class GptNeoxMLP (line 593) | class GptNeoxMLP(litgpt.model.GptNeoxMLP):
    method __init__ (line 594) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 600) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class LLaMAMLP (line 612) | class LLaMAMLP(litgpt.model.LLaMAMLP):
    method __init__ (line 613) | def __init__(self, config: Config, intermediate_size: Optional[int] = ...
    method _load_from_state_dict (line 621) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  class GemmaMLP (line 635) | class GemmaMLP(LLaMAMLP):
    method forward (line 636) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LLaMAMoE (line 643) | class LLaMAMoE(litgpt.model.LLaMAMoE):
    method __init__ (line 644) | def __init__(self, config: Config) -> None:
    method _load_from_state_dict (line 652) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ...
  function merge_lora_weights (line 659) | def merge_lora_weights(model: GPT) -> None:

FILE: litgpt/model.py
  class GPT (line 22) | class GPT(nn.Module):
    method __init__ (line 23) | def __init__(self, config: Config) -> None:
    method max_seq_length (line 40) | def max_seq_length(self) -> int:
    method max_seq_length (line 44) | def max_seq_length(self, value: int) -> None:
    method reset_parameters (line 70) | def reset_parameters(self) -> None:
    method _init_weights (line 74) | def _init_weights(self, module: nn.Module) -> None:
    method forward (line 85) | def forward(
    method from_name (line 184) | def from_name(cls, name: str, **kwargs: Any) -> Self:
    method rope_cache (line 187) | def rope_cache(self, device: Optional[torch.device] = None) -> Tuple[t...
    method rope_cache_length (line 261) | def rope_cache_length(self) -> int:
    method set_kv_cache (line 274) | def set_kv_cache(
    method clear_kv_cache (line 303) | def clear_kv_cache(self) -> None:
  class Block (line 309) | class Block(nn.Module):
    method __init__ (line 310) | def __init__(
    method forward (line 345) | def forward(
  class CausalSelfAttention (line 390) | class CausalSelfAttention(nn.Module):
    method __init__ (line 391) | def __init__(self, config: Config, block_idx: int) -> None:
    method forward (line 430) | def forward(
    method scaled_dot_product_attention (line 576) | def scaled_dot_product_attention(
    method build_kv_cache (line 598) | def build_kv_cache(
    method _load_from_state_dict (line 637) | def _load_from_state_dict(self, state_dict: dict, prefix: str, *args: ...
  class MultiheadLatentAttention (line 649) | class MultiheadLatentAttention(nn.Module):
    method __init__ (line 650) | def __init__(self, config: Config, block_idx: int) -> None:
    method forward (line 685) | def forward(
    method scaled_dot_product_attention (line 763) | def scaled_dot_product_attention(
    method build_kv_cache (line 785) | def build_kv_cache(
  class GptNeoxMLP (line 804) | class GptNeoxMLP(nn.Module):
    method __init__ (line 805) | def __init__(self, config: Config, intermediate_size: Optional[int] = ...
    method forward (line 812) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LLaMAMLP (line 818) | class LLaMAMLP(nn.Module):
    method __init__ (line 819) | def __init__(self, config: Config, intermediate_size: Optional[int] = ...
    method forward (line 827) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class GemmaMLP (line 834) | class GemmaMLP(LLaMAMLP):
    method forward (line 835) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LLaMAMoE (line 842) | class LLaMAMoE(nn.Module):
    method __init__ (line 843) | def __init__(self, config: Config) -> None:
    method forward (line 859) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class GroupedTopkRouter (line 888) | class GroupedTopkRouter(nn.Module):
    method __init__ (line 894) | def __init__(self, config: Config) -> None:
    method get_topk_indices (line 901) | def get_topk_indices(self, scores: torch.Tensor) -> torch.Tensor:
    method forward (line 921) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function yarn_get_mscale (line 933) | def yarn_get_mscale(scale=1, mscale=1):
  function build_rope_cache (line 939) | def build_rope_cache(
  function batched_index_select (line 1075) | def batched_index_select(t, dim, idx):
  function batched_index_copy_ (line 1094) | def batched_index_copy_(t, dim, idx, val):
  function apply_rope (line 1144) | def apply_rope(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) ->...
  function apply_rope_interleave (line 1176) | def apply_rope_interleave(x: torch.Tensor, cos: torch.Tensor, sin: torch...
  function do_softcapping (line 1217) | def do_softcapping(x: torch.Tensor, thresh: float) -> torch.Tensor:
  class KVCache (line 1221) | class KVCache(nn.Module):
    method __init__ (line 1227) | def __init__(
    method forward (line 1243) | def forward(self, input_pos: torch.Tensor, k: torch.Tensor, v: torch.T...
    method reset_parameters (line 1283) | def reset_parameters(self) -> None:
  function build_mask_cache (line 1288) | def build_mask_cache(max_seq_length: int, device: Optional[torch.device]...
  class RMSNorm (line 1293) | class RMSNorm(torch.nn.Module):
    method __init__ (line 1300) | def __init__(self, size: int, dim: int = -1, eps: float = 1e-6, add_un...
    method forward (line 1307) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method reset_parameters (line 1316) | def reset_parameters(self) -> None:

FILE: litgpt/parser_config.py
  function parser_commands (line 8) | def parser_commands() -> List[str]:
  function save_hyperparameters (line 34) | def save_hyperparameters(

FILE: litgpt/pretrain.py
  function setup (line 49) | def setup(
  function main (line 177) | def main(
  function fit (line 288) | def fit(
  function validate (line 427) | def validate(
  function get_dataloaders (line 451) | def get_dataloaders(
  function get_lr (line 464) | def get_lr(learning_rate: float, it: int, warmup_iters: int, max_iters: ...
  function initialize_weights (line 478) | def initialize_weights(fabric: L.Fabric, model: GPT, n_layer: int, n_emb...
  function save_checkpoint (line 500) | def save_checkpoint(fabric, state, tokenizer_dir, checkpoint_file):
  function validate_args (line 512) | def validate_args(train: TrainArgs, eval: EvalArgs, initial_checkpoint_d...

FILE: litgpt/prompts.py
  class PromptStyle (line 17) | class PromptStyle:
    method apply (line 21) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
    method stop_tokens (line 24) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]:
    method from_name (line 28) | def from_name(cls, name: str) -> "PromptStyle":
    method from_config (line 32) | def from_config(cls, config: Config) -> "PromptStyle":
  class Default (line 36) | class Default(PromptStyle):
    method apply (line 37) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
    method stop_tokens (line 40) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]:
  class Alpaca (line 44) | class Alpaca(PromptStyle):
    method apply (line 45) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class FLAN (line 60) | class FLAN(PromptStyle):
    method apply (line 61) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Longform (line 69) | class Longform(PromptStyle):
    method apply (line 70) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class StableLMAlpha (line 78) | class StableLMAlpha(PromptStyle):
    method apply (line 79) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
    method stop_tokens (line 89) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]:
  class StableLMZephyr (line 98) | class StableLMZephyr(PromptStyle):
    method apply (line 99) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Falcon (line 103) | class Falcon(PromptStyle):
    method apply (line 104) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
    method stop_tokens (line 107) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]:
  class Falcon3 (line 117) | class Falcon3(PromptStyle):
    method apply (line 118) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
    method stop_tokens (line 121) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]:
  class Llama2FunctionCalling (line 128) | class Llama2FunctionCalling(PromptStyle):
    method apply (line 129) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Llama2 (line 155) | class Llama2(PromptStyle):
    method apply (line 156) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Llama3 (line 170) | class Llama3(PromptStyle):
    method apply (line 171) | def apply(
    method stop_tokens (line 216) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]:
  class R1Base (line 223) | class R1Base(PromptStyle):
    method apply (line 224) | def apply(
    method stop_tokens (line 265) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]:
  class FreeWilly2 (line 272) | class FreeWilly2(PromptStyle):
    method apply (line 273) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Platypus (line 278) | class Platypus(PromptStyle):
    method apply (line 279) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class StableCode (line 283) | class StableCode(PromptStyle):
    method apply (line 284) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class CodeLlama (line 288) | class CodeLlama(PromptStyle):
    method apply (line 289) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Phi1 (line 300) | class Phi1(PromptStyle):
    method apply (line 301) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
    method stop_tokens (line 304) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]:
  class Phi2 (line 315) | class Phi2(PromptStyle):
    method apply (line 316) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Phi3 (line 320) | class Phi3(PromptStyle):
    method apply (line 321) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Phi4 (line 326) | class Phi4(PromptStyle):
    method apply (line 327) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Phi4Reasoning (line 335) | class Phi4Reasoning(PromptStyle):
    method apply (line 336) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Phi4Mini (line 344) | class Phi4Mini(PromptStyle):
    method apply (line 345) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Phi4MiniReasoning (line 353) | class Phi4MiniReasoning(PromptStyle):
    method apply (line 354) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class TinyLlama (line 359) | class TinyLlama(PromptStyle):
    method apply (line 360) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Gemma (line 365) | class Gemma(PromptStyle):
    method apply (line 366) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class OLMo (line 370) | class OLMo(PromptStyle):
    method apply (line 371) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class ChatML (line 375) | class ChatML(PromptStyle):
    method __init__ (line 376) | def __init__(self, system_message: Optional[str] = None):
    method apply (line 379) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  class Qwen2_5 (line 386) | class Qwen2_5(ChatML):
    method __init__ (line 387) | def __init__(self):
  class Qwen2_5_Math (line 391) | class Qwen2_5_Math(ChatML):
    method __init__ (line 392) | def __init__(self):
  class QwQ (line 396) | class QwQ(ChatML):
    method __init__ (line 397) | def __init__(self):
  class Qwen3 (line 403) | class Qwen3(ChatML):
    method __init__ (line 404) | def __init__(self):
  class SmolLM2 (line 408) | class SmolLM2(ChatML):
    method __init__ (line 409) | def __init__(self):
  class Salamandra (line 413) | class Salamandra(ChatML):
    method __init__ (line 414) | def __init__(self):
  function model_name_to_prompt_style (line 456) | def model_name_to_prompt_style(model_name: str) -> PromptStyle:
  function save_prompt_style (line 520) | def save_prompt_style(style: Union[str, PromptStyle], checkpoint_dir: Pa...
  function load_prompt_style (line 529) | def load_prompt_style(checkpoint_dir: Path) -> PromptStyle:
  function has_prompt_style (line 539) | def has_prompt_style(checkpoint_dir: Path) -> bool:

FILE: litgpt/scripts/convert_hf_checkpoint.py
  function copy_weights_gpt_neox (line 28) | def copy_weights_gpt_neox(
  function copy_weights_falcon (line 81) | def copy_weights_falcon(
  function copy_weights_hf_llama (line 139) | def copy_weights_hf_llama(
  function copy_weights_gemma_2 (line 226) | def copy_weights_gemma_2(
  function copy_weights_gemma_3 (line 294) | def copy_weights_gemma_3(
  function copy_weights_phi (line 397) | def copy_weights_phi(
  function copy_weights_qwen_2_5 (line 493) | def copy_weights_qwen_2_5(
  function copy_weights_olmo2 (line 563) | def copy_weights_olmo2(
  function copy_weights_qwen_3 (line 642) | def copy_weights_qwen_3(
  function qkv_reassemble (line 727) | def qkv_reassemble(
  function layer_template (line 748) | def layer_template(layer_name: str, num_matches: int = 1) -> Tuple[str, ...
  function load_param (line 756) | def load_param(
  function convert_hf_checkpoint (line 772) | def convert_hf_checkpoint(

FILE: litgpt/scripts/convert_lit_checkpoint.py
  function copy_weights_falcon (line 18) | def copy_weights_falcon(
  function copy_weights_gpt_neox (line 66) | def copy_weights_gpt_neox(
  function copy_weights_llama (line 103) | def copy_weights_llama(
  function copy_weights_gemma_2 (line 169) | def copy_weights_gemma_2(
  function copy_weights_gemma_3 (line 218) | def copy_weights_gemma_3(
  function copy_weights_phi (line 269) | def copy_weights_phi(
  function copy_weights_qwen_2_5 (line 348) | def copy_weights_qwen_2_5(
  function copy_weights_olmo2 (line 396) | def copy_weights_olmo2(
  function copy_weights_qwen_3 (line 454) | def copy_weights_qwen_3(
  function qkv_reassemble (line 520) | def qkv_reassemble(param: Union[torch.Tensor, NotYetLoadedTensor], confi...
  function check_conversion_supported (line 538) | def check_conversion_supported(lit_weights: Dict[str, torch.Tensor]) -> ...
  function convert_lit_checkpoint (line 546) | def convert_lit_checkpoint(checkpoint_dir: Path, output_dir: Path) -> None:

FILE: litgpt/scripts/convert_pretrained_checkpoint.py
  function convert_pretrained_checkpoint (line 12) | def convert_pretrained_checkpoint(checkpoint_dir: Path, output_dir: Path...

FILE: litgpt/scripts/download.py
  function download_from_hub (line 14) | def download_from_hub(
  function find_weight_files (line 101) | def find_weight_files(repo_id: str, access_token: Optional[str]) -> Tupl...
  function gated_repo_catcher (line 114) | def gated_repo_catcher(repo_id: str, access_token: Optional[str]):

FILE: litgpt/scripts/merge_lora.py
  function merge_lora (line 17) | def merge_lora(
  function load_lora_metadata (line 86) | def load_lora_metadata(checkpoint_dir: Path) -> Tuple[Dict[str, Any], Pa...

FILE: litgpt/tokenizer.py
  class Tokenizer (line 12) | class Tokenizer:
    method __init__ (line 13) | def __init__(self, checkpoint_dir: Union[Path, str]) -> None:
    method vocab_size (line 73) | def vocab_size(self) -> int:
    method token_to_id (line 80) | def token_to_id(self, token: str) -> int:
    method check_if_bos_token_used (line 91) | def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
    method encode (line 108) | def encode(
    method decode (line 144) | def decode(self, tensor: torch.Tensor) -> str:
    method decode_stream (line 155) | def decode_stream(

FILE: litgpt/utils.py
  function init_out_dir (line 47) | def init_out_dir(out_dir: Path) -> Path:
  function find_resume_path (line 55) | def find_resume_path(resume: Union[bool, Literal["auto"], Path], out_dir...
  function num_parameters (line 69) | def num_parameters(module: nn.Module, requires_grad: Optional[bool] = No...
  function reset_parameters (line 81) | def reset_parameters(module: nn.Module) -> None:
  function check_valid_checkpoint_dir (line 88) | def check_valid_checkpoint_dir(
  class SavingProxyForStorage (line 138) | class SavingProxyForStorage:
    method __init__ (line 139) | def __init__(self, obj, saver, protocol_version=5):
    method __reduce_ex__ (line 162) | def __reduce_ex__(self, protocol_version):
  class SavingProxyForTensor (line 166) | class SavingProxyForTensor:
    method __init__ (line 167) | def __init__(self, tensor, saver, protocol_version=5):
    method __reduce_ex__ (line 186) | def __reduce_ex__(self, protocol_version):
  class IncrementalPyTorchPickler (line 192) | class IncrementalPyTorchPickler(pickle.Pickler):
    method __init__ (line 193) | def __init__(self, saver, *args, **kwargs):
    method persistent_id (line 200) | def persistent_id(self, obj):
  class incremental_save (line 248) | class incremental_save:
    method __init__ (line 249) | def __init__(self, name):
    method __enter__ (line 256) | def __enter__(self):
    method store_early (line 259) | def store_early(self, tensor):
    method save (line 264) | def save(self, obj):
    method _write_storage_and_return_key (line 275) | def _write_storage_and_return_key(self, storage):
    method __exit__ (line 294) | def __exit__(self, type, value, traceback):
  function chunked_cross_entropy (line 301) | def chunked_cross_entropy(
  function map_old_state_dict_weights (line 353) | def map_old_state_dict_weights(state_dict: Dict, mapping: Mapping, prefi...
  function get_default_supported_precision (line 362) | def get_default_supported_precision(training: bool) -> str:
  function load_checkpoint (line 382) | def load_checkpoint(fabric: L.Fabric, model: nn.Module, checkpoint_path:...
  function load_checkpoint_update (line 400) | def load_checkpoint_update(
  function load_from_full_model_state_dict (line 413) | def load_from_full_model_state_dict(
  function flops_per_param (line 448) | def flops_per_param(max_seq_length: int, n_layer: int, n_embd: int, n_pa...
  function estimate_flops (line 457) | def estimate_flops(model: "GPT", training: bool) -> int:
  class CycleIterator (line 481) | class CycleIterator:
    method __init__ (line 493) | def __init__(self, iterable: Iterable) -> None:
    method __next__ (line 498) | def __next__(self) -> Any:
    method __iter__ (line 508) | def __iter__(self) -> Self:
  function copy_config_files (line 512) | def copy_config_files(source_dir: Path, out_dir: Path) -> None:
  function CLI (line 524) | def CLI(*args: Any, **kwargs: Any) -> Any:
  function capture_hparams (line 533) | def capture_hparams() -> Dict[str, Any]:
  function save_config (line 548) | def save_config(config: "Config", checkpoint_dir: Path) -> None:
  function parse_devices (line 554) | def parse_devices(devices: Union[str, int]) -> int:
  function choose_logger (line 562) | def choose_logger(
  function get_argument_names (line 609) | def get_argument_names(cls):
  function instantiate_bnb_optimizer (line 618) | def instantiate_bnb_optimizer(optimizer, model_parameters):
  function instantiate_torch_optimizer (line 635) | def instantiate_torch_optimizer(optimizer, model_parameters, **kwargs):
  function extend_checkpoint_dir (line 670) | def extend_checkpoint_dir(checkpoint_dir: Path) -> Path:
  function check_file_size_on_cpu_and_warn (line 681) | def check_file_size_on_cpu_and_warn(checkpoint_path, device, size_limit=...
  function auto_download_checkpoint (line 697) | def auto_download_checkpoint(model_name, access_token=None, ignore_token...
  function check_nvlink_connectivity (line 718) | def check_nvlink_connectivity(fabric=None):
  function _check_nvidia_connectivity (line 746) | def _check_nvidia_connectivity(custom_print):
  function _check_amd_connectivity (line 781) | def _check_amd_connectivity(custom_print):
  function fix_and_load_json (line 827) | def fix_and_load_json(s):
  function create_finetuning_performance_report (line 844) | def create_finetuning_performance_report(training_time, token_counts, de...
  function select_sft_generate_example (line 868) | def select_sft_generate_example(eval, data):
  function _RunIf (line 897) | def _RunIf(thunder: bool = False, **kwargs):
  function kill_process_tree (line 910) | def kill_process_tree(pid: int):

FILE: tests/conftest.py
  function fake_checkpoint_dir (line 23) | def fake_checkpoint_dir(tmp_path):
  class TensorLike (line 34) | class TensorLike:
    method __eq__ (line 35) | def __eq__(self, other):
  function tensor_like (line 40) | def tensor_like():
  class FloatLike (line 44) | class FloatLike:
    method __eq__ (line 45) | def __eq__(self, other):
  function float_like (line 50) | def float_like():
  function restore_default_dtype (line 55) | def restore_default_dtype():
  function destroy_process_group (line 61) | def destroy_process_group():
  function turn_off_tf32_and_set_seed (line 71) | def turn_off_tf32_and_set_seed(monkeypatch):
  class MockTokenizer (line 78) | class MockTokenizer:
    method encode (line 84) | def encode(self, text: str, bos: Optional[bool] = None, eos: bool = Fa...
    method decode (line 94) | def decode(self, tokens: torch.Tensor) -> str:
  function mock_tokenizer (line 99) | def mock_tokenizer():
  function alpaca_path (line 104) | def alpaca_path(tmp_path):
  function dolly_path (line 111) | def dolly_path(tmp_path):
  function longform_path (line 118) | def longform_path(tmp_path):
  function pytest_collection_modifyitems (line 128) | def pytest_collection_modifyitems(items: List[pytest.Function], config: ...

FILE: tests/convert/test_hf_checkpoint.py
  function test_llama2_70b_conversion (line 12) | def test_llama2_70b_conversion():
  function test_convert_hf_checkpoint (line 105) | def test_convert_hf_checkpoint(tmp_path, model_name):
  function test_qkv_reassemble (line 125) | def test_qkv_reassemble():

FILE: tests/convert/test_lit_checkpoint.py
  function test_convert_lit_checkpoint (line 42) | def test_convert_lit_checkpoint(tmp_path, model_name):
  function test_against_falcon_40b (line 64) | def test_against_falcon_40b():
  function test_against_original_gpt_neox (line 94) | def test_against_original_gpt_neox():
  function test_against_hf_llama2 (line 133) | def test_against_hf_llama2(ours_kwargs):
  function test_against_mixtral (line 167) | def test_against_mixtral(model_name):
  function test_against_olmo (line 209) | def test_against_olmo(model_name):
  function test_against_original_open_llama_3b (line 252) | def test_against_original_open_llama_3b():
  function test_against_hf_phi (line 281) | def test_against_hf_phi(model_name):
  function test_against_hf_phi_3 (line 316) | def test_against_hf_phi_3(model_name):
  function test_against_original_stablelm_zephyr_3b (line 354) | def test_against_original_stablelm_zephyr_3b():
  function test_against_original_gemma (line 402) | def test_against_original_gemma(model_name, device, dtype):
  function test_against_original_gemma_2 (line 462) | def test_against_original_gemma_2(model_name, device, dtype):
  function test_against_original_gemma_3 (line 535) | def test_against_original_gemma_3(model_name, device, dtype):
  function test_check_conversion_supported_adapter (line 590) | def test_check_conversion_supported_adapter():
  function test_check_conversion_supported_lora (line 600) | def test_check_conversion_supported_lora():
  function test_against_original_qwen_2_5 (line 634) | def test_against_original_qwen_2_5(model_name, device, dtype):
  function test_qkv_reassemble (line 681) | def test_qkv_reassemble():

FILE: tests/convert/test_pretrained_checkpoint.py
  function test_convert_pretrained_checkpoint (line 10) | def test_convert_pretrained_checkpoint(tmp_path, fake_checkpoint_dir):

FILE: tests/data/test_alpaca.py
  function test_alpaca (line 6) | def test_alpaca(mock_tokenizer, alpaca_path):

FILE: tests/data/test_base.py
  function test_sft_dataset (line 15) | def test_sft_dataset(max_seq_length, ignore_index, mask_prompt, mock_tok...
  function test_sft_collate_fn_padding (line 49) | def test_sft_collate_fn_padding(pad_id, ignore_index):
  function test_sft_collate_fn_truncation (line 74) | def test_sft_collate_fn_truncation():

FILE: tests/data/test_deita.py
  function test_format_dataset (line 9) | def test_format_dataset():
  function test_deita (line 47) | def test_deita(_, format_dataset_mock, mock_tokenizer, tmp_path):

FILE: tests/data/test_json.py
  function test_json (line 12) | def test_json(as_jsonl, tmp_path, mock_tokenizer):
  function test_json_input_validation (line 69) | def test_json_input_validation(tmp_path):
  function test_json_with_splits (line 95) | def test_json_with_splits(as_jsonl, tmp_path, mock_tokenizer):

FILE: tests/data/test_lit_data.py
  function test_input_dir_and_splits (line 13) | def test_input_dir_and_splits(dl_mock, tmp_path):
  function test_dataset_args (line 42) | def test_dataset_args(streaming_dataloader_mock, streaming_dataset_mock,...

FILE: tests/data/test_longform.py
  function test_longform (line 6) | def test_longform(mock_tokenizer, longform_path):

FILE: tests/data/test_openwebtext.py
  function test_openwebtext (line 17) | def test_openwebtext(_, __, optimize_mock, tmp_path, mock_tokenizer):

FILE: tests/data/test_textfiles.py
  class Tokenizer (line 10) | class Tokenizer:
    method encode (line 13) | def encode(self, text, bos, eos):
  function tokenize (line 19) | def tokenize(data):
  function fake_chunk (line 24) | def fake_chunk(path, data):
  function test_textfiles_datamodule (line 35) | def test_textfiles_datamodule(tmp_path):
  class MockTokenizer (line 71) | class MockTokenizer:
    method encode (line 76) | def encode(self, text, bos=True, eos=False, device=None, max_length=-1):
    method decode (line 87) | def decode(self, tensor):
    method decode_stream (line 99) | def decode_stream(self, token_stream, device=None):
    method vocab_size (line 104) | def vocab_size(self):
  function test_textfiles_token_loader (line 108) | def test_textfiles_token_loader(tmp_path):

FILE: tests/data/test_tinyllama.py
  function test_tinyllama (line 12) | def test_tinyllama(_, tmp_path):

FILE: tests/data/test_tinystories.py
  function tokenize (line 10) | def tokenize(data):
  function fake_chunk (line 15) | def fake_chunk(path, data):
  function test_pretok_dataset (line 35) | def test_pretok_dataset(tmp_path, max_seq_len, expected):
  function test_tokenize (line 47) | def test_tokenize(tmp_path, monkeypatch):
  function test_tinystories_datamodule (line 70) | def test_tinystories_datamodule(tmp_path):

FILE: tests/ext_thunder/test_thunder_distributed.py
  function test_thunder_strategy_ddp_input_parsing (line 24) | def test_thunder_strategy_ddp_input_parsing():
  function test_no_backward_sync_thunder (line 32) | def test_no_backward_sync_thunder(choice):
  function test_jit_ddp_before_setup (line 81) | def test_jit_ddp_before_setup(jit):
  function test_strategy_ddp_setup_already_traced (line 98) | def test_strategy_ddp_setup_already_traced():
  function test_thunder_strategy_fsdp_input_parsing (line 114) | def test_thunder_strategy_fsdp_input_parsing():
  function test_save_checkpoint_invalid_settings_raise (line 127) | def test_save_checkpoint_invalid_settings_raise(tmp_path):
  class Submodule (line 160) | class Submodule(torch.nn.Module):
    method __init__ (line 161) | def __init__(self, h: int):
    method forward (line 165) | def forward(self, x):
  class MyModel (line 170) | class MyModel(torch.nn.Module):
    method __init__ (line 171) | def __init__(self, h: int):
    method forward (line 177) | def forward(self):
    method reset_parameters (line 181) | def reset_parameters(self):
  function test_materialize_meta_tensors (line 187) | def test_materialize_meta_tensors():
  class StatefulThing (line 203) | class StatefulThing:
    method state_dict (line 204) | def state_dict(self):
    method load_state_dict (line 207) | def load_state_dict(self, state_dict):
  class TensorLike (line 211) | class TensorLike:
    method __init__ (line 212) | def __init__(self, device: Optional[Union[str, torch.device]] = None, ...
    method __eq__ (line 216) | def __eq__(self, other):
  function test_save_load_full_checkpoint (line 226) | def test_save_load_full_checkpoint(tmp_path):
  function test_load_full_checkpoint_only_model (line 278) | def test_load_full_checkpoint_only_model(tmp_path):
  function distributed_ckpt_to_regular (line 312) | def distributed_ckpt_to_regular(path):
  function test_save_load_sharded_checkpoint (line 348) | def test_save_load_sharded_checkpoint(tmp_path):
  function test_jit_fsdp_before_setup (line 403) | def test_jit_fsdp_before_setup(jit):
  function test_strategy_fsdp_setup_already_traced (line 420) | def test_strategy_fsdp_setup_already_traced():

FILE: tests/ext_thunder/test_thunder_pretrain.py
  function test_pretrain_thunder (line 19) | def test_pretrain_thunder(tmp_path, monkeypatch):

FILE: tests/ext_thunder/test_unsloth_executor.py
  function test_unsloth_cross_entropy (line 11) | def test_unsloth_cross_entropy(reduction):
  function test_unsloth_rope (line 46) | def test_unsloth_rope():
  function test_unsloth_swiglu (line 76) | def test_unsloth_swiglu():
  function test_unsloth_gpt (line 106) | def test_unsloth_gpt():

FILE: tests/generate/test_adapter.py
  function test_main (line 23) | def test_main(fake_checkpoint_dir, monkeypatch, version, tensor_like):
  function test_cli (line 72) | def test_cli(version):

FILE: tests/generate/test_main.py
  function test_generate (line 29) | def test_generate(max_seq_length):
  function test_main (line 61) | def test_main(fake_checkpoint_dir, monkeypatch, tensor_like):
  function test_cli (line 105) | def test_cli():
  function test_sample (line 113) | def test_sample(temperature):
  function test_generate_different_results_with_different_top_p (line 129) | def test_generate_different_results_with_different_top_p():

FILE: tests/generate/test_sequentially.py
  function test_layer_to_device (line 40) | def test_layer_to_device(n_layer, devices, expected):
  function path_to_device (line 50) | def path_to_device(model):
  function test_replace_device (line 54) | def test_replace_device():
  function _test_model_1device (line 98) | def _test_model_1device(accelerator):
  function test_model_1device_cuda (line 145) | def test_model_1device_cuda():
  function test_model_1device_cpu (line 149) | def test_model_1device_cpu():
  function test_model_forward_hooks (line 154) | def test_model_forward_hooks():
  function test_base_with_sequentially (line 269) | def test_base_with_sequentially(tmp_path):
  function test_cli (line 296) | def test_cli():

FILE: tests/generate/test_tp.py
  function test_tensor_parallel_linear (line 19) | def test_tensor_parallel_linear():
  function test_tensor_parallel_llama (line 87) | def test_tensor_parallel_llama(name, expected):
  function test_tp (line 110) | def test_tp(tmp_path):
  function test_cli (line 136) | def test_cli():

FILE: tests/generate/utils.py
  function find_forward_hooks (line 4) | def find_forward_hooks(module):

FILE: tests/test_adapter.py
  function test_config_identical (line 32) | def test_config_identical():
  function test_adapter_filter (line 46) | def test_adapter_filter(tmp_path):
  function test_adapter_script (line 63) | def test_adapter_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpa...
  function test_adapter_gpt_init_weights (line 110) | def test_adapter_gpt_init_weights():
  function test_adapter_compile (line 124) | def test_adapter_compile():
  function test_adapter_bitsandbytes (line 143) | def test_adapter_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir...
  function test_against_hf_gemma (line 255) | def test_against_hf_gemma(model_name):
  function test_against_original_gemma_2 (line 312) | def test_against_original_gemma_2(model_name, device, dtype):
  function test_against_original_gemma_3 (line 383) | def test_against_original_gemma_3(model_name, device, dtype):
  function test_load_legacy_state_dict (line 436) | def test_load_legacy_state_dict():

FILE: tests/test_adapter_v2.py
  function test_config_identical (line 33) | def test_config_identical():
  function test_adapter_v2_filter (line 45) | def test_adapter_v2_filter(tmp_path):
  function test_adapter_v2_script (line 80) | def test_adapter_v2_script(tmp_path, fake_checkpoint_dir, monkeypatch, a...
  function test_adapter_v2_gpt_init_weights (line 127) | def test_adapter_v2_gpt_init_weights():
  function test_base_model_can_be_adapter_v2_loaded (line 140) | def test_base_model_can_be_adapter_v2_loaded(name):
  function test_adapter_v2_compile (line 153) | def test_adapter_v2_compile():
  function test_against_hf_mixtral (line 172) | def test_against_hf_mixtral():
  function test_against_hf_gemma (line 218) | def test_against_hf_gemma(model_name):
  function test_against_original_gemma_2 (line 262) | def test_against_original_gemma_2(model_name):
  function test_against_original_gemma_3 (line 326) | def test_against_original_gemma_3(model_name):
  function test_adapter_v2_bitsandbytes (line 386) | def test_adapter_v2_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_...
  function test_load_legacy_state_dict (line 542) | def test_load_legacy_state_dict():

FILE: tests/test_api.py
  function mock_llm (line 33) | def mock_llm():
  function test_load_model (line 43) | def test_load_model(mock_llm):
  function test_generate (line 52) | def test_generate(mock_llm):
  function test_stream_generate (line 60) | def test_stream_generate(mock_llm):
  function test_generate_token_ids (line 73) | def test_generate_token_ids(mock_llm):
  function test_calculate_number_of_devices (line 83) | def test_calculate_number_of_devices():
  function test_llm_load_random_init (line 89) | def test_llm_load_random_init(tmp_path):
  function test_llm_load_hub_init (line 115) | def test_llm_load_hub_init(tmp_path):
  function test_model_not_initialized (line 128) | def test_model_not_initialized(tmp_path):
  function test_more_than_1_device_for_sequential_gpu (line 141) | def test_more_than_1_device_for_sequential_gpu(tmp_path):
  function test_more_than_1_device_for_tensor_parallel_gpu (line 174) | def test_more_than_1_device_for_tensor_parallel_gpu(tmp_path):
  function test_sequential_tp_incompatibility_with_random_weights (line 188) | def test_sequential_tp_incompatibility_with_random_weights(strategy, tmp...
  function test_sequential_tp_cpu (line 201) | def test_sequential_tp_cpu(strategy, tmp_path):
  function test_initialization_for_trainer (line 213) | def test_initialization_for_trainer(tmp_path):
  function test_quantization_is_applied (line 225) | def test_quantization_is_applied(tmp_path):
  function test_fixed_kv_cache (line 236) | def test_fixed_kv_cache(tmp_path):
  function test_invalid_accelerator (line 248) | def test_invalid_accelerator(tmp_path):
  function test_returned_benchmark_dir (line 254) | def test_returned_benchmark_dir(tmp_path):
  function test_benchmark_dict_to_markdown_table_single_values (line 276) | def test_benchmark_dict_to_markdown_table_single_values():
  function test_benchmark_dict_to_markdown_table_multiple_values (line 298) | def test_benchmark_dict_to_markdown_table_multiple_values():
  function test_state_dict (line 364) | def test_state_dict(tmp_path):
  function test_save_method (line 373) | def test_save_method(tmp_path):
  function test_forward_method (line 397) | def test_forward_method(tmp_path):
  function test_precision_selection (line 411) | def test_precision_selection(tmp_path):

FILE: tests/test_args.py
  function test_compute_warmup_iters (line 7) | def test_compute_warmup_iters():

FILE: tests/test_batch.py
  function create_llm (line 22) | def create_llm(tmp_path, batch_size, max_seq_length, device) -> tuple[LL...
  function test_batched_equivalence (line 40) | def test_batched_equivalence(tmp_path):
  function test_simple_batch (line 94) | def test_simple_batch():
  function test_batch_generate (line 133) | def test_batch_generate(tmp_path):
  function test_batch_generate_equivalence (line 257) | def test_batch_generate_equivalence(tmp_path):

FILE: tests/test_chat.py
  function test_generate (line 39) | def test_generate(monkeypatch, generated, stop_tokens, expected):
  function test_decode (line 69) | def test_decode():
  function test_main (line 94) | def test_main(mocked_input, stop_iteration, fake_checkpoint_dir, monkeyp...
  function test_cli (line 134) | def test_cli():
  function test_merge_lora_if_needed (line 144) | def test_merge_lora_if_needed(mocked_merge_lora, mocked_input, fake_chec...
  function test_litgpt_chat_endtoend (line 166) | def test_litgpt_chat_endtoend():
  function test_litgpt_generate_endtoend (line 191) | def test_litgpt_generate_endtoend():

FILE: tests/test_ci.py
  function test_gpu_ci_installs_bitsandbytes (line 9) | def test_gpu_ci_installs_bitsandbytes():

FILE: tests/test_cli.py
  function test_cli (line 12) | def test_cli():
  function test_pretrain_allows_max_steps (line 60) | def test_pretrain_allows_max_steps():
  function test_rewrite_finetune_command (line 79) | def test_rewrite_finetune_command():

FILE: tests/test_config.py
  function test_config (line 11) | def test_config():
  function test_from_hf_name (line 29) | def test_from_hf_name():
  function test_nonexisting_name (line 39) | def test_nonexisting_name():
  function test_short_and_hf_names_are_equal_unless_on_purpose (line 45) | def test_short_and_hf_names_are_equal_unless_on_purpose(config):
  function test_from_hf_name_with_org_string (line 53) | def test_from_hf_name_with_org_string():
  function test_from_checkpoint (line 72) | def test_from_checkpoint(tmp_path):
  function test_head_size (line 103) | def test_head_size(head_size):
  function test_find_multiple (line 109) | def test_find_multiple():

FILE: tests/test_config_hub.py
  function test_config_help (line 39) | def test_config_help(script_file, config_file, monkeypatch):

FILE: tests/test_deepseek_moe.py
  function test_deepseek_moe_litgpt_vs_hf (line 15) | def test_deepseek_moe_litgpt_vs_hf(batch_size, seq_len, device):
  function sync_weights (line 94) | def sync_weights(litgpt_model, hf_model):

FILE: tests/test_distributed.py
  function test_no_backward_sync (line 10) | def test_no_backward_sync(strategy):

FILE: tests/test_evaluate.py
  function test_evaluate_script (line 19) | def test_evaluate_script(tmp_path):
  function test_cli (line 72) | def test_cli():

FILE: tests/test_full.py
  function test_full_script (line 18) | def test_full_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_...

FILE: tests/test_generate_speculatively.py
  function test_speculative_decoding_target_never_accepts_draft_tokens (line 19) | def test_speculative_decoding_target_never_accepts_draft_tokens():
  function test_speculative_decoding_target_always_accepts_draft_tokens (line 45) | def test_speculative_decoding_target_always_accepts_draft_tokens():
  function test_speculative_decoding_target_sometimes_accepts_draft_tokens (line 71) | def test_speculative_decoding_target_sometimes_accepts_draft_tokens():
  function test_generate (line 106) | def test_generate(max_seq_length, speculative_k):
  function test_main (line 130) | def test_main(fake_checkpoint_dir, monkeypatch, tensor_like):
  function test_cli (line 211) | def test_cli():

FILE: tests/test_lora.py
  function test_lora_layer_replacement (line 45) | def test_lora_layer_replacement():
  function test_lora_merge (line 55) | def test_lora_merge():
  function test_lora_mqa_gqa (line 99) | def test_lora_mqa_gqa():
  function test_lora_ind_correctness (line 186) | def test_lora_ind_correctness(n_head, n_query_groups, enable_lora):
  function test_lora_filter (line 227) | def test_lora_filter(tmp_path):
  function test_lora_script (line 246) | def test_lora_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_...
  function test_lora_init_when_linear_overridden (line 293) | def test_lora_init_when_linear_overridden():
  function test_lora_linear_utilization (line 318) | def test_lora_linear_utilization(apply_to, target_layer_names, mlp_class...
  function test_lora_gpt_apply_lora_forward_no_exception (line 354) | def test_lora_gpt_apply_lora_forward_no_exception(apply_to):
  function test_lora_gpt_query_groups_merge_and_forward_no_exception (line 368) | def test_lora_gpt_query_groups_merge_and_forward_no_exception(n_query_gr...
  function test_lora_qkv_linear_compare_conv1d (line 406) | def test_lora_qkv_linear_compare_conv1d(head_size, n_head, enable_lora):
  function test_lora_linear_weights_merged_status (line 430) | def test_lora_linear_weights_merged_status(rank, expected_merged):
  function test_lora_qkv_linear_weights_merged_status (line 441) | def test_lora_qkv_linear_weights_merged_status(rank, enable_lora, expect...
  function test_lora_merge_with_bitsandbytes (line 450) | def test_lora_merge_with_bitsandbytes():
  function test_lora_gpt_init_weights (line 517) | def test_lora_gpt_init_weights():
  function test_base_model_can_be_lora_loaded (line 530) | def test_base_model_can_be_lora_loaded(name):
  function test_lora_compile (line 553) | def test_lora_compile():
  function test_against_hf_mixtral (line 584) | def test_against_hf_mixtral():
  function test_against_hf_gemma (line 635) | def test_against_hf_gemma(model_name):
  function test_against_original_gemma_2 (line 690) | def test_against_original_gemma_2(model_name):
  function test_against_original_gemma_3 (line 746) | def test_against_original_gemma_3(model_name):
  function test_lora_bitsandbytes (line 800) | def test_lora_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir, a...
  function test_lora_model_fsdp_init (line 924) | def test_lora_model_fsdp_init():
  function test_zero_pad_cpu_and_mocked_mps (line 958) | def test_zero_pad_cpu_and_mocked_mps():
  function test_load_legacy_state_dict (line 997) | def test_load_legacy_state_dict():
  function test_parallelize_fn (line 1016) | def test_parallelize_fn():
  function test_load_from_full_model_state_dict (line 1089) | def test_load_from_full_model_state_dict():

FILE: tests/test_merge_lora.py
  function test_merge_lora (line 24) | def test_merge_lora(tmp_path, fake_checkpoint_dir, pretrained_dtype, lor...
  function test_load_lora_metadata (line 77) | def test_load_lora_metadata(fake_checkpoint_dir):

FILE: tests/test_model.py
  function test_against_gpt_neox_model (line 76) | def test_against_gpt_neox_model(rotary_pct, batch_size, n_embd, parallel...
  function test_against_hf_falcon (line 145) | def test_against_hf_falcon(kwargs, device, dtype):
  function test_against_original_open_llama_3b (line 191) | def test_against_original_open_llama_3b(device, dtype):
  function test_against_hf_llama_2_and_3 (line 255) | def test_against_hf_llama_2_and_3(ours_kwargs, device, dtype):
  function test_against_hf_phi (line 304) | def test_against_hf_phi(model_name, device, dtype):
  function test_against_hf_phi_3 (line 364) | def test_against_hf_phi_3(model_name, device, dtype):
  function test_against_mistral_hf_models (line 429) | def test_against_mistral_hf_models(device, dtype, model_name):
  function test_against_mathstral_hf_models (line 493) | def test_against_mathstral_hf_models(device, dtype):
  function test_against_hf_mixtral (line 538) | def test_against_hf_mixtral(model_name):
  function test_against_olmo (line 599) | def test_against_olmo(model_name, device, dtype):
  function test_against_olmo2 (line 658) | def test_against_olmo2(model_name, device, dtype):
  function test_against_original_stablelm_zephyr_3b (line 717) | def test_against_original_stablelm_zephyr_3b(device, dtype):
  function test_against_original_gemma (line 768) | def test_against_original_gemma(model_name, device, dtype):
  function test_against_original_gemma_2 (line 825) | def test_against_original_gemma_2(model_name, device, dtype):
  function test_against_original_gemma_3 (line 895) | def test_against_original_gemma_3(model_name, device, dtype):
  function test_against_multimodal_gemma_3 (line 966) | def test_against_multimodal_gemma_3(model_name, device, dtype):
  function test_against_original_qwen_2_5 (line 1040) | def test_against_original_qwen_2_5(model_name, device, dtype):
  function test_against_original_qwen_3 (line 1113) | def test_against_original_qwen_3(model_name, device, dtype):
  function test_against_original_qwen_3_moe (line 1174) | def test_against_original_qwen_3_moe(model_name, device, dtype):
  function test_against_original_salamandra (line 1240) | def test_against_original_salamandra(model_name, device, dtype):
  function test_against_original_smollm2 (line 1300) | def test_against_original_smollm2(model_name, device, dtype):
  function test_against_hf_falcon3 (line 1360) | def test_against_hf_falcon3(model_name, device, dtype):
  function test_model_compile (line 1404) | def test_model_compile():
  function test_kv_cache (line 1427) | def test_kv_cache(max_seq_length):
  function test_model_kv_cache_amp (line 1458) | def test_model_kv_cache_amp():
  function test_rope_cache_length (line 1469) | def test_rope_cache_length(model_name):
  function test_sdpa_choice (line 1491) | def test_sdpa_choice(config):
  function test_sdpa_choice_kv_cache (line 1543) | def test_sdpa_choice_kv_cache(config):
  function test_rope_init_under_fsdp (line 1595) | def test_rope_init_under_fsdp():
  function test_reset_parameters_device (line 1614) | def test_reset_parameters_device():
  function test_batched_index_copy_modes (line 1622) | def test_batched_index_copy_modes():
  function test_load_legacy_state_dict (line 1684) | def test_load_legacy_state_dict():
  function test_kv_cache_buffer_shape (line 1708) | def test_kv_cache_buffer_shape(n_query_groups):
  function test_rope_cos_sin_shapes_if_rope_n_elem_is_odd (line 1732) | def test_rope_cos_sin_shapes_if_rope_n_elem_is_odd(rotary_percentage, fi...
  function test_forward_with_without_input_pos_maxp1 (line 1748) | def test_forward_with_without_input_pos_maxp1():

FILE: tests/test_multihead_latent_attention.py
  function test_multihead_latent_attention_kv_cache (line 12) | def test_multihead_latent_attention_kv_cache():
  function test_multihead_latent_attention_with_mask (line 40) | def test_multihead_latent_attention_with_mask():
  function test_multihead_latent_attention_litgpt_vs_hf (line 78) | def test_multihead_latent_attention_litgpt_vs_hf(batch_size, seq_len, de...
  function sync_weights (line 139) | def sync_weights(litgpt_model, hf_model):

FILE: tests/test_pretrain.py
  function test_optimizer_args (line 23) | def test_optimizer_args(_, tmp_path):
  function test_pretrain (line 49) | def test_pretrain(_, tmp_path):
  function test_initial_checkpoint_dir (line 93) | def test_initial_checkpoint_dir(_, load_mock, tmp_path):
  function test_initialize_weights (line 113) | def test_initialize_weights(strategy, expected):

FILE: tests/test_prompts.py
  function test_default_prompt_style (line 23) | def test_default_prompt_style(mock_tokenizer):
  function test_sys_prompt (line 31) | def test_sys_prompt(mock_tokenizer, sys_prompt: Optional[str]):
  function test_sys_prompt_with_kwargs (line 41) | def test_sys_prompt_with_kwargs(mock_tokenizer, sys_prompt: Optional[str]):
  function test_prompt_style_from_name (line 50) | def test_prompt_style_from_name():
  function test_prompt_style_from_config (line 55) | def test_prompt_style_from_config():
  function test_apply_prompts (line 93) | def test_apply_prompts():
  class CustomPromptStyle (line 104) | class CustomPromptStyle(PromptStyle):
    method apply (line 105) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw...
  function test_save_load_prompt_style (line 109) | def test_save_load_prompt_style(tmp_path):
  function test_multiturn_prompt (line 133) | def test_multiturn_prompt():

FILE: tests/test_readme.py
  function run_command (line 22) | def run_command(command):
  function _wait_and_check_response (line 37) | def _wait_and_check_response(waiting: int = 30):
  function test_download_model (line 54) | def test_download_model():
  function test_download_books (line 71) | def test_download_books():
  function test_chat_with_model (line 86) | def test_chat_with_model():
  function test_chat_with_quantized_model (line 95) | def test_chat_with_quantized_model():
  function test_finetune_model (line 105) | def test_finetune_model(tmp_path):
  function test_pretrain_model (line 151) | def test_pretrain_model(tmp_path):
  function test_continue_pretrain_model (line 188) | def test_continue_pretrain_model(tmp_path):
  function test_serve (line 220) | def test_serve():

FILE: tests/test_rope.py
  function test_rope_gptneox (line 14) | def test_rope_gptneox():
  function test_rope_llama_2 (line 36) | def test_rope_llama_2():
  function test_rope_llama_3 (line 82) | def test_rope_llama_3():
  function test_rope_llama_3_1 (line 128) | def test_rope_llama_3_1():
  function test_rope_llama_3_2 (line 181) | def test_rope_llama_3_2():
  function test_rope_gemma_3 (line 234) | def test_rope_gemma_3():
  function test_rope_cos_sin_shapes_if_rope_n_elem_is_odd (line 284) | def test_rope_cos_sin_shapes_if_rope_n_elem_is_odd():

FILE: tests/test_serve.py
  function _wait_and_check_response (line 22) | def _wait_and_check_response(waiting: int = 30):
  function test_simple (line 40) | def test_simple(tmp_path):
  function test_quantize (line 75) | def test_quantize(tmp_path):
  function test_multi_gpu_serve (line 110) | def test_multi_gpu_serve(tmp_path):
  function test_serve_with_openai_spec_missing_chat_template (line 145) | def test_serve_with_openai_spec_missing_chat_template(tmp_path):
  function test_serve_with_openai_spec (line 180) | def test_serve_with_openai_spec(tmp_path):
  function test_serve_with_generate_strategy (line 266) | def test_serve_with_generate_strategy(tmp_path, generate_strategy):

FILE: tests/test_tokenizer.py
  function test_tokenizer_against_hf (line 21) | def test_tokenizer_against_hf(config, tmp_path):
  function test_tokenizer_input_validation (line 89) | def test_tokenizer_input_validation():
  function test_tokenizer_bos_eos (line 99) | def test_tokenizer_bos_eos(

FILE: tests/test_trainer_support.py
  class LitLLM (line 17) | class LitLLM(L.LightningModule):
    method __init__ (line 18) | def __init__(self, checkpoint_dir, tokenizer_dir=None, trainer_ckpt_pa...
    method setup (line 24) | def setup(self, stage):
    method training_step (line 27) | def training_step(self, batch):
    method validation_step (line 32) | def validation_step(self, batch):
    method configure_optimizers (line 37) | def configure_optimizers(self):
  function test_download_model (line 45) | def test_download_model():
  function test_usecase1_pretraining_from_random_weights (line 51) | def test_usecase1_pretraining_from_random_weights(tmp_path):
  function test_usecase2_continued_pretraining_from_checkpoint (line 75) | def test_usecase2_continued_pretraining_from_checkpoint(tmp_path):
  function test_usecase3_resume_from_trainer_checkpoint (line 95) | def test_usecase3_resume_from_trainer_checkpoint(tmp_path):
  function test_usecase4_manually_save_and_resume (line 132) | def test_usecase4_manually_save_and_resume(tmp_path):

FILE: tests/test_types.py
  function test_logger_types_match_constants (line 8) | def test_logger_types_match_constants():

FILE: tests/test_utils.py
  function test_check_valid_checkpoint_dir (line 55) | def test_check_valid_checkpoint_dir(tmp_path):
  function test_incremental_write (line 104) | def test_incremental_write(tmp_path):
  function test_chunked_cross_entropy (line 129) | def test_chunked_cross_entropy(ignore_index, B):
  function test_num_parameters (line 165) | def test_num_parameters():
  function test_num_parameters_bitsandbytes (line 180) | def test_num_parameters_bitsandbytes(mode):
  function test_cycle_iterator (line 193) | def test_cycle_iterator():
  function test_parse_devices (line 210) | def test_parse_devices():
  function test_copy_config_files (line 228) | def test_copy_config_files(fake_checkpoint_dir, tmp_path):
  function test_capture_hparams (line 235) | def test_capture_hparams():
  function _test_function (line 255) | def _test_function(out_dir: Path, foo: bool = False, bar: int = 1):
  function test_save_hyperparameters (line 259) | def test_save_hyperparameters(tmp_path):
  function _test_function2 (line 271) | def _test_function2(out_dir: Path, foo: bool = False, bar: int = 1):
  function test_save_hyperparameters_known_commands (line 287) | def test_save_hyperparameters_known_commands(command, tmp_path):
  function test_choose_logger (line 299) | def test_choose_logger(tmp_path):
  function test_init_out_dir (line 322) | def test_init_out_dir(path_type, input_path, expected):
  function test_find_resume_path (line 337) | def test_find_resume_path(tmp_path):
  function model_parameters (line 365) | def model_parameters():
  function test_instantiate_bnb_optimizer_with_str (line 369) | def test_instantiate_bnb_optimizer_with_str(model_parameters):
  function test_instantiate_bnb_optimizer_with_dict (line 377) | def test_instantiate_bnb_optimizer_with_dict(model_parameters):
  function test_instantiate_bnb_optimizer_with_invalid_str (line 387) | def test_instantiate_bnb_optimizer_with_invalid_str(model_parameters):
  function test_instantiate_torch_optimizer_with_str (line 392) | def test_instantiate_torch_optimizer_with_str(model_parameters):
  function test_instantiate_torch_optimizer_with_class (line 398) | def test_instantiate_torch_optimizer_with_class(model_parameters):
  function test_extend_checkpoint_dir_is_prefixed (line 414) | def test_extend_checkpoint_dir_is_prefixed(input_path, expected):
  function test_extend_checkpoint_dir (line 438) | def test_extend_checkpoint_dir(input_path, expected):
  function test_extend_checkpoint_dir_dont_exist (line 462) | def test_extend_checkpoint_dir_dont_exist(input_path, expected):
  function test_file_size_below_limit_on_cpu (line 466) | def test_file_size_below_limit_on_cpu():
  function test_file_size_above_limit_on_cpu (line 474) | def test_file_size_above_limit_on_cpu():
  function test_file_size_above_limit_on_gpu (line 484) | def test_file_size_above_limit_on_gpu():
  function mock_cuda_is_available_true (line 493) | def mock_cuda_is_available_true(monkeypatch):
  function mock_nvidia_device_properties (line 499) | def mock_nvidia_device_properties(monkeypatch):
  function mock_amd_device_properties (line 507) | def mock_amd_device_properties(monkeypatch):
  function all_nvlink_connected_output (line 515) | def all_nvlink_connected_output():
  function test_all_nvlink_connected (line 527) | def test_all_nvlink_connected(
  function nvlink_partially_connected_output (line 537) | def nvlink_partially_connected_output():
  function test_nvlink_partially_connected_output (line 554) | def test_nvlink_partially_connected_output(
  function nvlink_not_connected_output (line 567) | def nvlink_not_connected_output():
  function test_nvlink_not_connected_output (line 589) | def test_nvlink_not_connected_output(
  function nvlink_all_gpu_connected_but_other_connected_output (line 602) | def nvlink_all_gpu_connected_but_other_connected_output():
  function test_nvlink_all_gpu_connected_but_other_connected_output (line 653) | def test_nvlink_all_gpu_connected_but_other_connected_output(
  function nvidia_smi_nvlink_output_dual_gpu_no_numa (line 666) | def nvidia_smi_nvlink_output_dual_gpu_no_numa():
  function test_check_nvlink_connectivity__returns_fully_connected_when_nvidia_all_nvlink_two_gpus (line 688) | def test_check_nvlink_connectivity__returns_fully_connected_when_nvidia_...
  function rocm_smi_xgmi_output_multi_gpu (line 698) | def rocm_smi_xgmi_output_multi_gpu():
  function test_check_nvlink_connectivity__returns_fully_connected_when_amd_all_xgmi_8_gpus (line 722) | def test_check_nvlink_connectivity__returns_fully_connected_when_amd_all...
  function test_check_nvlink_connectivity__returns_no_gpus_when_no_gpus (line 732) | def test_check_nvlink_connectivity__returns_no_gpus_when_no_gpus(mock_ru...
  function test_check_nvlink_connectivity__returns_unrecognized_vendor_when_unrecognized_vendor (line 740) | def test_check_nvlink_connectivity__returns_unrecognized_vendor_when_unr...
  function test_fix_and_load_json (line 751) | def test_fix_and_load_json():
  function test_select_sft_generate_example (line 805) | def test_select_sft_generate_example():

FILE: tests/test_yarn.py
  function test_deepseek_v3_block_with_yarn (line 15) | def test_deepseek_v3_block_with_yarn(batch_size, seq_len, device):
  function sync_weights (line 177) | def sync_weights(litgpt_model, hf_model):
  function sync_block_weights (line 191) | def sync_block_weights(block_litgpt, block_hf):

FILE: tutorials/examples/ptl-trainer/litgpt_ptl_medium.py
  class LitLLM (line 9) | class LitLLM(L.LightningModule):
    method __init__ (line 10) | def __init__(self):
    method on_train_start (line 22) | def on_train_start(self):
    method training_step (line 26) | def training_step(self, batch):
    method configure_optimizers (line 33) | def configure_optimizers(self):

FILE: tutorials/examples/ptl-trainer/litgpt_ptl_small.py
  class LitLLM (line 10) | class LitLLM(L.LightningModule):
    method __init__ (line 11) | def __init__(self, checkpoint_dir, tokenizer_dir=None, trainer_ckpt_pa...
    method setup (line 17) | def setup(self, stage):
    method training_step (line 20) | def training_step(self, batch):
    method validation_step (line 25) | def validation_step(self, batch):
    method configure_optimizers (line 30) | def configure_optimizers(self):
  function find_latest_checkpoint (line 97) | def find_latest_checkpoint(directory):

FILE: tutorials/full_finetune_example.py
  function validate (line 35) | def validate(model, val_dataloader):
  function train (line 48) | def train(fabric, model, optimizer, scheduler, train_dataloader, val_dat...
  function main (line 80) | def main(fabric):