SYMBOL INDEX (1090 symbols across 117 files) FILE: extensions/thunder/pretrain.py function forward_and_loss (line 49) | def forward_and_loss(model: nn.Module, input_ids: torch.Tensor, targets:... function setup (line 56) | def setup( function main (line 189) | def main( function fit (line 276) | def fit( function validate (line 403) | def validate(fabric: L.Fabric, model: nn.Module, val_dataloader: DataLoa... function get_dataloaders (line 423) | def get_dataloaders( function get_lr (line 436) | def get_lr(learning_rate: float, it: int, warmup_iters: int, max_iters: ... function initialize_weights (line 450) | def initialize_weights(fabric: L.Fabric, model: GPT, n_layer: int, n_emb... function init_out_dir (line 472) | def init_out_dir(out_dir: Path) -> Path: function save_checkpoint (line 478) | def save_checkpoint(fabric, state, tokenizer_dir, checkpoint_file): function validate_args (line 490) | def validate_args(train: TrainArgs, eval: EvalArgs, initial_checkpoint_d... function jit (line 508) | def jit(fn: Callable, executors: List[str]) -> Any: FILE: extensions/thunder/strategies/thunder_ddp.py class ThunderDDPStrategy (line 36) | class ThunderDDPStrategy(ParallelStrategy): method __init__ (line 37) | def __init__( method root_device (line 82) | def root_device(self) -> torch.device: method num_nodes (line 87) | def num_nodes(self) -> int: method num_nodes (line 91) | def num_nodes(self, num_nodes: int) -> None: method num_processes (line 96) | def num_processes(self) -> int: method distributed_sampler_kwargs (line 101) | def distributed_sampler_kwargs(self) -> Dict[str, Any]: method _configure_launcher (line 105) | def _configure_launcher(self) -> None: method process_group_backend (line 111) | def process_group_backend(self) -> Optional[str]: method _configure_launcher (line 115) | def _configure_launcher(self) -> None: method setup_environment (line 120) | def setup_environment(self) -> None: method setup_module (line 125) | def setup_module(self, module: Module) -> Module: method module_to_device (line 148) | def module_to_device(self, module: Module) -> None: method all_reduce (line 152) | def all_reduce( method barrier (line 160) | def barrier(self, *args: Any, **kwargs: Any) -> None: method broadcast (line 169) | def broadcast(self, obj: TBroadcast, src: int = 0) -> TBroadcast: method _setup_distributed (line 177) | def _setup_distributed(self) -> None: method _get_process_group_backend (line 183) | def _get_process_group_backend(self) -> str: method _set_world_ranks (line 186) | def _set_world_ranks(self) -> None: class _ThunderDataParalellBackwardSyncControl (line 195) | class _ThunderDataParalellBackwardSyncControl(_BackwardSyncControl): method __init__ (line 196) | def __init__(self): method no_backward_sync (line 200) | def no_backward_sync(self, module: Module, enabled: bool) -> ContextMa... class _SyncGradsContextManager (line 246) | class _SyncGradsContextManager: method __init__ (line 247) | def __init__(self, module: Module) -> None: method __enter__ (line 251) | def __enter__(self) -> None: method __exit__ (line 257) | def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> N... FILE: extensions/thunder/strategies/thunder_fsdp.py class ThunderFSDPStrategy (line 46) | class ThunderFSDPStrategy(ParallelStrategy, _Sharded): method __init__ (line 47) | def __init__( method root_device (line 129) | def root_device(self) -> torch.device: method num_nodes (line 134) | def num_nodes(self) -> int: method num_processes (line 138) | def num_processes(self) -> int: method distributed_sampler_kwargs (line 143) | def distributed_sampler_kwargs(self) -> Dict[str, Any]: method _configure_launcher (line 147) | def _configure_launcher(self) -> None: method setup_environment (line 153) | def setup_environment(self) -> None: method setup_module (line 158) | def setup_module(self, module: Module) -> Module: method module_to_device (line 193) | def module_to_device(self, module: Module) -> None: method module_init_context (line 197) | def module_init_context(self, empty_init: Optional[bool] = None) -> Co... method module_sharded_context (line 209) | def module_sharded_context(self) -> ContextManager: method all_reduce (line 213) | def all_reduce( method barrier (line 221) | def barrier(self, *args: Any, **kwargs: Any) -> None: method broadcast (line 230) | def broadcast(self, obj: TBroadcast, src: int = 0) -> TBroadcast: method clip_gradients_norm (line 239) | def clip_gradients_norm( method save_checkpoint (line 250) | def save_checkpoint( method load_checkpoint (line 310) | def load_checkpoint( method _setup_distributed (line 399) | def _setup_distributed(self) -> None: method _set_world_ranks (line 406) | def _set_world_ranks(self) -> None: function _is_sharded_checkpoint (line 415) | def _is_sharded_checkpoint(path: Path) -> bool: function _is_full_checkpoint (line 420) | def _is_full_checkpoint(path: Path) -> bool: function _get_state_dict (line 424) | def _get_state_dict( function _unwrap_tom (line 453) | def _unwrap_tom(obj: object) -> object: FILE: extensions/thunder/unsloth/executor.py function unsloth_cross_entropy_meta (line 36) | def unsloth_cross_entropy_meta(logits: TensorProxy, labels: TensorProxy)... function unsloth_cross_entropy_backward_impl (line 54) | def unsloth_cross_entropy_backward_impl(dlosses: Tensor, logits: Tensor,... function unsloth_cross_entropy_backward_meta (line 59) | def unsloth_cross_entropy_backward_meta( function unsloth_cross_entropy_checker (line 70) | def unsloth_cross_entropy_checker( function cross_entropy_to_unsloth (line 92) | def cross_entropy_to_unsloth( function unsloth_cross_entropy_grad (line 113) | def unsloth_cross_entropy_grad( function swiglu (line 158) | def swiglu(e: torch.Tensor, g: torch.Tensor) -> torch.Tensor: class ThunderLLaMAMLP (line 162) | class ThunderLLaMAMLP(OriginalLLaMAMLP): method forward (line 163) | def forward(self, x: torch.Tensor) -> torch.Tensor: function swiglu_forward_meta (line 173) | def swiglu_forward_meta(e: TensorProxy, g: TensorProxy) -> TensorProxy: function unsloth_swiglu_backward_meta (line 185) | def unsloth_swiglu_backward_meta(DW: TensorProxy, e: TensorProxy, g: Ten... function unsloth_swiglu_backward_fn (line 189) | def unsloth_swiglu_backward_fn(DW: Tensor, e: Tensor, g: Tensor) -> Tupl... function swiglu_to_unsloth_checker (line 204) | def swiglu_to_unsloth_checker(e: TensorProxy, g: TensorProxy) -> bool: function unsloth_swiglu_grad (line 208) | def unsloth_swiglu_grad(e: TensorProxy, g: TensorProxy) -> TensorProxy: function apply_rope_meta (line 231) | def apply_rope_meta(x: TensorProxy, cos: TensorProxy, sin: TensorProxy) ... function unsloth_apply_rope_meta (line 240) | def unsloth_apply_rope_meta( function unsloth_apply_rope_backward_meta (line 256) | def unsloth_apply_rope_backward_meta( function apply_rope_to_unsloth_checker (line 267) | def apply_rope_to_unsloth_checker(x: TensorProxy, cos: TensorProxy, sin:... function unsloth_apply_rope_grad (line 271) | def unsloth_apply_rope_grad(x: TensorProxy, cos: TensorProxy, sin: Tenso... FILE: extensions/thunder/unsloth/kernels/cross_entropy_loss.py function _cross_entropy_forward (line 27) | def _cross_entropy_forward( function _chunked_cross_entropy_forward (line 83) | def _chunked_cross_entropy_forward( function _cross_entropy_backward (line 149) | def _cross_entropy_backward( function _cross_entropy_forward_impl (line 204) | def _cross_entropy_forward_impl(logits, labels): function _cross_entropy_backward_impl (line 262) | def _cross_entropy_backward_impl(dlosses, logits, logsumexp, labels): FILE: extensions/thunder/unsloth/kernels/rope_embedding.py function _rope_embedding (line 32) | def _rope_embedding( function _rope_embedding_forward_impl (line 86) | def _rope_embedding_forward_impl(Q, cos, sin): function _rope_embedding_backward_impl (line 126) | def _rope_embedding_backward_impl(dY, cos, sin, n_groups, BLOCK_SIZE, nu... FILE: extensions/thunder/unsloth/kernels/swiglu.py function _fg_kernel (line 25) | def _fg_kernel( function swiglu_fg_kernel (line 52) | def swiglu_fg_kernel(e, g): function _DWf_DW_dfg_kernel (line 71) | def _DWf_DW_dfg_kernel( function swiglu_DWf_DW_dfg_kernel (line 120) | def swiglu_DWf_DW_dfg_kernel(DW, e, g): FILE: extensions/thunder/unsloth/kernels/utils.py function calculate_settings (line 25) | def calculate_settings(n): FILE: extensions/xla/finetune/adapter.py function setup (line 54) | def setup( function main (line 76) | def main(fabric: L.Fabric, data_dir: Path, checkpoint_dir: Path, out_dir... function train (line 122) | def train( function validate (line 222) | def validate( function get_batch (line 254) | def get_batch(fabric: L.Fabric, data: List[Dict], longest_seq_length: in... function get_longest_seq_length (line 272) | def get_longest_seq_length(data: List[Dict]) -> int: function save_adapter_checkpoint (line 277) | def save_adapter_checkpoint(fabric: L.Fabric, model: torch.nn.Module, fi... FILE: extensions/xla/generate/adapter.py function setup (line 25) | def setup( function main (line 60) | def main( FILE: extensions/xla/generate/base.py function generate (line 27) | def generate( function setup (line 97) | def setup( function main (line 125) | def main( FILE: extensions/xla/scripts/prepare_alpaca.py function prepare (line 19) | def prepare( function download_if_missing (line 86) | def download_if_missing(file_path: Path, file_url: str) -> None: function prepare_sample (line 99) | def prepare_sample(example: dict, tokenizer: Tokenizer, max_length: int,... function generate_prompt (line 129) | def generate_prompt(example: dict) -> str: FILE: extensions/xla/utils.py function rank_print (line 16) | def rank_print(fabric: L.Fabric, message: object, *, flush: bool = True,... function materialize_parameters (line 25) | def materialize_parameters(module: torch.nn.Module, device: torch.device... function sequential_load_and_fsdp_wrap (line 34) | def sequential_load_and_fsdp_wrap( FILE: litgpt/__main__.py function _check_commands (line 57) | def _check_commands(): function main (line 63) | def main() -> None: FILE: litgpt/adapter.py class Config (line 25) | class Config(BaseConfig): class GPT (line 30) | class GPT(BaseModel): method __init__ (line 32) | def __init__(self, config: Config) -> None: method from_name (line 49) | def from_name(cls, name: str, **kwargs: Any) -> Self: method _init_weights (line 52) | def _init_weights(self, module: nn.Module) -> None: class Block (line 59) | class Block(BaseBlock): method __init__ (line 60) | def __init__(self, config: Config, block_idx: int) -> None: class CausalSelfAttention (line 65) | class CausalSelfAttention(BaseCausalSelfAttention): method __init__ (line 69) | def __init__(self, config: Config, block_idx: int) -> None: method scaled_dot_product_attention (line 79) | def scaled_dot_product_attention( method reset_parameters (line 111) | def reset_parameters(self) -> None: method _load_from_state_dict (line 115) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... function mark_only_adapter_as_trainable (line 122) | def mark_only_adapter_as_trainable(model: GPT) -> None: function adapter_filter (line 128) | def adapter_filter(key: str, value: Any) -> bool: FILE: litgpt/adapter_v2.py class Config (line 28) | class Config(BaseConfig): method mlp_class (line 30) | def mlp_class(self) -> Type: function adapter_filter (line 34) | def adapter_filter(key: str, value: Any) -> bool: class AdapterV2Linear (line 50) | class AdapterV2Linear(torch.nn.Module): method __init__ (line 51) | def __init__(self, in_features: int, out_features: int, **kwargs) -> N... method forward (line 57) | def forward(self, x: torch.Tensor) -> torch.Tensor: method reset_parameters (line 60) | def reset_parameters(self) -> None: class GPT (line 65) | class GPT(BaseModel): method __init__ (line 67) | def __init__(self, config: Config) -> None: method from_name (line 84) | def from_name(cls, name: str, **kwargs: Any) -> Self: method _init_weights (line 87) | def _init_weights(self, module: nn.Module) -> None: method _load_from_state_dict (line 93) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class Block (line 100) | class Block(BaseBlock): method __init__ (line 101) | def __init__(self, config: Config, block_idx: int) -> None: class CausalSelfAttention (line 107) | class CausalSelfAttention(BaseCausalSelfAttention): method __init__ (line 111) | def __init__(self, config: Config, block_idx: int) -> None: method _load_from_state_dict (line 119) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class GptNeoxMLP (line 141) | class GptNeoxMLP(litgpt.model.GptNeoxMLP): method __init__ (line 142) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 148) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class LLaMAMLP (line 160) | class LLaMAMLP(litgpt.model.LLaMAMLP): method __init__ (line 161) | def __init__(self, config: Config, intermediate_size: Optional[int] = ... method _load_from_state_dict (line 169) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class GemmaMLP (line 183) | class GemmaMLP(LLaMAMLP): method forward (line 184) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LLaMAMoE (line 191) | class LLaMAMoE(litgpt.model.LLaMAMoE): method __init__ (line 192) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 200) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... function mark_only_adapter_v2_as_trainable (line 207) | def mark_only_adapter_v2_as_trainable(model: GPT) -> None: FILE: litgpt/api.py class LLM (line 37) | class LLM(torch.nn.Module): method __init__ (line 38) | def __init__( method tokenizer (line 76) | def tokenizer(self): method state_dict (line 79) | def state_dict(self, destination=None, prefix="", keep_vars=False): method load_state_dict (line 82) | def load_state_dict(self, state_dict, strict=True): method forward (line 85) | def forward( method trainer_setup (line 100) | def trainer_setup(self, trainer_ckpt: Optional[Path] = None) -> None: method save (line 126) | def save(self, out_dir: Optional[Path] = None, prompt_style: Optional[... method load (line 148) | def load( method distribute (line 256) | def distribute( method generate (line 461) | def generate( method _text_to_token_ids (line 570) | def _text_to_token_ids(self, prompt: str, sys_prompt: Optional[str] = ... method benchmark (line 576) | def benchmark(self, num_iterations=1, **kwargs): class Preprocessor (line 619) | class Preprocessor: method __init__ (line 624) | def __init__(self, tokenizer: Tokenizer, device: str = "cpu") -> None: method encode (line 628) | def encode(self, text: str) -> torch.Tensor: method decode (line 631) | def decode(self, token_ids: torch.Tensor) -> str: function calculate_number_of_devices (line 635) | def calculate_number_of_devices(devices): function benchmark_dict_to_markdown_table (line 643) | def benchmark_dict_to_markdown_table(data): function pull_request_benchmark_util (line 666) | def pull_request_benchmark_util(model_name="microsoft/phi-2", num_iterat... FILE: litgpt/args.py class TrainArgs (line 9) | class TrainArgs: method __post_init__ (line 42) | def __post_init__(self) -> None: method gradient_accumulation_iters (line 57) | def gradient_accumulation_iters(self, devices: int, num_nodes: int = 1... method batch_size (line 63) | def batch_size(self, devices: int, num_nodes: int = 1) -> int: method warmup_iters (line 69) | def warmup_iters(self, devices: int, num_nodes: int, max_iters: int, t... class EvalArgs (line 79) | class EvalArgs: class LogArgs (line 98) | class LogArgs: FILE: litgpt/chat/base.py function generate (line 28) | def generate( function process_prompt (line 77) | def process_prompt( function interact (line 123) | def interact(multiline, model, tokenizer, prompt_style, fabric, temperat... function main (line 151) | def main( FILE: litgpt/config.py function find_multiple (line 12) | def find_multiple(n: int, k: int) -> int: class Config (line 26) | class Config: method __post_init__ (line 118) | def __post_init__(self): method from_name (line 186) | def from_name(cls, name: str, **kwargs: Any) -> Optional[Self]: method from_file (line 206) | def from_file(cls, path: Union[str, Path], **kwargs: Any) -> Self: method from_checkpoint (line 215) | def from_checkpoint(cls, path: Path, **kwargs: Any) -> Self: method mlp_class (line 224) | def mlp_class(self) -> Type: method norm_class (line 231) | def norm_class(self) -> Type: function check_indicator_and_length (line 252) | def check_indicator_and_length( FILE: litgpt/data/alpaca.py class Alpaca (line 21) | class Alpaca(DataModule): method __post_init__ (line 49) | def __post_init__(self) -> None: method connect (line 54) | def connect( method prepare_data (line 61) | def prepare_data(self) -> None: method setup (line 65) | def setup(self, stage: str = "") -> None: method train_dataloader (line 94) | def train_dataloader(self) -> DataLoader: method val_dataloader (line 104) | def val_dataloader(self) -> DataLoader: function download_if_missing (line 114) | def download_if_missing(file_path: Path, file_url: str, mode: str = "w",... FILE: litgpt/data/alpaca_2k.py class Alpaca2k (line 12) | class Alpaca2k(Alpaca): method prepare_data (line 24) | def prepare_data(self) -> None: method setup (line 29) | def setup(self, stage: str = "") -> None: FILE: litgpt/data/alpaca_gpt4.py class AlpacaGPT4 (line 13) | class AlpacaGPT4(Alpaca): FILE: litgpt/data/base.py class DataModule (line 15) | class DataModule(LightningDataModule): method connect (line 19) | def connect( method setup (line 30) | def setup(self, stage: str = "") -> None: method __repr__ (line 34) | def __repr__(self) -> str: class SFTDataset (line 38) | class SFTDataset(Dataset): method __init__ (line 58) | def __init__( method __len__ (line 78) | def __len__(self) -> int: method __getitem__ (line 81) | def __getitem__(self, idx: int) -> Dict[str, Union[Tensor, Dict[str, i... function get_sft_collate_fn (line 111) | def get_sft_collate_fn(max_seq_length: int = -1, pad_id: int = 0, ignore... function _sft_collate_fn (line 121) | def _sft_collate_fn( FILE: litgpt/data/deita.py class Deita (line 17) | class Deita(DataModule): method __post_init__ (line 43) | def __post_init__(self) -> None: method connect (line 48) | def connect( method prepare_data (line 55) | def prepare_data(self) -> None: method setup (line 60) | def setup(self, stage: str = "") -> None: method train_dataloader (line 84) | def train_dataloader(self) -> DataLoader: method val_dataloader (line 94) | def val_dataloader(self) -> DataLoader: function format_dataset (line 104) | def format_dataset(dataset: List[dict], include_multi_turn_conversations... FILE: litgpt/data/flan.py class FLAN (line 22) | class FLAN(DataModule): method __post_init__ (line 48) | def __post_init__(self): method connect (line 62) | def connect( method prepare_data (line 69) | def prepare_data(self) -> None: method train_dataloader (line 77) | def train_dataloader(self): method val_dataloader (line 80) | def val_dataloader(self): method _dataloader (line 83) | def _dataloader(self, split: str) -> DataLoader: function load_jsonl (line 108) | def load_jsonl(filename: Path) -> List[Dict[str, str]]: function _transform (line 116) | def _transform(item: dict) -> dict: function _supported_subsets (line 122) | def _supported_subsets() -> Set[str]: FILE: litgpt/data/json_data.py class JSON (line 18) | class JSON(DataModule): method __post_init__ (line 45) | def __post_init__(self): method connect (line 69) | def connect( method setup (line 76) | def setup(self, stage: str = "") -> None: method train_dataloader (line 96) | def train_dataloader(self) -> DataLoader: method val_dataloader (line 106) | def val_dataloader(self) -> DataLoader: method get_splits (line 115) | def get_splits(self) -> Tuple: method find_split (line 138) | def find_split(self, split_name: str) -> Optional[Path]: function load_split (line 145) | def load_split(json_path: Path) -> Any: FILE: litgpt/data/lima.py class LIMA (line 17) | class LIMA(DataModule): method __post_init__ (line 46) | def __post_init__(self): method connect (line 57) | def connect( method prepare_data (line 64) | def prepare_data(self) -> None: method setup (line 69) | def setup(self, stage: str = "") -> None: method train_dataloader (line 100) | def train_dataloader(self) -> DataLoader: method val_dataloader (line 110) | def val_dataloader(self) -> DataLoader: function format_dataset (line 120) | def format_dataset(dataset_partition: dict, include_multi_turn_conversat... FILE: litgpt/data/lit_data.py class LitData (line 14) | class LitData(DataModule): method __post_init__ (line 33) | def __post_init__(self) -> None: method connect (line 38) | def connect( method train_dataloader (line 44) | def train_dataloader(self) -> DataLoader: method val_dataloader (line 48) | def val_dataloader(self) -> DataLoader: method _dataloader (line 52) | def _dataloader(self, input_dir: str, train: bool): FILE: litgpt/data/longform.py class LongForm (line 20) | class LongForm(DataModule): method __post_init__ (line 42) | def __post_init__(self) -> None: method connect (line 47) | def connect( method prepare_data (line 54) | def prepare_data(self) -> None: method train_dataloader (line 59) | def train_dataloader(self): method val_dataloader (line 62) | def val_dataloader(self): method _dataloader (line 65) | def _dataloader(self, split: str) -> DataLoader: function _transform (line 88) | def _transform(item: dict) -> dict: FILE: litgpt/data/microllama.py class MicroLlama (line 10) | class MicroLlama(TinyLlama): method __init__ (line 13) | def __init__(self, data_path: Union[str, Path] = Path("data/"), seed: ... FILE: litgpt/data/openwebtext.py class OpenWebText (line 15) | class OpenWebText(DataModule): method __post_init__ (line 32) | def __post_init__(self) -> None: method connect (line 38) | def connect( method prepare_data (line 45) | def prepare_data(self) -> None: method train_dataloader (line 83) | def train_dataloader(self) -> DataLoader: method val_dataloader (line 96) | def val_dataloader(self) -> DataLoader: FILE: litgpt/data/prepare_slimpajama.py class SlimPajamaDataRecipe (line 13) | class SlimPajamaDataRecipe(DataChunkRecipe): method __init__ (line 16) | def __init__(self, tokenizer: Tokenizer, chunk_size: int): method prepare_structure (line 20) | def prepare_structure(self, input_dir): method prepare_item (line 24) | def prepare_item(self, filepath): function prepare (line 36) | def prepare( FILE: litgpt/data/prepare_starcoder.py class StarcoderDataRecipe (line 18) | class StarcoderDataRecipe(DataChunkRecipe): method __init__ (line 21) | def __init__(self, tokenizer: Tokenizer, chunk_size: int): method prepare_structure (line 25) | def prepare_structure(self, input_dir): method prepare_item (line 29) | def prepare_item(self, item_metadata): function prepare (line 52) | def prepare( FILE: litgpt/data/text_files.py class TextFiles (line 16) | class TextFiles(DataModule): method __post_init__ (line 39) | def __post_init__(self) -> None: method connect (line 47) | def connect(self, tokenizer: Optional[Tokenizer] = None, batch_size: i... method prepare_data (line 52) | def prepare_data(self) -> None: method train_dataloader (line 108) | def train_dataloader(self) -> DataLoader: method val_dataloader (line 122) | def val_dataloader(self) -> DataLoader: function tokenize (line 136) | def tokenize(filename: str, tokenizer: Tokenizer): function validate_tokenizer (line 143) | def validate_tokenizer(tokenizer: Tokenizer) -> None: FILE: litgpt/data/tinyllama.py class TinyLlama (line 13) | class TinyLlama(DataModule): method __post_init__ (line 33) | def __post_init__(self): method connect (line 44) | def connect( method prepare_data (line 50) | def prepare_data(self) -> None: method train_dataloader (line 59) | def train_dataloader(self) -> DataLoader: method val_dataloader (line 92) | def val_dataloader(self) -> DataLoader: FILE: litgpt/data/tinystories.py class TinyStories (line 20) | class TinyStories(DataModule): method __post_init__ (line 38) | def __post_init__(self) -> None: method connect (line 43) | def connect(self, tokenizer: Optional[Tokenizer] = None, batch_size: i... method prepare_data (line 48) | def prepare_data(self) -> None: method train_dataloader (line 81) | def train_dataloader(self) -> DataLoader: method val_dataloader (line 94) | def val_dataloader(self) -> DataLoader: function tokenize (line 108) | def tokenize(filename: str, tokenizer: Tokenizer): function download (line 124) | def download(data_dir: Path): FILE: litgpt/deploy/serve.py class BaseLitAPI (line 21) | class BaseLitAPI(LitAPI): method __init__ (line 22) | def __init__( method setup (line 50) | def setup(self, device: str) -> None: method decode_request (line 71) | def decode_request(self, request: Dict[str, Any]) -> Any: class SimpleLitAPI (line 76) | class SimpleLitAPI(BaseLitAPI): method __init__ (line 77) | def __init__( method setup (line 103) | def setup(self, device: str): method predict (line 106) | def predict(self, inputs: str) -> Any: method encode_response (line 116) | def encode_response(self, output: str) -> Dict[str, Any]: class StreamLitAPI (line 121) | class StreamLitAPI(BaseLitAPI): method __init__ (line 122) | def __init__( method setup (line 148) | def setup(self, device: str): method predict (line 151) | def predict(self, inputs: torch.Tensor) -> Any: method encode_response (line 161) | def encode_response(self, output): class OpenAISpecLitAPI (line 166) | class OpenAISpecLitAPI(BaseLitAPI): method __init__ (line 167) | def __init__( method setup (line 193) | def setup(self, device: str): method decode_request (line 213) | def decode_request(self, request: "ChatCompletionRequest") -> Any: method predict (line 217) | def predict(self, inputs: str, context: dict) -> Any: function run_server (line 234) | def run_server( FILE: litgpt/eval/evaluate.py function prepare_results (line 15) | def prepare_results(results, save_filepath, print_results=True): function convert_and_evaluate (line 27) | def convert_and_evaluate( FILE: litgpt/finetune/adapter.py function setup (line 48) | def setup( function main (line 151) | def main( function fit (line 244) | def fit( function validate (line 391) | def validate( function generate_example (line 412) | def generate_example(fabric: L.Fabric, model: GPT, tokenizer: Tokenizer,... function get_lr_scheduler (line 444) | def get_lr_scheduler(optimizer, warmup_steps: int, max_steps: int): function get_dataloaders (line 451) | def get_dataloaders( function get_longest_seq_length (line 464) | def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]: function save_adapter_checkpoint (line 472) | def save_adapter_checkpoint(fabric: L.Fabric, model: torch.nn.Module, fi... function validate_args (line 477) | def validate_args(train: TrainArgs, eval: EvalArgs) -> None: FILE: litgpt/finetune/adapter_v2.py function setup (line 49) | def setup( function main (line 153) | def main( function fit (line 261) | def fit( function validate (line 418) | def validate( function generate_example (line 439) | def generate_example(fabric: L.Fabric, model: GPT, tokenizer: Tokenizer,... function get_lr_scheduler (line 467) | def get_lr_scheduler(optimizer, warmup_steps: int, max_steps: int): function get_dataloaders (line 474) | def get_dataloaders( function get_longest_seq_length (line 487) | def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]: function save_adapter_v2_checkpoint (line 495) | def save_adapter_v2_checkpoint(fabric: L.Fabric, model: torch.nn.Module,... function validate_args (line 500) | def validate_args(train: TrainArgs, eval: EvalArgs) -> None: FILE: litgpt/finetune/full.py function setup (line 44) | def setup( function main (line 126) | def main( function fit (line 209) | def fit( function validate (line 363) | def validate( function generate_example (line 383) | def generate_example(fabric: L.Fabric, model: GPT, tokenizer: Tokenizer,... function get_lr_scheduler (line 415) | def get_lr_scheduler(optimizer, warmup_steps: int, max_steps: int): function get_dataloaders (line 422) | def get_dataloaders( function get_longest_seq_length (line 435) | def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]: function validate_args (line 443) | def validate_args(train: TrainArgs, eval: EvalArgs) -> None: FILE: litgpt/finetune/lora.py function setup (line 49) | def setup( function main (line 183) | def main( function fit (line 285) | def fit( function validate (line 440) | def validate( function generate_example (line 461) | def generate_example(fabric: L.Fabric, model: GPT, tokenizer: Tokenizer,... function get_lr_scheduler (line 490) | def get_lr_scheduler(optimizer, warmup_steps: int, max_steps: int): function get_dataloaders (line 497) | def get_dataloaders( function get_longest_seq_length (line 510) | def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]: function parallelize_fn (line 518) | def parallelize_fn(model, device_mesh, activation_checkpointing=True): function save_lora_checkpoint (line 542) | def save_lora_checkpoint(fabric: L.Fabric, model: torch.nn.Module, file_... function validate_args (line 559) | def validate_args(train: TrainArgs, eval: EvalArgs) -> None: FILE: litgpt/finetune/lora_legacy.py function setup (line 49) | def setup( function main (line 183) | def main( function fit (line 278) | def fit( function validate (line 425) | def validate( function generate_example (line 446) | def generate_example(fabric: L.Fabric, model: GPT, tokenizer: Tokenizer,... function get_lr_scheduler (line 475) | def get_lr_scheduler(optimizer, warmup_steps: int, max_steps: int): function get_dataloaders (line 482) | def get_dataloaders( function get_longest_seq_length (line 495) | def get_longest_seq_length(data: List[Dict]) -> Tuple[int, int]: function save_lora_checkpoint (line 503) | def save_lora_checkpoint(fabric: L.Fabric, model: torch.nn.Module, file_... function validate_args (line 508) | def validate_args(train: TrainArgs, eval: EvalArgs) -> None: FILE: litgpt/generate/adapter.py function main (line 28) | def main( FILE: litgpt/generate/adapter_v2.py function main (line 28) | def main( FILE: litgpt/generate/base.py function multinomial_num_samples_1 (line 30) | def multinomial_num_samples_1(probs: torch.Tensor) -> torch.Tensor: function sample_top_p (line 38) | def sample_top_p(logits: torch.Tensor, top_p: float) -> torch.Tensor: function sample (line 53) | def sample( function next_token (line 76) | def next_token( function batched_sample (line 88) | def batched_sample(logits: list[torch.Tensor], kwargs: list[dict]) -> to... function batched_next_token (line 95) | def batched_next_token( function generate_fn (line 130) | def generate_fn( function batched_generate_fn (line 241) | def batched_generate_fn( function generate (line 374) | def generate( function main (line 431) | def main( FILE: litgpt/generate/full.py function main (line 27) | def main( FILE: litgpt/generate/sequentially.py function sequential (line 36) | def sequential(model: GPT, root: torch.device, max_seq_length: int, devi... function chunk_sizes (line 96) | def chunk_sizes(num_units: int, devices: int) -> List[int]: function layer_to_device (line 102) | def layer_to_device( function move_block_input (line 117) | def move_block_input(device: torch.device, module: torch.nn.Module, ins): function move_block_output (line 123) | def move_block_output(device: torch.device, module: torch.nn.Module, ins... function replace_device (line 128) | def replace_device(module: torch.nn.Module, replace: torch.device, by: t... function main (line 146) | def main( FILE: litgpt/generate/speculative_decoding.py function sample (line 32) | def sample( function speculative_decoding (line 60) | def speculative_decoding( function generate (line 172) | def generate( function setup_model (line 306) | def setup_model(config: Config, max_returned_tokens: int, fabric: L.Fabr... function load_model (line 319) | def load_model(checkpoint_dir: Path, fabric: L.Fabric) -> Tuple[Config, ... function main (line 329) | def main( FILE: litgpt/generate/tp.py function tensor_parallel_linear (line 33) | def tensor_parallel_linear(fabric: L.Fabric, linear: torch.nn.Linear, st... function tensor_parallel_mlp (line 53) | def tensor_parallel_mlp(fabric: L.Fabric, mlp: Union[GptNeoxMLP, LLaMAML... function tensor_parallel_attn (line 72) | def tensor_parallel_attn(fabric: L.Fabric, attn: CausalSelfAttention) ->... function all_reduce_output (line 78) | def all_reduce_output(world_size: int, module: torch.nn.Module, ins, out... function tensor_parallel (line 84) | def tensor_parallel(fabric: L.Fabric, model: GPT) -> GPT: function main (line 103) | def main( FILE: litgpt/lora.py class LoRALayer (line 64) | class LoRALayer(nn.Module): method __init__ (line 65) | def __init__(self, r: int, lora_alpha: int, lora_dropout: float): class LoRALinear (line 89) | class LoRALinear(LoRALayer): method __init__ (line 91) | def __init__( method reset_parameters (line 130) | def reset_parameters(self) -> None: method get_lora_AB (line 138) | def get_lora_AB(self) -> torch.Tensor: method merge (line 142) | def merge(self) -> None: method forward (line 165) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LoRAQKVLinear (line 175) | class LoRAQKVLinear(LoRALinear): method __init__ (line 177) | def __init__( method lora_ind (line 265) | def lora_ind(self) -> torch.Tensor: method zero_pad (line 285) | def zero_pad(self, x: torch.Tensor) -> torch.Tensor: method conv1d (line 325) | def conv1d(self, input: torch.Tensor, weight: torch.Tensor) -> torch.T... method get_lora_AB (line 361) | def get_lora_AB(self) -> torch.Tensor: method merge (line 373) | def merge(self) -> None: method forward (line 378) | def forward(self, x: torch.Tensor) -> torch.Tensor: function mark_only_lora_as_trainable (line 414) | def mark_only_lora_as_trainable(model: nn.Module, bias: str = "none") ->... function lora_filter (line 447) | def lora_filter(key: str, value: Any) -> bool: class Config (line 452) | class Config(BaseConfig): method mlp_class (line 475) | def mlp_class(self) -> Type: class GPT (line 479) | class GPT(BaseModel): method __init__ (line 481) | def __init__(self, config: Config) -> None: method from_name (line 504) | def from_name(cls, name: str, **kwargs: Any) -> Self: method _init_weights (line 507) | def _init_weights(self, module: nn.Module) -> None: method _load_from_state_dict (line 513) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class Block (line 520) | class Block(BaseBlock): method __init__ (line 521) | def __init__(self, config: Config, block_idx: int) -> None: class CausalSelfAttention (line 527) | class CausalSelfAttention(BaseCausalSelfAttention): method __init__ (line 528) | def __init__(self, config: Config, block_idx: int) -> None: method _load_from_state_dict (line 553) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... function create_lora_linear (line 572) | def create_lora_linear( class GptNeoxMLP (line 593) | class GptNeoxMLP(litgpt.model.GptNeoxMLP): method __init__ (line 594) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 600) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class LLaMAMLP (line 612) | class LLaMAMLP(litgpt.model.LLaMAMLP): method __init__ (line 613) | def __init__(self, config: Config, intermediate_size: Optional[int] = ... method _load_from_state_dict (line 621) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... class GemmaMLP (line 635) | class GemmaMLP(LLaMAMLP): method forward (line 636) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LLaMAMoE (line 643) | class LLaMAMoE(litgpt.model.LLaMAMoE): method __init__ (line 644) | def __init__(self, config: Config) -> None: method _load_from_state_dict (line 652) | def _load_from_state_dict(self, state_dict: Dict, prefix: str, *args: ... function merge_lora_weights (line 659) | def merge_lora_weights(model: GPT) -> None: FILE: litgpt/model.py class GPT (line 22) | class GPT(nn.Module): method __init__ (line 23) | def __init__(self, config: Config) -> None: method max_seq_length (line 40) | def max_seq_length(self) -> int: method max_seq_length (line 44) | def max_seq_length(self, value: int) -> None: method reset_parameters (line 70) | def reset_parameters(self) -> None: method _init_weights (line 74) | def _init_weights(self, module: nn.Module) -> None: method forward (line 85) | def forward( method from_name (line 184) | def from_name(cls, name: str, **kwargs: Any) -> Self: method rope_cache (line 187) | def rope_cache(self, device: Optional[torch.device] = None) -> Tuple[t... method rope_cache_length (line 261) | def rope_cache_length(self) -> int: method set_kv_cache (line 274) | def set_kv_cache( method clear_kv_cache (line 303) | def clear_kv_cache(self) -> None: class Block (line 309) | class Block(nn.Module): method __init__ (line 310) | def __init__( method forward (line 345) | def forward( class CausalSelfAttention (line 390) | class CausalSelfAttention(nn.Module): method __init__ (line 391) | def __init__(self, config: Config, block_idx: int) -> None: method forward (line 430) | def forward( method scaled_dot_product_attention (line 576) | def scaled_dot_product_attention( method build_kv_cache (line 598) | def build_kv_cache( method _load_from_state_dict (line 637) | def _load_from_state_dict(self, state_dict: dict, prefix: str, *args: ... class MultiheadLatentAttention (line 649) | class MultiheadLatentAttention(nn.Module): method __init__ (line 650) | def __init__(self, config: Config, block_idx: int) -> None: method forward (line 685) | def forward( method scaled_dot_product_attention (line 763) | def scaled_dot_product_attention( method build_kv_cache (line 785) | def build_kv_cache( class GptNeoxMLP (line 804) | class GptNeoxMLP(nn.Module): method __init__ (line 805) | def __init__(self, config: Config, intermediate_size: Optional[int] = ... method forward (line 812) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LLaMAMLP (line 818) | class LLaMAMLP(nn.Module): method __init__ (line 819) | def __init__(self, config: Config, intermediate_size: Optional[int] = ... method forward (line 827) | def forward(self, x: torch.Tensor) -> torch.Tensor: class GemmaMLP (line 834) | class GemmaMLP(LLaMAMLP): method forward (line 835) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LLaMAMoE (line 842) | class LLaMAMoE(nn.Module): method __init__ (line 843) | def __init__(self, config: Config) -> None: method forward (line 859) | def forward(self, x: torch.Tensor) -> torch.Tensor: class GroupedTopkRouter (line 888) | class GroupedTopkRouter(nn.Module): method __init__ (line 894) | def __init__(self, config: Config) -> None: method get_topk_indices (line 901) | def get_topk_indices(self, scores: torch.Tensor) -> torch.Tensor: method forward (line 921) | def forward(self, x: torch.Tensor) -> torch.Tensor: function yarn_get_mscale (line 933) | def yarn_get_mscale(scale=1, mscale=1): function build_rope_cache (line 939) | def build_rope_cache( function batched_index_select (line 1075) | def batched_index_select(t, dim, idx): function batched_index_copy_ (line 1094) | def batched_index_copy_(t, dim, idx, val): function apply_rope (line 1144) | def apply_rope(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) ->... function apply_rope_interleave (line 1176) | def apply_rope_interleave(x: torch.Tensor, cos: torch.Tensor, sin: torch... function do_softcapping (line 1217) | def do_softcapping(x: torch.Tensor, thresh: float) -> torch.Tensor: class KVCache (line 1221) | class KVCache(nn.Module): method __init__ (line 1227) | def __init__( method forward (line 1243) | def forward(self, input_pos: torch.Tensor, k: torch.Tensor, v: torch.T... method reset_parameters (line 1283) | def reset_parameters(self) -> None: function build_mask_cache (line 1288) | def build_mask_cache(max_seq_length: int, device: Optional[torch.device]... class RMSNorm (line 1293) | class RMSNorm(torch.nn.Module): method __init__ (line 1300) | def __init__(self, size: int, dim: int = -1, eps: float = 1e-6, add_un... method forward (line 1307) | def forward(self, x: torch.Tensor) -> torch.Tensor: method reset_parameters (line 1316) | def reset_parameters(self) -> None: FILE: litgpt/parser_config.py function parser_commands (line 8) | def parser_commands() -> List[str]: function save_hyperparameters (line 34) | def save_hyperparameters( FILE: litgpt/pretrain.py function setup (line 49) | def setup( function main (line 177) | def main( function fit (line 288) | def fit( function validate (line 427) | def validate( function get_dataloaders (line 451) | def get_dataloaders( function get_lr (line 464) | def get_lr(learning_rate: float, it: int, warmup_iters: int, max_iters: ... function initialize_weights (line 478) | def initialize_weights(fabric: L.Fabric, model: GPT, n_layer: int, n_emb... function save_checkpoint (line 500) | def save_checkpoint(fabric, state, tokenizer_dir, checkpoint_file): function validate_args (line 512) | def validate_args(train: TrainArgs, eval: EvalArgs, initial_checkpoint_d... FILE: litgpt/prompts.py class PromptStyle (line 17) | class PromptStyle: method apply (line 21) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... method stop_tokens (line 24) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]: method from_name (line 28) | def from_name(cls, name: str) -> "PromptStyle": method from_config (line 32) | def from_config(cls, config: Config) -> "PromptStyle": class Default (line 36) | class Default(PromptStyle): method apply (line 37) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... method stop_tokens (line 40) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]: class Alpaca (line 44) | class Alpaca(PromptStyle): method apply (line 45) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class FLAN (line 60) | class FLAN(PromptStyle): method apply (line 61) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Longform (line 69) | class Longform(PromptStyle): method apply (line 70) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class StableLMAlpha (line 78) | class StableLMAlpha(PromptStyle): method apply (line 79) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... method stop_tokens (line 89) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]: class StableLMZephyr (line 98) | class StableLMZephyr(PromptStyle): method apply (line 99) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Falcon (line 103) | class Falcon(PromptStyle): method apply (line 104) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... method stop_tokens (line 107) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]: class Falcon3 (line 117) | class Falcon3(PromptStyle): method apply (line 118) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... method stop_tokens (line 121) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]: class Llama2FunctionCalling (line 128) | class Llama2FunctionCalling(PromptStyle): method apply (line 129) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Llama2 (line 155) | class Llama2(PromptStyle): method apply (line 156) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Llama3 (line 170) | class Llama3(PromptStyle): method apply (line 171) | def apply( method stop_tokens (line 216) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]: class R1Base (line 223) | class R1Base(PromptStyle): method apply (line 224) | def apply( method stop_tokens (line 265) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]: class FreeWilly2 (line 272) | class FreeWilly2(PromptStyle): method apply (line 273) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Platypus (line 278) | class Platypus(PromptStyle): method apply (line 279) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class StableCode (line 283) | class StableCode(PromptStyle): method apply (line 284) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class CodeLlama (line 288) | class CodeLlama(PromptStyle): method apply (line 289) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Phi1 (line 300) | class Phi1(PromptStyle): method apply (line 301) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... method stop_tokens (line 304) | def stop_tokens(self, tokenizer: "Tokenizer") -> Tuple[List[int], ...]: class Phi2 (line 315) | class Phi2(PromptStyle): method apply (line 316) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Phi3 (line 320) | class Phi3(PromptStyle): method apply (line 321) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Phi4 (line 326) | class Phi4(PromptStyle): method apply (line 327) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Phi4Reasoning (line 335) | class Phi4Reasoning(PromptStyle): method apply (line 336) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Phi4Mini (line 344) | class Phi4Mini(PromptStyle): method apply (line 345) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Phi4MiniReasoning (line 353) | class Phi4MiniReasoning(PromptStyle): method apply (line 354) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class TinyLlama (line 359) | class TinyLlama(PromptStyle): method apply (line 360) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Gemma (line 365) | class Gemma(PromptStyle): method apply (line 366) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class OLMo (line 370) | class OLMo(PromptStyle): method apply (line 371) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class ChatML (line 375) | class ChatML(PromptStyle): method __init__ (line 376) | def __init__(self, system_message: Optional[str] = None): method apply (line 379) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... class Qwen2_5 (line 386) | class Qwen2_5(ChatML): method __init__ (line 387) | def __init__(self): class Qwen2_5_Math (line 391) | class Qwen2_5_Math(ChatML): method __init__ (line 392) | def __init__(self): class QwQ (line 396) | class QwQ(ChatML): method __init__ (line 397) | def __init__(self): class Qwen3 (line 403) | class Qwen3(ChatML): method __init__ (line 404) | def __init__(self): class SmolLM2 (line 408) | class SmolLM2(ChatML): method __init__ (line 409) | def __init__(self): class Salamandra (line 413) | class Salamandra(ChatML): method __init__ (line 414) | def __init__(self): function model_name_to_prompt_style (line 456) | def model_name_to_prompt_style(model_name: str) -> PromptStyle: function save_prompt_style (line 520) | def save_prompt_style(style: Union[str, PromptStyle], checkpoint_dir: Pa... function load_prompt_style (line 529) | def load_prompt_style(checkpoint_dir: Path) -> PromptStyle: function has_prompt_style (line 539) | def has_prompt_style(checkpoint_dir: Path) -> bool: FILE: litgpt/scripts/convert_hf_checkpoint.py function copy_weights_gpt_neox (line 28) | def copy_weights_gpt_neox( function copy_weights_falcon (line 81) | def copy_weights_falcon( function copy_weights_hf_llama (line 139) | def copy_weights_hf_llama( function copy_weights_gemma_2 (line 226) | def copy_weights_gemma_2( function copy_weights_gemma_3 (line 294) | def copy_weights_gemma_3( function copy_weights_phi (line 397) | def copy_weights_phi( function copy_weights_qwen_2_5 (line 493) | def copy_weights_qwen_2_5( function copy_weights_olmo2 (line 563) | def copy_weights_olmo2( function copy_weights_qwen_3 (line 642) | def copy_weights_qwen_3( function qkv_reassemble (line 727) | def qkv_reassemble( function layer_template (line 748) | def layer_template(layer_name: str, num_matches: int = 1) -> Tuple[str, ... function load_param (line 756) | def load_param( function convert_hf_checkpoint (line 772) | def convert_hf_checkpoint( FILE: litgpt/scripts/convert_lit_checkpoint.py function copy_weights_falcon (line 18) | def copy_weights_falcon( function copy_weights_gpt_neox (line 66) | def copy_weights_gpt_neox( function copy_weights_llama (line 103) | def copy_weights_llama( function copy_weights_gemma_2 (line 169) | def copy_weights_gemma_2( function copy_weights_gemma_3 (line 218) | def copy_weights_gemma_3( function copy_weights_phi (line 269) | def copy_weights_phi( function copy_weights_qwen_2_5 (line 348) | def copy_weights_qwen_2_5( function copy_weights_olmo2 (line 396) | def copy_weights_olmo2( function copy_weights_qwen_3 (line 454) | def copy_weights_qwen_3( function qkv_reassemble (line 520) | def qkv_reassemble(param: Union[torch.Tensor, NotYetLoadedTensor], confi... function check_conversion_supported (line 538) | def check_conversion_supported(lit_weights: Dict[str, torch.Tensor]) -> ... function convert_lit_checkpoint (line 546) | def convert_lit_checkpoint(checkpoint_dir: Path, output_dir: Path) -> None: FILE: litgpt/scripts/convert_pretrained_checkpoint.py function convert_pretrained_checkpoint (line 12) | def convert_pretrained_checkpoint(checkpoint_dir: Path, output_dir: Path... FILE: litgpt/scripts/download.py function download_from_hub (line 14) | def download_from_hub( function find_weight_files (line 101) | def find_weight_files(repo_id: str, access_token: Optional[str]) -> Tupl... function gated_repo_catcher (line 114) | def gated_repo_catcher(repo_id: str, access_token: Optional[str]): FILE: litgpt/scripts/merge_lora.py function merge_lora (line 17) | def merge_lora( function load_lora_metadata (line 86) | def load_lora_metadata(checkpoint_dir: Path) -> Tuple[Dict[str, Any], Pa... FILE: litgpt/tokenizer.py class Tokenizer (line 12) | class Tokenizer: method __init__ (line 13) | def __init__(self, checkpoint_dir: Union[Path, str]) -> None: method vocab_size (line 73) | def vocab_size(self) -> int: method token_to_id (line 80) | def token_to_id(self, token: str) -> int: method check_if_bos_token_used (line 91) | def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool: method encode (line 108) | def encode( method decode (line 144) | def decode(self, tensor: torch.Tensor) -> str: method decode_stream (line 155) | def decode_stream( FILE: litgpt/utils.py function init_out_dir (line 47) | def init_out_dir(out_dir: Path) -> Path: function find_resume_path (line 55) | def find_resume_path(resume: Union[bool, Literal["auto"], Path], out_dir... function num_parameters (line 69) | def num_parameters(module: nn.Module, requires_grad: Optional[bool] = No... function reset_parameters (line 81) | def reset_parameters(module: nn.Module) -> None: function check_valid_checkpoint_dir (line 88) | def check_valid_checkpoint_dir( class SavingProxyForStorage (line 138) | class SavingProxyForStorage: method __init__ (line 139) | def __init__(self, obj, saver, protocol_version=5): method __reduce_ex__ (line 162) | def __reduce_ex__(self, protocol_version): class SavingProxyForTensor (line 166) | class SavingProxyForTensor: method __init__ (line 167) | def __init__(self, tensor, saver, protocol_version=5): method __reduce_ex__ (line 186) | def __reduce_ex__(self, protocol_version): class IncrementalPyTorchPickler (line 192) | class IncrementalPyTorchPickler(pickle.Pickler): method __init__ (line 193) | def __init__(self, saver, *args, **kwargs): method persistent_id (line 200) | def persistent_id(self, obj): class incremental_save (line 248) | class incremental_save: method __init__ (line 249) | def __init__(self, name): method __enter__ (line 256) | def __enter__(self): method store_early (line 259) | def store_early(self, tensor): method save (line 264) | def save(self, obj): method _write_storage_and_return_key (line 275) | def _write_storage_and_return_key(self, storage): method __exit__ (line 294) | def __exit__(self, type, value, traceback): function chunked_cross_entropy (line 301) | def chunked_cross_entropy( function map_old_state_dict_weights (line 353) | def map_old_state_dict_weights(state_dict: Dict, mapping: Mapping, prefi... function get_default_supported_precision (line 362) | def get_default_supported_precision(training: bool) -> str: function load_checkpoint (line 382) | def load_checkpoint(fabric: L.Fabric, model: nn.Module, checkpoint_path:... function load_checkpoint_update (line 400) | def load_checkpoint_update( function load_from_full_model_state_dict (line 413) | def load_from_full_model_state_dict( function flops_per_param (line 448) | def flops_per_param(max_seq_length: int, n_layer: int, n_embd: int, n_pa... function estimate_flops (line 457) | def estimate_flops(model: "GPT", training: bool) -> int: class CycleIterator (line 481) | class CycleIterator: method __init__ (line 493) | def __init__(self, iterable: Iterable) -> None: method __next__ (line 498) | def __next__(self) -> Any: method __iter__ (line 508) | def __iter__(self) -> Self: function copy_config_files (line 512) | def copy_config_files(source_dir: Path, out_dir: Path) -> None: function CLI (line 524) | def CLI(*args: Any, **kwargs: Any) -> Any: function capture_hparams (line 533) | def capture_hparams() -> Dict[str, Any]: function save_config (line 548) | def save_config(config: "Config", checkpoint_dir: Path) -> None: function parse_devices (line 554) | def parse_devices(devices: Union[str, int]) -> int: function choose_logger (line 562) | def choose_logger( function get_argument_names (line 609) | def get_argument_names(cls): function instantiate_bnb_optimizer (line 618) | def instantiate_bnb_optimizer(optimizer, model_parameters): function instantiate_torch_optimizer (line 635) | def instantiate_torch_optimizer(optimizer, model_parameters, **kwargs): function extend_checkpoint_dir (line 670) | def extend_checkpoint_dir(checkpoint_dir: Path) -> Path: function check_file_size_on_cpu_and_warn (line 681) | def check_file_size_on_cpu_and_warn(checkpoint_path, device, size_limit=... function auto_download_checkpoint (line 697) | def auto_download_checkpoint(model_name, access_token=None, ignore_token... function check_nvlink_connectivity (line 718) | def check_nvlink_connectivity(fabric=None): function _check_nvidia_connectivity (line 746) | def _check_nvidia_connectivity(custom_print): function _check_amd_connectivity (line 781) | def _check_amd_connectivity(custom_print): function fix_and_load_json (line 827) | def fix_and_load_json(s): function create_finetuning_performance_report (line 844) | def create_finetuning_performance_report(training_time, token_counts, de... function select_sft_generate_example (line 868) | def select_sft_generate_example(eval, data): function _RunIf (line 897) | def _RunIf(thunder: bool = False, **kwargs): function kill_process_tree (line 910) | def kill_process_tree(pid: int): FILE: tests/conftest.py function fake_checkpoint_dir (line 23) | def fake_checkpoint_dir(tmp_path): class TensorLike (line 34) | class TensorLike: method __eq__ (line 35) | def __eq__(self, other): function tensor_like (line 40) | def tensor_like(): class FloatLike (line 44) | class FloatLike: method __eq__ (line 45) | def __eq__(self, other): function float_like (line 50) | def float_like(): function restore_default_dtype (line 55) | def restore_default_dtype(): function destroy_process_group (line 61) | def destroy_process_group(): function turn_off_tf32_and_set_seed (line 71) | def turn_off_tf32_and_set_seed(monkeypatch): class MockTokenizer (line 78) | class MockTokenizer: method encode (line 84) | def encode(self, text: str, bos: Optional[bool] = None, eos: bool = Fa... method decode (line 94) | def decode(self, tokens: torch.Tensor) -> str: function mock_tokenizer (line 99) | def mock_tokenizer(): function alpaca_path (line 104) | def alpaca_path(tmp_path): function dolly_path (line 111) | def dolly_path(tmp_path): function longform_path (line 118) | def longform_path(tmp_path): function pytest_collection_modifyitems (line 128) | def pytest_collection_modifyitems(items: List[pytest.Function], config: ... FILE: tests/convert/test_hf_checkpoint.py function test_llama2_70b_conversion (line 12) | def test_llama2_70b_conversion(): function test_convert_hf_checkpoint (line 105) | def test_convert_hf_checkpoint(tmp_path, model_name): function test_qkv_reassemble (line 125) | def test_qkv_reassemble(): FILE: tests/convert/test_lit_checkpoint.py function test_convert_lit_checkpoint (line 42) | def test_convert_lit_checkpoint(tmp_path, model_name): function test_against_falcon_40b (line 64) | def test_against_falcon_40b(): function test_against_original_gpt_neox (line 94) | def test_against_original_gpt_neox(): function test_against_hf_llama2 (line 133) | def test_against_hf_llama2(ours_kwargs): function test_against_mixtral (line 167) | def test_against_mixtral(model_name): function test_against_olmo (line 209) | def test_against_olmo(model_name): function test_against_original_open_llama_3b (line 252) | def test_against_original_open_llama_3b(): function test_against_hf_phi (line 281) | def test_against_hf_phi(model_name): function test_against_hf_phi_3 (line 316) | def test_against_hf_phi_3(model_name): function test_against_original_stablelm_zephyr_3b (line 354) | def test_against_original_stablelm_zephyr_3b(): function test_against_original_gemma (line 402) | def test_against_original_gemma(model_name, device, dtype): function test_against_original_gemma_2 (line 462) | def test_against_original_gemma_2(model_name, device, dtype): function test_against_original_gemma_3 (line 535) | def test_against_original_gemma_3(model_name, device, dtype): function test_check_conversion_supported_adapter (line 590) | def test_check_conversion_supported_adapter(): function test_check_conversion_supported_lora (line 600) | def test_check_conversion_supported_lora(): function test_against_original_qwen_2_5 (line 634) | def test_against_original_qwen_2_5(model_name, device, dtype): function test_qkv_reassemble (line 681) | def test_qkv_reassemble(): FILE: tests/convert/test_pretrained_checkpoint.py function test_convert_pretrained_checkpoint (line 10) | def test_convert_pretrained_checkpoint(tmp_path, fake_checkpoint_dir): FILE: tests/data/test_alpaca.py function test_alpaca (line 6) | def test_alpaca(mock_tokenizer, alpaca_path): FILE: tests/data/test_base.py function test_sft_dataset (line 15) | def test_sft_dataset(max_seq_length, ignore_index, mask_prompt, mock_tok... function test_sft_collate_fn_padding (line 49) | def test_sft_collate_fn_padding(pad_id, ignore_index): function test_sft_collate_fn_truncation (line 74) | def test_sft_collate_fn_truncation(): FILE: tests/data/test_deita.py function test_format_dataset (line 9) | def test_format_dataset(): function test_deita (line 47) | def test_deita(_, format_dataset_mock, mock_tokenizer, tmp_path): FILE: tests/data/test_json.py function test_json (line 12) | def test_json(as_jsonl, tmp_path, mock_tokenizer): function test_json_input_validation (line 69) | def test_json_input_validation(tmp_path): function test_json_with_splits (line 95) | def test_json_with_splits(as_jsonl, tmp_path, mock_tokenizer): FILE: tests/data/test_lit_data.py function test_input_dir_and_splits (line 13) | def test_input_dir_and_splits(dl_mock, tmp_path): function test_dataset_args (line 42) | def test_dataset_args(streaming_dataloader_mock, streaming_dataset_mock,... FILE: tests/data/test_longform.py function test_longform (line 6) | def test_longform(mock_tokenizer, longform_path): FILE: tests/data/test_openwebtext.py function test_openwebtext (line 17) | def test_openwebtext(_, __, optimize_mock, tmp_path, mock_tokenizer): FILE: tests/data/test_textfiles.py class Tokenizer (line 10) | class Tokenizer: method encode (line 13) | def encode(self, text, bos, eos): function tokenize (line 19) | def tokenize(data): function fake_chunk (line 24) | def fake_chunk(path, data): function test_textfiles_datamodule (line 35) | def test_textfiles_datamodule(tmp_path): class MockTokenizer (line 71) | class MockTokenizer: method encode (line 76) | def encode(self, text, bos=True, eos=False, device=None, max_length=-1): method decode (line 87) | def decode(self, tensor): method decode_stream (line 99) | def decode_stream(self, token_stream, device=None): method vocab_size (line 104) | def vocab_size(self): function test_textfiles_token_loader (line 108) | def test_textfiles_token_loader(tmp_path): FILE: tests/data/test_tinyllama.py function test_tinyllama (line 12) | def test_tinyllama(_, tmp_path): FILE: tests/data/test_tinystories.py function tokenize (line 10) | def tokenize(data): function fake_chunk (line 15) | def fake_chunk(path, data): function test_pretok_dataset (line 35) | def test_pretok_dataset(tmp_path, max_seq_len, expected): function test_tokenize (line 47) | def test_tokenize(tmp_path, monkeypatch): function test_tinystories_datamodule (line 70) | def test_tinystories_datamodule(tmp_path): FILE: tests/ext_thunder/test_thunder_distributed.py function test_thunder_strategy_ddp_input_parsing (line 24) | def test_thunder_strategy_ddp_input_parsing(): function test_no_backward_sync_thunder (line 32) | def test_no_backward_sync_thunder(choice): function test_jit_ddp_before_setup (line 81) | def test_jit_ddp_before_setup(jit): function test_strategy_ddp_setup_already_traced (line 98) | def test_strategy_ddp_setup_already_traced(): function test_thunder_strategy_fsdp_input_parsing (line 114) | def test_thunder_strategy_fsdp_input_parsing(): function test_save_checkpoint_invalid_settings_raise (line 127) | def test_save_checkpoint_invalid_settings_raise(tmp_path): class Submodule (line 160) | class Submodule(torch.nn.Module): method __init__ (line 161) | def __init__(self, h: int): method forward (line 165) | def forward(self, x): class MyModel (line 170) | class MyModel(torch.nn.Module): method __init__ (line 171) | def __init__(self, h: int): method forward (line 177) | def forward(self): method reset_parameters (line 181) | def reset_parameters(self): function test_materialize_meta_tensors (line 187) | def test_materialize_meta_tensors(): class StatefulThing (line 203) | class StatefulThing: method state_dict (line 204) | def state_dict(self): method load_state_dict (line 207) | def load_state_dict(self, state_dict): class TensorLike (line 211) | class TensorLike: method __init__ (line 212) | def __init__(self, device: Optional[Union[str, torch.device]] = None, ... method __eq__ (line 216) | def __eq__(self, other): function test_save_load_full_checkpoint (line 226) | def test_save_load_full_checkpoint(tmp_path): function test_load_full_checkpoint_only_model (line 278) | def test_load_full_checkpoint_only_model(tmp_path): function distributed_ckpt_to_regular (line 312) | def distributed_ckpt_to_regular(path): function test_save_load_sharded_checkpoint (line 348) | def test_save_load_sharded_checkpoint(tmp_path): function test_jit_fsdp_before_setup (line 403) | def test_jit_fsdp_before_setup(jit): function test_strategy_fsdp_setup_already_traced (line 420) | def test_strategy_fsdp_setup_already_traced(): FILE: tests/ext_thunder/test_thunder_pretrain.py function test_pretrain_thunder (line 19) | def test_pretrain_thunder(tmp_path, monkeypatch): FILE: tests/ext_thunder/test_unsloth_executor.py function test_unsloth_cross_entropy (line 11) | def test_unsloth_cross_entropy(reduction): function test_unsloth_rope (line 46) | def test_unsloth_rope(): function test_unsloth_swiglu (line 76) | def test_unsloth_swiglu(): function test_unsloth_gpt (line 106) | def test_unsloth_gpt(): FILE: tests/generate/test_adapter.py function test_main (line 23) | def test_main(fake_checkpoint_dir, monkeypatch, version, tensor_like): function test_cli (line 72) | def test_cli(version): FILE: tests/generate/test_main.py function test_generate (line 29) | def test_generate(max_seq_length): function test_main (line 61) | def test_main(fake_checkpoint_dir, monkeypatch, tensor_like): function test_cli (line 105) | def test_cli(): function test_sample (line 113) | def test_sample(temperature): function test_generate_different_results_with_different_top_p (line 129) | def test_generate_different_results_with_different_top_p(): FILE: tests/generate/test_sequentially.py function test_layer_to_device (line 40) | def test_layer_to_device(n_layer, devices, expected): function path_to_device (line 50) | def path_to_device(model): function test_replace_device (line 54) | def test_replace_device(): function _test_model_1device (line 98) | def _test_model_1device(accelerator): function test_model_1device_cuda (line 145) | def test_model_1device_cuda(): function test_model_1device_cpu (line 149) | def test_model_1device_cpu(): function test_model_forward_hooks (line 154) | def test_model_forward_hooks(): function test_base_with_sequentially (line 269) | def test_base_with_sequentially(tmp_path): function test_cli (line 296) | def test_cli(): FILE: tests/generate/test_tp.py function test_tensor_parallel_linear (line 19) | def test_tensor_parallel_linear(): function test_tensor_parallel_llama (line 87) | def test_tensor_parallel_llama(name, expected): function test_tp (line 110) | def test_tp(tmp_path): function test_cli (line 136) | def test_cli(): FILE: tests/generate/utils.py function find_forward_hooks (line 4) | def find_forward_hooks(module): FILE: tests/test_adapter.py function test_config_identical (line 32) | def test_config_identical(): function test_adapter_filter (line 46) | def test_adapter_filter(tmp_path): function test_adapter_script (line 63) | def test_adapter_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpa... function test_adapter_gpt_init_weights (line 110) | def test_adapter_gpt_init_weights(): function test_adapter_compile (line 124) | def test_adapter_compile(): function test_adapter_bitsandbytes (line 143) | def test_adapter_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir... function test_against_hf_gemma (line 255) | def test_against_hf_gemma(model_name): function test_against_original_gemma_2 (line 312) | def test_against_original_gemma_2(model_name, device, dtype): function test_against_original_gemma_3 (line 383) | def test_against_original_gemma_3(model_name, device, dtype): function test_load_legacy_state_dict (line 436) | def test_load_legacy_state_dict(): FILE: tests/test_adapter_v2.py function test_config_identical (line 33) | def test_config_identical(): function test_adapter_v2_filter (line 45) | def test_adapter_v2_filter(tmp_path): function test_adapter_v2_script (line 80) | def test_adapter_v2_script(tmp_path, fake_checkpoint_dir, monkeypatch, a... function test_adapter_v2_gpt_init_weights (line 127) | def test_adapter_v2_gpt_init_weights(): function test_base_model_can_be_adapter_v2_loaded (line 140) | def test_base_model_can_be_adapter_v2_loaded(name): function test_adapter_v2_compile (line 153) | def test_adapter_v2_compile(): function test_against_hf_mixtral (line 172) | def test_against_hf_mixtral(): function test_against_hf_gemma (line 218) | def test_against_hf_gemma(model_name): function test_against_original_gemma_2 (line 262) | def test_against_original_gemma_2(model_name): function test_against_original_gemma_3 (line 326) | def test_against_original_gemma_3(model_name): function test_adapter_v2_bitsandbytes (line 386) | def test_adapter_v2_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_... function test_load_legacy_state_dict (line 542) | def test_load_legacy_state_dict(): FILE: tests/test_api.py function mock_llm (line 33) | def mock_llm(): function test_load_model (line 43) | def test_load_model(mock_llm): function test_generate (line 52) | def test_generate(mock_llm): function test_stream_generate (line 60) | def test_stream_generate(mock_llm): function test_generate_token_ids (line 73) | def test_generate_token_ids(mock_llm): function test_calculate_number_of_devices (line 83) | def test_calculate_number_of_devices(): function test_llm_load_random_init (line 89) | def test_llm_load_random_init(tmp_path): function test_llm_load_hub_init (line 115) | def test_llm_load_hub_init(tmp_path): function test_model_not_initialized (line 128) | def test_model_not_initialized(tmp_path): function test_more_than_1_device_for_sequential_gpu (line 141) | def test_more_than_1_device_for_sequential_gpu(tmp_path): function test_more_than_1_device_for_tensor_parallel_gpu (line 174) | def test_more_than_1_device_for_tensor_parallel_gpu(tmp_path): function test_sequential_tp_incompatibility_with_random_weights (line 188) | def test_sequential_tp_incompatibility_with_random_weights(strategy, tmp... function test_sequential_tp_cpu (line 201) | def test_sequential_tp_cpu(strategy, tmp_path): function test_initialization_for_trainer (line 213) | def test_initialization_for_trainer(tmp_path): function test_quantization_is_applied (line 225) | def test_quantization_is_applied(tmp_path): function test_fixed_kv_cache (line 236) | def test_fixed_kv_cache(tmp_path): function test_invalid_accelerator (line 248) | def test_invalid_accelerator(tmp_path): function test_returned_benchmark_dir (line 254) | def test_returned_benchmark_dir(tmp_path): function test_benchmark_dict_to_markdown_table_single_values (line 276) | def test_benchmark_dict_to_markdown_table_single_values(): function test_benchmark_dict_to_markdown_table_multiple_values (line 298) | def test_benchmark_dict_to_markdown_table_multiple_values(): function test_state_dict (line 364) | def test_state_dict(tmp_path): function test_save_method (line 373) | def test_save_method(tmp_path): function test_forward_method (line 397) | def test_forward_method(tmp_path): function test_precision_selection (line 411) | def test_precision_selection(tmp_path): FILE: tests/test_args.py function test_compute_warmup_iters (line 7) | def test_compute_warmup_iters(): FILE: tests/test_batch.py function create_llm (line 22) | def create_llm(tmp_path, batch_size, max_seq_length, device) -> tuple[LL... function test_batched_equivalence (line 40) | def test_batched_equivalence(tmp_path): function test_simple_batch (line 94) | def test_simple_batch(): function test_batch_generate (line 133) | def test_batch_generate(tmp_path): function test_batch_generate_equivalence (line 257) | def test_batch_generate_equivalence(tmp_path): FILE: tests/test_chat.py function test_generate (line 39) | def test_generate(monkeypatch, generated, stop_tokens, expected): function test_decode (line 69) | def test_decode(): function test_main (line 94) | def test_main(mocked_input, stop_iteration, fake_checkpoint_dir, monkeyp... function test_cli (line 134) | def test_cli(): function test_merge_lora_if_needed (line 144) | def test_merge_lora_if_needed(mocked_merge_lora, mocked_input, fake_chec... function test_litgpt_chat_endtoend (line 166) | def test_litgpt_chat_endtoend(): function test_litgpt_generate_endtoend (line 191) | def test_litgpt_generate_endtoend(): FILE: tests/test_ci.py function test_gpu_ci_installs_bitsandbytes (line 9) | def test_gpu_ci_installs_bitsandbytes(): FILE: tests/test_cli.py function test_cli (line 12) | def test_cli(): function test_pretrain_allows_max_steps (line 60) | def test_pretrain_allows_max_steps(): function test_rewrite_finetune_command (line 79) | def test_rewrite_finetune_command(): FILE: tests/test_config.py function test_config (line 11) | def test_config(): function test_from_hf_name (line 29) | def test_from_hf_name(): function test_nonexisting_name (line 39) | def test_nonexisting_name(): function test_short_and_hf_names_are_equal_unless_on_purpose (line 45) | def test_short_and_hf_names_are_equal_unless_on_purpose(config): function test_from_hf_name_with_org_string (line 53) | def test_from_hf_name_with_org_string(): function test_from_checkpoint (line 72) | def test_from_checkpoint(tmp_path): function test_head_size (line 103) | def test_head_size(head_size): function test_find_multiple (line 109) | def test_find_multiple(): FILE: tests/test_config_hub.py function test_config_help (line 39) | def test_config_help(script_file, config_file, monkeypatch): FILE: tests/test_deepseek_moe.py function test_deepseek_moe_litgpt_vs_hf (line 15) | def test_deepseek_moe_litgpt_vs_hf(batch_size, seq_len, device): function sync_weights (line 94) | def sync_weights(litgpt_model, hf_model): FILE: tests/test_distributed.py function test_no_backward_sync (line 10) | def test_no_backward_sync(strategy): FILE: tests/test_evaluate.py function test_evaluate_script (line 19) | def test_evaluate_script(tmp_path): function test_cli (line 72) | def test_cli(): FILE: tests/test_full.py function test_full_script (line 18) | def test_full_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_... FILE: tests/test_generate_speculatively.py function test_speculative_decoding_target_never_accepts_draft_tokens (line 19) | def test_speculative_decoding_target_never_accepts_draft_tokens(): function test_speculative_decoding_target_always_accepts_draft_tokens (line 45) | def test_speculative_decoding_target_always_accepts_draft_tokens(): function test_speculative_decoding_target_sometimes_accepts_draft_tokens (line 71) | def test_speculative_decoding_target_sometimes_accepts_draft_tokens(): function test_generate (line 106) | def test_generate(max_seq_length, speculative_k): function test_main (line 130) | def test_main(fake_checkpoint_dir, monkeypatch, tensor_like): function test_cli (line 211) | def test_cli(): FILE: tests/test_lora.py function test_lora_layer_replacement (line 45) | def test_lora_layer_replacement(): function test_lora_merge (line 55) | def test_lora_merge(): function test_lora_mqa_gqa (line 99) | def test_lora_mqa_gqa(): function test_lora_ind_correctness (line 186) | def test_lora_ind_correctness(n_head, n_query_groups, enable_lora): function test_lora_filter (line 227) | def test_lora_filter(tmp_path): function test_lora_script (line 246) | def test_lora_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_... function test_lora_init_when_linear_overridden (line 293) | def test_lora_init_when_linear_overridden(): function test_lora_linear_utilization (line 318) | def test_lora_linear_utilization(apply_to, target_layer_names, mlp_class... function test_lora_gpt_apply_lora_forward_no_exception (line 354) | def test_lora_gpt_apply_lora_forward_no_exception(apply_to): function test_lora_gpt_query_groups_merge_and_forward_no_exception (line 368) | def test_lora_gpt_query_groups_merge_and_forward_no_exception(n_query_gr... function test_lora_qkv_linear_compare_conv1d (line 406) | def test_lora_qkv_linear_compare_conv1d(head_size, n_head, enable_lora): function test_lora_linear_weights_merged_status (line 430) | def test_lora_linear_weights_merged_status(rank, expected_merged): function test_lora_qkv_linear_weights_merged_status (line 441) | def test_lora_qkv_linear_weights_merged_status(rank, enable_lora, expect... function test_lora_merge_with_bitsandbytes (line 450) | def test_lora_merge_with_bitsandbytes(): function test_lora_gpt_init_weights (line 517) | def test_lora_gpt_init_weights(): function test_base_model_can_be_lora_loaded (line 530) | def test_base_model_can_be_lora_loaded(name): function test_lora_compile (line 553) | def test_lora_compile(): function test_against_hf_mixtral (line 584) | def test_against_hf_mixtral(): function test_against_hf_gemma (line 635) | def test_against_hf_gemma(model_name): function test_against_original_gemma_2 (line 690) | def test_against_original_gemma_2(model_name): function test_against_original_gemma_3 (line 746) | def test_against_original_gemma_3(model_name): function test_lora_bitsandbytes (line 800) | def test_lora_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir, a... function test_lora_model_fsdp_init (line 924) | def test_lora_model_fsdp_init(): function test_zero_pad_cpu_and_mocked_mps (line 958) | def test_zero_pad_cpu_and_mocked_mps(): function test_load_legacy_state_dict (line 997) | def test_load_legacy_state_dict(): function test_parallelize_fn (line 1016) | def test_parallelize_fn(): function test_load_from_full_model_state_dict (line 1089) | def test_load_from_full_model_state_dict(): FILE: tests/test_merge_lora.py function test_merge_lora (line 24) | def test_merge_lora(tmp_path, fake_checkpoint_dir, pretrained_dtype, lor... function test_load_lora_metadata (line 77) | def test_load_lora_metadata(fake_checkpoint_dir): FILE: tests/test_model.py function test_against_gpt_neox_model (line 76) | def test_against_gpt_neox_model(rotary_pct, batch_size, n_embd, parallel... function test_against_hf_falcon (line 145) | def test_against_hf_falcon(kwargs, device, dtype): function test_against_original_open_llama_3b (line 191) | def test_against_original_open_llama_3b(device, dtype): function test_against_hf_llama_2_and_3 (line 255) | def test_against_hf_llama_2_and_3(ours_kwargs, device, dtype): function test_against_hf_phi (line 304) | def test_against_hf_phi(model_name, device, dtype): function test_against_hf_phi_3 (line 364) | def test_against_hf_phi_3(model_name, device, dtype): function test_against_mistral_hf_models (line 429) | def test_against_mistral_hf_models(device, dtype, model_name): function test_against_mathstral_hf_models (line 493) | def test_against_mathstral_hf_models(device, dtype): function test_against_hf_mixtral (line 538) | def test_against_hf_mixtral(model_name): function test_against_olmo (line 599) | def test_against_olmo(model_name, device, dtype): function test_against_olmo2 (line 658) | def test_against_olmo2(model_name, device, dtype): function test_against_original_stablelm_zephyr_3b (line 717) | def test_against_original_stablelm_zephyr_3b(device, dtype): function test_against_original_gemma (line 768) | def test_against_original_gemma(model_name, device, dtype): function test_against_original_gemma_2 (line 825) | def test_against_original_gemma_2(model_name, device, dtype): function test_against_original_gemma_3 (line 895) | def test_against_original_gemma_3(model_name, device, dtype): function test_against_multimodal_gemma_3 (line 966) | def test_against_multimodal_gemma_3(model_name, device, dtype): function test_against_original_qwen_2_5 (line 1040) | def test_against_original_qwen_2_5(model_name, device, dtype): function test_against_original_qwen_3 (line 1113) | def test_against_original_qwen_3(model_name, device, dtype): function test_against_original_qwen_3_moe (line 1174) | def test_against_original_qwen_3_moe(model_name, device, dtype): function test_against_original_salamandra (line 1240) | def test_against_original_salamandra(model_name, device, dtype): function test_against_original_smollm2 (line 1300) | def test_against_original_smollm2(model_name, device, dtype): function test_against_hf_falcon3 (line 1360) | def test_against_hf_falcon3(model_name, device, dtype): function test_model_compile (line 1404) | def test_model_compile(): function test_kv_cache (line 1427) | def test_kv_cache(max_seq_length): function test_model_kv_cache_amp (line 1458) | def test_model_kv_cache_amp(): function test_rope_cache_length (line 1469) | def test_rope_cache_length(model_name): function test_sdpa_choice (line 1491) | def test_sdpa_choice(config): function test_sdpa_choice_kv_cache (line 1543) | def test_sdpa_choice_kv_cache(config): function test_rope_init_under_fsdp (line 1595) | def test_rope_init_under_fsdp(): function test_reset_parameters_device (line 1614) | def test_reset_parameters_device(): function test_batched_index_copy_modes (line 1622) | def test_batched_index_copy_modes(): function test_load_legacy_state_dict (line 1684) | def test_load_legacy_state_dict(): function test_kv_cache_buffer_shape (line 1708) | def test_kv_cache_buffer_shape(n_query_groups): function test_rope_cos_sin_shapes_if_rope_n_elem_is_odd (line 1732) | def test_rope_cos_sin_shapes_if_rope_n_elem_is_odd(rotary_percentage, fi... function test_forward_with_without_input_pos_maxp1 (line 1748) | def test_forward_with_without_input_pos_maxp1(): FILE: tests/test_multihead_latent_attention.py function test_multihead_latent_attention_kv_cache (line 12) | def test_multihead_latent_attention_kv_cache(): function test_multihead_latent_attention_with_mask (line 40) | def test_multihead_latent_attention_with_mask(): function test_multihead_latent_attention_litgpt_vs_hf (line 78) | def test_multihead_latent_attention_litgpt_vs_hf(batch_size, seq_len, de... function sync_weights (line 139) | def sync_weights(litgpt_model, hf_model): FILE: tests/test_pretrain.py function test_optimizer_args (line 23) | def test_optimizer_args(_, tmp_path): function test_pretrain (line 49) | def test_pretrain(_, tmp_path): function test_initial_checkpoint_dir (line 93) | def test_initial_checkpoint_dir(_, load_mock, tmp_path): function test_initialize_weights (line 113) | def test_initialize_weights(strategy, expected): FILE: tests/test_prompts.py function test_default_prompt_style (line 23) | def test_default_prompt_style(mock_tokenizer): function test_sys_prompt (line 31) | def test_sys_prompt(mock_tokenizer, sys_prompt: Optional[str]): function test_sys_prompt_with_kwargs (line 41) | def test_sys_prompt_with_kwargs(mock_tokenizer, sys_prompt: Optional[str]): function test_prompt_style_from_name (line 50) | def test_prompt_style_from_name(): function test_prompt_style_from_config (line 55) | def test_prompt_style_from_config(): function test_apply_prompts (line 93) | def test_apply_prompts(): class CustomPromptStyle (line 104) | class CustomPromptStyle(PromptStyle): method apply (line 105) | def apply(self, prompt: str, *, sys_prompt: Optional[str] = None, **kw... function test_save_load_prompt_style (line 109) | def test_save_load_prompt_style(tmp_path): function test_multiturn_prompt (line 133) | def test_multiturn_prompt(): FILE: tests/test_readme.py function run_command (line 22) | def run_command(command): function _wait_and_check_response (line 37) | def _wait_and_check_response(waiting: int = 30): function test_download_model (line 54) | def test_download_model(): function test_download_books (line 71) | def test_download_books(): function test_chat_with_model (line 86) | def test_chat_with_model(): function test_chat_with_quantized_model (line 95) | def test_chat_with_quantized_model(): function test_finetune_model (line 105) | def test_finetune_model(tmp_path): function test_pretrain_model (line 151) | def test_pretrain_model(tmp_path): function test_continue_pretrain_model (line 188) | def test_continue_pretrain_model(tmp_path): function test_serve (line 220) | def test_serve(): FILE: tests/test_rope.py function test_rope_gptneox (line 14) | def test_rope_gptneox(): function test_rope_llama_2 (line 36) | def test_rope_llama_2(): function test_rope_llama_3 (line 82) | def test_rope_llama_3(): function test_rope_llama_3_1 (line 128) | def test_rope_llama_3_1(): function test_rope_llama_3_2 (line 181) | def test_rope_llama_3_2(): function test_rope_gemma_3 (line 234) | def test_rope_gemma_3(): function test_rope_cos_sin_shapes_if_rope_n_elem_is_odd (line 284) | def test_rope_cos_sin_shapes_if_rope_n_elem_is_odd(): FILE: tests/test_serve.py function _wait_and_check_response (line 22) | def _wait_and_check_response(waiting: int = 30): function test_simple (line 40) | def test_simple(tmp_path): function test_quantize (line 75) | def test_quantize(tmp_path): function test_multi_gpu_serve (line 110) | def test_multi_gpu_serve(tmp_path): function test_serve_with_openai_spec_missing_chat_template (line 145) | def test_serve_with_openai_spec_missing_chat_template(tmp_path): function test_serve_with_openai_spec (line 180) | def test_serve_with_openai_spec(tmp_path): function test_serve_with_generate_strategy (line 266) | def test_serve_with_generate_strategy(tmp_path, generate_strategy): FILE: tests/test_tokenizer.py function test_tokenizer_against_hf (line 21) | def test_tokenizer_against_hf(config, tmp_path): function test_tokenizer_input_validation (line 89) | def test_tokenizer_input_validation(): function test_tokenizer_bos_eos (line 99) | def test_tokenizer_bos_eos( FILE: tests/test_trainer_support.py class LitLLM (line 17) | class LitLLM(L.LightningModule): method __init__ (line 18) | def __init__(self, checkpoint_dir, tokenizer_dir=None, trainer_ckpt_pa... method setup (line 24) | def setup(self, stage): method training_step (line 27) | def training_step(self, batch): method validation_step (line 32) | def validation_step(self, batch): method configure_optimizers (line 37) | def configure_optimizers(self): function test_download_model (line 45) | def test_download_model(): function test_usecase1_pretraining_from_random_weights (line 51) | def test_usecase1_pretraining_from_random_weights(tmp_path): function test_usecase2_continued_pretraining_from_checkpoint (line 75) | def test_usecase2_continued_pretraining_from_checkpoint(tmp_path): function test_usecase3_resume_from_trainer_checkpoint (line 95) | def test_usecase3_resume_from_trainer_checkpoint(tmp_path): function test_usecase4_manually_save_and_resume (line 132) | def test_usecase4_manually_save_and_resume(tmp_path): FILE: tests/test_types.py function test_logger_types_match_constants (line 8) | def test_logger_types_match_constants(): FILE: tests/test_utils.py function test_check_valid_checkpoint_dir (line 55) | def test_check_valid_checkpoint_dir(tmp_path): function test_incremental_write (line 104) | def test_incremental_write(tmp_path): function test_chunked_cross_entropy (line 129) | def test_chunked_cross_entropy(ignore_index, B): function test_num_parameters (line 165) | def test_num_parameters(): function test_num_parameters_bitsandbytes (line 180) | def test_num_parameters_bitsandbytes(mode): function test_cycle_iterator (line 193) | def test_cycle_iterator(): function test_parse_devices (line 210) | def test_parse_devices(): function test_copy_config_files (line 228) | def test_copy_config_files(fake_checkpoint_dir, tmp_path): function test_capture_hparams (line 235) | def test_capture_hparams(): function _test_function (line 255) | def _test_function(out_dir: Path, foo: bool = False, bar: int = 1): function test_save_hyperparameters (line 259) | def test_save_hyperparameters(tmp_path): function _test_function2 (line 271) | def _test_function2(out_dir: Path, foo: bool = False, bar: int = 1): function test_save_hyperparameters_known_commands (line 287) | def test_save_hyperparameters_known_commands(command, tmp_path): function test_choose_logger (line 299) | def test_choose_logger(tmp_path): function test_init_out_dir (line 322) | def test_init_out_dir(path_type, input_path, expected): function test_find_resume_path (line 337) | def test_find_resume_path(tmp_path): function model_parameters (line 365) | def model_parameters(): function test_instantiate_bnb_optimizer_with_str (line 369) | def test_instantiate_bnb_optimizer_with_str(model_parameters): function test_instantiate_bnb_optimizer_with_dict (line 377) | def test_instantiate_bnb_optimizer_with_dict(model_parameters): function test_instantiate_bnb_optimizer_with_invalid_str (line 387) | def test_instantiate_bnb_optimizer_with_invalid_str(model_parameters): function test_instantiate_torch_optimizer_with_str (line 392) | def test_instantiate_torch_optimizer_with_str(model_parameters): function test_instantiate_torch_optimizer_with_class (line 398) | def test_instantiate_torch_optimizer_with_class(model_parameters): function test_extend_checkpoint_dir_is_prefixed (line 414) | def test_extend_checkpoint_dir_is_prefixed(input_path, expected): function test_extend_checkpoint_dir (line 438) | def test_extend_checkpoint_dir(input_path, expected): function test_extend_checkpoint_dir_dont_exist (line 462) | def test_extend_checkpoint_dir_dont_exist(input_path, expected): function test_file_size_below_limit_on_cpu (line 466) | def test_file_size_below_limit_on_cpu(): function test_file_size_above_limit_on_cpu (line 474) | def test_file_size_above_limit_on_cpu(): function test_file_size_above_limit_on_gpu (line 484) | def test_file_size_above_limit_on_gpu(): function mock_cuda_is_available_true (line 493) | def mock_cuda_is_available_true(monkeypatch): function mock_nvidia_device_properties (line 499) | def mock_nvidia_device_properties(monkeypatch): function mock_amd_device_properties (line 507) | def mock_amd_device_properties(monkeypatch): function all_nvlink_connected_output (line 515) | def all_nvlink_connected_output(): function test_all_nvlink_connected (line 527) | def test_all_nvlink_connected( function nvlink_partially_connected_output (line 537) | def nvlink_partially_connected_output(): function test_nvlink_partially_connected_output (line 554) | def test_nvlink_partially_connected_output( function nvlink_not_connected_output (line 567) | def nvlink_not_connected_output(): function test_nvlink_not_connected_output (line 589) | def test_nvlink_not_connected_output( function nvlink_all_gpu_connected_but_other_connected_output (line 602) | def nvlink_all_gpu_connected_but_other_connected_output(): function test_nvlink_all_gpu_connected_but_other_connected_output (line 653) | def test_nvlink_all_gpu_connected_but_other_connected_output( function nvidia_smi_nvlink_output_dual_gpu_no_numa (line 666) | def nvidia_smi_nvlink_output_dual_gpu_no_numa(): function test_check_nvlink_connectivity__returns_fully_connected_when_nvidia_all_nvlink_two_gpus (line 688) | def test_check_nvlink_connectivity__returns_fully_connected_when_nvidia_... function rocm_smi_xgmi_output_multi_gpu (line 698) | def rocm_smi_xgmi_output_multi_gpu(): function test_check_nvlink_connectivity__returns_fully_connected_when_amd_all_xgmi_8_gpus (line 722) | def test_check_nvlink_connectivity__returns_fully_connected_when_amd_all... function test_check_nvlink_connectivity__returns_no_gpus_when_no_gpus (line 732) | def test_check_nvlink_connectivity__returns_no_gpus_when_no_gpus(mock_ru... function test_check_nvlink_connectivity__returns_unrecognized_vendor_when_unrecognized_vendor (line 740) | def test_check_nvlink_connectivity__returns_unrecognized_vendor_when_unr... function test_fix_and_load_json (line 751) | def test_fix_and_load_json(): function test_select_sft_generate_example (line 805) | def test_select_sft_generate_example(): FILE: tests/test_yarn.py function test_deepseek_v3_block_with_yarn (line 15) | def test_deepseek_v3_block_with_yarn(batch_size, seq_len, device): function sync_weights (line 177) | def sync_weights(litgpt_model, hf_model): function sync_block_weights (line 191) | def sync_block_weights(block_litgpt, block_hf): FILE: tutorials/examples/ptl-trainer/litgpt_ptl_medium.py class LitLLM (line 9) | class LitLLM(L.LightningModule): method __init__ (line 10) | def __init__(self): method on_train_start (line 22) | def on_train_start(self): method training_step (line 26) | def training_step(self, batch): method configure_optimizers (line 33) | def configure_optimizers(self): FILE: tutorials/examples/ptl-trainer/litgpt_ptl_small.py class LitLLM (line 10) | class LitLLM(L.LightningModule): method __init__ (line 11) | def __init__(self, checkpoint_dir, tokenizer_dir=None, trainer_ckpt_pa... method setup (line 17) | def setup(self, stage): method training_step (line 20) | def training_step(self, batch): method validation_step (line 25) | def validation_step(self, batch): method configure_optimizers (line 30) | def configure_optimizers(self): function find_latest_checkpoint (line 97) | def find_latest_checkpoint(directory): FILE: tutorials/full_finetune_example.py function validate (line 35) | def validate(model, val_dataloader): function train (line 48) | def train(fabric, model, optimizer, scheduler, train_dataloader, val_dat... function main (line 80) | def main(fabric):