SYMBOL INDEX (503 symbols across 50 files)

FILE: models/checkpoint.py
  function map_mp_rank (line 18) | def map_mp_rank(old_mp_size: int, new_mp_size: int, new_mp_rank: int) ->...
  function maybe_reshard_state_dict (line 34) | def maybe_reshard_state_dict(
  function reshard_mp (line 100) | def reshard_mp(
  function convert_moe_weights (line 158) | def convert_moe_weights(state_dict: Dict[str, Any], num_experts: int) ->...

FILE: models/cli/describe.py
  class Describe (line 16) | class Describe(Subcommand):
    method __init__ (line 19) | def __init__(self, subparsers: argparse._SubParsersAction):
    method _add_arguments (line 30) | def _add_arguments(self):
    method _run_model_describe_cmd (line 39) | def _run_model_describe_cmd(self, args: argparse.Namespace) -> None:

FILE: models/cli/download.py
  class Download (line 37) | class Download(Subcommand):
    method __init__ (line 40) | def __init__(self, subparsers: argparse._SubParsersAction):
  function setup_download_parser (line 51) | def setup_download_parser(parser: argparse.ArgumentParser) -> None:
  class DownloadTask (line 101) | class DownloadTask:
  class DownloadError (line 111) | class DownloadError(Exception):
  class CustomTransferSpeedColumn (line 115) | class CustomTransferSpeedColumn(TransferSpeedColumn):
    method render (line 116) | def render(self, task):
  class ParallelDownloader (line 122) | class ParallelDownloader:
    method __init__ (line 123) | def __init__(
    method retry_with_exponential_backoff (line 148) | async def retry_with_exponential_backoff(self, task: DownloadTask, fun...
    method get_file_info (line 165) | async def get_file_info(self, client: httpx.AsyncClient, task: Downloa...
    method verify_file_integrity (line 195) | def verify_file_integrity(self, task: DownloadTask) -> bool:
    method download_chunk (line 200) | async def download_chunk(self, client: httpx.AsyncClient, task: Downlo...
    method prepare_download (line 223) | async def prepare_download(self, task: DownloadTask) -> None:
    method download_file (line 230) | async def download_file(self, task: DownloadTask) -> None:
    method has_disk_space (line 266) | def has_disk_space(self, tasks: list[DownloadTask]) -> bool:
    method download_all (line 286) | async def download_all(self, tasks: list[DownloadTask]) -> None:
  function _hf_download (line 318) | def _hf_download(
  function _meta_download (line 358) | def _meta_download(
  class ModelEntry (line 394) | class ModelEntry(BaseModel):
  class Manifest (line 401) | class Manifest(BaseModel):
  function _download_from_manifest (line 406) | def _download_from_manifest(manifest_file: str, max_concurrent_downloads...
  function run_download_cmd (line 448) | def run_download_cmd(args: argparse.Namespace, parser: argparse.Argument...

FILE: models/cli/list.py
  function _get_model_size (line 19) | def _get_model_size(model_dir):
  function _convert_to_model_descriptor (line 23) | def _convert_to_model_descriptor(model):
  function _run_model_list_downloaded_cmd (line 30) | def _run_model_list_downloaded_cmd() -> None:
  class List (line 54) | class List(Subcommand):
    method __init__ (line 57) | def __init__(self, subparsers: argparse._SubParsersAction):
    method _add_arguments (line 68) | def _add_arguments(self):
    method _run_model_list_cmd (line 87) | def _run_model_list_cmd(self, args: argparse.Namespace) -> None:

FILE: models/cli/llama.py
  class LlamaModelsCLIParser (line 19) | class LlamaModelsCLIParser:
    method __init__ (line 22) | def __init__(self):
    method parse_args (line 45) | def parse_args(self) -> argparse.Namespace:
    method run (line 51) | def run(self, args: argparse.Namespace) -> None:
  function main (line 55) | def main():

FILE: models/cli/prompt_format.py
  class PromptFormat (line 20) | class PromptFormat(Subcommand):
    method __init__ (line 23) | def __init__(self, subparsers: argparse._SubParsersAction):
    method _add_arguments (line 40) | def _add_arguments(self):
    method _run_model_template_cmd (line 55) | def _run_model_template_cmd(self, args: argparse.Namespace) -> None:
  function render_markdown_to_pager (line 112) | def render_markdown_to_pager(markdown_content: str):

FILE: models/cli/remove.py
  class Remove (line 17) | class Remove(Subcommand):
    method __init__ (line 20) | def __init__(self, subparsers: argparse._SubParsersAction):
    method _add_arguments (line 31) | def _add_arguments(self):
    method _run_model_remove_cmd (line 45) | def _run_model_remove_cmd(self, args: argparse.Namespace) -> None:

FILE: models/cli/safety_models.py
  class PromptGuardModel (line 16) | class PromptGuardModel(BaseModel):
    method descriptor (line 28) | def descriptor(self) -> str:
  function prompt_guard_model_skus (line 34) | def prompt_guard_model_skus():
  function prompt_guard_model_sku_map (line 48) | def prompt_guard_model_sku_map() -> dict[str, Any]:
  function prompt_guard_download_info_map (line 52) | def prompt_guard_download_info_map() -> dict[str, LlamaDownloadInfo]:

FILE: models/cli/subcommand.py
  class Subcommand (line 9) | class Subcommand:
    method __init__ (line 12) | def __init__(self, *args, **kwargs):
    method create (line 16) | def create(cls, *args, **kwargs):
    method _add_arguments (line 19) | def _add_arguments(self):

FILE: models/cli/table.py
  function print_table (line 14) | def print_table(rows, headers=None, separate_rows: bool = False, sort_by...

FILE: models/cli/utils.py
  function print_subcommand_description (line 9) | def print_subcommand_description(parser, subparsers):

FILE: models/cli/verify_download.py
  class VerificationResult (line 21) | class VerificationResult:
  class VerifyDownload (line 29) | class VerifyDownload(Subcommand):
    method __init__ (line 32) | def __init__(self, subparsers: argparse._SubParsersAction):
  function setup_verify_download_parser (line 43) | def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None:
  function calculate_sha256 (line 52) | def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str:
  function load_checksums (line 60) | def load_checksums(checklist_path: Path) -> dict[str, str]:
  function verify_files (line 72) | def verify_files(model_dir: Path, checksums: dict[str, str], console: Co...
  function run_verify_cmd (line 107) | def run_verify_cmd(args: argparse.Namespace, parser: argparse.ArgumentPa...

FILE: models/datatypes.py
  class Role (line 21) | class Role(Enum):
  class BuiltinTool (line 28) | class BuiltinTool(Enum):
  class ToolCall (line 39) | class ToolCall(BaseModel):
    method validate_field (line 53) | def validate_field(cls, v):
  class ToolPromptFormat (line 62) | class ToolPromptFormat(Enum):
  class StopReason (line 87) | class StopReason(Enum):
  class ToolParamDefinition (line 93) | class ToolParamDefinition(BaseModel):
  class ToolDefinition (line 100) | class ToolDefinition(BaseModel):
    method validate_field (line 107) | def validate_field(cls, v):
  class RawMediaItem (line 116) | class RawMediaItem(BaseModel):
    method serialize_data (line 123) | def serialize_data(self, data: Optional[bytes], _info):
    method validate_data (line 130) | def validate_data(cls, v):
  class RawTextItem (line 136) | class RawTextItem(BaseModel):
  class RawMessage (line 146) | class RawMessage(BaseModel):
  class GenerationResult (line 158) | class GenerationResult(BaseModel):
  class QuantizationMode (line 176) | class QuantizationMode(str, Enum):

FILE: models/llama3/args.py
  class QuantizationScheme (line 13) | class QuantizationScheme(Enum):
  class QuantizationArgs (line 18) | class QuantizationArgs:
    method __init__ (line 23) | def __init__(self, **kwargs):
  class LoRAArgs (line 33) | class LoRAArgs:
  class ModelArgs (line 39) | class ModelArgs:
    method __init__ (line 62) | def __init__(self, **kwargs):

FILE: models/llama3/chat_format.py
  class VisionInput (line 31) | class VisionInput:
  class LLMInput (line 37) | class LLMInput:
  function role_str (line 42) | def role_str(role: Role) -> str:
  class ChatFormat (line 52) | class ChatFormat:
    method __init__ (line 55) | def __init__(self, tokenizer: Tokenizer):
    method _encode_header (line 61) | def _encode_header(self, role: str) -> List[int]:
    method encode_content (line 69) | def encode_content(self, content: RawContent) -> LLMInput:
    method _encode_content (line 73) | def _encode_content(self, content: RawContent, bos: bool = False) -> T...
    method encode_message (line 108) | def encode_message(
    method encode_dialog_prompt (line 146) | def encode_dialog_prompt(
    method decode_assistant_message (line 166) | def decode_assistant_message(self, tokens: List[int], stop_reason: Sto...
    method decode_assistant_message_from_content (line 171) | def decode_assistant_message_from_content(self, content: str, stop_rea...
    method _model_input_from_tokens_images (line 236) | def _model_input_from_tokens_images(self, tokens: List[int], images: L...
  function create_vision_mask (line 250) | def create_vision_mask(

FILE: models/llama3/generation.py
  function is_xccl_available (line 33) | def is_xccl_available():
  class Llama3 (line 39) | class Llama3:
    method build (line 41) | def build(
    method __init__ (line 147) | def __init__(self, model: Transformer | CrossAttentionTransformer, tok...
    method generate (line 154) | def generate(
    method completion (line 302) | def completion(
    method chat_completion (line 324) | def chat_completion(
  function sample_top_p (line 348) | def sample_top_p(probs, p):

FILE: models/llama3/model.py
  class RMSNorm (line 31) | class RMSNorm(torch.nn.Module):
    method __init__ (line 32) | def __init__(self, dim: int, eps: float = 1e-6):
    method _norm (line 37) | def _norm(self, x):
    method forward (line 40) | def forward(self, x):
  function apply_scaling (line 45) | def apply_scaling(freqs: torch.Tensor) -> torch.Tensor:
  function precompute_freqs_cis (line 65) | def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0, use...
  function reshape_for_broadcast (line 75) | def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
  function apply_rotary_emb (line 83) | def apply_rotary_emb(
  function repeat_kv (line 96) | def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor:
  class Attention (line 108) | class Attention(nn.Module):
    method __init__ (line 109) | def __init__(self, args: ModelArgs):
    method forward (line 164) | def forward(
  class FeedForward (line 205) | class FeedForward(nn.Module):
    method __init__ (line 206) | def __init__(
    method forward (line 224) | def forward(self, x):
  class TransformerBlock (line 228) | class TransformerBlock(nn.Module):
    method __init__ (line 229) | def __init__(self, layer_id: int, args: ModelArgs):
    method forward (line 245) | def forward(
  class Transformer (line 257) | class Transformer(nn.Module):
    method __init__ (line 258) | def __init__(self, params: ModelArgs):
    method forward (line 281) | def forward(self, tokens: torch.Tensor, start_pos: int):

FILE: models/llama3/multimodal/encoder_utils.py
  function resize_local_position_embedding (line 20) | def resize_local_position_embedding(orig_pos_embed, grid_size):
  function initialize_global_position_embedding_from_local (line 49) | def initialize_global_position_embedding_from_local(pos_and_cls_embed, g...
  function resize_global_position_embedding (line 77) | def resize_global_position_embedding(pos_and_cls_embed, grid_size, x_sca...
  function build_encoder_attention_mask (line 128) | def build_encoder_attention_mask(
  function expand_num_tokens_to_mult8 (line 150) | def expand_num_tokens_to_mult8(x):
  function contract_num_tokens_from_mult8 (line 171) | def contract_num_tokens_from_mult8(x, num_pad_tokens):

FILE: models/llama3/multimodal/image_transform.py
  class VariableSizeImageTransform (line 26) | class VariableSizeImageTransform(object):
    method __init__ (line 61) | def __init__(self, size: int = IMAGE_RES) -> None:
    method get_factors (line 75) | def get_factors(n: int) -> Set[int]:
    method find_supported_resolutions (line 94) | def find_supported_resolutions(self, max_num_chunks: int, patch_size: ...
    method get_max_res_without_distortion (line 144) | def get_max_res_without_distortion(
    method _pad (line 179) | def _pad(self, image: Image.Image, target_size) -> Image.Image:
    method _split (line 185) | def _split(self, image: torch.Tensor, ncw: int, nch: int) -> torch.Ten...
    method resize_without_distortion (line 195) | def resize_without_distortion(
    method get_best_fit (line 259) | def get_best_fit(
    method __call__ (line 358) | def __call__(

FILE: models/llama3/multimodal/model.py
  function reduce_from_tensor_model_parallel_region (line 41) | def reduce_from_tensor_model_parallel_region(input_):
  function gather_from_tensor_model_parallel_region (line 48) | def gather_from_tensor_model_parallel_region(input_):
  function _get_full_row_masked_out_mask (line 67) | def _get_full_row_masked_out_mask(
  class LayerNorm (line 83) | class LayerNorm(nn.LayerNorm):
    method forward (line 86) | def forward(self, x: torch.Tensor):
  class ColumnParallelConv2dPatch (line 91) | class ColumnParallelConv2dPatch(torch.nn.Module):
    method __init__ (line 104) | def __init__(
    method forward (line 122) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class ImageFeedForward (line 130) | class ImageFeedForward(torch.nn.Module):
    method __init__ (line 131) | def __init__(
    method forward (line 157) | def forward(self, x):
  class ImageAttention (line 166) | class ImageAttention(nn.Module):
    method __init__ (line 167) | def __init__(
    method forward (line 215) | def forward(
  class ImageTransformerBlock (line 243) | class ImageTransformerBlock(nn.Module):
    method __init__ (line 244) | def __init__(
    method forward (line 274) | def forward(
  class ImageTransformer (line 286) | class ImageTransformer(nn.Module):
    method __init__ (line 287) | def __init__(
    method forward (line 312) | def forward(self, x: torch.Tensor, return_intermediate=None, mask=None):
  class VisionEncoder (line 323) | class VisionEncoder(nn.Module):
    method __init__ (line 324) | def __init__(
    method load_hook (line 392) | def load_hook(
    method apply_positional_embedding (line 433) | def apply_positional_embedding(self, x, ar):
    method apply_class_embedding (line 445) | def apply_class_embedding(self, x):
    method forward (line 456) | def forward(self, images: torch.Tensor, ar: torch.Tensor) -> torch.Ten...
  class Attention (line 508) | class Attention(nn.Module):
    method __init__ (line 511) | def __init__(self, args: ModelArgs):
    method setup_cache (line 574) | def setup_cache(self, max_batch_size: int, dtype: torch.dtype):
    method forward (line 598) | def forward(
  class FeedForward (line 639) | class FeedForward(nn.Module):
    method __init__ (line 640) | def __init__(
    method forward (line 670) | def forward(self, x):
  class TransformerBlock (line 679) | class TransformerBlock(nn.Module):
    method __init__ (line 680) | def __init__(self, layer_id: int, args: ModelArgs):
    method setup_cache (line 711) | def setup_cache(self, max_batch_size: int, dtype: torch.dtype):
    method forward (line 714) | def forward(
  class TilePositionEmbedding (line 742) | class TilePositionEmbedding(nn.Module):
    method __init__ (line 743) | def __init__(
    method load_hook (line 759) | def load_hook(
    method _dynamic_resize (line 780) | def _dynamic_resize(embed: torch.Tensor, num_tiles: int):
    method forward (line 794) | def forward(self, x: torch.Tensor, ar: torch.Tensor, num_tiles: int = ...
  function _noinit (line 810) | def _noinit(x):
  class CrossAttention (line 814) | class CrossAttention(torch.nn.Module):
    method __init__ (line 817) | def __init__(
    method _compute_xattn_kv_cache (line 889) | def _compute_xattn_kv_cache(self, xattn_tokens: torch.Tensor) -> torch...
    method compute_xattn_kv_cache (line 909) | def compute_xattn_kv_cache(self, xattn_tokens: torch.Tensor) -> torch....
    method forward (line 912) | def forward(
  class CrossAttentionTransformerBlock (line 937) | class CrossAttentionTransformerBlock(torch.nn.Module):
    method __init__ (line 940) | def __init__(
    method compute_xattn_kv_cache (line 980) | def compute_xattn_kv_cache(self, xattn_tokens: torch.Tensor) -> torch....
    method forward (line 983) | def forward(
  class DummyCrossAttentionTransformerBlock (line 1003) | class DummyCrossAttentionTransformerBlock:
    method __call__ (line 1006) | def __call__(
  class DummySelfAttentionTransformerBlock (line 1015) | class DummySelfAttentionTransformerBlock:
    method __call__ (line 1018) | def __call__(
  class CrossAttentionTransformerVision (line 1027) | class CrossAttentionTransformerVision(torch.nn.Module):
    method __init__ (line 1028) | def __init__(self, args: ModelArgs) -> None:
    method forward (line 1054) | def forward(self, images: torch.Tensor, aspect_ratios: torch.Tensor) -...
  class CrossAttentionTransformerText (line 1065) | class CrossAttentionTransformerText(torch.nn.Module):
    method __init__ (line 1068) | def __init__(self, args: ModelArgs) -> None:
    method _init_fusion_schedule (line 1145) | def _init_fusion_schedule(
    method get_partially_trainable_embedding (line 1155) | def get_partially_trainable_embedding(self, x):
    method forward (line 1168) | def forward(
    method setup_cache (line 1208) | def setup_cache(self, max_batch_size: int, device: torch.device, dtype...
    method _get_xattn_mask (line 1228) | def _get_xattn_mask(
  class CrossAttentionTransformer (line 1264) | class CrossAttentionTransformer(torch.nn.Module):
    method __init__ (line 1265) | def __init__(self, args: ModelArgs) -> None:
    method setup_cache (line 1279) | def setup_cache(self, max_batch_size: int, device: torch.device, dtype...
    method compute_vision_tokens_masks (line 1282) | def compute_vision_tokens_masks(
    method forward (line 1353) | def forward(
  function _stack_images (line 1374) | def _stack_images(
  function _pad_masks (line 1403) | def _pad_masks(

FILE: models/llama3/multimodal/utils.py
  function get_negative_inf_value (line 13) | def get_negative_inf_value(dtype):
  function to_2tuple (line 17) | def to_2tuple(x):

FILE: models/llama3/quantization/loader.py
  function swiglu_wrapper (line 30) | def swiglu_wrapper(
  function convert_to_quantized_model (line 38) | def convert_to_quantized_model(
  function convert_to_fp8_quantized_model (line 53) | def convert_to_fp8_quantized_model(
  class Int8DynActInt4WeightLinearLoRA (line 99) | class Int8DynActInt4WeightLinearLoRA(Int8DynActInt4WeightLinear):
    method __init__ (line 115) | def __init__(
    method load_hook (line 149) | def load_hook(
    method forward (line 165) | def forward(self, input_: torch.Tensor) -> torch.Tensor:
  class Int8WeightEmbedding (line 173) | class Int8WeightEmbedding(torch.nn.Embedding):
    method __init__ (line 182) | def __init__(
    method load_hook (line 193) | def load_hook(
  class Int8WeightLinear (line 209) | class Int8WeightLinear(torch.nn.Linear):
    method __init__ (line 218) | def __init__(self, in_features: int, out_features: int, bias: bool = T...
    method load_hook (line 223) | def load_hook(
  function _prepare_model_int4_weight_int8_dynamic_activation (line 239) | def _prepare_model_int4_weight_int8_dynamic_activation(
  function convert_to_int4_quantized_model (line 287) | def convert_to_int4_quantized_model(

FILE: models/llama3/scripts/chat_completion.py
  function get_device (line 27) | def get_device():
  function run_main (line 37) | def run_main(
  function main (line 120) | def main():

FILE: models/llama3/scripts/completion.py
  function get_device (line 28) | def get_device():
  function run_main (line 38) | def run_main(
  function main (line 92) | def main():

FILE: models/llama3/tests/api/test_generation.py
  function get_device (line 21) | def get_device():
  function build_generator (line 32) | def build_generator(env_var: str, device: str):
  class TestTextModelInference (line 43) | class TestTextModelInference(unittest.TestCase):
    method setUpClass (line 47) | def setUpClass(cls):
    method test_run_generation (line 50) | def test_run_generation(self):
  class TestTextModelInferenceOnDevice (line 79) | class TestTextModelInferenceOnDevice(TestTextModelInference):
  class TestVisionModelInference (line 83) | class TestVisionModelInference(unittest.TestCase):
    method setUpClass (line 87) | def setUpClass(cls):
    method test_run_generation (line 92) | def test_run_generation(self):
  class TestVisionModelInferenceOnDevice (line 132) | class TestVisionModelInferenceOnDevice(TestVisionModelInference):

FILE: models/llama3/tests/api/test_tokenizer.py
  class TokenizerTests (line 18) | class TokenizerTests(TestCase):
    method setUp (line 19) | def setUp(self):
    method test_special_tokens (line 23) | def test_special_tokens(self):
    method test_encode (line 29) | def test_encode(self):
    method test_decode (line 35) | def test_decode(self):
    method test_encode_message (line 43) | def test_encode_message(self):
    method test_encode_dialog (line 65) | def test_encode_dialog(self):

FILE: models/llama3/tests/api/test_tool_utils.py
  class TestToolUtils (line 16) | class TestToolUtils(unittest.TestCase):
    method test_maybe_extract_custom_tool_call (line 17) | def test_maybe_extract_custom_tool_call(self):
  class TestPythonListCheck (line 25) | class TestPythonListCheck(unittest.TestCase):
    method test_valid_list_with_single_function_call (line 26) | def test_valid_list_with_single_function_call(self):
    method test_valid_list_with_multiple_function_calls (line 30) | def test_valid_list_with_multiple_function_calls(self):
    method test_invalid_empty_list (line 36) | def test_invalid_empty_list(self):
    method test_invalid_list_with_non_function_call (line 40) | def test_invalid_list_with_non_function_call(self):
    method test_invalid_list_with_positional_args (line 44) | def test_invalid_list_with_positional_args(self):
    method test_invalid_nested_list (line 48) | def test_invalid_nested_list(self):
    method test_invalid_dict (line 52) | def test_invalid_dict(self):
    method test_invalid_syntax (line 56) | def test_invalid_syntax(self):
    method test_valid_list_with_boolean_args (line 60) | def test_valid_list_with_boolean_args(self):
    method test_valid_list_with_numeric_args (line 64) | def test_valid_list_with_numeric_args(self):
    method test_invalid_bare_function_call (line 68) | def test_invalid_bare_function_call(self):
    method test_invalid_extra_char_function_call (line 72) | def test_invalid_extra_char_function_call(self):
  class TestParsePythonList (line 77) | class TestParsePythonList(unittest.TestCase):
    method test_single_function_call (line 78) | def test_single_function_call(self):
    method test_multiple_function_calls (line 83) | def test_multiple_function_calls(self):
    method test_function_call_with_numeric_args (line 93) | def test_function_call_with_numeric_args(self):
    method test_function_call_with_mixed_type_args (line 98) | def test_function_call_with_mixed_type_args(self):
    method test_function_call_with_empty_args (line 108) | def test_function_call_with_empty_args(self):
    method test_function_call_with_string_containing_spaces (line 113) | def test_function_call_with_string_containing_spaces(self):
    method test_function_names_with_underscores_lists_and_dicts (line 118) | def test_function_names_with_underscores_lists_and_dicts(self):

FILE: models/llama3/tokenizer.py
  class Tokenizer (line 46) | class Tokenizer:
    method get_instance (line 58) | def get_instance(cls):
    method __init__ (line 65) | def __init__(self, model_path: Path):
    method encode (line 118) | def encode(
    method decode (line 174) | def decode(self, t: Sequence[int]) -> str:
    method _split_whitespaces_or_nonwhitespaces (line 188) | def _split_whitespaces_or_nonwhitespaces(s: str, max_consecutive_slice...

FILE: models/llama3/tool_utils.py
  function is_json (line 18) | def is_json(s):
  function is_valid_python_list (line 28) | def is_valid_python_list(input_string):
  function parse_python_list_for_function_calls (line 67) | def parse_python_list_for_function_calls(input_string):
  class ToolUtils (line 96) | class ToolUtils:
    method is_builtin_tool_call (line 98) | def is_builtin_tool_call(message_body: str) -> bool:
    method maybe_extract_builtin_tool_call (line 103) | def maybe_extract_builtin_tool_call(message_body: str) -> Optional[Tup...
    method maybe_extract_custom_tool_call (line 116) | def maybe_extract_custom_tool_call(message_body: str) -> Optional[Tupl...
    method encode_tool_call (line 149) | def encode_tool_call(t: ToolCall, tool_prompt_format: ToolPromptFormat...

FILE: models/llama4/args.py
  class QuantizationScheme (line 14) | class QuantizationScheme(Enum):
  class QuantizationArgs (line 18) | class QuantizationArgs(BaseModel):
  class LoRAArgs (line 24) | class LoRAArgs(BaseModel):
  class MoEArgs (line 29) | class MoEArgs(BaseModel):
  class Size (line 39) | class Size(BaseModel):
  class VisionArgs (line 44) | class VisionArgs(BaseModel):
  class ModelArgs (line 58) | class ModelArgs(BaseModel):
    method validate (line 93) | def validate(self) -> "ModelArgs":

FILE: models/llama4/chat_format.py
  function role_str (line 36) | def role_str(role: Role) -> str:
  class TransformedImage (line 47) | class TransformedImage:
  function convert_image_to_rgb (line 53) | def convert_image_to_rgb(image: PIL_Image.Image, bg: Tuple[int, int, int...
  class ChatFormat (line 62) | class ChatFormat:
    method __init__ (line 65) | def __init__(
    method _encode_header (line 85) | def _encode_header(self, role: str) -> List[int]:
    method encode_content (line 95) | def encode_content(self, content: RawContent) -> LLMInput:
    method _encode_image (line 99) | def _encode_image(
    method _encode_content (line 144) | def _encode_content(self, content: RawContent, bos: bool = False) -> T...
    method encode_message (line 191) | def encode_message(
    method encode_dialog_prompt (line 224) | def encode_dialog_prompt(
    method decode_assistant_message (line 243) | def decode_assistant_message(self, tokens: List[int], stop_reason: Sto...
    method decode_assistant_message_from_content (line 248) | def decode_assistant_message_from_content(self, content: str, stop_rea...
    method _model_input_from_tokens_images (line 314) | def _model_input_from_tokens_images(self, tokens: List[int], images: L...

FILE: models/llama4/datatypes.py
  class MaskedEmbedding (line 15) | class MaskedEmbedding:
  class LLMInput (line 21) | class LLMInput:
  class TransformerInput (line 38) | class TransformerInput:
  class LLMOutput (line 54) | class LLMOutput:

FILE: models/llama4/ffn.py
  class FeedForward (line 16) | class FeedForward(nn.Module):
    method __init__ (line 17) | def __init__(
    method load_hook (line 31) | def load_hook(
    method forward (line 47) | def forward(self, x):

FILE: models/llama4/generation.py
  class Llama4 (line 36) | class Llama4:
    method build (line 38) | def build(
    method __init__ (line 112) | def __init__(self, model: Transformer, tokenizer: Tokenizer, args: Mod...
    method generate (line 119) | def generate(
    method completion (line 247) | def completion(
    method chat_completion (line 269) | def chat_completion(
  function sample_top_p (line 292) | def sample_top_p(probs, p):

FILE: models/llama4/model.py
  function rmsnorm (line 27) | def rmsnorm(x, eps):
  class RMSNorm (line 34) | class RMSNorm(torch.nn.Module):
    method __init__ (line 35) | def __init__(self, dim: int, eps: float = 1e-6):
    method forward (line 40) | def forward(self, x):
  function apply_scaling (line 44) | def apply_scaling(freqs: torch.Tensor, scale_factor: float, high_freq_fa...
  function precompute_freqs_cis (line 64) | def precompute_freqs_cis(
  function reshape_for_broadcast (line 81) | def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
  function apply_rotary_emb (line 89) | def apply_rotary_emb(
  class Attention (line 102) | class Attention(nn.Module):
    method __init__ (line 105) | def __init__(
    method load_hook (line 176) | def load_hook(
    method forward (line 199) | def forward(
  class TransformerBlock (line 251) | class TransformerBlock(nn.Module):
    method __init__ (line 252) | def __init__(self, layer_id: int, args: ModelArgs):
    method load_hook (line 290) | def load_hook(
    method forward (line 317) | def forward(
  class Transformer (line 337) | class Transformer(nn.Module):
    method __init__ (line 338) | def __init__(self, args: ModelArgs, **kwargs) -> None:
    method load_hook (line 376) | def load_hook(
    method forward (line 390) | def forward(self, model_input: TransformerInput) -> TransformerOutput:
  function create_chunked_attention_mask (line 431) | def create_chunked_attention_mask(seq_len: int, attention_chunk_size: in...

FILE: models/llama4/moe.py
  class Experts (line 22) | class Experts(nn.Module):
    method __init__ (line 23) | def __init__(
    method load_hook (line 65) | def load_hook(
    method forward (line 83) | def forward(
    method batched_swiglu (line 97) | def batched_swiglu(self, x: Tensor, w1: Tensor, w3: Tensor, w2: Tensor...
  class MoE (line 102) | class MoE(torch.nn.Module):
    method __init__ (line 121) | def __init__(
    method load_hook (line 160) | def load_hook(
    method forward (line 175) | def forward(self, x_bsD: Tensor) -> Tensor:  # noqa: N803
  function divide_exact (line 213) | def divide_exact(numerator: int, denominator: int) -> int:

FILE: models/llama4/preprocess.py
  class ResizeNormalizeImageTransform (line 22) | class ResizeNormalizeImageTransform:
    method __init__ (line 23) | def __init__(
    method __call__ (line 45) | def __call__(self, image: Image.Image) -> torch.Tensor:
  class VariableSizeImageTransform (line 49) | class VariableSizeImageTransform(object):
    method __init__ (line 84) | def __init__(self, size: int = IMAGE_RES) -> None:
    method get_factors (line 97) | def get_factors(n: int) -> Set[int]:
    method find_supported_resolutions (line 116) | def find_supported_resolutions(self, max_num_chunks: int, patch_size: ...
    method get_max_res_without_distortion (line 166) | def get_max_res_without_distortion(
    method _pad (line 201) | def _pad(self, image: Image.Image, target_size) -> Image.Image:
    method _split (line 207) | def _split(self, image: torch.Tensor, ncw: int, nch: int) -> torch.Ten...
    method resize_without_distortion (line 217) | def resize_without_distortion(
    method get_best_fit (line 284) | def get_best_fit(
    method __call__ (line 383) | def __call__(

FILE: models/llama4/quantization/loader.py
  function swiglu_wrapper_no_reduce (line 24) | def swiglu_wrapper_no_reduce(
  function experts_batched_swiglu_wrapper (line 33) | def experts_batched_swiglu_wrapper(
  function convert_to_quantized_model (line 46) | def convert_to_quantized_model(
  function logging_callbacks (line 174) | def logging_callbacks(

FILE: models/llama4/scripts/chat_completion.py
  function run_main (line 24) | def run_main(
  function main (line 109) | def main():

FILE: models/llama4/scripts/completion.py
  function run_main (line 24) | def run_main(
  function main (line 71) | def main():

FILE: models/llama4/scripts/quantize.py
  function ffn_quantize (line 45) | def ffn_quantize(
  function main (line 214) | def main():

FILE: models/llama4/tests/api/test_chat_format.py
  class TestChatFormatArgumentsJson (line 17) | class TestChatFormatArgumentsJson(unittest.TestCase):
    method setUp (line 20) | def setUp(self):
    method test_arguments_json_included_in_custom_tool_call (line 26) | def test_arguments_json_included_in_custom_tool_call(self):
    method test_arguments_json_included_in_builtin_tool_call (line 57) | def test_arguments_json_included_in_builtin_tool_call(self):
    method test_arguments_json_included_in_code_interpreter_call (line 94) | def test_arguments_json_included_in_code_interpreter_call(self):
    method test_arguments_json_with_complex_arguments (line 131) | def test_arguments_json_with_complex_arguments(self):
    method test_no_tool_calls_when_no_tools_detected (line 166) | def test_no_tool_calls_when_no_tools_detected(self):

FILE: models/llama4/tokenizer.py
  function get_reserved_special_tokens (line 43) | def get_reserved_special_tokens(name, count, start_index=0):
  class Tokenizer (line 113) | class Tokenizer:
    method get_instance (line 125) | def get_instance(cls):
    method __init__ (line 132) | def __init__(self, model_path: Path):
    method encode (line 181) | def encode(
    method decode (line 237) | def decode(self, t: Sequence[int]) -> str:
    method _split_whitespaces_or_nonwhitespaces (line 251) | def _split_whitespaces_or_nonwhitespaces(s: str, max_consecutive_slice...

FILE: models/llama4/vision/embedding.py
  class PixelShuffle (line 20) | class PixelShuffle(nn.Module):
    method __init__ (line 21) | def __init__(self, ps_ratio):
    method forward (line 25) | def forward(self, x):
  function pixel_shuffle_op (line 36) | def pixel_shuffle_op(input_x, ps_ratio):
  class SimpleMLP (line 50) | class SimpleMLP(torch.nn.Module):
    method __init__ (line 51) | def __init__(
    method forward (line 76) | def forward(self, x):
  class PixelShuffleMLP (line 83) | class PixelShuffleMLP(torch.nn.Module):
    method __init__ (line 84) | def __init__(
    method forward (line 108) | def forward(self, encoded_patches: torch.Tensor) -> torch.Tensor:
  class VisionEmbeddings (line 113) | class VisionEmbeddings(torch.nn.Module):
    method __init__ (line 114) | def __init__(self, args: VisionArgs):
    method load_hook (line 138) | def load_hook(
    method _get_empty_sequence (line 154) | def _get_empty_sequence(self, h):
    method forward (line 165) | def forward(
  function scatter_embeddings (line 180) | def scatter_embeddings(image_batch, image_mask, h_image, encoded_patches...

FILE: models/llama4/vision/encoder.py
  class LayerNorm (line 23) | class LayerNorm(nn.LayerNorm):
    method forward (line 26) | def forward(self, x: torch.Tensor):
  class ColumnParallelConv2dPatch (line 31) | class ColumnParallelConv2dPatch(torch.nn.Module):
    method __init__ (line 44) | def __init__(
    method forward (line 62) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class _FeedForward (line 69) | class _FeedForward(torch.nn.Module):
    method __init__ (line 70) | def __init__(
    method forward (line 96) | def forward(self, x):
  class _TransformerBlock (line 103) | class _TransformerBlock(nn.Module):
    method __init__ (line 104) | def __init__(
    method attention (line 141) | def attention(
    method forward (line 148) | def forward(
  class _Transformer (line 162) | class _Transformer(nn.Module):
    method __init__ (line 163) | def __init__(
    method forward (line 190) | def forward(self, x: torch.Tensor, return_intermediate=None, mask=None...
  class PackingIndex (line 201) | class PackingIndex:
  class VisionEncoder (line 225) | class VisionEncoder(nn.Module):
    method __init__ (line 226) | def __init__(
    method get_rope_freqs (line 307) | def get_rope_freqs(self, dim, theta=10000):
    method compute_rope_freqs (line 312) | def compute_rope_freqs(self, freqs, t):
    method load_hook (line 317) | def load_hook(
    method apply_class_embedding (line 367) | def apply_class_embedding(self, x):
    method forward (line 378) | def forward(self, images: torch.Tensor) -> torch.Tensor:

FILE: models/quantize_impls.py
  class Fp8ScaledWeights (line 26) | class Fp8ScaledWeights:
    method __class__ (line 30) | def __class__(self) -> Type[nn.parameter.Parameter]:
    method grad_fn (line 34) | def grad_fn(self) -> None:
  class Fp8RowwiseWeights (line 40) | class Fp8RowwiseWeights(
  class Int4ScaledWeights (line 50) | class Int4ScaledWeights:
    method __class__ (line 54) | def __class__(self) -> Type[nn.parameter.Parameter]:
    method grad_fn (line 58) | def grad_fn(self) -> None:
  class Int4Weights (line 64) | class Int4Weights(
  function int4_row_quantize (line 74) | def int4_row_quantize(
  function pack_int4 (line 107) | def pack_int4(x: torch.Tensor) -> torch.Tensor:
  function bmm_nt (line 121) | def bmm_nt(
  function ffn_swiglu (line 135) | def ffn_swiglu(
  function quantize_fp8 (line 163) | def quantize_fp8(
  function quantize_int4 (line 190) | def quantize_int4(
  function load_fp8 (line 215) | def load_fp8(
  function load_int4 (line 241) | def load_int4(
  function fc_dynamic (line 259) | def fc_dynamic(
  function ffn_swiglu_dynamic (line 278) | def ffn_swiglu_dynamic(

FILE: models/sku_list.py
  function resolve_model (line 22) | def resolve_model(descriptor: str) -> Model | None:
  function all_registered_models (line 29) | def all_registered_models() -> list[Model]:
  function llama2_family (line 41) | def llama2_family() -> list[Model]:
  function llama3_family (line 48) | def llama3_family() -> list[Model]:
  function llama3_1_family (line 55) | def llama3_1_family() -> list[Model]:
  function llama3_2_family (line 62) | def llama3_2_family() -> list[Model]:
  function llama3_3_family (line 69) | def llama3_3_family() -> list[Model]:
  function llama4_family (line 75) | def llama4_family() -> list[Model]:
  function llama4_base_models (line 82) | def llama4_base_models() -> list[Model]:
  function llama4_instruct_models (line 101) | def llama4_instruct_models() -> list[Model]:
  function llama2_base_models (line 129) | def llama2_base_models() -> list[Model]:
  function llama3_base_models (line 188) | def llama3_base_models() -> list[Model]:
  function llama3_1_base_models (line 229) | def llama3_1_base_models() -> list[Model]:
  function llama3_2_base_models (line 327) | def llama3_2_base_models() -> list[Model]:
  function llama2_instruct_models (line 410) | def llama2_instruct_models() -> list[Model]:
  function llama3_instruct_models (line 469) | def llama3_instruct_models() -> list[Model]:
  function llama3_1_instruct_models (line 510) | def llama3_1_instruct_models() -> list[Model]:
  function arch_args_1b (line 608) | def arch_args_1b() -> dict:
  function arch_args_3b (line 623) | def arch_args_3b() -> dict:
  function llama3_2_quantized_models (line 638) | def llama3_2_quantized_models() -> list[Model]:
  function llama3_2_instruct_models (line 707) | def llama3_2_instruct_models() -> list[Model]:
  function llama3_3_instruct_models (line 769) | def llama3_3_instruct_models() -> list[Model]:
  function safety_models (line 793) | def safety_models() -> list[Model]:
  class LlamaDownloadInfo (line 920) | class LlamaDownloadInfo:
  function llama_meta_net_info (line 926) | def llama_meta_net_info(model: Model) -> LlamaDownloadInfo:
  function llama_meta_pth_size (line 1005) | def llama_meta_pth_size(model: Model) -> int:

FILE: models/sku_types.py
  class CheckpointQuantizationFormat (line 14) | class CheckpointQuantizationFormat(Enum):
  class ModelFamily (line 26) | class ModelFamily(Enum):
  class CoreModelId (line 36) | class CoreModelId(Enum):
  function is_multimodal (line 88) | def is_multimodal(model_id) -> bool:
  function model_family (line 100) | def model_family(model_id) -> ModelFamily:
  class Model (line 160) | class Model(BaseModel):
    method model_family (line 175) | def model_family(self) -> ModelFamily:
    method descriptor (line 179) | def descriptor(self, shorten_default_variant: bool = True) -> str:
    method is_instruct_model (line 185) | def is_instruct_model(self) -> bool:
    method is_featured (line 190) | def is_featured(self) -> bool:
    method max_seq_length (line 200) | def max_seq_length(self) -> int:

FILE: models/tokenizer_utils.py
  function load_bpe_file (line 15) | def load_bpe_file(model_path: Path) -> dict[bytes, int]:

FILE: models/utils/model_utils.py
  function model_local_dir (line 13) | def model_local_dir(descriptor: str) -> str: