SYMBOL INDEX (503 symbols across 50 files) FILE: models/checkpoint.py function map_mp_rank (line 18) | def map_mp_rank(old_mp_size: int, new_mp_size: int, new_mp_rank: int) ->... function maybe_reshard_state_dict (line 34) | def maybe_reshard_state_dict( function reshard_mp (line 100) | def reshard_mp( function convert_moe_weights (line 158) | def convert_moe_weights(state_dict: Dict[str, Any], num_experts: int) ->... FILE: models/cli/describe.py class Describe (line 16) | class Describe(Subcommand): method __init__ (line 19) | def __init__(self, subparsers: argparse._SubParsersAction): method _add_arguments (line 30) | def _add_arguments(self): method _run_model_describe_cmd (line 39) | def _run_model_describe_cmd(self, args: argparse.Namespace) -> None: FILE: models/cli/download.py class Download (line 37) | class Download(Subcommand): method __init__ (line 40) | def __init__(self, subparsers: argparse._SubParsersAction): function setup_download_parser (line 51) | def setup_download_parser(parser: argparse.ArgumentParser) -> None: class DownloadTask (line 101) | class DownloadTask: class DownloadError (line 111) | class DownloadError(Exception): class CustomTransferSpeedColumn (line 115) | class CustomTransferSpeedColumn(TransferSpeedColumn): method render (line 116) | def render(self, task): class ParallelDownloader (line 122) | class ParallelDownloader: method __init__ (line 123) | def __init__( method retry_with_exponential_backoff (line 148) | async def retry_with_exponential_backoff(self, task: DownloadTask, fun... method get_file_info (line 165) | async def get_file_info(self, client: httpx.AsyncClient, task: Downloa... method verify_file_integrity (line 195) | def verify_file_integrity(self, task: DownloadTask) -> bool: method download_chunk (line 200) | async def download_chunk(self, client: httpx.AsyncClient, task: Downlo... method prepare_download (line 223) | async def prepare_download(self, task: DownloadTask) -> None: method download_file (line 230) | async def download_file(self, task: DownloadTask) -> None: method has_disk_space (line 266) | def has_disk_space(self, tasks: list[DownloadTask]) -> bool: method download_all (line 286) | async def download_all(self, tasks: list[DownloadTask]) -> None: function _hf_download (line 318) | def _hf_download( function _meta_download (line 358) | def _meta_download( class ModelEntry (line 394) | class ModelEntry(BaseModel): class Manifest (line 401) | class Manifest(BaseModel): function _download_from_manifest (line 406) | def _download_from_manifest(manifest_file: str, max_concurrent_downloads... function run_download_cmd (line 448) | def run_download_cmd(args: argparse.Namespace, parser: argparse.Argument... FILE: models/cli/list.py function _get_model_size (line 19) | def _get_model_size(model_dir): function _convert_to_model_descriptor (line 23) | def _convert_to_model_descriptor(model): function _run_model_list_downloaded_cmd (line 30) | def _run_model_list_downloaded_cmd() -> None: class List (line 54) | class List(Subcommand): method __init__ (line 57) | def __init__(self, subparsers: argparse._SubParsersAction): method _add_arguments (line 68) | def _add_arguments(self): method _run_model_list_cmd (line 87) | def _run_model_list_cmd(self, args: argparse.Namespace) -> None: FILE: models/cli/llama.py class LlamaModelsCLIParser (line 19) | class LlamaModelsCLIParser: method __init__ (line 22) | def __init__(self): method parse_args (line 45) | def parse_args(self) -> argparse.Namespace: method run (line 51) | def run(self, args: argparse.Namespace) -> None: function main (line 55) | def main(): FILE: models/cli/prompt_format.py class PromptFormat (line 20) | class PromptFormat(Subcommand): method __init__ (line 23) | def __init__(self, subparsers: argparse._SubParsersAction): method _add_arguments (line 40) | def _add_arguments(self): method _run_model_template_cmd (line 55) | def _run_model_template_cmd(self, args: argparse.Namespace) -> None: function render_markdown_to_pager (line 112) | def render_markdown_to_pager(markdown_content: str): FILE: models/cli/remove.py class Remove (line 17) | class Remove(Subcommand): method __init__ (line 20) | def __init__(self, subparsers: argparse._SubParsersAction): method _add_arguments (line 31) | def _add_arguments(self): method _run_model_remove_cmd (line 45) | def _run_model_remove_cmd(self, args: argparse.Namespace) -> None: FILE: models/cli/safety_models.py class PromptGuardModel (line 16) | class PromptGuardModel(BaseModel): method descriptor (line 28) | def descriptor(self) -> str: function prompt_guard_model_skus (line 34) | def prompt_guard_model_skus(): function prompt_guard_model_sku_map (line 48) | def prompt_guard_model_sku_map() -> dict[str, Any]: function prompt_guard_download_info_map (line 52) | def prompt_guard_download_info_map() -> dict[str, LlamaDownloadInfo]: FILE: models/cli/subcommand.py class Subcommand (line 9) | class Subcommand: method __init__ (line 12) | def __init__(self, *args, **kwargs): method create (line 16) | def create(cls, *args, **kwargs): method _add_arguments (line 19) | def _add_arguments(self): FILE: models/cli/table.py function print_table (line 14) | def print_table(rows, headers=None, separate_rows: bool = False, sort_by... FILE: models/cli/utils.py function print_subcommand_description (line 9) | def print_subcommand_description(parser, subparsers): FILE: models/cli/verify_download.py class VerificationResult (line 21) | class VerificationResult: class VerifyDownload (line 29) | class VerifyDownload(Subcommand): method __init__ (line 32) | def __init__(self, subparsers: argparse._SubParsersAction): function setup_verify_download_parser (line 43) | def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None: function calculate_sha256 (line 52) | def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str: function load_checksums (line 60) | def load_checksums(checklist_path: Path) -> dict[str, str]: function verify_files (line 72) | def verify_files(model_dir: Path, checksums: dict[str, str], console: Co... function run_verify_cmd (line 107) | def run_verify_cmd(args: argparse.Namespace, parser: argparse.ArgumentPa... FILE: models/datatypes.py class Role (line 21) | class Role(Enum): class BuiltinTool (line 28) | class BuiltinTool(Enum): class ToolCall (line 39) | class ToolCall(BaseModel): method validate_field (line 53) | def validate_field(cls, v): class ToolPromptFormat (line 62) | class ToolPromptFormat(Enum): class StopReason (line 87) | class StopReason(Enum): class ToolParamDefinition (line 93) | class ToolParamDefinition(BaseModel): class ToolDefinition (line 100) | class ToolDefinition(BaseModel): method validate_field (line 107) | def validate_field(cls, v): class RawMediaItem (line 116) | class RawMediaItem(BaseModel): method serialize_data (line 123) | def serialize_data(self, data: Optional[bytes], _info): method validate_data (line 130) | def validate_data(cls, v): class RawTextItem (line 136) | class RawTextItem(BaseModel): class RawMessage (line 146) | class RawMessage(BaseModel): class GenerationResult (line 158) | class GenerationResult(BaseModel): class QuantizationMode (line 176) | class QuantizationMode(str, Enum): FILE: models/llama3/args.py class QuantizationScheme (line 13) | class QuantizationScheme(Enum): class QuantizationArgs (line 18) | class QuantizationArgs: method __init__ (line 23) | def __init__(self, **kwargs): class LoRAArgs (line 33) | class LoRAArgs: class ModelArgs (line 39) | class ModelArgs: method __init__ (line 62) | def __init__(self, **kwargs): FILE: models/llama3/chat_format.py class VisionInput (line 31) | class VisionInput: class LLMInput (line 37) | class LLMInput: function role_str (line 42) | def role_str(role: Role) -> str: class ChatFormat (line 52) | class ChatFormat: method __init__ (line 55) | def __init__(self, tokenizer: Tokenizer): method _encode_header (line 61) | def _encode_header(self, role: str) -> List[int]: method encode_content (line 69) | def encode_content(self, content: RawContent) -> LLMInput: method _encode_content (line 73) | def _encode_content(self, content: RawContent, bos: bool = False) -> T... method encode_message (line 108) | def encode_message( method encode_dialog_prompt (line 146) | def encode_dialog_prompt( method decode_assistant_message (line 166) | def decode_assistant_message(self, tokens: List[int], stop_reason: Sto... method decode_assistant_message_from_content (line 171) | def decode_assistant_message_from_content(self, content: str, stop_rea... method _model_input_from_tokens_images (line 236) | def _model_input_from_tokens_images(self, tokens: List[int], images: L... function create_vision_mask (line 250) | def create_vision_mask( FILE: models/llama3/generation.py function is_xccl_available (line 33) | def is_xccl_available(): class Llama3 (line 39) | class Llama3: method build (line 41) | def build( method __init__ (line 147) | def __init__(self, model: Transformer | CrossAttentionTransformer, tok... method generate (line 154) | def generate( method completion (line 302) | def completion( method chat_completion (line 324) | def chat_completion( function sample_top_p (line 348) | def sample_top_p(probs, p): FILE: models/llama3/model.py class RMSNorm (line 31) | class RMSNorm(torch.nn.Module): method __init__ (line 32) | def __init__(self, dim: int, eps: float = 1e-6): method _norm (line 37) | def _norm(self, x): method forward (line 40) | def forward(self, x): function apply_scaling (line 45) | def apply_scaling(freqs: torch.Tensor) -> torch.Tensor: function precompute_freqs_cis (line 65) | def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0, use... function reshape_for_broadcast (line 75) | def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor): function apply_rotary_emb (line 83) | def apply_rotary_emb( function repeat_kv (line 96) | def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor: class Attention (line 108) | class Attention(nn.Module): method __init__ (line 109) | def __init__(self, args: ModelArgs): method forward (line 164) | def forward( class FeedForward (line 205) | class FeedForward(nn.Module): method __init__ (line 206) | def __init__( method forward (line 224) | def forward(self, x): class TransformerBlock (line 228) | class TransformerBlock(nn.Module): method __init__ (line 229) | def __init__(self, layer_id: int, args: ModelArgs): method forward (line 245) | def forward( class Transformer (line 257) | class Transformer(nn.Module): method __init__ (line 258) | def __init__(self, params: ModelArgs): method forward (line 281) | def forward(self, tokens: torch.Tensor, start_pos: int): FILE: models/llama3/multimodal/encoder_utils.py function resize_local_position_embedding (line 20) | def resize_local_position_embedding(orig_pos_embed, grid_size): function initialize_global_position_embedding_from_local (line 49) | def initialize_global_position_embedding_from_local(pos_and_cls_embed, g... function resize_global_position_embedding (line 77) | def resize_global_position_embedding(pos_and_cls_embed, grid_size, x_sca... function build_encoder_attention_mask (line 128) | def build_encoder_attention_mask( function expand_num_tokens_to_mult8 (line 150) | def expand_num_tokens_to_mult8(x): function contract_num_tokens_from_mult8 (line 171) | def contract_num_tokens_from_mult8(x, num_pad_tokens): FILE: models/llama3/multimodal/image_transform.py class VariableSizeImageTransform (line 26) | class VariableSizeImageTransform(object): method __init__ (line 61) | def __init__(self, size: int = IMAGE_RES) -> None: method get_factors (line 75) | def get_factors(n: int) -> Set[int]: method find_supported_resolutions (line 94) | def find_supported_resolutions(self, max_num_chunks: int, patch_size: ... method get_max_res_without_distortion (line 144) | def get_max_res_without_distortion( method _pad (line 179) | def _pad(self, image: Image.Image, target_size) -> Image.Image: method _split (line 185) | def _split(self, image: torch.Tensor, ncw: int, nch: int) -> torch.Ten... method resize_without_distortion (line 195) | def resize_without_distortion( method get_best_fit (line 259) | def get_best_fit( method __call__ (line 358) | def __call__( FILE: models/llama3/multimodal/model.py function reduce_from_tensor_model_parallel_region (line 41) | def reduce_from_tensor_model_parallel_region(input_): function gather_from_tensor_model_parallel_region (line 48) | def gather_from_tensor_model_parallel_region(input_): function _get_full_row_masked_out_mask (line 67) | def _get_full_row_masked_out_mask( class LayerNorm (line 83) | class LayerNorm(nn.LayerNorm): method forward (line 86) | def forward(self, x: torch.Tensor): class ColumnParallelConv2dPatch (line 91) | class ColumnParallelConv2dPatch(torch.nn.Module): method __init__ (line 104) | def __init__( method forward (line 122) | def forward(self, x: torch.Tensor) -> torch.Tensor: class ImageFeedForward (line 130) | class ImageFeedForward(torch.nn.Module): method __init__ (line 131) | def __init__( method forward (line 157) | def forward(self, x): class ImageAttention (line 166) | class ImageAttention(nn.Module): method __init__ (line 167) | def __init__( method forward (line 215) | def forward( class ImageTransformerBlock (line 243) | class ImageTransformerBlock(nn.Module): method __init__ (line 244) | def __init__( method forward (line 274) | def forward( class ImageTransformer (line 286) | class ImageTransformer(nn.Module): method __init__ (line 287) | def __init__( method forward (line 312) | def forward(self, x: torch.Tensor, return_intermediate=None, mask=None): class VisionEncoder (line 323) | class VisionEncoder(nn.Module): method __init__ (line 324) | def __init__( method load_hook (line 392) | def load_hook( method apply_positional_embedding (line 433) | def apply_positional_embedding(self, x, ar): method apply_class_embedding (line 445) | def apply_class_embedding(self, x): method forward (line 456) | def forward(self, images: torch.Tensor, ar: torch.Tensor) -> torch.Ten... class Attention (line 508) | class Attention(nn.Module): method __init__ (line 511) | def __init__(self, args: ModelArgs): method setup_cache (line 574) | def setup_cache(self, max_batch_size: int, dtype: torch.dtype): method forward (line 598) | def forward( class FeedForward (line 639) | class FeedForward(nn.Module): method __init__ (line 640) | def __init__( method forward (line 670) | def forward(self, x): class TransformerBlock (line 679) | class TransformerBlock(nn.Module): method __init__ (line 680) | def __init__(self, layer_id: int, args: ModelArgs): method setup_cache (line 711) | def setup_cache(self, max_batch_size: int, dtype: torch.dtype): method forward (line 714) | def forward( class TilePositionEmbedding (line 742) | class TilePositionEmbedding(nn.Module): method __init__ (line 743) | def __init__( method load_hook (line 759) | def load_hook( method _dynamic_resize (line 780) | def _dynamic_resize(embed: torch.Tensor, num_tiles: int): method forward (line 794) | def forward(self, x: torch.Tensor, ar: torch.Tensor, num_tiles: int = ... function _noinit (line 810) | def _noinit(x): class CrossAttention (line 814) | class CrossAttention(torch.nn.Module): method __init__ (line 817) | def __init__( method _compute_xattn_kv_cache (line 889) | def _compute_xattn_kv_cache(self, xattn_tokens: torch.Tensor) -> torch... method compute_xattn_kv_cache (line 909) | def compute_xattn_kv_cache(self, xattn_tokens: torch.Tensor) -> torch.... method forward (line 912) | def forward( class CrossAttentionTransformerBlock (line 937) | class CrossAttentionTransformerBlock(torch.nn.Module): method __init__ (line 940) | def __init__( method compute_xattn_kv_cache (line 980) | def compute_xattn_kv_cache(self, xattn_tokens: torch.Tensor) -> torch.... method forward (line 983) | def forward( class DummyCrossAttentionTransformerBlock (line 1003) | class DummyCrossAttentionTransformerBlock: method __call__ (line 1006) | def __call__( class DummySelfAttentionTransformerBlock (line 1015) | class DummySelfAttentionTransformerBlock: method __call__ (line 1018) | def __call__( class CrossAttentionTransformerVision (line 1027) | class CrossAttentionTransformerVision(torch.nn.Module): method __init__ (line 1028) | def __init__(self, args: ModelArgs) -> None: method forward (line 1054) | def forward(self, images: torch.Tensor, aspect_ratios: torch.Tensor) -... class CrossAttentionTransformerText (line 1065) | class CrossAttentionTransformerText(torch.nn.Module): method __init__ (line 1068) | def __init__(self, args: ModelArgs) -> None: method _init_fusion_schedule (line 1145) | def _init_fusion_schedule( method get_partially_trainable_embedding (line 1155) | def get_partially_trainable_embedding(self, x): method forward (line 1168) | def forward( method setup_cache (line 1208) | def setup_cache(self, max_batch_size: int, device: torch.device, dtype... method _get_xattn_mask (line 1228) | def _get_xattn_mask( class CrossAttentionTransformer (line 1264) | class CrossAttentionTransformer(torch.nn.Module): method __init__ (line 1265) | def __init__(self, args: ModelArgs) -> None: method setup_cache (line 1279) | def setup_cache(self, max_batch_size: int, device: torch.device, dtype... method compute_vision_tokens_masks (line 1282) | def compute_vision_tokens_masks( method forward (line 1353) | def forward( function _stack_images (line 1374) | def _stack_images( function _pad_masks (line 1403) | def _pad_masks( FILE: models/llama3/multimodal/utils.py function get_negative_inf_value (line 13) | def get_negative_inf_value(dtype): function to_2tuple (line 17) | def to_2tuple(x): FILE: models/llama3/quantization/loader.py function swiglu_wrapper (line 30) | def swiglu_wrapper( function convert_to_quantized_model (line 38) | def convert_to_quantized_model( function convert_to_fp8_quantized_model (line 53) | def convert_to_fp8_quantized_model( class Int8DynActInt4WeightLinearLoRA (line 99) | class Int8DynActInt4WeightLinearLoRA(Int8DynActInt4WeightLinear): method __init__ (line 115) | def __init__( method load_hook (line 149) | def load_hook( method forward (line 165) | def forward(self, input_: torch.Tensor) -> torch.Tensor: class Int8WeightEmbedding (line 173) | class Int8WeightEmbedding(torch.nn.Embedding): method __init__ (line 182) | def __init__( method load_hook (line 193) | def load_hook( class Int8WeightLinear (line 209) | class Int8WeightLinear(torch.nn.Linear): method __init__ (line 218) | def __init__(self, in_features: int, out_features: int, bias: bool = T... method load_hook (line 223) | def load_hook( function _prepare_model_int4_weight_int8_dynamic_activation (line 239) | def _prepare_model_int4_weight_int8_dynamic_activation( function convert_to_int4_quantized_model (line 287) | def convert_to_int4_quantized_model( FILE: models/llama3/scripts/chat_completion.py function get_device (line 27) | def get_device(): function run_main (line 37) | def run_main( function main (line 120) | def main(): FILE: models/llama3/scripts/completion.py function get_device (line 28) | def get_device(): function run_main (line 38) | def run_main( function main (line 92) | def main(): FILE: models/llama3/tests/api/test_generation.py function get_device (line 21) | def get_device(): function build_generator (line 32) | def build_generator(env_var: str, device: str): class TestTextModelInference (line 43) | class TestTextModelInference(unittest.TestCase): method setUpClass (line 47) | def setUpClass(cls): method test_run_generation (line 50) | def test_run_generation(self): class TestTextModelInferenceOnDevice (line 79) | class TestTextModelInferenceOnDevice(TestTextModelInference): class TestVisionModelInference (line 83) | class TestVisionModelInference(unittest.TestCase): method setUpClass (line 87) | def setUpClass(cls): method test_run_generation (line 92) | def test_run_generation(self): class TestVisionModelInferenceOnDevice (line 132) | class TestVisionModelInferenceOnDevice(TestVisionModelInference): FILE: models/llama3/tests/api/test_tokenizer.py class TokenizerTests (line 18) | class TokenizerTests(TestCase): method setUp (line 19) | def setUp(self): method test_special_tokens (line 23) | def test_special_tokens(self): method test_encode (line 29) | def test_encode(self): method test_decode (line 35) | def test_decode(self): method test_encode_message (line 43) | def test_encode_message(self): method test_encode_dialog (line 65) | def test_encode_dialog(self): FILE: models/llama3/tests/api/test_tool_utils.py class TestToolUtils (line 16) | class TestToolUtils(unittest.TestCase): method test_maybe_extract_custom_tool_call (line 17) | def test_maybe_extract_custom_tool_call(self): class TestPythonListCheck (line 25) | class TestPythonListCheck(unittest.TestCase): method test_valid_list_with_single_function_call (line 26) | def test_valid_list_with_single_function_call(self): method test_valid_list_with_multiple_function_calls (line 30) | def test_valid_list_with_multiple_function_calls(self): method test_invalid_empty_list (line 36) | def test_invalid_empty_list(self): method test_invalid_list_with_non_function_call (line 40) | def test_invalid_list_with_non_function_call(self): method test_invalid_list_with_positional_args (line 44) | def test_invalid_list_with_positional_args(self): method test_invalid_nested_list (line 48) | def test_invalid_nested_list(self): method test_invalid_dict (line 52) | def test_invalid_dict(self): method test_invalid_syntax (line 56) | def test_invalid_syntax(self): method test_valid_list_with_boolean_args (line 60) | def test_valid_list_with_boolean_args(self): method test_valid_list_with_numeric_args (line 64) | def test_valid_list_with_numeric_args(self): method test_invalid_bare_function_call (line 68) | def test_invalid_bare_function_call(self): method test_invalid_extra_char_function_call (line 72) | def test_invalid_extra_char_function_call(self): class TestParsePythonList (line 77) | class TestParsePythonList(unittest.TestCase): method test_single_function_call (line 78) | def test_single_function_call(self): method test_multiple_function_calls (line 83) | def test_multiple_function_calls(self): method test_function_call_with_numeric_args (line 93) | def test_function_call_with_numeric_args(self): method test_function_call_with_mixed_type_args (line 98) | def test_function_call_with_mixed_type_args(self): method test_function_call_with_empty_args (line 108) | def test_function_call_with_empty_args(self): method test_function_call_with_string_containing_spaces (line 113) | def test_function_call_with_string_containing_spaces(self): method test_function_names_with_underscores_lists_and_dicts (line 118) | def test_function_names_with_underscores_lists_and_dicts(self): FILE: models/llama3/tokenizer.py class Tokenizer (line 46) | class Tokenizer: method get_instance (line 58) | def get_instance(cls): method __init__ (line 65) | def __init__(self, model_path: Path): method encode (line 118) | def encode( method decode (line 174) | def decode(self, t: Sequence[int]) -> str: method _split_whitespaces_or_nonwhitespaces (line 188) | def _split_whitespaces_or_nonwhitespaces(s: str, max_consecutive_slice... FILE: models/llama3/tool_utils.py function is_json (line 18) | def is_json(s): function is_valid_python_list (line 28) | def is_valid_python_list(input_string): function parse_python_list_for_function_calls (line 67) | def parse_python_list_for_function_calls(input_string): class ToolUtils (line 96) | class ToolUtils: method is_builtin_tool_call (line 98) | def is_builtin_tool_call(message_body: str) -> bool: method maybe_extract_builtin_tool_call (line 103) | def maybe_extract_builtin_tool_call(message_body: str) -> Optional[Tup... method maybe_extract_custom_tool_call (line 116) | def maybe_extract_custom_tool_call(message_body: str) -> Optional[Tupl... method encode_tool_call (line 149) | def encode_tool_call(t: ToolCall, tool_prompt_format: ToolPromptFormat... FILE: models/llama4/args.py class QuantizationScheme (line 14) | class QuantizationScheme(Enum): class QuantizationArgs (line 18) | class QuantizationArgs(BaseModel): class LoRAArgs (line 24) | class LoRAArgs(BaseModel): class MoEArgs (line 29) | class MoEArgs(BaseModel): class Size (line 39) | class Size(BaseModel): class VisionArgs (line 44) | class VisionArgs(BaseModel): class ModelArgs (line 58) | class ModelArgs(BaseModel): method validate (line 93) | def validate(self) -> "ModelArgs": FILE: models/llama4/chat_format.py function role_str (line 36) | def role_str(role: Role) -> str: class TransformedImage (line 47) | class TransformedImage: function convert_image_to_rgb (line 53) | def convert_image_to_rgb(image: PIL_Image.Image, bg: Tuple[int, int, int... class ChatFormat (line 62) | class ChatFormat: method __init__ (line 65) | def __init__( method _encode_header (line 85) | def _encode_header(self, role: str) -> List[int]: method encode_content (line 95) | def encode_content(self, content: RawContent) -> LLMInput: method _encode_image (line 99) | def _encode_image( method _encode_content (line 144) | def _encode_content(self, content: RawContent, bos: bool = False) -> T... method encode_message (line 191) | def encode_message( method encode_dialog_prompt (line 224) | def encode_dialog_prompt( method decode_assistant_message (line 243) | def decode_assistant_message(self, tokens: List[int], stop_reason: Sto... method decode_assistant_message_from_content (line 248) | def decode_assistant_message_from_content(self, content: str, stop_rea... method _model_input_from_tokens_images (line 314) | def _model_input_from_tokens_images(self, tokens: List[int], images: L... FILE: models/llama4/datatypes.py class MaskedEmbedding (line 15) | class MaskedEmbedding: class LLMInput (line 21) | class LLMInput: class TransformerInput (line 38) | class TransformerInput: class LLMOutput (line 54) | class LLMOutput: FILE: models/llama4/ffn.py class FeedForward (line 16) | class FeedForward(nn.Module): method __init__ (line 17) | def __init__( method load_hook (line 31) | def load_hook( method forward (line 47) | def forward(self, x): FILE: models/llama4/generation.py class Llama4 (line 36) | class Llama4: method build (line 38) | def build( method __init__ (line 112) | def __init__(self, model: Transformer, tokenizer: Tokenizer, args: Mod... method generate (line 119) | def generate( method completion (line 247) | def completion( method chat_completion (line 269) | def chat_completion( function sample_top_p (line 292) | def sample_top_p(probs, p): FILE: models/llama4/model.py function rmsnorm (line 27) | def rmsnorm(x, eps): class RMSNorm (line 34) | class RMSNorm(torch.nn.Module): method __init__ (line 35) | def __init__(self, dim: int, eps: float = 1e-6): method forward (line 40) | def forward(self, x): function apply_scaling (line 44) | def apply_scaling(freqs: torch.Tensor, scale_factor: float, high_freq_fa... function precompute_freqs_cis (line 64) | def precompute_freqs_cis( function reshape_for_broadcast (line 81) | def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor): function apply_rotary_emb (line 89) | def apply_rotary_emb( class Attention (line 102) | class Attention(nn.Module): method __init__ (line 105) | def __init__( method load_hook (line 176) | def load_hook( method forward (line 199) | def forward( class TransformerBlock (line 251) | class TransformerBlock(nn.Module): method __init__ (line 252) | def __init__(self, layer_id: int, args: ModelArgs): method load_hook (line 290) | def load_hook( method forward (line 317) | def forward( class Transformer (line 337) | class Transformer(nn.Module): method __init__ (line 338) | def __init__(self, args: ModelArgs, **kwargs) -> None: method load_hook (line 376) | def load_hook( method forward (line 390) | def forward(self, model_input: TransformerInput) -> TransformerOutput: function create_chunked_attention_mask (line 431) | def create_chunked_attention_mask(seq_len: int, attention_chunk_size: in... FILE: models/llama4/moe.py class Experts (line 22) | class Experts(nn.Module): method __init__ (line 23) | def __init__( method load_hook (line 65) | def load_hook( method forward (line 83) | def forward( method batched_swiglu (line 97) | def batched_swiglu(self, x: Tensor, w1: Tensor, w3: Tensor, w2: Tensor... class MoE (line 102) | class MoE(torch.nn.Module): method __init__ (line 121) | def __init__( method load_hook (line 160) | def load_hook( method forward (line 175) | def forward(self, x_bsD: Tensor) -> Tensor: # noqa: N803 function divide_exact (line 213) | def divide_exact(numerator: int, denominator: int) -> int: FILE: models/llama4/preprocess.py class ResizeNormalizeImageTransform (line 22) | class ResizeNormalizeImageTransform: method __init__ (line 23) | def __init__( method __call__ (line 45) | def __call__(self, image: Image.Image) -> torch.Tensor: class VariableSizeImageTransform (line 49) | class VariableSizeImageTransform(object): method __init__ (line 84) | def __init__(self, size: int = IMAGE_RES) -> None: method get_factors (line 97) | def get_factors(n: int) -> Set[int]: method find_supported_resolutions (line 116) | def find_supported_resolutions(self, max_num_chunks: int, patch_size: ... method get_max_res_without_distortion (line 166) | def get_max_res_without_distortion( method _pad (line 201) | def _pad(self, image: Image.Image, target_size) -> Image.Image: method _split (line 207) | def _split(self, image: torch.Tensor, ncw: int, nch: int) -> torch.Ten... method resize_without_distortion (line 217) | def resize_without_distortion( method get_best_fit (line 284) | def get_best_fit( method __call__ (line 383) | def __call__( FILE: models/llama4/quantization/loader.py function swiglu_wrapper_no_reduce (line 24) | def swiglu_wrapper_no_reduce( function experts_batched_swiglu_wrapper (line 33) | def experts_batched_swiglu_wrapper( function convert_to_quantized_model (line 46) | def convert_to_quantized_model( function logging_callbacks (line 174) | def logging_callbacks( FILE: models/llama4/scripts/chat_completion.py function run_main (line 24) | def run_main( function main (line 109) | def main(): FILE: models/llama4/scripts/completion.py function run_main (line 24) | def run_main( function main (line 71) | def main(): FILE: models/llama4/scripts/quantize.py function ffn_quantize (line 45) | def ffn_quantize( function main (line 214) | def main(): FILE: models/llama4/tests/api/test_chat_format.py class TestChatFormatArgumentsJson (line 17) | class TestChatFormatArgumentsJson(unittest.TestCase): method setUp (line 20) | def setUp(self): method test_arguments_json_included_in_custom_tool_call (line 26) | def test_arguments_json_included_in_custom_tool_call(self): method test_arguments_json_included_in_builtin_tool_call (line 57) | def test_arguments_json_included_in_builtin_tool_call(self): method test_arguments_json_included_in_code_interpreter_call (line 94) | def test_arguments_json_included_in_code_interpreter_call(self): method test_arguments_json_with_complex_arguments (line 131) | def test_arguments_json_with_complex_arguments(self): method test_no_tool_calls_when_no_tools_detected (line 166) | def test_no_tool_calls_when_no_tools_detected(self): FILE: models/llama4/tokenizer.py function get_reserved_special_tokens (line 43) | def get_reserved_special_tokens(name, count, start_index=0): class Tokenizer (line 113) | class Tokenizer: method get_instance (line 125) | def get_instance(cls): method __init__ (line 132) | def __init__(self, model_path: Path): method encode (line 181) | def encode( method decode (line 237) | def decode(self, t: Sequence[int]) -> str: method _split_whitespaces_or_nonwhitespaces (line 251) | def _split_whitespaces_or_nonwhitespaces(s: str, max_consecutive_slice... FILE: models/llama4/vision/embedding.py class PixelShuffle (line 20) | class PixelShuffle(nn.Module): method __init__ (line 21) | def __init__(self, ps_ratio): method forward (line 25) | def forward(self, x): function pixel_shuffle_op (line 36) | def pixel_shuffle_op(input_x, ps_ratio): class SimpleMLP (line 50) | class SimpleMLP(torch.nn.Module): method __init__ (line 51) | def __init__( method forward (line 76) | def forward(self, x): class PixelShuffleMLP (line 83) | class PixelShuffleMLP(torch.nn.Module): method __init__ (line 84) | def __init__( method forward (line 108) | def forward(self, encoded_patches: torch.Tensor) -> torch.Tensor: class VisionEmbeddings (line 113) | class VisionEmbeddings(torch.nn.Module): method __init__ (line 114) | def __init__(self, args: VisionArgs): method load_hook (line 138) | def load_hook( method _get_empty_sequence (line 154) | def _get_empty_sequence(self, h): method forward (line 165) | def forward( function scatter_embeddings (line 180) | def scatter_embeddings(image_batch, image_mask, h_image, encoded_patches... FILE: models/llama4/vision/encoder.py class LayerNorm (line 23) | class LayerNorm(nn.LayerNorm): method forward (line 26) | def forward(self, x: torch.Tensor): class ColumnParallelConv2dPatch (line 31) | class ColumnParallelConv2dPatch(torch.nn.Module): method __init__ (line 44) | def __init__( method forward (line 62) | def forward(self, x: torch.Tensor) -> torch.Tensor: class _FeedForward (line 69) | class _FeedForward(torch.nn.Module): method __init__ (line 70) | def __init__( method forward (line 96) | def forward(self, x): class _TransformerBlock (line 103) | class _TransformerBlock(nn.Module): method __init__ (line 104) | def __init__( method attention (line 141) | def attention( method forward (line 148) | def forward( class _Transformer (line 162) | class _Transformer(nn.Module): method __init__ (line 163) | def __init__( method forward (line 190) | def forward(self, x: torch.Tensor, return_intermediate=None, mask=None... class PackingIndex (line 201) | class PackingIndex: class VisionEncoder (line 225) | class VisionEncoder(nn.Module): method __init__ (line 226) | def __init__( method get_rope_freqs (line 307) | def get_rope_freqs(self, dim, theta=10000): method compute_rope_freqs (line 312) | def compute_rope_freqs(self, freqs, t): method load_hook (line 317) | def load_hook( method apply_class_embedding (line 367) | def apply_class_embedding(self, x): method forward (line 378) | def forward(self, images: torch.Tensor) -> torch.Tensor: FILE: models/quantize_impls.py class Fp8ScaledWeights (line 26) | class Fp8ScaledWeights: method __class__ (line 30) | def __class__(self) -> Type[nn.parameter.Parameter]: method grad_fn (line 34) | def grad_fn(self) -> None: class Fp8RowwiseWeights (line 40) | class Fp8RowwiseWeights( class Int4ScaledWeights (line 50) | class Int4ScaledWeights: method __class__ (line 54) | def __class__(self) -> Type[nn.parameter.Parameter]: method grad_fn (line 58) | def grad_fn(self) -> None: class Int4Weights (line 64) | class Int4Weights( function int4_row_quantize (line 74) | def int4_row_quantize( function pack_int4 (line 107) | def pack_int4(x: torch.Tensor) -> torch.Tensor: function bmm_nt (line 121) | def bmm_nt( function ffn_swiglu (line 135) | def ffn_swiglu( function quantize_fp8 (line 163) | def quantize_fp8( function quantize_int4 (line 190) | def quantize_int4( function load_fp8 (line 215) | def load_fp8( function load_int4 (line 241) | def load_int4( function fc_dynamic (line 259) | def fc_dynamic( function ffn_swiglu_dynamic (line 278) | def ffn_swiglu_dynamic( FILE: models/sku_list.py function resolve_model (line 22) | def resolve_model(descriptor: str) -> Model | None: function all_registered_models (line 29) | def all_registered_models() -> list[Model]: function llama2_family (line 41) | def llama2_family() -> list[Model]: function llama3_family (line 48) | def llama3_family() -> list[Model]: function llama3_1_family (line 55) | def llama3_1_family() -> list[Model]: function llama3_2_family (line 62) | def llama3_2_family() -> list[Model]: function llama3_3_family (line 69) | def llama3_3_family() -> list[Model]: function llama4_family (line 75) | def llama4_family() -> list[Model]: function llama4_base_models (line 82) | def llama4_base_models() -> list[Model]: function llama4_instruct_models (line 101) | def llama4_instruct_models() -> list[Model]: function llama2_base_models (line 129) | def llama2_base_models() -> list[Model]: function llama3_base_models (line 188) | def llama3_base_models() -> list[Model]: function llama3_1_base_models (line 229) | def llama3_1_base_models() -> list[Model]: function llama3_2_base_models (line 327) | def llama3_2_base_models() -> list[Model]: function llama2_instruct_models (line 410) | def llama2_instruct_models() -> list[Model]: function llama3_instruct_models (line 469) | def llama3_instruct_models() -> list[Model]: function llama3_1_instruct_models (line 510) | def llama3_1_instruct_models() -> list[Model]: function arch_args_1b (line 608) | def arch_args_1b() -> dict: function arch_args_3b (line 623) | def arch_args_3b() -> dict: function llama3_2_quantized_models (line 638) | def llama3_2_quantized_models() -> list[Model]: function llama3_2_instruct_models (line 707) | def llama3_2_instruct_models() -> list[Model]: function llama3_3_instruct_models (line 769) | def llama3_3_instruct_models() -> list[Model]: function safety_models (line 793) | def safety_models() -> list[Model]: class LlamaDownloadInfo (line 920) | class LlamaDownloadInfo: function llama_meta_net_info (line 926) | def llama_meta_net_info(model: Model) -> LlamaDownloadInfo: function llama_meta_pth_size (line 1005) | def llama_meta_pth_size(model: Model) -> int: FILE: models/sku_types.py class CheckpointQuantizationFormat (line 14) | class CheckpointQuantizationFormat(Enum): class ModelFamily (line 26) | class ModelFamily(Enum): class CoreModelId (line 36) | class CoreModelId(Enum): function is_multimodal (line 88) | def is_multimodal(model_id) -> bool: function model_family (line 100) | def model_family(model_id) -> ModelFamily: class Model (line 160) | class Model(BaseModel): method model_family (line 175) | def model_family(self) -> ModelFamily: method descriptor (line 179) | def descriptor(self, shorten_default_variant: bool = True) -> str: method is_instruct_model (line 185) | def is_instruct_model(self) -> bool: method is_featured (line 190) | def is_featured(self) -> bool: method max_seq_length (line 200) | def max_seq_length(self) -> int: FILE: models/tokenizer_utils.py function load_bpe_file (line 15) | def load_bpe_file(model_path: Path) -> dict[bytes, int]: FILE: models/utils/model_utils.py function model_local_dir (line 13) | def model_local_dir(descriptor: str) -> str: