SYMBOL INDEX (12024 symbols across 1049 files)

FILE: .github/update_ci_permission.py
  function github_api_get (line 64) | def github_api_get(endpoint, params=None):
  function get_write_access_users (line 103) | def get_write_access_users():
  function get_top_contributors (line 120) | def get_top_contributors(days=90, limit=50):
  function load_existing_permissions (line 139) | def load_existing_permissions():
  function sort_permissions_file (line 149) | def sort_permissions_file():
  function main (line 165) | def main():

FILE: 3rdparty/amd/tuning/benchmark_moe_rocm.py
  function main (line 21) | def main(model, tp_size, dtype: str, batches):
  function prune_configs (line 28) | def prune_configs(M, N, K, configs):
  function union_of_list_of_dicts (line 103) | def union_of_list_of_dicts(l1, l2):
  function run_grid (line 114) | def run_grid(bs, model, method, tp_size, dtype: str):
  function run_timing (line 268) | def run_timing(

FILE: benchmark/asr/bench_sglang.py
  function to_bytes (line 19) | def to_bytes(y, sr):
  function run_asr_chat (line 26) | async def run_asr_chat(client, model_name, y, sr):
  function run_asr_transcription_sync (line 55) | def run_asr_transcription_sync(client, model_name, y, sr, language=None):
  function run_asr_transcription_stream_sync (line 75) | def run_asr_transcription_stream_sync(
  function run_asr_transcription (line 125) | async def run_asr_transcription(
  function bound_asr (line 153) | async def bound_asr(
  function process_dataset (line 199) | async def process_dataset(
  function run_evaluation (line 259) | def run_evaluation(args):

FILE: benchmark/bench_attention_sink/bench_attention_sink_triton.py
  function benchmark_decode (line 36) | def benchmark_decode(B, S, H_Q, H_KV, D):
  function benchmark_extend (line 139) | def benchmark_extend(B, S, H_Q, H_KV, D):

FILE: benchmark/bench_in_batch_prefix/bench_in_batch_prefix.py
  function generate_random_string (line 18) | def generate_random_string(token_length: int) -> str:
  function generate_unique_prefix (line 35) | def generate_unique_prefix(base_text, index):
  function text_qa (line 40) | def text_qa(s, question, gen_len):
  function prepare_prompts (line 45) | def prepare_prompts(num_prefix, num_samples_per_prefix, prefix_length, s...
  function test_batch_by_batch (line 62) | def test_batch_by_batch(all_prompts, gen_len):
  function test_batch_by_batch_with_hint (line 76) | def test_batch_by_batch_with_hint(all_prompts, gen_len):
  function test_send_all (line 92) | def test_send_all(all_prompts, gen_len):

FILE: benchmark/bench_linear_attention/bench_gdn_decode.py
  function make_inputs (line 44) | def make_inputs(
  function run_baseline (line 94) | def run_baseline(inp):
  function run_packed (line 132) | def run_packed(inp):
  function check_correctness (line 160) | def check_correctness(B, H, HV, K, V, pool_size, device, dtype, seed=42):
  function bench_shape (line 211) | def bench_shape(B, H, HV, K, V, pool_size, device, dtype):
  function run_correctness (line 304) | def run_correctness(device, dtype):
  function run_benchmark (line 359) | def run_benchmark(device, dtype, args):
  function main (line 424) | def main():

FILE: benchmark/bench_linear_attention/bench_gdn_prefill.py
  function make_k_contiguous (line 42) | def make_k_contiguous(t: torch.Tensor) -> torch.Tensor:
  function gdn_flops (line 50) | def gdn_flops(
  function gdn_bytes (line 68) | def gdn_bytes(
  function make_inputs (line 101) | def make_inputs(
  function run_triton (line 166) | def run_triton(inp):
  function run_flashinfer (line 185) | def run_flashinfer(inp):
  function check_shape (line 252) | def check_shape(
  function bench_shape (line 355) | def bench_shape(B, H, T_per_seq, K, V, pool_size, device, dtype):
  function run_correctness (line 453) | def run_correctness(device, dtype):
  function run_benchmark (line 520) | def run_benchmark(device, dtype, args):
  function main (line 570) | def main():

FILE: benchmark/bench_rope/benchmark_rope_index.py
  class DummyVisionConfig (line 25) | class DummyVisionConfig:
  class DummyHFConfig (line 30) | class DummyHFConfig:
  function calculate_stats (line 42) | def calculate_stats(times: list[float]) -> dict[str, float]:
  function _sync (line 54) | def _sync(device: torch.device):
  function _approx_hw (line 59) | def _approx_hw(patches: int, merge: int) -> tuple[int, int]:
  function generate_test_data (line 66) | def generate_test_data(
  function benchmark_rope_index (line 224) | def benchmark_rope_index(

FILE: benchmark/benchmark_batch/benchmark_batch.py
  function generate_random_prompt (line 30) | def generate_random_prompt(index, tokenizer_dir, num_tokens):
  function prepare_all_prompts (line 43) | def prepare_all_prompts(num_requests, batch_size, num_tokens, tokenizer_...
  function send_batch_request (line 75) | def send_batch_request(endpoint, prompts, gen_tokens, request_id):
  function run_benchmark (line 101) | def run_benchmark(endpoint, batched_prompts, batch_size, gen_tokens):
  function process_results (line 130) | def process_results(results, total_latency, num_requests):
  function main (line 167) | def main():

FILE: benchmark/benchmark_batch/benchmark_tokenizer.py
  function main (line 11) | def main():
  function run_benchmark (line 65) | def run_benchmark(
  function benchmark (line 86) | def benchmark(*, data, batch_size, sequential_fn, batch_fn, num_runs, ba...
  function print_results (line 119) | def print_results(*, results, func_name, batch_mode):
  function print_runs (line 163) | def print_runs(*, label, runs, avg):
  function measure_times (line 170) | def measure_times(*, fn, num_runs):
  function generate_random_token_ids (line 179) | def generate_random_token_ids(*, num_prompts, num_tokens, tokenizer):
  function parse_args (line 188) | def parse_args():

FILE: benchmark/boolq/bench_sglang.py
  function get_example (line 15) | def get_example(lines, i, answer):
  function few_shot_examples (line 22) | def few_shot_examples(lines, k):
  function main (line 29) | def main(args):

FILE: benchmark/boolq/convert_parquet_to_json.py
  function convert_parquet_to_json (line 6) | def convert_parquet_to_json(input_file, output_file):

FILE: benchmark/ceval/bench_sglang.py
  function get_one_example (line 20) | def get_one_example(line, include_answer):
  function get_few_shot_examples (line 32) | def get_few_shot_examples(lines):
  function get_answer_value (line 39) | def get_answer_value(response):
  function main (line 49) | def main(args):

FILE: benchmark/dspy/bench_dspy_intro.py
  class BasicQA (line 12) | class BasicQA(dspy.Signature):
  class GenerateAnswer (line 19) | class GenerateAnswer(dspy.Signature):
  class RAG (line 27) | class RAG(dspy.Module):
    method __init__ (line 28) | def __init__(self, num_passages=3):
    method forward (line 34) | def forward(self, question):
  function main (line 40) | def main(args):

FILE: benchmark/fla/benchmark_layernorm_gated.py
  function benchmark_layer_norm_fwd (line 15) | def benchmark_layer_norm_fwd(
  function main (line 264) | def main():

FILE: benchmark/generative_agents/agent_functions.py
  function poignancy_event (line 8) | def poignancy_event(s, persona_name, persona_iss, event):
  function poignancy_event_prompt (line 18) | def poignancy_event_prompt(persona_name, persona_iss, event):
  function generate_event_triple (line 31) | def generate_event_triple(s, persona_name, action):
  function generate_event_triple_prompt (line 56) | def generate_event_triple_prompt(persona_name, action):
  function generate_pronunciatio (line 83) | def generate_pronunciatio(s, action):
  function generate_pronunciatio_prompt (line 89) | def generate_pronunciatio_prompt(action):
  function action_location_sector (line 98) | def action_location_sector(
  function action_location_sector_prompt (line 158) | def action_location_sector_prompt(
  function action_location_object (line 219) | def action_location_object(
  function action_location_object_prompt (line 260) | def action_location_object_prompt(

FILE: benchmark/generative_agents/bench_other.py
  function main (line 18) | def main(args):

FILE: benchmark/generative_agents/bench_sglang.py
  function main (line 21) | def main(args):

FILE: benchmark/gsm8k/bench_other.py
  function get_one_example (line 19) | def get_one_example(lines, i, include_answer):
  function get_few_shot_examples (line 26) | def get_few_shot_examples(lines, k):
  function get_answer_value (line 33) | def get_answer_value(answer_str):
  function main (line 44) | def main(args):

FILE: benchmark/gsm8k/bench_sglang.py
  function get_one_example (line 22) | def get_one_example(lines, i, include_answer):
  function get_few_shot_examples (line 29) | def get_few_shot_examples(lines, k):
  function get_answer_value (line 36) | def get_answer_value(answer_str):
  function main (line 47) | def main(args):

FILE: benchmark/hellaswag/bench_other.py
  function get_one_example (line 14) | def get_one_example(lines, i, include_answer):
  function get_few_shot_examples (line 21) | def get_few_shot_examples(lines, k):
  function main (line 28) | def main(args):

FILE: benchmark/hellaswag/bench_sglang.py
  function get_one_example (line 16) | def get_one_example(lines, i, include_answer):
  function get_few_shot_examples (line 23) | def get_few_shot_examples(lines, k):
  function main (line 30) | def main(args):

FILE: benchmark/hf3fs/bench_client.py
  function print_stats (line 13) | def print_stats(x: List[int]):
  function test (line 26) | def test():
  function bench (line 66) | def bench():
  function main (line 155) | def main():

FILE: benchmark/hf3fs/bench_storage.py
  function print_stats (line 17) | def print_stats(x: List[int]):
  function test (line 30) | def test():
  function bench (line 129) | def bench():
  function allclose (line 195) | def allclose():
  function main (line 250) | def main():

FILE: benchmark/hicache/bench_long_context.py
  class ContextWorkloadGenerator (line 18) | class ContextWorkloadGenerator(WorkloadGenerator):
    method __init__ (line 19) | def __init__(self, args):
    method response_handler (line 68) | def response_handler(self):

FILE: benchmark/hicache/bench_mix.py
  function write_debug_log (line 28) | def write_debug_log(data):
  function parse_args (line 42) | def parse_args():
  function load_config (line 92) | def load_config():
  class UserData (line 125) | class UserData:
  function synchronized (line 134) | def synchronized():
  class UserGenerator (line 146) | class UserGenerator:
    method __init__ (line 147) | def __init__(self, config, model_path, dataset_path):
    method gen (line 195) | def gen(self):
    method push (line 230) | def push(self, user_data, generated_text, len_itl):
    method pop (line 270) | def pop(self):
  function gen_payload (line 279) | def gen_payload(prompt, output_len):
  function async_request_sglang_generate (line 299) | async def async_request_sglang_generate(
  class AtomicCounter (line 371) | class AtomicCounter:
    method __init__ (line 372) | def __init__(self, initial_value=0):
    method increment (line 377) | def increment(self, amount=1):
    method get (line 381) | def get(self):
  class WorkloadGenerator (line 385) | class WorkloadGenerator:
    method __init__ (line 386) | def __init__(self, args):
    method handle_request (line 417) | async def handle_request(self, user_data):
    method request_sender (line 427) | def request_sender(self):
    method response_handler (line 448) | def response_handler(self):
    method run (line 475) | def run(self):
  function main (line 544) | def main():

FILE: benchmark/hicache/bench_multiturn.py
  function parse_args (line 25) | def parse_args():
  function log_to_jsonl_file (line 171) | def log_to_jsonl_file(data, file_path="performance_metrics.jsonl", tag=""):
  class ReadyQueue (line 183) | class ReadyQueue:
    method __init__ (line 188) | def __init__(self, init_requests=None, policy="random"):
    method append (line 193) | def append(self, item):
    method pop (line 197) | def pop(self):
  class WorkloadGenerator (line 211) | class WorkloadGenerator:
    method __init__ (line 212) | def __init__(self, args):
    method handle_request (line 388) | async def handle_request(self, item):
    method request_sender (line 402) | def request_sender(self):
    method response_handler (line 435) | def response_handler(self):
    method run (line 544) | def run(self):

FILE: benchmark/hicache/bench_serving.py
  class RequestFuncInput (line 43) | class RequestFuncInput:
  class RequestFuncOutput (line 56) | class RequestFuncOutput:
  function async_request_openai_completions (line 69) | async def async_request_openai_completions(
  function async_request_profile (line 202) | async def async_request_profile(api_url: str) -> RequestFuncOutput:
  class BenchmarkMetrics (line 228) | class BenchmarkMetrics:
  function get_requests (line 261) | async def get_requests(
  function calculate_metrics (line 284) | def calculate_metrics(
  function benchmark (line 372) | async def benchmark(
  function run_benchmark (line 685) | def run_benchmark(args_: argparse.Namespace):

FILE: benchmark/hicache/data_processing.py
  function common_filter_chat (line 37) | def common_filter_chat(
  function sample_sharegpt_requests (line 94) | def sample_sharegpt_requests(
  function sample_ultrachat_requests (line 150) | def sample_ultrachat_requests(
  function sample_loogle_requests (line 198) | def sample_loogle_requests(
  function sample_nextqa_requests (line 267) | def sample_nextqa_requests(
  function sample_random_requests (line 347) | def sample_random_requests(
  function sample_generated_shared_prefix_requests (line 434) | def sample_generated_shared_prefix_requests(
  function get_dataset (line 520) | def get_dataset(args, tokenizer):

FILE: benchmark/hicache/nextqa.py
  function find_video_files (line 9) | def find_video_files(video_dir) -> List[str]:
  function video_frames (line 24) | def video_frames(video_path, max_frames) -> int:
  class Video (line 30) | class Video:
    method __init__ (line 31) | def __init__(self, video_path, num_frames):
    method __str__ (line 35) | def __str__(self):
    method __iter__ (line 38) | def __iter__(self):
  class VideoPrompt (line 42) | class VideoPrompt(Video):
    method __init__ (line 43) | def __init__(self, video_path, num_frames, prompt):
    method __str__ (line 47) | def __str__(self):
    method __iter__ (line 50) | def __iter__(self):
  class VideoLoader (line 54) | class VideoLoader:
  class VideoFileLoader (line 58) | class VideoFileLoader(VideoLoader):
    method __init__ (line 63) | def __init__(self, video_dir, batch_size=1, max_frames=sys.maxsize):
    method __iter__ (line 71) | def __iter__(self):  # (file, number of frames)
  class NExTQALoader (line 85) | class NExTQALoader(VideoLoader):
    method __init__ (line 91) | def __init__(
    method get_video_prompt (line 114) | def get_video_prompt(self, entry, max_frames) -> VideoPrompt:
    method __iter__ (line 126) | def __iter__(self):

FILE: benchmark/hicache/perf.py
  function jit_hicache_impl (line 8) | def jit_hicache_impl(
  function ref_hicache_impl (line 33) | def ref_hicache_impl(
  class HicacheBenchArgs (line 57) | class HicacheBenchArgs(NamedTuple):
  function perf (line 63) | def perf(f: Callable[[], Any], loop: int = 100) -> float:
  function test_hicache_kernel (line 79) | def test_hicache_kernel(args: HicacheBenchArgs) -> None:
  function main (line 210) | def main() -> None:

FILE: benchmark/json_decode_regex/bench_other.py
  function json_decode (line 17) | def json_decode(document, generate):
  function main (line 38) | def main(args):

FILE: benchmark/json_decode_regex/bench_sglang.py
  function json_warm_up (line 17) | def json_warm_up(s):
  function json_decode (line 32) | def json_decode(s, document):
  function main (line 47) | def main(args):

FILE: benchmark/json_decode_regex/build_dataset.py
  function get_content (line 35) | def get_content(city_name):

FILE: benchmark/json_jump_forward/bench_other.py
  function character_gen (line 44) | def character_gen(name, generate):
  function city_gen (line 51) | def city_gen(document, generate):
  function character_maker (line 61) | def character_maker(lm, name):
  function call_generate_lmql (line 85) | async def call_generate_lmql(
  function city_maker (line 109) | def city_maker(lm, document):
  function bench_character (line 132) | def bench_character(args):
  function bench_city_doc (line 210) | def bench_city_doc(args):
  function main (line 254) | def main(args):

FILE: benchmark/json_jump_forward/bench_sglang.py
  function character_gen (line 44) | def character_gen(s, name):
  function city_gen (line 51) | def city_gen(s, document):
  function bench_city_doc (line 59) | def bench_city_doc(args):
  function bench_character (line 82) | def bench_character(args):
  function main (line 106) | def main(args):

FILE: benchmark/json_jump_forward/build_dataset.py
  function get_content (line 35) | def get_content(city_name):

FILE: benchmark/json_schema/bench_sglang.py
  function schema_gen (line 20) | def schema_gen(s, message: Tuple[str, str], json_schema: str):
  function contains_formats (line 29) | def contains_formats(schema, formats: List[str]):
  function convert_dataset (line 43) | def convert_dataset(path: str):
  function bench_schema (line 76) | def bench_schema(args):
  function main (line 111) | def main(args):

FILE: benchmark/kernels/all_reduce/benchmark_aiter.py
  function parse_args (line 19) | def parse_args():
  function get_env_rank_world (line 55) | def get_env_rank_world() -> Tuple[int, int, int]:
  function init_dist (line 62) | def init_dist(backend: str):
  function get_device (line 73) | def get_device(local_rank: int) -> torch.device:
  function human_size (line 78) | def human_size(num_bytes: int) -> str:
  function get_message_sizes (line 87) | def get_message_sizes() -> List[int]:
  function run_once (line 105) | def run_once(comm, inp: torch.Tensor) -> Optional[torch.Tensor]:
  function bench_impl (line 114) | def bench_impl(
  function main (line 201) | def main():

FILE: benchmark/kernels/all_reduce/benchmark_all_reduce.py
  function parse_args (line 26) | def parse_args():
  function get_env_rank_world (line 62) | def get_env_rank_world() -> Tuple[int, int, int]:
  function init_dist (line 69) | def init_dist(backend: str):
  function get_device (line 92) | def get_device(local_rank: int) -> torch.device:
  function human_size (line 97) | def human_size(num_bytes: int) -> str:
  function get_message_sizes (line 106) | def get_message_sizes() -> List[int]:
  function run_once (line 124) | def run_once(comm, inp: torch.Tensor) -> Optional[torch.Tensor]:
  function bench_impl (line 133) | def bench_impl(
  function main (line 220) | def main():

FILE: benchmark/kernels/all_reduce/benchmark_fused_ar_rms_amd.py
  function parse_shapes (line 48) | def parse_shapes(raw: str) -> List[Shape]:
  function dtype_from_name (line 64) | def dtype_from_name(name: str) -> torch.dtype:
  function check_close (line 76) | def check_close(
  function _measure_us (line 92) | def _measure_us(
  function _barrier (line 128) | def _barrier(device: torch.device):
  function _mean_across_ranks (line 135) | def _mean_across_ranks(value: float, device: torch.device) -> float:
  function _all_true_across_ranks (line 142) | def _all_true_across_ranks(value: bool, device: torch.device) -> bool:
  function _make_inputs (line 148) | def _make_inputs(
  function _split_reference (line 171) | def _split_reference(
  function bench_eager (line 185) | def bench_eager(
  function bench_graph (line 234) | def bench_graph(
  function _shape_bytes (line 313) | def _shape_bytes(shape: Shape, dtype: torch.dtype) -> int:
  function parse_args (line 318) | def parse_args():
  function main (line 367) | def main():

FILE: benchmark/kernels/all_reduce/benchmark_mscclpp.py
  function torch_allreduce (line 34) | def torch_allreduce(torch_input: torch.Tensor, group: ProcessGroup) -> t...
  function msccl_allreduce (line 39) | def msccl_allreduce(
  function pynccl_allreduce (line 45) | def pynccl_allreduce(
  function _bench_graph_time (line 52) | def _bench_graph_time(func, inp_randn, warmup_loop=2, graph_loop=10, tes...
  function _bench_eager_time (line 84) | def _bench_eager_time(func, inp_randn, warmup_loop=2, test_loop=10):
  function get_torch_prof_ctx (line 106) | def get_torch_prof_ctx(do_prof: bool):
  function human_readable_size (line 122) | def human_readable_size(size, decimal_places=1):
  function print_markdown_table (line 137) | def print_markdown_table(data):

FILE: benchmark/kernels/all_reduce/benchmark_torch_symm_mem.py
  function torch_allreduce (line 55) | def torch_allreduce(torch_input: torch.Tensor, group: ProcessGroup) -> t...
  function torch_symm_mem_allreduce (line 60) | def torch_symm_mem_allreduce(
  function pynccl_allreduce (line 66) | def pynccl_allreduce(
  function _bench_graph_time (line 73) | def _bench_graph_time(func, inp_randn, warmup_loop=2, graph_loop=10, tes...
  function _bench_eager_time (line 105) | def _bench_eager_time(func, inp_randn, warmup_loop=2, test_loop=10):
  function get_torch_prof_ctx (line 127) | def get_torch_prof_ctx(do_prof: bool):
  function human_readable_size (line 143) | def human_readable_size(size, decimal_places=1):
  function print_markdown_table (line 158) | def print_markdown_table(data):

FILE: benchmark/kernels/decoding_attention_triton/triton_flashinfer_cudnn.py
  function benchmark_forward (line 13) | def benchmark_forward(
  function time_fwd (line 34) | def time_fwd(func, *args, **kwargs):
  function decode_attention_sglang (line 39) | def decode_attention_sglang(
  function decode_attention_flashinfer (line 98) | def decode_attention_flashinfer(dtype, head_num_q, head_num_kv):
  function convert_to_cudnn_type (line 159) | def convert_to_cudnn_type(torch_type):
  function decode_attention_cudnn (line 174) | def decode_attention_cudnn(
  function calculate_diff (line 287) | def calculate_diff():

FILE: benchmark/kernels/deepep/deepep_utils.py
  function init_dist (line 12) | def init_dist(local_rank: int, num_local_ranks: int, args):
  function calc_diff (line 36) | def calc_diff(x: torch.Tensor, y: torch.Tensor):
  function per_token_cast_to_fp8 (line 43) | def per_token_cast_to_fp8(x: torch.Tensor):
  function per_token_cast_back (line 53) | def per_token_cast_back(x_fp8: torch.Tensor, x_scales: torch.Tensor):
  function inplace_unique (line 59) | def inplace_unique(x: torch.Tensor, num_slots: int):
  function create_grouped_scores (line 74) | def create_grouped_scores(
  function bench (line 84) | def bench(fn, num_warmups: int = 20, num_tests: int = 30, post_fn=None):
  class empty_suppress (line 114) | class empty_suppress:
    method __enter__ (line 115) | def __enter__(self):
    method __exit__ (line 118) | def __exit__(self, *_):
  class suppress_stdout_stderr (line 122) | class suppress_stdout_stderr:
    method __enter__ (line 123) | def __enter__(self):
    method __exit__ (line 143) | def __exit__(self, *_):
  function bench_kineto (line 157) | def bench_kineto(
  function hash_tensor (line 217) | def hash_tensor(t: torch.Tensor):

FILE: benchmark/kernels/deepep/tuning_deepep.py
  function test_main (line 30) | def test_main(
  function _write_output (line 421) | def _write_output(args, output_data):
  function test_loop (line 429) | def test_loop(local_rank: int, num_local_ranks: int, args):

FILE: benchmark/kernels/deepseek/benchmark_deepgemm_fp8_gemm.py
  function tl_gemm (line 21) | def tl_gemm(
  function per_token_cast_to_fp8 (line 98) | def per_token_cast_to_fp8(x: torch.Tensor) -> Tuple[torch.Tensor, torch....
  function per_block_cast_to_fp8 (line 108) | def per_block_cast_to_fp8(x: torch.Tensor) -> Tuple[torch.Tensor, torch....
  function fp8_gemm_deepgemm (line 123) | def fp8_gemm_deepgemm(
  function fp8_gemm_sglang (line 140) | def fp8_gemm_sglang(
  function fp8_gemm_vllm (line 159) | def fp8_gemm_vllm(
  function calculate_diff (line 178) | def calculate_diff(m: int, n: int, k: int):
  function get_weight_shapes (line 236) | def get_weight_shapes(tp_size):
  function create_benchmark_configs (line 269) | def create_benchmark_configs(tp_size):
  function get_benchmark (line 281) | def get_benchmark(tp_size):

FILE: benchmark/kernels/deepseek/benchmark_deepgemm_fp8_gemm_blackwell.py
  function per_block_cast_to_fp8 (line 19) | def per_block_cast_to_fp8(x: torch.Tensor) -> Tuple[torch.Tensor, torch....
  function get_weight_shapes (line 35) | def get_weight_shapes(tp_size):
  function create_benchmark_configs (line 68) | def create_benchmark_configs(tp_size):
  function fp8_gemm_flashinfer (line 80) | def fp8_gemm_flashinfer(
  function fp8_gemm_deepgemm_blackwell (line 98) | def fp8_gemm_deepgemm_blackwell(
  function check_accuracy (line 112) | def check_accuracy(a, b, atol, rtol, percent):
  function calculate_diff (line 136) | def calculate_diff(m: int, n: int, k: int):
  function _benchmark (line 178) | def _benchmark(m, n, k, tp_size, provider):
  function get_benchmark_plot_friendly (line 231) | def get_benchmark_plot_friendly(tp_size):
  function get_benchmark (line 256) | def get_benchmark(tp_size):

FILE: benchmark/kernels/deepseek/benchmark_deepgemm_fp8_group_gemm.py
  function construct_grouped_and_flat_fp8 (line 18) | def construct_grouped_and_flat_fp8(
  function fp8_gemm_group_triton_kernel (line 87) | def fp8_gemm_group_triton_kernel(
  function fp8_gemm_group_triton (line 183) | def fp8_gemm_group_triton(a_tuple, b_tuple, c, num_groups):
  function fp8_gemm_group_deepgemm (line 244) | def fp8_gemm_group_deepgemm(x_fp8_grouped, y_fp8_grouped, out, m_indices):
  function calculate_diff (line 254) | def calculate_diff(m: int, n: int, k: int, num_groups: int):
  function get_weight_shapes (line 321) | def get_weight_shapes(tp_size):
  function create_benchmark_configs (line 354) | def create_benchmark_configs(tp_size):
  function get_benchmark (line 367) | def get_benchmark(tp_size):

FILE: benchmark/kernels/elementwise/benchmark_concat_mla.py
  function create_data (line 15) | def create_data(num_tokens):
  function fn_torch (line 36) | def fn_torch(k, k_nope, k_rope):
  function fn_hack_non_strided (line 41) | def fn_hack_non_strided(k, k_nope, k_rope):
  function fn_torch_compiled (line 50) | def fn_torch_compiled(k, k_nope, k_rope):
  function fn_cuda (line 54) | def fn_cuda(k, k_nope, k_rope):
  function fn_triton_kernel (line 59) | def fn_triton_kernel(
  function fn_triton (line 109) | def fn_triton(k, k_nope, k_rope):
  function execute_and_get_output (line 130) | def execute_and_get_output(f, data):
  function benchmark (line 182) | def benchmark(num_tokens, provider):

FILE: benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py
  function setup_flashinfer_workspace (line 85) | def setup_flashinfer_workspace(
  function cleanup_flashinfer_workspace (line 122) | def cleanup_flashinfer_workspace(ipc_handles):
  class FlashInferFusedAllReduceParams (line 134) | class FlashInferFusedAllReduceParams:
    method __init__ (line 137) | def __init__(
    method get_trtllm_fused_allreduce_kwargs (line 152) | def get_trtllm_fused_allreduce_kwargs(self):
  function flashinfer_fused_allreduce_rmsnorm (line 162) | def flashinfer_fused_allreduce_rmsnorm(
  function flashinfer_fused_allreduce_rmsnorm_fp8_quant (line 202) | def flashinfer_fused_allreduce_rmsnorm_fp8_quant(
  function flashinfer_fused_allreduce_rmsnorm_fp4_quant (line 244) | def flashinfer_fused_allreduce_rmsnorm_fp4_quant(
  function standard_allreduce_rmsnorm (line 287) | def standard_allreduce_rmsnorm(
  function standard_allreduce_rmsnorm_fp8_quant (line 316) | def standard_allreduce_rmsnorm_fp8_quant(
  function standard_allreduce_rmsnorm_fp4_quant (line 353) | def standard_allreduce_rmsnorm_fp4_quant(
  function standard_allreduce_rmsnorm_native (line 397) | def standard_allreduce_rmsnorm_native(
  function standard_allreduce_rmsnorm_fp8_quant_native (line 415) | def standard_allreduce_rmsnorm_fp8_quant_native(
  function standard_allreduce_rmsnorm_fp4_quant_native (line 443) | def standard_allreduce_rmsnorm_fp4_quant_native(
  function standard_allreduce_rmsnorm_native_compiled (line 478) | def standard_allreduce_rmsnorm_native_compiled(
  function standard_allreduce_rmsnorm_fp8_quant_native_compiled (line 491) | def standard_allreduce_rmsnorm_fp8_quant_native_compiled(
  function standard_allreduce_rmsnorm_fp4_quant_native_compiled (line 511) | def standard_allreduce_rmsnorm_fp4_quant_native_compiled(
  function create_test_tensors (line 532) | def create_test_tensors(
  function benchmark_operation (line 566) | def benchmark_operation(
  function run_benchmarks (line 605) | def run_benchmarks(
  function prepare_results_with_speedups (line 894) | def prepare_results_with_speedups(results_dict):
  function print_results (line 993) | def print_results(results_dict, seq_len, hidden_dim, dtype, use_residual...
  function format_results_markdown (line 1019) | def format_results_markdown(
  function save_results_to_file (line 1064) | def save_results_to_file(
  function main (line 1087) | def main():

FILE: benchmark/kernels/fused_moe_triton/benchmark_sglang_fused_moe_triton.py
  function fused_moe_triton_api (line 31) | def fused_moe_triton_api(
  function fused_moe_sglang_api (line 61) | def fused_moe_sglang_api(
  function benchmark (line 115) | def benchmark(
  function main (line 190) | def main():

FILE: benchmark/kernels/fused_moe_triton/benchmark_torch_compile_fused_moe.py
  function get_model_config (line 16) | def get_model_config(model_name: str, tp_size: int):
  function fused_topk_native (line 77) | def fused_topk_native(
  function fused_moe_torch (line 97) | def fused_moe_torch(
  function fused_moe_torch_compile (line 127) | def fused_moe_torch_compile(
  function fused_moe_sglang_api (line 153) | def fused_moe_sglang_api(
  function benchmark (line 203) | def benchmark(batch_size, provider, model_config, use_fp8_w8a8=False):
  function main (line 281) | def main():

FILE: benchmark/kernels/fused_moe_triton/benchmark_vllm_vs_sglang_fused_moe_triton.py
  function fused_moe_vllm_api (line 22) | def fused_moe_vllm_api(
  function fused_moe_sglang_api (line 68) | def fused_moe_sglang_api(
  function benchmark (line 120) | def benchmark(batch_size, provider, model_config, use_fp8_w8a8=False):
  function main (line 214) | def main():

FILE: benchmark/kernels/fused_moe_triton/common_utils.py
  class BenchmarkConfig (line 14) | class BenchmarkConfig(TypedDict):
  function calculate_shard_intermediate_size (line 23) | def calculate_shard_intermediate_size(
  function get_model_config (line 32) | def get_model_config(
  function get_rocm_configs_compute_bound (line 161) | def get_rocm_configs_compute_bound() -> List[Dict[str, int]]:
  function get_configs_compute_bound (line 184) | def get_configs_compute_bound() -> List[Dict[str, int]]:
  function sort_config (line 208) | def sort_config(config: BenchmarkConfig) -> BenchmarkConfig:
  function save_configs (line 223) | def save_configs(
  function get_config_filename (line 233) | def get_config_filename(
  function get_default_batch_sizes (line 271) | def get_default_batch_sizes() -> List[int]:

FILE: benchmark/kernels/fused_moe_triton/tuning_client.py
  function read_long_prompt (line 21) | def read_long_prompt():
  function openai_stream_test (line 31) | def openai_stream_test(model, ip, port):

FILE: benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py
  function benchmark_config (line 40) | def benchmark_config(
  class BenchmarkWorker (line 236) | class BenchmarkWorker:
    method __init__ (line 238) | def __init__(self, seed: int, server_args: ServerArgs) -> None:
    method benchmark (line 247) | def benchmark(
    method tune (line 315) | def tune(
  function main (line 365) | def main(args: argparse.Namespace):

FILE: benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton_sep.py
  class MoeInputs (line 45) | class MoeInputs:
  class KernelWrapper (line 52) | class KernelWrapper:
    method __init__ (line 53) | def __init__(self, moe_inputs, use_cuda_graph=True, inner_iter=10, **k...
    method cuda_graph_wrapper (line 64) | def cuda_graph_wrapper(self):
    method forward_cost (line 95) | def forward_cost(self, try_cnt=2):
  function load_topk_ids (line 119) | def load_topk_ids(topk_ids_dir, i: int):
  function benchmark_config (line 128) | def benchmark_config(
  class BestConfigTrace (line 408) | class BestConfigTrace:
    method __init__ (line 409) | def __init__(self, name, down_moe=False):
    method update (line 414) | def update(self, config, time_cost_all):
    method time_cost (line 426) | def time_cost(self, block_m):
    method config_dict (line 432) | def config_dict(self, block_m):
  class BenchmarkWorker (line 445) | class BenchmarkWorker:
    method __init__ (line 447) | def __init__(self, seed: int, server_args: ServerArgs) -> None:
    method benchmark (line 456) | def benchmark(
    method tune (line 494) | def tune(
    method cmp_configs (line 563) | def cmp_configs(
  function save_configs_sep (line 621) | def save_configs_sep(
  function main (line 659) | def main(args: argparse.Namespace):

FILE: benchmark/kernels/quantization/bench_fp4_quant.py
  function _test_accuracy_once (line 17) | def _test_accuracy_once(E, M, K, input_dtype, device):
  function benchmark (line 53) | def benchmark(M, K, provider):
  function test_accuracy (line 114) | def test_accuracy():

FILE: benchmark/kernels/quantization/bench_int8_quant.py
  function torch_int8_quant (line 12) | def torch_int8_quant(x):
  function _test_accuracy_once (line 23) | def _test_accuracy_once(M, K, input_dtype, device):
  function test_accuracy (line 35) | def test_accuracy():
  function benchmark (line 59) | def benchmark(batch_size, provider):

FILE: benchmark/kernels/quantization/tuning_block_wise_kernel.py
  function w8a8_block_matmul (line 46) | def w8a8_block_matmul(
  function get_rocm_configs_compute_bound (line 138) | def get_rocm_configs_compute_bound():
  function get_configs_compute_bound (line 161) | def get_configs_compute_bound():
  function get_weight_shapes (line 185) | def get_weight_shapes(tp_size):
  function benchmark_config (line 218) | def benchmark_config(
  function tune (line 245) | def tune(M, N, K, block_size, out_dtype, search_space, input_type):
  function save_configs (line 318) | def save_configs(
  function get_available_gpu_count (line 354) | def get_available_gpu_count():
  function tune_on_gpu (line 359) | def tune_on_gpu(args_dict):
  function distribute_batch_sizes (line 405) | def distribute_batch_sizes(batch_sizes, num_gpus):
  function main (line 415) | def main(args):

FILE: benchmark/kernels/scheduler_batch/benchmark_get_last_loc_triton.py
  function get_last_loc_torch (line 11) | def get_last_loc_torch(
  function get_last_loc_kernel (line 24) | def get_last_loc_kernel(
  function get_last_loc_triton (line 47) | def get_last_loc_triton(
  function test_get_last_loc (line 69) | def test_get_last_loc():
  function get_benchmark (line 94) | def get_benchmark():
  function run_benchmark (line 144) | def run_benchmark(save_path: str = "./configs/benchmark_ops/get_last_loc...

FILE: benchmark/kernels/scheduler_batch/benchmark_write_req_to_token_pool_triton.py
  function write_req_to_token_pool_triton (line 12) | def write_req_to_token_pool_triton(
  function write_req_to_token_pool_triton_optimize (line 49) | def write_req_to_token_pool_triton_optimize(
  function write_req_to_token_pool_reference (line 91) | def write_req_to_token_pool_reference(
  function test_write_req_to_token_pool (line 114) | def test_write_req_to_token_pool():
  function get_benchmark (line 231) | def get_benchmark():
  function run_benchmark (line 315) | def run_benchmark(save_path: str = "./configs/benchmark_ops/write_req_to...

FILE: benchmark/kernels/sliding_window_attention_triton/bench_triton_swa_kernel.py
  function extend_attention_fwd_torch (line 11) | def extend_attention_fwd_torch(
  function _build_batch (line 85) | def _build_batch(
  function _run_triton (line 177) | def _run_triton(inputs):
  function _run_torch_ref (line 196) | def _run_torch_ref(inputs):
  function bench (line 241) | def bench(

FILE: benchmark/line_retrieval/bench_sglang.py
  function line_retrieval (line 17) | def line_retrieval(s, prefix, suffix, body_0, body_1, body_2, body_3):
  function eval_model (line 30) | def eval_model(args, line_obj, num_hoops, src_indices, dst_percents):
  function main (line 131) | def main(args):

FILE: benchmark/line_retrieval/gen_data.py
  function generate_lines (line 16) | def generate_lines(random_words, num_lines, redirect_ratio):

FILE: benchmark/llava_bench/bench_sglang.py
  function image_qa (line 17) | def image_qa(s, image_file, question):
  function main (line 22) | def main(args):

FILE: benchmark/llm_judge/bench_other.py
  function multi_dimension_judge (line 24) | def multi_dimension_judge(article, generate):
  function multi_dimension_judge_async (line 52) | async def multi_dimension_judge_async(article, generate):
  function main (line 80) | def main(args):

FILE: benchmark/llm_judge/bench_sglang.py
  function multi_dimension_judge (line 25) | def multi_dimension_judge(s, article):
  function main (line 54) | def main(args):

FILE: benchmark/long_json_decode/bench_other.py
  function json_decode (line 13) | def json_decode(document, generate):
  function main (line 30) | def main(args):

FILE: benchmark/long_json_decode/bench_sglang.py
  function json_decode (line 14) | def json_decode(s, document):
  function main (line 34) | def main(args):

FILE: benchmark/lora/launch_server.py
  function launch_server (line 11) | def launch_server(args):

FILE: benchmark/lora/lora_bench.py
  function async_request_openai_completions (line 46) | async def async_request_openai_completions(
  function benchmark (line 147) | async def benchmark(
  function run_benchmark (line 343) | def run_benchmark(args_: argparse.Namespace):
  function set_ulimit (line 401) | def set_ulimit(target_soft_limit=65535):

FILE: benchmark/mmlu/bench_other.py
  function format_subject (line 20) | def format_subject(subject):
  function format_example (line 28) | def format_example(df, idx, include_answer=True):
  function gen_prompt (line 39) | def gen_prompt(train_df, subject, k=-1):
  function evaluate (line 50) | def evaluate(args, subject, dev_df, test_df, call_generate):
  function main (line 115) | def main(args):

FILE: benchmark/mmlu/bench_sglang.py
  function format_subject (line 25) | def format_subject(subject):
  function format_example (line 33) | def format_example(df, idx, include_answer=True):
  function gen_prompt (line 44) | def gen_prompt(train_df, subject, k=-1):
  function download_data (line 55) | def download_data(data_dir):
  function main (line 77) | def main(args):

FILE: benchmark/mmmu/bench_hf.py
  function eval_mmmu (line 18) | def eval_mmmu(args):

FILE: benchmark/mmmu/bench_sglang.py
  class RequestFuncOutput (line 42) | class RequestFuncOutput:
  function async_request_profile (line 54) | async def async_request_profile(api_url: str) -> RequestFuncOutput:
  function _get_prefix_suffix (line 72) | def _get_prefix_suffix(prompt: str) -> Tuple[str, str]:
  function process_sample (line 79) | async def process_sample(
  function process_sample_with_semaphore (line 127) | async def process_sample_with_semaphore(
  function eval_mmmu (line 143) | async def eval_mmmu(args) -> None:
  function parse_args (line 236) | def parse_args():
  function main (line 249) | def main():

FILE: benchmark/mmmu/data_utils.py
  function get_multi_choice_info (line 78) | def get_multi_choice_info(options):
  function load_yaml (line 94) | def load_yaml(file_path):
  function parse_img_path (line 104) | def parse_img_path(text):
  function process_single_sample (line 109) | def process_single_sample(data):
  function save_json (line 138) | def save_json(filename, ds):
  function save_jsonl (line 145) | def save_jsonl(filename, data):
  function save_args (line 163) | def save_args(args, path_dir):
  function construct_prompt (line 173) | def construct_prompt(sample, config):

FILE: benchmark/mmmu/eval_utils.py
  class EvalArgs (line 28) | class EvalArgs:
    method add_cli_args (line 46) | def add_cli_args(parser: argparse.ArgumentParser):
    method from_cli_args (line 133) | def from_cli_args(cls, args: argparse.Namespace):
  function set_seed (line 138) | def set_seed(seed_value):
  function prepare_samples (line 154) | def prepare_samples(eval_args: EvalArgs):
  function get_sampling_params (line 258) | def get_sampling_params(eval_args):
  function parse_multi_choice_response (line 276) | def parse_multi_choice_response(response, all_choices, index2ans):
  function check_is_number (line 332) | def check_is_number(string):
  function normalize_str (line 344) | def normalize_str(string):
  function extract_numbers (line 369) | def extract_numbers(string):
  function parse_open_response (line 392) | def parse_open_response(response):
  function eval_multi_choice (line 468) | def eval_multi_choice(gold_i, pred_i):
  function eval_open (line 497) | def eval_open(gold_i, pred_i):
  function evaluate (line 526) | def evaluate(samples):
  function calculate_ins_level_acc (line 553) | def calculate_ins_level_acc(results: Dict):
  function process_result (line 565) | def process_result(response, sample, answer_dict, out_samples):
  function eval_result (line 589) | def eval_result(model_answer_path, answer_dict, eval_output_path=None):

FILE: benchmark/mtbench/bench_other.py
  function load_questions (line 15) | def load_questions(filename):
  function write_answers (line 24) | def write_answers(filename, model_id, questions, answers):
  function main (line 40) | def main(args):

FILE: benchmark/mtbench/bench_sglang.py
  function load_questions (line 15) | def load_questions(filename):
  function write_answers (line 24) | def write_answers(filename, model_id, questions, answers):
  function answer_mt_bench (line 41) | def answer_mt_bench(s, question_1, question_2):
  function main (line 49) | def main(args):

FILE: benchmark/mtbench/bench_sglang_eagle.py
  function load_questions (line 24) | def load_questions(filename):
  function write_answers (line 33) | def write_answers(filename, model_id, questions, answers):
  function answer_mt_bench (line 50) | def answer_mt_bench(s, question_1, question_2):
  function main (line 60) | def main(args):

FILE: benchmark/multi_chain_reasoning/bench_other.py
  function get_answer_value (line 18) | def get_answer_value(answer_str):
  function multi_chain_gsm8k (line 39) | def multi_chain_gsm8k(question, num_chains, call_generate):
  function multi_chain_gsm8k_async (line 65) | async def multi_chain_gsm8k_async(question, num_chains, call_generate):
  function main (line 91) | def main(args):

FILE: benchmark/multi_chain_reasoning/bench_sglang.py
  function get_answer_value (line 18) | def get_answer_value(answer_str):
  function main (line 39) | def main(args):

FILE: benchmark/multi_document_qa/bench_other.py
  function multi_document_qa (line 18) | def multi_document_qa(docs, question, generate):
  function main (line 37) | def main(args):

FILE: benchmark/multi_document_qa/bench_sglang.py
  function multi_document_qa (line 14) | def multi_document_qa(s, docs, question):
  function main (line 33) | def main(args):

FILE: benchmark/multi_turn_chat/bench_other.py
  function multi_turns (line 15) | def multi_turns(generate, qas):
  function main (line 24) | def main(args):

FILE: benchmark/multi_turn_chat/bench_sglang.py
  function multi_turns (line 17) | def multi_turns(s, qas):
  function main (line 23) | def main(args):

FILE: benchmark/multi_turn_chat/data_gen.py
  function gen_prompt (line 7) | def gen_prompt(tokenizer, token_num):
  function gen_arguments (line 15) | def gen_arguments(args, tokenizer):

FILE: benchmark/multi_turn_chat/long_prompt_multi_turn.py
  function gen_prompt (line 18) | def gen_prompt(tokenizer, token_num):
  function get_cache_path (line 25) | def get_cache_path(args):
  function gen_arguments (line 34) | def gen_arguments(args, tokenizer):
  function multi_turns (line 72) | def multi_turns(s, system_prompt, qas):
  function main (line 80) | def main(args):

FILE: benchmark/prefill_only/bench_embeddings.py
  function build_embeddings_request (line 81) | def build_embeddings_request(index: int, item_count: int) -> tuple:
  function validate_embeddings_response (line 100) | def validate_embeddings_response(response_data: dict) -> bool:
  function build_warmup_embeddings_request (line 110) | def build_warmup_embeddings_request() -> dict:
  function run_benchmark (line 122) | async def run_benchmark(rps, duration_secs, item_count):
  function main (line 137) | async def main():

FILE: benchmark/prefill_only/bench_score.py
  function create_score_request_builder (line 65) | def create_score_request_builder():
  function validate_score_response (line 115) | def validate_score_response(response_data: dict) -> bool:
  function build_warmup_score_request (line 120) | def build_warmup_score_request() -> dict:
  function run_benchmark (line 155) | async def run_benchmark(rps, duration_secs, item_count):
  function main (line 173) | async def main():

FILE: benchmark/prefill_only/util.py
  class BenchmarkConfig (line 22) | class BenchmarkConfig:
    method __init__ (line 25) | def __init__(self):
  function generate_text_with_token_count (line 46) | def generate_text_with_token_count(
  function setup_profiler (line 87) | def setup_profiler(config: BenchmarkConfig, benchmark_name: str) -> None:
  function prepare_all_requests_parallel (line 106) | def prepare_all_requests_parallel(
  function sleep_with_distribution (line 187) | async def sleep_with_distribution(distribution: str, rps: float) -> None:
  function build_http_request_json (line 208) | def build_http_request_json(request_data: Any) -> str:
  function make_http_call (line 221) | async def make_http_call(
  function send_profile_request (line 284) | async def send_profile_request(
  function call_freeze_gc_http (line 331) | async def call_freeze_gc_http(session: aiohttp.ClientSession, http_url: ...
  function send_warmup_requests (line 359) | async def send_warmup_requests(
  function perform_global_warmup_and_freeze (line 398) | async def perform_global_warmup_and_freeze(
  function process_results (line 430) | async def process_results(
  function print_csv_results (line 573) | def print_csv_results(all_results: List[Dict[str, Any]]) -> None:
  function run_benchmark_main (line 628) | async def run_benchmark_main(
  function run_generic_benchmark (line 696) | async def run_generic_benchmark(

FILE: benchmark/react/bench_other.py
  function get_prompt (line 12) | def get_prompt(question):
  function main (line 86) | def main(args):

FILE: benchmark/react/bench_sglang.py
  function webthink (line 14) | def webthink(s, question, triplets):
  function main (line 109) | def main(args):

FILE: benchmark/reasoning_benchmark/answer_extraction.py
  function _fix_fracs (line 8) | def _fix_fracs(string):
  function _fix_a_slash_b (line 40) | def _fix_a_slash_b(string):
  function _fix_sqrt (line 57) | def _fix_sqrt(string):
  function _fix_tan (line 63) | def _fix_tan(string):
  function strip_string (line 69) | def strip_string(string):
  function extract_boxed_answers (line 182) | def extract_boxed_answers(text):
  function extract_program_output (line 200) | def extract_program_output(pred_str):
  function extract_answer (line 214) | def extract_answer(pred_str, exhaust=False):
  function extract_math_answer (line 253) | def extract_math_answer(question, reasoning, task):

FILE: benchmark/reasoning_benchmark/bench_sglang.py
  function reasoning_gen (line 19) | def reasoning_gen(s, question: str):
  function convert_dataset (line 31) | def convert_dataset(path: str, question_key: str, answer_key: str, num_t...
  function main (line 44) | def main(args):

FILE: benchmark/reasoning_benchmark/eval_utils.py
  function parse_digits (line 11) | def parse_digits(num):
  function is_digit (line 28) | def is_digit(num):
  function symbolic_equal (line 33) | def symbolic_equal(a, b):
  function math_equal (line 59) | def math_equal(prediction, reference, include_percentage=True, is_close=...

FILE: benchmark/tip_suggestion/bench_other.py
  function expand_tip (line 15) | def expand_tip(topic, tip, generate):
  function suggest_tips (line 34) | def suggest_tips(topic, generate):
  function main (line 57) | def main(args):

FILE: benchmark/tip_suggestion/bench_sglang.py
  function expand_tip (line 16) | def expand_tip(s, topic, tip):
  function suggest_tips (line 36) | def suggest_tips(s, topic):
  function main (line 54) | def main(args):

FILE: benchmark/tip_suggestion/lmql_funcs.py
  function expand_tip_async (line 4) | async def expand_tip_async(topic, tip, generate):
  function suggest_tips_async (line 23) | async def suggest_tips_async(topic, generate):

FILE: benchmark/tree_of_thought_deep/bench_other.py
  function get_answer_value (line 18) | def get_answer_value(answer_str):
  function most_frequent_number (line 29) | def most_frequent_number(numbers):
  function propose_plan (line 47) | def propose_plan(s, question, num_branches, call_generate):
  function execute_plan (line 62) | def execute_plan(s, num_branches, call_generate):
  function reflect_solution (line 75) | def reflect_solution(s, num_branches, call_generate):
  function get_final_answer (line 88) | def get_final_answer(s, num_branches, call_generate):
  function tree_search (line 101) | def tree_search(question, num_branches, call_generate):
  function main (line 122) | def main(args):

FILE: benchmark/tree_of_thought_deep/bench_sglang.py
  function get_answer_value (line 20) | def get_answer_value(answer_str):
  function most_frequent_number (line 31) | def most_frequent_number(numbers):
  function propose_plan (line 44) | def propose_plan(s, question, num_branches):
  function execute_plan (line 54) | def execute_plan(s, num_branches):
  function reflect_solution (line 63) | def reflect_solution(s, num_branches):
  function get_final_answer (line 72) | def get_final_answer(s, num_branches):
  function tree_search (line 82) | def tree_search(s, question, num_branches):
  function main (line 104) | def main(args):

FILE: benchmark/tree_of_thought_deep/lmql_funcs.py
  function propose_plan_async (line 10) | async def propose_plan_async(s, question, num_branches, call_generate):
  function execute_plan_async (line 25) | async def execute_plan_async(s, num_branches, call_generate):
  function reflect_solution_async (line 38) | async def reflect_solution_async(s, num_branches, call_generate):
  function get_final_answer_async (line 51) | async def get_final_answer_async(s, num_branches, call_generate):
  function tree_search_async (line 64) | async def tree_search_async(question, num_branches, call_generate):

FILE: benchmark/tree_of_thought_v0/bench_other.py
  function get_answer_value (line 18) | def get_answer_value(answer_str):
  function most_frequent_number (line 29) | def most_frequent_number(numbers):
  function propose_plan (line 47) | def propose_plan(s, question, num_branches, call_generate):
  function execute_plan (line 62) | def execute_plan(s, num_branches, call_generate):
  function reflect_solution (line 75) | def reflect_solution(s, num_branches, call_generate):
  function tree_search (line 88) | def tree_search(question, num_branches, call_generate):
  function main (line 102) | def main(args):

FILE: benchmark/tree_of_thought_v0/bench_sglang.py
  function get_answer_value (line 20) | def get_answer_value(answer_str):
  function most_frequent_number (line 31) | def most_frequent_number(numbers):
  function propose_plan (line 44) | def propose_plan(s, question, num_branches):
  function execute_plan (line 54) | def execute_plan(s, num_branches):
  function reflect_solution (line 63) | def reflect_solution(s, num_branches):
  function tree_search (line 73) | def tree_search(s, question, num_branches):
  function main (line 93) | def main(args):

FILE: docs/conf.py
  function setup (line 136) | def setup(app):

FILE: docs/deploy.py
  function run_cmd (line 7) | def run_cmd(cmd):

FILE: docs/performance_dashboard/app.js
  constant GITHUB_REPO (line 3) | const GITHUB_REPO = 'sgl-project/sglang';
  constant WORKFLOW_NAME (line 4) | const WORKFLOW_NAME = 'nightly-test-nvidia.yml';
  constant ARTIFACT_PREFIX (line 5) | const ARTIFACT_PREFIX = 'consolidated-metrics-';
  function init (line 40) | async function init() {
  function loadData (line 58) | async function loadData() {
  function fetchWorkflowRuns (line 93) | async function fetchWorkflowRuns() {
  function fetchMetricsForRun (line 112) | async function fetchMetricsForRun(run) {
  function isDiffusionResult (line 151) | function isDiffusionResult(result) {
  function populateFilters (line 156) | function populateFilters() {
  function formatIoLenLabel (line 227) | function formatIoLenLabel(ioKey) {
  function sortIoLengths (line 237) | function sortIoLengths(ioLengths) {
  function populateSelectWithLabels (line 247) | function populateSelectWithLabels(selectId, options, labelFormatter) {
  function updateIoLenFilter (line 258) | function updateIoLenFilter() {
  function updateVariantFilter (line 308) | function updateVariantFilter() {
  function populateSelect (line 344) | function populateSelect(selectId, options) {
  function populateSelectNoAll (line 354) | function populateSelectNoAll(selectId, options) {
  function createMetricTabs (line 368) | function createMetricTabs() {
  function detectCurrentDataType (line 396) | function detectCurrentDataType() {
  function selectMetricTab (line 416) | function selectMetricTab(metricKey, tabElement) {
  function handleModelFilterChange (line 429) | function handleModelFilterChange(model) {
  function handleGpuFilterChange (line 441) | function handleGpuFilterChange() {
  function updateStats (line 452) | function updateStats() {
  function updateCharts (line 498) | function updateCharts() {
  function prepareChartData (line 512) | function prepareChartData(gpuFilter, modelFilter, variantFilter, ioLenFi...
  function prepareChartDataByBatch (line 581) | function prepareChartDataByBatch(gpuFilter, modelFilter, variantFilter, ...
  function updateMetricChart (line 741) | function updateMetricChart(chartDataByBatch, metricType) {
  function getChartOptions (line 826) | function getChartOptions(yAxisLabel) {
  function escapeHtml (line 880) | function escapeHtml(text) {
  function updateRunsTable (line 887) | function updateRunsTable() {
  function refreshData (line 937) | async function refreshData() {
  function formatNumber (line 944) | function formatNumber(num) {
  function getAuthHeaders (line 955) | function getAuthHeaders() {
  function checkAuthAndInit (line 964) | async function checkAuthAndInit() {
  function handleLogin (line 1014) | async function handleLogin(event) {

FILE: docs/performance_dashboard/fetch_metrics.py
  function get_github_token (line 31) | def get_github_token() -> Optional[str]:
  function get_headers (line 55) | def get_headers(token: Optional[str]) -> dict:
  function fetch_workflow_runs (line 65) | def fetch_workflow_runs(
  function fetch_run_artifacts (line 97) | def fetch_run_artifacts(token: Optional[str], run_id: int) -> list:
  function download_artifact (line 107) | def download_artifact(token: Optional[str], artifact_id: int) -> Optiona...
  function extract_metrics_from_zip (line 128) | def extract_metrics_from_zip(zip_content: bytes) -> Optional[dict]:
  function fetch_metrics_for_run (line 144) | def fetch_metrics_for_run(token: Optional[str], run: dict) -> Optional[d...
  function fetch_single_run (line 184) | def fetch_single_run(token: Optional[str], run_id: int) -> Optional[dict]:
  function main (line 195) | def main():

FILE: docs/performance_dashboard/server.py
  function hash_password (line 64) | def hash_password(password):
  function create_auth_token (line 69) | def create_auth_token():
  function verify_auth_token (line 82) | def verify_auth_token(token):
  function get_github_token (line 95) | def get_github_token():
  function fetch_metrics_from_github (line 117) | def fetch_metrics_from_github(days=30):
  function update_cache_async (line 219) | def update_cache_async():
  function start_periodic_refresh (line 237) | def start_periodic_refresh(interval_hours):
  class DashboardHandler (line 252) | class DashboardHandler(http.server.SimpleHTTPRequestHandler):
    method __init__ (line 255) | def __init__(self, *args, directory=None, **kwargs):
    method _send_json (line 258) | def _send_json(self, data, status=200):
    method _check_auth (line 266) | def _check_auth(self):
    method do_GET (line 278) | def do_GET(self):
    method do_POST (line 297) | def do_POST(self):
    method handle_auth_check (line 305) | def handle_auth_check(self):
    method handle_login (line 309) | def handle_login(self):
    method handle_metrics_api (line 335) | def handle_metrics_api(self, parsed):
    method handle_refresh_api (line 351) | def handle_refresh_api(self):
    method log_message (line 356) | def log_message(self, format, *args):
  function main (line 361) | def main():

FILE: docs/release_lookup/generate_index.py
  function run_git (line 14) | def run_git(cmd):
  function is_stable_release (line 23) | def is_stable_release(tag_name):
  function get_tags (line 31) | def get_tags():
  function extract_pr_num (line 60) | def extract_pr_num(message):
  function process_tag_line (line 75) | def process_tag_line(tags, commit_map, pr_map, tag_type, tag_to_idx):
  function main (line 129) | def main():

FILE: docs/wrap_run_llm.py
  function insert_runllm_widget (line 5) | def insert_runllm_widget(html_content):
  function process_html_files (line 16) | def process_html_files(build_dir):
  function main (line 34) | def main():

FILE: examples/checkpoint_engine/update.py
  function timer (line 29) | def timer(msg: str):
  function check_sglang_ready (line 36) | def check_sglang_ready(
  function split_checkpoint_files (line 60) | def split_checkpoint_files(
  function split_tensors (line 73) | def split_tensors(
  function req_inference (line 94) | def req_inference(
  function update_weights (line 123) | def update_weights(
  function join (line 161) | def join(

FILE: examples/frontend_language/quick_start/anthropic_example_chat.py
  function multi_turn_question (line 11) | def multi_turn_question(s, question_1, question_2):
  function single (line 18) | def single():
  function stream (line 30) | def stream():
  function batch (line 42) | def batch():

FILE: examples/frontend_language/quick_start/anthropic_example_complete.py
  function few_shot_qa (line 11) | def few_shot_qa(s, question):
  function single (line 24) | def single():
  function stream (line 33) | def stream():
  function batch (line 43) | def batch():

FILE: examples/frontend_language/quick_start/azure_openai_example_chat.py
  function multi_turn_question (line 13) | def multi_turn_question(s, question_1, question_2):
  function single (line 21) | def single():
  function stream (line 33) | def stream():
  function batch (line 45) | def batch():

FILE: examples/frontend_language/quick_start/gemini_example_chat.py
  function multi_turn_question (line 11) | def multi_turn_question(s, question_1, question_2):
  function single (line 18) | def single():
  function stream (line 30) | def stream():
  function batch (line 42) | def batch():

FILE: examples/frontend_language/quick_start/gemini_example_complete.py
  function few_shot_qa (line 11) | def few_shot_qa(s, question):
  function single (line 24) | def single():
  function stream (line 33) | def stream():
  function batch (line 43) | def batch():

FILE: examples/frontend_language/quick_start/gemini_example_multimodal_chat.py
  function image_qa (line 11) | def image_qa(s, image_file1, image_file2, question):

FILE: examples/frontend_language/quick_start/local_example_chat.py
  function multi_turn_question (line 10) | def multi_turn_question(s, question_1, question_2):
  function single (line 17) | def single():
  function stream (line 29) | def stream():
  function batch (line 41) | def batch():

FILE: examples/frontend_language/quick_start/local_example_complete.py
  function few_shot_qa (line 10) | def few_shot_qa(s, question):
  function single (line 23) | def single():
  function stream (line 32) | def stream():
  function batch (line 42) | def batch():

FILE: examples/frontend_language/quick_start/local_example_llava_next.py
  function image_qa (line 10) | def image_qa(s, image_path, question):
  function single (line 15) | def single():
  function stream (line 22) | def stream():
  function batch (line 35) | def batch():

FILE: examples/frontend_language/quick_start/openai_example_chat.py
  function multi_turn_question (line 11) | def multi_turn_question(s, question_1, question_2):
  function single (line 19) | def single():
  function stream (line 31) | def stream():
  function batch (line 43) | def batch():

FILE: examples/frontend_language/quick_start/openai_example_complete.py
  function few_shot_qa (line 11) | def few_shot_qa(s, question):
  function single (line 24) | def single():
  function stream (line 33) | def stream():
  function batch (line 43) | def batch():

FILE: examples/frontend_language/quick_start/openai_example_n.py
  function multi_turn_question (line 11) | def multi_turn_question(s, question_1, question_2):
  function single (line 24) | def single():
  function batch (line 40) | def batch():

FILE: examples/frontend_language/quick_start/openai_example_o1.py
  function multi_turn_question (line 11) | def multi_turn_question(s, question_1, question_2):
  function single (line 19) | def single():
  function batch (line 31) | def batch():

FILE: examples/frontend_language/quick_start/openrouter_example_chat.py
  function multi_turn_question (line 13) | def multi_turn_question(s, question_1, question_2):
  function single (line 21) | def single():
  function stream (line 33) | def stream():
  function batch (line 45) | def batch():

FILE: examples/frontend_language/quick_start/together_example_chat.py
  function multi_turn_question (line 13) | def multi_turn_question(s, question_1, question_2):
  function single (line 21) | def single():
  function stream (line 33) | def stream():
  function batch (line 45) | def batch():

FILE: examples/frontend_language/quick_start/together_example_complete.py
  function few_shot_qa (line 13) | def few_shot_qa(s, question):
  function single (line 26) | def single():
  function stream (line 35) | def stream():
  function batch (line 45) | def batch():

FILE: examples/frontend_language/usage/chinese_regex.py
  function character_gen (line 22) | def character_gen(s, name):
  function main (line 45) | def main():

FILE: examples/frontend_language/usage/choices_logprob.py
  function tool_use (line 11) | def tool_use(s, question):
  function main (line 16) | def main():

FILE: examples/frontend_language/usage/cot_decoding.py
  function cot_decoding (line 13) | def cot_decoding(s, question, get_top_k, is_chat_model, verbose):

FILE: examples/frontend_language/usage/json_decode.py
  function character_gen (line 33) | def character_gen(s, name):
  function driver_character_gen (line 44) | def driver_character_gen():
  class Weapon (line 49) | class Weapon(str, Enum):
  class Wizard (line 58) | class Wizard(BaseModel):
  function pydantic_wizard_gen (line 65) | def pydantic_wizard_gen(s):
  function driver_pydantic_wizard_gen (line 75) | def driver_pydantic_wizard_gen():

FILE: examples/frontend_language/usage/json_logprobs.py
  function openai_api_request (line 15) | def openai_api_request(name):
  function srt_api_request (line 38) | def srt_api_request(name):
  function pretty_print (line 70) | def pretty_print(res):

FILE: examples/frontend_language/usage/llava_video/srt_example_llava_v.py
  function video_qa (line 20) | def video_qa(s, num_frames, video_path, question):
  function single (line 25) | def single(path, num_frames=16):
  function split_into_chunks (line 36) | def split_into_chunks(lst, num_chunks):
  function save_batch_results (line 50) | def save_batch_results(batch_video_files, states, cur_chunk, batch_idx, ...
  function compile_and_cleanup_final_results (line 60) | def compile_and_cleanup_final_results(cur_chunk, num_batches, save_dir):
  function find_video_files (line 75) | def find_video_files(video_dir):
  function batch (line 90) | def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, bat...

FILE: examples/frontend_language/usage/openai_chat_speculative.py
  function gen_character_spec (line 24) | def gen_character_spec(s):
  function gen_character_spec_no_few_shot (line 42) | def gen_character_spec_no_few_shot(s):
  function gen_character_normal (line 55) | def gen_character_normal(s):
  function multi_turn_question (line 62) | def multi_turn_question(s, question_1, question_2):
  function test_spec_single_turn (line 80) | def test_spec_single_turn():
  function test_inaccurate_spec_single_turn (line 93) | def test_inaccurate_spec_single_turn():
  function test_normal_single_turn (line 103) | def test_normal_single_turn():
  function test_spec_multi_turn (line 109) | def test_spec_multi_turn():
  function test_spec_multi_turn_stream (line 122) | def test_spec_multi_turn_stream():

FILE: examples/frontend_language/usage/openai_speculative.py
  function gen_character_spec (line 10) | def gen_character_spec(s):
  function gen_character_no_spec (line 19) | def gen_character_no_spec(s):
  function gen_character_spec_no_few_shot (line 28) | def gen_character_spec_no_few_shot(s):

FILE: examples/frontend_language/usage/parallel_sample.py
  function parallel_sample (line 10) | def parallel_sample(s, question, n):

FILE: examples/frontend_language/usage/readme_examples.py
  function tool_use (line 11) | def tool_use(s, question):
  function tip_suggestion (line 26) | def tip_suggestion(s):
  function regular_expression_gen (line 43) | def regular_expression_gen(s):
  function text_qa (line 53) | def text_qa(s, question):
  function driver_tool_use (line 58) | def driver_tool_use():
  function driver_tip_suggestion (line 64) | def driver_tip_suggestion():
  function driver_regex (line 70) | def driver_regex():
  function driver_batching (line 76) | def driver_batching():
  function driver_stream (line 91) | def driver_stream():

FILE: examples/frontend_language/usage/sgl_gen_min_tokens.py
  function long_answer (line 12) | def long_answer(s):
  function short_answer (line 18) | def short_answer(s):

FILE: examples/frontend_language/usage/streaming.py
  function multi_turn_question (line 12) | def multi_turn_question(s, question_1, question_2):
  function stream_a_variable (line 23) | def stream_a_variable():
  function async_stream (line 35) | async def async_stream():

FILE: examples/frontend_language/usage/triton/models/character_generation/1/model.py
  class Character (line 12) | class Character(BaseModel):
  function character_gen (line 19) | def character_gen(s, name):
  class TritonPythonModel (line 29) | class TritonPythonModel:
    method initialize (line 30) | def initialize(self, args):
    method execute (line 33) | def execute(self, requests):

FILE: examples/profiler/nsys_profile_tools/gputrc2graph.py
  function load_engine_model (line 18) | def load_engine_model():
  class GPUTrace2Graph (line 34) | class GPUTrace2Graph:
    method __init__ (line 39) | def __init__(self):
    method gen_nonoverlapped_sum_from_gputrace (line 45) | def gen_nonoverlapped_sum_from_gputrace(self, in_file, out_file):
    method sum_non_overlapping_intervals (line 66) | def sum_non_overlapping_intervals(self, df):
    method make_html (line 107) | def make_html(self, df, output_dir, title):
    method anno_gpu_kernname (line 165) | def anno_gpu_kernname(self, df, mapping):
    method make_nongpu_row (line 175) | def make_nongpu_row(self, df, nongpu_sec):
    method is_valid_file (line 183) | def is_valid_file(self, base_file):
    method should_gen_file (line 189) | def should_gen_file(self, new_file, base_file):
    method gen_sum_file (line 203) | def gen_sum_file(self, file, nsys_cmd):
    method gen_graph (line 250) | def gen_graph(self, in_file, out_dir, title, nsys_cmd, engine_model):
  function parse_tuple (line 290) | def parse_tuple(s):
  function main (line 294) | def main():

FILE: examples/runtime/engine/custom_server.py
  function generate (line 14) | async def generate(request):
  function generate_stream (line 26) | async def generate_stream(request):
  function run_server (line 46) | def run_server():

FILE: examples/runtime/engine/embedding.py
  function main (line 4) | def main():

FILE: examples/runtime/engine/fastapi_engine_inference.py
  function lifespan (line 26) | async def lifespan(app: FastAPI):
  function generate_text (line 47) | async def generate_text(request: Request):
  function start_server (line 78) | def start_server(args, timeout=60):
  function send_requests (line 129) | def send_requests(server_url, prompts, max_new_tokens, temperature):

FILE: examples/runtime/engine/launch_engine.py
  function main (line 8) | def main():

FILE: examples/runtime/engine/offline_batch_inference.py
  function main (line 13) | def main(

FILE: examples/runtime/engine/offline_batch_inference_async.py
  class InferenceEngine (line 19) | class InferenceEngine:
    method __init__ (line 20) | def __init__(self, **kwargs):
    method generate (line 23) | async def generate(self, prompt, sampling_params):
  function run_server (line 28) | async def run_server(server_args):

FILE: examples/runtime/engine/offline_batch_inference_eagle.py
  function main (line 4) | def main():

FILE: examples/runtime/engine/offline_batch_inference_qwen_1m.py
  function load_prompt (line 11) | def load_prompt() -> str:
  function process_requests (line 29) | def process_requests(llm: sgl.Engine, prompts: list[str]) -> None:
  function initialize_engine (line 50) | def initialize_engine() -> sgl.Engine:
  function main (line 67) | def main():

FILE: examples/runtime/engine/offline_batch_inference_vlm.py
  function main (line 14) | def main(

FILE: examples/runtime/engine/save_remote_state.py
  function main (line 45) | def main(args):

FILE: examples/runtime/engine/save_sharded_state.py
  function main (line 50) | def main(args):

FILE: examples/runtime/hidden_states/hidden_states_engine.py
  function main (line 15) | def main():

FILE: examples/runtime/hidden_states/hidden_states_server.py
  function main (line 23) | def main():

FILE: examples/runtime/lora.py
  function main (line 16) | def main():

FILE: examples/runtime/multimodal/llama3_llava_server.py
  function send_request (line 27) | async def send_request(url, data, delay=0):
  function test_concurrent (line 35) | async def test_concurrent(args):
  function test_streaming (line 68) | def test_streaming(args):

FILE: examples/runtime/multimodal/llava_onevision_server.py
  function download_video (line 27) | def download_video(url, cache_dir):
  function create_openai_client (line 41) | def create_openai_client(base_url):
  function image_stream_request_test (line 45) | def image_stream_request_test(client):
  function multi_image_stream_request_test (line 82) | def multi_image_stream_request_test(client):
  function video_stream_request_test (line 129) | def video_stream_request_test(client, video_path):
  function image_speed_test (line 152) | def image_speed_test(client):
  function video_speed_test (line 184) | def video_speed_test(client, video_path):
  function prepare_video_messages (line 202) | def prepare_video_messages(video_path):
  function print_speed_test_results (line 236) | def print_speed_test_results(request, start_time, end_time):
  function main (line 250) | def main():

FILE: examples/runtime/multimodal/pixtral_server.py
  function send_request (line 28) | async def send_request(url, data, delay=0):
  function test_concurrent (line 36) | async def test_concurrent(args):
  function test_streaming (line 73) | def test_streaming(args):

FILE: examples/runtime/multimodal/qwen_llava_server.py
  function send_request (line 27) | async def send_request(url, data, delay=0):
  function test_concurrent (line 35) | async def test_concurrent(args):
  function test_streaming (line 68) | def test_streaming(args):

FILE: examples/runtime/qwen3_vl_reranker.py
  function rerank_text_only (line 25) | def rerank_text_only():
  function rerank_with_images (line 51) | def rerank_with_images():
  function rerank_multimodal_query (line 106) | def rerank_multimodal_query():
  function main (line 156) | def main():

FILE: examples/runtime/token_in_token_out/token_in_token_out_llm_engine.py
  function main (line 11) | def main():

FILE: examples/runtime/token_in_token_out/token_in_token_out_llm_server.py
  function main (line 23) | def main():

FILE: examples/runtime/token_in_token_out/token_in_token_out_vlm_engine.py
  function get_input_ids (line 14) | def get_input_ids(
  function token_in_out_example (line 37) | def token_in_out_example(

FILE: examples/runtime/token_in_token_out/token_in_token_out_vlm_server.py
  function get_input_ids (line 26) | def get_input_ids() -> Tuple[list[int], list]:
  function main (line 45) | def main():

FILE: examples/runtime/vertex_predict.py
  class VertexPrediction (line 33) | class VertexPrediction:
  class LocalVertexEndpoint (line 37) | class LocalVertexEndpoint:
    method __init__ (line 38) | def __init__(self) -> None:
    method predict (line 41) | def predict(self, instances: List[dict], parameters: Optional[dict] = ...

FILE: examples/usage/modelopt_quantize_and_export.py
  function _validate_export (line 26) | def _validate_export(export_dir: str) -> bool:
  function _get_export_info (line 56) | def _get_export_info(export_dir: str) -> Optional[dict]:
  function quantize_and_export_model (line 78) | def quantize_and_export_model(
  function deploy_exported_model (line 175) | def deploy_exported_model(
  function main (line 224) | def main():

FILE: python/sglang/_mps_stub.py
  class Stream (line 17) | class Stream:
    method __init__ (line 24) | def __init__(self, device: Any = None, priority: int = 0) -> None:
    method synchronize (line 27) | def synchronize(self) -> None:
    method wait_stream (line 30) | def wait_stream(self, stream: Any) -> None:
    method wait_event (line 33) | def wait_event(self, event: Any) -> None:
    method record_event (line 36) | def record_event(self, event: Any = None) -> Any:
    method query (line 39) | def query(self) -> bool:
    method __enter__ (line 43) | def __enter__(self) -> "Stream":
    method __exit__ (line 46) | def __exit__(self, *args: Any) -> None:
  class Event (line 50) | class Event:
    method __init__ (line 53) | def __init__(self, enable_timing: bool = False) -> None:
    method record (line 56) | def record(self, stream: Any = None) -> None:
    method wait (line 59) | def wait(self, stream: Any = None) -> None:
    method query (line 62) | def query(self) -> bool:
    method synchronize (line 65) | def synchronize(self) -> None:
    method elapsed_time (line 68) | def elapsed_time(self, end_event: Any) -> float:
  function current_stream (line 75) | def current_stream(device: Any = None) -> Stream:
  function stream (line 80) | def stream(s: Any) -> Stream:
  function set_device (line 85) | def set_device(device: Any) -> None:  # noqa: ARG001
  function current_device (line 90) | def current_device() -> int:
  function device_count (line 95) | def device_count() -> int:
  class _MPSDeviceProperties (line 101) | class _MPSDeviceProperties:
    method __getattr__ (line 114) | def __getattr__(self, name: str) -> Any:
  function get_device_properties (line 125) | def get_device_properties(device: Any = 0) -> _MPSDeviceProperties:  # n...
  class _MPSMemoryTracker (line 137) | class _MPSMemoryTracker:
    method __init__ (line 145) | def __init__(self) -> None:
    method memory_allocated (line 149) | def memory_allocated(self, device: Any = None) -> int:  # noqa: ARG002
    method memory_reserved (line 157) | def memory_reserved(self, device: Any = None) -> int:  # noqa: ARG002
    method max_memory_allocated (line 165) | def max_memory_allocated(self, device: Any = None) -> int:  # noqa: AR...
    method max_memory_reserved (line 169) | def max_memory_reserved(self, device: Any = None) -> int:  # noqa: ARG002
    method reset_peak_memory_stats (line 173) | def reset_peak_memory_stats(self, device: Any = None) -> None:  # noqa...
  function _patch_non_blocking (line 183) | def _patch_non_blocking() -> None:
  function install (line 226) | def install() -> None:

FILE: python/sglang/_triton_stub.py
  class _StubBase (line 19) | class _StubBase:
    method __init_subclass__ (line 25) | def __init_subclass__(cls, **kwargs):
  class _MockModule (line 29) | class _MockModule(types.ModuleType):
    method __init__ (line 37) | def __init__(self, name: str):
    method __getattr__ (line 48) | def __getattr__(self, name: str):
    method __call__ (line 66) | def __call__(self, *args, **kwargs):
    method __instancecheck__ (line 77) | def __instancecheck__(self, instance):
    method __contains__ (line 81) | def __contains__(self, item):
    method __iter__ (line 85) | def __iter__(self):
    method __len__ (line 88) | def __len__(self):
    method __bool__ (line 91) | def __bool__(self):
    method __repr__ (line 94) | def __repr__(self):
  function _cdiv (line 98) | def _cdiv(a: int, b: int) -> int:
  function _next_power_of_2 (line 103) | def _next_power_of_2(n: int) -> int:
  class _Config (line 108) | class _Config:
    method __init__ (line 111) | def __init__(self, kwargs=None, num_warps=4, num_stages=2, **extra):
  class _TritonFinder (line 117) | class _TritonFinder:
    method find_module (line 128) | def find_module(self, fullname, path=None):
    method load_module (line 133) | def load_module(self, fullname):
  function _make_mock (line 148) | def _make_mock(name: str) -> _MockModule:
  function install (line 155) | def install() -> None:

FILE: python/sglang/bench_offline_throughput.py
  class BenchArgs (line 35) | class BenchArgs:
    method add_cli_args (line 63) | def add_cli_args(parser: argparse.ArgumentParser):
    method from_cli_args (line 201) | def from_cli_args(cls, args: argparse.Namespace):
  function throughput_test_once (line 206) | def throughput_test_once(
  function monitor_trace_file (line 294) | def monitor_trace_file(known_files, directory, interval=1):
  function _create_ray_engine_backend (line 326) | def _create_ray_engine_backend(server_args: ServerArgs):
  function throughput_test (line 394) | def throughput_test(

FILE: python/sglang/bench_one_batch.py
  function start_profile (line 93) | def start_profile(profile_activities, profile_record_shapes=False, rank_...
  function stop_profile (line 124) | def stop_profile(
  class BenchArgs (line 158) | class BenchArgs:
    method add_cli_args (line 178) | def add_cli_args(parser: argparse.ArgumentParser):
    method from_cli_args (line 245) | def from_cli_args(cls, args: argparse.Namespace):
  function load_model (line 259) | def load_model(server_args, port_args, gpu_id, tp_rank):
  function prepare_inputs_for_correctness_test (line 289) | def prepare_inputs_for_correctness_test(bench_args, tokenizer, custom_pr...
  function prepare_extend_inputs_for_correctness_test (line 334) | def prepare_extend_inputs_for_correctness_test(
  function prepare_synthetic_inputs_for_latency_test (line 348) | def prepare_synthetic_inputs_for_latency_test(
  class TreeCacheNamespace (line 377) | class TreeCacheNamespace(SimpleNamespace):
    method supports_swa (line 378) | def supports_swa(self) -> bool:
    method supports_mamba (line 381) | def supports_mamba(self) -> bool:
    method is_chunk_cache (line 384) | def is_chunk_cache(self) -> bool:
    method is_tree_cache (line 387) | def is_tree_cache(self) -> bool:
    method evict (line 390) | def evict(self, params: EvictParams):
  function extend (line 395) | def extend(reqs, model_runner):
  function decode (line 422) | def decode(input_token_ids, batch, model_runner):
  function _maybe_prepare_mlp_sync_batch (line 433) | def _maybe_prepare_mlp_sync_batch(batch: ScheduleBatch, model_runner):
  function _read_prompts_from_file (line 448) | def _read_prompts_from_file(prompt_file, rank_print):
  function _get_torch_profiler_output_dir (line 461) | def _get_torch_profiler_output_dir():
  function _create_torch_profiler_filename (line 465) | def _create_torch_profiler_filename(
  function _save_profile_trace_results (line 473) | def _save_profile_trace_results(profiler, filename):
  function correctness_test (line 484) | def correctness_test(
  function synchronize (line 533) | def synchronize(device):
  function latency_test_run_once (line 537) | def latency_test_run_once(
  function latency_test (line 676) | def latency_test(
  function main (line 791) | def main(server_args, bench_args):

FILE: python/sglang/bench_one_batch_server.py
  function run_benchmark (line 25) | def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs):

FILE: python/sglang/bench_serving.py
  function _get_bool_env_var (line 60) | def _get_bool_env_var(name: str, default: str = "false") -> bool:
  function _create_bench_client_session (line 65) | def _create_bench_client_session():
  class RequestFuncInput (line 79) | class RequestFuncInput:
  class RequestFuncOutput (line 93) | class RequestFuncOutput:
    method init_new (line 106) | def init_new(request_func_input: RequestFuncInput):
  function get_auth_headers (line 112) | def get_auth_headers() -> Dict[str, str]:
  function get_request_headers (line 123) | def get_request_headers() -> Dict[str, str]:
  function wait_for_endpoint (line 130) | def wait_for_endpoint(url: str, timeout_sec: int = 60) -> bool:
  function async_request_trt_llm (line 153) | async def async_request_trt_llm(
  function async_request_openai_completions (line 224) | async def async_request_openai_completions(
  function async_request_openai_chat_completions (line 338) | async def async_request_openai_chat_completions(
  function async_request_truss (line 518) | async def async_request_truss(
  function async_request_sglang_generate (line 598) | async def async_request_sglang_generate(
  function async_request_openai_embeddings (line 704) | async def async_request_openai_embeddings(
  function async_request_gserver (line 754) | async def async_request_gserver(
  function async_request_profile (line 761) | async def async_request_profile(api_url: str) -> RequestFuncOutput:
  function _build_profile_urls (line 816) | def _build_profile_urls(
  function _call_profile_pd (line 835) | async def _call_profile_pd(profile_urls: List[Tuple[str, str]], mode: st...
  class BenchmarkMetrics (line 875) | class BenchmarkMetrics:
  function get_request (line 912) | async def get_request(
  function calculate_metrics (line 952) | def calculate_metrics(
  function wrap_multi_turn_request_func (line 1130) | def wrap_multi_turn_request_func(request_func: Callable, backend: str) -...
  function benchmark (line 1163) | async def benchmark(
  function check_chat_template (line 1638) | def check_chat_template(model_path):
  function set_global_args (line 1647) | def set_global_args(args_: argparse.Namespace):
  function run_benchmark (line 1653) | def run_benchmark(args_: argparse.Namespace):
  class LoRAPathAction (line 1890) | class LoRAPathAction(argparse.Action):
    method __call__ (line 1891) | def __call__(self, parser, namespace, values, option_string=None):

FILE: python/sglang/benchmark/bench_utils.py
  function run_bench (line 7) | def run_bench(

FILE: python/sglang/benchmark/datasets/__init__.py
  function get_dataset (line 30) | def get_dataset(args, tokenizer, model_id=None):

FILE: python/sglang/benchmark/datasets/common.py
  class DatasetRow (line 22) | class DatasetRow:
    method __post_init__ (line 33) | def __post_init__(self):
  class BaseDataset (line 43) | class BaseDataset(ABC):
    method from_args (line 46) | def from_args(cls, args: Namespace) -> "BaseDataset": ...
    method load (line 49) | def load(
  function compute_random_lens (line 56) | def compute_random_lens(full_len: int, range_ratio: float, num: int) -> ...
  function get_available_tokens (line 68) | def get_available_tokens(tokenizer):
  function gen_prompt (line 73) | def gen_prompt(tokenizer, token_num):
  function gen_mm_prompt (line 80) | def gen_mm_prompt(tokenizer, image_pad_id, token_num):

FILE: python/sglang/benchmark/datasets/custom.py
  class CustomDataset (line 20) | class CustomDataset(BaseDataset):
    method from_args (line 29) | def from_args(cls, args: Namespace) -> "CustomDataset":
    method load (line 40) | def load(
  function sample_custom_requests (line 54) | def sample_custom_requests(

FILE: python/sglang/benchmark/datasets/generated_shared_prefix.py
  class GeneratedSharedPrefixDataset (line 23) | class GeneratedSharedPrefixDataset(BaseDataset):
    method from_args (line 37) | def from_args(cls, args: Namespace) -> "GeneratedSharedPrefixDataset":
    method load (line 53) | def load(
  function get_gen_prefix_cache_path (line 72) | def get_gen_prefix_cache_path(
  function sample_generated_shared_prefix_requests (line 92) | def sample_generated_shared_prefix_requests(

FILE: python/sglang/benchmark/datasets/image.py
  class ImageDataset (line 22) | class ImageDataset(BaseDataset):
    method from_args (line 35) | def from_args(cls, args: Namespace) -> "ImageDataset":
    method load (line 49) | def load(self, tokenizer=None, model_id=None) -> List[DatasetRow]:
  function parse_image_resolution (line 66) | def parse_image_resolution(image_resolution: str) -> Tuple[int, int]:
  function create_mm_data_row (line 96) | def create_mm_data_row(
  function sample_image_requests (line 170) | def sample_image_requests(

FILE: python/sglang/benchmark/datasets/mmmu.py
  class MMMUDataset (line 17) | class MMMUDataset(BaseDataset):
    method from_args (line 23) | def from_args(cls, args: Namespace) -> "MMMUDataset":
    method load (line 30) | def load(self, tokenizer=None, model_id=None) -> List[DatasetRow]:
  function sample_mmmu_requests (line 40) | def sample_mmmu_requests(

FILE: python/sglang/benchmark/datasets/mooncake.py
  class MooncakeDataset (line 20) | class MooncakeDataset(BaseDataset):
    method from_args (line 26) | def from_args(cls, args: Namespace) -> "MooncakeDataset":
    method load (line 33) | def load(self, tokenizer=None, model_id=None) -> List[Dict]:
  function get_mooncake_request_over_time (line 50) | async def get_mooncake_request_over_time(

FILE: python/sglang/benchmark/datasets/openai_dataset.py
  class OpenAIDataset (line 13) | class OpenAIDataset(BaseDataset):
    method from_args (line 19) | def from_args(cls, args: Namespace) -> "OpenAIDataset":
    method load (line 26) | def load(
  function sample_openai_requests (line 37) | def sample_openai_requests(

FILE: python/sglang/benchmark/datasets/random.py
  class RandomDataset (line 21) | class RandomDataset(BaseDataset):
    method from_args (line 31) | def from_args(cls, args: Namespace) -> "RandomDataset":
    method load (line 42) | def load(
  function sample_random_requests (line 57) | def sample_random_requests(

FILE: python/sglang/benchmark/datasets/sharegpt.py
  class ShareGPTDataset (line 25) | class ShareGPTDataset(BaseDataset):
    method from_args (line 34) | def from_args(cls, args: Namespace) -> "ShareGPTDataset":
    method load (line 45) | def load(
  function sample_sharegpt_requests (line 59) | def sample_sharegpt_requests(

FILE: python/sglang/benchmark/utils.py
  function remove_prefix (line 17) | def remove_prefix(text: str, prefix: str) -> str:
  function remove_suffix (line 21) | def remove_suffix(text: str, suffix: str) -> str:
  function parse_custom_headers (line 25) | def parse_custom_headers(header_list: List[str]) -> Dict[str, str]:
  function get_model (line 29) | def get_model(pretrained_model_name_or_path: str) -> str:
  function get_tokenizer (line 44) | def get_tokenizer(
  function get_processor (line 67) | def get_processor(
  function download_and_cache_hf_file (line 90) | def download_and_cache_hf_file(
  function download_and_cache_file (line 101) | def download_and_cache_file(url: str, filename: Optional[str] = None):
  function is_file_valid_json (line 135) | def is_file_valid_json(path):
  function set_ulimit (line 151) | def set_ulimit(target_soft_limit=65535):

FILE: python/sglang/check_env.py
  function is_cuda_v2 (line 16) | def is_cuda_v2():
  class BaseEnv (line 56) | class BaseEnv:
    method __init__ (line 59) | def __init__(self):
    method get_info (line 63) | def get_info(self) -> dict:
    method get_topology (line 70) | def get_topology(self) -> dict:
    method get_package_versions (line 73) | def get_package_versions(self) -> dict:
    method get_device_info (line 87) | def get_device_info(self):
    method get_hypervisor_vendor (line 113) | def get_hypervisor_vendor(self) -> dict:
    method get_ulimit_soft (line 123) | def get_ulimit_soft(self) -> dict:
    method check_env (line 127) | def check_env(self):
  class GPUEnv (line 144) | class GPUEnv(BaseEnv):
    method get_info (line 147) | def get_info(self):
    method _get_cuda_version_info (line 156) | def _get_cuda_version_info(self):
    method _get_nvcc_info (line 170) | def _get_nvcc_info(self):
    method _get_cuda_driver_version (line 193) | def _get_cuda_driver_version(self):
    method get_topology (line 214) | def get_topology(self):
  class HIPEnv (line 235) | class HIPEnv(BaseEnv):
    method get_info (line 238) | def get_info(self):
    method _get_cuda_version_info (line 247) | def _get_cuda_version_info(self):
    method _get_hipcc_info (line 258) | def _get_hipcc_info(self):
    method _get_rocm_driver_version (line 276) | def _get_rocm_driver_version(self):
    method get_topology (line 294) | def get_topology(self):
  class NPUEnv (line 310) | class NPUEnv(BaseEnv):
    method __init__ (line 319) | def __init__(self):
    method get_info (line 323) | def get_info(self):
    method get_device_info (line 331) | def get_device_info(self):
    method _get_cann_version_info (line 346) | def _get_cann_version_info(self):
    method _get_cann_info (line 365) | def _get_cann_info(self, CANN_HOME: str):
    method _get_ascend_driver_version (line 384) | def _get_ascend_driver_version(self):
    method get_topology (line 407) | def get_topology(self):
  class MUSAEnv (line 425) | class MUSAEnv(BaseEnv):
    method get_info (line 428) | def get_info(self):
    method _get_musa_version_info (line 437) | def _get_musa_version_info(self):
    method _get_mcc_info (line 451) | def _get_mcc_info(self):
    method _get_musa_driver_version (line 472) | def _get_musa_driver_version(self):
    method get_topology (line 494) | def get_topology(self):

FILE: python/sglang/cli/generate.py
  function generate (line 6) | def generate(args, extra_argv):

FILE: python/sglang/cli/main.py
  function version (line 7) | def version(args, extra_argv):
  function main (line 12) | def main():

FILE: python/sglang/cli/serve.py
  function _extract_model_type_override (line 16) | def _extract_model_type_override(extra_argv):
  function serve (line 49) | def serve(args, extra_argv):

FILE: python/sglang/cli/utils.py
  function _is_diffusers_model_dir (line 12) | def _is_diffusers_model_dir(model_dir: str) -> bool:
  function get_is_diffusion_model (line 24) | def get_is_diffusion_model(model_path: str) -> bool:
  function get_model_path (line 65) | def get_model_path(extra_argv):
  function get_git_commit_hash (line 94) | def get_git_commit_hash() -> str:

FILE: python/sglang/compile_deep_gemm.py
  class CompileArgs (line 40) | class CompileArgs:
    method add_cli_args (line 44) | def add_cli_args(parser: argparse.ArgumentParser):
    method from_cli_args (line 48) | def from_cli_args(cls, args: argparse.Namespace):
  function warm_up_compile (line 57) | async def warm_up_compile(
  function launch_server_internal (line 76) | def launch_server_internal(server_args):
  function launch_server_process_and_send_one_request (line 85) | def launch_server_process_and_send_one_request(
  function refine_server_args (line 145) | def refine_server_args(server_args: ServerArgs, compile_args: CompileArgs):
  function run_compile (line 156) | def run_compile(server_args: ServerArgs, compile_args: CompileArgs):

FILE: python/sglang/eval/llama3_eval.py
  function fetch_responses (line 39) | async def fetch_responses(
  class CustomAsyncHTTPXClient (line 79) | class CustomAsyncHTTPXClient(httpx.AsyncClient):
    method send (line 80) | async def send(self, request: httpx.Request, *args, **kwargs) -> httpx...
  function get_client (line 87) | def get_client(provider):
  function benchmark (line 103) | async def benchmark(args):
  function get_mmlu_answer (line 144) | def get_mmlu_answer(response):
  function get_mmlu_cot_answer (line 150) | def get_mmlu_cot_answer(response):
  function get_answer_gsm8k (line 172) | def get_answer_gsm8k(response):
  function get_dataset_from_task (line 190) | def get_dataset_from_task(task, response_path, model_size):
  function analyze (line 221) | def analyze(task, response_path, model_size):

FILE: python/sglang/eval/loogle_eval.py
  function get_client (line 15) | def get_client(api_url: str) -> openai.AsyncOpenAI:
  function get_dataset (line 21) | def get_dataset():
  function fetch_response (line 25) | async def fetch_response(
  function benchmark (line 66) | async def benchmark(args):
  function analyse (line 98) | def analyse(args):

FILE: python/sglang/global_config.py
  class GlobalConfig (line 6) | class GlobalConfig:
    method __init__ (line 11) | def __init__(self):

FILE: python/sglang/jit_kernel/__main__.py
  function generate_clangd (line 4) | def generate_clangd():

FILE: python/sglang/jit_kernel/add_constant.py
  function _jit_add_constant_module (line 14) | def _jit_add_constant_module(constant: int) -> Module:
  function add_constant (line 24) | def add_constant(src: torch.Tensor, constant: int) -> torch.Tensor:

FILE: python/sglang/jit_kernel/awq_dequantize.py
  function _jit_awq_dequantize_module (line 14) | def _jit_awq_dequantize_module(dtype: torch.dtype) -> Module:
  function awq_dequantize (line 24) | def awq_dequantize(

FILE: python/sglang/jit_kernel/awq_marlin_repack.py
  function _jit_awq_marlin_repack_module (line 14) | def _jit_awq_marlin_repack_module() -> Module:
  function awq_marlin_repack (line 22) | def awq_marlin_repack(
  function awq_marlin_moe_repack (line 40) | def awq_marlin_moe_repack(

FILE: python/sglang/jit_kernel/benchmark/bench_awq_dequantize.py
  function check_correctness (line 29) | def check_correctness():
  function benchmark (line 85) | def benchmark(qweight_row, qweight_col, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_awq_marlin_moe_repack.py
  function awq_pack (line 24) | def awq_pack(q_w, num_bits, size_k, size_n):
  function make_moe_weights (line 37) | def make_moe_weights(num_experts, size_k, size_n, num_bits, group_size):
  function check_correctness (line 54) | def check_correctness():
  function benchmark (line 102) | def benchmark(num_experts, size_k, size_n, num_bits, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_awq_marlin_repack.py
  function awq_pack (line 25) | def awq_pack(q_w, num_bits, size_k, size_n):
  function check_correctness (line 45) | def check_correctness():
  function benchmark (line 86) | def benchmark(size_k, size_n, num_bits, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_concat_mla.py
  function aot_concat_mla_k (line 27) | def aot_concat_mla_k(k, k_nope, k_rope):
  function jit_concat_mla_k (line 31) | def jit_concat_mla_k(k, k_nope, k_rope):
  function torch_concat_mla_k (line 35) | def torch_concat_mla_k(k, k_nope, k_rope):
  function aot_concat_mla_absorb_q (line 41) | def aot_concat_mla_absorb_q(a, b):
  function jit_concat_mla_absorb_q (line 45) | def jit_concat_mla_absorb_q(a, b):
  function torch_concat_mla_absorb_q (line 49) | def torch_concat_mla_absorb_q(a, b, out):
  function _create_concat_mla_k_data (line 65) | def _create_concat_mla_k_data(num_tokens):
  function bench_concat_mla_k (line 102) | def bench_concat_mla_k(num_tokens: int, provider: str):
  function bench_concat_mla_absorb_q (line 137) | def bench_concat_mla_absorb_q(dim_0: int, dim_1: int, provider: str):

FILE: python/sglang/jit_kernel/benchmark/bench_fused_add_rmsnorm.py
  function sglang_jit_fused_add_rmsnorm (line 14) | def sglang_jit_fused_add_rmsnorm(
  function flashinfer_fused_add_rmsnorm (line 20) | def flashinfer_fused_add_rmsnorm(
  function benchmark (line 56) | def benchmark(hidden_size: int, batch_size: int, provider: str):

FILE: python/sglang/jit_kernel/benchmark/bench_fused_norm_scale_shift.py
  function preprocess_layer (line 37) | def preprocess_layer(layer, affine: bool, D: int, DTYPE: torch.dtype):
  function bench_fused_norm_scale_shift (line 65) | def bench_fused_norm_scale_shift(
  function bench_fused_scale_residual_norm_scale_shift (line 100) | def bench_fused_scale_residual_norm_scale_shift(

FILE: python/sglang/jit_kernel/benchmark/bench_gptq_marlin.py
  function _run_gemm (line 29) | def _run_gemm(fn, a):
  function _run_gemm_aot (line 51) | def _run_gemm_aot(a):
  function check_correctness (line 73) | def check_correctness():
  function benchmark (line 112) | def benchmark(size_m, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_gptq_marlin_repack.py
  function _get_inputs (line 24) | def _get_inputs(size_k):
  function check_correctness (line 37) | def check_correctness():
  function benchmark (line 79) | def benchmark(size_k, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_hadamard.py
  function torch_hadamard_transform (line 47) | def torch_hadamard_transform(x, scale, H, dim, dim_padded):
  function benchmark (line 85) | def benchmark(batch_size: int, dim: int, provider: str) -> Tuple[float, ...

FILE: python/sglang/jit_kernel/benchmark/bench_hicache.py
  class HiCacheCache (line 44) | class HiCacheCache:
    method get_slice (line 50) | def get_slice(self, num_layers: int, element_size: int) -> "HiCacheCac...
  function gen_indices (line 67) | def gen_indices(
  function sglang_aot_transfer_one (line 79) | def sglang_aot_transfer_one(
  function sglang_jit_transfer_one (line 100) | def sglang_jit_transfer_one(
  function sglang_aot_transfer_all (line 121) | def sglang_aot_transfer_all(
  function sglang_jit_transfer_all (line 144) | def sglang_jit_transfer_all(
  function pytorch_transfer (line 168) | def pytorch_transfer(
  function benchmark_one_layer_h2d (line 218) | def benchmark_one_layer_h2d(
  function _create_ptr_tensor (line 299) | def _create_ptr_tensor(tensors, device="cuda"):
  function benchmark_all_layer_d2h (line 321) | def benchmark_all_layer_d2h(

FILE: python/sglang/jit_kernel/benchmark/bench_moe_wna16_marlin.py
  function stack_and_dev (line 18) | def stack_and_dev(tensors):
  function _make_inputs (line 48) | def _make_inputs(size_m):
  function _run_jit (line 76) | def _run_jit(
  function _run_aot (line 116) | def _run_aot(
  function check_correctness (line 156) | def check_correctness():
  function benchmark (line 204) | def benchmark(size_m, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_norm.py
  function benchmark_rmsnorm (line 48) | def benchmark_rmsnorm(hidden_size: int, batch_size: int, provider: str):
  function benchmark_fused_add_rmsnorm (line 72) | def benchmark_fused_add_rmsnorm(hidden_size: int, batch_size: int, provi...

FILE: python/sglang/jit_kernel/benchmark/bench_norm_impls.py
  function effective_rows_from_shape (line 158) | def effective_rows_from_shape(input_shape: list[int]) -> int:
  function ensure_repo (line 165) | def ensure_repo(repo_name: str, repo_url: str) -> Path:
  function ensure_python_dep (line 178) | def ensure_python_dep(module_name: str, package_name: str | None = None)...
  function dtype_from_name (line 189) | def dtype_from_name(name: str) -> torch.dtype:
  function dtype_name (line 201) | def dtype_name(dtype: torch.dtype) -> str:
  function normalize_hidden_sizes (line 210) | def normalize_hidden_sizes(text: str) -> list[int]:
  function normalize_dtypes (line 214) | def normalize_dtypes(text: str) -> list[torch.dtype]:
  function prewarm (line 218) | def prewarm(fn: Callable[[], object], iters: int = 3) -> None:
  function benchmark_provider (line 224) | def benchmark_provider(
  function geometric_mean (line 251) | def geometric_mean(values: list[float]) -> float:
  function load_flaggems (line 258) | def load_flaggems():
  function load_quack (line 272) | def load_quack():
  function build_rmsnorm_providers (line 286) | def build_rmsnorm_providers(dtype: torch.dtype, batch_size: int, hidden_...
  function build_fused_add_rmsnorm_providers (line 318) | def build_fused_add_rmsnorm_providers(
  function build_layernorm_providers (line 370) | def build_layernorm_providers(dtype: torch.dtype, batch_size: int, hidde...
  function maybe_benchmark (line 404) | def maybe_benchmark(
  function write_csv (line 451) | def write_csv(rows: list[dict[str, object]], output_path: Path) -> None:
  function write_markdown (line 478) | def write_markdown(rows: list[dict[str, object]], output_path: Path) -> ...
  function run_suite (line 561) | def run_suite(
  function run_shape_suite (line 620) | def run_shape_suite(
  function main (line 686) | def main() -> None:

FILE: python/sglang/jit_kernel/benchmark/bench_nvfp4_blockwise_moe.py
  function _round_up (line 22) | def _round_up(x: int, y: int) -> int:
  function _expert_offsets (line 26) | def _expert_offsets(m_per_expert: list[int], device: torch.device) -> to...
  function _blockscale_offsets (line 33) | def _blockscale_offsets(m_per_expert: list[int], device: torch.device) -...
  function _prepare_case (line 40) | def _prepare_case(
  function _torch_ref_group_mm (line 130) | def _torch_ref_group_mm(case: dict[str, Any]) -> torch.Tensor:
  function _aot_cutlass_fp4_group_mm (line 143) | def _aot_cutlass_fp4_group_mm(case: dict[str, Any]) -> torch.Tensor:
  function _probe_legacy_aot_group_mm (line 171) | def _probe_legacy_aot_group_mm() -> tuple[bool, str]:
  function benchmark (line 227) | def benchmark(total_tokens, n, k, num_experts, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_nvfp4_quant.py
  function _torch_ref_quant (line 23) | def _torch_ref_quant(input: torch.Tensor, input_global_scale: torch.Tens...
  function _aot_scaled_fp4_quant (line 48) | def _aot_scaled_fp4_quant(input: torch.Tensor, input_global_scale: torch...
  function _probe_legacy_aot_quant (line 63) | def _probe_legacy_aot_quant() -> tuple[bool, str]:
  function _probe_flashinfer_quant (line 92) | def _probe_flashinfer_quant() -> tuple[bool, str]:
  function benchmark (line 157) | def benchmark(m, n, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_nvfp4_scaled_mm.py
  function _dequantize_to_fp16 (line 37) | def _dequantize_to_fp16(
  function _aot_cutlass_scaled_fp4_mm (line 61) | def _aot_cutlass_scaled_fp4_mm(
  function _probe_legacy_aot_scaled_mm (line 76) | def _probe_legacy_aot_scaled_mm() -> tuple[bool, str]:
  function benchmark (line 143) | def benchmark(m, n, k, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_per_tensor_quant_fp8.py
  function vllm_scaled_fp8_quant (line 28) | def vllm_scaled_fp8_quant(
  function sglang_scaled_fp8_quant (line 37) | def sglang_scaled_fp8_quant(
  function calculate_diff (line 52) | def calculate_diff(batch_size: int, seq_len: int):
  function benchmark (line 99) | def benchmark(element_count, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_per_token_group_quant_8bit.py
  function _flatten_to_2d (line 159) | def _flatten_to_2d(t: torch.Tensor) -> torch.Tensor:
  function _make_sglang_bench_fn (line 166) | def _make_sglang_bench_fn(
  function benchmark (line 249) | def benchmark(

FILE: python/sglang/jit_kernel/benchmark/bench_qknorm.py
  function sglang_aot_qknorm (line 20) | def sglang_aot_qknorm(
  function sglang_jit_qknorm (line 39) | def sglang_jit_qknorm(
  function flashinfer_qknorm (line 49) | def flashinfer_qknorm(
  function torch_impl_qknorm (line 62) | def torch_impl_qknorm(
  function benchmark (line 114) | def benchmark(

FILE: python/sglang/jit_kernel/benchmark/bench_qknorm_across_heads.py
  function sglang_jit_qknorm_across_heads (line 18) | def sglang_jit_qknorm_across_heads(
  function sglang_aot_qknorm_across_heads (line 28) | def sglang_aot_qknorm_across_heads(
  function flashinfer_qknorm_across_heads (line 43) | def flashinfer_qknorm_across_heads(
  function torch_impl_qknorm_across_heads (line 56) | def torch_impl_qknorm_across_heads(
  function benchmark (line 101) | def benchmark(

FILE: python/sglang/jit_kernel/benchmark/bench_qwen_image_modulation.py
  function _make_common_inputs (line 28) | def _make_common_inputs(batch_size: int, seq_len: int, hidden_size: int):
  function bench_layernorm_scale_shift_gate_select01 (line 55) | def bench_layernorm_scale_shift_gate_select01(
  function bench_residual_layernorm_scale_shift_gate_select01 (line 116) | def bench_residual_layernorm_scale_shift_gate_select01(

FILE: python/sglang/jit_kernel/benchmark/bench_renorm.py
  function torch_top_k_renorm_probs (line 11) | def torch_top_k_renorm_probs(probs, top_k):
  function torch_top_p_renorm_probs (line 42) | def torch_top_p_renorm_probs(probs, top_p, eps=1e-5):
  function torch_top_k_mask_logits (line 81) | def torch_top_k_mask_logits(logits, top_k):
  function calculate_diff_top_k_renorm (line 106) | def calculate_diff_top_k_renorm(batch_size, vocab_size, k):
  function calculate_diff_top_p_renorm (line 122) | def calculate_diff_top_p_renorm(batch_size, vocab_size, p):
  function calculate_diff_top_k_mask (line 138) | def calculate_diff_top_k_mask(batch_size, vocab_size, k):
  function benchmark_top_k_renorm (line 181) | def benchmark_top_k_renorm(batch_size, vocab_size, k, provider):
  function benchmark_top_p_renorm (line 214) | def benchmark_top_p_renorm(batch_size, vocab_size, p, provider):
  function benchmark_top_k_mask (line 243) | def benchmark_top_k_mask(batch_size, vocab_size, k, provider):

FILE: python/sglang/jit_kernel/benchmark/bench_rmsnorm.py
  function sglang_aot_rmsnorm (line 18) | def sglang_aot_rmsnorm(
  function sglang_jit_rmsnorm (line 25) | def sglang_jit_rmsnorm(
  function flashinfer_rmsnorm (line 32) | def flashinfer_rmsnorm(
  function torch_impl_rmsnorm (line 40) | def torch_impl_rmsnorm(
  function benchmark (line 79) | def benchmark(hidden_size: int, batch_size: int, provider: str):

FILE: python/sglang/jit_kernel/benchmark/bench_rope.py
  function create_cos_sin_cache (line 20) | def create_cos_sin_cache(
  function flashinfer_rope (line 48) | def flashinfer_rope(
  function sglang_pos_enc_rope (line 67) | def sglang_pos_enc_rope(
  function sglang_fused_rope (line 86) | def sglang_fused_rope(
  function jit_rope_then_store (line 102) | def jit_rope_then_store(
  function jit_fused_rope_store (line 134) | def jit_fused_rope_store(
  function benchmark (line 198) | def benchmark(batch_size: int, num_q_k_heads: str, is_neox: bool, provid...
  function benchmark_store (line 255) | def benchmark_store(batch_size: int, num_q_k_heads: str, is_neox: bool, ...

FILE: python/sglang/jit_kernel/benchmark/bench_store_cache.py
  function sglang_jit_store_cache (line 17) | def sglang_jit_store_cache(
  function torch_compile_store_cache (line 28) | def torch_compile_store_cache(
  function torch_streams_store_cache (line 42) | def torch_streams_store_cache(
  function benchmark (line 89) | def benchmark(

FILE: python/sglang/jit_kernel/benchmark/utils.py
  function is_in_ci (line 16) | def is_in_ci() -> bool:
  function get_benchmark_range (line 21) | def get_benchmark_range(full_range: List, ci_range: List) -> List:
  function run_benchmark (line 26) | def run_benchmark(
  function run_benchmark_no_cudagraph (line 43) | def run_benchmark_no_cudagraph(

FILE: python/sglang/jit_kernel/concat_mla.py
  function _jit_concat_mla_k_module (line 14) | def _jit_concat_mla_k_module() -> Module:
  function _jit_concat_mla_absorb_q_module (line 23) | def _jit_concat_mla_absorb_q_module() -> Module:
  function concat_mla_k (line 31) | def concat_mla_k(k: torch.Tensor, k_nope: torch.Tensor, k_rope: torch.Te...
  function concat_mla_absorb_q (line 47) | def concat_mla_absorb_q(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:

FILE: python/sglang/jit_kernel/csrc/fast-hadamard-transform/code_gen.py
  function string_to_array (line 161) | def string_to_array(string):
  function array_code_gen (line 172) | def array_code_gen(arr):
  function main (line 185) | def main():

FILE: python/sglang/jit_kernel/csrc/fast-hadamard-transform/fast_hadamard_transform.h
  type HadamardParamsBase (line 11) | struct HadamardParamsBase {

FILE: python/sglang/jit_kernel/csrc/fast-hadamard-transform/fast_hadamard_transform_common.h
  type uint8 (line 16) | struct uint8 {
  type BytesToType (line 25) | struct BytesToType
  type BytesToType (line 31) | struct BytesToType
  type BytesToType (line 37) | struct BytesToType
  type BytesToType (line 43) | struct BytesToType
  type BytesToType (line 49) | struct BytesToType
  type BytesToType (line 55) | struct BytesToType
  function __device__ (line 64) | __device__ inline T operator()(T const& x, T const& y) {
  function T (line 73) | inline T run(T x, Operator& op) {
  type Allreduce (line 81) | struct Allreduce
  function T (line 83) | inline T run(T x, Operator& op) {
  function cilog2 (line 92) | constexpr int cilog2(int val) {
  function hadamard_mult_thread (line 99) | void hadamard_mult_thread(float x[kNChunks][1 << kLogN]) {
  function __device__ (line 141) | inline __device__ void load_input(input_t* x, float x_vals[kNChunks][kNE...

FILE: python/sglang/jit_kernel/csrc/fast-hadamard-transform/fast_hadamard_transform_special.h
  function hadamard_mult_thread_12 (line 12) | void hadamard_mult_thread_12(float x[12]) {
  function hadamard_mult_thread_20 (line 32) | void hadamard_mult_thread_20(float x[20]) {
  function hadamard_mult_thread_28 (line 80) | void hadamard_mult_thread_28(float x[28]) {
  function hadamard_mult_thread_40 (line 172) | void hadamard_mult_thread_40(float x[40]) {

FILE: python/sglang/jit_kernel/csrc/gemm/marlin/dequant.h
  function namespace (line 68) | namespace device::marlin {

FILE: python/sglang/jit_kernel/csrc/gemm/marlin/kernel.h
  function namespace (line 13) | namespace device::marlin {

FILE: python/sglang/jit_kernel/csrc/gemm/marlin/marlin_template.h
  function namespace (line 32) | namespace device::marlin {
  function __device__ (line 254) | __device__ inline void wait_negative_and_add(int* lock) {
  function transform_a (line 612) | auto transform_a = [&](int i) {
  function init_same_group (line 834) | auto init_same_group = [&](int pipe) {
  function matmul (line 1053) | auto matmul = [&](int k) {

FILE: python/sglang/jit_kernel/csrc/gemm/marlin_moe/kernel.h
  function namespace (line 16) | namespace device::marlin_moe {

FILE: python/sglang/jit_kernel/csrc/gemm/marlin_moe/marlin_template.h
  function namespace (line 33) | namespace device::marlin_moe {
  function __device__ (line 265) | __device__ inline void wait_negative_and_add(int* lock) {
  function else (line 351) | else if constexpr (std::is_same<scalar_t, half>::value) {
  function read_moe_block_data (line 444) | auto read_moe_block_data = [&](int block_id) {
  function transform_a (line 756) | auto transform_a = [&](int i) {
  function init_same_group (line 996) | auto init_same_group = [&](int pipe) {
  function matmul (line 1218) | auto matmul = [&](int k) {
  function write_result (line 1516) | auto write_result = [&](bool last) {

FILE: python/sglang/jit_kernel/cutedsl_gdn.py
  function _define_kernels (line 33) | def _define_kernels():
  function _create_jit_functions (line 951) | def _create_jit_functions():
  function _get_jit_functions (line 1273) | def _get_jit_functions():
  function _get_compiled_kernel (line 1280) | def _get_compiled_kernel(N, H, HV, K, V, pool_size, use_small_batch, is_...
  function cutedsl_fused_sigmoid_gating_delta_rule_update (line 1373) | def cutedsl_fused_sigmoid_gating_delta_rule_update(

FILE: python/sglang/jit_kernel/diffusion/cutedsl/common/norm_fusion.py
  function apply_norm_cta (line 15) | def apply_norm_cta(
  function apply_rmsnorm_cta (line 32) | def apply_rmsnorm_cta(
  function apply_layernorm_cta (line 61) | def apply_layernorm_cta(
  function broadcast_tensor_for_bsfd (line 130) | def broadcast_tensor_for_bsfd(
  function tensor_slice_for_bsfd (line 160) | def tensor_slice_for_bsfd(

FILE: python/sglang/jit_kernel/diffusion/cutedsl/common/reduce.py
  function warp_reduce_sum (line 8) | def warp_reduce_sum(val: cute.Numeric, reduce_size: int = 32) -> cute.Nu...
  function cta_reduce_sum (line 16) | def cta_reduce_sum(

FILE: python/sglang/jit_kernel/diffusion/cutedsl/scale_residual_norm_scale_shift.py
  function to_cute_arg (line 18) | def to_cute_arg(
  function to_fake_cute_args (line 42) | def to_fake_cute_args(t: torch.Tensor):
  class ScaleResidualNormScaleShift (line 57) | class ScaleResidualNormScaleShift:
    method make_hash_key (line 59) | def make_hash_key(cls, *inputs):
    method __init__ (line 81) | def __init__(self, D: int, norm_type: str):
    method __call__ (line 88) | def __call__(
    method kernel (line 135) | def kernel(
  function validate_x (line 228) | def validate_x(t: torch.Tensor, B: int, S: int, D: int):
  function validate_weight_bias (line 237) | def validate_weight_bias(t: Optional[torch.Tensor], B: int, S: int, D: i...
  function validate_scale_shift (line 248) | def validate_scale_shift(t: torch.Tensor, B: int, S: int, D: int):
  function validate_gate (line 271) | def validate_gate(t: Union[torch.Tensor, int], B: int, S: int, D: int):
  function fused_norm_scale_shift (line 278) | def fused_norm_scale_shift(
  function _fused_norm_scale_shift_fake (line 344) | def _fused_norm_scale_shift_fake(x, weight, bias, scale, shift, norm_typ...
  function fused_scale_residual_norm_scale_shift (line 352) | def fused_scale_residual_norm_scale_shift(
  function _fused_scale_residual_norm_scale_shift_fake (line 426) | def _fused_scale_residual_norm_scale_shift_fake(

FILE: python/sglang/jit_kernel/diffusion/triton/mps_fallback.py
  function _torch_to_mlx (line 45) | def _torch_to_mlx(tensor: torch.Tensor) -> "mx.array":
  function _mlx_to_torch (line 53) | def _mlx_to_torch(array: "mx.array", device: torch.device) -> torch.Tensor:
  function fuse_scale_shift_kernel_native (line 64) | def fuse_scale_shift_kernel_native(
  function fuse_scale_shift_gate_select01_kernel_native (line 97) | def fuse_scale_shift_gate_select01_kernel_native(
  function apply_rotary_embedding_native (line 118) | def apply_rotary_embedding_native(
  function norm_infer_native (line 131) | def norm_infer_native(
  function triton_one_pass_rms_norm_native (line 160) | def triton_one_pass_rms_norm_native(
  function rms_norm_fn_native (line 172) | def rms_norm_fn_native(
  function norm_infer_native (line 223) | def norm_infer_native(  # noqa: F811
  function triton_one_pass_rms_norm_native (line 250) | def triton_one_pass_rms_norm_native(  # noqa: F811
  function rms_norm_fn_native (line 262) | def rms_norm_fn_native(  # noqa: F811

FILE: python/sglang/jit_kernel/diffusion/triton/norm.py
  function maybe_contiguous_lastdim (line 10) | def maybe_contiguous_lastdim(x):
  function maybe_contiguous (line 14) | def maybe_contiguous(x):
  function triton_autotune_configs (line 18) | def triton_autotune_configs():
  function _layer_norm_fwd_1pass_kernel (line 64) | def _layer_norm_fwd_1pass_kernel(
  function _layer_norm_fwd (line 188) | def _layer_norm_fwd(
  function _layer_norm_fwd_impl (line 251) | def _layer_norm_fwd_impl(
  class LayerNormFn (line 373) | class LayerNormFn:
    method forward (line 376) | def forward(
  function layer_norm_fn (line 453) | def layer_norm_fn(
  function _norm_infer_kernel (line 496) | def _norm_infer_kernel(
  function norm_infer (line 540) | def norm_infer(
  function rms_norm_fn (line 582) | def rms_norm_fn(

FILE: python/sglang/jit_kernel/diffusion/triton/npu_fallback.py
  function fuse_scale_shift_native (line 5) | def fuse_scale_shift_native(
  function apply_rotary_embedding_native (line 16) | def apply_rotary_embedding_native(

FILE: python/sglang/jit_kernel/diffusion/triton/rmsnorm_onepass.py
  function _rms_norm_tiled_onepass (line 8) | def _rms_norm_tiled_onepass(
  function triton_one_pass_rms_norm (line 36) | def triton_one_pass_rms_norm(x: torch.Tensor, w: torch.Tensor, eps: floa...

FILE: python/sglang/jit_kernel/diffusion/triton/rotary.py
  function _rotary_embedding_kernel (line 18) | def _rotary_embedding_kernel(
  function apply_rotary_embedding (line 67) | def apply_rotary_embedding(

FILE: python/sglang/jit_kernel/diffusion/triton/scale_shift.py
  function _fused_layernorm_scale_shift_gate_select01_kernel (line 9) | def _fused_layernorm_scale_shift_gate_select01_kernel(
  function _fused_residual_layernorm_scale_shift_gate_select01_kernel (line 116) | def _fused_residual_layernorm_scale_shift_gate_select01_kernel(
  function _fused_scale_shift_4d_kernel (line 247) | def _fused_scale_shift_4d_kernel(
  function fuse_scale_shift_kernel_blc_opt (line 292) | def fuse_scale_shift_kernel_blc_opt(
  function fuse_scale_shift_gate_select01_kernel_blc_opt (line 360) | def fuse_scale_shift_gate_select01_kernel_blc_opt(
  function fuse_scale_shift_kernel (line 447) | def fuse_scale_shift_kernel(
  function fuse_scale_shift_gate_select01_kernel (line 566) | def fuse_scale_shift_gate_select01_kernel(
  function fuse_layernorm_scale_shift_gate_select01_kernel (line 638) | def fuse_layernorm_scale_shift_gate_select01_kernel(
  function fuse_residual_layernorm_scale_shift_gate_select01_kernel (line 727) | def fuse_residual_layernorm_scale_shift_gate_select01_kernel(

FILE: python/sglang/jit_kernel/flash_attention_v4.py
  function _maybe_contiguous (line 16) | def _maybe_contiguous(x: Optional[torch.Tensor]) -> Optional[torch.Tensor]:
  function flash_attn_varlen_func (line 20) | def flash_attn_varlen_func(
  function flash_attn_with_kvcache (line 92) | def flash_attn_with_kvcache(

FILE: python/sglang/jit_kernel/fused_metadata_copy.py
  function _jit_fused_metadata_copy_module (line 29) | def _jit_fused_metadata_copy_module(
  function _jit_fused_metadata_copy_multi_module (line 62) | def _jit_fused_metadata_copy_multi_module(
  function fused_metadata_copy_cuda (line 97) | def fused_metadata_copy_cuda(
  function fused_metadata_copy_multi_cuda (line 199) | def fused_metadata_copy_multi_cuda(

FILE: python/sglang/jit_kernel/fused_store_index_cache.py
  function _jit_nsa_fused_store_module (line 30) | def _jit_nsa_fused_store_module(
  function can_use_nsa_fused_store (line 55) | def can_use_nsa_fused_store(
  function fused_store_index_k_cache (line 67) | def fused_store_index_k_cache(

FILE: python/sglang/jit_kernel/gptq_marlin.py
  function _jit_gptq_marlin_module (line 18) | def _jit_gptq_marlin_module(dtype: torch.dtype) -> Module:
  function _or_empty (line 28) | def _or_empty(
  function gptq_marlin_gemm (line 34) | def gptq_marlin_gemm(

FILE: python/sglang/jit_kernel/gptq_marlin_repack.py
  function _jit_gptq_marlin_repack_module (line 17) | def _jit_gptq_marlin_repack_module() -> Module:
  function gptq_marlin_repack (line 25) | def gptq_marlin_repack(

FILE: python/sglang/jit_kernel/hadamard.py
  function _jit_hadamard_module (line 14) | def _jit_hadamard_module(dtype: torch.dtype) -> Module:
  function _hadamard_transform_impl (line 32) | def _hadamard_transform_impl(
  function hadamard_transform (line 59) | def hadamard_transform(x: torch.Tensor, scale: float = 1.0) -> torch.Ten...
  function hadamard_transform_12n (line 64) | def hadamard_transform_12n(x: torch.Tensor, scale: float = 1.0) -> torch...
  function hadamard_transform_20n (line 69) | def hadamard_transform_20n(x: torch.Tensor, scale: float = 1.0) -> torch...
  function hadamard_transform_28n (line 74) | def hadamard_transform_28n(x: torch.Tensor, scale: float = 1.0) -> torch...
  function hadamard_transform_40n (line 79) | def hadamard_transform_40n(x: torch.Tensor, scale: float = 1.0) -> torch...

FILE: python/sglang/jit_kernel/hicache.py
  function _jit_hicache_module (line 16) | def _jit_hicache_module(*, element_size: int, unroll: int, block_quota: ...
  function can_use_hicache_jit_kernel (line 34) | def can_use_hicache_jit_kernel(
  function _default_unroll (line 58) | def _default_unroll(element_size: int) -> int:
  function transfer_hicache_one_layer (line 69) | def transfer_hicache_one_layer(
  function transfer_hicache_all_layer (line 104) | def transfer_hicache_all_layer(

FILE: python/sglang/jit_kernel/include/sgl_kernel/scalar_type.hpp
  type host (line 9) | namespace host {
    class ScalarType (line 20) | class ScalarType {
      type NanRepr (line 22) | enum NanRepr : uint8_t {
      method ScalarType (line 30) | constexpr ScalarType(
      method ScalarType (line 44) | static constexpr ScalarType int_(uint8_t size_bits, int32_t bias = 0) {
      method ScalarType (line 48) | static constexpr ScalarType uint(uint8_t size_bits, int32_t bias = 0) {
      method ScalarType (line 53) | static constexpr ScalarType float_IEEE754(uint8_t exponent, uint8_t ...
      method ScalarType (line 59) | static constexpr ScalarType float_(uint8_t exponent, uint8_t mantiss...
      method member_id_field_width (line 84) | static constexpr size_t member_id_field_width() {
      method reduce_members_helper (line 90) | static constexpr auto reduce_members_helper(Fn f, Init val, Member m...
      method reduce_members (line 100) | constexpr auto reduce_members(Fn f, Init init) const {
      method reduce_member_types (line 106) | static constexpr auto reduce_member_types(Fn f, Init init) {
      method id_size_bits (line 111) | static constexpr auto id_size_bits() {
      method Id (line 120) | constexpr Id id() const {
      method ScalarType (line 134) | static constexpr ScalarType from_id(Id id) {
      method size_bits (line 148) | constexpr int64_t size_bits() const {
      method is_signed (line 151) | constexpr bool is_signed() const {
      method is_integer (line 154) | constexpr bool is_integer() const {
      method is_floating_point (line 157) | constexpr bool is_floating_point() const {
      method is_ieee_754 (line 160) | constexpr bool is_ieee_754() const {
      method has_nans (line 163) | constexpr bool has_nans() const {
      method has_infs (line 166) | constexpr bool has_infs() const {
      method has_bias (line 169) | constexpr bool has_bias() const {
      method _floating_point_max (line 175) | double _floating_point_max() const {
      method _raw_max (line 208) | constexpr std::variant<int64_t, double> _raw_max() const {
      method _raw_min (line 217) | constexpr std::variant<int64_t, double> _raw_min() const {
      method max (line 242) | constexpr std::variant<int64_t, double> max() const {
      method min (line 248) | constexpr std::variant<int64_t, double> min() const {
      method str (line 254) | std::string str() const {

FILE: python/sglang/jit_kernel/include/sgl_kernel/source_location.h
  type source_location_fallback (line 18) | struct source_location_fallback {

FILE: python/sglang/jit_kernel/include/sgl_kernel/tensor.h
  function namespace (line 38) | namespace host {
  type PrintableDevice (line 129) | struct PrintableDevice {
  type SymbolicDType (line 253) | struct SymbolicDType {
  function DLDataType (line 274) | auto unwrap(DebugInfo info = {}) const -> DLDataType {
  type SymbolicDevice (line 316) | struct SymbolicDevice {
  function DLDevice (line 341) | auto unwrap(DebugInfo info = {}) const -> DLDevice {
  function namespace (line 382) | namespace details {
  function SymbolicDevice (line 432) | struct DeviceRef : BaseRef<SymbolicDevice> {

FILE: python/sglang/jit_kernel/include/sgl_kernel/utils.h
  function namespace (line 57) | namespace host {

FILE: python/sglang/jit_kernel/kvcache.py
  function _jit_kvcache_module (line 20) | def _jit_kvcache_module(row_bytes: int) -> Module:
  function can_use_store_cache (line 31) | def can_use_store_cache(size: int) -> bool:
  function store_cache (line 49) | def store_cache(

FILE: python/sglang/jit_kernel/moe_lora_align.py
  function _jit_moe_align_module (line 14) | def _jit_moe_align_module(dtype: torch.dtype) -> Module:
  function moe_lora_align_block_size (line 26) | def moe_lora_align_block_size(

FILE: python/sglang/jit_kernel/moe_wna16_marlin.py
  function _jit_moe_wna16_marlin_module (line 18) | def _jit_moe_wna16_marlin_module(dtype: torch.dtype) -> Module:
  function _or_empty (line 33) | def _or_empty(
  function moe_wna16_marlin_gemm (line 39) | def moe_wna16_marlin_gemm(

FILE: python/sglang/jit_kernel/ngram_embedding.py
  function _jit_ngram_embedding_module (line 13) | def _jit_ngram_embedding_module() -> Module:
  function compute_n_gram_ids (line 24) | def compute_n_gram_ids(
  function update_token_table (line 69) | def update_token_table(

FILE: python/sglang/jit_kernel/norm.py
  function _jit_qknorm_module (line 20) | def _jit_qknorm_module(head_dim: int, dtype: torch.dtype) -> Module:
  function _jit_rmsnorm_module (line 31) | def _jit_rmsnorm_module(hidden_size: int, dtype: torch.dtype) -> Module:
  function _jit_fused_add_rmsnorm_module (line 42) | def _jit_fused_add_rmsnorm_module(dtype: torch.dtype) -> Module:
  function _jit_qknorm_across_heads_module (line 53) | def _jit_qknorm_across_heads_module(dtype: torch.dtype) -> Module:
  function can_use_fused_inplace_qknorm (line 66) | def can_use_fused_inplace_qknorm(head_dim: int, dtype: torch.dtype) -> b...
  function fused_inplace_qknorm (line 79) | def fused_inplace_qknorm(
  function rmsnorm (line 93) | def rmsnorm(
  function fused_add_rmsnorm (line 105) | def fused_add_rmsnorm(
  function fused_inplace_qknorm_across_heads (line 115) | def fused_inplace_qknorm_across_heads(

FILE: python/sglang/jit_kernel/nvfp4.py
  function _find_package_root (line 22) | def _find_package_root(package: str) -> Optional[pathlib.Path]:
  function _resolve_cutlass_include_paths (line 29) | def _resolve_cutlass_include_paths() -> list[str]:
  function _nvfp4_cuda_flags (line 59) | def _nvfp4_cuda_flags() -> list[str]:
  function _get_nvfp4_cuda_arch_list (line 73) | def _get_nvfp4_cuda_arch_list() -> str:
  function _nvfp4_arch_env (line 90) | def _nvfp4_arch_env():
  function _jit_nvfp4_quant_module (line 104) | def _jit_nvfp4_quant_module() -> Module:
  function _jit_nvfp4_expert_quant_module (line 127) | def _jit_nvfp4_expert_quant_module() -> Module:
  function _jit_nvfp4_scaled_mm_module (line 154) | def _jit_nvfp4_scaled_mm_module() -> Module:
  function _jit_nvfp4_blockwise_moe_module (line 176) | def _jit_nvfp4_blockwise_moe_module() -> Module:
  function cutlass_scaled_fp4_mm (line 198) | def cutlass_scaled_fp4_mm(
  function cutlass_fp4_group_mm (line 214) | def cutlass_fp4_group_mm(
  function _scaled_fp4_quant_custom_op (line 283) | def _scaled_fp4_quant_custom_op(
  function scaled_fp4_quant (line 293) | def scaled_fp4_quant(
  function _shuffle_rows_torch (line 329) | def _shuffle_rows_torch(
  function _scaled_fp4_experts_quant_custom_op (line 343) | def _scaled_fp4_experts_quant_custom_op(
  function scaled_fp4_experts_quant (line 362) | def scaled_fp4_experts_quant(
  function _scaled_fp4_grouped_quant_custom_op (line 426) | def _scaled_fp4_grouped_quant_custom_op(
  function scaled_fp4_grouped_quant (line 446) | def scaled_fp4_grouped_quant(
  function _silu_and_mul_scaled_fp4_grouped_quant_custom_op (line 486) | def _silu_and_mul_scaled_fp4_grouped_quant_custom_op(
  function silu_and_mul_scaled_fp4_grouped_quant (line 506) | def silu_and_mul_scaled_fp4_grouped_quant(
  function _cutlass_fp4_group_mm_custom_op (line 557) | def _cutlass_fp4_group_mm_custom_op(
  function suggest_nvfp4_global_scale (line 602) | def suggest_nvfp4_global_scale(x: torch.Tensor) -> torch.Tensor:

FILE: python/sglang/jit_kernel/per_tensor_quant_fp8.py
  function _jit_per_tensor_quant_fp8_module (line 15) | def _jit_per_tensor_quant_fp8_module(is_static: bool, dtype: torch.dtype...
  function per_tensor_quant_fp8 (line 29) | def per_tensor_quant_fp8(

FILE: python/sglang/jit_kernel/per_token_group_quant_8bit.py
  function _jit_per_token_group_quant_8bit_module (line 17) | def _jit_per_token_group_quant_8bit_module(
  function _per_token_group_quant_8bit_custom_op (line 38) | def _per_token_group_quant_8bit_custom_op(
  function per_token_group_quant_8bit (line 75) | def per_token_group_quant_8bit(

FILE: python/sglang/jit_kernel/rope.py
  function _jit_rotary_embedding_module (line 21) | def _jit_rotary_embedding_module() -> Module:
  function _jit_fused_rope_module (line 30) | def _jit_fused_rope_module(is_neox: bool, rope_dim: int, dtype: torch.dt...
  function rotary_embedding_with_key (line 47) | def rotary_embedding_with_key(
  function rotary_embedding_without_key (line 63) | def rotary_embedding_without_key(
  function rotary_embedding (line 74) | def rotary_embedding(
  class FusedSetKVBufferArg (line 94) | class FusedSetKVBufferArg:
  function apply_rope_inplace (line 113) | def apply_rope_inplace(
  function apply_rope_inplace_with_kvcache (line 141) | def apply_rope_inplace_with_kvcache(
  function apply_rope_with_cos_sin_cache_inplace (line 179) | def apply_rope_with_cos_sin_cache_inplace(

FILE: python/sglang/jit_kernel/tests/test_add_constant.py
  function test_add_constant (line 9) | def test_add_constant(size: int, constant: int) -> None:

FILE: python/sglang/jit_kernel/tests/test_awq_dequantize.py
  function reverse_awq_order (line 16) | def reverse_awq_order(t: torch.Tensor):
  function awq_dequantize_torch (line 35) | def awq_dequantize_torch(
  function test_awq_dequantize_jit_vs_torch (line 76) | def test_awq_dequantize_jit_vs_torch(
  function test_awq_dequantize_jit_vs_aot (line 124) | def test_awq_dequantize_jit_vs_aot(

FILE: python/sglang/jit_kernel/tests/test_awq_marlin_moe_repack.py
  function _has_aot_awq_marlin_moe_repack (line 12) | def _has_aot_awq_marlin_moe_repack() -> bool:
  function awq_pack (line 21) | def awq_pack(
  function test_awq_marlin_moe_repack_jit_vs_aot (line 46) | def test_awq_marlin_moe_repack_jit_vs_aot(
  function test_awq_marlin_moe_repack_shape (line 87) | def test_awq_marlin_moe_repack_shape(

FILE: python/sglang/jit_kernel/tests/test_awq_marlin_repack.py
  function _has_aot_awq_marlin_repack (line 13) | def _has_aot_awq_marlin_repack() -> bool:
  function awq_pack (line 22) | def awq_pack(
  function test_awq_marlin_repack_jit_vs_aot (line 46) | def test_awq_marlin_repack_jit_vs_aot(num_bits, k_tiles, n_tiles, group_...
  function test_awq_marlin_repack_correct (line 76) | def test_awq_marlin_repack_correct(num_bits, k_tiles, n_tiles, group_size):

FILE: python/sglang/jit_kernel/tests/test_concat_mla.py
  function torch_concat_mla_k (line 8) | def torch_concat_mla_k(
  function torch_concat_mla_absorb_q (line 21) | def torch_concat_mla_absorb_q(
  function sgl_kernel_concat_mla_k (line 33) | def sgl_kernel_concat_mla_k(
  function sgl_kernel_concat_mla_absorb_q (line 42) | def sgl_kernel_concat_mla_absorb_q(
  function jit_concat_mla_k (line 52) | def jit_concat_mla_k(
  function jit_concat_mla_absorb_q (line 61) | def jit_concat_mla_absorb_q(
  function test_concat_mla_k_jit_vs_torch (line 89) | def test_concat_mla_k_jit_vs_torch(num_tokens: int) -> None:
  function test_concat_mla_k_jit_vs_aot (line 110) | def test_concat_mla_k_jit_vs_aot(num_tokens: int) -> None:
  function test_concat_mla_absorb_q_jit_vs_torch (line 138) | def test_concat_mla_absorb_q_jit_vs_torch(dim_0: int, dim_1: int) -> None:
  function test_concat_mla_absorb_q_jit_vs_aot (line 155) | def test_concat_mla_absorb_q_jit_vs_aot(dim_0: int, dim_1: int) -> None:

FILE: python/sglang/jit_kernel/tests/test_cutedsl_gdn.py
  function run_triton_kernel (line 29) | def run_triton_kernel(A_log, dt_bias, q, k, v, a, b, initial_state, indi...
  function test_cutedsl_gdn_precision (line 57) | def test_cutedsl_gdn_precision(B: int):
  function test_cutedsl_gdn_performance (line 114) | def test_cutedsl_gdn_performance(B: int):

FILE: python/sglang/jit_kernel/tests/test_flash_attention_4.py
  function apply_rotary_emb (line 19) | def apply_rotary_emb(
  function unpad_input (line 81) | def unpad_input(hidden_states, attention_mask, unused_mask=None):
  function pad_input (line 115) | def pad_input(hidden_states, indices, batch, seqlen):
  function generate_random_padding_mask (line 133) | def generate_random_padding_mask(
  function generate_qkv (line 171) | def generate_qkv(
  function construct_local_mask (line 322) | def construct_local_mask(
  function construct_chunk_mask (line 363) | def construct_chunk_mask(
  function attention_ref (line 399) | def attention_ref(
  function test_flash_attn_varlen_output (line 604) | def test_flash_attn_varlen_output(
  function test_flash_attn_kvcache (line 997) | def test_flash_attn_kvcache(
  function _generate_block_kvcache (line 1471) | def _generate_block_kvcache(

FILE: python/sglang/jit_kernel/tests/test_fused_add_rmsnorm.py
  function sglang_jit_fused_add_rmsnorm (line 9) | def sglang_jit_fused_add_rmsnorm(
  function flashinfer_fused_add_rmsnorm (line 17) | def flashinfer_fused_add_rmsnorm(
  function test_fused_add_rmsnorm (line 39) | def test_fused_add_rmsnorm(batch_size: int, hidden_size: int) -> None:

FILE: python/sglang/jit_kernel/tests/test_fused_metadata_copy.py
  function create_test_metadata (line 21) | def create_test_metadata(
  function reference_copy_decode (line 125) | def reference_copy_decode(src, dst, max_len):
  function reference_copy_target_verify (line 148) | def reference_copy_target_verify(src, dst, max_seqlen_k, seqlens_expande...
  function reference_copy_draft_extend (line 176) | def reference_copy_draft_extend(src, dst, max_seqlen_k, seqlens_expanded...
  function test_fused_metadata_copy_dtype_validation (line 209) | def test_fused_metadata_copy_dtype_validation():
  function test_fused_metadata_copy (line 322) | def test_fused_metadata_copy(bs, forward_mode, has_real_page_table, has_...
  function test_fused_metadata_copy_large_batch (line 423) | def test_fused_metadata_copy_large_batch(bs):
  function create_test_metadata_multi (line 488) | def create_test_metadata_multi(
  function reference_copy_for_loop (line 594) | def reference_copy_for_loop(src, dst_list, bs, max_len):
  function test_fused_metadata_copy_multi_dtype_validation (line 618) | def test_fused_metadata_copy_multi_dtype_validation():
  function test_fused_metadata_copy_multi (line 713) | def test_fused_metadata_copy_multi(bs, has_real_page_table, has_flashmla):
  function test_fused_metadata_copy_multi_large_batch (line 915) | def test_fused_metadata_copy_multi_large_batch(bs):

FILE: python/sglang/jit_kernel/tests/test_fused_norm_scale_shift.py
  function _tol (line 39) | def _tol(dtype: torch.dtype):
  function cuda_setup (line 44) | def cuda_setup():
  function _apply_scale_shift (line 50) | def _apply_scale_shift(y: Tensor, scale: Tensor, shift: Tensor) -> Tensor:
  function fused_norm_scale_shift_ref (line 63) | def fused_norm_scale_shift_ref(
  function fused_scale_residual_norm_scale_shift_ref (line 83) | def fused_scale_residual_norm_scale_shift_ref(
  function _make_tensor (line 117) | def _make_tensor(index_mode: str, shape: Tuple, dtype: torch.dtype):
  function run_norm_scale_shift (line 124) | def run_norm_scale_shift(
  function run_scale_resi_norm_scale_shift (line 147) | def run_scale_resi_norm_scale_shift(
  class TestFusedNormScaleShift (line 178) | class TestFusedNormScaleShift:
    method test_shape_dtype (line 181) | def test_shape_dtype(self, shape, dtype, norm_type):
    method test_dtype_0 (line 185) | def test_dtype_0(self, dtype, norm_type):
    method test_dtype_1 (line 189) | def test_dtype_1(self, dtype, norm_type):
    method test_normtype_affine (line 193) | def test_normtype_affine(self, affine_mode, norm_type):
    method test_index_mode (line 197) | def test_index_mode(self, index_mode, norm_type):
  class TestFusedScaleResidualNormScaleShift (line 204) | class TestFusedScaleResidualNormScaleShift:
    method test_shape_dtype (line 207) | def test_shape_dtype(self, shape, dtype, norm_type):
    method test_dtype_0 (line 211) | def test_dtype_0(self, dtype, norm_type):
    method test_dtype_1 (line 215) | def test_dtype_1(self, dtype, norm_type):
    method test_normtype_affine (line 221) | def test_normtype_affine(self, affine_mode, norm_type):
    method test_scale_shift_index_mode (line 225) | def test_scale_shift_index_mode(self, index_mode, norm_type):
    method test_gate_index_mode (line 231) | def test_gate_index_mode(self, index_mode, norm_type):

FILE: python/sglang/jit_kernel/tests/test_fused_store_index_cache.py
  function _skip_if_unavailable (line 56) | def _skip_if_unavailable(page_size: int = PAGE_SIZE):
  function _num_pages (line 71) | def _num_pages(loc: torch.Tensor, page_size: int, extra: int = 1) -> int:
  function _make_buffer (line 75) | def _make_buffer(num_pages: int, page_size: int = PAGE_SIZE) -> torch.Te...
  function _read_token_from_buffer (line 83) | def _read_token_from_buffer(
  function _write_token_to_buffer (line 109) | def _write_token_to_buffer(
  function _gather_tokens (line 135) | def _gather_tokens(
  function _reference_quantize_and_store (line 152) | def _reference_quantize_and_store(
  function _import_act_quant (line 182) | def _import_act_quant():
  function _ref_store_via_act_quant (line 191) | def _ref_store_via_act_quant(
  function test_fused_kernel_matches_own_algorithm (line 241) | def test_fused_kernel_matches_own_algorithm(num_tokens: int, base_index:...
  function test_fused_kernel_vs_act_quant_semantic (line 298) | def test_fused_kernel_vs_act_quant_semantic(scale_fmt: Optional[str]):
  function test_roundtrip_reconstruction (line 368) | def test_roundtrip_reconstruction(num_tokens: int):
  function test_single_token (line 395) | def test_single_token():
  function test_zero_input (line 412) | def test_zero_input():
  function test_reference_writes_nonzero (line 436) | def test_reference_writes_nonzero():

FILE: python/sglang/jit_kernel/tests/test_fused_verify_triton_gdn.py
  function _make_tensors (line 26) | def _make_tensors(N, T, H, HV, K, V, device="cuda", seed=2025):
  function run_reference (line 42) | def run_reference(
  function run_fused_mtp (line 91) | def run_fused_mtp(
  function test_fused_gdn_mtp_precision (line 135) | def test_fused_gdn_mtp_precision(N: int, T: int):
  function test_mtp_single_step_decode (line 178) | def test_mtp_single_step_decode(N: int):

FILE: python/sglang/jit_kernel/tests/test_gptq_marlin.py
  function test_gptq_marlin_gemm (line 23) | def test_gptq_marlin_gemm(

FILE: python/sglang/jit_kernel/tests/test_gptq_marlin_repack.py
  function test_gptq_marlin_repack (line 34) | def test_gptq_marlin_repack(

FILE: python/sglang/jit_kernel/tests/test_hadamard_jit.py
  function _parse_hadamard_str (line 135) | def _parse_hadamard_str(s):
  function hadamard_transform_ref (line 154) | def hadamard_transform_ref(x, scale=1.0):
  function hadamard_transform_mn_ref (line 173) | def hadamard_transform_mn_ref(x, multiple, scale=1.0):
  function test_hadamard_transform (line 223) | def test_hadamard_transform(dim, dtype):
  function test_hadamard_transform_non_power_of_two (line 254) | def test_hadamard_transform_non_power_of_two(dim, dtype):
  function test_hadamard_transform_3d_input (line 277) | def test_hadamard_transform_3d_input(dtype):
  function test_hadamard_transform_scale_one (line 298) | def test_hadamard_transform_scale_one(dtype):
  function test_hadamard_transform_12n (line 328) | def test_hadamard_transform_12n(dim, dtype):
  function test_hadamard_transform_20n (line 352) | def test_hadamard_transform_20n(dim, dtype):
  function test_hadamard_transform_28n (line 376) | def test_hadamard_transform_28n(dim, dtype):
  function test_hadamard_transform_40n (line 400) | def test_hadamard_transform_40n(dim, dtype):

FILE: python/sglang/jit_kernel/tests/test_moe_lora_align_block_size.py
  function round_up (line 16) | def round_up(x, base):
  function CEILDIV (line 20) | def CEILDIV(x, y):
  function sample_data (line 24) | def sample_data(num_experts, max_loras, num_tokens, topk_num):
  function test_moe_lora_align_block_size (line 66) | def test_moe_lora_align_block_size(

FILE: python/sglang/jit_kernel/tests/test_moe_wna16_marlin.py
  function _has_aot_moe_wna16_marlin_gemm (line 12) | def _has_aot_moe_wna16_marlin_gemm() -> bool:
  function stack_and_dev (line 21) | def stack_and_dev(tensors: list[torch.Tensor]):
  function _get_scalar_type (line 26) | def _get_scalar_type(num_bits: int, has_zp: bool):
  function _setup_moe_weights (line 34) | def _setup_moe_weights(e, n, k, quant_type, group_size, act_order, dtype):
  function _run_single_gemm (line 77) | def _run_single_gemm(
  function _run_single_gemm_aot (line 131) | def _run_single_gemm_aot(
  function generate_test_cases (line 184) | def generate_test_cases():
  function test_moe_wna16_marlin_gemm (line 232) | def test_moe_wna16_marlin_gemm(

FILE: python/sglang/jit_kernel/tests/test_norm_jit.py
  function _jit_rmsnorm (line 15) | def _jit_rmsnorm(input, weight, output, eps):
  function _fi_rmsnorm (line 21) | def _fi_rmsnorm(input, weight, out, eps):
  function _jit_fused_add_rmsnorm (line 27) | def _jit_fused_add_rmsnorm(input, residual, weight, eps):
  function _fi_fused_add_rmsnorm (line 33) | def _fi_fused_add_rmsnorm(input, residual, weight, eps):
  function test_rmsnorm_jit (line 43) | def test_rmsnorm_jit(batch_size, hidden_size, dtype, specify_out):
  function test_fused_add_rmsnorm_jit (line 65) | def test_fused_add_rmsnorm_jit(batch_size, hidden_size, dtype):

FILE: python/sglang/jit_kernel/tests/test_nvfp4_blockwise_moe.py
  function _nvfp4_supported (line 14) | def _nvfp4_supported() -> bool:
  function _round_up (line 18) | def _round_up(x: int, y: int) -> int:
  function _build_expert_offsets (line 22) | def _build_expert_offsets(
  function _build_blockscale_offsets (line 31) | def _build_blockscale_offsets(
  function test_nvfp4_blockwise_moe_grouped_mm (line 44) | def test_nvfp4_blockwise_moe_grouped_mm(dtype: torch.dtype) -> None:

FILE: python/sglang/jit_kernel/tests/test_nvfp4_gemm.py
  function _nvfp4_supported (line 7) | def _nvfp4_supported() -> bool:
  function e2m1_to_fp32 (line 35) | def e2m1_to_fp32(int4_value: int) -> float:
  function break_fp4_bytes (line 42) | def break_fp4_bytes(a: torch.Tensor) -> torch.Tensor:
  function convert_swizzled_to_linear (line 53) | def convert_swizzled_to_linear(
  function dequantize_to_dtype (line 67) | def dequantize_to_dtype(
  function get_ref_results (line 84) | def get_ref_results(
  function test_nvfp4_gemm (line 103) | def test_nvfp4_gemm(dtype: torch.dtype, shape: tuple[int, int, int]) -> ...

FILE: python/sglang/jit_kernel/tests/test_nvfp4_quant.py
  function _nvfp4_supported (line 16) | def _nvfp4_supported() -> bool:
  function _silu_and_mul_reference (line 20) | def _silu_and_mul_reference(x: torch.Tensor) -> torch.Tensor:
  function cast_from_fp4 (line 60) | def cast_from_fp4(x: torch.Tensor, m: int, n: int) -> torch.Tensor:
  function cast_to_fp4 (line 68) | def cast_to_fp4(x: torch.Tensor) -> torch.Tensor:
  function get_reciprocal (line 82) | def get_reciprocal(x):
  function ref_nvfp4_quant (line 88) | def ref_nvfp4_quant(x: torch.Tensor, global_scale: torch.Tensor):
  function recover_swizzled_scales (line 103) | def recover_swizzled_scales(scale: torch.Tensor, m: int, n: int) -> torc...
  function test_quantize_to_fp4 (line 118) | def test_quantize_to_fp4(dtype: torch.dtype, shape: tuple[int, int]) -> ...
  function test_quantize_to_fp4_padded (line 139) | def test_quantize_to_fp4_padded(shape: tuple[int, int]) -> None:
  function test_quantize_to_fp4_grouped (line 160) | def test_quantize_to_fp4_grouped(shape: tuple[int, int, int]) -> None:
  function test_silu_and_mul_quantize_to_fp4_grouped (line 186) | def test_silu_and_mul_quantize_to_fp4_grouped(shape: tuple[int, int, int...

FILE: python/sglang/jit_kernel/tests/test_per_tensor_quant_fp8.py
  function sglang_scaled_fp8_quant (line 19) | def sglang_scaled_fp8_quant(
  function torch_scaled_fp8_quant (line 34) | def torch_scaled_fp8_quant(tensor, inv_scale):
  function test_jit_per_tensor_quant_compare_implementations (line 46) | def test_jit_per_tensor_quant_compare_implementations(
  function test_jit_per_tensor_quant_supports_3d (line 62) | def test_jit_per_tensor_quant_supports_3d(shape):

FILE: python/sglang/jit_kernel/tests/test_per_token_group_quant_8bit.py
  function test_per_token_group_quant_with_column_major (line 108) | def test_per_token_group_quant_with_column_major(

FILE: python/sglang/jit_kernel/tests/test_pos_enc.py
  function burn_kernel (line 13) | def burn_kernel(out_ptr, iters: tl.constexpr):
  function triton_burn (line 29) | def triton_burn(ms: float, grid=(256,)):
  function create_test_inputs (line 36) | def create_test_inputs(
  function create_cos_sin_cache (line 59) | def create_cos_sin_cache(rotary_dim, max_position_embeddings, base, dtyp...
  function _apply_rotary_emb (line 86) | def _apply_rotary_emb(
  class RotaryEmbedding (line 115) | class RotaryEmbedding(torch.nn.Module):
    method __init__ (line 117) | def __init__(
    method _compute_inv_freq (line 138) | def _compute_inv_freq(self, base: Union[int, float]) -> torch.Tensor:
    method _compute_cos_sin_cache (line 147) | def _compute_cos_sin_cache(self) -> torch.Tensor:
    method forward_native (line 158) | def forward_native(
  function get_torch_rotary_embedding (line 199) | def get_torch_rotary_embedding(
  function get_sgl_rotary_embedding (line 213) | def get_sgl_rotary_embedding(
  function compare_results (line 234) | def compare_results(jit_out, sgl_out, dtype):
  function test_correctness (line 284) | def test_correctness(
  function test_performance (line 373) | def test_performance(

FILE: python/sglang/jit_kernel/tests/test_qknorm.py
  function sglang_aot_qknorm (line 10) | def sglang_aot_qknorm(
  function sglang_jit_qknorm (line 25) | def sglang_jit_qknorm(
  function flashinfer_qknorm (line 36) | def flashinfer_qknorm(
  function torch_impl_qknorm (line 49) | def torch_impl_qknorm(
  function test_qknorm (line 82) | def test_qknorm(batch_size: int, n_k: int, n_q: int, head_dim: int) -> N...

FILE: python/sglang/jit_kernel/tests/test_qknorm_across_heads.py
  function sglang_jit_qknorm_across_heads (line 10) | def sglang_jit_qknorm_across_heads(
  function sglang_aot_qknorm_across_heads (line 21) | def sglang_aot_qknorm_across_heads(
  function torch_impl_qknorm_across_heads (line 34) | def torch_impl_qknorm_across_heads(
  function test_qknorm_across_heads (line 61) | def test_qknorm_across_heads(batch_size: int, hidden_dim: int) -> None:

FILE: python/sglang/jit_kernel/tests/test_qwen_image_modulation.py
  function _tol (line 23) | def _tol(dtype: torch.dtype) -> tuple[float, float]:
  function _make_modulation_tensors (line 29) | def _make_modulation_tensors(batch_size: int, hidden_size: int, dtype: t...
  function _baseline_select01_modulation (line 39) | def _baseline_select01_modulation(
  function _baseline_residual_select01_modulation (line 72) | def _baseline_residual_select01_modulation(
  function cuda_setup (line 109) | def cuda_setup():
  function test_fused_layernorm_scale_shift_gate_select01 (line 119) | def test_fused_layernorm_scale_shift_gate_select01(
  function test_fused_residual_layernorm_scale_shift_gate_select01 (line 166) | def test_fused_residual_layernorm_scale_shift_gate_select01(

FILE: python/sglang/jit_kernel/tests/test_renorm.py
  function test_top_k_renorm_probs (line 12) | def test_top_k_renorm_probs(batch_size, vocab_size, k):
  function test_top_p_renorm_probs (line 47) | def test_top_p_renorm_probs(batch_size, vocab_size, p):
  function test_top_k_mask_logits (line 82) | def test_top_k_mask_logits(batch_size, vocab_size, k, neginf_input):

FILE: python/sglang/jit_kernel/tests/test_rmsnorm.py
  function sglang_jit_rmsnorm (line 10) | def sglang_jit_rmsnorm(input: torch.Tensor, weight: torch.Tensor) -> None:
  function flashinfer_rmsnorm (line 16) | def flashinfer_rmsnorm(input: torch.Tensor, weight: torch.Tensor) -> None:
  function test_rmsnorm (line 36) | def test_rmsnorm(batch_size: int, hidden_size: int) -> None:

FILE: python/sglang/jit_kernel/tests/test_rope.py
  function create_cos_sin_cache (line 14) | def create_cos_sin_cache(
  function sglang_jit_rope (line 40) | def sglang_jit_rope(
  function flashinfer_rope (line 52) | def flashinfer_rope(
  function torch_impl_rope (line 75) | def torch_impl_rope(
  function test_rope (line 110) | def test_rope(
  function test_rope_position_dtypes (line 138) | def test_rope_position_dtypes(dtype: torch.dtype) -> None:
  function test_partial_rope (line 163) | def test_partial_rope(batch_size: int, is_neox: bool, rope_dim: int, hea...
  function test_fused_rope_store (line 190) | def test_fused_rope_store(

FILE: python/sglang/jit_kernel/tests/test_store_cache.py
  function test_store_cache (line 24) | def test_store_cache(batch_size: int, element_dim: int) -> None:
  function test_store_cache_dtypes (line 49) | def test_store_cache_dtypes(
  function test_store_cache_int32_indices (line 68) | def test_store_cache_int32_indices(batch_size: int, element_dim: int) ->...
  function _valid_num_splits (line 82) | def _valid_num_splits(element_dim: int, dtype: torch.dtype) -> list:
  function test_store_cache_num_split (line 102) | def test_store_cache_num_split(
  function test_can_use_store_cache (line 119) | def test_can_use_store_cache() -> None:

FILE: python/sglang/jit_kernel/tests/test_timestep_embedding.py
  function get_timestep_embedding_reference (line 37) | def get_timestep_embedding_reference(
  function test_timestep_embedding_correctness_with_sgld (line 73) | def test_timestep_embedding_correctness_with_sgld(batch_size, dim, dtype):
  function test_timestep_embedding_correctness_with_diffusers (line 91) | def test_timestep_embedding_correctness_with_diffusers(
  function test_timestep_embedding_perf (line 115) | def test_timestep_embedding_perf():

FILE: python/sglang/jit_kernel/timestep_embedding.py
  function _jit_timestep_embedding_module (line 14) | def _jit_timestep_embedding_module(dtype: torch.dtype) -> Module:
  function timestep_embedding (line 24) | def timestep_embedding(

FILE: python/sglang/jit_kernel/utils.py
  function is_in_ci (line 17) | def is_in_ci() -> bool:
  function should_run_full_tests (line 22) | def should_run_full_tests() -> bool:
  function get_ci_test_range (line 26) | def get_ci_test_range(full_range: List[Any], ci_range: List[Any]) -> Lis...
  function cache_once (line 32) | def cache_once(fn: F) -> F:
  function _make_wrapper (line 49) | def _make_wrapper(tup: Tuple[str, str]) -> str:
  function _resolve_kernel_path (line 55) | def _resolve_kernel_path() -> pathlib.Path:
  class CPPArgList (line 86) | class CPPArgList(list[str]):
    method __str__ (line 87) | def __str__(self) -> str:
  function is_hip_runtime (line 104) | def is_hip_runtime() -> bool:
  function make_cpp_args (line 108) | def make_cpp_args(*args: CPP_TEMPLATE_TYPE) -> CPPArgList:
  function load_jit (line 121) | def load_jit(
  function is_arch_support_pdl (line 215) | def is_arch_support_pdl() -> bool:
  function _get_cuda_arch_value (line 223) | def _get_cuda_arch_value() -> int:
  function _get_cuda_arch_list (line 231) | def _get_cuda_arch_list() -> str:

FILE: python/sglang/lang/api.py
  function function (line 23) | def function(
  function Runtime (line 35) | def Runtime(*args, **kwargs):
  function Engine (line 42) | def Engine(*args, **kwargs):
  function set_default_backend (line 49) | def set_default_backend(backend: BaseBackend):
  function flush_cache (line 53) | def flush_cache(backend: Optional[BaseBackend] = None):
  function get_server_info (line 64) | def get_server_info(backend: Optional[BaseBackend] = None):
  function gen (line 75) | def gen(
  function gen_int (line 142) | def gen_int(
  function gen_string (line 185) | def gen_string(
  function image (line 228) | def image(expr: SglExpr):
  function video (line 232) | def video(path: str, num_frames: int):
  function select (line 236) | def select(
  function _role_common (line 246) | def _role_common(name: str, expr: Optional[SglExpr] = None):
  function system (line 253) | def system(expr: Optional[SglExpr] = None):
  function user (line 257) | def user(expr: Optional[SglExpr] = None):
  function assistant (line 261) | def assistant(expr: Optional[SglExpr] = None):
  function system_begin (line 265) | def system_begin():
  function system_end (line 269) | def system_end():
  function user_begin (line 273) | def user_begin():
  function user_end (line 277) | def user_end():
  function assistant_begin (line 281) | def assistant_begin():
  function assistant_end (line 285) | def assistant_end():
  function separate_reasoning (line 289) | def separate_reasoning(

FILE: python/sglang/lang/backend/anthropic.py
  class Anthropic (line 12) | class Anthropic(BaseBackend):
    method __init__ (line 13) | def __init__(self, model_name, *args, **kwargs):
    method get_chat_template (line 23) | def get_chat_template(self):
    method generate (line 26) | def generate(
    method generate_stream (line 51) | def generate_stream(

FILE: python/sglang/lang/backend/base_backend.py
  class BaseBackend (line 9) | class BaseBackend:
    method __init__ (line 10) | def __init__(self) -> None:
    method get_model_name (line 14) | def get_model_name(self):
    method get_chat_template (line 17) | def get_chat_template(self):
    method cache_prefix (line 20) | def cache_prefix(self, prefix_str: str):
    method uncache_prefix (line 23) | def uncache_prefix(self, rid: str):
    method end_request (line 26) | def end_request(self, rid: Union[str, List[str]]):
    method begin_program (line 29) | def begin_program(self, s: StreamExecutor):
    method end_program (line 32) | def end_program(self, s: Union[StreamExecutor, List[StreamExecutor]]):
    method commit_lazy_operations (line 35) | def commit_lazy_operations(self, s: StreamExecutor):
    method fork_program (line 38) | def fork_program(
    method fill_image (line 46) | def fill_image(self, s: StreamExecutor):
    method generate (line 49) | def generate(
    method generate_stream (line 56) | def generate_stream(
    method select (line 63) | def select(
    method concatenate_and_append (line 72) | def concatenate_and_append(self, src_rids: List[str], dst_rid: str):
    method shutdown (line 75) | def shutdown(self):
    method flush_cache (line 78) | def flush_cache(self):
    method get_server_info (line 81) | def get_server_info(self):

FILE: python/sglang/lang/backend/litellm.py
  class LiteLLM (line 15) | class LiteLLM(BaseBackend):
    method __init__ (line 16) | def __init__(
    method get_chat_template (line 47) | def get_chat_template(self):
    method generate (line 50) | def generate(
    method generate_stream (line 70) | def generate_stream(

FILE: python/sglang/lang/backend/openai.py
  function create_logit_bias_int (line 25) | def create_logit_bias_int(tokenizer):
  class TokenUsage (line 48) | class TokenUsage:
    method reset (line 52) | def reset(self):
  class OpenAI (line 56) | class OpenAI(BaseBackend):
    method __init__ (line 57) | def __init__(
    method get_chat_template (line 106) | def get_chat_template(self):
    method _prepare_spec_execution (line 109) | def _prepare_spec_execution(
    method generate (line 140) | def generate(
    method spec_fill (line 224) | def spec_fill(self, value: str):
    method spec_pattern_match (line 228) | def spec_pattern_match(self, comp):
    method role_end_generate (line 248) | def role_end_generate(
    method generate_stream (line 283) | def generate_stream(
    method select (line 312) | def select(
  function openai_completion (line 383) | def openai_completion(
  function openai_completion_stream (line 425) | def openai_completion_stream(

FILE: python/sglang/lang/backend/runtime_endpoint.py
  class RuntimeEndpoint (line 26) | class RuntimeEndpoint(BaseBackend):
    method __init__ (line 27) | def __init__(
    method get_model_name (line 56) | def get_model_name(self):
    method flush_cache (line 59) | def flush_cache(self):
    method get_server_info (line 68) | def get_server_info(self):
    method get_chat_template (line 77) | def get_chat_template(self):
    method cache_prefix (line 80) | def cache_prefix(self, prefix_str: str):
    method start_profile (line 89) | def start_profile(self):
    method stop_profile (line 97) | def stop_profile(self):
    method commit_lazy_operations (line 105) | def commit_lazy_operations(self, s: StreamExecutor):
    method fill_image (line 116) | def fill_image(self, s: StreamExecutor):
    method _handle_dtype_to_regex (line 127) | def _handle_dtype_to_regex(self, sampling_params: SglSamplingParams):
    method generate (line 159) | def generate(
    method generate_stream (line 198) | def generate_stream(
    method select (line 248) | def select(
    method concatenate_and_append (line 317) | def concatenate_and_append(self, src_rids: List[str], dst_rid: str):
    method _generate_http_request (line 326) | def _generate_http_request(self, s: StreamExecutor, data):
    method _add_images (line 337) | def _add_images(self, s: StreamExecutor, data):
    method _assert_success (line 342) | def _assert_success(self, res):
  function compute_normalized_prompt_logprobs (line 351) | def compute_normalized_prompt_logprobs(input_logprobs):
  class Runtime (line 356) | class Runtime:
    method __init__ (line 366) | def __init__(
    method shutdown (line 436) | def shutdown(self):
    method start_profile (line 443) | def start_profile(self):
    method stop_profile (line 446) | def stop_profile(self):
    method cache_prefix (line 449) | def cache_prefix(self, prefix: str):
    method get_tokenizer (line 452) | def get_tokenizer(self):
    method async_generate (line 462) | async def async_generate(
    method generate (line 500) | def generate(
    method encode (line 524) | def encode(
    method get_server_info (line 532) | async def get_server_info(self):
    method __del__ (line 543) | def __del__(self):

FILE: python/sglang/lang/backend/vertexai.py
  class VertexAI (line 20) | class VertexAI(BaseBackend):
    method __init__ (line 21) | def __init__(self, model_name, safety_settings=None):
    method get_chat_template (line 35) | def get_chat_template(self):
    method generate (line 38) | def generate(
    method generate_stream (line 62) | def generate_stream(
    method text_to_vertexai_input (line 85) | def text_to_vertexai_input(self, text, images):
    method messages_to_vertexai_input (line 99) | def messages_to_vertexai_input(self, messages):

FILE: python/sglang/lang/chat_template.py
  class ChatTemplateStyle (line 7) | class ChatTemplateStyle(Enum):
  class ChatTemplate (line 13) | class ChatTemplate:
    method get_prefix_and_suffix (line 22) | def get_prefix_and_suffix(
    method get_prompt (line 43) | def get_prompt(self, messages: List[Dict]) -> str:
  function register_chat_template (line 61) | def register_chat_template(template):
  function register_chat_template_matching_function (line 65) | def register_chat_template_matching_function(func):
  function get_chat_template (line 69) | def get_chat_template(name):
  function get_chat_template_by_model_path (line 73) | def get_chat_template_by_model_path(model_path):
  function match_deepseek (line 528) | def match_deepseek(model_path: str):
  function match_orion (line 536) | def match_orion(model_path: str):
  function match_deepseek_janus_pro (line 542) | def match_deepseek_janus_pro(model_path: str):
  function match_dbrx (line 548) | def match_dbrx(model_path: str):
  function match_vicuna (line 556) | def match_vicuna(model_path: str):
  function match_llama2_chat (line 562) | def match_llama2_chat(model_path: str):
  function match_mistral (line 572) | def match_mistral(model_path: str):
  function match_llama3_instruct (line 578) | def match_llama3_instruct(model_path: str):
  function match_chat_ml (line 584) | def match_chat_ml(model_path: str):
  function match_chat_yi (line 604) | def match_chat_yi(model_path: str):
  function match_gemma_it (line 614) | def match_gemma_it(model_path: str):
  function match_openbmb_minicpm (line 620) | def match_openbmb_minicpm(model_path: str):
  function match_c4ai_command_r (line 628) | def match_c4ai_command_r(model_path: str):
  function match_granite_instruct (line 634) | def match_granite_instruct(model_path: str):
  function match_gemma3_instruct (line 640) | def match_gemma3_instruct(model_path: str):
  function match_internvl_chat (line 646) | def match_internvl_chat(model_path: str):
  function match_interns1_chat (line 652) | def match_interns1_chat(model_path: str):

FILE: python/sglang/lang/choices.py
  class ChoicesDecision (line 9) | class ChoicesDecision:
  class ChoicesSamplingMethod (line 14) | class ChoicesSamplingMethod(ABC):
    method requires_unconditional_logprobs (line 17) | def requires_unconditional_logprobs(self) -> bool:
    method __call__ (line 21) | def __call__(
  class TokenLengthNormalized (line 32) | class TokenLengthNormalized(ChoicesSamplingMethod):
    method __call__ (line 34) | def __call__(
  class GreedyTokenSelection (line 56) | class GreedyTokenSelection(ChoicesSamplingMethod):
    method __call__ (line 58) | def __call__(
    method _build_logprob_matrix (line 87) | def _build_logprob_matrix(self, input_token_logprobs, max_tokens, num_...
    method _greedy_selection (line 97) | def _greedy_selection(self, logprob_matrix, num_options, max_tokens):
  class UnconditionalLikelihoodNormalized (line 110) | class UnconditionalLikelihoodNormalized(ChoicesSamplingMethod):
    method requires_unconditional_logprobs (line 113) | def requires_unconditional_logprobs(self) -> bool:
    method __call__ (line 116) | def __call__(
    method _normalize_logprobs (line 150) | def _normalize_logprobs(self, input_token_logprobs, unconditional_toke...

FILE: python/sglang/lang/interpreter.py
  function run_internal (line 42) | def run_internal(state, program, func_args, func_kwargs, sync):
  function run_program (line 57) | def run_program(
  function run_program_batch (line 93) | def run_program_batch(
  function _run_program_batch_generator (line 184) | def _run_program_batch_generator(
  function cache_program (line 242) | def cache_program(program, backend):
  class StreamExecutor (line 250) | class StreamExecutor:
    method __init__ (line 253) | def __init__(
    method submit (line 318) | def submit(self, expr: SglExpr):
    method sync (line 326) | def sync(self):
    method get_var (line 330) | def get_var(self, name):
    method set_var (line 335) | def set_var(self, name, value):
    method get_meta_info (line 338) | def get_meta_info(self, name, timeout=None):
    method fork (line 346) | def fork(
    method text (line 380) | def text(self):
    method messages (line 384) | def messages(self):
    method error (line 388) | def error(self):
    method end (line 392) | def end(self):
    method _thread_worker_func (line 398) | def _thread_worker_func(self):
    method _execute (line 437) | def _execute(self, other):
    method _execute_fill (line 481) | def _execute_fill(self, value: str, prefix=False):
    method _execute_image (line 500) | def _execute_image(self, expr: SglImage):
    method _execute_video (line 509) | def _execute_video(self, expr: SglVideo):
    method _spec_gen (line 519) | def _spec_gen(self, sampling_params):
    method _execute_gen (line 569) | def _execute_gen(self, expr: SglGen):
    method _execute_select (line 623) | def _execute_select(self, expr: SglSelect):
    method _execute_variable (line 636) | def _execute_variable(self, expr: SglVariable):
    method _execute_role_begin (line 641) | def _execute_role_begin(self, expr: SglRoleBegin):
    method _execute_role_end (line 659) | def _execute_role_end(self, expr: SglRoleEnd):
    method _execute_var_scope_begin (line 695) | def _execute_var_scope_begin(self, expr: SglVarScopeBegin):
    method _execute_var_scope_end (line 698) | def _execute_var_scope_end(self, expr: SglVarScopeEnd):
    method _execute_commit_lazy_operations (line 702) | def _execute_commit_lazy_operations(self, expr: SglCommitLazy):
    method _execute_concatenate_and_append_text (line 705) | def _execute_concatenate_and_append_text(self, expr: SglConcateAndAppe...
    method _execute_concatenate_and_append_kv_cache (line 714) | def _execute_concatenate_and_append_kv_cache(self, expr: SglConcateAnd...
    method _execute_separate_reasoning (line 730) | def _execute_separate_reasoning(self, expr: SglSeparateReasoning):
    method _init_var_event (line 764) | def _init_var_event(self, expr):
    method _resolve_sampling_params (line 775) | def _resolve_sampling_params(self, sampling_params):
    method __del__ (line 824) | def __del__(self):
  class ProgramState (line 828) | class ProgramState:
    method __init__ (line 831) | def __init__(self, stream_executor: StreamExecutor):
    method _role_common (line 834) | def _role_common(self, name: str, expr: Optional[SglExpr] = None):
    method system (line 849) | def system(self, expr: Optional[SglExpr] = None):
    method user (line 852) | def user(self, expr: Optional[SglExpr] = None):
    method assistant (line 855) | def assistant(self, expr: Optional[SglExpr] = None):
    method var_scope (line 859) | def var_scope(self, name: str):
    method fork (line 864) | def fork(
    method copy (line 875) | def copy(self, position_ids_offset: Optional[List[int]] = None):
    method text (line 882) | def text(self):
    method messages (line 885) | def messages(self):
    method sync (line 888) | def sync(self):
    method error (line 891) | def error(self):
    method text_iter (line 894) | def text_iter(self, var_name: Optional[str] = None):
    method text_async_iter (line 932) | async def text_async_iter(
    method get_var (line 977) | def get_var(self, name):
    method set_var (line 980) | def set_var(self, name, value):
    method get_meta_info (line 983) | def get_meta_info(self, name):
    method __iadd__ (line 986) | def __iadd__(self, other):
    method __getitem__ (line 992) | def __getitem__(self, name):
    method __setitem__ (line 995) | def __setitem__(self, name, value):
    method __contains__ (line 998) | def __contains__(self, name):
    method __del__ (line 1001) | def __del__(self):
    method __repr__ (line 1004) | def __repr__(self) -> str:
  class ProgramStateGroup (line 1008) | class ProgramStateGroup:
    method __init__ (line 1009) | def __init__(
    method join (line 1015) | def join(self, mode: str = "gather_variable"):
    method __getitem__ (line 1041) | def __getitem__(self, i: int):
    method __setitem__ (line 1044) | def __setitem__(self, i: int, value):
    method __iadd__ (line 1047) | def __iadd__(self, other):

FILE: python/sglang/lang/ir.py
  class SglSamplingParams (line 18) | class SglSamplingParams:
    method clone (line 42) | def clone(self):
    method to_openai_kwargs (line 64) | def to_openai_kwargs(self):
    method to_vertexai_kwargs (line 79) | def to_vertexai_kwargs(self):
    method to_anthropic_kwargs (line 93) | def to_anthropic_kwargs(self):
    method to_litellm_kwargs (line 109) | def to_litellm_kwargs(self):
    method to_srt_kwargs (line 121) | def to_srt_kwargs(self):
  class SglFunction (line 141) | class SglFunction:
    method __init__ (line 142) | def __init__(self, func, num_api_spec_tokens=None, bind_arguments=None):
    method bind (line 154) | def bind(self, **kwargs):
    method run (line 160) | def run(
    method run_batch (line 223) | def run_batch(
    method trace (line 304) | def trace(self, *, backend=None, **kwargs):
    method cache (line 310) | def cache(self, backend=None):
    method __call__ (line 316) | def __call__(self, *args, **kwargs):
  class SglExpr (line 327) | class SglExpr:
    method __init__ (line 330) | def __init__(self):
    method __add__ (line 336) | def __add__(self, other):
    method __radd__ (line 343) | def __radd__(self, other):
    method concatenate_ir (line 350) | def concatenate_ir(self, a, b):
    method print_graph_dfs (line 361) | def print_graph_dfs(self):
  class SglExprList (line 397) | class SglExprList(SglExpr):
    method __init__ (line 398) | def __init__(self, expr_list: List[SglExpr]):
    method __repr__ (line 402) | def __repr__(self):
  class SglArgument (line 406) | class SglArgument(SglExpr):
    method __init__ (line 407) | def __init__(self, name: str, value: str):
    method __repr__ (line 412) | def __repr__(self):
    method __len__ (line 415) | def __len__(self):
    method __getitem__ (line 418) | def __getitem__(self, i):
    method __int__ (line 421) | def __int__(self):
    method __bool__ (line 424) | def __bool__(self):
    method __format__ (line 427) | def __format__(self, *args):
  class SglImage (line 434) | class SglImage(SglExpr):
    method __init__ (line 435) | def __init__(self, path: str):
    method __repr__ (line 438) | def __repr__(self) -> str:
  class SglVideo (line 442) | class SglVideo(SglExpr):
    method __init__ (line 443) | def __init__(self, path: str, num_frames: int):
    method __repr__ (line 447) | def __repr__(self) -> str:
  class SglGen (line 451) | class SglGen(SglExpr):
    method __init__ (line 452) | def __init__(
    method __repr__ (line 502) | def __repr__(self):
  class SglConstantText (line 506) | class SglConstantText(SglExpr):
    method __init__ (line 507) | def __init__(self, value: str):
    method __repr__ (line 511) | def __repr__(self):
  class SglRoleBegin (line 515) | class SglRoleBegin(SglExpr):
    method __init__ (line 516) | def __init__(self, role: str):
    method __repr__ (line 520) | def __repr__(self):
  class SglRoleEnd (line 524) | class SglRoleEnd(SglExpr):
    method __init__ (line 525) | def __init__(self, role: str):
    method __repr__ (line 529) | def __repr__(self):
  class SglSelect (line 533) | class SglSelect(SglExpr):
    method __init__ (line 535) | def __init__(
    method __repr__ (line 548) | def __repr__(self):
  class SglFork (line 552) | class SglFork(SglExpr):
    method __init__ (line 553) | def __init__(self, number: int, position_ids_offset=None):
    method __repr__ (line 558) | def __repr__(self):
  class SglGetForkItem (line 565) | class SglGetForkItem(SglExpr):
    method __init__ (line 566) | def __init__(self, index: int):
    method __repr__ (line 570) | def __repr__(self):
  class SglVariable (line 574) | class SglVariable(SglExpr):
    method __init__ (line 575) | def __init__(self, name: str, source):
    method __repr__ (line 580) | def __repr__(self):
  class SglVarScopeBegin (line 584) | class SglVarScopeBegin(SglExpr):
    method __init__ (line 585) | def __init__(self, name: str):
    method __repr__ (line 589) | def __repr__(self):
  class SglVarScopeEnd (line 593) | class SglVarScopeEnd(SglExpr):
    method __init__ (line 594) | def __init__(self, name: str):
    method __repr__ (line 598) | def __repr__(self):
  class SglConcateAndAppend (line 602) | class SglConcateAndAppend(SglExpr):
    method __init__ (line 603) | def __init__(self, states):
    method __repr__ (line 607) | def __repr__(self):
  class SglCommitLazy (line 611) | class SglCommitLazy(SglExpr):
    method __init__ (line 612) | def __init__(self):
    method __repr__ (line 615) | def __repr__(self):
  class SglSeparateReasoning (line 619) | class SglSeparateReasoning(SglExpr):
    method __init__ (line 620) | def __init__(self, model_type: str, expr: SglExpr):
    method process_name_for_reasoning (line 628) | def process_name_for_reasoning(self, name):
    method _process_expr (line 633) | def _process_expr(self, expr):
    method __repr__ (line 642) | def __repr__(self):

FILE: python/sglang/lang/tracer.py
  class StopTracing (line 25) | class StopTracing(Exception):
  function extract_prefix_by_tracing (line 29) | def extract_prefix_by_tracing(program, backend):
  function trace_program (line 54) | def trace_program(program, arguments, backend):
  class TracerProgramState (line 75) | class TracerProgramState(ProgramState):
    method __init__ (line 76) | def __init__(self, backend, arguments, only_trace_prefix):
    method fork (line 108) | def fork(self, size: int = 1, position_ids_offset: Optional[List[int]]...
    method _append_node (line 139) | def _append_node(self, other: SglExpr):
    method _execute (line 144) | def _execute(self, other: SglExpr):
    method __iadd__ (line 175) | def __iadd__(self, other):
    method _execute_fill (line 179) | def _execute_fill(self, expr: SglConstantText):
    method _execute_gen (line 184) | def _execute_gen(self, expr: SglGen):
    method _execute_select (line 190) | def _execute_select(self, expr: SglSelect):
    method _execute_role_begin (line 198) | def _execute_role_begin(self, expr: SglRoleBegin):
    method _execute_role_end (line 217) | def _execute_role_end(self, expr: SglRoleEnd):
    method _execute_var_scope_end (line 228) | def _execute_var_scope_end(self, expr: SglVarScopeEnd):
    method get_var (line 232) | def get_var(self, name):
    method flatten_nodes (line 240) | def flatten_nodes(self):
    method __del__ (line 253) | def __del__(self):
  class TracingScope (line 257) | class TracingScope:
    method __init__ (line 260) | def __init__(self, tracer_state: TracerProgramState):
    method __enter__ (line 264) | def __enter__(self):
    method __exit__ (line 268) | def __exit__(self, exc_type, exc_value, traceback):
    method get_current_scope (line 272) | def get_current_scope():
    method add_child_state (line 275) | def add_child_state(self, state: TracerProgramState):

FILE: python/sglang/launch_server.py
  function run_server (line 15) | def run_server(server_args):

FILE: python/sglang/multimodal_gen/.claude/skills/diffusion-kernel/scripts/bench_diffusion_denoise.py
  function required_gpus_for_model (line 230) | def required_gpus_for_model(model_key: str) -> int:
  function build_sglang_cmd (line 238) | def build_sglang_cmd(
  function run_benchmark_once (line 284) | def run_benchmark_once(
  function print_results_table (line 377) | def print_results_table(results: list[dict]):
  function inject_kernels_example (line 420) | def inject_kernels_example():
  function main (line 475) | def main():

FILE: python/sglang/multimodal_gen/.claude/skills/diffusion-kernel/scripts/bench_diffusion_rmsnorm.py
  function pytorch_rmsnorm (line 52) | def pytorch_rmsnorm(
  function benchmark_kernel (line 64) | def benchmark_kernel(
  function run_benchmark (line 86) | def run_benchmark():

FILE: python/sglang/multimodal_gen/.claude/skills/diffusion-kernel/scripts/diffusion_skill_env.py
  function get_repo_root (line 16) | def get_repo_root() -> Path:
  function get_assets_dir (line 22) | def get_assets_dir(repo_root: Path | None = None) -> Path:
  function get_output_dir (line 27) | def get_output_dir(name: str, repo_root: Path | None = None) -> Path:
  function ensure_dir (line 34) | def ensure_dir(path: Path) -> Path:
  function check_write_access (line 39) | def check_write_access(repo_root: Path | None = None) -> Path:
  function _run_nvidia_smi (line 47) | def _run_nvidia_smi(query: str) -> list[list[str]]:
  function get_gpu_inventory (line 63) | def get_gpu_inventory() -> list[dict[str, int | str]]:
  function get_busy_gpu_uuids (line 79) | def get_busy_gpu_uuids() -> set[str]:
  function pick_idle_gpus (line 84) | def pick_idle_gpus(
  function configure_runtime_env (line 107) | def configure_runtime_env(required_gpus: int = 1) -> str | None:
  function main (line 116) | def main() -> None:

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/core/generator.py
  class SGLDiffusionGenerator (line 35) | class SGLDiffusionGenerator:
    method __init__ (line 38) | def __init__(self):
    method __del__ (line 57) | def __del__(self):
    method init_generator (line 60) | def init_generator(
    method kill_generator (line 77) | def kill_generator(self):
    method close_generator (line 115) | def close_generator(self):
    method get_comfyui_model (line 126) | def get_comfyui_model(self, model_path: str, model_options: dict = None):
    method load_model (line 185) | def load_model(

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/core/model_patcher.py
  class SGLDModelPatcher (line 10) | class SGLDModelPatcher(ModelPatcher):
    method __init__ (line 13) | def __init__(
    method clone (line 32) | def clone(self):
    method model_size (line 53) | def model_size(self):
    method load (line 60) | def load(
    method patch_model (line 70) | def patch_model(
    method unpatch_model (line 80) | def unpatch_model(self, device_to=None, unpatch_weights=True):

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/core/server_api.py
  class SGLDiffusionServerAPI (line 16) | class SGLDiffusionServerAPI:
    method __init__ (line 19) | def __init__(self, base_url: str, api_key: str = "sk-proj-1234567890"):
    method get_model_info (line 41) | def get_model_info(self) -> Dict[str, Any]:
    method generate_image (line 63) | def generate_image(
    method generate_video (line 205) | def generate_video(
    method _build_image_common_params (line 351) | def _build_image_common_params(
    method _get_content_type (line 399) | def _get_content_type(self, file_path: str) -> str:
    method decode_image_from_response (line 410) | def decode_image_from_response(
    method set_lora (line 442) | def set_lora(
    method unset_lora (line 489) | def unset_lora(

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/executors/base.py
  class SGLDiffusionExecutor (line 8) | class SGLDiffusionExecutor(torch.nn.Module):
    method __init__ (line 11) | def __init__(self, generator, model_path, model, config):
    method should_suppress_logs (line 21) | def should_suppress_logs(timestep):
    method set_lora (line 27) | def set_lora(self, lora_nickname=None, lora_path=None, strength=None, ...
    method _unpack_latents (line 37) | def _unpack_latents(self, latents, height, width, channels):
    method _pack_latents (line 46) | def _pack_latents(self, latents):

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/executors/flux.py
  class FluxExecutor (line 18) | class FluxExecutor(SGLDiffusionExecutor):
    method __init__ (line 21) | def __init__(self, generator, model_path, model, config):
    method forward (line 24) | def forward(self, x, timestep, context, y=None, guidance=None, **kwargs):

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/executors/qwen_image.py
  class QwenImageExecutor (line 20) | class QwenImageExecutor(SGLDiffusionExecutor):
    method __init__ (line 23) | def __init__(self, generator, model_path, model, config):
    method _pack_latents (line 27) | def _pack_latents(self, x):
    method _unpack_latents (line 52) | def _unpack_latents(self, latents, num_embeds, orig_shape, x):
    method forward (line 67) | def forward(self, x, timestep, context, **kwargs):
  class QwenImageEditExecutor (line 108) | class QwenImageEditExecutor(QwenImageExecutor):
    method __init__ (line 111) | def __init__(self, generator, model_path, model, config):
    method forward (line 114) | def forward(

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/executors/zimage.py
  class ZImageExecutor (line 18) | class ZImageExecutor(SGLDiffusionExecutor):
    method __init__ (line 21) | def __init__(self, generator, model_path, model, config):
    method forward (line 24) | def forward(self, x, timesteps, context, **kwargs):

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/nodes.py
  class SGLDOptions (line 21) | class SGLDOptions:
    method INPUT_TYPES (line 23) | def INPUT_TYPES(cls):
    method create_options (line 72) | def create_options(
  class SGLDLoraLoader (line 113) | class SGLDLoraLoader:
    method INPUT_TYPES (line 115) | def INPUT_TYPES(cls):
    method load_lora (line 137) | def load_lora(
  class SGLDUNETLoader (line 166) | class SGLDUNETLoader:
    method __init__ (line 167) | def __init__(self):
    method INPUT_TYPES (line 171) | def INPUT_TYPES(s):
    method load_unet (line 187) | def load_unet(self, unet_name, weight_dtype, sgld_options: dict = None):
  class SGLDiffusionServerModel (line 202) | class SGLDiffusionServerModel:
    method INPUT_TYPES (line 206) | def INPUT_TYPES(cls):
    method load_server (line 231) | def load_server(self, base_url: str, api_key: str):
  class SGLDiffusionGenerateImage (line 246) | class SGLDiffusionGenerateImage:
    method INPUT_TYPES (line 250) | def INPUT_TYPES(cls):
    method generate_image (line 336) | def generate_image(
  class SGLDiffusionGenerateVideo (line 397) | class SGLDiffusionGenerateVideo:
    method INPUT_TYPES (line 401) | def INPUT_TYPES(cls):
    method generate_video (line 514) | def generate_video(
  class SGLDiffusionServerSetLora (line 579) | class SGLDiffusionServerSetLora:
    method INPUT_TYPES (line 583) | def INPUT_TYPES(cls):
    method set_lora (line 624) | def set_lora(
  class SGLDiffusionServerUnsetLora (line 650) | class SGLDiffusionServerUnsetLora:
    method INPUT_TYPES (line 654) | def INPUT_TYPES(cls):
    method unset_lora (line 681) | def unset_lora(

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/test/test_flux_pipeline.py
  function test_comfyui_flux_pipeline_direct (line 13) | def test_comfyui_flux_pipeline_direct() -> None:

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/test/test_qwen_image_edit_pipeline.py
  function test_comfyui_qwen_image_edit_pipeline_direct (line 13) | def test_comfyui_qwen_image_edit_pipeline_direct() -> None:

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/test/test_qwen_image_pipeline.py
  function test_comfyui_qwen_image_pipeline_direct (line 13) | def test_comfyui_qwen_image_pipeline_direct() -> None:

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/test/test_zimage_pipeline.py
  function test_comfyui_zimage_pipeline_direct (line 13) | def test_comfyui_zimage_pipeline_direct() -> None:

FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/utils.py
  function _ensure_dir (line 15) | def _ensure_dir(path: str) -> None:
  function _to_numpy_image (line 19) | def _to_numpy_image(image: torch.Tensor) -> np.ndarray:
  function _to_hwc_tensor (line 35) | def _to_hwc_tensor(image: torch.Tensor) -> torch.Tensor:
  function is_empty_image (line 52) | def is_empty_image(image: torch.Tensor, tolerance: float = 1e-6) -> bool:
  function get_image_path (line 80) | def get_image_path(image: torch.Tensor) -> str:
  function convert_b64_to_tensor_image (line 100) | def convert_b64_to_tensor_image(b64_image: str) -> torch.Tensor:
  class SGLDVideoInput (line 131) | class SGLDVideoInput(VideoInput):
    method __init__ (line 132) | def __init__(self, video_path: str, height: int, width: int):
    method get_dimensions (line 139) | def get_dimensions(self) -> tuple[int, int]:
    method get_components (line 148) | def get_components(self):
    method save_to (line 155) | def save_to(self, path: str, format=None, codec=None, metadata=None):
  function convert_video_to_comfy_video (line 169) | def convert_video_to_comfy_video(

FILE: python/sglang/multimodal_gen/apps/webui/main.py
  function add_webui_args (line 20) | def add_webui_args(parser: argparse.ArgumentParser):
  function run_sgl_diffusion_webui (line 27) | def run_sgl_diffusion_webui(server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/benchmarks/bench_offline_throughput.py
  class BatchOutput (line 50) | class BatchOutput:
  class BenchArgs (line 63) | class BenchArgs:
    method add_cli_args (line 91) | def add_cli_args(parser: argparse.ArgumentParser):
    method from_cli_args (line 171) | def from_cli_args(cls, args: argparse.Namespace):
  function initialize_engine (line 177) | def initialize_engine(server_args: ServerArgs) -> DiffGenerator:
  function generate_batch (line 185) | def generate_batch(
  function calculate_metrics (line 225) | def calculate_metrics(
  function throughput_test (line 265) | def throughput_test(
  function display_results (line 349) | def display_results(
  function save_results (line 390) | def save_results(
  function main (line 421) | def main():

FILE: python/sglang/multimodal_gen/benchmarks/bench_serving.py
  function _compute_scale_factor (line 50) | def _compute_scale_factor(req: RequestFuncInput, args) -> Optional[float]:
  function _compute_expected_latency_ms_from_base (line 66) | def _compute_expected_latency_ms_from_base(
  function _infer_slo_base_time_ms_from_warmups (line 78) | def _infer_slo_base_time_ms_from_warmups(
  function _populate_slo_ms_from_warmups (line 99) | def _populate_slo_ms_from_warmups(
  function async_request_image_sglang (line 129) | async def async_request_image_sglang(
  function async_request_video_sglang (line 223) | async def async_request_video_sglang(
  function calculate_metrics (line 380) | def calculate_metrics(
  function wait_for_service (line 434) | def wait_for_service(base_url: str, timeout: int = 1200) -> None:
  function benchmark (line 455) | async def benchmark(args):

FILE: python/sglang/multimodal_gen/benchmarks/compare_perf.py
  function calculate_diff (line 9) | def calculate_diff(base: float, new: float) -> Tuple[float, float]:
  function calculate_upper_bound (line 19) | def calculate_upper_bound(baseline: float, rel_tol: float, min_abs_tol: ...
  function calculate_lower_bound (line 26) | def calculate_lower_bound(baseline: float, rel_tol: float, min_abs_tol: ...
  function get_perf_status_emoji (line 33) | def get_perf_status_emoji(
  function consolidate_steps (line 57) | def consolidate_steps(
  function _load_benchmark_file (line 106) | def _load_benchmark_file(file_path: str) -> Dict[str, Any]:
  function _get_status_emoji_from_diff_percent (line 112) | def _get_status_emoji_from_diff_percent(diff_pct):
  function _print_single_comparison_report (line 121) | def _print_single_comparison_report(
  function _print_multi_comparison_report (line 168) | def _print_multi_comparison_report(
  function compare_benchmarks (line 214) | def compare_benchmarks(file_paths: List[str], output_format: str = "mark...

FILE: python/sglang/multimodal_gen/benchmarks/datasets.py
  class RequestFuncInput (line 20) | class RequestFuncInput:
  class RequestFuncOutput (line 36) | class RequestFuncOutput:
  function is_dir_not_empty (line 46) | def is_dir_not_empty(path: str) -> bool:
  class BaseDataset (line 50) | class BaseDataset(ABC):
    method __init__ (line 51) | def __init__(self, args, api_url: str = "", model: str = ""):
    method __len__ (line 58) | def __len__(self) -> int:
    method __getitem__ (line 62) | def __getitem__(self, idx: int) -> RequestFuncInput:
    method get_requests (line 65) | def get_requests(self) -> List[RequestFuncInput]:
  class VBenchDataset (line 69) | class VBenchDataset(BaseDataset):
    method __init__ (line 78) | def __init__(self, args, api_url: str = "", model: str = ""):
    method _load_data (line 83) | def _load_data(self) -> List[Dict[str, Any]]:
    method _download_file (line 93) | def _download_file(self, url: str, dest_path: str) -> None:
    method _load_t2v_prompts (line 101) | def _load_t2v_prompts(self) -> List[Dict[str, Any]]:
    method _auto_download_i2v_dataset (line 123) | def _auto_download_i2v_dataset(self) -> Optional[str]:
    method _load_from_i2v_json (line 176) | def _load_from_i2v_json(self, json_path: str) -> List[Dict[str, Any]]:
    method _scan_directory_for_images (line 197) | def _scan_directory_for_images(self, path: str) -> List[Dict[str, Any]]:
    method _create_dummy_data (line 216) | def _create_dummy_data(self) -> List[Dict[str, Any]]:
    method _load_i2v_data (line 229) | def _load_i2v_data(self) -> List[Dict[str, Any]]:
    method _resize_data (line 256) | def _resize_data(self, data: List[Dict[str, Any]]) -> List[Dict[str, A...
    method __len__ (line 267) | def __len__(self) -> int:
    method __getitem__ (line 270) | def __getitem__(self, idx: int) -> RequestFuncInput:
  class RandomDataset (line 284) | class RandomDataset(BaseDataset):
    method __init__ (line 285) | def __init__(self, args, api_url: str = "", model: str = ""):
    method __len__ (line 289) | def __len__(self) -> int:
    method __getitem__ (line 292) | def __getitem__(self, idx: int) -> RequestFuncInput:

FILE: python/sglang/multimodal_gen/configs/models/adapter/base.py
  class AdapterArchConfig (line 10) | class AdapterArchConfig(ArchConfig):
    method __post_init__ (line 39) | def __post_init__(self) -> None:
  class AdapterConfig (line 45) | class AdapterConfig(ModelConfig):
    method add_cli_args (line 52) | def add_cli_args(parser: Any, prefix: str = "dit-config") -> Any:

FILE: python/sglang/multimodal_gen/configs/models/adapter/ltx_2_connector.py
  class LTX2ConnectorArchConfig (line 10) | class LTX2ConnectorArchConfig(AdapterArchConfig):
  class LTX2ConnectorConfig (line 29) | class LTX2ConnectorConfig(AdapterConfig):

FILE: python/sglang/multimodal_gen/configs/models/base.py
  class ArchConfig (line 16) | class ArchConfig:
    method __getattr__ (line 22) | def __getattr__(self, name: str):
    method __setattr__ (line 31) | def __setattr__(self, key, value):
  class ModelConfig (line 44) | class ModelConfig:
    method __getattr__ (line 52) | def __getattr__(self, name):
    method __getstate__ (line 60) | def __getstate__(self):
    method __setstate__ (line 66) | def __setstate__(self, state):
    method update_model_arch (line 71) | def update_model_arch(self, source_model_dict: dict[str, Any]) -> None:
    method update_model_config (line 83) | def update_model_config(self, source_model_dict: dict[str, Any]) -> None:

FILE: python/sglang/multimodal_gen/configs/models/bridges/mova_dual_tower.py
  function _is_conditioner_block (line 9) | def _is_conditioner_block(name: str, module) -> bool:
  class MOVADualTowerArchConfig (line 15) | class MOVADualTowerArchConfig(DiTArchConfig):
    method __post_init__ (line 34) | def __post_init__(self):
  class MOVADualTowerConfig (line 41) | class MOVADualTowerConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/base.py
  class DiTArchConfig (line 13) | class DiTArchConfig(ArchConfig):
    method __post_init__ (line 47) | def __post_init__(self) -> None:
  class DiTConfig (line 53) | class DiTConfig(ModelConfig):
    method add_cli_args (line 61) | def add_cli_args(parser: Any, prefix: str = "dit-config") -> Any:

FILE: python/sglang/multimodal_gen/configs/models/dits/flux.py
  class FluxArchConfig (line 11) | class FluxArchConfig(DiTArchConfig):
    method __post_init__ (line 68) | def __post_init__(self):
  class FluxConfig (line 76) | class FluxConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/glmimage.py
  class GlmImageArchConfig (line 7) | class GlmImageArchConfig(DiTArchConfig):
    method __post_init__ (line 28) | def __post_init__(self):
  class GlmImageDitConfig (line 36) | class GlmImageDitConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/helios.py
  function is_blocks (line 7) | def is_blocks(n: str, m) -> bool:
  class HeliosArchConfig (line 12) | class HeliosArchConfig(DiTArchConfig):
    method __post_init__ (line 69) | def __post_init__(self):
  class HeliosConfig (line 77) | class HeliosConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/hunyuan3d.py
  class Hunyuan3DDiTArchConfig (line 8) | class Hunyuan3DDiTArchConfig(DiTArchConfig):
    method __post_init__ (line 33) | def __post_init__(self) -> None:
  class Hunyuan3DDiTConfig (line 40) | class Hunyuan3DDiTConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/hunyuanvideo.py
  function is_double_block (line 11) | def is_double_block(n: str, m) -> bool:
  function is_single_block (line 15) | def is_single_block(n: str, m) -> bool:
  function is_refiner_block (line 19) | def is_refiner_block(n: str, m) -> bool:
  function is_txt_in (line 23) | def is_txt_in(n: str, m) -> bool:
  class HunyuanVideoArchConfig (line 28) | class HunyuanVideoArchConfig(DiTArchConfig):
    method __post_init__ (line 174) | def __post_init__(self):
  class HunyuanVideoConfig (line 181) | class HunyuanVideoConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/ltx_2.py
  class LTXModelType (line 8) | class LTXModelType(Enum):
    method is_video_enabled (line 20) | def is_video_enabled(self) -> bool:
    method is_audio_enabled (line 23) | def is_audio_enabled(self) -> bool:
  class LTX2RopeType (line 27) | class LTX2RopeType(str, Enum):
  class LTX2AttentionFunction (line 39) | class LTX2AttentionFunction(str, Enum):
  function is_blocks (line 50) | def is_blocks(n: str, m) -> bool:
  class LTX2ArchConfig (line 55) | class LTX2ArchConfig(DiTArchConfig):
    method __post_init__ (line 154) | def __post_init__(self):
  class LTX2Config (line 171) | class LTX2Config(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/mova_audio.py
  function _is_blocks (line 9) | def _is_blocks(n: str, m) -> bool:
  class MOVAAudioArchConfig (line 14) | class MOVAAudioArchConfig(DiTArchConfig):
    method __post_init__ (line 54) | def __post_init__(self):
  class MOVAAudioConfig (line 65) | class MOVAAudioConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/mova_video.py
  function _is_blocks (line 9) | def _is_blocks(n: str, m) -> bool:
  class MOVAVideoArchConfig (line 14) | class MOVAVideoArchConfig(DiTArchConfig):
    method __post_init__ (line 53) | def __post_init__(self):
  class MOVAVideoConfig (line 64) | class MOVAVideoConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/qwenimage.py
  class QwenImageArchConfig (line 11) | class QwenImageArchConfig(DiTArchConfig):
    method __post_init__ (line 38) | def __post_init__(self):
  class QwenImageEditPlus_2511_ArchConfig (line 46) | class QwenImageEditPlus_2511_ArchConfig(QwenImageArchConfig):
  class QwenImageDitConfig (line 51) | class QwenImageDitConfig(DiTConfig):
  class QwenImageEditPlus_2511_DitConfig (line 58) | class QwenImageEditPlus_2511_DitConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/sana.py
  class SanaArchConfig (line 21) | class SanaArchConfig(DiTArchConfig):
    method __post_init__ (line 48) | def __post_init__(self):
  class SanaConfig (line 55) | class SanaConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/wanvideo.py
  function is_blocks (line 9) | def is_blocks(n: str, m) -> bool:
  class WanVideoArchConfig (line 14) | class WanVideoArchConfig(DiTArchConfig):
    method __post_init__ (line 94) | def __post_init__(self):
  class WanVideoConfig (line 102) | class WanVideoConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/dits/zimage.py
  function is_zimage_layer (line 10) | def is_zimage_layer(n: str, m) -> bool:
  class ZImageArchConfig (line 22) | class ZImageArchConfig(DiTArchConfig):
    method __post_init__ (line 67) | def __post_init__(self):
  class ZImageDitConfig (line 75) | class ZImageDitConfig(DiTConfig):

FILE: python/sglang/multimodal_gen/configs/models/encoders/base.py
  class EncoderArchConfig (line 15) | class EncoderArchConfig(ArchConfig):
  class TextEncoderArchConfig (line 30) | class TextEncoderArchConfig(EncoderArchConfig):
    method __post_init__ (line 49) | def __post_init__(self) -> None:
  class ImageEncoderArchConfig (line 58) | class ImageEncoderArchConfig(EncoderArchConfig):
  class BaseEncoderOutput (line 63) | class BaseEncoderOutput:
  class EncoderConfig (line 72) | class EncoderConfig(ModelConfig):
  class TextEncoderConfig (line 81) | class TextEncoderConfig(EncoderConfig):
  class ImageEncoderConfig (line 91) | class ImageEncoderConfig(EncoderConfig):

FILE: python/sglang/multimodal_gen/configs/models/encoders/clip.py
  function _is_transformer_layer (line 15) | def _is_transformer_layer(n: str, m) -> bool:
  function _is_embeddings (line 19) | def _is_embeddings(n: str, m) -> bool:
  class CLIPTextArchConfig (line 24) | class CLIPTextArchConfig(TextEncoderArchConfig):
  class CLIPVisionArchConfig (line 61) | class CLIPVisionArchConfig(ImageEncoderArchConfig):
  class CLIPTextConfig (line 87) | class CLIPTextConfig(TextEncoderConfig):
  class CLIPVisionConfig (line 96) | class CLIPVisionConfig(ImageEncoderConfig):

FILE: python/sglang/multimodal_gen/configs/models/encoders/gemma2.py
  function _is_transformer_layer (line 19) | def _is_transformer_layer(n: str, m) -> bool:
  function _is_embeddings (line 23) | def _is_embeddings(n: str, m) -> bool:
  function _is_final_norm (line 27) | def _is_final_norm(n: str, m) -> bool:
  class Gemma2ArchConfig (line 32) | class Gemma2ArchConfig(TextEncoderArchConfig):
  class Gemma2Config (line 85) | class Gemma2Config(TextEncoderConfig):

FILE: python/sglang/multimodal_gen/configs/models/encoders/gemma_3.py
  function _is_transformer_layer (line 13) | def _is_transformer_layer(n: str, m) -> bool:
  function _is_embeddings (line 17) | def _is_embeddings(n: str, m) -> bool:
  function _is_final_norm (line 21) | def _is_final_norm(n: str, m) -> bool:
  class Gemma3ArchConfig (line 26) | class Gemma3ArchConfig(TextEncoderArchConfig):
  class Gemma3Config (line 78) | class Gemma3Config(TextEncoderConfig):

FILE: python/sglang/multimodal_gen/configs/models/encoders/llama.py
  function _is_transformer_layer (line 12) | def _is_transformer_layer(n: str, m) -> bool:
  function _is_embeddings (line 16) | def _is_embeddings(n: str, m) -> bool:
  function _is_final_norm (line 20) | def _is_final_norm(n: str, m) -> bool:
  class LlamaArchConfig (line 25) | class LlamaArchConfig(TextEncoderArchConfig):
  class LlamaConfig (line 66) | class LlamaConfig(TextEncoderConfig):

FILE: python/sglang/multimodal_gen/configs/models/encoders/qwen3.py
  function _is_transformer_layer (line 12) | def _is_transformer_layer(n: str, m) -> bool:
  function _is_embeddings (line 16) | def _is_embeddings(n: str, m) -> bool:
  function _is_final_norm (line 20) | def _is_final_norm(n: str, m) -> bool:
  class Qwen3TextArchConfig (line 25) | class Qwen3TextArchConfig(TextEncoderArchConfig):
    method __post_init__ (line 72) | def __post_init__(self) -> None:
  class Qwen3TextConfig (line 82) | class Qwen3TextConfig(TextEncoderConfig):

FILE: python/sglang/multimodal_gen/configs/models/encoders/qwen_image.py
  function _is_transformer_layer (line 12) | def _is_transformer_layer(n: str, m) -> bool:
  function _is_embeddings (line 16) | def _is_embeddings(n: str, m) -> bool:
  function _is_final_norm (line 20) | def _is_final_norm(n: str, m) -> bool:
  class QwenImageArchConfig (line 25) | class QwenImageArchConfig(TextEncoderArchConfig):
  class Qwen2_5VLConfig (line 66) | class Qwen2_5VLConfig(TextEncoderConfig):

FILE: python/sglang/multimodal_gen/configs/models/encoders/t5.py
  function _is_transformer_layer (line 13) | def _is_transformer_layer(n: str, m) -> bool:
  function _is_embeddings (line 17) | def _is_embeddings(n: str, m) -> bool:
  function _is_final_layernorm (line 21) | def _is_final_layernorm(n: str, m) -> bool:
  class T5ArchConfig (line 26) | class T5ArchConfig(TextEncoderArchConfig):
    method __post_init__ (line 65) | def __post_init__(self):
  class T5Config (line 84) | class T5Config(TextEncoderConfig):
    method add_cli_args (line 94) | def add_cli_args(

FILE: python/sglang/multimodal_gen/configs/models/vaes/base.py
  class VAEArchConfig (line 16) | class VAEArchConfig(ArchConfig):
  class VAEConfig (line 25) | class VAEConfig(ModelConfig):
    method __post_init__ (line 45) | def __post_init__(self):
    method post_init (line 50) | def post_init(self):
    method add_cli_args (line 54) | def add_cli_args(parser: Any, prefix: str = "vae-config") -> Any:
    method get_vae_scale_factor (line 143) | def get_vae_scale_factor(self):
    method encode_sample_mode (line 146) | def encode_sample_mode(self):
    method from_cli_args (line 150) | def from_cli_args(cls, args: argparse.Namespace) -> "VAEConfig":

FILE: python/sglang/multimodal_gen/configs/models/vaes/dac.py
  class DacVAEArchConfig (line 11) | class DacVAEArchConfig(ArchConfig):
  class DacVAEConfig (line 27) | class DacVAEConfig(ModelConfig):

FILE: python/sglang/multimodal_gen/configs/models/vaes/flux.py
  class FluxVAEArchConfig (line 8) | class FluxVAEArchConfig(VAEArchConfig):
  class Flux2VAEArchConfig (line 30) | class Flux2VAEArchConfig(FluxVAEArchConfig):
  class FluxVAEConfig (line 35) | class FluxVAEConfig(VAEConfig):
    method __post_init__ (line 44) | def __post_init__(self):
    method post_init (line 49) | def post_init(self):
  class Flux2VAEConfig (line 69) | class Flux2VAEConfig(FluxVAEConfig):

FILE: python/sglang/multimodal_gen/configs/models/vaes/glmimage.py
  class GlmImageVAEArchConfig (line 9) | class GlmImageVAEArchConfig(VAEArchConfig):
  class GlmImageVAEConfig (line 39) | class GlmImageVAEConfig(VAEConfig):
    method get_vae_scale_factor (line 48) | def get_vae_scale_factor(self):
    method __post_init__ (line 51) | def __post_init__(self):
    method post_init (line 56) | def post_init(self):

FILE: python/sglang/multimodal_gen/configs/models/vaes/hunyuan3d.py
  class Hunyuan3DVAEArchConfig (line 8) | class Hunyuan3DVAEArchConfig(VAEArchConfig):
  class Hunyuan3DVAEConfig (line 16) | class Hunyuan3DVAEConfig(VAEConfig):

FILE: python/sglang/multimodal_gen/configs/models/vaes/hunyuanvae.py
  class HunyuanVAEArchConfig (line 10) | class HunyuanVAEArchConfig(VAEArchConfig):
    method __post_init__ (line 35) | def __post_init__(self):
  class HunyuanVAEConfig (line 40) | class HunyuanVAEConfig(VAEConfig):

FILE: python/sglang/multimodal_gen/configs/models/vaes/ltx_audio.py
  class LTXAudioVAEArchConfig (line 9) | class LTXAudioVAEArchConfig(VAEArchConfig):
  class LTXAudioVAEConfig (line 29) | class LTXAudioVAEConfig(VAEConfig):

FILE: python/sglang/multimodal_gen/configs/models/vaes/ltx_video.py
  class LTXVideoVAEArchConfig (line 9) | class LTXVideoVAEArchConfig(VAEArchConfig):
  class LTXVideoVAEConfig (line 55) | class LTXVideoVAEConfig(VAEConfig):

FILE: python/sglang/multimodal_gen/configs/models/vaes/qwenimage.py
  class QwenImageVAEArchConfig (line 10) | class QwenImageVAEArchConfig(VAEArchConfig):
  class QwenImageVAEConfig (line 32) | class QwenImageVAEConfig(VAEConfig):
    method get_vae_scale_factor (line 41) | def get_vae_scale_factor(self):
    method __post_init__ (line 44) | def __post_init__(self):
    method post_init (line 49) | def post_init(self):

FILE: python/sglang/multimodal_gen/configs/models/vaes/sana.py
  class SanaVAEArchConfig (line 18) | class SanaVAEArchConfig(VAEArchConfig):
  class SanaVAEConfig (line 28) | class SanaVAEConfig(VAEConfig):
    method post_init (line 37) | def post_init(self):

FILE: python/sglang/multimodal_gen/configs/models/vaes/wanvae.py
  class WanVAEArchConfig (line 12) | class WanVAEArchConfig(VAEArchConfig):
    method __post_init__ (line 65) | def __post_init__(self):
  class WanVAEConfig (line 77) | class WanVAEConfig(VAEConfig):
    method __post_init__ (line 88) | def __post_init__(self):

FILE: python/sglang/multimodal_gen/configs/models/vocoder/base.py
  class VocoderArchConfig (line 12) | class VocoderArchConfig(ArchConfig):
  class VocoderConfig (line 19) | class VocoderConfig(ModelConfig):
    method from_cli_args (line 23) | def from_cli_args(cls, args: argparse.Namespace) -> "VocoderConfig":

FILE: python/sglang/multimodal_gen/configs/models/vocoder/ltx_vocoder.py
  class LTXVocoderArchConfig (line 12) | class LTXVocoderArchConfig(VocoderArchConfig):
  class LTXVocoderConfig (line 28) | class LTXVocoderConfig(VocoderConfig):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/base.py
  class ModelTaskType (line 46) | class ModelTaskType(Enum):
    method is_image_gen (line 58) | def is_image_gen(self) -> bool:
    method requires_image_input (line 65) | def requires_image_input(self) -> bool:
    method accepts_image_input (line 72) | def accepts_image_input(self) -> bool:
    method data_type (line 81) | def data_type(self) -> DataType:
  class STA_Mode (line 90) | class STA_Mode(str, Enum):
  function preprocess_text (line 100) | def preprocess_text(prompt: str) -> str:
  function postprocess_text (line 104) | def postprocess_text(output: BaseEncoderOutput, _text_inputs) -> torch.t...
  function shard_rotary_emb_for_sp (line 108) | def shard_rotary_emb_for_sp(emb):
  function maybe_unpad_latents (line 143) | def maybe_unpad_latents(latents, batch):
  class PipelineConfig (line 160) | class PipelineConfig:
    method postprocess_image (line 206) | def postprocess_image(self, image):
    method calculate_condition_image_size (line 235) | def calculate_condition_image_size(self, image, width, height) -> tupl...
    method prepare_sigmas (line 242) | def prepare_sigmas(self, sigmas, num_inference_steps):
    method preprocess_condition_image (line 246) | def preprocess_condition_image(
    method prepare_calculated_size (line 256) | def prepare_calculated_size(self, image):
    method prepare_image_processor_kwargs (line 259) | def prepare_image_processor_kwargs(self, batch, neg=False):
    method postprocess_image_latent (line 262) | def postprocess_image_latent(self, latent_condition, batch):
    method slice_noise_pred (line 292) | def slice_noise_pred(self, noise, latents):
    method adjust_num_frames (line 295) | def adjust_num_frames(self, num_frames):
    method tokenize_prompt (line 299) | def tokenize_prompt(self, prompt: list[str], tokenizer, tok_kwargs) ->...
    method prepare_latent_shape (line 302) | def prepare_latent_shape(self, batch, batch_size, num_frames):
    method allow_set_num_frames (line 317) | def allow_set_num_frames(self):
    method get_decode_scale_and_shift (line 320) | def get_decode_scale_and_shift(self, device, dtype, vae):
    method maybe_pack_latents (line 332) | def maybe_pack_latents(self, latents, batch_size, batch):
    method maybe_prepare_latent_ids (line 335) | def maybe_prepare_latent_ids(self, latents):
    method postprocess_vae_encode (line 339) | def postprocess_vae_encode(self, image_latents, vae):
    method preprocess_decoding (line 343) | def preprocess_decoding(self, latents, server_args=None, vae=None):
    method gather_latents_for_sp (line 346) | def gather_latents_for_sp(self, latents):
    method preprocess_vae_image (line 351) | def preprocess_vae_image(self, batch, vae_image_processor):
    method shard_latents_for_sp (line 354) | def shard_latents_for_sp(self, batch, latents):
    method get_pos_prompt_embeds (line 383) | def get_pos_prompt_embeds(self, batch):
    method get_neg_prompt_embeds (line 386) | def get_neg_prompt_embeds(self, batch):
    method post_denoising_loop (line 389) | def post_denoising_loop(self, latents, batch):
    method post_decoding (line 393) | def post_decoding(self, frames, server_args):
    method prepare_pos_cond_kwargs (line 396) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method prepare_neg_cond_kwargs (line 399) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method add_cli_args (line 403) | def add_cli_args(
    method update_config_from_dict (line 534) | def update_config_from_dict(self, args: dict[str, Any], prefix: str = ...
    method from_kwargs (line 553) | def from_kwargs(
    method check_pipeline_config (line 670) | def check_pipeline_config(self) -> None:
    method dump_to_json (line 691) | def dump_to_json(self, file_path: str):
    method load_from_json (line 720) | def load_from_json(self, file_path: str):
    method update_pipeline_config (line 725) | def update_pipeline_config(self, source_pipeline_dict: dict[str, Any])...
  class ImagePipelineConfig (line 753) | class ImagePipelineConfig(PipelineConfig):
    method _prepare_sigmas (line 756) | def _prepare_sigmas(self, sigmas, num_inference_steps):
    method shard_latents_for_sp (line 764) | def shard_latents_for_sp(self, batch, latents):
    method gather_latents_for_sp (line 786) | def gather_latents_for_sp(self, latents):
    method _unpad_and_unpack_latents (line 791) | def _unpad_and_unpack_latents(self, latents, batch):
  class SpatialImagePipelineConfig (line 807) | class SpatialImagePipelineConfig(ImagePipelineConfig):
    method shard_latents_for_sp (line 814) | def shard_latents_for_sp(self, batch, latents):
    method gather_latents_for_sp (line 840) | def gather_latents_for_sp(self, latents):
  class SlidingTileAttnConfig (line 850) | class SlidingTileAttnConfig(PipelineConfig):
  function parse_int_list (line 867) | def parse_int_list(value: str) -> list[int]:

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/diffusers_generic.py
  class DiffusersGenericPipelineConfig (line 20) | class DiffusersGenericPipelineConfig(PipelineConfig):
    method check_pipeline_config (line 64) | def check_pipeline_config(self) -> None:
    method adjust_size (line 70) | def adjust_size(self, width, height, image):
    method adjust_num_frames (line 76) | def adjust_num_frames(self, num_frames):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/flux.py
  function t5_postprocess_text (line 37) | def t5_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> tor...
  class FluxPipelineConfig (line 42) | class FluxPipelineConfig(ImagePipelineConfig):
    method prepare_sigmas (line 89) | def prepare_sigmas(self, sigmas, num_inference_steps):
    method prepare_latent_shape (line 92) | def prepare_latent_shape(self, batch, batch_size, num_frames):
    method maybe_pack_latents (line 101) | def maybe_pack_latents(self, latents, batch_size, batch):
    method get_pos_prompt_embeds (line 110) | def get_pos_prompt_embeds(self, batch):
    method get_neg_prompt_embeds (line 113) | def get_neg_prompt_embeds(self, batch):
    method _prepare_latent_image_ids (line 116) | def _prepare_latent_image_ids(self, original_height, original_width, d...
    method get_freqs_cis (line 138) | def get_freqs_cis(self, prompt_embeds, width, height, device, rotary_e...
    method post_denoising_loop (line 157) | def post_denoising_loop(self, latents, batch):
    method prepare_pos_cond_kwargs (line 169) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method prepare_neg_cond_kwargs (line 184) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype):
  function _prepare_latent_ids (line 200) | def _prepare_latent_ids(
  function _unpack_latents_with_ids (line 231) | def _unpack_latents_with_ids(
  function _patchify_latents (line 260) | def _patchify_latents(latents):
  function _unpatchify_latents (line 272) | def _unpatchify_latents(latents):
  function _prepare_text_ids (line 284) | def _prepare_text_ids(
  function _prepare_image_ids (line 303) | def _prepare_image_ids(
  function flux2_postprocess_text (line 332) | def flux2_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> ...
  function flux2_klein_postprocess_text (line 344) | def flux2_klein_postprocess_text(
  class Flux2MistralTextArchConfig (line 359) | class Flux2MistralTextArchConfig(TextEncoderArchConfig):
    method __post_init__ (line 372) | def __post_init__(self):
  class Flux2MistralTextConfig (line 384) | class Flux2MistralTextConfig(TextEncoderConfig):
  function format_text_input (line 390) | def format_text_input(prompts: List[str], system_message: str = None):
  function flux_2_preprocess_text (line 408) | def flux_2_preprocess_text(prompt: str):
  function flux2_pack_latents (line 414) | def flux2_pack_latents(latents):
  class Flux2PipelineConfig (line 422) | class Flux2PipelineConfig(FluxPipelineConfig):
    method tokenize_prompt (line 441) | def tokenize_prompt(self, prompts: list[str], tokenizer, tok_kwargs) -...
    method prepare_latent_shape (line 458) | def prepare_latent_shape(self, batch, batch_size, num_frames):
    method get_pos_prompt_embeds (line 467) | def get_pos_prompt_embeds(self, batch):
    method get_neg_prompt_embeds (line 470) | def get_neg_prompt_embeds(self, batch):
    method calculate_condition_image_size (line 473) | def calculate_condition_image_size(
    method preprocess_condition_image (line 496) | def preprocess_condition_image(
    method postprocess_image_latent (line 510) | def postprocess_image_latent(self, latent_condition, batch):
    method prepare_condition_image_latent_ids (line 523) | def prepare_condition_image_latent_ids(self, image_latents, batch):
    method get_freqs_cis (line 528) | def get_freqs_cis(self, prompt_embeds, width, height, device, rotary_e...
    method prepare_pos_cond_kwargs (line 558) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method prepare_neg_cond_kwargs (line 570) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method maybe_pack_latents (line 573) | def maybe_pack_latents(self, latents, batch_size, batch):
    method maybe_prepare_latent_ids (line 576) | def maybe_prepare_latent_ids(self, latents):
    method postprocess_vae_encode (line 579) | def postprocess_vae_encode(self, image_latents, vae):
    method _check_vae_has_bn (line 584) | def _check_vae_has_bn(self, vae):
    method preprocess_decoding (line 590) | def preprocess_decoding(self, latents, server_args=None, vae=None):
    method get_decode_scale_and_shift (line 601) | def get_decode_scale_and_shift(self, device, dtype, vae):
    method post_denoising_loop (line 630) | def post_denoising_loop(self, latents, batch):
    method slice_noise_pred (line 636) | def slice_noise_pred(self, noise, latents):
  class Flux2KleinPipelineConfig (line 643) | class Flux2KleinPipelineConfig(Flux2PipelineConfig):
    method tokenize_prompt (line 661) | def tokenize_prompt(self, prompts: list[str], tokenizer, tok_kwargs) -...

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/flux_finetuned.py
  class Flux2FinetunedPipelineConfig (line 25) | class Flux2FinetunedPipelineConfig(Flux2PipelineConfig):
    method preprocess_decoding (line 39) | def preprocess_decoding(
    method get_decode_scale_and_shift (line 73) | def get_decode_scale_and_shift(self, device, dtype, vae):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/glm_image.py
  class GlmImagePipelineConfig (line 18) | class GlmImagePipelineConfig(SpatialImagePipelineConfig):
    method __post_init__ (line 46) | def __post_init__(self):
    method get_freqs_cis (line 50) | def get_freqs_cis(self, batch, device, rotary_emb, dtype):
    method prepare_pos_cond_kwargs (line 57) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method prepare_neg_cond_kwargs (line 68) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method get_decode_scale_and_shift (line 79) | def get_decode_scale_and_shift(self, device, dtype, vae):
    method post_denoising_loop (line 92) | def post_denoising_loop(self, latents, batch):
    method post_decoding (line 97) | def post_decoding(self, frames, server_args):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/helios.py
  function umt5_postprocess_text (line 26) | def umt5_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> t...
  class HeliosT2VConfig (line 45) | class HeliosT2VConfig(PipelineConfig):
    method __post_init__ (line 94) | def __post_init__(self):
  class HeliosMidConfig (line 100) | class HeliosMidConfig(HeliosT2VConfig):
  class HeliosDistilledConfig (line 111) | class HeliosDistilledConfig(HeliosT2VConfig):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/hunyuan.py
  class PromptTemplate (line 34) | class PromptTemplate(TypedDict):
  function llama_preprocess_text (line 45) | def llama_preprocess_text(prompt: str) -> str:
  function llama_postprocess_text (line 49) | def llama_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> ...
  function clip_preprocess_text (line 59) | def clip_preprocess_text(prompt: str) -> str:
  function clip_postprocess_text (line 63) | def clip_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> t...
  class HunyuanConfig (line 69) | class HunyuanConfig(PipelineConfig):
    method __post_init__ (line 101) | def __post_init__(self):
  class FastHunyuanConfig (line 107) | class FastHunyuanConfig(HunyuanConfig):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/hunyuan3d.py
  class Hunyuan3D2PipelineConfig (line 15) | class Hunyuan3D2PipelineConfig(PipelineConfig):
    method __post_init__ (line 66) | def __post_init__(self):
    method prepare_latent_shape (line 70) | def prepare_latent_shape(self, batch, batch_size, num_frames):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/ltx_2.py
  function pack_text_embeds (line 26) | def pack_text_embeds(
  function _gemma_postprocess_func (line 96) | def _gemma_postprocess_func(
  class LTX2PipelineConfig (line 115) | class LTX2PipelineConfig(PipelineConfig):
    method vae_scale_factor (line 135) | def vae_scale_factor(self):
    method vae_temporal_compression (line 139) | def vae_temporal_compression(self):
    method prepare_latent_shape (line 142) | def prepare_latent_shape(self, batch, batch_size, num_frames):
    method prepare_audio_latent_shape (line 159) | def prepare_audio_latent_shape(self, batch, batch_size, num_frames):
    method prepare_sigmas (line 199) | def prepare_sigmas(self, sigmas, num_inference_steps):
    method tokenize_prompt (line 207) | def tokenize_prompt(self, prompt: list[str], tokenizer, tok_kwargs) ->...
    method maybe_pack_latents (line 229) | def maybe_pack_latents(self, latents, batch_size, batch):
    method _infer_video_latent_frames_and_tokens_per_frame (line 255) | def _infer_video_latent_frames_and_tokens_per_frame(
    method shard_latents_for_sp (line 308) | def shard_latents_for_sp(self, batch, latents):
    method gather_latents_for_sp (line 353) | def gather_latents_for_sp(self, latents):
    method maybe_pack_audio_latents (line 361) | def maybe_pack_audio_latents(self, latents, batch_size, batch):
    method get_pos_prompt_embeds (line 375) | def get_pos_prompt_embeds(self, batch):
    method get_neg_prompt_embeds (line 383) | def get_neg_prompt_embeds(self, batch):
    method get_decode_scale_and_shift (line 390) | def get_decode_scale_and_shift(self, device, dtype, vae):
    method _unpack_latents (line 423) | def _unpack_latents(
    method _denormalize_latents (line 454) | def _denormalize_latents(
    method _denormalize_audio_latents (line 469) | def _denormalize_audio_latents(
    method _unpack_audio_latents (line 477) | def _unpack_audio_latents(
    method _unpad_and_unpack_latents (line 497) | def _unpad_and_unpack_latents(self, latents, audio_latents, batch, vae...
  class LTX2I2VPipelineConfig (line 582) | class LTX2I2VPipelineConfig(LTX2PipelineConfig):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/mova.py
  class MOVAPipelineConfig (line 27) | class MOVAPipelineConfig(PipelineConfig):
    method _center_crop_and_resize (line 55) | def _center_crop_and_resize(
    method adjust_num_frames (line 104) | def adjust_num_frames(self, num_frames: int) -> int:
    method preprocess_condition_image (line 123) | def preprocess_condition_image(
    method prepare_latent_shape (line 129) | def prepare_latent_shape(self, batch, batch_size, num_frames):
    method prepare_audio_latent_shape (line 141) | def prepare_audio_latent_shape(self, batch_size, num_samples, audio_vae):
    method normalize_video_latents (line 145) | def normalize_video_latents(self, latents: torch.Tensor, video_vae) ->...
    method denormalize_video_latents (line 158) | def denormalize_video_latents(
  class MOVA360PConfig (line 175) | class MOVA360PConfig(MOVAPipelineConfig):
  class MOVA720PConfig (line 182) | class MOVA720PConfig(MOVAPipelineConfig):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/qwen_image.py
  function _extract_masked_hidden (line 25) | def _extract_masked_hidden(hidden_states: torch.Tensor, mask: torch.Tens...
  function qwen_image_preprocess_text (line 34) | def qwen_image_preprocess_text(prompt):
  function qwen_image_postprocess_text (line 42) | def qwen_image_postprocess_text(outputs, _text_inputs, drop_idx=34):
  function _normalize_prompt_list (line 59) | def _normalize_prompt_list(prompt):
  function _normalize_image_list (line 63) | def _normalize_image_list(images):
  function _build_qwen_edit_image_prompt (line 69) | def _build_qwen_edit_image_prompt(num_images: int) -> str:
  function _resolve_qwen_edit_per_prompt_images (line 74) | def _resolve_qwen_edit_per_prompt_images(prompt_list, image_list):
  function _pack_latents (line 91) | def _pack_latents(latents, batch_size, num_channels_latents, height, wid...
  class QwenImagePipelineConfig (line 104) | class QwenImagePipelineConfig(ImagePipelineConfig):
    method prepare_sigmas (line 144) | def prepare_sigmas(self, sigmas, num_inference_steps):
    method prepare_image_processor_kwargs (line 147) | def prepare_image_processor_kwargs(self, batch, neg=False):
    method get_vae_scale_factor (line 156) | def get_vae_scale_factor(self):
    method prepare_latent_shape (line 159) | def prepare_latent_shape(self, batch, batch_size, num_frames):
    method maybe_pack_latents (line 167) | def maybe_pack_latents(self, latents, batch_size, batch):
    method get_decode_scale_and_shift (line 176) | def get_decode_scale_and_shift(self, device, dtype, vae):
    method get_freqs_cis (line 189) | def get_freqs_cis(img_shapes, txt_seq_lens, rotary_emb, device, dtype):
    method _prepare_cond_kwargs (line 203) | def _prepare_cond_kwargs(self, batch, prompt_embeds, rotary_emb, devic...
    method prepare_pos_cond_kwargs (line 239) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method prepare_neg_cond_kwargs (line 244) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method post_denoising_loop (line 249) | def post_denoising_loop(self, latents, batch):
  class QwenImageEditPipelineConfig (line 263) | class QwenImageEditPipelineConfig(QwenImagePipelineConfig):
    method _prepare_edit_cond_kwargs (line 268) | def _prepare_edit_cond_kwargs(
    method preprocess_condition_image (line 324) | def preprocess_condition_image(
    method postprocess_image_latent (line 332) | def postprocess_image_latent(self, latent_condition, batch):
    method prepare_pos_cond_kwargs (line 359) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method prepare_neg_cond_kwargs (line 364) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method calculate_condition_image_size (line 369) | def calculate_condition_image_size(self, image, width, height) -> tupl...
    method slice_noise_pred (line 375) | def slice_noise_pred(self, noise, latents):
  class QwenImageEditPlusPipelineConfig (line 386) | class QwenImageEditPlusPipelineConfig(QwenImageEditPipelineConfig):
    method _get_condition_image_sizes (line 389) | def _get_condition_image_sizes(self, batch) -> list[tuple[int, int]]:
    method prepare_image_processor_kwargs (line 404) | def prepare_image_processor_kwargs(self, batch, neg=False) -> dict:
    method prepare_calculated_size (line 433) | def prepare_calculated_size(self, image):
    method resize_condition_image (line 436) | def resize_condition_image(self, images, target_width, target_height):
    method calculate_condition_image_size (line 444) | def calculate_condition_image_size(self, image, width, height) -> tupl...
    method calculate_vae_image_size (line 450) | def calculate_vae_image_size(self, image, width, height) -> tuple[int,...
    method preprocess_vae_image (line 456) | def preprocess_vae_image(self, batch, vae_image_processor):
    method _prepare_edit_cond_kwargs (line 469) | def _prepare_edit_cond_kwargs(
  class QwenImageEditPlus_2511_PipelineConfig (line 535) | class QwenImageEditPlus_2511_PipelineConfig(QwenImageEditPlusPipelineCon...
  class QwenImageLayeredPipelineConfig (line 540) | class QwenImageLayeredPipelineConfig(QwenImageEditPipelineConfig):
    method _prepare_edit_cond_kwargs (line 544) | def _prepare_edit_cond_kwargs(
    method _unpad_and_unpack_latents (line 579) | def _unpad_and_unpack_latents(self, latents, batch):
    method allow_set_num_frames (line 600) | def allow_set_num_frames(self):
    method post_denoising_loop (line 603) | def post_denoising_loop(self, latents, batch):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/sana.py
  function sana_postprocess_text (line 37) | def sana_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> t...
  class SanaPipelineConfig (line 44) | class SanaPipelineConfig(SpatialImagePipelineConfig):
    method prepare_latent_shape (line 76) | def prepare_latent_shape(self, batch, batch_size, num_frames):
    method get_pos_prompt_embeds (line 86) | def get_pos_prompt_embeds(self, batch):
    method get_neg_prompt_embeds (line 90) | def get_neg_prompt_embeds(self, batch):
    method prepare_pos_cond_kwargs (line 93) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method prepare_neg_cond_kwargs (line 104) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method post_denoising_loop (line 113) | def post_denoising_loop(self, latents, batch):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/wan.py
  function t5_postprocess_text (line 26) | def t5_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> tor...
  class WanI2VCommonConfig (line 43) | class WanI2VCommonConfig(PipelineConfig):
    method adjust_num_frames (line 45) | def adjust_num_frames(self, num_frames):
  class WanT2V480PConfig (line 59) | class WanT2V480PConfig(PipelineConfig):
    method __post_init__ (line 90) | def __post_init__(self):
  class TurboWanT2V480PConfig (line 96) | class TurboWanT2V480PConfig(WanT2V480PConfig):
  class WanT2V720PConfig (line 106) | class WanT2V720PConfig(WanT2V480PConfig):
  class WanI2V480PConfig (line 116) | class WanI2V480PConfig(WanT2V480PConfig, WanI2VCommonConfig):
    method postprocess_image (line 132) | def postprocess_image(self, image):
    method __post_init__ (line 135) | def __post_init__(self) -> None:
  class WanI2V720PConfig (line 141) | class WanI2V720PConfig(WanI2V480PConfig):
  class TurboWanI2V720Config (line 152) | class TurboWanI2V720Config(WanI2V720PConfig):
    method __post_init__ (line 159) | def __post_init__(self) -> None:
  class FastWan2_1_T2V_480P_Config (line 164) | class FastWan2_1_T2V_480P_Config(WanT2V480PConfig):
  class Wan2_2_TI2V_5B_Config (line 177) | class Wan2_2_TI2V_5B_Config(WanT2V480PConfig, WanI2VCommonConfig):
    method prepare_latent_shape (line 184) | def prepare_latent_shape(self, batch, batch_size, num_frames):
    method __post_init__ (line 193) | def __post_init__(self) -> None:
  class FastWan2_2_TI2V_5B_Config (line 200) | class FastWan2_2_TI2V_5B_Config(Wan2_2_TI2V_5B_Config):
  class Wan2_2_T2V_A14B_Config (line 208) | class Wan2_2_T2V_A14B_Config(WanT2V480PConfig):
    method __post_init__ (line 212) | def __post_init__(self) -> None:
  class Wan2_2_I2V_A14B_Config (line 217) | class Wan2_2_I2V_A14B_Config(WanI2V480PConfig):
    method __post_init__ (line 221) | def __post_init__(self) -> None:
  class SelfForcingWanT2V480PConfig (line 230) | class SelfForcingWanT2V480PConfig(WanT2V480PConfig):

FILE: python/sglang/multimodal_gen/configs/pipeline_configs/zimage.py
  function zimage_preprocess_text (line 26) | def zimage_preprocess_text(prompt: str):
  function zimage_postprocess_text (line 33) | def zimage_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) ->...
  class TransformersModelConfig (line 39) | class TransformersModelConfig(EncoderConfig):
  class ZImagePipelineConfig (line 44) | class ZImagePipelineConfig(ImagePipelineConfig):
    method tokenize_prompt (line 64) | def tokenize_prompt(self, prompts: list[str], tokenizer, tok_kwargs) -...
    method _ceil_to_multiple (line 80) | def _ceil_to_multiple(x: int, m: int) -> int:
    method _build_zimage_sp_plan (line 85) | def _build_zimage_sp_plan(self, batch) -> dict:
    method _get_zimage_sp_plan (line 145) | def _get_zimage_sp_plan(self, batch) -> dict:
    method _shard_cap (line 152) | def _shard_cap(self, cap: torch.Tensor, plan: dict) -> torch.Tensor:
    method get_pos_prompt_embeds (line 165) | def get_pos_prompt_embeds(self, batch):
    method shard_latents_for_sp (line 172) | def shard_latents_for_sp(self, batch, latents):
    method gather_latents_for_sp (line 199) | def gather_latents_for_sp(self, latents):
    method post_denoising_loop (line 206) | def post_denoising_loop(self, latents, batch):
    method get_freqs_cis (line 222) | def get_freqs_cis(self, prompt_embeds, width, height, device, rotary_e...
    method prepare_pos_cond_kwargs (line 306) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype):
    method prepare_neg_cond_kwargs (line 318) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype):

FILE: python/sglang/multimodal_gen/configs/quantization.py
  class NunchakuSVDQuantArgs (line 23) | class NunchakuSVDQuantArgs:
    method _adjust_config (line 36) | def _adjust_config(self) -> None:
    method validate (line 75) | def validate(self) -> None:
    method add_cli_args (line 128) | def add_cli_args(parser) -> None:
    method from_dict (line 164) | def from_dict(cls, kwargs: dict[str, Any]) -> "NunchakuSVDQuantArgs":

FILE: python/sglang/multimodal_gen/configs/sample/diffusers_generic.py
  class DiffusersGenericSamplingParams (line 18) | class DiffusersGenericSamplingParams(SamplingParams):
    method __post_init__ (line 44) | def __post_init__(self) -> None:

FILE: python/sglang/multimodal_gen/configs/sample/flux.py
  class FluxSamplingParams (line 11) | class FluxSamplingParams(SamplingParams):
  class Flux2KleinSamplingParams (line 23) | class Flux2KleinSamplingParams(FluxSamplingParams):

FILE: python/sglang/multimodal_gen/configs/sample/glmimage.py
  class GlmImageSamplingParams (line 7) | class GlmImageSamplingParams(SamplingParams):

FILE: python/sglang/multimodal_gen/configs/sample/helios.py
  class HeliosT2VSamplingParams (line 8) | class HeliosT2VSamplingParams(SamplingParams):
  class HeliosMidSamplingParams (line 39) | class HeliosMidSamplingParams(HeliosT2VSamplingParams):
  class HeliosDistilledSamplingParams (line 46) | class HeliosDistilledSamplingParams(HeliosT2VSamplingParams):

FILE: python/sglang/multimodal_gen/configs/sample/hunyuan.py
  class HunyuanSamplingParams (line 11) | class HunyuanSamplingParams(SamplingParams):
  class FastHunyuanSamplingParam (line 54) | class FastHunyuanSamplingParam(HunyuanSamplingParams):

FILE: python/sglang/multimodal_gen/configs/sample/hunyuan3d.py
  class Hunyuan3DSamplingParams (line 10) | class Hunyuan3DSamplingParams(SamplingParams):
    method __post_init__ (line 21) | def __post_init__(self):

FILE: python/sglang/multimodal_gen/configs/sample/ltx_2.py
  class LTX2SamplingParams (line 7) | class LTX2SamplingParams(SamplingParams):

FILE: python/sglang/multimodal_gen/configs/sample/mova.py
  class MOVASamplingParams (line 8) | class MOVASamplingParams(SamplingParams):
  class MOVA_360P_SamplingParams (line 33) | class MOVA_360P_SamplingParams(MOVASamplingParams):
  class MOVA_720P_SamplingParams (line 48) | class MOVA_720P_SamplingParams(MOVASamplingParams):

FILE: python/sglang/multimodal_gen/configs/sample/qwenimage.py
  class QwenImageSamplingParams (line 10) | class QwenImageSamplingParams(SamplingParams):
  class QwenImage2512SamplingParams (line 19) | class QwenImage2512SamplingParams(QwenImageSamplingParams):
  class QwenImageEditPlusSamplingParams (line 26) | class QwenImageEditPlusSamplingParams(QwenImageSamplingParams):
  class QwenImageLayeredSamplingParams (line 34) | class QwenImageLayeredSamplingParams(QwenImageSamplingParams):

FILE: python/sglang/multimodal_gen/configs/sample/sampling_params.py
  function _json_safe (line 28) | def _json_safe(obj: Any):
  function generate_request_id (line 44) | def generate_request_id() -> str:
  function _sanitize_filename (line 48) | def _sanitize_filename(name: str, replacement: str = "_", max_length: in...
  class DataType (line 69) | class DataType(Enum):
    method get_default_extension (line 74) | def get_default_extension(self) -> str:
  class SamplingParams (line 83) | class SamplingParams:
    method _set_output_file_ext (line 187) | def _set_output_file_ext(self):
    method _set_output_file_name (line 197) | def _set_output_file_name(self):
    method __post_init__ (line 230) | def __post_init__(self) -> None:
    method _adjust_output_quality (line 251) | def _adjust_output_quality(self, output_quality: str, data_type: DataT...
    method _validate (line 258) | def _validate(self):
    method check_sampling_param (line 344) | def check_sampling_param(self):
    method _validate_with_pipeline_config (line 348) | def _validate_with_pipeline_config(self, pipeline_config):
    method _adjust (line 366) | def _adjust(
    method from_pretrained (line 522) | def from_pretrained(cls, model_path: str, **kwargs) -> "SamplingParams":
    method from_user_sampling_params_args (line 532) | def from_user_sampling_params_args(
    method output_size_str (line 590) | def output_size_str(self) -> str:
    method seconds (line 593) | def seconds(self) -> float:
    method add_cli_args (line 597) | def add_cli_args(parser: Any) -> Any:
    method get_cli_args (line 882) | def get_cli_args(cls, args: argparse.Namespace):
    method output_file_path (line 906) | def output_file_path(self):
    method _merge_with_user_params (line 911) | def _merge_with_user_params(
    method n_tokens (line 947) | def n_tokens(self) -> int:
  class CacheParams (line 962) | class CacheParams:

FILE: python/sglang/multimodal_gen/configs/sample/sana.py
  class SanaSamplingParams (line 13) | class SanaSamplingParams(SamplingParams):

FILE: python/sglang/multimodal_gen/configs/sample/teacache.py
  class TeaCacheParams (line 10) | class TeaCacheParams(CacheParams):
  class WanTeaCacheParams (line 17) | class WanTeaCacheParams(CacheParams):
    method coefficients (line 26) | def coefficients(self) -> list[float]:
    method ret_steps (line 33) | def ret_steps(self) -> int:
    method get_cutoff_steps (line 39) | def get_cutoff_steps(self, num_inference_steps: int) -> int:

FILE: python/sglang/multimodal_gen/configs/sample/wan.py
  class WanT2V_1_3B_SamplingParams (line 11) | class WanT2V_1_3B_SamplingParams(SamplingParams):
  class WanT2V_14B_SamplingParams (line 55) | class WanT2V_14B_SamplingParams(SamplingParams):
  class WanI2V_14B_480P_SamplingParam (line 102) | class WanI2V_14B_480P_SamplingParam(WanT2V_1_3B_SamplingParams):
  class WanI2V_14B_720P_SamplingParam (line 138) | class WanI2V_14B_720P_SamplingParam(WanT2V_14B_SamplingParams):
  class FastWanT2V480PConfig (line 176) | class FastWanT2V480PConfig(WanT2V_1_3B_SamplingParams):
  class Wan2_1_Fun_1_3B_InP_SamplingParams (line 190) | class Wan2_1_Fun_1_3B_InP_SamplingParams(SamplingParams):
  class Wan2_2_Base_SamplingParams (line 208) | class Wan2_2_Base_SamplingParams(SamplingParams):
  class Wan2_2_TI2V_5B_SamplingParam (line 222) | class Wan2_2_TI2V_5B_SamplingParam(Wan2_2_Base_SamplingParams):
  class Wan2_2_T2V_A14B_SamplingParam (line 242) | class Wan2_2_T2V_A14B_SamplingParam(Wan2_2_Base_SamplingParams):
  class Wan2_2_I2V_A14B_SamplingParam (line 262) | class Wan2_2_I2V_A14B_SamplingParam(Wan2_2_Base_SamplingParams):
  class Turbo_Wan2_2_I2V_A14B_SamplingParam (line 282) | class Turbo_Wan2_2_I2V_A14B_SamplingParam(Wan2_2_Base_SamplingParams):
  class SelfForcingWanT2V480PConfig (line 293) | class SelfForcingWanT2V480PConfig(WanT2V_1_3B_SamplingParams):

FILE: python/sglang/multimodal_gen/configs/sample/zimage.py
  class ZImageTurboSamplingParams (line 11) | class ZImageTurboSamplingParams(SamplingParams):
  class ZImageSamplingParams (line 38) | class ZImageSamplingParams(SamplingParams):

FILE: python/sglang/multimodal_gen/configs/utils.py
  function update_config_from_args (line 7) | def update_config_from_args(
  function clean_cli_args (line 52) | def clean_cli_args(args: argparse.Namespace) -> dict[str, Any]:

FILE: python/sglang/multimodal_gen/csrc/attn/vmoba_attn/tests/test_vmoba_attn.py
  function generate_test_data (line 9) | def generate_test_data(
  function test_moba_attn_varlen_forward (line 80) | def test_moba_attn_varlen_forward(

FILE: python/sglang/multimodal_gen/csrc/attn/vmoba_attn/vmoba/vmoba.py
  function _unsupported (line 20) | def _unsupported(*args, **kwargs):
  function calc_chunks (line 35) | def calc_chunks(cu_seqlen, moba_chunk_size):
  function _select_threshold_query_head (line 76) | def _select_threshold_query_head(
  function _select_threshold_block (line 150) | def _select_threshold_block(
  function _select_threshold_overall (line 231) | def _select_threshold_overall(
  function _select_threshold_head_global (line 329) | def _select_threshold_head_global(
  class MixedAttention (line 403) | class MixedAttention(torch.autograd.Function):
    method forward (line 405) | def forward(
    method backward (line 512) | def backward(ctx, d_output):
  function moba_attn_varlen (line 602) | def moba_attn_varlen(
  function process_moba_input (line 858) | def process_moba_input(
  function process_moba_output (line 929) | def process_moba_output(
  function generate_data (line 962) | def generate_data(batch_size, seqlen, num_head, head_dim, dtype):
  function test_attn_varlen_moba_speed (line 989) | def test_attn_varlen_moba_speed(

FILE: python/sglang/multimodal_gen/csrc/render/hunyuan3d_rasterizer/__init__.py
  function _load_custom_rasterizer (line 20) | def _load_custom_rasterizer():
  function rasterize (line 42) | def rasterize(
  function interpolate (line 65) | def interpolate(

FILE: python/sglang/multimodal_gen/csrc/render/hunyuan3d_rasterizer/rasterizer.cpp
  function rasterizeTriangleCPU (line 7) | void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, i...
  function barycentricFromImgcoordCPU (line 45) | void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* ...
  function rasterizeImagecoordsKernelCPU (line 84) | void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zb...
  function rasterize_image_cpu (line 97) | std::vector<torch::Tensor> rasterize_image_cpu(torch::Tensor V, torch::T...
  function rasterize_image (line 128) | std::vector<torch::Tensor> rasterize_image(torch::Tensor V, torch::Tenso...
  function PYBIND11_MODULE (line 138) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: python/sglang/multimodal_gen/csrc/render/hunyuan3d_rasterizer/rasterizer.h
  function calculateSignedArea2 (line 16) | inline float calculateSignedArea2(float* a, float* b, float* c) {
  function calculateBarycentricCoordinate (line 20) | inline void calculateBarycentricCoordinate(float* a, float* b, float* c,...
  function isBarycentricCoordInBounds (line 41) | inline bool isBarycentricCoordInBounds(float* barycentricCoord) {

FILE: python/sglang/multimodal_gen/csrc/render/mesh_processor/__init__.py
  function _load_mesh_processor (line 20) | def _load_mesh_processor():
  function meshVerticeInpaint (line 40) | def meshVerticeInpaint(

FILE: python/sglang/multimodal_gen/csrc/render/mesh_processor/mesh_processor.cpp
  function meshVerticeInpaint_smooth (line 17) | std::pair<py::array_t<float>,
  function meshVerticeInpaint (line 146) | std::pair<py::array_t<float>, py::array_t<uint8_t>> meshVerticeInpaint(p...
  function PYBIND11_MODULE (line 157) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: python/sglang/multimodal_gen/envs.py
  function get_default_cache_root (line 62) | def get_default_cache_root() -> str:
  function get_default_config_root (line 69) | def get_default_config_root() -> str:
  function maybe_convert_int (line 76) | def maybe_convert_int(value: str | None) -> int | None:
  function _lazy_str (line 81) | def _lazy_str(key: str, default: str | None = None) -> Callable[[], str ...
  function _lazy_int (line 85) | def _lazy_int(key: str, default: str | int | None = None) -> Callable[[]...
  function _lazy_float (line 95) | def _lazy_float(key: str, default: str | float) -> Callable[[], float]:
  function _lazy_bool (line 99) | def _lazy_bool(key: str, default: str = "false") -> Callable[[], bool]:
  function _lazy_bool_any (line 103) | def _lazy_bool_any(keys: list[str], default: str = "false") -> Callable[...
  function _lazy_path (line 117) | def _lazy_path(
  function _create_secondary_getter (line 295) | def _create_secondary_getter(suffix, type_func, default_val):
  function _secondary_taylorseer_getter (line 314) | def _secondary_taylorseer_getter():
  function __getattr__ (line 327) | def __getattr__(name: str):
  function __dir__ (line 334) | def __dir__():

FILE: python/sglang/multimodal_gen/registry.py
  function _discover_and_register_pipelines (line 139) | def _discover_and_register_pipelines():
  function get_pipeline_config_classes (line 192) | def get_pipeline_config_classes(
  class ConfigInfo (line 205) | class ConfigInfo:
  function register_configs (line 223) | def register_configs(
  function get_model_short_name (line 251) | def get_model_short_name(model_id: str) -> str:
  function _get_config_info (line 259) | def _get_config_info(
  class ModelInfo (line 330) | class ModelInfo:
  function _get_diffusers_model_info (line 341) | def _get_diffusers_model_info(
  function get_model_info (line 397) | def get_model_info(
  function _register_configs (line 528) | def _register_configs():
  function is_known_non_diffusers_multimodal_model (line 829) | def is_known_non_diffusers_multimodal_model(model_path: str) -> bool:
  function get_non_diffusers_pipeline_name (line 836) | def get_non_diffusers_pipeline_name(model_path: str) -> Optional[str]:

FILE: python/sglang/multimodal_gen/runtime/cache/cache_dit_integration.py
  function _patch_cache_dit_similarity (line 41) | def _patch_cache_dit_similarity():
  function _build_parallelism_config (line 105) | def _build_parallelism_config(
  function _mark_transformer_parallelized (line 130) | def _mark_transformer_parallelized(transformer, config, sp_group, tp_gro...
  function get_scm_mask (line 138) | def get_scm_mask(
  class CacheDitConfig (line 182) | class CacheDitConfig:
  function enable_cache_on_transformer (line 225) | def enable_cache_on_transformer(
  function enable_cache_on_dual_transformer (line 339) | def enable_cache_on_dual_transformer(
  function refresh_context_on_transformer (line 527) | def refresh_context_on_transformer(
  function refresh_context_on_dual_transformer (line 548) | def refresh_context_on_dual_transformer(

FILE: python/sglang/multimodal_gen/runtime/cache/teacache.py
  class TeaCacheContext (line 33) | class TeaCacheContext:
  class TeaCacheMixin (line 59) | class TeaCacheMixin:
    method _init_teacache_state (line 132) | def _init_teacache_state(self) -> None:
    method reset_teacache_state (line 155) | def reset_teacache_state(self) -> None:
    method _compute_l1_and_decide (line 171) | def _compute_l1_and_decide(
    method _compute_teacache_decision (line 218) | def _compute_teacache_decision(
    method _get_teacache_context (line 259) | def _get_teacache_context(self) -> TeaCacheContext | None:
    method maybe_cache_states (line 304) | def maybe_cache_states(
    method should_skip_forward_for_cached_states (line 310) | def should_skip_forward_for_cached_states(self, **kwargs: dict[str, An...
    method retrieve_cached_states (line 314) | def retrieve_cached_states(self, hidden_states: torch.Tensor) -> torch...

FILE: python/sglang/multimodal_gen/runtime/distributed/__init__.py
  function _get_folding_tp_group (line 61) | def _get_folding_tp_group(

FILE: python/sglang/multimodal_gen/runtime/distributed/communication_op.py
  function tensor_model_parallel_all_reduce (line 16) | def tensor_model_parallel_all_reduce(
  function tensor_model_parallel_all_gather (line 24) | def tensor_model_parallel_all_gather(
  function sequence_model_parallel_all_to_all_4D (line 33) | def sequence_model_parallel_all_to_all_4D(
  function sequence_model_parallel_all_gather (line 40) | def sequence_model_parallel_all_gather(
  function cfg_model_parallel_all_gather (line 47) | def cfg_model_parallel_all_gather(
  function cfg_model_parallel_all_reduce (line 54) | def cfg_model_parallel_all_reduce(

FILE: python/sglang/multimodal_gen/runtime/distributed/device_communicators/base_device_communicator.py
  class DistributedAutograd (line 14) | class DistributedAutograd:
    class AllReduce (line 22) | class AllReduce(torch.autograd.Function):
      method forward (line 30) | def forward(
      method backward (line 43) | def backward(ctx: Any, grad_output: Tensor) -> tuple[None, Tensor, N...
    class AllGather (line 48) | class AllGather(torch.autograd.Function):
      method forward (line 56) | def forward(
      method backward (line 82) | def backward(ctx: Any, grad_output: Tensor) -> tuple[None, Tensor, N...
    class AllToAll4D (line 102) | class AllToAll4D(torch.autograd.Function):
      method forward (line 114) | def forward(
      method backward (line 185) | def backward(
  class DeviceCommunicatorBase (line 198) | class DeviceCommunicatorBase:
    method __init__ (line 206) | def __init__(
    method all_reduce (line 224) | def all_reduce(
    method all_gather (line 230) | def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Ten...
    method all_to_all_4D (line 238) | def all_to_all_4D(
    method gather (line 246) | def gather(
    method send (line 277) | def send(self, tensor: torch.Tensor, dst: int | None = None) -> None:
    method recv (line 284) | def recv(
    method destroy (line 296) | def destroy(self) -> None:

FILE: python/sglang/multimodal_gen/runtime/distributed/device_communicators/cpu_communicator.py
  class CpuCommunicator (line 14) | class CpuCommunicator(DeviceCommunicatorBase):
    method __init__ (line 16) | def __init__(
    method all_reduce (line 36) | def all_reduce(
    method gather (line 44) | def gather(
    method all_gather (line 77) | def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Ten...
  class _CPUSHMDistributed (line 106) | class _CPUSHMDistributed:
    method __init__ (line 108) | def __init__(self, communicator: CpuCommunicator):
    method _init_cpu_shm (line 120) | def _init_cpu_shm(self) -> int:
    method all_reduce (line 135) | def all_reduce(
    method gather (line 140) | def gather(
    method all_gather_into_tensor (line 155) | def all_gather_into_tensor(

FILE: python/sglang/multimodal_gen/runtime/distributed/device_communicators/cuda_communicator.py
  class CudaCommunicator (line 14) | class CudaCommunicator(DeviceCommunicatorBase):
    method __init__ (line 16) | def __init__(
    method all_reduce (line 36) | def all_reduce(self, input_, op: torch.distributed.ReduceOp | None = N...
    method send (line 49) | def send(self, tensor: torch.Tensor, dst: int | None = None) -> None:
    method recv (line 61) | def recv(
    method destroy (line 77) | def destroy(self) -> None:

FILE: python/sglang/multimodal_gen/runtime/distributed/device_communicators/pynccl.py
  class PyNcclCommunicator (line 27) | class PyNcclCommunicator:
    method __init__ (line 29) | def __init__(
    method all_reduce (line 119) | def all_reduce(
    method all_gather (line 147) | def all_gather(
    method reduce_scatter (line 170) | def reduce_scatter(
    method send (line 198) | def send(self, tensor: torch.Tensor, dst: int, stream=None):
    method recv (line 216) | def recv(self, tensor: torch.Tensor, src: int, stream=None):
    method broadcast (line 234) | def broadcast(self, tensor: torch.Tensor, src: int, stream=None):

FILE: python/sglang/multimodal_gen/runtime/distributed/device_communicators/pynccl_wrapper.py
  class ncclUniqueId (line 50) | class ncclUniqueId(ctypes.Structure):
  class ncclDataTypeEnum (line 60) | class ncclDataTypeEnum:
    method from_torch (line 79) | def from_torch(cls, dtype: torch.dtype) -> int:
  class ncclRedOpTypeEnum (line 102) | class ncclRedOpTypeEnum:
    method from_torch (line 111) | def from_torch(cls, op: ReduceOp) -> int:
  class Function (line 126) | class Function:
  class NCCLLibrary (line 132) | class NCCLLibrary:
    method __init__ (line 268) | def __init__(self, so_file: str | None = None):
    method ncclGetErrorString (line 301) | def ncclGetErrorString(self, result: ncclResult_t) -> str:
    method NCCL_CHECK (line 304) | def NCCL_CHECK(self, result: ncclResult_t) -> None:
    method ncclGetVersion (line 309) | def ncclGetVersion(self) -> str:
    method ncclGetUniqueId (line 319) | def ncclGetUniqueId(self) -> ncclUniqueId:
    method ncclCommInitRank (line 324) | def ncclCommInitRank(
    method ncclAllReduce (line 335) | def ncclAllReduce(
    method ncclReduceScatter (line 356) | def ncclReduceScatter(
    method ncclAllGather (line 377) | def ncclAllGather(
    method ncclSend (line 396) | def ncclSend(
    method ncclRecv (line 409) | def ncclRecv(
    method ncclBroadcast (line 422) | def ncclBroadcast(
    method ncclCommDestroy (line 438) | def ncclCommDestroy(self, comm: ncclComm_t) -> None:

FILE: python/sglang/multimodal_gen/runtime/distributed/group_coordinator.py
  function get_local_torch_device (line 45) | def get_local_torch_device() -> torch.device:
  function _get_unique_name (line 51) | def _get_unique_name(name: str) -> str:
  function _split_tensor_dict (line 64) | def _split_tensor_dict(
  function _update_nested_dict (line 108) | def _update_nested_dict(nested_dict, flattened_key, value):
  class GraphCaptureContext (line 119) | class GraphCaptureContext:
  class GroupCoordinator (line 123) | class GroupCoordinator:
    method __init__ (line 152) | def __init__(
    method first_rank (line 220) | def first_rank(self):
    method last_rank (line 225) | def last_rank(self):
    method is_first_rank (line 230) | def is_first_rank(self):
    method is_last_rank (line 235) | def is_last_rank(self):
    method next_rank (line 240) | def next_rank(self):
    method prev_rank (line 247) | def prev_rank(self):
    method group_next_rank (line 254) | def group_next_rank(self):
    method group_prev_rank (line 261) | def group_prev_rank(self):
    method skip_rank (line 268) | def skip_rank(self):
    method group_skip_rank (line 275) | def group_skip_rank(self):
    method graph_capture (line 282) | def graph_capture(self, graph_capture_context: GraphCaptureContext | N...
    method all_to_all_4D (line 305) | def all_to_all_4D(
    method all_reduce (line 312) | def all_reduce(
    method all_gather (line 332) | def all_gather(
    method gather (line 380) | def gather(self, input_: torch.Tensor, dst: int = 0, dim: int = -1) ->...
    method broadcast (line 411) | def broadcast(self, input_: torch.Tensor, src: int = 0, async_op: bool...
    method broadcast_object (line 429) | def broadcast_object(self, obj: Optional[Any] = None, src: int = 0):
    method broadcast_object_list (line 453) | def broadcast_object_list(
    method send_object (line 473) | def send_object(self, obj: Any, dst: int) -> None:
    method recv_object (line 500) | def recv_object(self, src: int) -> Any:
    method broadcast_tensor_dict (line 536) | def broadcast_tensor_dict(
    method send_tensor_dict (line 616) | def send_tensor_dict(
    method recv_tensor_dict (line 658) | def recv_tensor_dict(
    method barrier (line 697) | def barrier(self):
    method send (line 706) | def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None:
    method recv (line 722) | def recv(
    method destroy (line 742) | def destroy(self) -> None:
  class PipelineGroupCoordinator (line 755) | class PipelineGroupCoordinator(GroupCoordinator):
    method __init__ (line 774) | def __init__(
    method reset_buffer (line 866) | def reset_buffer(self):
    method set_config (line 877) | def set_config(self, dtype: torch.dtype):
    method set_recv_buffer (line 880) | def set_recv_buffer(
    method set_extra_tensors_recv_buffer (line 902) | def set_extra_tensors_recv_buffer(
    method _check_shape_and_buffer (line 914) | def _check_shape_and_buffer(
    method _communicate_shapes (line 966) | def _communicate_shapes(self, tensor_send_to_next=None, recv_prev=False):
    method pipeline_send (line 1053) | def pipeline_send(
    method pipeline_isend (line 1062) | def pipeline_isend(
    method pipeline_recv (line 1071) | def pipeline_recv(self, idx: int = -1, name: str = "latent") -> torch....
    method add_pipeline_recv_task (line 1077) | def add_pipeline_recv_task(self, idx: int = -1, name: str = "latent"):
    method recv_next (line 1081) | def recv_next(self):
    method get_pipeline_recv_data (line 1091) | def get_pipeline_recv_data(
    method _pipeline_irecv (line 1104) | def _pipeline_irecv(self, tensor: torch.tensor):
    method _pipeline_isend (line 1115) | def _pipeline_isend(self, tensor: torch.tensor):
    method set_skip_tensor_recv_buffer (line 1126) | def set_skip_tensor_recv_buffer(
    method pipeline_send_skip (line 1140) | def pipeline_send_skip(self, tensor: torch.Tensor) -> None:
    method pipeline_isend_skip (line 1144) | def pipeline_isend_skip(self, tensor: torch.Tensor) -> None:
    method pipeline_recv_skip (line 1148) | def pipeline_recv_skip(self, idx: int = -1) -> torch.Tensor:
    method add_pipeline_recv_skip_task (line 1152) | def add_pipeline_recv_skip_task(self, idx: int = -1):
    method get_pipeline_recv_skip_data (line 1155) | def get_pipeline_recv_skip_data(self, idx: int = -1) -> torch.Tensor:
    method recv_skip_next (line 1166) | def recv_skip_next(self):
    method _pipeline_irecv_skip (line 1180) | def _pipeline_irecv_skip(self, tensor: torch.tensor):
    method _pipeline_isend_skip (line 1185) | def _pipeline_isend_skip(self, tensor: torch.tensor):
  class SequenceParallelGroupCoordinator (line 1191) | class SequenceParallelGroupCoordinator(GroupCoordinator):
    method __init__ (line 1192) | def __init__(

FILE: python/sglang/multimodal_gen/runtime/distributed/parallel_groups.py
  class Singleton (line 7) | class Singleton:
    method __new__ (line 10) | def __new__(cls, *args, **kwargs):
  class ProcessGroupSingleton (line 16) | class ProcessGroupSingleton(Singleton):
    method __init__ (line 17) | def __init__(self):
  function set_seq_parallel_pg_by_sp_groups (line 25) | def set_seq_parallel_pg_by_sp_groups(

FILE: python/sglang/multimodal_gen/runtime/distributed/parallel_state.py
  function _split_tensor_dict (line 74) | def _split_tensor_dict(
  function _register_group (line 103) | def _register_group(group: "GroupCoordinator") -> None:
  function all_reduce (line 107) | def all_reduce(tensor: torch.Tensor, group_name: str) -> torch.Tensor:
  function all_reduce_fake (line 115) | def all_reduce_fake(tensor: torch.Tensor, group_name: str) -> torch.Tensor:
  function get_world_group (line 119) | def get_world_group() -> GroupCoordinator:
  function init_world_group (line 124) | def init_world_group(
  function init_parallel_group_coordinator (line 136) | def init_parallel_group_coordinator(
  function get_tp_group (line 176) | def get_tp_group() -> GroupCoordinator:
  function init_distributed_environment (line 181) | def init_distributed_environment(
  function get_sp_group (line 258) | def get_sp_group() -> SequenceParallelGroupCoordinator:
  function get_dp_group (line 263) | def get_dp_group() -> GroupCoordinator:
  function initialize_model_parallel (line 269) | def initialize_model_parallel(
  function get_sp_world_size (line 434) | def get_sp_world_size() -> int:
  function get_sp_parallel_rank (line 439) | def get_sp_parallel_rank() -> int:
  function get_world_size (line 444) | def get_world_size() -> int:
  function get_world_rank (line 449) | def get_world_rank() -> int:
  function get_dp_world_size (line 454) | def get_dp_world_size() -> int:
  function get_dp_rank (line 459) | def get_dp_rank() -> int:
  function maybe_init_distributed_environment_and_model_parallel (line 464) | def maybe_init_distributed_environment_and_model_parallel(
  function model_parallel_is_initialized (line 524) | def model_parallel_is_initialized() -> bool:
  function patch_tensor_parallel_group (line 539) | def patch_tensor_parallel_group(tp_group: GroupCoordinator):
  function get_tp_world_size (line 561) | def get_tp_world_size() -> int:
  function get_tp_rank (line 566) | def get_tp_rank() -> int:
  function destroy_distributed_environment (line 571) | def destroy_distributed_environment() -> None:
  function cleanup_dist_env_and_memory (line 580) | def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
  function is_the_same_node_as (line 591) | def is_the_same_node_as(
  function get_tensor_model_parallel_world_size (line 681) | def get_tensor_model_parallel_world_size() -> int:
  function get_tensor_model_parallel_rank (line 686) | def get_tensor_model_parallel_rank() -> int:
  function get_sequence_parallel_world_size (line 691) | def get_sequence_parallel_world_size() -> int:
  function get_sequence_parallel_rank (line 696) | def get_sequence_parallel_rank() -> int:
  function get_ulysses_parallel_world_size (line 701) | def get_ulysses_parallel_world_size() -> int:
  function get_ulysses_parallel_rank (line 705) | def get_ulysses_parallel_rank() -> int:
  function get_ring_parallel_world_size (line 709) | def get_ring_parallel_world_size() -> int:
  function get_ring_parallel_rank (line 713) | def get_ring_parallel_rank() -> int:
  function get_pp_group (line 718) | def get_pp_group() -> PipelineGroupCoordinator:
  function get_pipeline_parallel_world_size (line 723) | def get_pipeline_parallel_world_size() -> int:
  function get_pipeline_parallel_rank (line 728) | def get_pipeline_parallel_rank() -> int:
  function is_pipeline_first_stage (line 733) | def is_pipeline_first_stage() -> bool:
  function is_pipeline_last_stage (line 738) | def is_pipeline_last_stage() -> bool:
  function get_cfg_group (line 744) | def get_cfg_group() -> GroupCoordinator:
  function get_classifier_free_guidance_world_size (line 751) | def get_classifier_free_guidance_world_size() -> int:
  function get_classifier_free_guidance_rank (line 756) | def get_classifier_free_guidance_rank() -> int:
  function get_data_parallel_world_size (line 761) | def get_data_parallel_world_size() -> int:
  function get_data_parallel_rank (line 766) | def get_data_parallel_rank() -> int:
  function is_dp_last_group (line 771) | def is_dp_last_group() -> bool:
  function get_dit_world_size (line 781) | def get_dit_world_size() -> int:
  function get_vae_parallel_group (line 792) | def get_vae_parallel_group() -> ProcessGroup:
  function get_vae_parallel_world_size (line 797) | def get_vae_parallel_world_size() -> int:
  function get_vae_parallel_rank (line 802) | def get_vae_parallel_rank() -> int:
  function init_dit_group (line 807) | def init_dit_group(
  function get_dit_group (line 818) | def get_dit_group() -> ProcessGroup:
  function init_vae_group (line 823) | def init_vae_group(
  function destroy_model_parallel (line 835) | def destroy_model_parallel() -> None:

FILE: python/sglang/multimodal_gen/runtime/distributed/utils.py
  function ensure_divisibility (line 25) | def ensure_divisibility(numerator, denominator) -> None:
  function divide (line 32) | def divide(numerator: int, denominator: int) -> int:
  function split_tensor_along_last_dim (line 39) | def split_tensor_along_last_dim(
  class StatelessProcessGroup (line 68) | class StatelessProcessGroup:
    method __post_init__ (line 89) | def __post_init__(self):
    method send_obj (line 95) | def send_obj(self, obj: Any, dst: int):
    method expire_data (line 103) | def expire_data(self) -> None:
    method recv_obj (line 114) | def recv_obj(self, src: int) -> Any:
    method broadcast_obj (line 122) | def broadcast_obj(self, obj: Any | None, src: int) -> Any:
    method all_gather_obj (line 140) | def all_gather_obj(self, obj: Any) -> list[Any]:
    method barrier (line 152) | def barrier(self):
    method create (line 161) | def create(

FILE: python/sglang/multimodal_gen/runtime/entrypoints/cli/cli_types.py
  class CLISubcommand (line 11) | class CLISubcommand:
    method cmd (line 16) | def cmd(
    method validate (line 22) | def validate(self, args: argparse.Namespace) -> None:
    method subparser_init (line 26) | def subparser_init(

FILE: python/sglang/multimodal_gen/runtime/entrypoints/cli/generate.py
  function add_multimodal_gen_generate_args (line 34) | def add_multimodal_gen_generate_args(parser: argparse.ArgumentParser):
  function maybe_dump_performance (line 63) | def maybe_dump_performance(
  function generate_cmd (line 109) | def generate_cmd(args: argparse.Namespace, unknown_args: list[str] | Non...
  class GenerateSubcommand (line 144) | class GenerateSubcommand(CLISubcommand):
    method __init__ (line 147) | def __init__(self) -> None:
    method _get_init_arg_names (line 153) | def _get_init_arg_names(self) -> list[str]:
    method _get_generation_arg_names (line 157) | def _get_generation_arg_names(self) -> list[str]:
    method cmd (line 161) | def cmd(
    method validate (line 166) | def validate(self, args: argparse.Namespace) -> None:
    method subparser_init (line 174) | def subparser_init(

FILE: python/sglang/multimodal_gen/runtime/entrypoints/cli/main.py
  function generate_cmd_init (line 12) | def generate_cmd_init() -> list[CLISubcommand]:
  function cmd_init (line 16) | def cmd_init() -> list[CLISubcommand]:
  function main (line 23) | def main() -> None:

FILE: python/sglang/multimodal_gen/runtime/entrypoints/cli/serve.py
  function add_multimodal_gen_serve_args (line 19) | def add_multimodal_gen_serve_args(parser: argparse.ArgumentParser):
  function execute_serve_cmd (line 31) | def execute_serve_cmd(args: argparse.Namespace, unknown_args: list[str] ...
  class ServeSubcommand (line 40) | class ServeSubcommand(CLISubcommand):
    method __init__ (line 43) | def __init__(self) -> None:
    method cmd (line 47) | def cmd(
    method validate (line 52) | def validate(self, args: argparse.Namespace) -> None:
    method subparser_init (line 57) | def subparser_init(
  function cmd_init (line 71) | def cmd_init() -> list[CLISubcommand]:

FILE: python/sglang/multimodal_gen/runtime/entrypoints/cli/utils.py
  class RaiseNotImplementedAction (line 16) | class RaiseNotImplementedAction(argparse.Action):
    method __call__ (line 18) | def __call__(self, parser, namespace, values, option_string=None):
  function launch_distributed (line 22) | def launch_distributed(

FILE: python/sglang/multimodal_gen/runtime/entrypoints/diffusion_generator.py
  class DiffGenerator (line 58) | class DiffGenerator:
    method __init__ (line 66) | def __init__(
    method from_pretrained (line 84) | def from_pretrained(
    method from_server_args (line 107) | def from_server_args(
    method _start_local_server_if_needed (line 134) | def _start_local_server_if_needed(
    method _check_remote_scheduler (line 145) | def _check_remote_scheduler(self):
    method _resolve_image_paths_per_prompt (line 159) | def _resolve_image_paths_per_prompt(
    method generate (line 176) | def generate(
    method _resolve_prompts (line 337) | def _resolve_prompts(self, prompt: str | list[str] | None) -> list[str]:
    method _log_summary (line 356) | def _log_summary(self, results: list[GenerationResult]) -> None:
    method _send_to_scheduler_and_wait_for_response (line 373) | def _send_to_scheduler_and_wait_for_response(self, batch: list[Req]) -...
    method _send_lora_request (line 380) | def _send_lora_request(self, req: Any, success_msg: str, failure_msg: ...
    method set_lora (line 389) | def set_lora(
    method unmerge_lora_weights (line 427) | def unmerge_lora_weights(self, target: str = "all") -> None:
    method merge_lora_weights (line 441) | def merge_lora_weights(self, target: str = "all", strength: float = 1....
    method list_loras (line 456) | def list_loras(self) -> dict:
    method _ensure_lora_state (line 466) | def _ensure_lora_state(
    method generate_with_lora (line 494) | def generate_with_lora(
    method shutdown (line 515) | def shutdown(self):
    method __enter__ (line 541) | def __enter__(self):
    method __exit__ (line 544) | def __exit__(self, exc_type, exc_val, exc_tb):
    method __del__ (line 547) | def __del__(self):

FILE: python/sglang/multimodal_gen/runtime/entrypoints/http_server.py
  function lifespan (line 40) | async def lifespan(app: FastAPI):
  function health (line 66) | async def health():
  function get_models (line 71) | async def get_models(request: Request):
  function server_info_endpoint (line 100) | async def server_info_endpoint(request: Request):
  function model_info_endpoint (line 118) | async def model_info_endpoint(request: Request):
  function health_generate (line 157) | async def health_generate():
  function make_serializable (line 162) | def make_serializable(obj):
  function encode_video_to_base64 (line 173) | def encode_video_to_base64(file_path: str):
  function forward_to_scheduler (line 180) | async def forward_to_scheduler(
  function vertex_generate (line 236) | async def vertex_generate(vertex_req: VertexGenerateReqInput):
  function create_app (line 269) | def create_app(server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/common_api.py
  class ModelCard (line 25) | class ModelCard(BaseModel):
  class DiffusionModelCard (line 37) | class DiffusionModelCard(ModelCard):
  function _handle_lora_request (line 48) | async def _handle_lora_request(req: Any, success_msg: str, failure_msg: ...
  function set_lora (line 64) | async def set_lora(
  function merge_lora_weights (line 106) | async def merge_lora_weights(
  function unmerge_lora_weights (line 128) | async def unmerge_lora_weights(
  function model_info (line 147) | async def model_info():
  function list_loras (line 160) | async def list_loras():
  function available_models (line 177) | async def available_models():
  function retrieve_model (line 210) | async def retrieve_model(model: str):

FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/image_api.py
  function _read_b64_for_paths (line 39) | def _read_b64_for_paths(paths: list[str]) -> list[str]:
  function _build_image_response_kwargs (line 48) | def _build_image_response_kwargs(
  function generations (line 110) | async def generations(
  function edits (line 190) | async def edits(
  function download_image_content (line 324) | async def download_image_content(

FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/mesh_api.py
  function _normalize_format (line 44) | def _normalize_format(fmt: Optional[str]) -> str:
  function _build_sampling_params_from_request (line 49) | def _build_sampling_params_from_request(
  function _mesh_job_from_sampling (line 79) | def _mesh_job_from_sampling(
  function _dispatch_job_async (line 94) | async def _dispatch_job_async(job_id: str, batch: Req) -> None:
  function create_mesh (line 129) | async def create_mesh(
  function list_meshes (line 227) | async def list_meshes(
  function retrieve_mesh (line 254) | async def retrieve_mesh(mesh_id: str = Path(...)):
  function delete_mesh (line 262) | async def delete_mesh(mesh_id: str = Path(...)):
  function download_mesh_content (line 271) | async def download_mesh_content(

FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/protocol.py
  class ImageResponseData (line 11) | class ImageResponseData(BaseModel):
  class ImageResponse (line 18) | class ImageResponse(BaseModel):
  class ImageGenerationsRequest (line 26) | class ImageGenerationsRequest(BaseModel):
  class VideoResponse (line 59) | class VideoResponse(BaseModel):
  class VideoGenerationsRequest (line 79) | class VideoGenerationsRequest(BaseModel):
  class VideoListResponse (line 114) | class VideoListResponse(BaseModel):
  class VideoRemixRequest (line 119) | class VideoRemixRequest(BaseModel):
  class MeshResponse (line 124) | class MeshResponse(BaseModel):
  class MeshGenerationsRequest (line 142) | class MeshGenerationsRequest(BaseModel):
  class MeshListResponse (line 154) | class MeshListResponse(BaseModel):
  class BaseReq (line 160) | class BaseReq(ABC):
    method regenerate_rid (line 164) | def regenerate_rid(self):
  class VertexGenerateReqInput (line 174) | class VertexGenerateReqInput(BaseReq):

FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/storage.py
  class CloudStorage (line 10) | class CloudStorage:
    method __init__ (line 11) | def __init__(self):
    method is_enabled (line 43) | def is_enabled(self) -> bool:
    method upload_file (line 46) | async def upload_file(self, local_path: str, destination_key: str) -> ...
    method upload_and_cleanup (line 91) | async def upload_and_cleanup(self, file_path: str) -> Optional[str]:

FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/stores.py
  class AsyncDictStore (line 5) | class AsyncDictStore:
    method __init__ (line 13) | def __init__(self) -> None:
    method upsert (line 17) | async def upsert(self, key: str, value: Dict[str, Any]) -> None:
    method update_fields (line 21) | async def update_fields(
    method get (line 31) | async def get(self, key: str) -> Optional[Dict[str, Any]]:
    method pop (line 35) | async def pop(self, key: str) -> Optional[Dict[str, Any]]:
    method list_values (line 39) | async def list_values(self) -> List[Dict[str, Any]]:

FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/utils.py
  function temp_dir_if_disabled (line 54) | def temp_dir_if_disabled(
  function _parse_size (line 70) | def _parse_size(size: str) -> tuple[int, int] | tuple[None, None]:
  function choose_output_image_ext (line 81) | def choose_output_image_ext(
  function build_sampling_params (line 92) | def build_sampling_params(request_id: str, **kwargs) -> SamplingParams:
  function save_image_to_path (line 140) | async def save_image_to_path(image: Union[UploadFile, str], target_path:...
  function _save_upload_to_path (line 148) | async def _save_upload_to_path(upload: UploadFile, target_path: str) -> ...
  function _maybe_url_image (line 156) | async def _maybe_url_image(img_url: str, target_path: str) -> str | None:
  function _save_url_image_to_path (line 172) | async def _save_url_image_to_path(image_url: str, target_path: str) -> str:
  function _save_base64_image_to_path (line 218) | async def _save_base64_image_to_path(base64_data: str, target_path: str)...
  function process_generation_batch (line 258) | async def process_generation_batch(
  function merge_image_input_list (line 303) | def merge_image_input_list(*inputs: Union[List, Any, None]) -> List:
  function add_common_data_to_response (line 331) | def add_common_data_to_response(
  function adjust_output_quality (line 345) | def adjust_output_quality(output_quality: str, data_type: DataType = Non...

FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/video_api.py
  function _build_video_sampling_params (line 52) | def _build_video_sampling_params(request_id: str, request: VideoGenerati...
  function _video_job_from_sampling (line 88) | def _video_job_from_sampling(
  function _save_first_input_image (line 107) | async def _save_first_input_image(
  function _dispatch_job_async (line 123) | async def _dispatch_job_async(
  function create_video (line 166) | async def create_video(
  function list_videos (line 357) | async def list_videos(
  function retrieve_video (line 385) | async def retrieve_video(video_id: str = Path(...)):
  function delete_video (line 394) | async def delete_video(video_id: str = Path(...)):
  function download_video_content (line 404) | async def download_video_content(

FILE: python/sglang/multimodal_gen/runtime/entrypoints/post_training/io_struct.py
  class UpdateWeightFromDiskReqInput (line 7) | class UpdateWeightFromDiskReqInput:
  class GetWeightsChecksumReqInput (line 16) | class GetWeightsChecksumReqInput:

FILE: python/sglang/multimodal_gen/runtime/entrypoints/post_training/weights_api.py
  function update_weights_from_disk (line 16) | async def update_weights_from_disk(request: Request):
  function get_weights_checksum (line 50) | async def get_weights_checksum(request: Request):

FILE: python/sglang/multimodal_gen/runtime/entrypoints/utils.py
  class SetLoraReq (line 44) | class SetLoraReq:
  class MergeLoraWeightsReq (line 52) | class MergeLoraWeightsReq:
  class UnmergeLoraWeightsReq (line 58) | class UnmergeLoraWeightsReq:
  class ListLorasReq (line 63) | class ListLorasReq:
  class ShutdownReq (line 68) | class ShutdownReq:
  function format_lora_message (line 72) | def format_lora_message(
  class GenerationResult (line 98) | class GenerationResult:
  function _normalize_audio_to_numpy (line 116) | def _normalize_audio_to_numpy(audio: Any) -> np.ndarray | None:
  function _pick_audio_sample_rate (line 143) | def _pick_audio_sample_rate(
  function _resolve_ffmpeg_exe (line 170) | def _resolve_ffmpeg_exe() -> str:
  function _mux_audio_np_into_mp4 (line 192) | def _mux_audio_np_into_mp4(
  function _maybe_mux_audio_into_mp4 (line 243) | def _maybe_mux_audio_into_mp4(
  function prepare_request (line 281) | def prepare_request(
  function attach_audio_to_video_sample (line 314) | def attach_audio_to_video_sample(
  function save_outputs (line 334) | def save_outputs(
  function post_process_sample (line 398) | def post_process_sample(

FILE: python/sglang/multimodal_gen/runtime/launch_server.py
  function kill_process_tree (line 22) | def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid:...
  function launch_server (line 61) | def launch_server(server_args: ServerArgs, launch_http_server: bool = Tr...
  function launch_http_server_only (line 188) | def launch_http_server_only(server_args):

FILE: python/sglang/multimodal_gen/runtime/layers/activation.py
  class SiluAndMul (line 29) | class SiluAndMul(CustomOp):
    method __init__ (line 39) | def __init__(self) -> None:
    method forward_cuda (line 42) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
    method forward_native (line 49) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    method forward_npu (line 54) | def forward_npu(self, x: torch.Tensor) -> torch.Tensor:
    method forward_musa (line 58) | def forward_musa(self, x: torch.Tensor) -> torch.Tensor:
  class GeluAndMul (line 63) | class GeluAndMul(CustomOp):
    method __init__ (line 73) | def __init__(self, approximate: str = "none"):
    method forward_cuda (line 79) | def forward_cuda(self, *args, **kwargs) -> Any:
    method forward_native (line 82) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    method extra_repr (line 87) | def extra_repr(self) -> str:
  class NewGELU (line 92) | class NewGELU(CustomOp):
    method __init__ (line 94) | def __init__(self):
    method forward_cuda (line 97) | def forward_cuda(self, *args, **kwargs) -> Any:
    method forward_native (line 100) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
  class QuickGELU (line 107) | class QuickGELU(CustomOp):
    method __init__ (line 109) | def __init__(self):
    method forward_cuda (line 112) | def forward_cuda(self, *args, **kwargs) -> Any:
    method forward_native (line 115) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
  function get_act_fn (line 130) | def get_act_fn(act_fn_name: str) -> nn.Module:
  function get_act_and_mul_fn (line 145) | def get_act_and_mul_fn(act_fn_name: str) -> nn.Module:

FILE: python/sglang/multimodal_gen/runtime/layers/attention/STA_configuration.py
  function configure_sta (line 14) | def configure_sta(
  function read_specific_json_files (line 256) | def read_specific_json_files(folder_path: str) -> list[dict[str, Any]]:
  function average_head_losses (line 275) | def average_head_losses(
  function select_best_mask_strategy (line 301) | def select_best_mask_strategy(
  function save_mask_search_results (line 360) | def save_mask_search_results(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/aiter.py
  class AITerBackend (line 17) | class AITerBackend(AttentionBackend):
    method get_enum (line 23) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 27) | def get_impl_cls() -> type["AITerImpl"]:
    method get_metadata_cls (line 31) | def get_metadata_cls() -> type["AttentionMetadata"]:
    method get_builder_cls (line 36) | def get_builder_cls() -> type["AttentionMetadataBuilder"]:
  class AITerImpl (line 40) | class AITerImpl(AttentionImpl):
    method __init__ (line 45) | def __init__(
    method forward (line 63) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/aiter_sage.py
  class AITERSageBackend (line 15) | class AITERSageBackend(AttentionBackend):
    method get_enum (line 18) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 22) | def get_impl_cls() -> type["AITERSageImpl"]:
    method get_metadata_cls (line 26) | def get_metadata_cls() -> type["AttentionMetadata"]:
    method get_builder_cls (line 31) | def get_builder_cls() -> type["AttentionMetadataBuilder"]:
  class AITERSageImpl (line 37) | class AITERSageImpl(AttentionImpl):
    method __init__ (line 39) | def __init__(
    method forward (line 60) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/attention_backend.py
  class AttentionBackend (line 18) | class AttentionBackend(ABC):
    method get_enum (line 28) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 33) | def get_impl_cls() -> type["AttentionImpl"]:
    method get_metadata_cls (line 38) | def get_metadata_cls() -> type["AttentionMetadata"]:
    method get_builder_cls (line 52) | def get_builder_cls() -> type["AttentionMetadataBuilder"]:
  class AttentionMetadata (line 57) | class AttentionMetadata:
    method asdict_zerocopy (line 63) | def asdict_zerocopy(self, skip_fields: set[str] | None = None) -> dict...
  class AttentionMetadataBuilder (line 79) | class AttentionMetadataBuilder(ABC, Generic[T]):
    method __init__ (line 83) | def __init__(self) -> None:
    method prepare (line 88) | def prepare(self) -> None:
    method build (line 93) | def build(
  class AttentionLayer (line 101) | class AttentionLayer(Protocol):
    method forward (line 108) | def forward(
  class AttentionImpl (line 118) | class AttentionImpl(ABC, Generic[T]):
    method __init__ (line 121) | def __init__(
    method preprocess_qkv (line 133) | def preprocess_qkv(self, qkv: torch.Tensor, attn_metadata: T) -> torch...
    method postprocess_output (line 145) | def postprocess_output(
    method forward (line 163) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/flash_attn.py
  function flash_attn_func (line 21) | def flash_attn_func(*args, ver: int = 3, **kwargs):
  function maybe_contiguous (line 30) | def maybe_contiguous(x: Optional[torch.Tensor]) -> Optional[torch.Tensor]:
  function flash_attn_varlen_func_fake_out (line 41) | def flash_attn_varlen_func_fake_out(
  function flash_attn_varlen_func_fake_out_lse (line 101) | def flash_attn_varlen_func_fake_out_lse(
  function flash_attn_varlen_func_op (line 176) | def flash_attn_varlen_func_op(
  function flash_attn_varlen_func_op_lse (line 240) | def flash_attn_varlen_func_op_lse(
  function set_fa_ver (line 313) | def set_fa_ver(ver: int) -> None:
  class FlashAttentionMetadata (line 319) | class FlashAttentionMetadata:
  class FlashAttentionMetadataBuilder (line 331) | class FlashAttentionMetadataBuilder(AttentionMetadataBuilder):
    method __init__ (line 332) | def __init__(self) -> None:
    method prepare (line 335) | def prepare(self) -> None:
    method build (line 338) | def build(  # type: ignore
  class FlashAttentionBackend (line 347) | class FlashAttentionBackend(AttentionBackend):
    method get_supported_head_sizes (line 351) | def get_supported_head_sizes() -> list[int]:
    method get_enum (line 355) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 359) | def get_impl_cls() -> type["FlashAttentionImpl"]:
    method get_metadata_cls (line 363) | def get_metadata_cls() -> type["AttentionMetadata"]:
    method get_builder_cls (line 367) | def get_builder_cls() -> type["AttentionMetadataBuilder"]:
  class FlashAttentionImpl (line 371) | class FlashAttentionImpl(AttentionImpl):
    method __init__ (line 372) | def __init__(
    method forward (line 389) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/flash_attn_2.py
  class FlashAttention2Backend (line 22) | class FlashAttention2Backend(AttentionBackend):
    method get_supported_head_sizes (line 26) | def get_supported_head_sizes() -> list[int]:
    method get_enum (line 30) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 34) | def get_impl_cls() -> type["FlashAttention2Impl"]:
    method get_metadata_cls (line 38) | def get_metadata_cls() -> type["AttentionMetadata"]:
    method get_builder_cls (line 42) | def get_builder_cls() -> type["AttentionMetadataBuilder"]:
  class FlashAttention2Impl (line 46) | class FlashAttention2Impl(AttentionImpl):
    method __init__ (line 48) | def __init__(
    method forward (line 61) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sage_attn.py
  class SageAttentionBackend (line 20) | class SageAttentionBackend(AttentionBackend):
    method get_supported_head_sizes (line 24) | def get_supported_head_sizes() -> list[int]:
    method get_enum (line 28) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 32) | def get_impl_cls() -> type["SageAttentionImpl"]:
  class SageAttentionImpl (line 36) | class SageAttentionImpl(AttentionImpl):
    method __init__ (line 38) | def __init__(
    method forward (line 52) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sage_attn3.py
  class SageAttention3Backend (line 20) | class SageAttention3Backend(AttentionBackend):
    method get_supported_head_sizes (line 24) | def get_supported_head_sizes() -> list[int]:
    method get_enum (line 28) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 32) | def get_impl_cls() -> type["SageAttention3Impl"]:
    method get_metadata_cls (line 36) | def get_metadata_cls() -> type["AttentionMetadata"]:
  class SageAttention3Impl (line 40) | class SageAttention3Impl(AttentionImpl):
    method __init__ (line 43) | def __init__(
    method forward (line 57) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sdpa.py
  class SDPABackend (line 18) | class SDPABackend(AttentionBackend):
    method get_supported_head_sizes (line 23) | def get_supported_head_sizes() -> list[int]:
    method get_enum (line 27) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 31) | def get_impl_cls() -> type["SDPAImpl"]:
  class SDPAImpl (line 39) | class SDPAImpl(AttentionImpl):
    method __init__ (line 41) | def __init__(
    method forward (line 55) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sliding_tile_attn.py
  class RangeDict (line 37) | class RangeDict(dict):
    method __getitem__ (line 39) | def __getitem__(self, item: int) -> str:
  class SlidingTileAttentionBackend (line 50) | class SlidingTileAttentionBackend(AttentionBackend):
    method get_supported_head_sizes (line 54) | def get_supported_head_sizes() -> list[int]:
    method get_enum (line 59) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 63) | def get_impl_cls() -> type["SlidingTileAttentionImpl"]:
    method get_metadata_cls (line 67) | def get_metadata_cls() -> type["SlidingTileAttentionMetadata"]:
    method get_builder_cls (line 71) | def get_builder_cls() -> type["SlidingTileAttentionMetadataBuilder"]:
  class SlidingTileAttentionMetadata (line 76) | class SlidingTileAttentionMetadata(AttentionMetadata):
  class SlidingTileAttentionMetadataBuilder (line 83) | class SlidingTileAttentionMetadataBuilder(AttentionMetadataBuilder):
    method __init__ (line 85) | def __init__(self):
    method prepare (line 88) | def prepare(self):
    method build (line 91) | def build(  # type: ignore
  class SlidingTileAttentionImpl (line 107) | class SlidingTileAttentionImpl(AttentionImpl):
    method __init__ (line 109) | def __init__(
    method tile (line 152) | def tile(self, x: torch.Tensor) -> torch.Tensor:
    method untile (line 164) | def untile(self, x: torch.Tensor) -> torch.Tensor:
    method preprocess_qkv (line 177) | def preprocess_qkv(
    method postprocess_output (line 193) | def postprocess_output(
    method forward (line 200) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sparse_linear_attn.py
  function get_block_map (line 40) | def get_block_map(q, k, topk_ratio, BLKQ=64, BLKK=64):
  function mean_pool (line 57) | def mean_pool(x, BLK):
  function compress_kernel (line 70) | def compress_kernel(
  function _attn_fwd (line 95) | def _attn_fwd(
  function _get_cuda_arch (line 162) | def _get_cuda_arch(device_index: int) -> str:
  class SparseLinearAttentionBackend (line 169) | class SparseLinearAttentionBackend(AttentionBackend):
    method get_supported_head_sizes (line 175) | def get_supported_head_sizes() -> list[int]:
    method get_enum (line 179) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 183) | def get_impl_cls() -> type["SparseLinearAttentionImpl"]:
    method get_metadata_cls (line 187) | def get_metadata_cls() -> type["SparseLinearAttentionMetadata"]:
    method get_builder_cls (line 191) | def get_builder_cls() -> type["SparseLinearAttentionMetadataBuilder"]:
  class SparseLinearAttentionMetadata (line 196) | class SparseLinearAttentionMetadata(AttentionMetadata):
  class SparseLinearAttentionMetadataBuilder (line 206) | class SparseLinearAttentionMetadataBuilder(AttentionMetadataBuilder):
    method __init__ (line 209) | def __init__(self) -> None:
    method prepare (line 212) | def prepare(self) -> None:
    method build (line 215) | def build(
  class SparseLinearAttentionImpl (line 227) | class SparseLinearAttentionImpl(AttentionImpl, nn.Module):
    method __init__ (line 230) | def __init__(
    method _init_weights (line 275) | def _init_weights(self) -> None:
    method _calc_linear_attention_with_torch (line 281) | def _calc_linear_attention_with_torch(self, q, k, v):
    method forward (line 286) | def forward(
  class _attention (line 341) | class _attention(torch.autograd.Function):
    method forward (line 343) | def forward(ctx, q, k, v, k_block_id, lut, topk, BLOCK_M, BLOCK_N, qk_...
  class SageSparseLinearAttentionBackend (line 405) | class SageSparseLinearAttentionBackend(AttentionBackend):
    method get_supported_head_sizes (line 411) | def get_supported_head_sizes() -> list[int]:
    method get_enum (line 415) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 419) | def get_impl_cls() -> type["SageSparseLinearAttentionImpl"]:
    method get_metadata_cls (line 423) | def get_metadata_cls() -> type["SageSparseLinearAttentionMetadata"]:
    method get_builder_cls (line 427) | def get_builder_cls() -> type["SageSparseLinearAttentionMetadataBuilde...
  class SageSparseLinearAttentionMetadata (line 432) | class SageSparseLinearAttentionMetadata(AttentionMetadata):
  class SageSparseLinearAttentionMetadataBuilder (line 442) | class SageSparseLinearAttentionMetadataBuilder(AttentionMetadataBuilder):
    method __init__ (line 445) | def __init__(self) -> None:
    method prepare (line 448) | def prepare(self) -> None:
    method build (line 451) | def build(
  class SageSparseLinearAttentionImpl (line 463) | class SageSparseLinearAttentionImpl(AttentionImpl, nn.Module):
    method __init__ (line 464) | def __init__(
    method _init_weights (line 513) | def _init_weights(self) -> None:
    method _calc_linear_attention_with_torch (line 519) | def _calc_linear_attention_with_torch(
    method forward (line 529) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sparse_video_gen_2_attn.py
  class SparseVideoGen2AttentionBackend (line 44) | class SparseVideoGen2AttentionBackend(AttentionBackend):
    method get_supported_head_sizes (line 49) | def get_supported_head_sizes() -> list[int]:
    method get_enum (line 53) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 57) | def get_impl_cls() -> type["SparseVideoGen2AttentionImpl"]:
    method get_metadata_cls (line 61) | def get_metadata_cls() -> type["SparseVideoGen2AttentionMetadata"]:
    method get_builder_cls (line 65) | def get_builder_cls() -> type["SparseVideoGen2AttentionMetadataBuilder"]:
  class Svg2LayerCache (line 70) | class Svg2LayerCache:
  class Svg2Cache (line 78) | class Svg2Cache:
    method get_layer (line 81) | def get_layer(self, layer_idx: int) -> Svg2LayerCache:
  class SparseVideoGen2AttentionMetadata (line 90) | class SparseVideoGen2AttentionMetadata(AttentionMetadata):
  function _require_kwarg (line 110) | def _require_kwarg(kwargs: dict[str, Any], name: str) -> Any:
  class SparseVideoGen2AttentionMetadataBuilder (line 118) | class SparseVideoGen2AttentionMetadataBuilder(AttentionMetadataBuilder):
    method __init__ (line 120) | def __init__(self) -> None:
    method prepare (line 123) | def prepare(self) -> None:
    method build (line 126) | def build(  # type: ignore[override]
  class SparseVideoGen2AttentionImpl (line 182) | class SparseVideoGen2AttentionImpl(AttentionImpl):
    method __init__ (line 184) | def __init__(
    method _get_layer_idx (line 207) | def _get_layer_idx(self, prefix: str) -> int:
    method kmeans_init (line 215) | def kmeans_init(
    method kmeans_step (line 248) | def kmeans_step(
    method kmeans_clustering (line 283) | def kmeans_clustering(
    method semantic_aware_permutation (line 330) | def semantic_aware_permutation(
    method _hunyuan_dynamic_map_post_processing (line 389) | def _hunyuan_dynamic_map_post_processing(
    method forward (line 440) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/video_sparse_attn.py
  function get_tile_partition_indices (line 32) | def get_tile_partition_indices(
  function get_reverse_tile_partition_indices (line 56) | def get_reverse_tile_partition_indices(
  function construct_variable_block_sizes (line 65) | def construct_variable_block_sizes(
  function get_non_pad_index (line 109) | def get_non_pad_index(
  class VideoSparseAttentionBackend (line 126) | class VideoSparseAttentionBackend(AttentionBackend):
    method get_supported_head_sizes (line 131) | def get_supported_head_sizes() -> list[int]:
    method get_enum (line 135) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 139) | def get_impl_cls() -> type["VideoSparseAttentionImpl"]:
    method get_metadata_cls (line 143) | def get_metadata_cls() -> type["VideoSparseAttentionMetadata"]:
    method get_builder_cls (line 147) | def get_builder_cls() -> type["VideoSparseAttentionMetadataBuilder"]:
  class VideoSparseAttentionMetadata (line 152) | class VideoSparseAttentionMetadata(AttentionMetadata):
  class VideoSparseAttentionMetadataBuilder (line 171) | class VideoSparseAttentionMetadataBuilder(AttentionMetadataBuilder):
    method __init__ (line 173) | def __init__(self):
    method prepare (line 176) | def prepare(self):
    method build (line 179) | def build(  # type: ignore
  class VideoSparseAttentionImpl (line 228) | class VideoSparseAttentionImpl(AttentionImpl):
    method __init__ (line 230) | def __init__(
    method tile (line 244) | def tile(
    method untile (line 268) | def untile(
    method preprocess_qkv (line 277) | def preprocess_qkv(
    method postprocess_output (line 289) | def postprocess_output(
    method forward (line 300) | def forward(  # type: ignore[override]

FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/vmoba.py
  class VMOBAAttentionBackend (line 28) | class VMOBAAttentionBackend(AttentionBackend):
    method get_enum (line 33) | def get_enum() -> AttentionBackendEnum:
    method get_impl_cls (line 37) | def get_impl_cls() -> type["VMOBAAttentionImpl"]:
    method get_metadata_cls (line 41) | def get_metadata_cls() -> type["VideoMobaAttentionMetadata"]:
    method get_builder_cls (line 45) | def get_builder_cls() -> type["VideoMobaAttentionMetadataBuilder"]:
  class VideoMobaAttentionMetadata (line 50) | class VideoMobaAttentionMetadata(AttentionMetadata):
  function pad_input (line 73) | def pad_input(hidden_states, indices, batch, seqlen):
  class VideoMobaAttentionMetadataBuilder (line 91) | class VideoMobaAttentionMetadataBuilder(AttentionMetadataBuilder):
    method __init__ (line 93) | def __init__(self):
    method prepare (line 96) | def prepare(self):
    method build (line 99) | def build(  # type: ignore
  class VMOBAAttentionImpl (line 152) | class VMOBAAttentionImpl(AttentionImpl):
    method __init__ (line 154) | def __init__(
    method _get_layer_idx (line 169) | def _get_layer_idx(self, prefix: str) -> int | None:
    method forward (line 175) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/layer.py
  class UlyssesAttention (line 38) | class UlyssesAttention(nn.Module):
    method __init__ (line 41) | def __init__(
    method forward (line 82) | def forward(
  class UlyssesAttention_VSA (line 157) | class UlyssesAttention_VSA(UlyssesAttention):
    method forward (line 160) | def forward(
  class LocalAttention (line 221) | class LocalAttention(nn.Module):
    method __init__ (line 224) | def __init__(
    method forward (line 261) | def forward(
  class USPAttention (line 288) | class USPAttention(nn.Module):
    method __init__ (line 297) | def __init__(
    method forward (line 351) | def forward(
    method _forward_with_replicated_prefix (line 414) | def _forward_with_replicated_prefix(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/selector.py
  function backend_name_to_enum (line 25) | def backend_name_to_enum(backend_name: str) -> AttentionBackendEnum | None:
  function get_env_variable_attn_backend (line 42) | def get_env_variable_attn_backend() -> AttentionBackendEnum | None:
  function global_force_attn_backend (line 66) | def global_force_attn_backend(attn_backend: AttentionBackendEnum | None)...
  function get_global_forced_attn_backend (line 81) | def get_global_forced_attn_backend() -> AttentionBackendEnum | None:
  function get_attn_backend (line 89) | def get_attn_backend(
  function _cached_get_attn_backend (line 105) | def _cached_get_attn_backend(
  function global_force_attn_backend_context_manager (line 163) | def global_force_attn_backend_context_manager(

FILE: python/sglang/multimodal_gen/runtime/layers/attention/turbo_layer.py
  function post_all2all (line 31) | def post_all2all(local_seq_2_local_head, seq_world_size):
  function single_all_to_all (line 44) | def single_all_to_all(input, local_seq_2_local_head, group, async_op=Fal...
  function async_a2a_communicate (line 77) | def async_a2a_communicate(
  class _SeqAllToAll (line 127) | class _SeqAllToAll(torch.autograd.Function):
    method forward (line 129) | def forward(
    method backward (line 138) | def backward(ctx: Any, *grad_output: Tensor) -> Tuple[None, Tensor, No...
  class _SeqAllToAllQKV (line 146) | class _SeqAllToAllQKV(torch.autograd.Function):
    method forward (line 148) | def forward(
    method backward (line 168) | def backward(
  class DistributedAttention (line 181) | class DistributedAttention(torch.nn.Module):
    method __init__ (line 189) | def __init__(self, local_attention: Union[Module, Callable]) -> None:
    method forward (line 195) | def forward(
    method set_context_parallel_group (line 224) | def set_context_parallel_group(self, group, stream):
  class MinimalA2AAttnOp (line 229) | class MinimalA2AAttnOp(DistributedAttention):
    method __init__ (line 230) | def __init__(
    method set_context_parallel_group (line 264) | def set_context_parallel_group(self, process_group, ranks, stream):
    method forward (line 268) | def forward(

FILE: python/sglang/multimodal_gen/runtime/layers/custom_op.py
  class CustomOp (line 18) | class CustomOp(nn.Module):
    method __init__ (line 24) | def __init__(self) -> None:
    method forward (line 28) | def forward(self, *args, **kwargs) -> Any:
    method forward_native (line 31) | def forward_native(self, *args, **kwargs) -> Any:
    method forward_cuda (line 39) | def forward_cuda(self, *args, **kwargs) -> Any:
    method forward_hip (line 42) | def forward_hip(self, *args, **kwargs) -> Any:
    method forward_cpu (line 46) | def forward_cpu(self, *args, **kwargs) -> Any:
    method forward_tpu (line 50) | def forward_tpu(self, *args, **kwargs) -> Any:
    method forward_musa (line 56) | def forward_musa(self, *args, **kwargs) -> Any:
    method forward_oot (line 60) | def forward_oot(self, *args, **kwargs) -> Any:
    method forward_npu (line 65) | def forward_npu(self, *args, **kwargs) -> Any:
    method dispatch_forward (line 70) | def dispatch_forward(self) -> Callable:
    method enabled (line 85) | def enabled(cls) -> bool:
    method default_on (line 90) | def default_on() -> bool:
    method register (line 106) | def register(cls, name: str) -> Callable:

FILE: python/sglang/multimodal_gen/runtime/layers/elementwise.py
  class MulAdd (line 7) | class MulAdd(CustomOp):
    method __init__ (line 14) | def __init__(self, prefix: str = ""):
    method forward_native (line 17) | def forward_native(
    method forward_cuda (line 32) | def forward_cuda(

FILE: python/sglang/multimodal_gen/runtime/layers/layernorm.py
  class RMSNorm (line 42) | class RMSNorm(CustomOp):
    method __init__ (line 49) | def __init__(
    method forward_triton (line 66) | def forward_triton(self, x: torch.Tensor, residual: Optional[torch.Ten...
    method forward_cuda (line 71) | def forward_cuda(
    method forward_native (line 106) | def forward_native(
    method forward_cpu (line 145) | def forward_cpu(
    method forward_npu (line 152) | def forward_npu(
    method forward_hip (line 164) | def forward_hip(
    method _get_weight (line 172) | def _get_weight(self, dtype: torch.dtype) -> torch.Tensor:
    method forward_musa (line 183) | def forward_musa(
    method extra_repr (line 211) | def extra_repr(self) -> str:
  class LayerNorm (line 219) | class LayerNorm(CustomOp):
    method __init__ (line 220) | def __init__(
    method _get_weight_fallback (line 246) | def _get_weight_fallback(self, x: torch.Tensor) -> torch.Tensor:
    method forward_triton (line 258) | def forward_triton(self, x: torch.Tensor):
    method forward_cuda (line 268) | def forward_cuda(
    method forward_native (line 277) | def forward_native(
    method forward_cpu (line 293) | def forward_cpu(
    method forward_musa (line 300) | def forward_musa(self, x: torch.Tensor):
    method extra_repr (line 303) | def extra_repr(self) -> str:
  class FP32LayerNorm (line 312) | class FP32LayerNorm(nn.LayerNorm):
    method forward (line 313) | def forward(self, inputs: torch.Tensor) -> torch.Tensor:
  function _ensure_contiguous (line 328) | def _ensure_contiguous(tensor: Optional[torch.Tensor]) -> Optional[torch...
  class _ScaleResidualNormScaleShift (line 332) | class _ScaleResidualNormScaleShift(CustomOp):
    method __init__ (line 343) | def __init__(
    method forward_cuda (line 363) | def forward_cuda(
    method forward_hip (line 401) | def forward_hip(self, *args, **kwargs):
    method forward_musa (line 406) | def forward_musa(self, *args, **kwargs):
    method forward_native (line 411) | def forward_native(
  class ScaleResidualLayerNormScaleShift (line 442) | class ScaleResidualLayerNormScaleShift(_ScaleResidualNormScaleShift):
  class ScaleResidualRMSNormScaleShift (line 446) | class ScaleResidualRMSNormScaleShift(_ScaleResidualNormScaleShift):
  class _NormScaleShift (line 450) | class _NormScaleShift(CustomOp):
    method __init__ (line 460) | def __init__(
    method forward_cuda (line 479) | def forward_cuda(
    method forward_hip (line 505) | def forward_hip(self, *args, **kwargs):
    method forward_musa (line 510) | def forward_musa(self, *args, **kwargs):
    method forward_native (line 515) | def forward_native(
  class LayerNormScaleShift (line 523) | class LayerNormScaleShift(_NormScaleShift):
  class RMSNormScaleShift (line 527) | class RMSNormScaleShift(_NormScaleShift):
  function apply_qk_norm (line 531) | def apply_qk_norm(
  function tensor_parallel_rms_norm (line 571) | def tensor_parallel_rms_norm(x: torch.Tensor, norm: "RMSNorm") -> torch....

FILE: python/sglang/multimodal_gen/runtime/layers/linear.py
  function adjust_scalar_to_fused_array (line 63) | def adjust_scalar_to_fused_array(
  class LinearMethodBase (line 88) | class LinearMethodBase(QuantizeMethodBase):
    method create_weights (line 92) | def create_weights(
    method apply (line 118) | def apply(
  class UnquantizedLinearMethod (line 126) | class UnquantizedLinearMethod(LinearMethodBase):
    method create_weights (line 129) | def create_weights(
    method apply (line 151) | def apply(
  class LinearBase (line 162) | class LinearBase(torch.nn.Module):
    method __init__ (line 173) | def __init__(
    method forward (line 198) | def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, Parameter | ...
  class ReplicatedLinear (line 202) | class ReplicatedLinear(LinearBase):
    method __init__ (line 216) | def __init__(
    method weight_loader (line 264) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor)...
    method forward (line 276) | def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, Parameter | ...
    method extra_repr (line 283) | def extra_repr(self) -> str:
  class ColumnParallelLinear (line 290) | class ColumnParallelLinear(LinearBase):
    method __init__ (line 314) | def __init__(
    method weight_loader (line 380) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor)...
    method weight_loader_v2 (line 401) | def weight_loader_v2(self, param: Parameter, loaded_weight: torch.Tens...
    method forward (line 409) | def forward(self, input_: torch.Tensor) -> tuple[torch.Tensor, Paramet...
    method extra_repr (line 425) | def extra_repr(self) -> str:
  class MergedColumnParallelLinear (line 434) | class MergedColumnParallelLinear(ColumnParallelLinear):
    method __init__ (line 457) | def __init__(
    method weight_loader (line 483) | def weight_loader(
    method _load_fused_module_from_checkpoint (line 562) | def _load_fused_module_from_checkpoint(
    method weight_loader_v2 (line 598) | def weight_loader_v2(
  class QKVParallelLinear (line 647) | class QKVParallelLinear(ColumnParallelLinear):
    method __init__ (line 673) | def __init__(
    method _get_shard_offset_mapping (line 724) | def _get_shard_offset_mapping(self, loaded_shard_id: str) -> int | None:
    method _get_shard_size_mapping (line 733) | def _get_shard_size_mapping(self, loaded_shard_id: str) -> int | None:
    method _load_fused_module_from_checkpoint (line 741) | def _load_fused_module_from_checkpoint(
    method weight_loader_v2 (line 785) | def weight_loader_v2(
    method weight_loader (line 815) | def weight_loader(
  class RowParallelLinear (line 920) | class RowParallelLinear(LinearBase):
    method __init__ (line 946) | def __init__(
    method weight_loader (line 1006) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor):
    method weight_loader_v2 (line 1028) | def weight_loader_v2(self, param: BasevLLMParameter, loaded_weight: to...
    method forward (line 1038) | def forward(self, input_) -> tuple[torch.Tensor, Parameter | None]:
    method extra_repr (line 1065) | def extra_repr(self) -> str:

FILE: python/sglang/multimodal_gen/runtime/layers/lora/linear.py
  class BaseLayerWithLoRA (line 39) | class BaseLayerWithLoRA(nn.Module):
    method __init__ (line 41) | def __init__(
    method weight (line 70) | def weight(self):
    method bias (line 74) | def bias(self):
    method forward (line 78) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method slice_lora_a_weights (line 103) | def slice_lora_a_weights(self, A: torch.Tensor) -> torch.Tensor:
    method slice_lora_b_weights (line 106) | def slice_lora_b_weights(self, B: torch.Tensor) -> torch.Tensor:
    method set_lora_weights (line 109) | def set_lora_weights(
    method _merge_lora_into_data (line 155) | def _merge_lora_into_data(
    method merge_lora_weights (line 183) | def merge_lora_weights(self, strength: float | None = None) -> None:
    method unmerge_lora_weights (line 251) | def unmerge_lora_weights(self) -> None:
  class VocabParallelEmbeddingWithLoRA (line 280) | class VocabParallelEmbeddingWithLoRA(BaseLayerWithLoRA):
    method __init__ (line 289) | def __init__(
    method forward (line 295) | def forward(self, input_: torch.Tensor) -> torch.Tensor:
  class ColumnParallelLinearWithLoRA (line 301) | class ColumnParallelLinearWithLoRA(BaseLayerWithLoRA):
    method __init__ (line 303) | def __init__(
    method forward (line 311) | def forward(self, input_: torch.Tensor) -> torch.Tensor:
    method slice_lora_a_weights (line 324) | def slice_lora_a_weights(self, A: torch.Tensor) -> torch.Tensor:
    method slice_lora_b_weights (line 327) | def slice_lora_b_weights(self, B: torch.Tensor) -> torch.Tensor:
  class MergedColumnParallelLinearWithLoRA (line 336) | class MergedColumnParallelLinearWithLoRA(ColumnParallelLinearWithLoRA):
    method __init__ (line 338) | def __init__(
    method slice_lora_a_weights (line 346) | def slice_lora_a_weights(self, A: torch.Tensor) -> torch.Tensor:
    method slice_lora_b_weights (line 349) | def slice_lora_b_weights(self, B: torch.Tensor) -> torch.Tensor:
  class QKVParallelLinearWithLoRA (line 358) | class QKVParallelLinearWithLoRA(ColumnParallelLinearWithLoRA):
    method __init__ (line 360) | def __init__(
    method slice_lora_a_weights (line 368) | def slice_lora_a_weights(self, A: torch.Tensor) -> torch.Tensor:
    method slice_lora_b_weights (line 371) | def slice_lora_b_weights(
  class RowParallelLinearWithLoRA (line 391) | class RowParallelLinearWithLoRA(BaseLayerWithLoRA):
    method __init__ (line 393) | def __init__(
    method forward (line 401) | def forward(self, input_: torch.Tensor):
    method slice_lora_a_weights (line 432) | def slice_lora_a_weights(self, A: torch.Tensor) -> torch.Tensor:
    method slice_lora_b_weights (line 440) | def slice_lora_b_weights(self, B: torch.Tensor) -> torch.Tensor:
  class LinearWithLoRA (line 444) | class LinearWithLoRA(BaseLayerWithLoRA):
    method __init__ (line 451) | def __init__(
    method forward (line 460) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function wrap_with_lora_layer (line 488) | def wrap_with_lora_layer(
  function replace_submodule (line 520) | def replace_submodule(

FILE: python/sglang/multimodal_gen/runtime/layers/mlp.py
  class MLP (line 26) | class MLP(nn.Module):
    method __init__ (line 31) | def __init__(
    method forward (line 64) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class FeedForward (line 71) | class FeedForward(nn.Module):
    method __init__ (line 83) | def __init__(
    method forward (line 118) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:

FILE: python/sglang/multimodal_gen/runtime/layers/quantization/__init__.py
  function register_quantization_config (line 22) | def register_quantization_config(quantization: str):
  function get_quantization_config (line 50) | def get_quantization_config(quantization: str) -> type[QuantizationConfig]:

FILE: python/sglang/multimodal_gen/runtime/layers/quantization/configs/base_config.py
  class QuantizeMethodBase (line 19) | class QuantizeMethodBase(ABC):
    method create_weights (line 23) | def create_weights(
    method apply (line 32) | def apply(self, layer: torch.nn.Module, *args, **kwargs) -> torch.Tensor:
    method embedding (line 39) | def embedding(self, layer: torch.nn.Module, *args, **kwargs) -> torch....
    method process_weights_after_loading (line 45) | def process_weights_after_loading(self, layer: nn.Module) -> None:
  function method_has_implemented_embedding (line 53) | def method_has_implemented_embedding(method_class: type[QuantizeMethodBa...
  class QuantizationConfig (line 65) | class QuantizationConfig(ABC):
    method __init__ (line 71) | def __init__(self):
    method get_name (line 77) | def get_name(self) -> QuantizationMethods:
    method get_supported_act_dtypes (line 82) | def get_supported_act_dtypes(self) -> list[torch.dtype]:
    method get_min_capability (line 88) | def get_min_capability(cls) -> int:
    method get_config_filenames (line 99) | def get_config_filenames() -> list[str]:
    method from_config (line 105) | def from_config(cls, config: dict[str, Any]) -> "QuantizationConfig":
    method override_quantization_method (line 110) | def override_quantization_method(
    method get_from_keys (line 122) | def get_from_keys(config: dict[str, Any], keys: list[str]) -> Any:
    method get_from_keys_or (line 132) | def get_from_keys_or(config: dict[str, Any], keys: list[str], default:...
    method get_quant_method (line 140) | def get_quant_method(
    method get_cache_scale (line 154) | def get_cache_scale(self, name: str) -> str | None:

FILE: python/sglang/multimodal_gen/runtime/layers/quantization/configs/nunchaku_config.py
  function is_nunchaku_available (line 21) | def is_nunchaku_available() -> bool:
  class NunchakuConfig (line 32) | class NunchakuConfig(QuantizationConfig):
    method get_name (line 55) | def get_name(cls) -> str:
    method get_supported_act_dtypes (line 59) | def get_supported_act_dtypes(cls) -> list[torch.dtype]:
    method get_min_capability (line 63) | def get_min_capability(cls) -> int:
    method get_config_filenames (line 67) | def get_config_filenames() -> list[str]:
    method from_config (line 71) | def from_config(cls, config: dict[str, Any]) -> "NunchakuConfig":
    method get_quant_method (line 81) | def get_quant_method(
    method _get_quant_rules (line 123) | def _get_quant_rules(self) -> dict[str, list[str]]:
    method __post_init__ (line 130) | def __post_init__(self):
    method from_dict (line 150) | def from_dict(cls, config_dict: dict) -> "NunchakuConfig":
    method to_dict (line 154) | def to_dict(self) -> dict:
    method from_pretrained (line 165) | def from_pretrained(cls, model_path: str) -> Optional["NunchakuConfig"]:
  function _patch_native_svdq_linear (line 176) | def _patch_native_svdq_linear(
  function _patch_sglang_svdq_linear (line 188) | def _patch_sglang_svdq_linear(
  function _patch_sglang_svdq_wcscales (line 210) | def _patch_sglang_svdq_wcscales(
  function _patch_nunchaku_scales (line 226) | def _patch_nunchaku_scales(

FILE: python/sglang/multimodal_gen/runtime/layers/quantization/fp8.py
  class Fp8Config (line 77) | class Fp8Config(QuantizationConfig):
    method __init__ (line 80) | def __init__(
    method get_name (line 110) | def get_name(cls) -> str:
    method get_supported_act_dtypes (line 114) | def get_supported_act_dtypes(cls) -> List[torch.dtype]:
    method get_min_capability (line 118) | def get_min_capability(cls) -> int:
    method get_config_filenames (line 122) | def get_config_filenames(cls) -> List[str]:
    method from_config (line 126) | def from_config(cls, config: Dict[str, Any]) -> Fp8Config:
    method get_quant_method (line 144) | def get_quant_method(
    method get_scaled_act_names (line 155) | def get_scaled_act_names(self) -> List[str]:
  class Fp8LinearMethod (line 159) | class Fp8LinearMethod(LinearMethodBase):
    method __init__ (line 177) | def __init__(self, quant_config: Union[Fp8Config, W4AFp8Config]):
    method create_weights (line 193) | def create_weights(
    method process_weights_after_loading (line 302) | def process_weights_after_loading(self, layer: Module) -> None:
    method apply (line 442) | def apply(

FILE: python/sglang/multimodal_gen/runtime/layers/quantization/modelslim.py
  class ModelSlimConfig (line 32) | class ModelSlimConfig(QuantizationConfig):
    method __init__ (line 43) | def __init__(self, quant_config: Dict[str, Any] = {}):
    method get_linear_method (line 53) | def get_linear_method(self) -> ModelSlimLinearMethod:
    method get_supported_act_dtypes (line 57) | def get_supported_act_dtypes(cls) -> List[torch.dtype]:
    method get_min_capability (line 61) | def get_min_capability(cls) -> int:
    method get_name (line 65) | def get_name(cls) -> str:
    method get_config_filenames (line 69) | def get_config_filenames(cls) -> List[str]:
    method from_config (line 74) | def from_config(cls, config: Dict[str, Any]) -> ModelSlimConfig:
    method get_quant_method (line 77) | def get_quant_method(
    method _get_scheme_from_parts (line 108) | def _get_scheme_from_parts(
    method get_scheme (line 124) | def get_scheme(
    method is_layer_skipped (line 139) | def is_layer_skipped(
    method get_scaled_act_names (line 170) | def get_scaled_act_names(self) -> List[str]:
  class ModelSlimLinearMethod (line 174) | class ModelSlimLinearMethod(LinearMethodBase):
    method __init__ (line 176) | def __init__(self, quantization_config: ModelSlimConfig):
    method process_weights_after_loading (line 179) | def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
    method create_weights (line 182) | def create_weights(
    method apply (line 208) | def apply(

FILE: python/sglang/multimodal_gen/runtime/layers/quantization/nunchaku_linear.py
  class NunchakuSVDQLinearMethod (line 24) | class NunchakuSVDQLinearMethod(LinearMethodBase):
    method __init__ (line 25) | def __init__(
    method create_weights (line 40) | def create_weights(
    method process_weights_after_loading (line 138) | def process_weights_after_loading(self, layer: nn.Module) -> None:
    method apply (line 163) | def apply(
  class NunchakuAWQLinearMethod (line 206) | class NunchakuAWQLinearMethod(LinearMethodBase):
    method __init__ (line 207) | def __init__(self, group_size: int = 64):
    method create_weights (line 211) | def create_weights(
    method process_weights_after_loading (line 259) | def process_weights_after_loading(self, layer: nn.Module) -> None:
    method apply (line 264) | def apply(

FILE: python/sglang/multimodal_gen/runtime/layers/rotary_embedding/base.py
  class RotaryEmbedding (line 11) | class RotaryEmbedding(CustomOp):
    method __init__ (line 14) | def __init__(
    method _compute_inv_freq (line 36) | def _compute_inv_freq(self, base: int | float) -> torch.Tensor:
    method _compute_cos_sin_cache (line 50) | def _compute_cos_sin_cache(self) -> torch.Tensor:
    method forward_cuda (line 61) | def forward_cuda(self, *args, **kwargs):
    method forward_native (line 64) | def forward_native(
    method extra_repr (line 94) | def extra_repr(self) -> str:
  class LinearScalingRotaryEmbedding (line 101) | class LinearScalingRotaryEmbedding(RotaryEmbedding):
    method __init__ (line 102) | def __init__(
    method _compute_cos_sin_cache (line 122) | def _compute_cos_sin_cache(self) -> torch.Tensor:

FILE: python/sglang/multimodal_gen/runtime/layers/rotary_embedding/factory.py
  function get_rope (line 16) | def get_rope(
  function get_rotary_pos_embed (line 85) | def get_rotary_pos_embed(

FILE: python/sglang/multimodal_gen/runtime/layers/rotary_embedding/mrope.py
  function _to_tuple (line 10) | def _to_tuple(x: int | tuple[int, ...], dim: int = 2) -> tuple[int, ...]:
  function get_1d_rotary_pos_embed (line 19) | def get_1d_rotary_pos_embed(
  class OneDRotaryEmbedding (line 72) | class OneDRotaryEmbedding(torch.nn.Module):
    method __init__ (line 75) | def __init__(
    method build_freqs (line 96) | def build_freqs(self, device):
    method build_freqs_outer (line 108) | def build_freqs_outer(self, pos: torch.Tensor, device):
    method forward_from_grid (line 128) | def forward_from_grid(
    method forward (line 139) | def forward(self, pos: torch.Tensor) -> tuple[torch.Tensor, torch.Tens...
    method _forward_cached (line 151) | def _forward_cached(
  class NDRotaryEmbedding (line 164) | class NDRotaryEmbedding(torch.nn.Module):
    method __init__ (line 167) | def __init__(
    method forward (line 231) | def forward(self, positions: torch.Tensor) -> tuple[torch.Tensor, torc...
    method _forward_cached (line 248) | def _forward_cached(
    method forward_uncached (line 259) | def forward_uncached(self, pos: torch.Tensor) -> tuple[torch.Tensor, t...
    method forward_from_grid (line 296) | def forward_from_grid(
    method _forward_cached_from_grid (line 314) | def _forward_cached_from_grid(

FILE: python/sglang/multimodal_gen/runtime/layers/rotary_embedding/utils.py
  function _apply_rotary_emb (line 32) | def _apply_rotary_emb(
  function apply_flashinfer_rope_qk_inplace (line 64) | def apply_flashinfer_rope_qk_inplace(

FILE: python/sglang/multimodal_gen/runtime/layers/usp.py
  function _maybe_wait (line 26) | def _maybe_wait(tensor: torch.Tensor) -> torch.Tensor:
  function _usp_all_to_all_single (line 36) | def _usp_all_to_all_single(x: torch.Tensor) -> torch.Tensor:
  function _usp_input_all_to_all (line 49) | def _usp_input_all_to_all(x: torch.Tensor, head_dim: int = 1) -> torch.T...
  function _usp_output_all_to_all (line 105) | def _usp_output_all_to_all(x: torch.Tensor, head_dim: int = 1) -> torch....
  function ring_attn (line 161) | def ring_attn(

FILE: python/sglang/multimodal_gen/runtime/layers/utils.py
  function get_group_size (line 16) | def get_group_size(group) -> int:
  function get_group_rank (line 25) | def get_group_rank(group) -> int:
  function get_token_bin_counts_and_mask (line 34) | def get_token_bin_counts_and_mask(
  function direct_register_custom_op (line 54) | def direct_register_custom_op(
  class CustomOpWrapper (line 128) | class CustomOpWrapper:
    method __init__ (line 129) | def __init__(
    method __call__ (line 142) | def __call__(self, *args, **kwargs):
    method real_impl (line 146) | def real_impl(self) -> Callable:
    method fake_impl (line 163) | def fake_impl(self) -> Callable:
  function register_custom_op (line 193) | def register_custom_op(

FILE: python/sglang/multimodal_gen/runtime/layers/visual_embedding.py
  class PatchEmbed (line 35) | class PatchEmbed(nn.Module):
    method __init__ (line 49) | def __init__(
    method forward (line 81) | def forward(self, x):
  class Timesteps (line 89) | class Timesteps(_Timesteps):
    method forward (line 90) | def forward(self, timesteps: torch.Tensor) -> torch.Tensor:
  class CombinedTimestepGuidanceTextProjEmbeddings (line 109) | class CombinedTimestepGuidanceTextProjEmbeddings(
    method __init__ (line 112) | def __init__(self, embedding_dim, pooled_projection_dim):
  class CombinedTimestepTextProjEmbeddings (line 131) | class CombinedTimestepTextProjEmbeddings(_CombinedTimestepTextProjEmbedd...
    method __init__ (line 132) | def __init__(self, embedding_dim, pooled_projection_dim):
  class TimestepEmbedder (line 148) | class TimestepEmbedder(nn.Module):
    method __init__ (line 153) | def __init__(
    method forward (line 176) | def forward(
  function timestep_embedding (line 193) | def timestep_embedding(
  class ModulateProjection (line 223) | class ModulateProjection(nn.Module):
    method __init__ (line 226) | def __init__(
    method forward (line 246) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function unpatchify (line 252) | def unpatchify(x, t, h, w, patch_size, channels) -> torch.Tensor:

FILE: python/sglang/multimodal_gen/runtime/layers/vocab_parallel_embedding.py
  class UnquantizedEmbeddingMethod (line 31) | class UnquantizedEmbeddingMethod(QuantizeMethodBase):
    method create_weights (line 34) | def create_weights(
    method apply (line 58) | def apply(
    method embedding (line 63) | def embedding(self, layer: torch.nn.Module, input_: torch.Tensor) -> t...
  function pad_vocab_size (line 67) | def pad_vocab_size(vocab_size: int, pad_to: int = DEFAULT_VOCAB_PADDING_...
  function vocab_range_from_per_partition_vocab_size (line 72) | def vocab_range_from_per_partition_vocab_size(
  function vocab_range_from_global_vocab_size (line 80) | def vocab_range_from_global_vocab_size(
  class VocabParallelEmbeddingShardIndices (line 90) | class VocabParallelEmbeddingShardIndices:
    method num_org_elements (line 104) | def num_org_elements(self) -> int:
    method num_added_elements (line 108) | def num_added_elements(self) -> int:
    method num_org_elements_padded (line 112) | def num_org_elements_padded(self) -> int:
    method num_added_elements_padded (line 116) | def num_added_elements_padded(self) -> int:
    method num_org_vocab_padding (line 120) | def num_org_vocab_padding(self) -> int:
    method num_added_vocab_padding (line 124) | def num_added_vocab_padding(self) -> int:
    method num_elements_padded (line 128) | def num_elements_padded(self) -> int:
    method __post_init__ (line 131) | def __post_init__(self):
  function get_masked_input_and_mask (line 153) | def get_masked_input_and_mask(
  class VocabParallelEmbedding (line 180) | class VocabParallelEmbedding(torch.nn.Module):
    method __init__ (line 219) | def __init__(
    method _get_indices (line 310) | def _get_indices(
    method get_sharded_to_full_mapping (line 347) | def get_sharded_to_full_mapping(self) -> list[int] | None:
    method weight_loader (line 412) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor):
    method forward (line 460) | def forward(self, input_):
    method extra_repr (line 484) | def extra_repr(self) -> str:

FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/adapter_loader.py
  class AdapterLoader (line 20) | class AdapterLoader(ComponentLoader):
    method load_customized (line 31) | def load_customized(

FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/bridge_loader.py
  class BridgeLoader (line 22) | class BridgeLoader(ComponentLoader):
    method load_customized (line 30) | def load_customized(

FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/component_loader.py
  class ComponentLoader (line 32) | class ComponentLoader(ABC):
    method __init_subclass__ (line 43) | def __init_subclass__(cls, **kwargs):
    method __init__ (line 51) | def __init__(self, device=None) -> None:
    method should_offload (line 54) | def should_offload(
    method target_device (line 60) | def target_device(self, should_offload):
    method load (line 70) | def load(
    method load_native (line 140) | def load_native(
    method load_customized (line 174) | def load_customized(
    method _ensure_loaders_registered (line 185) | def _ensure_loaders_registered(cls):
    method for_component_type (line 210) | def for_component_type(
  class ImageProcessorLoader (line 264) | class ImageProcessorLoader(ComponentLoader):
    method load_customized (line 270) | def load_customized(
  class AutoProcessorLoader (line 276) | class AutoProcessorLoader(ComponentLoader):
    method load_customized (line 282) | def load_customized(
  class TokenizerLoader (line 288) | class TokenizerLoader(ComponentLoader):
    method load_customized (line 294) | def load_customized(
  class GenericComponentLoader (line 303) | class GenericComponentLoader(ComponentLoader):
    method __init__ (line 306) | def __init__(self, library="transformers") -> None:
  class PipelineComponentLoader (line 311) | class PipelineComponentLoader:
    method load_component (line 317) | def load_component(

FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/image_encoder_loader.py
  class ImageEncoderLoader (line 14) | class ImageEncoderLoader(TextEncoderLoader):
    method should_offload (line 18) | def should_offload(self, server_args, model_config: ModelConfig | None...
    method load_customized (line 32) | def load_customized(

FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/scheduler_loader.py
  class SchedulerLoader (line 14) | class SchedulerLoader(ComponentLoader):
    method load_customized (line 20) | def load_customized(

FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/text_encoder_loader.py
  class TextEncoderLoader (line 47) | class TextEncoderLoader(ComponentLoader):
    class Source (line 54) | class Source:
    method should_offload (line 69) | def should_offload(self, server_args, model_config: ModelConfig | None...
    method _prepare_weights (line 83) | def _prepare_weights(
    method _get_weights_iterator (line 135) | def _get_weights_iterator(
    method _get_all_weights (line 154) | def _get_all_weights(
    method load_customized (line 175) | def load_customized(
    method load_model (line 209) | def load_model(

FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/transformer_loader.py
  class TransformerLoader (line 41) | class TransformerLoader(ComponentLoader):
    method get_list_of_safetensors_to_load (line 47) | def get_list_of_safetensors_to_load(
    method _resolve_quant_config (line 78) | def _resolve_quant_config(
    method _resolve_target_param_dtype (line 97) | def _resolve_target_param_dtype(
    method load_customized (line 127) | def load_customized(

FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/vae_loader.py
  function _convert_conv3d_weights_to_channels_last_3d (line 30) | def _convert_conv3d_weights_to_channels_last_3d(module: nn.Module) -> int:
  class VAELoader (line 49) | class VAELoader(ComponentLoader):
    method should_offload (line 55) | def should_offload(
    method load_customized (line 60) | def load_customized(

FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/vl_encoder_loader.py
  class VisionLanguageEncoderLoader (line 11) | class VisionLanguageEncoderLoader(ComponentLoader):
    method load_customized (line 17) | def load_customized(

FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/vocoder_loader.py
  class VocoderLoader (line 23) | class VocoderLoader(ComponentLoader):
    method should_offload (line 27) | def should_offload(
    method load_customized (line 32) | def load_customized(

FILE: python/sglang/multimodal_gen/runtime/loader/fsdp_load.py
  function _make_param_like (line 44) | def _make_param_like(
  function maybe_load_fsdp_model (line 60) | def maybe_load_fsdp_model(
  function shard_model (line 166) | def shard_model(
  function load_model_from_full_model_state_dict (line 231) | def load_model_from_full_model_state_dict(

FILE: python/sglang/multimodal_gen/runtime/loader/utils.py
  function set_default_torch_dtype (line 23) | def set_default_torch_dtype(dtype: torch.dtype):
  function get_param_names_mapping (line 33) | def get_param_names_mapping(
  function hf_to_custom_state_dict (line 95) | def hf_to_custom_state_dict(
  class skip_init_modules (line 142) | class skip_init_modules:
    method __enter__ (line 143) | def __enter__(self):
    method __exit__ (line 150) | def __exit__(self, exc_type, exc_value, traceback):
  function _normalize_component_type (line 156) | def _normalize_component_type(module_type: str) -> str:
  function _clean_hf_config_inplace (line 163) | def _clean_hf_config_inplace(model_config: dict) -> None:
  function _list_safetensors_files (line 175) | def _list_safetensors_files(model_path: str) -> list[str]:
  function get_memory_usage_of_component (line 183) | def get_memory_usage_of_component(module) -> float | None:

FILE: python/sglang/multimodal_gen/runtime/loader/weight_utils.py
  class DisabledTqdm (line 39) | class DisabledTqdm(tqdm):
    method __init__ (line 41) | def __init__(self, *args, **kwargs):
  function get_lock (line 46) | def get_lock(model_name_or_path: str | Path, cache_dir: str | None = None):
  function filter_duplicate_safetensors_files (line 64) | def filter_duplicate_safetensors_files(
  function filter_files_not_needed_for_inference (line 85) | def filter_files_not_needed_for_inference(hf_weights_files: list[str]) -...
  function _validate_safetensors_file (line 111) | def _validate_safetensors_file(file_path: str) -> bool:
  function safetensors_weights_iterator (line 135) | def safetensors_weights_iterator(
  function _load_pt_file (line 203) | def _load_pt_file(bin_file: str, device: str) -> dict:
  function pt_weights_iterator (line 223) | def pt_weights_iterator(
  function default_weight_loader (line 243) | def default_weight_loader(param: torch.Tensor, loaded_weight: torch.Tens...
  function maybe_remap_kv_scale_name (line 264) | def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> str | None:
  function compute_weights_checksum (line 326) | def compute_weights_checksum(

FILE: python/sglang/multimodal_gen/runtime/loader/weights_updater.py
  function get_updatable_modules (line 64) | def get_updatable_modules(pipeline) -> dict[str, torch.nn.Module]:
  function _get_weights_iter (line 81) | def _get_weights_iter(weights_dir: str):
  function _validate_weight_files (line 89) | def _validate_weight_files(
  function _load_weights_into_module (line 110) | def _load_weights_into_module(module: torch.nn.Module, weights_iter) -> ...
  function load_weights_into_model (line 131) | def load_weights_into_model(weights_iter, model_params: dict) -> None:
  class WeightsUpdater (line 152) | class WeightsUpdater:
    method __init__ (line 161) | def __init__(self, pipeline):
    method update_weights_from_disk (line 164) | def update_weights_from_disk(
    method _collect_modules (line 222) | def _collect_modules(
    method _apply_weights (line 245) | def _apply_weights(
    method _rollback (line 276) | def _rollback(self, updated_modules: list[str]) -> None:

FILE: python/sglang/multimodal_gen/runtime/managers/forward_context.py
  class ForwardContext (line 32) | class ForwardContext:
    method set_attn_backend_cls (line 42) | def set_attn_backend_cls(self, attention_backend_cls: Type):
  function get_forward_context (line 55) | def get_forward_context() -> "ForwardContext":
  function set_forward_context (line 66) | def set_forward_context(

FILE: python/sglang/multimodal_gen/runtime/managers/gpu_worker.py
  class GPUWorker (line 65) | class GPUWorker:
    method __init__ (line 70) | def __init__(
    method init_device_and_model (line 93) | def init_device_and_model(self) -> None:
    method do_mem_analysis (line 161) | def do_mem_analysis(self, output_batch: OutputBatch):
    method execute_forward (line 210) | def execute_forward(self, batch: List[Req]) -> OutputBatch:
    method get_can_stay_resident_components (line 306) | def get_can_stay_resident_components(
    method set_lora (line 340) | def set_lora(
    method merge_lora_weights (line 362) | def merge_lora_weights(
    method unmerge_lora_weights (line 377) | def unmerge_lora_weights(self, target: str = "all") -> OutputBatch:
    method list_loras (line 389) | def list_loras(self) -> OutputBatch:
    method update_weights_from_disk (line 402) | def update_weights_from_disk(
    method get_weights_checksum (line 423) | def get_weights_checksum(
  function _oom_exceptions (line 459) | def _oom_exceptions():
  function run_scheduler_process (line 467) | def run_scheduler_process(

FILE: python/sglang/multimodal_gen/runtime/managers/scheduler.py
  class Scheduler (line 45) | class Scheduler:
    method __init__ (line 52) | def __init__(
    method _handle_set_lora (line 116) | def _handle_set_lora(self, reqs: List[Any]) -> OutputBatch:
    method _handle_merge_lora (line 124) | def _handle_merge_lora(self, reqs: List[Any]):
    method _handle_unmerge_lora (line 128) | def _handle_unmerge_lora(self, reqs: List[Any]) -> OutputBatch:
    method _handle_list_loras (line 132) | def _handle_list_loras(self, _reqs: List[Any]) -> OutputBatch:
    method _handle_shutdown (line 135) | def _handle_shutdown(self, _reqs: List[Any]) -> OutputBatch:
    method _handle_update_weights_from_disk (line 139) | def _handle_update_weights_from_disk(self, reqs: List[Any]) -> OutputB...
    method _handle_get_weights_checksum (line 152) | def _handle_get_weights_checksum(self, reqs: List[Any]) -> OutputBatch:
    method _handle_generation (line 158) | def _handle_generation(self, reqs: List[Req]):
    method return_result (line 170) | def return_result(
    method get_next_batch_to_run (line 182) | def get_next_batch_to_run(self) -> list[tuple[bytes, Req]] | None:
    method prepare_server_warmup_reqs (line 192) | def prepare_server_warmup_reqs(self):
    method process_received_reqs_with_req_based_warmup (line 240) | def process_received_reqs_with_req_based_warmup(
    method recv_reqs (line 262) | def recv_reqs(self) -> List[tuple[bytes, Any]]:
    method event_loop (line 320) | def event_loop(self) -> None:
    method _broadcast_task (line 422) | def _broadcast_task(self, payload: dict[str, Any]) -> None:
    method _collect_slave_results (line 430) | def _collect_slave_results(self) -> List[dict[str, Any]]:

FILE: python/sglang/multimodal_gen/runtime/models/adapter/ltx_2_connector.py
  function apply_interleaved_rotary_emb (line 15) | def apply_interleaved_rotary_emb(
  function apply_split_rotary_emb (line 25) | def apply_split_rotary_emb(
  class LTX2Attention (line 72) | class LTX2Attention(torch.nn.Module):
    method __init__ (line 78) | def __init__(
    method forward (line 148) | def forward(
  class LTX2RotaryPosEmbed1d (line 203) | class LTX2RotaryPosEmbed1d(nn.Module):
    method __init__ (line 208) | def __init__(
    method forward (line 230) | def forward(
  class LTX2TransformerBlock1d (line 307) | class LTX2TransformerBlock1d(nn.Module):
    method __init__ (line 308) | def __init__(
    method forward (line 331) | def forward(
  class LTX2ConnectorTransformer1d (line 352) | class LTX2ConnectorTransformer1d(nn.Module):
    method __init__ (line 360) | def __init__(
    method forward (line 413) | def forward(
  class LTX2TextConnectors (line 487) | class LTX2TextConnectors(nn.Module):
    method __init__ (line 493) | def __init__(
    method forward (line 544) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/bridges/mova_dual_tower.py
  function compute_rope_cos_sin (line 37) | def compute_rope_cos_sin(
  class PerFrameAttentionPooling (line 85) | class PerFrameAttentionPooling(nn.Module):
    method __init__ (line 93) | def __init__(self, dim: int, num_heads: int, eps: float = 1e-6):
    method forward (line 107) | def forward(self, x: torch.Tensor, grid_size: Tuple[int, int, int]) ->...
  class CrossModalInteractionController (line 136) | class CrossModalInteractionController:
    method __init__ (line 143) | def __init__(self, visual_layers: int = 30, audio_layers: int = 30):
    method get_interaction_layers (line 148) | def get_interaction_layers(
    method should_interact (line 179) | def should_interact(
  class ConditionalCrossAttention (line 188) | class ConditionalCrossAttention(nn.Module):
    method __init__ (line 196) | def __init__(self, dim: int, kv_dim: int, num_heads: int, eps: float =...
    method forward (line 226) | def forward(
  class AdaLayerNorm (line 300) | class AdaLayerNorm(nn.Module):
    method __init__ (line 305) | def __init__(
    method forward (line 328) | def forward(
  class ConditionalCrossAttentionBlock (line 352) | class ConditionalCrossAttentionBlock(nn.Module):
    method __init__ (line 355) | def __init__(
    method forward (line 375) | def forward(
  class DualTowerConditionalBridge (line 398) | class DualTowerConditionalBridge(
    method __init__ (line 417) | def __init__(
    method build_aligned_freqs (line 512) | def build_aligned_freqs(
    method should_interact (line 574) | def should_interact(self, layer_idx: int, direction: str) -> bool:
    method apply_conditional_control (line 579) | def apply_conditional_control(
    method forward (line 625) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/base.py
  class BaseDiT (line 21) | class BaseDiT(nn.Module, ABC):
    method __init_subclass__ (line 34) | def __init_subclass__(cls) -> None:
    method __init__ (line 47) | def __init__(self, config: DiTConfig, hf_config: dict[str, Any], **kwa...
    method forward (line 57) | def forward(
    method __post_init__ (line 68) | def __post_init__(self) -> None:
    method supported_attention_backends (line 77) | def supported_attention_backends(self) -> set[AttentionBackendEnum]:
    method device (line 81) | def device(self) -> torch.device:
  class CachableDiT (line 86) | class CachableDiT(TeaCacheMixin, BaseDiT):
    method __init__ (line 107) | def __init__(self, config: DiTConfig, **kwargs) -> None:
    method get_nunchaku_quant_rules (line 112) | def get_nunchaku_quant_rules(cls) -> dict[str, dict[str, Any]]:

FILE: python/sglang/multimodal_gen/runtime/models/dits/causal_wanvideo.py
  class CausalWanSelfAttention (line 60) | class CausalWanSelfAttention(nn.Module):
    method __init__ (line 62) | def __init__(
    method forward (line 100) | def forward(
  class CausalWanTransformerBlock (line 253) | class CausalWanTransformerBlock(nn.Module):
    method __init__ (line 255) | def __init__(
    method forward (line 331) | def forward(
  class CausalWanTransformer3DModel (line 432) | class CausalWanTransformer3DModel(BaseDiT, OffloadableDiTMixin):
    method __init__ (line 440) | def __init__(
    method _prepare_blockwise_causal_attn_mask (line 525) | def _prepare_blockwise_causal_attn_mask(
    method _forward_inference (line 597) | def _forward_inference(
    method _forward_train (line 739) | def _forward_train(
    method forward (line 869) | def forward(self, *args, **kwargs):

FILE: python/sglang/multimodal_gen/runtime/models/dits/flux.py
  function _fused_gelu_mlp (line 83) | def _fused_gelu_mlp(
  function _get_qkv_projections (line 182) | def _get_qkv_projections(
  class FluxAttention (line 208) | class FluxAttention(torch.nn.Module, AttentionModuleMixin):
    method __init__ (line 209) | def __init__(
    method forward (line 343) | def forward(
  class FluxSingleTransformerBlock (line 424) | class FluxSingleTransformerBlock(nn.Module):
    method __init__ (line 425) | def __init__(
    method forward (line 499) | def forward(
  class FluxTransformerBlock (line 562) | class FluxTransformerBlock(nn.Module):
    method __init__ (line 563) | def __init__(
    method forward (line 620) | def forward(
  class FluxPosEmbed (line 695) | class FluxPosEmbed(nn.Module):
    method __init__ (line 697) | def __init__(self, theta: int, axes_dim: List[int]):
    method forward (line 711) | def forward(self, ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tens...
  class FluxTransformer2DModel (line 719) | class FluxTransformer2DModel(CachableDiT, OffloadableDiTMixin):
    method get_nunchaku_quant_rules (line 729) | def get_nunchaku_quant_rules(cls) -> dict[str, list[str]]:
    method __init__ (line 760) | def __init__(
    method forward (line 838) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/flux_2.py
  function _get_qkv_projections (line 42) | def _get_qkv_projections(
  class Flux2SwiGLU (line 58) | class Flux2SwiGLU(nn.Module):
    method __init__ (line 64) | def __init__(self):
    method forward (line 68) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class Flux2FeedForward (line 74) | class Flux2FeedForward(nn.Module):
    method __init__ (line 75) | def __init__(
    method forward (line 98) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class Flux2Attention (line 105) | class Flux2Attention(torch.nn.Module, AttentionModuleMixin):
    method __init__ (line 106) | def __init__(
    method forward (line 213) | def forward(
  class Flux2ParallelSelfAttention (line 294) | class Flux2ParallelSelfAttention(torch.nn.Module, AttentionModuleMixin):
    method __init__ (line 306) | def __init__(
    method forward (line 367) | def forward(
  class Flux2SingleTransformerBlock (line 421) | class Flux2SingleTransformerBlock(nn.Module):
    method __init__ (line 422) | def __init__(
    method forward (line 452) | def forward(
  class Flux2TransformerBlock (line 495) | class Flux2TransformerBlock(nn.Module):
    method __init__ (line 496) | def __init__(
    method forward (line 535) | def forward(
  class Flux2TimestepGuidanceEmbeddings (line 607) | class Flux2TimestepGuidanceEmbeddings(nn.Module):
    method __init__ (line 608) | def __init__(
    method forward (line 633) | def forward(
  class Flux2Modulation (line 652) | class Flux2Modulation(nn.Module):
    method __init__ (line 653) | def __init__(self, dim: int, mod_param_sets: int = 2, bias: bool = Fal...
    method forward (line 662) | def forward(
  class Flux2PosEmbed (line 677) | class Flux2PosEmbed(nn.Module):
    method __init__ (line 678) | def __init__(self, theta: int, axes_dim: List[int]):
    method forward (line 692) | def forward(self, ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tens...
  class Flux2Transformer2DModel (line 700) | class Flux2Transformer2DModel(CachableDiT, OffloadableDiTMixin):
    method __init__ (line 710) | def __init__(
    method forward (line 816) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/glm_image.py
  class GlmImageLayerKVCache (line 53) | class GlmImageLayerKVCache:
    method __init__ (line 56) | def __init__(self):
    method store (line 61) | def store(self, k: torch.Tensor, v: torch.Tensor):
    method get (line 69) | def get(self):
    method clear (line 72) | def clear(self):
  class GlmImageKVCache (line 78) | class GlmImageKVCache:
    method __init__ (line 81) | def __init__(self, num_layers: int):
    method __getitem__ (line 85) | def __getitem__(self, layer_idx: int) -> GlmImageLayerKVCache:
    method set_mode (line 88) | def set_mode(self, mode: Optional[str]):
    method clear (line 96) | def clear(self):
  class GlmImageTimestepEmbedding (line 101) | class GlmImageTimestepEmbedding(nn.Module):
    method __init__ (line 107) | def __init__(
    method forward (line 126) | def forward(self, sample: torch.Tensor) -> torch.Tensor:
  class GlmImageTextProjection (line 133) | class GlmImageTextProjection(nn.Module):
    method __init__ (line 139) | def __init__(
    method forward (line 158) | def forward(self, caption: torch.Tensor) -> torch.Tensor:
  class GlmImageCombinedTimestepSizeEmbeddings (line 165) | class GlmImageCombinedTimestepSizeEmbeddings(nn.Module):
    method __init__ (line 166) | def __init__(
    method forward (line 188) | def forward(
  class GlmImageImageProjector (line 218) | class GlmImageImageProjector(nn.Module):
    method __init__ (line 219) | def __init__(
    method forward (line 230) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class GlmImageAdaLayerNormZero (line 251) | class GlmImageAdaLayerNormZero(nn.Module):
    method __init__ (line 252) | def __init__(self, embedding_dim: int, dim: int) -> None:
    method forward (line 259) | def forward(
  class GlmImageAttention (line 308) | class GlmImageAttention(torch.nn.Module):
    method __init__ (line 309) | def __init__(
    method forward (line 379) | def forward(
  class GlmImageTransformerBlock (line 474) | class GlmImageTransformerBlock(nn.Module):
    method __init__ (line 475) | def __init__(
    method forward (line 513) | def forward(
  class GlmImageRotaryPosEmbed (line 581) | class GlmImageRotaryPosEmbed(nn.Module):
    method __init__ (line 582) | def __init__(self, dim: int, patch_size: int, theta: float = 10000.0) ...
    method forward (line 589) | def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, ...
  class GlmImageAdaLayerNormContinuous (line 632) | class GlmImageAdaLayerNormContinuous(nn.Module):
    method __init__ (line 638) | def __init__(
    method forward (line 660) | def forward(
  class GlmImageTransformer2DModel (line 670) | class GlmImageTransformer2DModel(CachableDiT, OffloadableDiTMixin):
    method __init__ (line 702) | def __init__(
    method forward (line 785) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/helios.py
  function pad_for_3d_conv (line 61) | def pad_for_3d_conv(x, kernel_size):
  function center_down_sample_3d (line 71) | def center_down_sample_3d(x, kernel_size):
  function apply_rotary_emb_transposed (line 76) | def apply_rotary_emb_transposed(hidden_states, freqs_cis):
  class HeliosOutputNorm (line 91) | class HeliosOutputNorm(nn.Module):
    method __init__ (line 92) | def __init__(self, dim: int, eps: float = 1e-6):
    method forward (line 97) | def forward(self, hidden_states, temb, original_context_length):
  class HeliosRotaryPosEmbed (line 116) | class HeliosRotaryPosEmbed(nn.Module):
    method __init__ (line 119) | def __init__(self, rope_dim, theta):
    method _get_freqs_base (line 129) | def _get_freqs_base(self, dim):
    method _ensure_freqs_base (line 135) | def _ensure_freqs_base(self, device):
    method get_frequency_batched (line 143) | def get_frequency_batched(self, freqs_base, pos):
    method _get_spatial_meshgrid (line 150) | def _get_spatial_meshgrid(self, height, width, device_str):
    method forward (line 158) | def forward(self, frame_indices, height, width, device):
  class HeliosTimeTextEmbedding (line 201) | class HeliosTimeTextEmbedding(nn.Module):
    method __init__ (line 204) | def __init__(self, dim, time_freq_dim, time_proj_dim, text_embed_dim):
    method forward (line 214) | def forward(
  class HeliosSelfAttention (line 231) | class HeliosSelfAttention(nn.Module):
    method __init__ (line 234) | def __init__(
    method forward (line 284) | def forward(self, hidden_states, rotary_emb=None, original_context_len...
  class HeliosCrossAttention (line 333) | class HeliosCrossAttention(nn.Module):
    method __init__ (line 336) | def __init__(
    method forward (line 373) | def forward(self, hidden_states, encoder_hidden_states):
  class HeliosTransformerBlock (line 400) | class HeliosTransformerBlock(nn.Module):
    method __init__ (line 406) | def __init__(
    method forward (line 455) | def forward(
  class HeliosTransformer3DModel (line 527) | class HeliosTransformer3DModel(CachableDiT, OffloadableDiTMixin):
    method __init__ (line 542) | def __init__(
    method forward (line 640) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/hunyuan3d.py
  class MixedRowParallelLinear (line 33) | class MixedRowParallelLinear(RowParallelLinear):
    method __init__ (line 36) | def __init__(self, input_sizes: list[int], output_size: int, **kwargs):
    method weight_loader (line 40) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens...
  function _flux_timestep_embedding (line 56) | def _flux_timestep_embedding(
  class _FluxGELU (line 77) | class _FluxGELU(nn.Module):
    method __init__ (line 78) | def __init__(self, approximate="tanh"):
    method forward (line 82) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class _FluxMLPEmbedder (line 86) | class _FluxMLPEmbedder(nn.Module):
    method __init__ (line 87) | def __init__(self, in_dim: int, hidden_dim: int):
    method forward (line 93) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class _FluxRMSNorm (line 97) | class _FluxRMSNorm(nn.Module):
    method __init__ (line 98) | def __init__(self, dim: int):
    method forward (line 102) | def forward(self, x: torch.Tensor):
  class _FluxQKNorm (line 109) | class _FluxQKNorm(nn.Module):
    method __init__ (line 110) | def __init__(self, dim: int):
    method forward (line 115) | def forward(
  class _FluxSelfAttention (line 123) | class _FluxSelfAttention(nn.Module):
    method __init__ (line 124) | def __init__(
    method forward (line 155) | def forward(self, x: torch.Tensor, pe: torch.Tensor) -> torch.Tensor:
  class _FluxModulationOut (line 173) | class _FluxModulationOut:
  class _FluxModulation (line 179) | class _FluxModulation(nn.Module):
    method __init__ (line 180) | def __init__(self, dim: int, double: bool):
    method forward (line 186) | def forward(
  class _FluxDoubleStreamBlock (line 198) | class _FluxDoubleStreamBlock(nn.Module):
    method __init__ (line 199) | def __init__(
    method forward (line 250) | def forward(
  class _FluxSingleStreamBlock (line 307) | class _FluxSingleStreamBlock(nn.Module):
    method __init__ (line 313) | def __init__(
    method forward (line 364) | def forward(
  class _FluxLastLayer (line 392) | class _FluxLastLayer(nn.Module):
    method __init__ (line 393) | def __init__(self, hidden_size: int, patch_size: int, out_channels: int):
    method forward (line 403) | def forward(self, x: torch.Tensor, vec: torch.Tensor) -> torch.Tensor:
  class Hunyuan3D2DiT (line 410) | class Hunyuan3D2DiT(CachableDiT, OffloadableDiTMixin):
    method build_config_from_params (line 418) | def build_config_from_params(cls, params: dict) -> Hunyuan3DDiTConfig:
    method __init__ (line 435) | def __init__(
    method forward (line 520) | def forward(
  function _chunked_feed_forward (line 576) | def _chunked_feed_forward(
  class SGLangAttentionWrapper (line 595) | class SGLangAttentionWrapper(torch.nn.Module):
    method __init__ (line 600) | def __init__(
    method forward (line 641) | def forward(
  class Basic2p5DTransformerBlock (line 671) | class Basic2p5DTransformerBlock(torch.nn.Module):
    method __init__ (line 674) | def __init__(
    method _initialize_attn_weights (line 716) | def _initialize_attn_weights(self):
    method __getattr__ (line 731) | def __getattr__(self, name: str):
    method forward (line 737) | def forward(
  function compute_voxel_grid_mask (line 965) | def compute_voxel_grid_mask(position: torch.Tensor, grid_resolution: int...
  function compute_multi_resolution_mask (line 1010) | def compute_multi_resolution_mask(
  function compute_discrete_voxel_indice (line 1026) | def compute_discrete_voxel_indice(
  function compute_multi_resolution_discrete_voxel_indice (line 1063) | def compute_multi_resolution_discrete_voxel_indice(
  class UNet2p5DConditionModel (line 1085) | class UNet2p5DConditionModel(torch.nn.Module):
    method __init__ (line 1088) | def __init__(self, unet: UNet2DConditionModel) -> None:
    method from_pretrained (line 1108) | def from_pretrained(pretrained_model_name_or_path: str, **kwargs):
    method init_condition (line 1126) | def init_condition(self):
    method init_camera_embedding (line 1142) | def init_camera_embedding(self):
    method init_attention (line 1152) | def init_attention(
    method _iter_2p5d_blocks (line 1240) | def _iter_2p5d_blocks(unet):
    method __getattr__ (line 1251) | def __getattr__(self, name: str):
    method forward (line 1257) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/hunyuanvideo.py
  class MMDoubleStreamBlock (line 49) | class MMDoubleStreamBlock(nn.Module):
    method __init__ (line 55) | def __init__(
    method forward (line 177) | def forward(
  class MMSingleStreamBlock (line 274) | class MMSingleStreamBlock(nn.Module):
    method __init__ (line 280) | def __init__(
    method forward (line 353) | def forward(
  class HunyuanVideoTransformer3DModel (line 413) | class HunyuanVideoTransformer3DModel(CachableDiT, OffloadableDiTMixin):
    method __init__ (line 435) | def __init__(
    method forward (line 562) | def forward(
    method maybe_cache_states (line 679) | def maybe_cache_states(
    method should_skip_forward_for_cached_states (line 684) | def should_skip_forward_for_cached_states(self, **kwargs) -> bool:
    method retrieve_cached_states (line 783) | def retrieve_cached_states(self, hidden_states: torch.Tensor) -> torch...
  class SingleTokenRefiner (line 787) | class SingleTokenRefiner(nn.Module):
    method __init__ (line 793) | def __init__(
    method forward (line 843) | def forward(self, x, t):
  class IndividualTokenRefinerBlock (line 861) | class IndividualTokenRefinerBlock(nn.Module):
    method __init__ (line 866) | def __init__(
    method forward (line 934) | def forward(self, x, c):
  class FinalLayer (line 960) | class FinalLayer(nn.Module):
    method __init__ (line 965) | def __init__(
    method forward (line 994) | def forward(self, x, c):

FILE: python/sglang/multimodal_gen/runtime/models/dits/ltx_2.py
  function apply_interleaved_rotary_emb (line 41) | def apply_interleaved_rotary_emb(
  function apply_split_rotary_emb (line 50) | def apply_split_rotary_emb(
  class LTX2AudioVideoRotaryPosEmbed (line 93) | class LTX2AudioVideoRotaryPosEmbed(nn.Module):
    method __init__ (line 94) | def __init__(
    method prepare_video_coords (line 147) | def prepare_video_coords(
    method prepare_audio_coords (line 202) | def prepare_audio_coords(
    method prepare_coords (line 236) | def prepare_coords(self, *args, **kwargs):
    method forward (line 241) | def forward(
  function rms_norm (line 317) | def rms_norm(x: torch.Tensor, eps: float) -> torch.Tensor:
  class LTX2TextProjection (line 321) | class LTX2TextProjection(nn.Module):
    method __init__ (line 322) | def __init__(
    method forward (line 347) | def forward(self, caption: torch.Tensor) -> torch.Tensor:
  class LTX2TimestepEmbedder (line 354) | class LTX2TimestepEmbedder(nn.Module):
    method __init__ (line 355) | def __init__(self, embedding_dim: int, in_channels: int = 256) -> None:
    method forward (line 364) | def forward(self, t_emb: torch.Tensor) -> torch.Tensor:
  class LTX2PixArtAlphaCombinedTimestepSizeEmbeddings (line 371) | class LTX2PixArtAlphaCombinedTimestepSizeEmbeddings(nn.Module):
    method __init__ (line 372) | def __init__(self, embedding_dim: int) -> None:
    method forward (line 376) | def forward(
  class LTX2AdaLayerNormSingle (line 386) | class LTX2AdaLayerNormSingle(nn.Module):
    method __init__ (line 387) | def __init__(self, embedding_dim: int, embedding_coefficient: int = 6)...
    method forward (line 398) | def forward(
  class LTX2TPRMSNormAcrossHeads (line 408) | class LTX2TPRMSNormAcrossHeads(nn.Module):
    method __init__ (line 409) | def __init__(
    method forward (line 428) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LTX2Attention (line 444) | class LTX2Attention(nn.Module):
    method __init__ (line 445) | def __init__(
    method forward (line 545) | def forward(
    method _slice_rope_for_tp (line 617) | def _slice_rope_for_tp(
  class LTX2FeedForward (line 650) | class LTX2FeedForward(nn.Module):
    method __init__ (line 651) | def __init__(
    method forward (line 671) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LTX2TransformerBlock (line 678) | class LTX2TransformerBlock(nn.Module):
    method __init__ (line 679) | def __init__(
    method get_ada_values (line 786) | def get_ada_values(
    method forward (line 805) | def forward(
  class LTX2VideoTransformer3DModel (line 985) | class LTX2VideoTransformer3DModel(CachableDiT, OffloadableDiTMixin):
    method _validate_tp_config (line 993) | def _validate_tp_config(self, *, arch: LTX2ArchConfig, tp_size: int) -...
    method __init__ (line 1043) | def __init__(
    method forward (line 1261) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/mova_audio_dit.py
  function legacy_precompute_freqs_cis_1d (line 29) | def legacy_precompute_freqs_cis_1d(
  function precompute_freqs_cis_1d (line 45) | def precompute_freqs_cis_1d(dim: int, end: int = 16384, theta: float = 1...
  class Head (line 50) | class Head(nn.Module):
    method __init__ (line 51) | def __init__(
    method forward (line 61) | def forward(self, x, t_mod):
  class Conv1dLocalIsland (line 78) | class Conv1dLocalIsland(nn.Conv1d):
    method __init__ (line 88) | def __init__(self, *args, **kwargs):
    method forward (line 91) | def forward(self, input):
  class WanAudioModel (line 104) | class WanAudioModel(CachableDiT, OffloadableDiTMixin):
    method __init__ (line 112) | def __init__(
    method _init_freqs (line 201) | def _init_freqs(self):
    method patchify (line 210) | def patchify(
    method unpatchify (line 220) | def unpatchify(self, x: torch.Tensor, grid_size: tuple[int]):
    method forward (line 225) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/mova_video_dit.py
  function modulate (line 44) | def modulate(x: torch.Tensor, shift: torch.Tensor, scale: torch.Tensor):
  function sinusoidal_embedding_1d (line 48) | def sinusoidal_embedding_1d(dim, position):
  function precompute_freqs_cis_3d (line 62) | def precompute_freqs_cis_3d(dim: int, end: int = 1024, theta: float = 10...
  function precompute_freqs_cis (line 70) | def precompute_freqs_cis(
  function rope_apply (line 82) | def rope_apply(x, freqs, num_heads):
  function rope_apply_head_dim (line 91) | def rope_apply_head_dim(x, freqs, head_dim):
  class SelfAttention (line 101) | class SelfAttention(nn.Module):
    method __init__ (line 110) | def __init__(
    method forward (line 153) | def forward(self, x, freqs):
  class CrossAttention (line 197) | class CrossAttention(nn.Module):
    method __init__ (line 208) | def __init__(
    method forward (line 250) | def forward(self, x: torch.Tensor, y: torch.Tensor):
  class MulAdd (line 283) | class MulAdd(nn.Module):
    method __init__ (line 284) | def __init__(self):
    method forward (line 287) | def forward(self, x, gate, residual):
  class DiTBlock (line 291) | class DiTBlock(nn.Module):
    method __init__ (line 292) | def __init__(
    method forward (line 326) | def forward(self, x, context, t_mod, freqs):
  class Head (line 362) | class Head(nn.Module):
    method __init__ (line 363) | def __init__(
    method forward (line 376) | def forward(self, x, t_mod):
  class Conv3dLocalIsland (line 391) | class Conv3dLocalIsland(nn.Conv3d):
    method __init__ (line 403) | def __init__(self, *args, **kwargs):
    method forward (line 406) | def forward(self, input):
  class WanModel (line 421) | class WanModel(CachableDiT, OffloadableDiTMixin):
    method __init__ (line 429) | def __init__(
    method _init_freqs (line 515) | def _init_freqs(self):
    method patchify (line 521) | def patchify(
    method unpatchify (line 531) | def unpatchify(self, x: torch.Tensor, grid_size: tuple[int, int, int]):
    method forward (line 543) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/qwen_image.py
  function _get_qkv_projections (line 59) | def _get_qkv_projections(
  class QwenTimestepProjEmbeddings (line 87) | class QwenTimestepProjEmbeddings(nn.Module):
    method __init__ (line 88) | def __init__(self, embedding_dim, use_additional_t_cond=False):
    method forward (line 101) | def forward(self, timestep, hidden_states, addition_t_cond=None):
  class QwenEmbedRope (line 120) | class QwenEmbedRope(nn.Module):
    method __init__ (line 121) | def __init__(self, theta: int, axes_dim: List[int], scale_rope=False):
    method rope_params (line 147) | def rope_params(self, index, dim, theta=10000):
    method forward (line 167) | def forward(
    method _compute_video_freqs (line 234) | def _compute_video_freqs(
  class QwenEmbedLayer3DRope (line 279) | class QwenEmbedLayer3DRope(nn.Module):
    method __init__ (line 280) | def __init__(self, theta: int, axes_dim: List[int], scale_rope=False):
    method rope_params (line 305) | def rope_params(self, index, dim, theta=10000):
    method forward (line 325) | def forward(self, video_fhw, txt_seq_lens, device):
    method _compute_video_freqs (line 389) | def _compute_video_freqs(self, frame, height, width, idx=0):
    method _compute_condition_freqs (line 432) | def _compute_condition_freqs(self, frame, height, width):
  class QwenImageCrossAttention (line 473) | class QwenImageCrossAttention(nn.Module):
    method __init__ (line 474) | def __init__(
    method forward (line 582) | def forward(
  class QwenImageTransformerBlock (line 671) | class QwenImageTransformerBlock(nn.Module):
    method __init__ (line 672) | def __init__(
    method _modulate (line 777) | def _modulate(
    method forward (line 866) | def forward(
  function to_hashable (line 975) | def to_hashable(obj):
  class QwenImageTransformer2DModel (line 981) | class QwenImageTransformer2DModel(CachableDiT, OffloadableDiTMixin):
    method get_nunchaku_quant_rules (line 995) | def get_nunchaku_quant_rules(cls) -> dict[str, list[str]]:
    method __init__ (line 1017) | def __init__(
    method build_modulate_index (line 1090) | def build_modulate_index(self, img_shapes: tuple[int, int, int], device):
    method forward (line 1101) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/sana.py
  class SanaCombinedTimestepSizeEmbeddings (line 18) | class SanaCombinedTimestepSizeEmbeddings(nn.Module):
    method __init__ (line 19) | def __init__(self, embedding_dim):
    method forward (line 28) | def forward(self, timestep, hidden_dtype=None):
  class SanaAdaLayerNormSingle (line 36) | class SanaAdaLayerNormSingle(nn.Module):
    method __init__ (line 37) | def __init__(self, embedding_dim):
    method forward (line 43) | def forward(self, timestep, hidden_dtype=None):
  class SanaModulatedNorm (line 49) | class SanaModulatedNorm(nn.Module):
    method __init__ (line 50) | def __init__(self, dim, eps=1e-6):
    method forward (line 54) | def forward(self, x, temb, scale_shift_table):
  class GLUMBConv (line 61) | class GLUMBConv(nn.Module):
    method __init__ (line 64) | def __init__(self, in_channels, out_channels, expand_ratio=2.5):
    method forward (line 79) | def forward(self, hidden_states):
  class SanaLinearAttention (line 89) | class SanaLinearAttention(nn.Module):
    method __init__ (line 92) | def __init__(self, query_dim, num_heads, head_dim, qk_norm_dim, bias=F...
    method forward (line 107) | def forward(self, hidden_states):
  class SanaCrossAttention (line 135) | class SanaCrossAttention(nn.Module):
    method __init__ (line 136) | def __init__(self, query_dim, cross_attention_dim, num_heads, head_dim...
    method forward (line 152) | def forward(
  class SanaTransformerBlock (line 182) | class SanaTransformerBlock(nn.Module):
    method __init__ (line 183) | def __init__(
    method forward (line 219) | def forward(
  class SanaTransformer2DModel (line 254) | class SanaTransformer2DModel(CachableDiT, OffloadableDiTMixin):
    method __init__ (line 265) | def __init__(self, config: SanaConfig, hf_config=None, **kwargs):
    method forward (line 326) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/dits/wanvideo.py
  class WanImageEmbedding (line 67) | class WanImageEmbedding(torch.nn.Module):
    method __init__ (line 69) | def __init__(self, in_features: int, out_features: int):
    method forward (line 76) | def forward(self, encoder_hidden_states_image: torch.Tensor) -> torch....
  class WanTimeTextImageEmbedding (line 84) | class WanTimeTextImageEmbedding(nn.Module):
    method __init__ (line 86) | def __init__(
    method forward (line 107) | def forward(
  class WanSelfAttention (line 127) | class WanSelfAttention(nn.Module):
    method __init__ (line 129) | def __init__(
    method forward (line 199) | def forward(self, x: torch.Tensor, context: torch.Tensor, context_lens...
  class WanT2VCrossAttention (line 207) | class WanT2VCrossAttention(WanSelfAttention):
    method __init__ (line 208) | def __init__(self, *args, **kwargs):
    method forward (line 211) | def forward(self, x, context, context_lens):
  class WanI2VCrossAttention (line 244) | class WanI2VCrossAttention(WanSelfAttention):
    method __init__ (line 246) | def __init__(
    method forward (line 284) | def forward(self, x, context, context_lens):
  class WanTransformerBlock (line 332) | class WanTransformerBlock(nn.Module):
    method __init__ (line 334) | def __init__(
    method forward (line 486) | def forward(
  class WanTransformerBlock_VSA (line 589) | class WanTransformerBlock_VSA(nn.Module):
    method __init__ (line 591) | def __init__(
    method forward (line 728) | def forward(
  class WanTransformer3DModel (line 815) | class WanTransformer3DModel(CachableDiT, OffloadableDiTMixin):
    method __init__ (line 823) | def __init__(
    method _compute_rope_for_sequence_shard (line 928) | def _compute_rope_for_sequence_shard(
    method forward (line 950) | def forward(
    method maybe_cache_states (line 1156) | def maybe_cache_states(
    method should_skip_forward_for_cached_states (line 1166) | def should_skip_forward_for_cached_states(self, **kwargs) -> bool:
    method retrieve_cached_states (line 1209) | def retrieve_cached_states(self, hidden_states: torch.Tensor) -> torch...

FILE: python/sglang/multimodal_gen/runtime/models/dits/zimage.py
  class SelectFirstElement (line 46) | class SelectFirstElement(nn.Module):
    method __init__ (line 47) | def __init__(self):
    method forward (line 50) | def forward(self, x):
  class TimestepEmbedder (line 54) | class TimestepEmbedder(nn.Module):
    method __init__ (line 55) | def __init__(self, out_size, mid_size=None, frequency_embedding_size=2...
    method timestep_embedding (line 75) | def timestep_embedding(t, dim, max_period=10000):
    method forward (line 91) | def forward(self, t):
  class FeedForward (line 101) | class FeedForward(nn.Module):
    method __init__ (line 102) | def __init__(self, dim: int, hidden_dim: int):
    method forward (line 111) | def forward(self, x):
  class ZImageAttention (line 118) | class ZImageAttention(nn.Module):
    method __init__ (line 119) | def __init__(
    method forward (line 213) | def forward(
  class ZImageTransformerBlock (line 274) | class ZImageTransformerBlock(nn.Module):
    method __init__ (line 275) | def __init__(
    method forward (line 342) | def forward(
  class FinalLayer (line 388) | class FinalLayer(nn.Module):
    method __init__ (line 389) | def __init__(self, hidden_size, out_channels):
    method forward (line 402) | def forward(self, x, c):
  class RopeEmbedder (line 410) | class RopeEmbedder:
    method __init__ (line 411) | def __init__(
    method precompute_freqs (line 428) | def precompute_freqs(dim: List[int], end: List[int], theta: float = 25...
    method __call__ (line 445) | def __call__(self, ids: torch.Tensor) -> Tuple[torch.Tensor, torch.Ten...
  class ZImageTransformer2DModel (line 478) | class ZImageTransformer2DModel(CachableDiT, OffloadableDiTMixin):
    method get_nunchaku_quant_rules (line 490) | def get_nunchaku_quant_rules(cls) -> dict[str, list[str]]:
    method __init__ (line 510) | def __init__(
    method unpatchify (line 624) | def unpatchify(
    method create_coordinate_grid (line 644) | def create_coordinate_grid(size, start=None, device=None):
    method patchify_and_embed (line 655) | def patchify_and_embed(
    method forward (line 711) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/encoders/base.py
  class TextEncoder (line 18) | class TextEncoder(nn.Module, ABC):
    method __init__ (line 25) | def __init__(self, config: TextEncoderConfig) -> None:
    method forward (line 36) | def forward(
    method supported_attention_backends (line 48) | def supported_attention_backends(self) -> set[AttentionBackendEnum]:
  class ImageEncoder (line 52) | class ImageEncoder(nn.Module, ABC):
    method __init__ (line 57) | def __init__(self, config: ImageEncoderConfig) -> None:
    method forward (line 66) | def forward(self, pixel_values: torch.Tensor, **kwargs) -> BaseEncoder...
    method supported_attention_backends (line 70) | def supported_attention_backends(self) -> set[AttentionBackendEnum]:

FILE: python/sglang/multimodal_gen/runtime/models/encoders/bert.py
  class HunyuanClip (line 12) | class HunyuanClip(nn.Module):
    method __init__ (line 18) | def __init__(self, model_dir, max_length=77):
    method forward (line 30) | def forward(self, prompts, with_mask=True):

FILE: python/sglang/multimodal_gen/runtime/models/encoders/clip.py
  class CLIPVisionEmbeddings (line 47) | class CLIPVisionEmbeddings(nn.Module):
    method __init__ (line 49) | def __init__(self, config: CLIPVisionConfig):
    method forward (line 76) | def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
  class CLIPTextEmbeddings (line 91) | class CLIPTextEmbeddings(nn.Module):
    method __init__ (line 93) | def __init__(self, config: CLIPTextConfig):
    method forward (line 110) | def forward(
  class CLIPAttention (line 143) | class CLIPAttention(nn.Module):
    method __init__ (line 146) | def __init__(
    method _shape (line 193) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 200) | def forward(
  class CLIPMLP (line 286) | class CLIPMLP(nn.Module):
    method __init__ (line 288) | def __init__(
    method forward (line 312) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class CLIPEncoderLayer (line 320) | class CLIPEncoderLayer(nn.Module):
    method __init__ (line 322) | def __init__(
    method forward (line 338) | def forward(
  class CLIPEncoder (line 360) | class CLIPEncoder(nn.Module):
    method __init__ (line 369) | def __init__(
    method forward (line 395) | def forward(
  class CLIPTextTransformer (line 418) | class CLIPTextTransformer(nn.Module):
    method __init__ (line 420) | def __init__(
    method forward (line 445) | def forward(
  class CLIPTextModel (line 527) | class CLIPTextModel(TextEncoder):
    method __init__ (line 529) | def __init__(
    method forward (line 538) | def forward(
    method load_weights (line 556) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->...
  class CLIPVisionTransformer (line 593) | class CLIPVisionTransformer(nn.Module):
    method __init__ (line 595) | def __init__(
    method forward (line 637) | def forward(
  class CLIPVisionModel (line 675) | class CLIPVisionModel(ImageEncoder):
    method __init__ (line 680) | def __init__(self, config: CLIPVisionConfig) -> None:
    method forward (line 690) | def forward(
    method device (line 706) | def device(self):
    method load_weights (line 711) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->...
  class BertModel (line 754) | class BertModel(CLIPTextModel):

FILE: python/sglang/multimodal_gen/runtime/models/encoders/gemma2.py
  class Gemma2RMSNorm (line 42) | class Gemma2RMSNorm(nn.Module):
    method __init__ (line 43) | def __init__(self, dim: int, eps: float = 1e-6):
    method _norm (line 48) | def _norm(self, x):
    method forward (line 51) | def forward(self, x):
  class Gemma2MLP (line 57) | class Gemma2MLP(nn.Module):
    method __init__ (line 58) | def __init__(
    method forward (line 88) | def forward(self, x):
  class Gemma2Attention (line 95) | class Gemma2Attention(nn.Module):
    method __init__ (line 96) | def __init__(
    method forward (line 161) | def forward(
  class Gemma2DecoderLayer (line 233) | class Gemma2DecoderLayer(nn.Module):
    method __init__ (line 234) | def __init__(
    method forward (line 271) | def forward(
  class Gemma2Model (line 292) | class Gemma2Model(nn.Module):
    method __init__ (line 297) | def __init__(self, config: Gemma2Config, **kwargs):
    method get_input_embeddings (line 326) | def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
    method forward (line 329) | def forward(
    method load_weights (line 377) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->...
    method _load_with_shard_id (line 425) | def _load_with_shard_id(weight_loader, param, loaded_weight, shard_id):

FILE: python/sglang/multimodal_gen/runtime/models/encoders/gemma_3.py
  function get_attention_sliding_window_size (line 32) | def get_attention_sliding_window_size(config):
  class Gemma3RMSNorm (line 36) | class Gemma3RMSNorm(nn.Module):
    method __init__ (line 37) | def __init__(self, dim: int, eps: float = 1e-6):
    method _norm (line 42) | def _norm(self, x):
    method forward (line 45) | def forward(self, x):
    method extra_repr (line 50) | def extra_repr(self):
  class Gemma3MLP (line 54) | class Gemma3MLP(nn.Module):
    method __init__ (line 55) | def __init__(
    method forward (line 86) | def forward(self, x):
  function _rotate_half (line 93) | def _rotate_half(x: torch.Tensor) -> torch.Tensor:
  class Gemma3Attention (line 98) | class Gemma3Attention(nn.Module):
    method __init__ (line 99) | def __init__(
    method rotary_emb (line 210) | def rotary_emb(self, positions, q, k):
    method forward (line 227) | def forward(
  class Gemma3DecoderLayer (line 302) | class Gemma3DecoderLayer(nn.Module):
    method __init__ (line 303) | def __init__(
    method forward (line 345) | def forward(
  class Gemma3TextScaledWordEmbedding (line 383) | class Gemma3TextScaledWordEmbedding(nn.Embedding):
    method __init__ (line 384) | def __init__(
    method forward (line 394) | def forward(self, input_ids: torch.Tensor):
  class QuickGELU (line 401) | class QuickGELU(nn.Module):
    method forward (line 402) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class SiglipVisionEmbeddings (line 406) | class SiglipVisionEmbeddings(nn.Module):
    method __init__ (line 407) | def __init__(self, config):
    method forward (line 432) | def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
  class SiglipMLP (line 443) | class SiglipMLP(nn.Module):
    method __init__ (line 444) | def __init__(
    method forward (line 466) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class SiglipAttention (line 473) | class SiglipAttention(nn.Module):
    method __init__ (line 474) | def __init__(
    method forward (line 515) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class SiglipEncoderLayer (line 534) | class SiglipEncoderLayer(nn.Module):
    method __init__ (line 535) | def __init__(
    method forward (line 561) | def forward(
  class SiglipEncoder (line 577) | class SiglipEncoder(nn.Module):
    method __init__ (line 578) | def __init__(
    method forward (line 600) | def forward(
  class SiglipVisionTransformer (line 610) | class SiglipVisionTransformer(nn.Module):
    method __init__ (line 611) | def __init__(
    method device (line 629) | def device(self) -> torch.device:
    method forward (line 632) | def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
  class SiglipVisionModel (line 639) | class SiglipVisionModel(nn.Module):
    method __init__ (line 640) | def __init__(
    method device (line 652) | def device(self) -> torch.device:
    method forward (line 655) | def forward(self, pixel_values: torch.Tensor):
  class Gemma3MultiModalProjector (line 659) | class Gemma3MultiModalProjector(nn.Module):
    method __init__ (line 662) | def __init__(self, config: Gemma3Config):
    method forward (line 684) | def forward(self, vision_outputs: torch.Tensor) -> torch.Tensor:
  class Gemma3TextModel (line 710) | class Gemma3TextModel(nn.Module):
    method __init__ (line 711) | def __init__(self, config: Gemma3Config):
    method get_input_embeddings (line 747) | def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
    method forward (line 751) | def forward(
    method load_weights (line 803) | def load_weights(self, weights: Any) -> set[str]:
  class Gemma3ForConditionalGeneration (line 893) | class Gemma3ForConditionalGeneration(nn.Module):
    method __init__ (line 894) | def __init__(
    method get_placeholder_mask (line 918) | def get_placeholder_mask(
    method forward (line 937) | def forward(
    method load_weights (line 986) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]) ->...
    method get_attention_sliding_window_size (line 1174) | def get_attention_sliding_window_size(self):

FILE: python/sglang/multimodal_gen/runtime/models/encoders/hunyuan3d.py
  function get_1d_sincos_pos_embed_from_grid (line 15) | def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
  class ImageEncoder (line 31) | class ImageEncoder(nn.Module):
    method __init__ (line 37) | def __init__(
    method forward (line 72) | def forward(self, image, mask=None, value_range=(-1, 1), **kwargs):
    method unconditional_embedding (line 87) | def unconditional_embedding(self, batch_size, **kwargs):
  class CLIPImageEncoder (line 101) | class CLIPImageEncoder(ImageEncoder):
  class DinoImageEncoder (line 108) | class DinoImageEncoder(ImageEncoder):
  class DinoImageEncoderMV (line 115) | class DinoImageEncoderMV(DinoImageEncoder):
    method __init__ (line 120) | def __init__(
    method forward (line 140) | def forward(self, image, mask=None, value_range=(-1, 1), view_idxs=Non...
    method unconditional_embedding (line 182) | def unconditional_embedding(self, batch_size, view_idxs, **kwargs):
  function build_image_encoder (line 195) | def build_image_encoder(config):
  class DualImageEncoder (line 206) | class DualImageEncoder(nn.Module):
    method __init__ (line 207) | def __init__(
    method forward (line 216) | def forward(self, image, mask=None, **kwargs):
    method unconditional_embedding (line 223) | def unconditional_embedding(self, batch_size, **kwargs):
  class SingleImageEncoder (line 235) | class SingleImageEncoder(nn.Module):
    method __init__ (line 236) | def __init__(
    method forward (line 243) | def forward(self, image, mask=None, **kwargs):
    method unconditional_embedding (line 249) | def unconditional_embedding(self, batch_size, **kwargs):

FILE: python/sglang/multimodal_gen/runtime/models/encoders/llama.py
  class LlamaMLP (line 60) | class LlamaMLP(nn.Module):
    method __init__ (line 62) | def __init__(
    method forward (line 94) | def forward(self, x):
  class LlamaAttention (line 101) | class LlamaAttention(nn.Module):
    method __init__ (line 103) | def __init__(
    method forward (line 192) | def forward(
  class LlamaDecoderLayer (line 220) | class LlamaDecoderLayer(nn.Module):
    method __init__ (line 222) | def __init__(
    method forward (line 277) | def forward(
  class LlamaModel (line 298) | class LlamaModel(TextEncoder):
    method __init__ (line 300) | def __init__(
    method get_input_embeddings (line 341) | def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
    method forward (line 344) | def forward(
    method load_weights (line 397) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->...

FILE: python/sglang/multimodal_gen/runtime/models/encoders/mistral_3.py
  function repeat_kv (line 43) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class MistralAttention (line 58) | class MistralAttention(nn.Module):
    method __init__ (line 61) | def __init__(self, config: MistralConfig, layer_idx: int):
    method forward (line 106) | def forward(
  class MistralDecoderLayer (line 154) | class MistralDecoderLayer(nn.Module):
    method __init__ (line 155) | def __init__(self, config: MistralConfig, layer_idx: int):
    method forward (line 167) | def forward(
  class MistralModel (line 203) | class MistralModel(nn.Module):
    method __init__ (line 204) | def __init__(self, config: MistralConfig):
    method forward (line 224) | def forward(
  class Mistral3Model (line 298) | class Mistral3Model(nn.Module):
    method __init__ (line 301) | def __init__(self, config: Mistral3Config):
    method get_input_embeddings (line 306) | def get_input_embeddings(self):
    method set_decoder (line 309) | def set_decoder(self, decoder):
    method get_decoder (line 312) | def get_decoder(self):
    method forward (line 315) | def forward(
  class Mistral3ForConditionalGeneration (line 363) | class Mistral3ForConditionalGeneration(nn.Module):
    method __init__ (line 371) | def __init__(self, config: LlavaConfig):
    method get_input_embeddings (line 375) | def get_input_embeddings(self):
    method set_decoder (line 378) | def set_decoder(self, decoder):
    method get_decoder (line 381) | def get_decoder(self):
    method language_model (line 386) | def language_model(self):
    method forward (line 389) | def forward(
    method load_weights (line 434) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->...

FILE: python/sglang/multimodal_gen/runtime/models/encoders/qwen2_5vl.py
  class Qwen2_5_VLAttention (line 80) | class Qwen2_5_VLAttention(nn.Module):
    method __init__ (line 86) | def __init__(self, config: Qwen2_5_VLTextConfig, layer_idx: Optional[i...
    method forward (line 143) | def forward(
  class Qwen2_5_VLDecoderLayer (line 208) | class Qwen2_5_VLDecoderLayer(nn.Module):
    method __init__ (line 209) | def __init__(self, config: Qwen2_5_VLTextConfig, layer_idx: int):
    method forward (line 230) | def forward(
  class Qwen2_5_VLMLP (line 295) | class Qwen2_5_VLMLP(nn.Module):
    method __init__ (line 296) | def __init__(
    method forward (line 322) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class Qwen2_5_VLTextModel (line 330) | class Qwen2_5_VLTextModel(nn.Module):
    method __init__ (line 331) | def __init__(self, config: PretrainedConfig):
    method forward (line 355) | def forward(
  class Qwen2_5_VLModel (line 502) | class Qwen2_5_VLModel(nn.Module):
    method __init__ (line 509) | def __init__(self, config, enable_image_understanding: bool = False):
    method get_input_embeddings (line 523) | def get_input_embeddings(self):
    method set_input_embeddings (line 526) | def set_input_embeddings(self, value):
    method set_decoder (line 529) | def set_decoder(self, decoder):
    method get_decoder (line 532) | def get_decoder(self):
    method get_rope_index (line 535) | def get_rope_index(
    method get_video_features (line 763) | def get_video_features(
    method get_image_features (line 785) | def get_image_features(
    method get_placeholder_mask (line 812) | def get_placeholder_mask(
    method forward (line 874) | def forward(
  class Qwen2_5_VLForConditionalGeneration (line 1005) | class Qwen2_5_VLForConditionalGeneration(TextEncoder):
    method __init__ (line 1024) | def __init__(
    method get_input_embeddings (line 1044) | def get_input_embeddings(self):
    method forward (line 1048) | def forward(
    method load_weights (line 1125) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_embed_and_head (line 1152) | def get_embed_and_head(self):

FILE: python/sglang/multimodal_gen/runtime/models/encoders/qwen3.py
  class Qwen3MLP (line 30) | class Qwen3MLP(nn.Module):
    method __init__ (line 33) | def __init__(
    method forward (line 63) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class Qwen3Attention (line 70) | class Qwen3Attention(nn.Module):
    method __init__ (line 76) | def __init__(
    method forward (line 157) | def forward(
  class Qwen3DecoderLayer (line 196) | class Qwen3DecoderLayer(nn.Module):
    method __init__ (line 199) | def __init__(
    method forward (line 239) | def forward(
  class Qwen3ForCausalLM (line 260) | class Qwen3ForCausalLM(TextEncoder):
    method __init__ (line 270) | def __init__(self, config: Qwen3TextConfig) -> None:
    method get_input_embeddings (line 308) | def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
    method forward (line 311) | def forward(
    method load_weights (line 360) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->...

FILE: python/sglang/multimodal_gen/runtime/models/encoders/t5.py
  class AttentionType (line 51) | class AttentionType:
  class AttentionMetadata (line 68) | class AttentionMetadata:
  class T5DenseActDense (line 72) | class T5DenseActDense(nn.Module):
    method __init__ (line 74) | def __init__(
    method forward (line 91) | def forward(self, hidden_states) -> torch.Tensor:
  class T5DenseGatedActDense (line 98) | class T5DenseGatedActDense(nn.Module):
    method __init__ (line 100) | def __init__(
    method forward (line 130) | def forward(self, hidden_states) -> torch.Tensor:
  class T5LayerFF (line 138) | class T5LayerFF(nn.Module):
    method __init__ (line 140) | def __init__(
    method forward (line 153) | def forward(self, hidden_states) -> torch.Tensor:
  class T5MultiHeadAttention (line 161) | class T5MultiHeadAttention(nn.Module):
    method __init__ (line 163) | def __init__(self) -> None:
    method forward (line 166) | def forward(self, q, k, v, attn_bias=None):
  class T5Attention (line 178) | class T5Attention(nn.Module):
    method __init__ (line 180) | def __init__(
    method _relative_position_bucket (line 241) | def _relative_position_bucket(
    method compute_bias (line 298) | def compute_bias(self, query_length, key_length, device=None) -> torch...
    method forward (line 324) | def forward(
  class T5LayerSelfAttention (line 378) | class T5LayerSelfAttention(nn.Module):
    method __init__ (line 380) | def __init__(
    method forward (line 397) | def forward(
  class T5LayerCrossAttention (line 416) | class T5LayerCrossAttention(nn.Module):
    method __init__ (line 418) | def __init__(
    method forward (line 431) | def forward(
  class T5Block (line 445) | class T5Block(nn.Module):
    method __init__ (line 447) | def __init__(
    method forward (line 476) | def forward(
  class T5Stack (line 505) | class T5Stack(nn.Module):
    method __init__ (line 507) | def __init__(
    method forward (line 549) | def forward(
  class T5EncoderModel (line 568) | class T5EncoderModel(TextEncoder):
    method __init__ (line 570) | def __init__(self, config: T5Config, prefix: str = ""):
    method get_input_embeddings (line 592) | def get_input_embeddings(self):
    method forward (line 595) | def forward(
    method load_weights (line 613) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->...
  class UMT5EncoderModel (line 657) | class UMT5EncoderModel(TextEncoder):
    method __init__ (line 659) | def __init__(self, config: T5Config, prefix: str = ""):
    method get_input_embeddings (line 681) | def get_input_embeddings(self):
    method forward (line 684) | def forward(
    method load_weights (line 705) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->...

FILE: python/sglang/multimodal_gen/runtime/models/encoders/vision.py
  class VisionEncoderInfo (line 19) | class VisionEncoderInfo(ABC, Generic[_C]):
    method __init__ (line 21) | def __init__(self, vision_config: _C) -> None:
    method get_num_image_tokens (line 27) | def get_num_image_tokens(
    method get_max_image_tokens (line 36) | def get_max_image_tokens(self) -> int:
    method get_image_size (line 40) | def get_image_size(self) -> int:
    method get_patch_size (line 44) | def get_patch_size(self) -> int:
    method get_patch_grid_length (line 48) | def get_patch_grid_length(self) -> int:
  function resolve_visual_encoder_outputs (line 52) | def resolve_visual_encoder_outputs(

FILE: python/sglang/multimodal_gen/runtime/models/parameter.py
  class BasevLLMParameter (line 20) | class BasevLLMParameter(Parameter):
    method __new__ (line 27) | def __new__(cls, data: torch.Tensor, **kwargs):
    method __init__ (line 31) | def __init__(self, data: torch.Tensor, weight_loader: Callable):
    method weight_loader (line 57) | def weight_loader(self):
    method _is_1d_and_scalar (line 60) | def _is_1d_and_scalar(self, loaded_weight: torch.Tensor):
    method _assert_and_load (line 65) | def _assert_and_load(self, loaded_weight: torch.Tensor) -> None:
    method load_column_parallel_weight (line 71) | def load_column_parallel_weight(self, loaded_weight: torch.Tensor) -> ...
    method load_row_parallel_weight (line 74) | def load_row_parallel_weight(self, loaded_weight: torch.Tensor) -> None:
    method load_merged_column_weight (line 77) | def load_merged_column_weight(self, loaded_weight: torch.Tensor, **kwa...
    method load_qkv_weight (line 80) | def load_qkv_weight(self, loaded_weight: torch.Tensor, **kwargs) -> None:
  class _ColumnvLLMParameter (line 84) | class _ColumnvLLMParameter(BasevLLMParameter):
    method __init__ (line 95) | def __init__(self, output_dim: int, **kwargs):
    method output_dim (line 100) | def output_dim(self):
    method load_column_parallel_weight (line 103) | def load_column_parallel_weight(self, loaded_weight: torch.Tensor) -> ...
    method load_merged_column_weight (line 112) | def load_merged_column_weight(self, loaded_weight: torch.Tensor, **kwa...
    method load_qkv_weight (line 136) | def load_qkv_weight(self, loaded_weight: torch.Tensor, **kwargs) -> None:
  class RowvLLMParameter (line 168) | class RowvLLMParameter(BasevLLMParameter):
    method __init__ (line 176) | def __init__(self, input_dim: int, **kwargs):
    method input_dim (line 181) | def input_dim(self):
    method load_row_parallel_weight (line 184) | def load_row_parallel_weight(self, loaded_weight: torch.Tensor) -> None:
  class ModelWeightParameter (line 198) | class ModelWeightParameter(_ColumnvLLMParameter, RowvLLMParameter):
  class GroupQuantScaleParameter (line 207) | class GroupQuantScaleParameter(_ColumnvLLMParameter, RowvLLMParameter):
  class ChannelQuantScaleParameter (line 216) | class ChannelQuantScaleParameter(_ColumnvLLMParameter):
  class PerTensorScaleParameter (line 225) | class PerTensorScaleParameter(BasevLLMParameter):
    method __init__ (line 239) | def __init__(self, **kwargs):
    method _shard_id_as_int (line 243) | def _shard_id_as_int(self, shard_id: str | int) -> int:
    method load_row_parallel_weight (line 255) | def load_row_parallel_weight(self, *args, **kwargs) -> None:
    method load_merged_column_weight (line 258) | def load_merged_column_weight(self, *args, **kwargs) -> None:
    method load_qkv_weight (line 261) | def load_qkv_weight(self, *args, **kwargs) -> None:
    method load_column_parallel_weight (line 264) | def load_column_parallel_weight(self, *args, **kwargs) -> None:
    method _load_into_shard_id (line 267) | def _load_into_shard_id(
  class PackedColumnParameter (line 289) | class PackedColumnParameter(_ColumnvLLMParameter):
    method __init__ (line 296) | def __init__(self, packed_factor: int | Fraction, packed_dim: int, **k...
    method packed_dim (line 302) | def packed_dim(self):
    method packed_factor (line 306) | def packed_factor(self):
    method adjust_shard_indexes_for_packing (line 309) | def adjust_shard_indexes_for_packing(
  class PackedvLLMParameter (line 319) | class PackedvLLMParameter(ModelWeightParameter):
    method __init__ (line 330) | def __init__(self, packed_factor: int | Fraction, packed_dim: int, **k...
    method packed_dim (line 336) | def packed_dim(self):
    method packed_factor (line 340) | def packed_factor(self):
    method adjust_shard_indexes_for_packing (line 343) | def adjust_shard_indexes_for_packing(self, shard_size, shard_offset):
  class BlockQuantScaleParameter (line 351) | class BlockQuantScaleParameter(_ColumnvLLMParameter, RowvLLMParameter):
  function permute_param_layout_ (line 360) | def permute_param_layout_(
  function _adjust_shard_indexes_for_packing (line 418) | def _adjust_shard_indexes_for_packing(

FILE: python/sglang/multimodal_gen/runtime/models/registry.py
  function _parse_aliases_from_ast (line 44) | def _parse_aliases_from_ast(value_node: ast.expr) -> list[str]:
  function _discover_and_register_models (line 55) | def _discover_and_register_models() -> dict[str, tuple[str, str, str]]:
  class _ModelInfo (line 166) | class _ModelInfo:
    method from_model_cls (line 170) | def from_model_cls(model: type[nn.Module]) -> "_ModelInfo":
  class _BaseRegisteredModel (line 176) | class _BaseRegisteredModel(ABC):
    method inspect_model_cls (line 179) | def inspect_model_cls(self) -> _ModelInfo:
    method load_model_cls (line 183) | def load_model_cls(self) -> type[nn.Module]:
  class _RegisteredModel (line 188) | class _RegisteredModel(_BaseRegisteredModel):
    method from_model_cls (line 197) | def from_model_cls(model_cls: type[nn.Module]):
    method inspect_model_cls (line 203) | def inspect_model_cls(self) -> _ModelInfo:
    method load_model_cls (line 206) | def load_model_cls(self) -> type[nn.Module]:
  function _run_in_subprocess (line 210) | def _run_in_subprocess(fn: Callable[[], _T]) -> _T:
  class _LazyRegisteredModel (line 239) | class _LazyRegisteredModel(_BaseRegisteredModel):
    method inspect_model_cls (line 249) | def inspect_model_cls(self) -> _ModelInfo:
    method load_model_cls (line 254) | def load_model_cls(self) -> type[nn.Module]:
  function _try_load_model_cls (line 260) | def _try_load_model_cls(
  function _try_inspect_model_cls (line 275) | def _try_inspect_model_cls(
  class _ModelRegistry (line 287) | class _ModelRegistry:
    method get_supported_archs (line 291) | def get_supported_archs(self) -> Set[str]:
    method resolve_by_alias (line 294) | def resolve_by_alias(self, alias: str) -> type[nn.Module] | None:
    method register_model (line 301) | def register_model(
    method _raise_for_unsupported (line 337) | def _raise_for_unsupported(self, architectures: list[str]) -> NoReturn:
    method _try_load_model_cls (line 351) | def _try_load_model_cls(self, model_arch: str) -> type[nn.Module] | None:
    method _try_inspect_model_cls (line 357) | def _try_inspect_model_cls(self, model_arch: str) -> _ModelInfo | None:
    method _normalize_archs (line 363) | def _normalize_archs(
    method inspect_model_cls (line 382) | def inspect_model_cls(
    method resolve_model_cls (line 395) | def resolve_model_cls(

FILE: python/sglang/multimodal_gen/runtime/models/schedulers/base.py
  class BaseScheduler (line 10) | class BaseScheduler(ABC):
    method __init__ (line 15) | def __init__(self, *args, **kwargs) -> None:
    method set_shift (line 26) | def set_shift(self, shift: float) -> None:
    method set_timesteps (line 30) | def set_timesteps(self, *args, **kwargs) -> None:
    method scale_model_input (line 34) | def scale_model_input(

FILE: python/sglang/multimodal_gen/runtime/models/schedulers/flow_match_pair.py
  class FlowMatchScheduler (line 13) | class FlowMatchScheduler(BaseScheduler):
    method __init__ (line 14) | def __init__(
    method set_shift (line 45) | def set_shift(self, shift: float) -> None:
    method set_timesteps (line 48) | def set_timesteps(
    method scale_model_input (line 105) | def scale_model_input(self, sample: torch.Tensor, timestep: int | None...
    method step (line 108) | def step(self, model_output, timestep, sample, to_final=False, **kwargs):
    method return_to_timestep (line 120) | def return_to_timestep(self, timestep, sample, sample_stablized):
    method add_noise (line 128) | def add_noise(self, original_samples, noise, timestep):
    method training_target (line 136) | def training_target(self, sample, noise, timestep):
    method training_weight (line 140) | def training_weight(self, timestep):
    method calculate_shift (line 147) | def calculate_shift(
  class FlowMatchPairScheduler (line 161) | class FlowMatchPairScheduler(FlowMatchScheduler):
    method __init__ (line 171) | def __init__(
    method set_pair_postprocess (line 205) | def set_pair_postprocess(self, fn):
    method set_pair_postprocess_by_name (line 226) | def set_pair_postprocess_by_name(self, name: str | None, **kwargs):
    method _make_pairs_from_vector (line 442) | def _make_pairs_from_vector(self, vec: torch.Tensor) -> torch.Tensor:
    method get_pairs (line 447) | def get_pairs(self, source: str = "timesteps") -> torch.Tensor:
    method timestep_to_sigma (line 458) | def timestep_to_sigma(self, timestep: torch.Tensor | float) -> torch.T...
    method step_from_to (line 472) | def step_from_to(
    method _refresh_pair_cache (line 505) | def _refresh_pair_cache(self) -> None:

FILE: python/sglang/multimodal_gen/runtime/models/schedulers/hunyuan3d_scheduler.py
  class Hunyuan3DFlowMatchSchedulerOutput (line 16) | class Hunyuan3DFlowMatchSchedulerOutput(BaseOutput):
  class Hunyuan3DFlowMatchEulerDiscreteScheduler (line 22) | class Hunyuan3DFlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMix...
    method __init__ (line 35) | def __init__(
    method step_index (line 59) | def step_index(self) -> Optional[int]:
    method begin_index (line 64) | def begin_index(self) -> Optional[int]:
    method set_begin_index (line 68) | def set_begin_index(self, begin_index: int = 0):
    method scale_model_input (line 76) | def scale_model_input(
    method scale_noise (line 84) | def scale_noise(
    method _sigma_to_t (line 116) | def _sigma_to_t(self, sigma: float) -> float:
    method time_shift (line 120) | def time_shift(self, mu: float, sigma: float, t: torch.Tensor) -> torc...
    method set_timesteps (line 124) | def set_timesteps(
    method index_for_timestep (line 160) | def index_for_timestep(
    method _init_step_index (line 171) | def _init_step_index(self, timestep: Union[float, torch.Tensor]):
    method step (line 180) | def step(
    method __len__ (line 218) | def __len__(self) -> int:
  class Hunyuan3DConsistencyFlowMatchSchedulerOutput (line 223) | class Hunyuan3DConsistencyFlowMatchSchedulerOutput(BaseOutput):
  class Hunyuan3DConsistencyFlowMatchEulerDiscreteScheduler (line 230) | class Hunyuan3DConsistencyFlowMatchEulerDiscreteScheduler(SchedulerMixin...
    method __init__ (line 242) | def __init__(
    method step_index (line 264) | def step_index(self) -> Optional[int]:
    method begin_index (line 268) | def begin_index(self) -> Optional[int]:
    method set_begin_index (line 271) | def set_begin_index(self, begin_index: int = 0):
    method scale_model_input (line 274) | def scale_model_input(
    method _sigma_to_t (line 282) | def _sigma_to_t(self, sigma: float) -> float:
    method set_timesteps (line 285) | def set_timesteps(
    method index_for_timestep (line 311) | def index_for_timestep(
    method _init_step_index (line 320) | def _init_step_index(self, timestep: Union[float, torch.Tensor]):
    method step (line 328) | def step(
    method __len__ (line 363) | def __len__(self) -> int:

FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_comfyui_passthrough.py
  class ComfyUIPassThroughSchedulerOutput (line 21) | class ComfyUIPassThroughSchedulerOutput(BaseOutput):
  class ComfyUIPassThroughScheduler (line 32) | class ComfyUIPassThroughScheduler(BaseScheduler, ConfigMixin, SchedulerM...
    method __init__ (line 50) | def __init__(
    method set_timesteps (line 63) | def set_timesteps(
    method step (line 92) | def step(
    method scale_model_input (line 126) | def scale_model_input(
    method set_shift (line 141) | def set_shift(self, shift: float) -> None:
    method set_begin_index (line 150) | def set_begin_index(self, begin_index: int = 0) -> None:
    method begin_index (line 160) | def begin_index(self) -> int | None:
    method step_index (line 167) | def step_index(self) -> int:
    method add_noise (line 173) | def add_noise(

FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_dpm_solver_multistep.py
  class DPMSolverMultistepScheduler (line 22) | class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin, BaseSched...
    method __init__ (line 29) | def __init__(
    method set_shift (line 94) | def set_shift(self, shift: float) -> None:
    method set_begin_index (line 97) | def set_begin_index(self, begin_index: int = 0) -> None:
    method begin_index (line 101) | def begin_index(self) -> int | None:
    method set_timesteps (line 104) | def set_timesteps(self, num_inference_steps: int, device=None, **kwargs):
    method scale_model_input (line 108) | def scale_model_input(
    method step (line 113) | def step(
    method sigmas (line 123) | def sigmas(self):
    method init_noise_sigma (line 127) | def init_noise_sigma(self):
    method add_noise (line 130) | def add_noise(

FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_flow_match_euler_discrete.py
  class FlowMatchEulerDiscreteSchedulerOutput (line 41) | class FlowMatchEulerDiscreteSchedulerOutput(BaseOutput):
  class FlowMatchEulerDiscreteScheduler (line 54) | class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin, BaseS...
    method __init__ (line 98) | def __init__(
    method shift (line 157) | def shift(self) -> float:
    method step_index (line 164) | def step_index(self) -> int | None:
    method begin_index (line 171) | def begin_index(self) -> int | None:
    method set_begin_index (line 178) | def set_begin_index(self, begin_index: int = 0) -> None:
    method set_shift (line 188) | def set_shift(self, shift: float) -> None:
    method scale_noise (line 191) | def scale_noise(
    method _sigma_to_t (line 233) | def _sigma_to_t(self, sigma: float) -> float:
    method time_shift (line 236) | def time_shift(
    method stretch_shift_to_terminal (line 246) | def stretch_shift_to_terminal(self, t: torch.Tensor) -> torch.Tensor:
    method set_timesteps (line 267) | def set_timesteps(
    method index_for_timestep (line 413) | def index_for_timestep(
    method _init_step_index (line 431) | def _init_step_index(self, timestep: float | torch.FloatTensor) -> None:
    method step (line 439) | def step(
    method _convert_to_karras (line 539) | def _convert_to_karras(
    method _convert_to_exponential (line 567) | def _convert_to_exponential(
    method _convert_to_beta (line 593) | def _convert_to_beta(
    method _time_shift_exponential (line 628) | def _time_shift_exponential(
    method _time_shift_linear (line 636) | def _time_shift_linear(
    method add_noise (line 641) | def add_noise(
    method scale_model_input (line 679) | def scale_model_input(
    method __len__ (line 684) | def __len__(self) -> int:

FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_flow_unipc_multistep.py
  class FlowUniPCMultistepScheduler (line 24) | class FlowUniPCMultistepScheduler(SchedulerMixin, ConfigMixin, BaseSched...
    method __init__ (line 81) | def __init__(
    method step_index (line 143) | def step_index(self):
    method begin_index (line 150) | def begin_index(self):
    method set_shift (line 156) | def set_shift(self, shift: float) -> None:
    method set_begin_index (line 160) | def set_begin_index(self, begin_index: int = 0):
    method set_timesteps (line 171) | def set_timesteps(
    method _threshold_sample (line 244) | def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
    method _sigma_to_alpha_sigma_t (line 281) | def _sigma_to_alpha_sigma_t(self, sigma) -> tuple[Any, Any]:
    method time_shift (line 285) | def time_shift(self, mu: float, sigma: float, t: torch.Tensor):
    method convert_model_output (line 288) | def convert_model_output(
    method multistep_uni_p_bh_update (line 355) | def multistep_uni_p_bh_update(
    method multistep_uni_c_bh_update (line 498) | def multistep_uni_c_bh_update(
    method index_for_timestep (line 667) | def index_for_timestep(self, timestep, schedule_timesteps=None) -> int:
    method _init_step_index (line 683) | def _init_step_index(self, timestep) -> None:
    method step (line 695) | def step(
    method scale_model_input (line 786) | def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> ...
    method add_noise (line 802) | def add_noise(

FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_helios.py
  class HeliosSchedulerOutput (line 19) | class HeliosSchedulerOutput:
  class HeliosSchedulerConfig (line 26) | class HeliosSchedulerConfig:
    method __init__ (line 29) | def __init__(self, **kwargs):
    method get (line 33) | def get(self, key, default=None):
  class HeliosScheduler (line 37) | class HeliosScheduler:
    method __init__ (line 47) | def __init__(
    method init_sigmas (line 118) | def init_sigmas(self):
    method init_sigmas_for_each_stage (line 133) | def init_sigmas_for_each_stage(self):
    method step_index (line 197) | def step_index(self):
    method begin_index (line 201) | def begin_index(self):
    method set_begin_index (line 204) | def set_begin_index(self, begin_index: int = 0):
    method time_shift (line 207) | def time_shift(self, mu, sigma, t):
    method set_timesteps (line 213) | def set_timesteps(
    method index_for_timestep (line 280) | def index_for_timestep(self, timestep, schedule_timesteps=None):
    method _init_step_index (line 287) | def _init_step_index(self, timestep):
    method step_euler (line 295) | def step_euler(
    method _sigma_to_alpha_sigma_t (line 320) | def _sigma_to_alpha_sigma_t(self, sigma):
    method convert_model_output (line 329) | def convert_model_output(self, model_output, sample=None, sigma=None, ...
    method multistep_uni_p_bh_update (line 366) | def multistep_uni_p_bh_update(
    method multistep_uni_c_bh_update (line 450) | def multistep_uni_c_bh_update(
    method step_unipc (line 547) | def step_unipc(
    method add_noise (line 611) | def add_noise(self, original_samples, noise, timestep, sigmas, timeste...
    method convert_flow_pred_to_x0 (line 621) | def convert_flow_pred_to_x0(self, flow_pred, xt, timestep, sigmas, tim...
    method step_dmd (line 634) | def step_dmd(
    method step (line 680) | def step(
    method reset_scheduler_history (line 715) | def reset_scheduler_history(self):
    method set_shift (line 725) | def set_shift(self, shift: float):
    method __len__ (line 730) | def __len__(self):

FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_self_forcing_flow_match.py
  class SelfForcingFlowMatchSchedulerOutput (line 15) | class SelfForcingFlowMatchSchedulerOutput(BaseOutput):
  class SelfForcingFlowMatchScheduler (line 28) | class SelfForcingFlowMatchScheduler(BaseScheduler, ConfigMixin, Schedule...
    method __init__ (line 33) | def __init__(
    method set_timesteps (line 55) | def set_timesteps(
    method step (line 80) | def step(
    method add_noise (line 113) | def add_noise(self, original_samples, noise, timestep):
    method scale_model_input (line 133) | def scale_model_input(
    method set_shift (line 138) | def set_shift(self, shift: float) -> None:

FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_unipc_multistep.py
  function betas_for_alpha_bar (line 44) | def betas_for_alpha_bar(
  function rescale_zero_terminal_snr (line 89) | def rescale_zero_terminal_snr(betas):
  class UniPCMultistepScheduler (line 125) | class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin, BaseScheduler):
    method __init__ (line 201) | def __init__(
    method step_index (line 321) | def step_index(self):
    method begin_index (line 328) | def begin_index(self):
    method set_shift (line 334) | def set_shift(self, shift: float) -> None:
    method set_begin_index (line 338) | def set_begin_index(self, begin_index: int = 0):
    method set_timesteps (line 348) | def set_timesteps(
    method _threshold_sample (line 516) | def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
    method _sigma_to_t (line 554) | def _sigma_to_t(self, sigma, log_sigmas):
    method _sigma_to_alpha_sigma_t (line 582) | def _sigma_to_alpha_sigma_t(self, sigma):
    method _convert_to_karras (line 593) | def _convert_to_karras(
    method _convert_to_exponential (line 621) | def _convert_to_exponential(
    method _convert_to_beta (line 647) | def _convert_to_beta(
    method convert_model_output (line 682) | def convert_model_output(
    method multistep_uni_p_bh_update (line 755) | def multistep_uni_p_bh_update(
    method multistep_uni_c_bh_update (line 887) | def multistep_uni_c_bh_update(
    method index_for_timestep (line 1028) | def index_for_timestep(self, timestep, schedule_timesteps=None):
    method _init_step_index (line 1048) | def _init_step_index(self, timestep):
    method step (line 1060) | def step(
    method scale_model_input (line 1147) | def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> ...
    method add_noise (line 1163) | def add_noise(
    method __len__ (line 1203) | def __len__(self):

FILE: python/sglang/multimodal_gen/runtime/models/utils.py
  function set_weight_attrs (line 12) | def set_weight_attrs(
  function _make_synced_weight_loader (line 46) | def _make_synced_weight_loader(original_weight_loader) -> Any:
  function extract_layer_index (line 55) | def extract_layer_index(layer_name: str) -> int:
  function modulate (line 77) | def modulate(
  function pred_noise_to_pred_video (line 95) | def pred_noise_to_pred_video(

FILE: python/sglang/multimodal_gen/runtime/models/vaes/autoencoder.py
  class AutoencoderKL (line 27) | class AutoencoderKL(nn.Module):
    method __init__ (line 65) | def __init__(
    method enable_tiling (line 138) | def enable_tiling(self, use_tiling: bool = True):
    method disable_tiling (line 146) | def disable_tiling(self):
    method enable_slicing (line 153) | def enable_slicing(self):
    method disable_slicing (line 160) | def disable_slicing(self):
    method attn_processors (line 169) | def attn_processors(self) -> Dict[str, AttentionProcessor]:
    method set_attn_processor (line 197) | def set_attn_processor(
    method set_default_attn_processor (line 234) | def set_default_attn_processor(self):
    method _encode (line 255) | def _encode(self, x: torch.Tensor) -> torch.Tensor:
    method encode (line 269) | def encode(
    method _decode (line 297) | def _decode(
    method decode (line 316) | def decode(self, z: torch.FloatTensor) -> Union[DecoderOutput, torch.F...
    method blend_v (line 338) | def blend_v(
    method blend_h (line 348) | def blend_h(
    method _tiled_encode (line 358) | def _tiled_encode(self, x: torch.Tensor) -> torch.Tensor:
    method tiled_encode (line 411) | def tiled_encode(
    method tiled_decode (line 480) | def tiled_decode(
    method forward (line 536) | def forward(
    method fuse_qkv_projections (line 559) | def fuse_qkv_projections(self):

FILE: python/sglang/multimodal_gen/runtime/models/vaes/autoencoder_dc.py
  class AutoencoderDC (line 14) | class AutoencoderDC(nn.Module):
    method __init__ (line 17) | def __init__(self, config: SanaVAEConfig = None, **kwargs):
    method _ensure_inner_model (line 23) | def _ensure_inner_model(self, state_dict: dict[str, torch.Tensor] | No...
    method config (line 71) | def config(self):
    method dtype (line 77) | def dtype(self):
    method device (line 83) | def device(self):
    method encode (line 88) | def encode(self, x: torch.Tensor, **kwargs):
    method decode (line 92) | def decode(self, z: torch.Tensor, **kwargs):
    method forward (line 97) | def forward(self, x: torch.Tensor, **kwargs):
    method load_state_dict (line 101) | def load_state_dict(
    method state_dict (line 110) | def state_dict(self, *args, **kwargs) -> dict[str, torch.Tensor]:
    method load_weights (line 114) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->...
    method to (line 122) | def to(self, *args, **kwargs):

FILE: python/sglang/multimodal_gen/runtime/models/vaes/autoencoder_kl_flux2.py
  class AutoencoderKLFlux2 (line 25) | class AutoencoderKLFlux2(ParallelTiledVAE):
    method __init__ (line 38) | def __init__(
    method attn_processors (line 125) | def attn_processors(self) -> Dict[str, AttentionProcessor]:
    method set_attn_processor (line 153) | def set_attn_processor(
    method set_default_attn_processor (line 190) | def set_default_attn_processor(self):
    method _encode (line 211) | def _encode(self, x: torch.Tensor) -> torch.Tensor:
    method encode (line 225) | def encode(
    method _decode (line 254) | def _decode(
    method decode (line 273) | def decode(
    method blend_v (line 298) | def blend_v(
    method blend_h (line 308) | def blend_h(
    method _tiled_encode (line 318) | def _tiled_encode(self, x: torch.Tensor) -> torch.Tensor:
    method tiled_encode (line 371) | def tiled_encode(
    method tiled_decode (line 439) | def tiled_decode(
    method forward (line 495) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/vaes/autoencoder_kl_qwenimage.py
  class QwenImageCausalConv3d (line 25) | class QwenImageCausalConv3d(nn.Conv3d):
    method __init__ (line 40) | def __init__(
    method forward (line 67) | def forward(self, x, cache_x=None):
  class QwenImageRMS_norm (line 77) | class QwenImageRMS_norm(nn.Module):
    method __init__ (line 89) | def __init__(
    method forward (line 105) | def forward(self, x):
  class QwenImageUpsample (line 114) | class QwenImageUpsample(nn.Upsample):
    method forward (line 122) | def forward(self, x):
  class QwenImageResample (line 126) | class QwenImageResample(nn.Module):
    method __init__ (line 140) | def __init__(self, dim: int, mode: str) -> None:
    method forward (line 175) | def forward(self, x, feat_cache=None, feat_idx=[0]):
  class QwenImageResidualBlock (line 240) | class QwenImageResidualBlock(nn.Module):
    method __init__ (line 251) | def __init__(
    method forward (line 275) | def forward(self, x, feat_cache=None, feat_idx=[0]):
  class QwenImageAttentionBlock (line 330) | class QwenImageAttentionBlock(nn.Module):
    method __init__ (line 338) | def __init__(self, dim):
    method forward (line 347) | def forward(self, x):
  class QwenImageMidBlock (line 379) | class QwenImageMidBlock(nn.Module):
    method __init__ (line 389) | def __init__(
    method forward (line 410) | def forward(self, x, feat_cache=None, feat_idx=[0]):
  class QwenImageEncoder3d (line 424) | class QwenImageEncoder3d(nn.Module):
    method __init__ (line 439) | def __init__(
    method forward (line 504) | def forward(self, x, feat_cache=None, feat_idx=[0]):
  class QwenImageUpBlock (line 556) | class QwenImageUpBlock(nn.Module):
    method __init__ (line 569) | def __init__(
    method forward (line 603) | def forward(self, x, feat_cache=None, feat_idx=[0]):
  class QwenImageDecoder3d (line 629) | class QwenImageDecoder3d(nn.Module):
    method __init__ (line 644) | def __init__(
    method forward (line 711) | def forward(self, x, feat_cache=None, feat_idx=[0]):
  class AutoencoderKLQwenImage (line 761) | class AutoencoderKLQwenImage(ParallelTiledVAE):
    method __init__ (line 772) | def __init__(
    method enable_tiling (line 844) | def enable_tiling(
    method disable_tiling (line 874) | def disable_tiling(self) -> None:
    method enable_slicing (line 881) | def enable_slicing(self) -> None:
    method disable_slicing (line 888) | def disable_slicing(self) -> None:
    method clear_cache (line 895) | def clear_cache(self):
    method _encode (line 911) | def _encode(self, x: torch.Tensor):
    method encode (line 935) | def encode(
    method _decode (line 959) | def _decode(self, z: torch.Tensor, return_dict: bool = True):
    method decode (line 984) | def decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[D...
    method blend_v (line 1006) | def blend_v(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int)...
    method blend_h (line 1014) | def blend_h(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int)...
    method tiled_encode (line 1022) | def tiled_encode(self, x: torch.Tensor) -> AutoencoderKLOutput:
    method tiled_decode (line 1088) | def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> U...
    method forward (line 1151) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/vaes/common.py
  class ParallelTiledVAE (line 24) | class ParallelTiledVAE(ABC, nn.Module):
    method __init__ (line 36) | def __init__(self, config: VAEConfig, **kwargs) -> None:
    method device (line 51) | def device(self):
    method temporal_compression_ratio (line 55) | def temporal_compression_ratio(self) -> int:
    method spatial_compression_ratio (line 59) | def spatial_compression_ratio(self) -> int:
    method scaling_factor (line 63) | def scaling_factor(self) -> float | torch.Tensor:
    method _encode (line 67) | def _encode(self, *args, **kwargs) -> torch.Tensor:
    method _decode (line 71) | def _decode(self, *args, **kwargs) -> torch.Tensor:
    method encode (line 74) | def encode(self, x: torch.Tensor) -> DiagonalGaussianDistribution:
    method decode (line 92) | def decode(self, z: torch.Tensor) -> torch.Tensor:
    method blend_v (line 121) | def blend_v(
    method blend_h (line 131) | def blend_h(
    method blend_t (line 141) | def blend_t(
    method spatial_tiled_encode (line 151) | def spatial_tiled_encode(self, x: torch.Tensor) -> torch.Tensor:
    method _parallel_data_generator (line 206) | def _parallel_data_generator(
    method parallel_tiled_decode (line 223) | def parallel_tiled_decode(self, z: torch.FloatTensor) -> torch.FloatTe...
    method _merge_spatial_tiles (line 369) | def _merge_spatial_tiles(
    method spatial_tiled_decode (line 385) | def spatial_tiled_decode(self, z: torch.Tensor) -> torch.Tensor:
    method tiled_encode (line 441) | def tiled_encode(self, x: torch.Tensor) -> torch.Tensor:
    method tiled_decode (line 472) | def tiled_decode(self, z: torch.Tensor) -> torch.Tensor:
    method enable_tiling (line 516) | def enable_tiling(
    method disable_tiling (line 579) | def disable_tiling(self) -> None:
  class DiagonalGaussianDistribution (line 588) | class DiagonalGaussianDistribution:
    method __init__ (line 590) | def __init__(self, parameters: torch.Tensor, deterministic: bool = Fal...
    method sample (line 602) | def sample(self, generator: torch.Generator | None = None) -> torch.Te...
    method kl (line 613) | def kl(
    method nll (line 636) | def nll(
    method mode (line 647) | def mode(self) -> torch.Tensor:

FILE: python/sglang/multimodal_gen/runtime/models/vaes/dac.py
  function snake (line 22) | def snake(x, alpha):
  class Snake1d (line 30) | class Snake1d(nn.Module):
    method __init__ (line 31) | def __init__(self, channels):
    method forward (line 35) | def forward(self, x):
  class VectorQuantize (line 39) | class VectorQuantize(nn.Module):
    method __init__ (line 51) | def __init__(self, input_dim: int, codebook_size: int, codebook_dim: i...
    method forward (line 60) | def forward(self, z):
    method embed_code (line 91) | def embed_code(self, embed_id):
    method decode_code (line 94) | def decode_code(self, embed_id):
    method decode_latents (line 97) | def decode_latents(self, latents):
  class ResidualVectorQuantize (line 116) | class ResidualVectorQuantize(nn.Module):
    method __init__ (line 122) | def __init__(
    method forward (line 150) | def forward(self, z, n_quantizers: int = None):
    method from_codes (line 240) | def from_codes(self, codes: torch.Tensor):
    method from_latents (line 263) | def from_latents(self, latents: torch.Tensor):
  class ResidualUnit (line 292) | class ResidualUnit(nn.Module):
    method __init__ (line 293) | def __init__(self, dim: int = 16, dilation: int = 1):
    method forward (line 303) | def forward(self, x):
  class EncoderBlock (line 311) | class EncoderBlock(nn.Module):
    method __init__ (line 312) | def __init__(self, dim: int = 16, stride: int = 1):
    method forward (line 328) | def forward(self, x):
  class Encoder (line 332) | class Encoder(nn.Module):
    method __init__ (line 333) | def __init__(
    method forward (line 358) | def forward(self, x):
  class DecoderBlock (line 362) | class DecoderBlock(nn.Module):
    method __init__ (line 363) | def __init__(self, input_dim: int = 16, output_dim: int = 8, stride: i...
    method forward (line 380) | def forward(self, x):
  class Decoder (line 384) | class Decoder(nn.Module):
    method __init__ (line 385) | def __init__(
    method forward (line 412) | def forward(self, x):
  class DAC (line 416) | class DAC(nn.Module):
    method __init__ (line 417) | def __init__(
    method init_weights (line 466) | def init_weights(m):
    method dtype (line 472) | def dtype(self):
    method device (line 476) | def device(self):
    method preprocess (line 479) | def preprocess(self, audio_data, sample_rate):
    method encode (line 490) | def encode(
    method decode (line 535) | def decode(self, z: torch.Tensor):
    method forward (line 565) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/vaes/hunyuan3d_vae.py
  class CrossAttentionProcessor (line 23) | class CrossAttentionProcessor:
    method __call__ (line 24) | def __call__(self, attn, q, k, v):
  class FlashVDMCrossAttentionProcessor (line 29) | class FlashVDMCrossAttentionProcessor:
    method __init__ (line 30) | def __init__(self, topk=None):
    method __call__ (line 33) | def __call__(self, attn, q, k, v):
    method select_topkv (line 67) | def select_topkv(self, q_chunk, k, v, topk):
  class FlashVDMTopMCrossAttentionProcessor (line 78) | class FlashVDMTopMCrossAttentionProcessor(FlashVDMCrossAttentionProcessor):
    method select_topkv (line 79) | def select_topkv(self, q_chunk, k, v, topk):
  class FourierEmbedder (line 98) | class FourierEmbedder(nn.Module):
    method __init__ (line 99) | def __init__(
    method get_dims (line 127) | def get_dims(self, input_dim):
    method forward (line 133) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class DropPath (line 148) | class DropPath(nn.Module):
    method __init__ (line 151) | def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True):
    method forward (line 156) | def forward(self, x):
    method extra_repr (line 169) | def extra_repr(self):
  class MLP (line 173) | class MLP(nn.Module):
    method __init__ (line 174) | def __init__(
    method forward (line 193) | def forward(self, x):
  class QKVMultiheadCrossAttention (line 197) | class QKVMultiheadCrossAttention(nn.Module):
    method __init__ (line 198) | def __init__(
    method forward (line 223) | def forward(self, q, kv):
  class MultiheadCrossAttention (line 241) | class MultiheadCrossAttention(nn.Module):
    method __init__ (line 242) | def __init__(
    method forward (line 272) | def forward(self, x, data):
  class ResidualCrossAttentionBlock (line 288) | class ResidualCrossAttentionBlock(nn.Module):
    method __init__ (line 289) | def __init__(
    method forward (line 320) | def forward(self, x: torch.Tensor, data: torch.Tensor):
  class QKVMultiheadAttention (line 326) | class QKVMultiheadAttention(nn.Module):
    method __init__ (line 327) | def __init__(
    method forward (line 350) | def forward(self, qkv):
  class MultiheadAttention (line 368) | class MultiheadAttention(nn.Module):
    method __init__ (line 369) | def __init__(
    method forward (line 397) | def forward(self, x):
  class ResidualAttentionBlock (line 404) | class ResidualAttentionBlock(nn.Module):
    method __init__ (line 405) | def __init__(
    method forward (line 430) | def forward(self, x: torch.Tensor):
  class Transformer (line 436) | class Transformer(nn.Module):
    method __init__ (line 437) | def __init__(
    method forward (line 468) | def forward(self, x: torch.Tensor):
  class CrossAttentionDecoder (line 474) | class CrossAttentionDecoder(nn.Module):
    method __init__ (line 476) | def __init__(
    method set_cross_attention_processor (line 515) | def set_cross_attention_processor(self, processor):
    method forward (line 518) | def forward(self, queries=None, query_embeddings=None, latents=None):
  function generate_dense_grid_points (line 535) | def generate_dense_grid_points(
  function extract_near_surface_volume_fn (line 554) | def extract_near_surface_volume_fn(input_tensor: torch.Tensor, alpha: fl...
  class VanillaVolumeDecoder (line 620) | class VanillaVolumeDecoder:
    method __call__ (line 624) | def __call__(
  class HierarchicalVolumeDecoding (line 672) | class HierarchicalVolumeDecoding:
    method __call__ (line 676) | def __call__(
  class FlashVDMVolumeDecoding (line 800) | class FlashVDMVolumeDecoding:
    method __init__ (line 803) | def __init__(self, topk_mode="mean"):
    method __call__ (line 813) | def __call__(
  class Latent2MeshOutput (line 1004) | class Latent2MeshOutput:
    method __init__ (line 1007) | def __init__(self, mesh_v=None, mesh_f=None):
  function center_vertices (line 1012) | def center_vertices(vertices):
  class SurfaceExtractor (line 1020) | class SurfaceExtractor:
    method _compute_box_stat (line 1023) | def _compute_box_stat(
    method run (line 1038) | def run(self, *args, **kwargs):
    method __call__ (line 1041) | def __call__(self, grid_logits, **kwargs):
  class MCSurfaceExtractor (line 1057) | class MCSurfaceExtractor(SurfaceExtractor):
    method run (line 1060) | def run(self, grid_logit, *, mc_level, bounds, octree_resolution, **kw...
  class DMCSurfaceExtractor (line 1073) | class DMCSurfaceExtractor(SurfaceExtractor):
    method run (line 1076) | def run(self, grid_logit, *, octree_resolution, **kwargs):
  class VectsetVAE (line 1102) | class VectsetVAE(nn.Module):
    method __init__ (line 1105) | def __init__(self, volume_decoder=None, surface_extractor=None):
    method latents2mesh (line 1114) | def latents2mesh(self, latents: torch.FloatTensor, **kwargs):
    method enable_flashvdm_decoder (line 1120) | def enable_flashvdm_decoder(
  class ShapeVAE (line 1143) | class ShapeVAE(VectsetVAE):
    method __init__ (line 1148) | def __init__(
    method forward (line 1211) | def forward(self, latents):
    method decode (line 1216) | def decode(self, latents):

FILE: python/sglang/multimodal_gen/runtime/models/vaes/hunyuanvae.py
  function prepare_causal_attention_mask (line 30) | def prepare_causal_attention_mask(
  class HunyuanVAEAttention (line 47) | class HunyuanVAEAttention(nn.Module):
    method __init__ (line 49) | def __init__(
    method forward (line 73) | def forward(
  class HunyuanVideoCausalConv3d (line 114) | class HunyuanVideoCausalConv3d(nn.Module):
    method __init__ (line 116) | def __init__(
    method forward (line 149) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class HunyuanVideoUpsampleCausal3D (line 156) | class HunyuanVideoUpsampleCausal3D(nn.Module):
    method __init__ (line 158) | def __init__(
    method forward (line 176) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class HunyuanVideoDownsampleCausal3D (line 205) | class HunyuanVideoDownsampleCausal3D(nn.Module):
    method __init__ (line 207) | def __init__(
    method forward (line 223) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class HunyuanVideoResnetBlockCausal3D (line 228) | class HunyuanVideoResnetBlockCausal3D(nn.Module):
    method __init__ (line 230) | def __init__(
    method forward (line 257) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class HunyuanVideoMidBlock3D (line 277) | class HunyuanVideoMidBlock3D(nn.Module):
    method __init__ (line 279) | def __init__(
    method forward (line 340) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class HunyuanVideoDownBlock3D (line 392) | class HunyuanVideoDownBlock3D(nn.Module):
    method __init__ (line 394) | def __init__(
    method forward (line 441) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class HunyuanVideoUpBlock3D (line 456) | class HunyuanVideoUpBlock3D(nn.Module):
    method __init__ (line 458) | def __init__(
    method forward (line 504) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class HunyuanVideoEncoder3D (line 520) | class HunyuanVideoEncoder3D(nn.Module):
    method __init__ (line 525) | def __init__(
    method forward (line 616) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class HunyuanVideoDecoder3D (line 641) | class HunyuanVideoDecoder3D(nn.Module):
    method __init__ (line 646) | def __init__(
    method forward (line 737) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class AutoencoderKLHunyuanVideo (line 763) | class AutoencoderKLHunyuanVideo(ParallelTiledVAE):
    method __init__ (line 774) | def __init__(
    method _encode (line 820) | def _encode(self, x: torch.Tensor) -> torch.Tensor:
    method _decode (line 825) | def _decode(self, z: torch.Tensor) -> torch.Tensor:
    method forward (line 830) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/vaes/ltx_2_audio.py
  class LTX2AudioCausalConv2d (line 18) | class LTX2AudioCausalConv2d(nn.Module):
    method __init__ (line 23) | def __init__(
    method forward (line 66) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LTX2AudioPixelNorm (line 71) | class LTX2AudioPixelNorm(nn.Module):
    method __init__ (line 76) | def __init__(self, dim: int = 1, eps: float = 1e-8) -> None:
    method forward (line 81) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LTX2AudioAttnBlock (line 87) | class LTX2AudioAttnBlock(nn.Module):
    method __init__ (line 88) | def __init__(
    method forward (line 111) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LTX2AudioResnetBlock (line 131) | class LTX2AudioResnetBlock(nn.Module):
    method __init__ (line 132) | def __init__(
    method forward (line 228) | def forward(
  class LTX2AudioDownsample (line 253) | class LTX2AudioDownsample(nn.Module):
    method __init__ (line 254) | def __init__(
    method forward (line 269) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LTX2AudioUpsample (line 294) | class LTX2AudioUpsample(nn.Module):
    method __init__ (line 295) | def __init__(
    method forward (line 318) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LTX2AudioAudioPatchifier (line 336) | class LTX2AudioAudioPatchifier:
    method __init__ (line 341) | def __init__(
    method patchify (line 355) | def patchify(self, audio_latents: torch.Tensor) -> torch.Tensor:
    method unpatchify (line 359) | def unpatchify(
    method patch_size (line 366) | def patch_size(self) -> Tuple[int, int, int]:
  class LTX2AudioEncoder (line 370) | class LTX2AudioEncoder(nn.Module):
    method __init__ (line 371) | def __init__(
    method forward (line 513) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class LTX2AudioDecoder (line 538) | class LTX2AudioDecoder(nn.Module):
    method __init__ (line 546) | def __init__(
    method forward (line 699) | def forward(
  class AutoencoderKLLTX2Audio (line 764) | class AutoencoderKLLTX2Audio(ParallelTiledVAE):
    method __init__ (line 771) | def __init__(
    method _encode (line 855) | def _encode(self, x: torch.Tensor) -> torch.Tensor:
    method encode (line 858) | def encode(self, x: torch.Tensor, return_dict: bool = True):
    method _decode (line 870) | def _decode(self, z: torch.Tensor) -> torch.Tensor:
    method decode (line 873) | def decode(
    method forward (line 887) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/vaes/ltx_2_vae.py
  class PerChannelRMSNorm (line 17) | class PerChannelRMSNorm(nn.Module):
    method __init__ (line 27) | def __init__(self, channel_dim: int = 1, eps: float = 1e-8) -> None:
    method forward (line 37) | def forward(
  class LTX2VideoCausalConv3d (line 52) | class LTX2VideoCausalConv3d(nn.Module):
    method __init__ (line 53) | def __init__(
    method forward (line 90) | def forward(self, hidden_states: torch.Tensor, causal: bool = True) ->...
  class LTX2VideoResnetBlock3d (line 115) | class LTX2VideoResnetBlock3d(nn.Module):
    method __init__ (line 136) | def __init__(
    method forward (line 197) | def forward(
  class LTXVideoDownsampler3d (line 265) | class LTXVideoDownsampler3d(nn.Module):
    method __init__ (line 266) | def __init__(
    method forward (line 292) | def forward(self, hidden_states: torch.Tensor, causal: bool = True) ->...
  class LTXVideoUpsampler3d (line 319) | class LTXVideoUpsampler3d(nn.Module):
    method __init__ (line 320) | def __init__(
    method forward (line 346) | def forward(self, hidden_states: torch.Tensor, causal: bool = True) ->...
  class LTX2VideoDownBlock3D (line 398) | class LTX2VideoDownBlock3D(nn.Module):
    method __init__ (line 424) | def __init__(
    method forward (line 498) | def forward(
  class LTX2VideoMidBlock3d (line 524) | class LTX2VideoMidBlock3d(nn.Module):
    method __init__ (line 545) | def __init__(
    method forward (line 582) | def forward(
  class LTX2VideoUpBlock3d (line 613) | class LTX2VideoUpBlock3d(nn.Module):
    method __init__ (line 639) | def __init__(
    method forward (line 709) | def forward(
  class LTX2VideoEncoder3d (line 746) | class LTX2VideoEncoder3d(nn.Module):
    method __init__ (line 777) | def __init__(
    method forward (line 862) | def forward(
  class LTX2VideoDecoder3d (line 917) | class LTX2VideoDecoder3d(nn.Module):
    method __init__ (line 945) | def __init__(
    method forward (line 1044) | def forward(
  class AutoencoderKLLTX2Video (line 1107) | class AutoencoderKLLTX2Video(ParallelTiledVAE):
    method __init__ (line 1117) | def __init__(self, config: LTXVideoVAEConfig):
    method enable_tiling (line 1204) | def enable_tiling(
    method _encode (line 1248) | def _encode(self, x: torch.Tensor, causal: Optional[bool] = None) -> t...
    method encode (line 1263) | def encode(
    method _decode (line 1291) | def _decode(
    method decode (line 1326) | def decode(
    method blend_v (line 1366) | def blend_v(
    method blend_h (line 1376) | def blend_h(
    method blend_t (line 1386) | def blend_t(
    method tiled_encode (line 1396) | def tiled_encode(
    method tiled_decode (line 1466) | def tiled_decode(
    method _temporal_tiled_encode (line 1556) | def _temporal_tiled_encode(
    method _temporal_tiled_decode (line 1595) | def _temporal_tiled_decode(
    method forward (line 1654) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/vaes/parallel/wan_common_utils.py
  class AvgDown3D (line 10) | class AvgDown3D(nn.Module):
    method __init__ (line 11) | def __init__(
    method forward (line 28) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class DupUp3D (line 63) | class DupUp3D(nn.Module):
    method __init__ (line 64) | def __init__(
    method forward (line 82) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class WanCausalConv3d (line 109) | class WanCausalConv3d(nn.Conv3d):
    method __init__ (line 117) | def __init__(
    method forward (line 144) | def forward(self, x, cache_x=None):
  class WanRMS_norm (line 157) | class WanRMS_norm(nn.Module):
    method __init__ (line 162) | def __init__(
    method forward (line 178) | def forward(self, x):
  class WanUpsample (line 187) | class WanUpsample(nn.Upsample):
    method forward (line 192) | def forward(self, x):
  function bind_context (line 203) | def bind_context(
  function _ensure_bound (line 222) | def _ensure_bound():
  function resample_forward (line 233) | def resample_forward(self, x):
  function residual_block_forward (line 315) | def residual_block_forward(self, x):
  function attention_block_forward (line 379) | def attention_block_forward(self, x):
  function mid_block_forward (line 411) | def mid_block_forward(self, x):
  function residual_down_block_forward (line 425) | def residual_down_block_forward(self, x):
  function residual_up_block_forward (line 435) | def residual_up_block_forward(self, x):
  function up_block_forward (line 451) | def up_block_forward(self, x):

FILE: python/sglang/multimodal_gen/runtime/models/vaes/parallel/wan_dist_utils.py
  function tensor_pad (line 31) | def tensor_pad(x: torch.Tensor, len_to_pad: int, dim: int = -2):
  function tensor_chunk (line 48) | def tensor_chunk(x: torch.Tensor, dim: int = -2, world_size: int = 1, ra...
  function split_for_parallel_encode (line 61) | def split_for_parallel_encode(
  function ensure_local_height (line 75) | def ensure_local_height(x: torch.Tensor, expected_local_height: int | No...
  function split_for_parallel_decode (line 86) | def split_for_parallel_decode(
  function gather_and_trim_height (line 94) | def gather_and_trim_height(x: torch.Tensor, expected_height: int | None):
  function _ensure_recv_buf (line 103) | def _ensure_recv_buf(
  function halo_exchange (line 116) | def halo_exchange(
  class WanDistConv2d (line 168) | class WanDistConv2d(nn.Conv2d):
    method __init__ (line 169) | def __init__(
    method forward (line 208) | def forward(self, x):
  class WanDistCausalConv3d (line 250) | class WanDistCausalConv3d(nn.Conv3d):
    method __init__ (line 251) | def __init__(
    method forward (line 297) | def forward(self, x, cache_x=None):
  class WanDistZeroPad2d (line 349) | class WanDistZeroPad2d(nn.Module):
    method __init__ (line 352) | def __init__(self, padding: tuple[int, int, int, int]) -> None:
    method forward (line 358) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class WanDistResample (line 368) | class WanDistResample(nn.Module):
    method __init__ (line 382) | def __init__(self, dim: int, mode: str, upsample_out_dim: int = None) ...
    method forward (line 422) | def forward(self, x):
  class WanDistResidualBlock (line 426) | class WanDistResidualBlock(nn.Module):
    method __init__ (line 437) | def __init__(
    method forward (line 461) | def forward(self, x):
  class WanDistAttentionBlock (line 465) | class WanDistAttentionBlock(nn.Module):
    method __init__ (line 473) | def __init__(self, dim) -> None:
    method forward (line 485) | def forward(self, x):
  class WanDistMidBlock (line 496) | class WanDistMidBlock(nn.Module):
    method __init__ (line 506) | def __init__(
    method forward (line 527) | def forward(self, x):
  class WanDistResidualDownBlock (line 531) | class WanDistResidualDownBlock(nn.Module):
    method __init__ (line 532) | def __init__(
    method forward (line 565) | def forward(self, x):
  class WanDistResidualUpBlock (line 569) | class WanDistResidualUpBlock(nn.Module):
    method __init__ (line 582) | def __init__(
    method forward (line 628) | def forward(self, x):
  class WanDistUpBlock (line 632) | class WanDistUpBlock(nn.Module):
    method __init__ (line 645) | def __init__(
    method forward (line 679) | def forward(self, x):

FILE: python/sglang/multimodal_gen/runtime/models/vaes/wanvae.py
  function forward_context (line 78) | def forward_context(
  class WanResample (line 94) | class WanResample(nn.Module):
    method __init__ (line 108) | def __init__(self, dim: int, mode: str, upsample_out_dim: int = None) ...
    method forward (line 145) | def forward(self, x):
  class WanResidualBlock (line 149) | class WanResidualBlock(nn.Module):
    method __init__ (line 160) | def __init__(
    method forward (line 182) | def forward(self, x):
  class WanAttentionBlock (line 186) | class WanAttentionBlock(nn.Module):
    method __init__ (line 194) | def __init__(self, dim) -> None:
    method forward (line 203) | def forward(self, x):
  class WanMidBlock (line 207) | class WanMidBlock(nn.Module):
    method __init__ (line 217) | def __init__(
    method forward (line 238) | def forward(self, x):
  class WanResidualDownBlock (line 242) | class WanResidualDownBlock(nn.Module):
    method __init__ (line 244) | def __init__(
    method forward (line 277) | def forward(self, x):
  class WanEncoder3d (line 281) | class WanEncoder3d(nn.Module):
    method __init__ (line 296) | def __init__(
    method forward (line 392) | def forward(self, x):
  class WanResidualUpBlock (line 467) | class WanResidualUpBlock(nn.Module):
    method __init__ (line 480) | def __init__(
    method forward (line 526) | def forward(self, x):
  class WanUpBlock (line 530) | class WanUpBlock(nn.Module):
    method __init__ (line 543) | def __init__(
    method forward (line 575) | def forward(self, x):
  class WanDecoder3d (line 579) | class WanDecoder3d(nn.Module):
    method __init__ (line 594) | def __init__(
    method forward (line 698) | def forward(self, x):
  function patchify (line 769) | def patchify(x, patch_size):
  function unpatchify (line 788) | def unpatchify(x, patch_size):
  class AutoencoderKLWan (line 805) | class AutoencoderKLWan(ParallelTiledVAE):
    method __init__ (line 813) | def __init__(
    method clear_cache (line 867) | def clear_cache(self) -> None:
    method encode (line 886) | def encode(self, x: torch.Tensor) -> torch.Tensor:
    method _encode (line 918) | def _encode(self, x: torch.Tensor, first_frame=False) -> torch.Tensor:
    method tiled_encode (line 926) | def tiled_encode(self, x: torch.Tensor) -> torch.Tensor:
    method spatial_tiled_encode (line 935) | def spatial_tiled_encode(self, x: torch.Tensor) -> torch.Tensor:
    method decode (line 944) | def decode(self, z: torch.Tensor) -> torch.Tensor:
    method _decode (line 973) | def _decode(self, z: torch.Tensor, first_frame=False) -> torch.Tensor:
    method tiled_decode (line 982) | def tiled_decode(self, z: torch.Tensor) -> torch.Tensor:
    method spatial_tiled_decode (line 989) | def spatial_tiled_decode(self, z: torch.Tensor) -> torch.Tensor:
    method parallel_tiled_decode (line 995) | def parallel_tiled_decode(self, z: torch.FloatTensor) -> torch.FloatTe...
    method forward (line 1002) | def forward(

FILE: python/sglang/multimodal_gen/runtime/models/vision_utils.py
  function pil_to_numpy (line 36) | def pil_to_numpy(images: list[PIL.Image.Image] | PIL.Image.Image) -> np....
  function numpy_to_pt (line 56) | def numpy_to_pt(images: np.ndarray) -> torch.Tensor:
  function normalize (line 75) | def normalize(images: np.ndarray | torch.Tensor) -> np.ndarray | torch.T...
  function load_image (line 91) | def load_image(
  function load_video (line 132) | def load_video(
  function get_default_height_width (line 210) | def get_default_height_width(
  function resize (line 258) | def resize(

FILE: python/sglang/multimodal_gen/runtime/models/vocoder/ltx_2_vocoder.py
  class ResBlock (line 12) | class ResBlock(nn.Module):
    method __init__ (line 13) | def __init__(
    method forward (line 54) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LTX2Vocoder (line 64) | class LTX2Vocoder(ABC, nn.Module):
    method __init__ (line 69) | def __init__(
    method forward (line 142) | def forward(

FILE: python/sglang/multimodal_gen/runtime/pipelines/comfyui_flux_pipeline.py
  class ComfyUIFluxPipeline (line 31) | class ComfyUIFluxPipeline(LoRAPipeline, ComposedPipelineBase):
    method initialize_pipeline (line 62) | def initialize_pipeline(self, server_args: ServerArgs):
    method load_modules (line 82) | def load_modules(
    method _load_and_convert_weights_from_safetensors (line 106) | def _load_and_convert_weights_from_safetensors(
    method _convert_comfyui_weights (line 282) | def _convert_comfyui_weights(
    method _load_transformer_from_safetensors (line 403) | def _load_transformer_from_safetensors(
    method create_pipeline_stages (line 664) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/comfyui_qwen_image_pipeline.py
  class ComfyUIQwenImagePipelineBase (line 43) | class ComfyUIQwenImagePipelineBase(LoRAPipeline, ComposedPipelineBase):
    method initialize_pipeline (line 69) | def initialize_pipeline(self, server_args: ServerArgs):
    method load_modules (line 86) | def load_modules(
    method _load_transformer_from_safetensors (line 110) | def _load_transformer_from_safetensors(
    method _prepare_dit_config_and_mapping (line 149) | def _prepare_dit_config_and_mapping(self, server_args: ServerArgs):
    method _instantiate_model (line 198) | def _instantiate_model(
    method _load_weights_into_model (line 258) | def _load_weights_into_model(
    method create_pipeline_stages (line 291) | def create_pipeline_stages(self, server_args: ServerArgs):
  class ComfyUIQwenImagePipeline (line 314) | class ComfyUIQwenImagePipeline(ComfyUIQwenImagePipelineBase):
  class ComfyUIQwenImageEditPipeline (line 329) | class ComfyUIQwenImageEditPipeline(ComfyUIQwenImagePipelineBase):

FILE: python/sglang/multimodal_gen/runtime/pipelines/comfyui_zimage_pipeline.py
  class ComfyUIZImagePipeline (line 46) | class ComfyUIZImagePipeline(LoRAPipeline, ComposedPipelineBase):
    method initialize_pipeline (line 77) | def initialize_pipeline(self, server_args: ServerArgs):
    method load_modules (line 99) | def load_modules(
    method _convert_comfyui_qkv_weights (line 123) | def _convert_comfyui_qkv_weights(
    method _load_transformer_from_safetensors (line 193) | def _load_transformer_from_safetensors(
    method create_pipeline_stages (line 379) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/diffusers_pipeline.py
  class DiffusersExecutionStage (line 47) | class DiffusersExecutionStage(PipelineStage):
    method __init__ (line 50) | def __init__(self, diffusers_pipe: DiffusionPipeline):
    method forward (line 54) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
    method _filter_pipeline_kwargs (line 84) | def _filter_pipeline_kwargs(
    method _extract_output (line 130) | def _extract_output(self, output: Any) -> torch.Tensor | None:
    method _convert_to_tensor (line 150) | def _convert_to_tensor(self, data: Any) -> torch.Tensor | None:
    method _convert_list_to_tensor (line 174) | def _convert_list_to_tensor(self, data: list) -> torch.Tensor | None:
    method _postprocess_output (line 209) | def _postprocess_output(self, output: torch.Tensor) -> torch.Tensor:
    method _fix_output_shape (line 231) | def _fix_output_shape(self, output: torch.Tensor) -> torch.Tensor:
    method _build_pipeline_kwargs (line 259) | def _build_pipeline_kwargs(self, batch: Req) -> dict[str, Any]:
    method _get_generator_device (line 310) | def _get_generator_device(self, batch: Req) -> str:
    method _load_input_image (line 321) | def _load_input_image(self, batch: Req) -> Image.Image | None:
  class DiffusersPipeline (line 350) | class DiffusersPipeline(ComposedPipelineBase):
    method __init__ (line 362) | def __init__(
    method _load_diffusers_pipeline (line 384) | def _load_diffusers_pipeline(
    method _apply_vae_optimizations (line 481) | def _apply_vae_optimizations(
    method _apply_attention_backend (line 517) | def _apply_attention_backend(
    method _apply_cache_dit (line 565) | def _apply_cache_dit(
    method _apply_torch_compile (line 606) | def _apply_torch_compile(self, pipe: Any, server_args: ServerArgs) -> ...
    method _get_dtype (line 657) | def _get_dtype(self, server_args: ServerArgs) -> torch.dtype:
    method _detect_pipeline_type (line 674) | def _detect_pipeline_type(self) -> None:
    method load_modules (line 684) | def load_modules(
    method create_pipeline_stages (line 692) | def create_pipeline_stages(self, server_args: ServerArgs) -> None:
    method initialize_pipeline (line 699) | def initialize_pipeline(self, server_args: ServerArgs) -> None:
    method post_init (line 702) | def post_init(self) -> None:
    method add_stage (line 710) | def add_stage(self, stage_name: str, stage: PipelineStage) -> None:
    method stages (line 722) | def stages(self) -> list[PipelineStage]:
    method forward (line 727) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
    method from_pretrained (line 734) | def from_pretrained(
    method get_module (line 758) | def get_module(self, module_name: str, default_value: Any = None) -> Any:

FILE: python/sglang/multimodal_gen/runtime/pipelines/flux.py
  function calculate_shift (line 22) | def calculate_shift(
  function prepare_mu (line 35) | def prepare_mu(batch: Req, server_args: ServerArgs):
  class FluxPipeline (line 54) | class FluxPipeline(LoRAPipeline, ComposedPipelineBase):
    method create_pipeline_stages (line 67) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/flux_2.py
  function compute_empirical_mu (line 16) | def compute_empirical_mu(batch: Req, server_args: ServerArgs):
  class Flux2Pipeline (line 36) | class Flux2Pipeline(LoRAPipeline, ComposedPipelineBase):
    method create_pipeline_stages (line 47) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/flux_2_klein.py
  class Flux2KleinPipeline (line 4) | class Flux2KleinPipeline(Flux2Pipeline):

FILE: python/sglang/multimodal_gen/runtime/pipelines/glm_image.py
  class GlmImagePipeline (line 15) | class GlmImagePipeline(LoRAPipeline, ComposedPipelineBase):
    method create_pipeline_stages (line 28) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/helios_pipeline.py
  class HeliosPipeline (line 28) | class HeliosPipeline(LoRAPipeline, ComposedPipelineBase):
    method initialize_pipeline (line 46) | def initialize_pipeline(self, server_args: ServerArgs):
    method create_pipeline_stages (line 62) | def create_pipeline_stages(self, server_args: ServerArgs) -> None:
  class HeliosPyramidPipeline (line 83) | class HeliosPyramidPipeline(HeliosPipeline):

FILE: python/sglang/multimodal_gen/runtime/pipelines/hunyuan3d_pipeline.py
  class Hunyuan3D2Pipeline (line 45) | class Hunyuan3D2Pipeline(ComposedPipelineBase):
    method _load_config (line 61) | def _load_config(self) -> dict[str, Any]:
    method _resolve_class (line 74) | def _resolve_class(target: str) -> Any:
    method _resolve_shape_dir (line 103) | def _resolve_shape_dir(
    method _resolve_paint_dir (line 154) | def _resolve_paint_dir(model_path: str, subfolder: str) -> str:
    method _load_and_split_checkpoint (line 185) | def _load_and_split_checkpoint(
    method _load_dit_model (line 204) | def _load_dit_model(
    method _load_simple_component (line 247) | def _load_simple_component(
    method _instantiate_component (line 270) | def _instantiate_component(cls, cfg: dict[str, Any]) -> Any:
    method load_modules (line 279) | def load_modules(
    method initialize_pipeline (line 349) | def initialize_pipeline(self, server_args: ServerArgs):
    method create_pipeline_stages (line 357) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/hunyuan_pipeline.py
  class HunyuanVideoPipeline (line 26) | class HunyuanVideoPipeline(ComposedPipelineBase):
    method create_pipeline_stages (line 40) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/ltx_2_pipeline.py
  function calculate_shift (line 28) | def calculate_shift(
  function prepare_mu (line 41) | def prepare_mu(batch: Req, server_args: ServerArgs):
  function _load_component_config (line 69) | def _load_component_config(model_path: str, component_name: str):
  function _filter_kwargs_for_cls (line 98) | def _filter_kwargs_for_cls(cls, kwargs):
  class LTX2FlowMatchScheduler (line 104) | class LTX2FlowMatchScheduler(FlowMatchEulerDiscreteScheduler):
    method _time_shift_exponential (line 107) | def _time_shift_exponential(self, mu, sigma, t):
  class LTX2Pipeline (line 115) | class LTX2Pipeline(ComposedPipelineBase):
    method initialize_pipeline (line 130) | def initialize_pipeline(self, server_args: ServerArgs):
    method create_pipeline_stages (line 134) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/mova_pipeline.py
  class MOVAPipeline (line 29) | class MOVAPipeline(ComposedPipelineBase):
    method initialize_pipeline (line 48) | def initialize_pipeline(self, server_args: ServerArgs) -> None:
    method create_pipeline_stages (line 62) | def create_pipeline_stages(self, server_args: ServerArgs) -> None:
  class MOVAPipelineAlias (line 101) | class MOVAPipelineAlias(MOVAPipeline):

FILE: python/sglang/multimodal_gen/runtime/pipelines/qwen_image.py
  function calculate_shift (line 22) | def calculate_shift(
  function prepare_mu (line 35) | def prepare_mu(batch: Req, server_args: ServerArgs):
  class QwenImagePipeline (line 53) | class QwenImagePipeline(LoRAPipeline, ComposedPipelineBase):
    method create_pipeline_stages (line 64) | def create_pipeline_stages(self, server_args: ServerArgs):
  class QwenImageEditPipeline (line 68) | class QwenImageEditPipeline(LoRAPipeline, ComposedPipelineBase):
    method create_pipeline_stages (line 80) | def create_pipeline_stages(self, server_args: ServerArgs):
  class QwenImageEditPlusPipeline (line 95) | class QwenImageEditPlusPipeline(QwenImageEditPipeline):
  function prepare_mu_layered (line 99) | def prepare_mu_layered(batch: Req, server_args: ServerArgs):
  class QwenImageLayeredPipeline (line 105) | class QwenImageLayeredPipeline(QwenImageEditPipeline):
    method create_pipeline_stages (line 116) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/sana.py
  class SanaPipeline (line 28) | class SanaPipeline(LoRAPipeline, ComposedPipelineBase):
    method create_pipeline_stages (line 39) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/wan_causal_dmd_pipeline.py
  class WanCausalDMDPipeline (line 28) | class WanCausalDMDPipeline(LoRAPipeline, ComposedPipelineBase):
    method create_pipeline_stages (line 39) | def create_pipeline_stages(self, server_args: ServerArgs) -> None:

FILE: python/sglang/multimodal_gen/runtime/pipelines/wan_dmd_pipeline.py
  class WanDMDPipeline (line 32) | class WanDMDPipeline(LoRAPipeline, ComposedPipelineBase):
    method initialize_pipeline (line 47) | def initialize_pipeline(self, server_args: ServerArgs):
    method create_pipeline_stages (line 53) | def create_pipeline_stages(self, server_args: ServerArgs) -> None:

FILE: python/sglang/multimodal_gen/runtime/pipelines/wan_i2v_dmd_pipeline.py
  class WanImageToVideoDmdPipeline (line 25) | class WanImageToVideoDmdPipeline(LoRAPipeline, ComposedPipelineBase):
    method initialize_pipeline (line 38) | def initialize_pipeline(self, server_args: ServerArgs):
    method create_pipeline_stages (line 43) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/wan_i2v_pipeline.py
  class WanImageToVideoPipeline (line 24) | class WanImageToVideoPipeline(LoRAPipeline, ComposedPipelineBase):
    method initialize_pipeline (line 37) | def initialize_pipeline(self, server_args: ServerArgs):
    method create_pipeline_stages (line 42) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines/wan_pipeline.py
  class WanPipeline (line 24) | class WanPipeline(LoRAPipeline, ComposedPipelineBase):
    method initialize_pipeline (line 39) | def initialize_pipeline(self, server_args: ServerArgs):
    method create_pipeline_stages (line 45) | def create_pipeline_stages(self, server_args: ServerArgs) -> None:

FILE: python/sglang/multimodal_gen/runtime/pipelines/zimage_pipeline.py
  function calculate_shift (line 15) | def calculate_shift(
  function prepare_mu (line 28) | def prepare_mu(batch: Req, server_args: ServerArgs):
  class ZImagePipeline (line 46) | class ZImagePipeline(LoRAPipeline, ComposedPipelineBase):
    method create_pipeline_stages (line 57) | def create_pipeline_stages(self, server_args: ServerArgs):

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/__init__.py
  class PipelineWithLoRA (line 28) | class PipelineWithLoRA(LoRAPipeline, ComposedPipelineBase):
  function build_pipeline (line 34) | def build_pipeline(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/composed_pipeline_base.py
  class ComposedPipelineBase (line 47) | class ComposedPipelineBase(ABC):
    method is_lora_effective (line 67) | def is_lora_effective(self):
    method is_lora_set (line 70) | def is_lora_set(self):
    method __init__ (line 73) | def __init__(
    method build_executor (line 106) | def build_executor(self, server_args: ServerArgs):
    method __post_init__ (line 115) | def __post_init__(self) -> None:
    method get_module (line 122) | def get_module(self, module_name: str, default_value: Any = None) -> Any:
    method add_module (line 125) | def add_module(self, module_name: str, module: Any):
    method _load_config (line 128) | def _load_config(self) -> dict[str, Any]:
    method required_config_modules (line 136) | def required_config_modules(self) -> list[str]:
    method stages (line 154) | def stages(self) -> list[PipelineStage]:
    method create_pipeline_stages (line 161) | def create_pipeline_stages(self, server_args: ServerArgs):
    method initialize_pipeline (line 167) | def initialize_pipeline(self, server_args: ServerArgs):
    method _resolve_component_path (line 173) | def _resolve_component_path(
    method load_modules (line 186) | def load_modules(
    method _infer_stage_name (line 335) | def _infer_stage_name(stage: PipelineStage) -> str:
    method add_stage (line 343) | def add_stage(
    method add_stages (line 358) | def add_stages(
    method add_stage_if (line 370) | def add_stage_if(
    method get_stage (line 380) | def get_stage(self, stage_name: str) -> PipelineStage | None:
    method add_standard_text_encoding_stage (line 384) | def add_standard_text_encoding_stage(
    method add_standard_timestep_preparation_stage (line 396) | def add_standard_timestep_preparation_stage(
    method add_standard_latent_preparation_stage (line 408) | def add_standard_latent_preparation_stage(
    method add_standard_denoising_stage (line 420) | def add_standard_denoising_stage(
    method add_standard_decoding_stage (line 446) | def add_standard_decoding_stage(
    method add_standard_t2i_stages (line 459) | def add_standard_t2i_stages(
    method add_standard_ti2i_stages (line 479) | def add_standard_ti2i_stages(
    method add_standard_ti2v_stages (line 529) | def add_standard_ti2v_stages(
    method forward (line 600) | def forward(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/executors/parallel_executor.py
  class ParallelExecutor (line 29) | class ParallelExecutor(PipelineExecutor):
    method collect_from_main (line 35) | def collect_from_main(self, batches: list[Req]):
    method _execute (line 55) | def _execute(
    method execute (line 95) | def execute(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/executors/pipeline_executor.py
  class Timer (line 26) | class Timer(StageProfiler):
    method __init__ (line 32) | def __init__(self, name="Stage"):
  class PipelineExecutor (line 38) | class PipelineExecutor(ABC):
    method __init__ (line 46) | def __init__(self, server_args):
    method execute_with_profiling (line 49) | def execute_with_profiling(
    method execute (line 62) | def execute(
    method profile_execution (line 82) | def profile_execution(self, batch: Req, dump_rank: int = 0):

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/executors/sync_executor.py
  class SyncExecutor (line 19) | class SyncExecutor(PipelineExecutor):
    method run_profile_all_stages (line 24) | def run_profile_all_stages(
    method execute (line 40) | def execute(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/lora_format_adapter.py
  class LoRAFormat (line 13) | class LoRAFormat(str, Enum):
  function _sample_keys (line 25) | def _sample_keys(keys: Iterable[str], k: int = 20) -> list[str]:
  function _has_substring_key (line 34) | def _has_substring_key(keys: Iterable[str], substr: str) -> bool:
  function _has_prefix_key (line 38) | def _has_prefix_key(keys: Iterable[str], prefix: str) -> bool:
  function _looks_like_xlabs_flux_key (line 42) | def _looks_like_xlabs_flux_key(k: str) -> bool:
  function _looks_like_kohya_flux (line 60) | def _looks_like_kohya_flux(state_dict: Mapping[str, torch.Tensor]) -> bool:
  function _looks_like_non_diffusers_sd (line 72) | def _looks_like_non_diffusers_sd(state_dict: Mapping[str, torch.Tensor])...
  function _looks_like_wan_lora (line 82) | def _looks_like_wan_lora(state_dict: Mapping[str, torch.Tensor]) -> bool:
  function _looks_like_qwen_image (line 98) | def _looks_like_qwen_image(state_dict: Mapping[str, torch.Tensor]) -> bool:
  function _looks_like_ai_toolkit_flux_lora (line 108) | def _looks_like_ai_toolkit_flux_lora(state_dict: Mapping[str, torch.Tens...
  function detect_lora_format_from_state_dict (line 134) | def detect_lora_format_from_state_dict(
  function _convert_qwen_image_standard (line 168) | def _convert_qwen_image_standard(
  function _convert_non_diffusers_sd_simple (line 191) | def _convert_non_diffusers_sd_simple(
  function _convert_with_diffusers_utils_if_available (line 221) | def _convert_with_diffusers_utils_if_available(
  function _convert_via_diffusers_candidates (line 253) | def _convert_via_diffusers_candidates(
  function _convert_xlabs_ai_via_diffusers (line 289) | def _convert_xlabs_ai_via_diffusers(
  function _convert_kohya_flux_via_diffusers (line 317) | def _convert_kohya_flux_via_diffusers(
  function _convert_ai_toolkit_flux_lora (line 341) | def _convert_ai_toolkit_flux_lora(
  function convert_lora_state_dict_by_format (line 490) | def convert_lora_state_dict_by_format(
  function normalize_lora_state_dict (line 539) | def normalize_lora_state_dict(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/lora_pipeline.py
  class LoRAPipeline (line 37) | class LoRAPipeline(ComposedPipelineBase):
    method __init__ (line 70) | def __init__(self, *args, **kwargs) -> None:
    method is_target_layer (line 104) | def is_target_layer(self, module_name: str) -> bool:
    method _get_target_lora_layers (line 111) | def _get_target_lora_layers(
    method _temporarily_disable_offload (line 151) | def _temporarily_disable_offload(
    method convert_module_lora_layers (line 215) | def convert_module_lora_layers(
    method convert_to_lora_layers (line 258) | def convert_to_lora_layers(self) -> None:
    method _normalize_lora_params (line 303) | def _normalize_lora_params(
    method _check_lora_config_matches (line 357) | def _check_lora_config_matches(
    method _apply_lora_to_layers (line 392) | def _apply_lora_to_layers(
    method is_lora_effective (line 493) | def is_lora_effective(self, target: str = "all") -> bool:
    method is_lora_set (line 504) | def is_lora_set(self, target: str = "all") -> bool:
    method load_lora_adapter (line 517) | def load_lora_adapter(self, lora_path: str, lora_nickname: str, rank: ...
    method set_lora (line 586) | def set_lora(
    method merge_lora_weights (line 713) | def merge_lora_weights(self, target: str = "all", strength: float = 1....
    method unmerge_lora_weights (line 767) | def unmerge_lora_weights(self, target: str = "all") -> None:
    method get_lora_status (line 817) | def get_lora_status(self) -> dict[str, Any]:

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/schedule_batch.py
  class Req (line 39) | class Req:
    method __init__ (line 160) | def __init__(self, **kwargs):
    method __getattr__ (line 175) | def __getattr__(self, name: str) -> Any:
    method __setattr__ (line 193) | def __setattr__(self, name: str, value: Any) -> None:
    method batch_size (line 226) | def batch_size(self):
    method output_file_path (line 239) | def output_file_path(self, num_outputs=1, output_idx=None):
    method set_as_warmup (line 249) | def set_as_warmup(self, warmup_steps: int = 1):
    method copy_as_warmup (line 256) | def copy_as_warmup(self, warmup_steps: int = 1) -> "Req":
    method validate (line 261) | def validate(self):
    method adjust_size (line 273) | def adjust_size(self, server_args: ServerArgs):
    method __str__ (line 276) | def __str__(self):
    method log (line 279) | def log(self, server_args: ServerArgs):
  class OutputBatch (line 323) | class OutputBatch:

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/base.py
  class StageParallelismType (line 28) | class StageParallelismType(Enum):
  class StageVerificationError (line 37) | class StageVerificationError(Exception):
  class PipelineStage (line 43) | class PipelineStage(ABC):
    method __init__ (line 52) | def __init__(self):
    method log_info (line 55) | def log_info(self, msg, *args):
    method log_warning (line 61) | def log_warning(self, msg, *args):
    method log_error (line 65) | def log_error(self, msg, *args):
    method log_debug (line 69) | def log_debug(self, msg, *args):
    method verify_input (line 73) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method maybe_free_model_hooks (line 91) | def maybe_free_model_hooks(self):
    method load_model (line 94) | def load_model(self):
    method offload_model (line 100) | def offload_model(self):
    method parallelism_type (line 108) | def parallelism_type(self) -> StageParallelismType:
    method verify_output (line 113) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...
    method _run_verification (line 125) | def _run_verification(
    method device (line 154) | def device(self) -> torch.device:
    method set_logging (line 160) | def set_logging(self, enable: bool):
    method __call__ (line 169) | def __call__(
    method forward (line 216) | def forward(
    method backward (line 234) | def backward(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/causal_denoising.py
  class CausalDMDDenoisingStage (line 26) | class CausalDMDDenoisingStage(DenoisingStage):
    method __init__ (line 31) | def __init__(self, transformer, scheduler) -> None:
    method forward (line 52) | def forward(
    method _initialize_kv_cache (line 391) | def _initialize_kv_cache(self, batch_size, dtype, device) -> None:
    method _initialize_crossattn_cache (line 437) | def _initialize_crossattn_cache(
    method verify_input (line 474) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/comfyui_latent_preparation.py
  class ComfyUILatentPreparationStage (line 26) | class ComfyUILatentPreparationStage(LatentPreparationStage):
    method _fix_tensor_device (line 35) | def _fix_tensor_device(value, target_device):
    method _has_tensor (line 54) | def _has_tensor(value):
    method forward (line 62) | def forward(self, batch, server_args):

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/decoding.py
  function _ensure_tensor_decode_output (line 31) | def _ensure_tensor_decode_output(decode_output):
  class DecodingStage (line 51) | class DecodingStage(PipelineStage):
    method __init__ (line 59) | def __init__(self, vae, pipeline=None, component_name: str = "vae") ->...
    method parallelism_type (line 66) | def parallelism_type(self) -> StageParallelismType:
    method verify_input (line 71) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 78) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...
    method scale_and_shift (line 85) | def scale_and_shift(self, latents: torch.Tensor, server_args):
    method decode (line 107) | def decode(self, latents: torch.Tensor, server_args: ServerArgs) -> to...
    method load_model (line 157) | def load_model(self):
    method offload_model (line 172) | def offload_model(self):
    method forward (line 189) | def forward(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/decoding_av.py
  class LTX2AVDecodingStage (line 14) | class LTX2AVDecodingStage(DecodingStage):
    method __init__ (line 19) | def __init__(self, vae, audio_vae, vocoder, pipeline=None):
    method forward (line 28) | def forward(self, batch: Req, server_args: ServerArgs) -> OutputBatch:

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/denoising.py
  class DenoisingStage (line 87) | class DenoisingStage(PipelineStage):
    method __init__ (line 95) | def __init__(
    method _maybe_enable_torch_compile (line 132) | def _maybe_enable_torch_compile(self, module: object) -> None:
    method _maybe_enable_cache_dit (line 164) | def _maybe_enable_cache_dit(
    method _build_guidance (line 353) | def _build_guidance(self, batch_size, target_dtype, device, guidance_v...
    method get_or_build_guidance (line 365) | def get_or_build_guidance(self, bsz: int, dtype, device):
    method parallelism_type (line 381) | def parallelism_type(self) -> StageParallelismType:
    method _preprocess_latents_for_ti2v (line 385) | def _preprocess_latents_for_ti2v(
    method _postprocess_latents_for_ti2v (line 444) | def _postprocess_latents_for_ti2v(self, z, reserved_frames_masks, batch):
    method _handle_boundary_ratio (line 497) | def _handle_boundary_ratio(
    method _prepare_denoising_loop (line 521) | def _prepare_denoising_loop(self, batch: Req, server_args: ServerArgs):
    method _post_denoising_loop (line 704) | def _post_denoising_loop(
    method _preprocess_sp_latents (line 787) | def _preprocess_sp_latents(self, batch: Req, server_args: ServerArgs):
    method _postprocess_sp_latents (line 808) | def _postprocess_sp_latents(
    method step_profile (line 832) | def step_profile(self):
    method _manage_device_placement (line 837) | def _manage_device_placement(
    method _select_and_manage_model (line 867) | def _select_and_manage_model(
    method expand_timestep_before_forward (line 890) | def expand_timestep_before_forward(
    method post_forward_for_ti2v_task (line 943) | def post_forward_for_ti2v_task(
    method forward (line 971) | def forward(
    method prepare_extra_func_kwargs (line 1135) | def prepare_extra_func_kwargs(self, func, kwargs) -> dict[str, Any]:
    method progress_bar (line 1159) | def progress_bar(
    method rescale_noise_cfg (line 1169) | def rescale_noise_cfg(
    method _build_attn_metadata (line 1198) | def _build_attn_metadata(
    method _predict_noise (line 1339) | def _predict_noise(
    method _predict_noise_with_cfg (line 1355) | def _predict_noise_with_cfg(
    method prepare_sta_param (line 1514) | def prepare_sta_param(self, batch: Req, server_args: ServerArgs):
    method save_sta_search_results (line 1628) | def save_sta_search_results(self, batch: Req):
    method verify_input (line 1666) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 1695) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/denoising_av.py
  class LTX2AVDenoisingStage (line 38) | class LTX2AVDenoisingStage(DenoisingStage):
    method __init__ (line 43) | def __init__(self, transformer, scheduler, vae=None, audio_vae=None, *...
    method _get_video_latent_num_frames_for_model (line 50) | def _get_video_latent_num_frames_for_model(
    method _truncate_sp_padded_token_latents (line 76) | def _truncate_sp_padded_token_latents(
    method _maybe_enable_cache_dit (line 100) | def _maybe_enable_cache_dit(self, num_inference_steps: int, batch: Req...
    method _resize_center_crop (line 110) | def _resize_center_crop(
    method _apply_video_codec_compression (line 116) | def _apply_video_codec_compression(
    method _resize_center_crop_tensor (line 143) | def _resize_center_crop_tensor(
    method _pil_to_normed_tensor (line 176) | def _pil_to_normed_tensor(img: PIL.Image.Image) -> torch.Tensor:
    method _should_apply_ltx2_ti2v (line 183) | def _should_apply_ltx2_ti2v(batch: Req) -> bool:
    method _prepare_ltx2_image_latent (line 199) | def _prepare_ltx2_image_latent(self, batch: Req, server_args: ServerAr...
    method forward (line 314) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
    method _post_denoising_loop (line 648) | def _post_denoising_loop(
    method verify_input (line 712) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method do_classifier_free_guidance (line 755) | def do_classifier_free_guidance(self, batch: Req) -> bool:
  class LTX2RefinementStage (line 759) | class LTX2RefinementStage(LTX2AVDenoisingStage):
    method __init__ (line 760) | def __init__(
    method forward (line 766) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
    method do_classifier_free_guidance (line 795) | def do_classifier_free_guidance(self, batch: Req) -> bool:

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/denoising_dmd.py
  class DmdDenoisingStage (line 24) | class DmdDenoisingStage(DenoisingStage):
    method __init__ (line 29) | def __init__(self, transformer, scheduler, transformer_2=None) -> None:
    method _preprocess_sp_latents (line 35) | def _preprocess_sp_latents(self, batch: Req, server_args: ServerArgs):
    method _postprocess_sp_latents (line 45) | def _postprocess_sp_latents(
    method forward (line 58) | def forward(
    method _select_and_manage_model (line 223) | def _select_and_manage_model(
    method _manage_device_placement (line 246) | def _manage_device_placement(
    method _handle_boundary_ratio (line 272) | def _handle_boundary_ratio(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/encoding.py
  class EncodingStage (line 28) | class EncodingStage(PipelineStage):
    method __init__ (line 36) | def __init__(self, vae: ParallelTiledVAE) -> None:
    method verify_input (line 41) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 48) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...
    method forward (line 55) | def forward(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/hunyuan3d_paint.py
  function guidance_scale_embedding (line 41) | def guidance_scale_embedding(
  function extract_into_tensor (line 59) | def extract_into_tensor(
  function get_predicted_original_sample (line 70) | def get_predicted_original_sample(
  function get_predicted_noise (line 99) | def get_predicted_noise(
  function to_rgb_image (line 128) | def to_rgb_image(maybe_rgba):
  class DDIMSolver (line 145) | class DDIMSolver:
    method __init__ (line 148) | def __init__(
    method to (line 166) | def to(self, device: torch.device) -> "DDIMSolver":
    method ddim_step (line 172) | def ddim_step(
  function _recorrect_rgb (line 187) | def _recorrect_rgb(
  class Hunyuan3DPaintPreprocessStage (line 229) | class Hunyuan3DPaintPreprocessStage(PipelineStage):
    method parallelism_type (line 237) | def parallelism_type(self) -> StageParallelismType:
    method __init__ (line 240) | def __init__(self, config: Hunyuan3D2PipelineConfig) -> None:
    method _do_uv_unwrap (line 250) | def _do_uv_unwrap(self, batch: Req, server_args: ServerArgs) -> Req:
    method _load_delight_model (line 272) | def _load_delight_model(self, server_args: ServerArgs):
    method _run_delight (line 322) | def _run_delight(self, image):
    method _do_delight (line 369) | def _do_delight(self, batch: Req, server_args: ServerArgs) -> Req:
    method _init_renderer (line 395) | def _init_renderer(self):
    method _render_multiview (line 408) | def _render_multiview(self, mesh) -> tuple:
    method forward (line 424) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
    method verify_input (line 467) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 473) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...
  class Hunyuan3DPaintTexGenStage (line 486) | class Hunyuan3DPaintTexGenStage(PipelineStage):
    method __init__ (line 487) | def __init__(
    method parallelism_type (line 512) | def parallelism_type(self) -> StageParallelismType:
    method _load_paint_models (line 515) | def _load_paint_models(self, server_args: ServerArgs) -> None:
    method _do_load_paint (line 533) | def _do_load_paint(self, server_args: ServerArgs) -> None:
    method _convert_pil_list_to_tensor (line 594) | def _convert_pil_list_to_tensor(
    method _encode_images (line 623) | def _encode_images(self, images: torch.Tensor) -> torch.Tensor:
    method _compute_camera_index (line 633) | def _compute_camera_index(azim: float, elev: float) -> int:
    method _prepare_denoising_inputs (line 649) | def _prepare_denoising_inputs(
    method _denoise_loop (line 816) | def _denoise_loop(
    method _decode_latents (line 885) | def _decode_latents(self, latents: torch.Tensor) -> list:
    method forward (line 891) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
    method verify_input (line 948) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 961) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...
  class Hunyuan3DPaintPostprocessStage (line 970) | class Hunyuan3DPaintPostprocessStage(PipelineStage):
    method parallelism_type (line 974) | def parallelism_type(self) -> StageParallelismType:
    method __init__ (line 977) | def __init__(self, config: Hunyuan3D2PipelineConfig) -> None:
    method forward (line 981) | def forward(self, batch: Req, server_args: ServerArgs) -> OutputBatch:
    method _cleanup_obj_artifacts (line 1035) | def _cleanup_obj_artifacts(obj_path: str) -> None:
    method verify_input (line 1048) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/hunyuan3d_shape.py
  function retrieve_timesteps (line 39) | def retrieve_timesteps(
  function _prepare_shape_image (line 84) | def _prepare_shape_image(image_processor, image, mask=None) -> dict:
  function _move_to_device (line 106) | def _move_to_device(payload, device, dtype):
  class Hunyuan3DShapeBeforeDenoisingStage (line 117) | class Hunyuan3DShapeBeforeDenoisingStage(PipelineStage):
    method __init__ (line 124) | def __init__(
    method _validate_input (line 141) | def _validate_input(self, batch: Req, server_args: ServerArgs) -> None:
    method _prepare_latents (line 157) | def _prepare_latents(self, batch_size, dtype, device, generator):
    method forward (line 164) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
    method verify_input (line 229) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 237) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...
  class Hunyuan3DShapeDenoisingStage (line 245) | class Hunyuan3DShapeDenoisingStage(DenoisingStage):
    method __init__ (line 248) | def __init__(self, transformer: Any, scheduler: Any, **kwargs) -> None:
    method _prepare_denoising_loop (line 251) | def _prepare_denoising_loop(self, batch: Req, server_args: ServerArgs):
    method _predict_noise (line 321) | def _predict_noise(
    method _predict_noise_with_cfg (line 335) | def _predict_noise_with_cfg(
    method verify_input (line 385) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 396) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...
  class Hunyuan3DShapeExportStage (line 402) | class Hunyuan3DShapeExportStage(PipelineStage):
    method __init__ (line 405) | def __init__(self, vae: Any, config: Hunyuan3D2PipelineConfig) -> None:
    method forward (line 410) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
    method verify_input (line 449) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 454) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...
  class Hunyuan3DShapeSaveStage (line 460) | class Hunyuan3DShapeSaveStage(PipelineStage):
    method __init__ (line 463) | def __init__(self, config: Hunyuan3D2PipelineConfig) -> None:
    method _get_output_paths (line 467) | def _get_output_paths(self, batch: Req) -> tuple[str, str]:
    method forward (line 478) | def forward(self, batch: Req, server_args: ServerArgs) -> Req | Output...
    method verify_input (line 515) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/image_encoding.py
  class ImageEncodingStage (line 44) | class ImageEncodingStage(PipelineStage):
    method __init__ (line 52) | def __init__(
    method load_model (line 69) | def load_model(self):
    method offload_model (line 74) | def offload_model(self):
    method move_to_device (line 78) | def move_to_device(self, device):
    method encoding_qwen_image_edit (line 90) | def encoding_qwen_image_edit(self, outputs, image_inputs):
    method forward (line 96) | def forward(
    method verify_input (line 209) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 219) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...
  class ImageVAEEncodingStage (line 226) | class ImageVAEEncodingStage(PipelineStage):
    method __init__ (line 234) | def __init__(self, vae: ParallelTiledVAE, **kwargs) -> None:
    method load_model (line 238) | def load_model(self):
    method offload_model (line 241) | def offload_model(self):
    method forward (line 245) | def forward(
    method retrieve_latents (line 370) | def retrieve_latents(
    method preprocess (line 383) | def preprocess(
    method verify_input (line 400) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 419) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/input_validation.py
  class InputValidationStage (line 37) | class InputValidationStage(PipelineStage):
    method __init__ (line 47) | def __init__(self, vae_image_processor=None):
    method _calculate_dimensions_from_area (line 52) | def _calculate_dimensions_from_area(
    method _generate_seeds (line 70) | def _generate_seeds(self, batch: Req, server_args: ServerArgs):
    method preprocess_condition_image (line 92) | def preprocess_condition_image(
    method forward (line 233) | def forward(
    method verify_input (line 333) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 365) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/latent_preparation.py
  class LatentPreparationStage (line 25) | class LatentPreparationStage(PipelineStage):
    method __init__ (line 33) | def __init__(self, scheduler, transformer) -> None:
    method forward (line 38) | def forward(
    method adjust_video_length (line 107) | def adjust_video_length(self, batch: Req, server_args: ServerArgs) -> ...
    method verify_input (line 124) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 144) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/latent_preparation_av.py
  class LTX2AVLatentPreparationStage (line 21) | class LTX2AVLatentPreparationStage(LatentPreparationStage):
    method __init__ (line 26) | def __init__(self, scheduler, transformer=None, audio_vae=None):
    method verify_input (line 30) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method forward (line 56) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/model_specific_stages/glm_image.py
  function calculate_shift (line 25) | def calculate_shift(
  function retrieve_timesteps (line 36) | def retrieve_timesteps(
  function retrieve_latents (line 91) | def retrieve_latents(
  class GlmImageBeforeDenoisingStage (line 106) | class GlmImageBeforeDenoisingStage(PipelineStage):
    method __init__ (line 130) | def __init__(
    method _parse_and_expand_shape_info (line 165) | def _parse_and_expand_shape_info(
    method _build_image_grid_thw (line 196) | def _build_image_grid_thw(
    method _calculate_ar_generation_params (line 227) | def _calculate_ar_generation_params(
    method _extract_large_image_tokens (line 250) | def _extract_large_image_tokens(
    method _upsample_d32_to_d16 (line 265) | def _upsample_d32_to_d16(
    method _compute_generation_params (line 296) | def _compute_generation_params(
    method _upsample_token_ids (line 320) | def _upsample_token_ids(
    method generate_prior_tokens (line 330) | def generate_prior_tokens(
    method get_glyph_texts (line 410) | def get_glyph_texts(self, prompt):
    method _get_glyph_embeds (line 420) | def _get_glyph_embeds(
    method encode_prompt (line 461) | def encode_prompt(
    method prepare_latents (line 536) | def prepare_latents(
    method check_inputs (line 561) | def check_inputs(
    method guidance_scale (line 605) | def guidance_scale(self):
    method do_classifier_free_guidance (line 609) | def do_classifier_free_guidance(self):
    method num_timesteps (line 613) | def num_timesteps(self):
    method attention_kwargs (line 617) | def attention_kwargs(self):
    method current_timestep (line 621) | def current_timestep(self):
    method interrupt (line 625) | def interrupt(self):
    method forward (line 629) | def forward(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/model_specific_stages/helios_decoding.py
  class HeliosDecodingStage (line 21) | class HeliosDecodingStage(DecodingStage):
    method forward (line 36) | def forward(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/model_specific_stages/helios_denoising.py
  function optimized_scale (line 31) | def optimized_scale(positive_flat, negative_flat):
  function calculate_shift (line 40) | def calculate_shift(
  function sample_block_noise (line 53) | def sample_block_noise(
  class HeliosChunkedDenoisingStage (line 80) | class HeliosChunkedDenoisingStage(PipelineStage):
    method __init__ (line 89) | def __init__(self, transformer, scheduler):
    method parallelism_type (line 95) | def parallelism_type(self):
    method _denoise_one_chunk (line 98) | def _denoise_one_chunk(
    method _denoise_one_chunk_stage2 (line 226) | def _denoise_one_chunk_stage2(
    method forward (line 442) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/model_specific_stages/mova.py
  class MOVALatentPreparationStage (line 75) | class MOVALatentPreparationStage(PipelineStage):
    method __init__ (line 78) | def __init__(self, audio_vae, require_vae_embedding: bool = True) -> N...
    method forward (line 83) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
  class MOVATimestepPreparationStage (line 121) | class MOVATimestepPreparationStage(PipelineStage):
    method __init__ (line 124) | def __init__(self, scheduler) -> None:
    method forward (line 128) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
  class MOVADenoisingStage (line 145) | class MOVADenoisingStage(PipelineStage):
    method __init__ (line 148) | def __init__(self, video_dit, video_dit_2, audio_dit, dual_tower_bridg...
    method parallelism_type (line 160) | def parallelism_type(self) -> StageParallelismType:
    method _predict (line 165) | def _predict(
    method _cfg_combine (line 196) | def _cfg_combine(self, pos, neg, guidance_scale, cfg_rank, enable_cfg_...
    method _maybe_enable_torch_compile (line 205) | def _maybe_enable_torch_compile(self, module: nn.Module, server_args: ...
    method _maybe_compile_dits (line 238) | def _maybe_compile_dits(self, server_args: ServerArgs):
    method verify_input (line 245) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 272) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...
    method progress_bar (line 279) | def progress_bar(
    method step_profile (line 289) | def step_profile(self):
    method rescale_noise_cfg (line 294) | def rescale_noise_cfg(
    method prepare_extra_func_kwargs (line 313) | def prepare_extra_func_kwargs(self, func, kwargs) -> dict[str, object]:
    method _build_attn_metadata (line 324) | def _build_attn_metadata(
    method _manage_device_placement (line 329) | def _manage_device_placement(
    method _select_visual_dit (line 350) | def _select_visual_dit(
    method _ensure_shared_models_on_device (line 368) | def _ensure_shared_models_on_device(self, server_args: ServerArgs):
    method _apply_guidance_rescale (line 373) | def _apply_guidance_rescale(
    method forward (line 400) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:
    method _shard_sequence_for_sp (line 596) | def _shard_sequence_for_sp(
    method _gather_sequence_from_sp (line 632) | def _gather_sequence_from_sp(
    method inference_single_step (line 657) | def inference_single_step(
    method forward_dual_tower_dit (line 787) | def forward_dual_tower_dit(
  class MOVADecodingStage (line 882) | class MOVADecodingStage(PipelineStage):
    method __init__ (line 885) | def __init__(self, video_vae, audio_vae) -> None:
    method parallelism_type (line 891) | def parallelism_type(self) -> StageParallelismType:
    method forward (line 897) | def forward(self, batch: Req, server_args: ServerArgs) -> OutputBatch:

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/model_specific_stages/qwen_image_layered.py
  function calculate_dimensions (line 22) | def calculate_dimensions(target_area, ratio):
  function retrieve_latents (line 33) | def retrieve_latents(
  function retrieve_timesteps (line 47) | def retrieve_timesteps(
  class QwenImageLayeredBeforeDenoisingStage (line 112) | class QwenImageLayeredBeforeDenoisingStage(PipelineStage):
    method __init__ (line 113) | def __init__(
    method _extract_masked_hidden (line 162) | def _extract_masked_hidden(self, hidden_states: torch.Tensor, mask: to...
    method get_image_caption (line 170) | def get_image_caption(self, prompt_image, use_en_prompt=True, device=N...
    method _get_qwen_prompt_embeds (line 196) | def _get_qwen_prompt_embeds(
    method _pack_latents (line 247) | def _pack_latents(latents, batch_size, num_channels_latents, height, w...
    method encode_prompt (line 259) | def encode_prompt(
    method _encode_vae_image (line 299) | def _encode_vae_image(self, image: torch.Tensor, generator: torch.Gene...
    method prepare_latents (line 329) | def prepare_latents(
    method forward (line 410) | def forward(

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/text_connector.py
  class LTX2TextConnectorStage (line 9) | class LTX2TextConnectorStage(PipelineStage):
    method __init__ (line 15) | def __init__(self, connectors):
    method forward (line 19) | def forward(self, batch: Req, server_args: ServerArgs) -> Req:

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/text_encoding.py
  class TextEncodingStage (line 31) | class TextEncodingStage(PipelineStage):
    method __init__ (line 39) | def __init__(self, text_encoders, tokenizers) -> None:
    method forward (line 49) | def forward(
    method verify_input (line 110) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method prepare_tokenizer_kwargs (line 130) | def prepare_tokenizer_kwargs(self, tokenizer_kwargs, **kwargs):
    method encode_text (line 136) | def encode_text(
    method verify_output (line 317) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/timestep_preparation.py
  class TimestepPreparationStage (line 33) | class TimestepPreparationStage(PipelineStage):
    method __init__ (line 41) | def __init__(
    method parallelism_type (line 55) | def parallelism_type(self) -> StageParallelismType:
    method forward (line 58) | def forward(
    method verify_input (line 140) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific...
    method verify_output (line 151) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi...

FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/validators.py
  class StageValidators (line 17) | class StageValidators:
    method not_none (line 21) | def not_none(value: Any) -> bool:
    method positive_int (line 26) | def positive_int(value: Any) -> bool:
    method non_negative_int (line 31) | def non_negative_int(value: Any) -> bool:
    method positive_float (line 36) | def positive_float(value: Any) -> bool:
    method non_negative_float (line 41) | def non_negative_float(value: Any) -> bool:
    method divisible_by (line 46) | def divisible_by(value: Any, divisor: int) -> bool:
    method is_tensor (line 51) | def is_tensor(value: Any) -> bool:
    method tensor_with_dims (line 58) | def tensor_with_dims(value: Any, dims: int) -> bool:
    method tensor_min_dims (line 67) | def tensor_min_dims(value: Any, min_dims: int) -> bool:
    method tensor_shape_matches (line 76) | def tensor_shape_matches(value: Any, expected_shape: tuple) -> bool:
    method list_not_empty (line 88) | def list_not_empty(value: Any) -> bool:
    method list_length (line 93) | def list_length(value: Any, length: int) -> bool:
    method list_min_length (line 98) | def list_min_length(value: Any, min_length: int) -> bool:
    method string_not_empty (line 103) | def string_not_empty(value: Any) -> bool:
    method string_not_none (line 108) | def string_not_none(value: Any) -> bool:
    method string_or_list_strings (line 113) | def string_or_list_strings(value: Any) -> bool:
    method bool_value (line 122) | def bool_value(value: Any) -> bool:
    method generator_or_list_generators (line 127) | def generator_or_list_generators(value: Any) -> bool:
    method is_list (line 136) | def is_list(value: Any) -> bool:
    method is_tuple (line 141) | def is_tuple(value: Any) -> bool:
    method none_or_tensor (line 146) | def none_or_tensor(value: Any) -> bool:
    method list_of_tensors_with_dims (line 155) | def list_of_tensors_with_dims(value: Any, dims: int) -> bool:
    method list_of_tensors (line 169) | def list_of_tensors(value: Any) -> bool:
    method list_of_tensors_with_min_dims (line 181) | def list_of_tensors_with_min_dims(value: Any, min_dims: int) -> bool:
    method none_or_tensor_with_dims (line 195) | def none_or_tensor_with_dims(dims: int) -> Callable[[Any], bool]:
    method none_or_list (line 210) | def none_or_list(value: Any) -> bool:
    method none_or_positive_int (line 215) | def none_or_positive_int(value: Any) -> bool:
    method with_dims (line 221) | def with_dims(dims: int) -> Callable[[Any], bool]:
    method min_dims (line 230) | def min_dims(min_dims: int) -> Callable[[Any], bool]:
    method divisible (line 239) | def divisible(divisor: int) -> Callable[[Any], bool]:
    method positive_int_divisible (line 248) | def positive_int_divisible(divisor: int) -> Callable[[Any], bool]:
    method list_of_tensors_dims (line 261) | def list_of_tensors_dims(dims: int) -> Callable[[Any], bool]:
    method list_of_tensors_min_dims (line 270) | def list_of_tensors_min_dims(min_dims: int) -> Callable[[Any], bool]:
  class ValidationFailure (line 279) | class ValidationFailure:
    method __init__ (line 282) | def __init__(
    method __str__ (line 294) | def __str__(self) -> str:
    method _format_value (line 309) | def _format_value(self, value: Any) -> str:
  class VerificationResult (line 332) | class VerificationResult:
    method __init__ (line 335) | def __init__(self) -> None:
    method add_check (line 339) | def add_check(
    method _create_validation_failure (line 398) | def _create_validation_failure(
    method is_valid (line 490) | def is_valid(self) -> bool:
    method get_failed_fields (line 494) | def get_failed_fields(self) -> list[str]:
    method get_detailed_failures (line 498) | def get_detailed_failures(self) -> dict[str, list[ValidationFailure]]:
    method get_failure_summary (line 502) | def get_failure_summary(self) -> str:
    method to_dict (line 516) | def to_dict(self) -> dict:

FILE: python/sglang/multimodal_gen/runtime/platforms/__init__.py
  function cuda_platform_plugin (line 21) | def cuda_platform_plugin() -> str | None:
  function mps_platform_plugin (line 61) | def mps_platform_plugin() -> str | None:
  function cpu_platform_plugin (line 77) | def cpu_platform_plugin() -> str | None:
  function rocm_platform_plugin (line 83) | def rocm_platform_plugin() -> str | None:
  function npu_platform_plugin (line 104) | def npu_platform_plugin() -> str | None:
  function musa_platform_plugin (line 122) | def musa_platform_plugin() -> str | None:
  function resolve_current_platform_cls_qualname (line 151) | def resolve_current_platform_cls_qualname() -> str:
  function __getattr__ (line 194) | def __getattr__(name: str):

FILE: python/sglang/multimodal_gen/runtime/platforms/cpu.py
  class CpuPlatform (line 20) | class CpuPlatform(Platform):
    method get_cpu_architecture (line 27) | def get_cpu_architecture(cls) -> CpuArchEnum:
    method get_device_name (line 38) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_uuid (line 42) | def get_device_uuid(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 47) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method is_async_output_supported (line 52) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool:
    method get_current_memory_usage (line 56) | def get_current_memory_usage(
    method get_available_gpu_memory (line 63) | def get_available_gpu_memory(
    method get_device_communicator_cls (line 87) | def get_device_communicator_cls(cls) -> str:

FILE: python/sglang/multimodal_gen/runtime/platforms/cuda.py
  function device_id_to_physical_device_id (line 40) | def device_id_to_physical_device_id(device_id: int) -> int:
  function with_nvml_context (line 59) | def with_nvml_context(fn: Callable[_P, _R]) -> Callable[_P, _R]:
  class CudaPlatformBase (line 71) | class CudaPlatformBase(Platform):
    method get_local_torch_device (line 79) | def get_local_torch_device(cls) -> torch.device:
    method get_device_capability (line 83) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability...
    method get_device_name (line 87) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 92) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method is_async_output_supported (line 96) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool:
    method is_full_nvlink (line 107) | def is_full_nvlink(cls, device_ids: list[int]) -> bool:
    method log_warnings (line 111) | def log_warnings(cls) -> None:
    method get_current_memory_usage (line 115) | def get_current_memory_usage(
    method get_available_gpu_memory (line 122) | def get_available_gpu_memory(
    method get_attn_backend_cls_str (line 151) | def get_attn_backend_cls_str(
    method get_device_communicator_cls (line 382) | def get_device_communicator_cls(cls) -> str:
  class NvmlCudaPlatform (line 390) | class NvmlCudaPlatform(CudaPlatformBase):
    method get_device_capability (line 394) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability...
    method has_device_capability (line 406) | def has_device_capability(
    method get_device_name (line 419) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_uuid (line 426) | def get_device_uuid(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 434) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method is_full_nvlink (line 441) | def is_full_nvlink(cls, physical_device_ids: list[int]) -> bool:
    method _get_physical_device_name (line 466) | def _get_physical_device_name(cls, device_id: int = 0) -> str:
    method log_warnings (line 472) | def log_warnings(cls) -> None:
  class NonNvmlCudaPlatform (line 488) | class NonNvmlCudaPlatform(CudaPlatformBase):
    method get_device_capability (line 490) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
    method get_device_name (line 495) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 500) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method is_full_nvlink (line 505) | def is_full_nvlink(cls, physical_device_ids: list[int]) -> bool:

FILE: python/sglang/multimodal_gen/runtime/platforms/interface.py
  class AttentionBackendEnum (line 26) | class AttentionBackendEnum(enum.Enum):
    method __str__ (line 42) | def __str__(self):
    method is_sparse (line 46) | def is_sparse(self) -> bool:
  class PlatformEnum (line 57) | class PlatformEnum(enum.Enum):
  class CpuArchEnum (line 69) | class CpuArchEnum(enum.Enum):
  class DeviceCapability (line 75) | class DeviceCapability(NamedTuple):
    method as_version_str (line 79) | def as_version_str(self) -> str:
    method to_int (line 82) | def to_int(self) -> int:
  class Platform (line 92) | class Platform:
    method is_cuda (line 113) | def is_cuda(self) -> bool:
    method is_npu (line 117) | def is_npu(self) -> bool:
    method is_rocm (line 121) | def is_rocm(self) -> bool:
    method is_tpu (line 125) | def is_tpu(self) -> bool:
    method is_cpu (line 129) | def is_cpu(self) -> bool:
    method is_blackwell (line 134) | def is_blackwell(cls):
    method is_hopper (line 141) | def is_hopper(cls):
    method is_sm120 (line 148) | def is_sm120(cls):
    method is_cuda_static (line 154) | def is_cuda_static(cls) -> bool:
    method is_rocm_static (line 158) | def is_rocm_static(cls) -> bool:
    method is_hpu (line 162) | def is_hpu(self) -> bool:
    method is_xpu (line 166) | def is_xpu(self) -> bool:
    method is_npu (line 170) | def is_npu(self) -> bool:
    method is_out_of_tree (line 173) | def is_out_of_tree(self) -> bool:
    method is_cuda_alike (line 177) | def is_cuda_alike(self) -> bool:
    method is_mps (line 182) | def is_mps(self) -> bool:
    method is_musa (line 186) | def is_musa(self):
    method is_hip (line 193) | def is_hip(self) -> bool:
    method is_amp_supported (line 198) | def is_amp_supported(cls) -> bool:
    method get_local_torch_device (line 202) | def get_local_torch_device(cls) -> torch.device:
    method get_attn_backend_cls_str (line 206) | def get_attn_backend_cls_str(
    method get_device_capability (line 216) | def get_device_capability(
    method has_device_capability (line 224) | def has_device_capability(
    method get_device_name (line 247) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_uuid (line 252) | def get_device_uuid(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 258) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method get_device (line 263) | def get_device(self, local_rank: int) -> torch.device:
    method get_torch_distributed_backend_str (line 276) | def get_torch_distributed_backend_str(self) -> str:
    method is_async_output_supported (line 291) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool:
    method inference_mode (line 298) | def inference_mode(cls):
    method seed_everything (line 308) | def seed_everything(cls, seed: int | None = None) -> None:
    method verify_model_arch (line 322) | def verify_model_arch(cls, model_arch: str) -> None:
    method verify_quantization (line 334) | def verify_quantization(cls, quant: str) -> None:
    method get_current_memory_usage (line 345) | def get_current_memory_usage(
    method get_available_gpu_memory (line 354) | def get_available_gpu_memory(
    method get_device_communicator_cls (line 367) | def get_device_communicator_cls(cls) -> str:
    method get_cpu_architecture (line 374) | def get_cpu_architecture(cls) -> CpuArchEnum:
    method enable_dit_layerwise_offload_for_wan_by_default (line 379) | def enable_dit_layerwise_offload_for_wan_by_default(cls) -> bool:
    method optimize_vae (line 384) | def optimize_vae(cls, vae: torch.nn.Module) -> torch.nn.Module:
    method get_attn_backend (line 388) | def get_attn_backend(self, *args, **kwargs) -> AttentionImpl:
  class UnspecifiedPlatform (line 393) | class UnspecifiedPlatform(Platform):

FILE: python/sglang/multimodal_gen/runtime/platforms/mps.py
  class MpsPlatform (line 22) | class MpsPlatform(Platform):
    method is_amp_supported (line 31) | def is_amp_supported(cls) -> bool:
    method get_local_torch_device (line 35) | def get_local_torch_device(cls) -> torch.device:
    method get_device_capability (line 39) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability...
    method get_device_name (line 43) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_uuid (line 47) | def get_device_uuid(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 52) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method is_async_output_supported (line 57) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool:
    method get_current_memory_usage (line 68) | def get_current_memory_usage(
    method get_available_gpu_memory (line 74) | def get_available_gpu_memory(
    method get_attn_backend_cls_str (line 98) | def get_attn_backend_cls_str(
    method get_device_communicator_cls (line 111) | def get_device_communicator_cls(cls) -> str:
    method seed_everything (line 116) | def seed_everything(cls, seed: int | None = None) -> None:

FILE: python/sglang/multimodal_gen/runtime/platforms/musa.py
  function device_id_to_physical_device_id (line 36) | def device_id_to_physical_device_id(device_id: int) -> int:
  function with_mtml_context (line 55) | def with_mtml_context(fn: Callable[_P, _R]) -> Callable[_P, _R]:
  class MusaPlatformBase (line 67) | class MusaPlatformBase(Platform):
    method get_local_torch_device (line 75) | def get_local_torch_device(cls) -> torch.device:
    method get_device_capability (line 79) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability...
    method get_device_name (line 83) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 88) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method is_async_output_supported (line 92) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool:
    method is_full_mtlink (line 103) | def is_full_mtlink(cls, device_ids: list[int]) -> bool:
    method log_warnings (line 107) | def log_warnings(cls) -> None:
    method get_current_memory_usage (line 111) | def get_current_memory_usage(
    method get_available_gpu_memory (line 118) | def get_available_gpu_memory(
    method get_attn_backend_cls_str (line 147) | def get_attn_backend_cls_str(
    method get_device_communicator_cls (line 159) | def get_device_communicator_cls(cls) -> str:
  class MtmlMusaPlatform (line 167) | class MtmlMusaPlatform(MusaPlatformBase):
    method get_device_capability (line 171) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability...
    method has_device_capability (line 183) | def has_device_capability(
    method get_device_name (line 196) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_uuid (line 203) | def get_device_uuid(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 211) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method is_full_mtlink (line 218) | def is_full_mtlink(cls, physical_device_ids: list[int]) -> bool:
    method _get_physical_device_name (line 243) | def _get_physical_device_name(cls, device_id: int = 0) -> str:
    method log_warnings (line 249) | def log_warnings(cls) -> None:
  class NonMtmlMusaPlatform (line 265) | class NonMtmlMusaPlatform(MusaPlatformBase):
    method get_device_capability (line 267) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
    method get_device_name (line 272) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 277) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method is_full_mtlink (line 282) | def is_full_mtlink(cls, physical_device_ids: list[int]) -> bool:

FILE: python/sglang/multimodal_gen/runtime/platforms/npu.py
  function device_id_to_physical_device_id (line 21) | def device_id_to_physical_device_id(device_id: int) -> int:
  class NPUPlatformBase (line 36) | class NPUPlatformBase(Platform):
    method get_local_torch_device (line 44) | def get_local_torch_device(cls) -> torch.device:
    method get_device_capability (line 48) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
    method get_device_name (line 52) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 56) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method is_async_output_supported (line 61) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool:
    method is_full_nvlink (line 72) | def is_full_nvlink(cls, physical_device_ids: list[int]) -> bool:
    method get_available_gpu_memory (line 80) | def get_available_gpu_memory(
    method log_warnings (line 102) | def log_warnings(cls) -> None:
    method get_current_memory_usage (line 106) | def get_current_memory_usage(
    method get_attn_backend_cls_str (line 113) | def get_attn_backend_cls_str(
    method get_device_communicator_cls (line 125) | def get_device_communicator_cls(cls) -> str:
    method enable_dit_layerwise_offload_for_wan_by_default (line 129) | def enable_dit_layerwise_offload_for_wan_by_default(cls) -> bool:

FILE: python/sglang/multimodal_gen/runtime/platforms/rocm.py
  class RocmPlatform (line 28) | class RocmPlatform(Platform):
    method get_local_torch_device (line 36) | def get_local_torch_device(cls) -> torch.device:
    method get_device_capability (line 40) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
    method get_device_name (line 45) | def get_device_name(cls, device_id: int = 0) -> str:
    method get_device_total_memory (line 50) | def get_device_total_memory(cls, device_id: int = 0) -> int:
    method is_async_output_supported (line 54) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool:
    method log_warnings (line 64) | def log_warnings(cls) -> None:
    method get_current_memory_usage (line 68) | def get_current_memory_usage(cls, device: torch.device | None = None) ...
    method get_available_gpu_memory (line 73) | def get_available_gpu_memory(
    method get_attn_backend_cls_str (line 95) | def get_attn_backend_cls_str(
    method get_device_communicator_cls (line 182) | def get_device_communicator_cls(cls) -> str:
    method optimize_vae (line 186) | def optimize_vae(cls, vae: torch.nn.Module) -> torch.nn.Module:
    method _replace_groupnorm (line 207) | def _replace_groupnorm(module: torch.nn.Module, aiter_gn_cls: type) ->...
    method enable_dit_layerwise_offload_for_wan_by_default (line 228) | def enable_dit_layerwise_offload_for_wan_by_default(cls) -> bool:

FILE: python/sglang/multimodal_gen/runtime/postprocess/realesrgan_upscaler.py
  class SRVGGNetCompact (line 40) | class SRVGGNetCompact(nn.Module):
    method __init__ (line 47) | def __init__(
    method _make_act (line 78) | def _make_act(act_type: str, num_feat: int) -> nn.Module:
    method forward (line 88) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class ResidualDenseBlock (line 98) | class ResidualDenseBlock(nn.Module):
    method __init__ (line 101) | def __init__(self, num_feat: int = 64, num_grow_ch: int = 32):
    method forward (line 110) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class RRDB (line 119) | class RRDB(nn.Module):
    method __init__ (line 122) | def __init__(self, num_feat: int, num_grow_ch: int = 32):
    method forward (line 128) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class RRDBNet (line 135) | class RRDBNet(nn.Module):
    method __init__ (line 138) | def __init__(
    method forward (line 166) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function _build_net_from_state_dict (line 190) | def _build_net_from_state_dict(state_dict: dict) -> nn.Module:
  class UpscalerModel (line 255) | class UpscalerModel:
    method __init__ (line 258) | def __init__(self, net: nn.Module, scale: int):
    method device (line 263) | def device(self) -> torch.device:
    method upscale (line 266) | def upscale(self, frame: np.ndarray, outscale: float | None = None) ->...
  class ImageUpscaler (line 301) | class ImageUpscaler:
    method __init__ (line 309) | def __init__(
    method _ensure_model_loaded (line 319) | def _ensure_model_loaded(self) -> UpscalerModel:
    method upscale (line 381) | def upscale(self, frames: list[np.ndarray]) -> list[np.ndarray]:
  function _resolve_model_path (line 399) | def _resolve_model_path(model_path: str) -> str:
  function upscale_frames (line 453) | def upscale_frames(

FILE: python/sglang/multimodal_gen/runtime/postprocess/rife_interpolator.py
  function warp (line 39) | def warp(tenInput: torch.Tensor, tenFlow: torch.Tensor) -> torch.Tensor:
  function _conv (line 72) | def _conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dil...
  class ResConv (line 88) | class ResConv(nn.Module):
    method __init__ (line 91) | def __init__(self, c: int, dilation: int = 1):
    method forward (line 97) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class IFBlock (line 101) | class IFBlock(nn.Module):
    method __init__ (line 104) | def __init__(self, in_planes: int, c: int = 64):
    method forward (line 125) | def forward(
  class Head (line 158) | class Head(nn.Module):
    method __init__ (line 161) | def __init__(self):
    method forward (line 169) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class IFNet (line 180) | class IFNet(nn.Module):
    method __init__ (line 183) | def __init__(self):
    method forward (line 191) | def forward(
  class Model (line 262) | class Model:
    method __init__ (line 265) | def __init__(self):
    method eval (line 269) | def eval(self) -> "Model":
    method device (line 273) | def device(self) -> torch.device:
    method load_model (line 276) | def load_model(self, path: str, strip_module_prefix: bool = True) -> N...
    method inference (line 305) | def inference(
  class FrameInterpolator (line 341) | class FrameInterpolator:
    method __init__ (line 349) | def __init__(self, model_path: Optional[str] = None):
    method _ensure_model_loaded (line 353) | def _ensure_model_loaded(self) -> Model:
    method _frame_to_tensor (line 384) | def _frame_to_tensor(frame: np.ndarray, device: torch.device) -> torch...
    method _tensor_to_frame (line 390) | def _tensor_to_frame(t: torch.Tensor) -> np.ndarray:
    method _make_inference (line 395) | def _make_inference(
    method interpolate (line 412) | def interpolate(
  function interpolate_video_frames (line 463) | def interpolate_video_frames(

FILE: python/sglang/multimodal_gen/runtime/scheduler_client.py
  function run_zeromq_broker (line 13) | async def run_zeromq_broker(server_args: ServerArgs):
  class SchedulerClient (line 47) | class SchedulerClient:
    method __init__ (line 53) | def __init__(self):
    method initialize (line 58) | def initialize(self, server_args: ServerArgs):
    method forward (line 79) | def forward(self, batch: Any) -> Any:
    method ping (line 89) | def ping(self) -> bool:
    method close (line 113) | def close(self):
  class AsyncSchedulerClient (line 123) | class AsyncSchedulerClient:
    method __init__ (line 132) | def __init__(self):
    method initialize (line 136) | def initialize(self, server_args: ServerArgs):
    method forward (line 147) | async def forward(self, batch: Any) -> Any:
    method ping (line 173) | async def ping(self) -> bool:
    method close (line 197) | def close(self):

FILE: python/sglang/multimodal_gen/runtime/server_args.py
  class Backend (line 53) | class Backend(str, Enum):
    method from_string (line 66) | def from_string(cls, value: str) -> "Backend":
    method choices (line 76) | def choices(cls) -> list[str]:
  class ServerArgs (line 82) | class ServerArgs:
    method broker_port (line 219) | def broker_port(self) -> int:
    method is_local_mode (line 223) | def is_local_mode(self) -> bool:
    method _adjust_path (line 229) | def _adjust_path(self):
    method _adjust_parameters (line 233) | def _adjust_parameters(self):
    method _validate_parameters (line 247) | def _validate_parameters(self):
    method _adjust_save_paths (line 254) | def _adjust_save_paths(self):
    method _adjust_quant_config (line 261) | def _adjust_quant_config(self):
    method adjust_pipeline_config (line 284) | def adjust_pipeline_config(self):
    method _adjust_offload (line 304) | def _adjust_offload(self):
    method _adjust_attention_backend (line 338) | def _adjust_attention_backend(self):
    method _adjust_warmup (line 368) | def _adjust_warmup(self):
    method _adjust_network_ports (line 377) | def _adjust_network_ports(self):
    method _adjust_parallelism (line 390) | def _adjust_parallelism(self):
    method _adjust_platform_specific (line 428) | def _adjust_platform_specific(self):
    method _adjust_autocast (line 447) | def _adjust_autocast(self):
    method _parse_attention_backend_config (line 451) | def _parse_attention_backend_config(self, config_str: str) -> dict[str...
    method __post_init__ (line 490) | def __post_init__(self):
    method add_cli_args (line 509) | def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentPa...
    method url (line 819) | def url(self):
    method scheduler_endpoint (line 831) | def scheduler_endpoint(self):
    method settle_port (line 841) | def settle_port(
    method _extract_component_paths (line 871) | def _extract_component_paths(
    method from_cli_args (line 907) | def from_cli_args(
    method from_dict (line 934) | def from_dict(cls, kwargs: dict[str, Any]) -> "ServerArgs":
    method load_config_file (line 958) | def load_config_file(config_file: str) -> dict[str, Any]:
    method from_kwargs (line 977) | def from_kwargs(cls, **kwargs: Any) -> "ServerArgs":
    method get_provided_args (line 986) | def get_provided_args(
    method _validate_pipeline (line 1011) | def _validate_pipeline(self):
    method _validate_offload (line 1017) | def _validate_offload(self):
    method _validate_parallelism (line 1060) | def _validate_parallelism(self):
    method _validate_cfg_parallel (line 1117) | def _validate_cfg_parallel(self):
    method _set_default_attention_backend (line 1123) | def _set_default_attention_backend(self) -> None:
  class PortArgs (line 1136) | class PortArgs:
    method from_server_args (line 1153) | def from_server_args(
  function prepare_server_args (line 1181) | def prepare_server_args(argv: list[str]) -> ServerArgs:
  function set_global_server_args (line 1192) | def set_global_server_args(server_args: ServerArgs):
  function get_global_server_args (line 1200) | def get_global_server_args() -> ServerArgs:

FILE: python/sglang/multimodal_gen/runtime/utils/common.py
  function kill_process_tree (line 21) | def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid:...
  function add_prefix (line 60) | def add_prefix(name: str, prefix: str) -> str:
  function is_valid_ipv6_address (line 73) | def is_valid_ipv6_address(address: str) -> bool:
  function configure_ipv6 (line 81) | def configure_ipv6(dist_init_addr):
  function is_port_available (line 113) | def is_port_available(port):
  function get_zmq_socket (line 127) | def get_zmq_socket(
  function is_host_cpu_x86 (line 246) | def is_host_cpu_x86() -> bool:
  function set_cuda_arch (line 258) | def set_cuda_arch():
  function set_musa_arch (line 267) | def set_musa_arch():
  function get_bool_env_var (line 278) | def get_bool_env_var(name: str, default: str = "false") -> bool:
  function cpu_has_amx_support (line 312) | def cpu_has_amx_support():
  function use_intel_amx_backend (line 316) | def use_intel_amx_backend(layer):

FILE: python/sglang/multimodal_gen/runtime/utils/distributed.py
  function broadcast_pyobj (line 13) | def broadcast_pyobj(
  function generate_masked_orthogonal_rank_groups (line 61) | def generate_masked_orthogonal_rank_groups(
  class RankGenerator (line 165) | class RankGenerator(object):
    method __init__ (line 166) | def __init__(
    method get_mask (line 207) | def get_mask(self, order: str, token: str):
    method get_ranks (line 215) | def get_ranks(self, token):

FILE: python/sglang/multimodal_gen/runtime/utils/hf_diffusers_utils.py
  function _check_index_files_for_missing_shards (line 53) | def _check_index_files_for_missing_shards(
  function _cleanup_model_cache (line 116) | def _cleanup_model_cache(model_path: str, reason: str) -> bool:
  function _ci_validate_diffusers_model (line 160) | def _ci_validate_diffusers_model(model_path: str) -> tuple[bool, bool]:
  function _verify_diffusers_model_complete (line 207) | def _verify_diffusers_model_complete(path: str) -> bool:
  function download_from_hf (line 247) | def download_from_hf(model_path: str):
  function get_hf_config (line 254) | def get_hf_config(
  function get_config (line 281) | def get_config(
  function load_dict (line 293) | def load_dict(file_path):
  function get_diffusers_component_config (line 310) | def get_diffusers_component_config(
  function attach_additional_stop_token_ids (line 353) | def attach_additional_stop_token_ids(tokenizer):
  function check_gguf_file (line 363) | def check_gguf_file(model: str | os.PathLike) -> bool:
  function maybe_download_lora (line 376) | def maybe_download_lora(
  function verify_model_config_and_directory (line 412) | def verify_model_config_and_directory(model_path: str) -> dict[str, Any]:
  function maybe_download_model_index (line 474) | def maybe_download_model_index(model_name_or_path: str) -> dict[str, Any]:
  function maybe_download_model (line 556) | def maybe_download_model(
  function hf_hub_download (line 757) | def hf_hub_download(
  function snapshot_download (line 784) | def snapshot_download(

FILE: python/sglang/multimodal_gen/runtime/utils/layerwise_offload.py
  class LayerwiseOffloadManager (line 15) | class LayerwiseOffloadManager:
    method __init__ (line 29) | def __init__(
    method _match_layer_idx (line 75) | def _match_layer_idx(self, name: str) -> int | None:
    method _get_shared_empty_tensor (line 84) | def _get_shared_empty_tensor(self, dtype: torch.dtype) -> torch.Tensor:
    method _initialize (line 92) | def _initialize(self) -> None:
    method prepare_for_next_req (line 151) | def prepare_for_next_req(self, non_blocking=True):
    method get_target_with_name (line 160) | def get_target_with_name(self, name: str) -> torch.Tensor:
    method prefetch_layer (line 169) | def prefetch_layer(self, layer_idx: int, non_blocking: bool = True) ->...
    method release_layer (line 212) | def release_layer(self, layer_idx: int) -> None:
    method release_all (line 234) | def release_all(self) -> None:
    method load_all_layers (line 244) | def load_all_layers(self) -> None:
    method sync_layer_to_cpu (line 256) | def sync_layer_to_cpu(self, layer_idx: int) -> None:
    method sync_all_layers_to_cpu (line 278) | def sync_all_layers_to_cpu(self) -> None:
    method update_cpu_weights (line 289) | def update_cpu_weights(
    method iter_cpu_weights (line 350) | def iter_cpu_weights(self):
    method register_forward_hooks (line 368) | def register_forward_hooks(self) -> None:
    method remove_forward_hooks (line 407) | def remove_forward_hooks(self) -> None:
  class OffloadableDiTMixin (line 414) | class OffloadableDiTMixin:
    method configure_layerwise_offload (line 423) | def configure_layerwise_offload(self, server_args: ServerArgs):
    method prepare_for_next_req (line 453) | def prepare_for_next_req(self):
    method disable_offload (line 459) | def disable_offload(self) -> None:
    method enable_offload (line 468) | def enable_offload(self) -> None:
  function iter_materialized_weights (line 479) | def iter_materialized_weights(module: torch.nn.Module):

FILE: python/sglang/multimodal_gen/runtime/utils/logging_utils.py
  class ColoredFormatter (line 75) | class ColoredFormatter(logging.Formatter):
    method format (line 83) | def format(self, record: logging.LogRecord) -> str:
  class SortedHelpFormatter (line 95) | class SortedHelpFormatter(argparse.HelpFormatter):
    method add_arguments (line 98) | def add_arguments(self, actions):
  function _print_info_once (line 104) | def _print_info_once(logger: Logger, msg: str) -> None:
  function _print_warning_once (line 110) | def _print_warning_once(logger: Logger, msg: str) -> None:
  function get_is_main_process (line 115) | def get_is_main_process():
  function get_is_local_main_process (line 123) | def get_is_local_main_process():
  function _log_process_aware (line 131) | def _log_process_aware(
  class _SGLDiffusionLogger (line 161) | class _SGLDiffusionLogger(Logger):
    method info_once (line 170) | def info_once(self, msg: str) -> None:
    method warning_once (line 177) | def warning_once(self, msg: str) -> None:
    method info (line 184) | def info(  # type: ignore[override]
    method debug (line 193) | def debug(  # type: ignore[override]
    method warning (line 202) | def warning(  # type: ignore[override]
    method error (line 211) | def error(  # type: ignore[override]
  function init_logger (line 221) | def init_logger(name: str) -> _SGLDiffusionLogger:
  function _is_torch_tensor (line 287) | def _is_torch_tensor(obj: Any) -> tuple[bool, Any]:
  function _sanitize_for_logging (line 297) | def _sanitize_for_logging(obj: Any, key_hint: str | None = None) -> Any:
  function _trace_calls (line 388) | def _trace_calls(log_path, root_dir, frame, event, arg=None):
  function enable_trace_function_call (line 431) | def enable_trace_function_call(log_file_path: str, root_dir: str | None ...
  function set_uvicorn_logging_configs (line 454) | def set_uvicorn_logging_configs():
  function configure_logger (line 467) | def configure_logger(server_args, prefix: str = ""):
  function get_log_level (line 484) | def get_log_level() -> int:
  function suppress_loggers (line 489) | def suppress_loggers(loggers_to_suppress: list[str], level: int = loggin...
  function globally_suppress_loggers (line 500) | def globally_suppress_loggers():
  function suppress_stdout (line 520) | def suppress_stdout():
  class GenerationTimer (line 547) | class GenerationTimer:
    method __init__ (line 548) | def __init__(self):
  function log_generation_timer (line 555) | def log_generation_timer(
  function log_batch_completion (line 595) | def log_batch_completion(

FILE: python/sglang/multimodal_gen/runtime/utils/mesh3d_utils.py
  function transform_pos (line 24) | def transform_pos(
  function get_mv_matrix (line 43) | def get_mv_matrix(
  function get_orthographic_projection_matrix (line 90) | def get_orthographic_projection_matrix(
  function get_perspective_projection_matrix (line 109) | def get_perspective_projection_matrix(
  function export_to_trimesh (line 127) | def export_to_trimesh(mesh_output: Any) -> Any:
  function mesh_uv_wrap (line 145) | def mesh_uv_wrap(mesh: Any) -> Any:
  function stride_from_shape (line 175) | def stride_from_shape(shape: Tuple[int, ...]) -> List[int]:
  function scatter_add_nd_with_count (line 183) | def scatter_add_nd_with_count(
  function linear_grid_put_2d (line 214) | def linear_grid_put_2d(
  class MeshRender (line 289) | class MeshRender:
    method __init__ (line 292) | def __init__(
    method set_default_render_resolution (line 345) | def set_default_render_resolution(
    method set_default_texture_resolution (line 353) | def set_default_texture_resolution(self, texture_size: Union[int, Tupl...
    method _rasterize (line 359) | def _rasterize(
    method _interpolate (line 376) | def _interpolate(
    method load_mesh (line 394) | def load_mesh(
    method save_mesh (line 441) | def save_mesh(self) -> trimesh.Trimesh:
    method get_mesh (line 454) | def get_mesh(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.nda...
    method set_texture (line 473) | def set_texture(self, tex: Union[np.ndarray, torch.Tensor, Image.Image]):
    method get_texture (line 489) | def get_texture(self) -> np.ndarray:
    method _get_pos_from_mvp (line 495) | def _get_pos_from_mvp(
    method render_normal (line 518) | def render_normal(
    method render_position (line 589) | def render_position(
    method render_normal_multiview (line 633) | def render_normal_multiview(
    method render_position_multiview (line 648) | def render_position_multiview(
    method _render_sketch_from_depth (line 660) | def _render_sketch_from_depth(self, depth_image: torch.Tensor) -> torc...
    method back_project (line 671) | def back_project(
    method bake_from_multiview (line 794) | def bake_from_multiview(
    method fast_bake_texture (line 826) | def fast_bake_texture(
    method texture_inpaint (line 849) | def texture_inpaint(
  function array_to_tensor (line 897) | def array_to_tensor(np_array):
  function recenter_image (line 906) | def recenter_image(image, border_ratio=0.2):
  class ImageProcessorV2 (line 943) | class ImageProcessorV2:
    method __init__ (line 952) | def __init__(self, size=512, border_ratio=None):
    method recenter (line 957) | def recenter(image, border_ratio: float = 0.2):
    method load_image (line 1005) | def load_image(self, image, border_ratio=0.15, to_tensor=True):
    method __call__ (line 1024) | def __call__(self, image, border_ratio=0.15, to_tensor=True, **kwargs):
  class MVImageProcessorV2 (line 1034) | class MVImageProcessorV2(ImageProcessorV2):
    method __init__ (line 1044) | def __init__(self, size=512, border_ratio=None):
    method __call__ (line 1048) | def __call__(self, image_dict, border_ratio=0.15, to_tensor=True, **kw...
  function resolve_hunyuan3d_tool (line 1080) | def resolve_hunyuan3d_tool(target: str):

FILE: python/sglang/multimodal_gen/runtime/utils/perf_logger.py
  class MemorySnapshot (line 32) | class MemorySnapshot:
    method to_dict (line 38) | def to_dict(self) -> Dict[str, Any]:
  class RequestMetrics (line 48) | class RequestMetrics:
    method __init__ (line 51) | def __init__(self, request_id: str):
    method total_duration_s (line 60) | def total_duration_s(self) -> float:
    method record_stage (line 63) | def record_stage(self, stage_name: str, duration_s: float):
    method record_steps (line 67) | def record_steps(self, index: int, duration_s: float):
    method record_memory_snapshot (line 72) | def record_memory_snapshot(self, checkpoint_name: str, snapshot: Memor...
    method to_dict (line 75) | def to_dict(self) -> Dict[str, Any]:
  function get_diffusion_perf_log_dir (line 89) | def get_diffusion_perf_log_dir() -> str:
  function get_git_commit_hash (line 104) | def get_git_commit_hash() -> str:
  function capture_memory_snapshot (line 122) | def capture_memory_snapshot() -> MemorySnapshot:
  class RequestPerfRecord (line 145) | class RequestPerfRecord:
    method __init__ (line 157) | def __init__(
  class StageProfiler (line 182) | class StageProfiler:
    method __init__ (line 187) | def __init__(
    method __enter__ (line 204) | def __enter__(self):
    method __exit__ (line 222) | def __exit__(self, exc_type, exc_val, exc_tb):
  class PerformanceLogger (line 266) | class PerformanceLogger:
    method dump_benchmark_report (line 276) | def dump_benchmark_report(
    method log_request_summary (line 324) | def log_request_summary(

FILE: python/sglang/multimodal_gen/runtime/utils/profiler.py
  class SGLDiffusionProfiler (line 21) | class SGLDiffusionProfiler:
    method __init__ (line 33) | def __init__(
    method start (line 103) | def start(self):
    method _step (line 107) | def _step(self):
    method step_stage (line 110) | def step_stage(self):
    method step_denoising_step (line 114) | def step_denoising_step(self):
    method get_instance (line 124) | def get_instance(cls) -> "SGLDiffusionProfiler":
    method stop (line 127) | def stop(self, export_trace: bool = True, dump_rank: int | None = None):
    method _export_trace (line 149) | def _export_trace(self):
    method _check_trace_integrity (line 169) | def _check_trace_integrity(self, trace_path: str) -> bool:

FILE: python/sglang/multimodal_gen/runtime/utils/quantization_utils.py
  function find_quant_modelslim_config (line 18) | def find_quant_modelslim_config(model_config, component_model_path):
  function replace_prefix (line 31) | def replace_prefix(key: str, prefix_mapping: dict[str, str]) -> str:
  function get_quant_config (line 38) | def get_quant_config(
  function handle_fp8_metadata_format (line 114) | def handle_fp8_metadata_format(quant_config_dict):
  function get_quant_config_from_safetensors_metadata (line 124) | def get_quant_config_from_safetensors_metadata(
  function get_metadata_from_safetensors_file (line 163) | def get_metadata_from_safetensors_file(file_path: str):

FILE: python/sglang/multimodal_gen/test/cli/test_generate_common.py
  class TestResult (line 25) | class TestResult:
  function run_command (line 31) | def run_command(command) -> Optional[float]:
  class CLIBase (line 51) | class CLIBase(unittest.TestCase):
    method setUp (line 61) | def setUp(self):
    method tearDown (line 71) | def tearDown(self):
    method get_base_command (line 79) | def get_base_command(self):
    method _run_command (line 92) | def _run_command(self, name: str, model_path: str, args=[]):
    method _run_test (line 105) | def _run_test(self, name: str, args, model_path: str, test_key: str):
    method verify (line 109) | def verify(self, status, name):
    method model_name (line 125) | def model_name(self):
    method test_single_gpu (line 128) | def test_single_gpu(self):

FILE: python/sglang/multimodal_gen/test/cli/test_generate_i2i.py
  class TestQwenImageEditI2I (line 14) | class TestQwenImageEditI2I(CLIBase):
    method get_base_command (line 25) | def get_base_command(self):
    method verify_multi_output (line 36) | def verify_multi_output(self, name: str, num_outputs: int):
    method test_single_prompt_single_image (line 61) | def test_single_prompt_single_image(self):
    method test_single_prompt_multi_image (line 77) | def test_single_prompt_multi_image(self):
    method test_multi_prompt_multi_image (line 93) | def test_multi_prompt_multi_image(self):
    method test_multi_prompt_single_image (line 110) | def test_multi_prompt_single_image(self):

FILE: python/sglang/multimodal_gen/test/cli/test_generate_t2i_perf.py
  class TestFlux_T2V (line 13) | class TestFlux_T2V(CLIBase):

FILE: python/sglang/multimodal_gen/test/run_suite.py
  function parse_args (line 74) | def parse_args():
  function collect_test_items (line 117) | def collect_test_items(files, filter_expr=None):
  function run_pytest (line 168) | def run_pytest(files, filter_expr=None):
  function _is_in_ci (line 251) | def _is_in_ci() -> bool:
  function _maybe_pin_update_weights_model_pair (line 255) | def _maybe_pin_update_weights_model_pair(suite_files_rel: list[str]) -> ...
  function main (line 272) | def main():

FILE: python/sglang/multimodal_gen/test/scripts/gen_diffusion_ci_outputs.py
  function main (line 24) | def main():

FILE: python/sglang/multimodal_gen/test/scripts/gen_perf_baselines.py
  function _all_cases (line 25) | def _all_cases() -> list[DiffusionTestCase]:
  function _baseline_path (line 42) | def _baseline_path() -> Path:
  function _openai_client (line 48) | def _openai_client(port: int) -> OpenAI:
  function _build_server_extra_args (line 52) | def _build_server_extra_args(case: DiffusionTestCase) -> str:
  function _build_env_vars (line 79) | def _build_env_vars(case: DiffusionTestCase) -> dict[str, str]:
  function _torch_cleanup (line 85) | def _torch_cleanup() -> None:
  function _run_case (line 102) | def _run_case(case: DiffusionTestCase) -> dict:
  function main (line 154) | def main() -> int:

FILE: python/sglang/multimodal_gen/test/server/ascend/test_server_1_npu.py
  class TestDiffusionServerOneNpu (line 23) | class TestDiffusionServerOneNpu(DiffusionServerBase):
    method case (line 27) | def case(self, request) -> DiffusionTestCase:

FILE: python/sglang/multimodal_gen/test/server/ascend/test_server_2_npu.py
  class TestDiffusionServerTwoNpu (line 23) | class TestDiffusionServerTwoNpu(DiffusionServerBase):
    method case (line 27) | def case(self, request) -> DiffusionTestCase:

FILE: python/sglang/multimodal_gen/test/server/ascend/test_server_8_npu.py
  class TestDiffusionServerEightNpu (line 25) | class TestDiffusionServerEightNpu(DiffusionServerBase):
    method case (line 29) | def case(self, request) -> DiffusionTestCase:

FILE: python/sglang/multimodal_gen/test/server/conftest.py
  function pytest_configure (line 8) | def pytest_configure(config):
  function add_perf_results (line 18) | def add_perf_results(config, results: list):
  function perf_config (line 29) | def perf_config(request):
  function _write_github_step_summary (line 34) | def _write_github_step_summary(content: str):
  function _write_results_json (line 42) | def _write_results_json(results: list, output_path: str = "diffusion-res...
  function _generate_diffusion_markdown_report (line 54) | def _generate_diffusion_markdown_report(results: list) -> str:
  function pytest_sessionfinish (line 97) | def pytest_sessionfinish(session):

FILE: python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py
  class TestDiffusionServerTwoGpu (line 19) | class TestDiffusionServerTwoGpu(DiffusionServerBase):
    method case (line 23) | def case(self, request) -> DiffusionTestCase:

FILE: python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py
  class TestDiffusionServerTwoGpu (line 19) | class TestDiffusionServerTwoGpu(DiffusionServerBase):
    method case (line 23) | def case(self, request) -> DiffusionTestCase:

FILE: python/sglang/multimodal_gen/test/server/test_server_a.py
  class TestDiffusionServerOneGpu (line 25) | class TestDiffusionServerOneGpu(DiffusionServerBase):
    method case (line 29) | def case(self, request) -> DiffusionTestCase:

FILE: python/sglang/multimodal_gen/test/server/test_server_b.py
  class TestDiffusionServerOneGpu (line 25) | class TestDiffusionServerOneGpu(DiffusionServerBase):
    method case (line 29) | def case(self, request) -> DiffusionTestCase:

FILE: python/sglang/multimodal_gen/test/server/test_server_common.py
  function diffusion_server (line 47) | def diffusion_server(case: DiffusionTestCase) -> ServerContext:
  class DiffusionServerBase (line 137) | class DiffusionServerBase:
    method setup_class (line 149) | def setup_class(cls):
    method teardown_class (line 154) | def teardown_class(cls):
    method _capture_pytest_config (line 186) | def _capture_pytest_config(self, request):
    method _client (line 190) | def _client(self, ctx: ServerContext) -> OpenAI:
    method run_and_collect (line 197) | def run_and_collect(
    method _validate_and_record (line 222) | def _validate_and_record(
    method _check_for_improvement (line 303) | def _check_for_improvement(
    method _dump_baseline_for_testcase (line 381) | def _dump_baseline_for_testcase(
    method _save_gt_output (line 420) | def _save_gt_output(
    method _test_lora_api_functionality (line 474) | def _test_lora_api_functionality(
    method _test_lora_dynamic_switch_e2e (line 534) | def _test_lora_dynamic_switch_e2e(
    method _test_dynamic_lora_loading (line 593) | def _test_dynamic_lora_loading(
    method _test_multi_lora_e2e (line 621) | def _test_multi_lora_e2e(
    method _test_v1_models_endpoint (line 695) | def _test_v1_models_endpoint(
    method _test_t2v_rejects_input_reference (line 784) | def _test_t2v_rejects_input_reference(
    method test_diffusion_generation (line 815) | def test_diffusion_generation(

FILE: python/sglang/multimodal_gen/test/server/test_server_utils.py
  function download_image_from_url (line 58) | def download_image_from_url(url: str) -> Path:
  function parse_dimensions (line 89) | def parse_dimensions(size_string: str | None) -> tuple[int | None, int |...
  class ServerContext (line 133) | class ServerContext:
    method cleanup (line 145) | def cleanup(self) -> None:
    method _cleanup_hf_cache_if_not_persistent (line 165) | def _cleanup_hf_cache_if_not_persistent(self) -> None:
    method _cleanup_rocm_gpu_memory (line 219) | def _cleanup_rocm_gpu_memory(self) -> None:
  class ServerManager (line 249) | class ServerManager:
    method __init__ (line 252) | def __init__(
    method _wait_for_rocm_gpu_memory_clear (line 266) | def _wait_for_rocm_gpu_memory_clear(self, max_wait: float = 60.0) -> N...
    method start (line 325) | def start(self) -> ServerContext:
    method _wait_for_ready (line 417) | def _wait_for_ready(self, process: subprocess.Popen, stdout_path: Path...
    method _get_log_tail (line 450) | def _get_log_tail(path: Path, lines: int = 200) -> str:
  class PerformanceValidator (line 459) | class PerformanceValidator:
    method __init__ (line 464) | def __init__(
    method _assert_le (line 477) | def _assert_le(
    method validate (line 522) | def validate(
    method collect_metrics (line 537) | def collect_metrics(
    method _validate_e2e (line 543) | def _validate_e2e(self, summary: PerformanceSummary) -> None:
    method _validate_denoise_agg (line 553) | def _validate_denoise_agg(self, summary: PerformanceSummary) -> None:
    method _validate_denoise_steps (line 570) | def _validate_denoise_steps(self, summary: PerformanceSummary) -> None:
    method _validate_stages (line 586) | def _validate_stages(self, summary: PerformanceSummary) -> None:
  class VideoPerformanceValidator (line 609) | class VideoPerformanceValidator(PerformanceValidator):
    method validate (line 614) | def validate(
    method _validate_frame_rate (line 632) | def _validate_frame_rate(self, summary: PerformanceSummary) -> None:
  class MeshValidator (line 644) | class MeshValidator(PerformanceValidator):
  function _download_reference_mesh (line 656) | def _download_reference_mesh(url: str) -> Path:
  function validate_mesh_correctness (line 673) | def validate_mesh_correctness(
  function get_generate_fn (line 740) | def get_generate_fn(

FILE: python/sglang/multimodal_gen/test/server/test_update_weights_from_disk.py
  function _resolve_active_model_pairs (line 198) | def _resolve_active_model_pairs() -> list[tuple[str, str]]:
  function _compute_checksum_from_disk (line 222) | def _compute_checksum_from_disk(model_path: str, module_name: str) -> str:
  function _clone_model_with_modified_module (line 243) | def _clone_model_with_modified_module(
  function _truncate_safetensor (line 284) | def _truncate_safetensor(src_file: str, dst_file: str) -> None:
  function _perturb_safetensor (line 297) | def _perturb_safetensor(src_file: str, dst_file: str) -> None:
  class _UpdateWeightsApiMixin (line 307) | class _UpdateWeightsApiMixin:
    method _update_weights (line 308) | def _update_weights(
    method _get_weights_checksum (line 326) | def _get_weights_checksum(
    method _assert_server_matches_model (line 345) | def _assert_server_matches_model(
  class TestUpdateWeightsFromDisk (line 362) | class TestUpdateWeightsFromDisk(_UpdateWeightsApiMixin):
    method diffusion_server_no_offload (line 369) | def diffusion_server_no_offload(self, request):
    method test_update_weights_from_disk_default (line 437) | def test_update_weights_from_disk_default(self, diffusion_server_no_of...
    method test_update_weights_specific_modules (line 452) | def test_update_weights_specific_modules(self, diffusion_server_no_off...
    method test_update_weights_nonexistent_model (line 494) | def test_update_weights_nonexistent_model(self, diffusion_server_no_of...
    method test_update_weights_missing_model_path (line 512) | def test_update_weights_missing_model_path(self, diffusion_server_no_o...
    method test_update_weights_nonexistent_module (line 530) | def test_update_weights_nonexistent_module(self, diffusion_server_no_o...
    method test_corrupted_weights_rollback (line 550) | def test_corrupted_weights_rollback(self, diffusion_server_no_offload):
  class TestUpdateWeightsFromDiskWithOffload (line 620) | class TestUpdateWeightsFromDiskWithOffload(_UpdateWeightsApiMixin):
    method diffusion_server_with_offload (line 624) | def diffusion_server_with_offload(self, request):
    method test_update_weights_with_offload_enabled (line 659) | def test_update_weights_with_offload_enabled(self, diffusion_server_wi...

FILE: python/sglang/multimodal_gen/test/server/testcase_configs.py
  class ToleranceConfig (line 51) | class ToleranceConfig:
    method load_profile (line 61) | def load_profile(cls, all_tolerances: dict, profile_name: str) -> Tole...
  class ScenarioConfig (line 103) | class ScenarioConfig:
  class BaselineConfig (line 114) | class BaselineConfig:
    method load (line 123) | def load(cls, path: Path) -> BaselineConfig:
    method update (line 153) | def update(self, path: Path):
  class DiffusionServerArgs (line 173) | class DiffusionServerArgs:
    method __post_init__ (line 204) | def __post_init__(self):
  class DiffusionSamplingParams (line 214) | class DiffusionSamplingParams:
  class DiffusionTestCase (line 242) | class DiffusionTestCase:
  function sample_step_indices (line 251) | def sample_step_indices(
  class PerformanceSummary (line 266) | class PerformanceSummary:
    method from_req_perf_record (line 282) | def from_req_perf_record(

FILE: python/sglang/multimodal_gen/test/slack_utils.py
  function _get_status_message (line 51) | def _get_status_message(run_id, current_case_id, thread_messages=None):
  function upload_file_to_slack (line 98) | def upload_file_to_slack(

FILE: python/sglang/multimodal_gen/test/test_utils.py
  function print_value_formatted (line 67) | def print_value_formatted(description: str, value: int | float | str):
  function print_divider (line 84) | def print_divider(length: int, char: str = "-"):
  function is_image_url (line 89) | def is_image_url(image_path: str | Path | None) -> bool:
  function probe_port (line 98) | def probe_port(host="127.0.0.1", port=30010, timeout=2.0) -> bool:
  function is_in_ci (line 108) | def is_in_ci() -> bool:
  function get_dynamic_server_port (line 112) | def get_dynamic_server_port() -> int:
  function find_free_port (line 129) | def find_free_port(host: str = "127.0.0.1") -> int:
  function wait_for_server_health (line 136) | def wait_for_server_health(
  function post_json (line 161) | def post_json(
  function query_gpu_mem_used_mib (line 176) | def query_gpu_mem_used_mib(gpu_index: int = 0, required: bool = False) -...
  function require_gpu_mem_query (line 201) | def require_gpu_mem_query(gpu_index: int = 0) -> int:
  function assert_gpu_mem_changed (line 212) | def assert_gpu_mem_changed(
  function is_mp4 (line 230) | def is_mp4(data: bytes) -> bool:
  function is_jpeg (line 237) | def is_jpeg(data: bytes) -> bool:
  function is_png (line 242) | def is_png(data):
  function is_webp (line 247) | def is_webp(data: bytes) -> bool:
  function detect_image_format (line 252) | def detect_image_format(data: bytes) -> str:
  function get_expected_image_format (line 265) | def get_expected_image_format(
  function wait_for_port (line 284) | def wait_for_port(host="127.0.0.1", port=30010, deadline=300.0, interval...
  function check_image_size (line 294) | def check_image_size(ut, image, width, height):
  function get_perf_log_dir (line 299) | def get_perf_log_dir() -> Path:
  function _ensure_log_path (line 310) | def _ensure_log_path(log_dir: Path) -> Path:
  function clear_perf_log (line 315) | def clear_perf_log(log_dir: Path) -> Path:
  function prepare_perf_log (line 324) | def prepare_perf_log() -> tuple[Path, Path]:
  function read_perf_logs (line 331) | def read_perf_logs(log_path: Path) -> list[RequestPerfRecord]:
  function wait_for_req_perf_record (line 348) | def wait_for_req_perf_record(
  function validate_image (line 373) | def validate_image(b64_json: str) -> None:
  function validate_video (line 379) | def validate_video(b64_json: str) -> None:
  function validate_openai_video (line 386) | def validate_openai_video(video_bytes: bytes) -> None:
  function validate_image_file (line 392) | def validate_image_file(
  function _get_video_dimensions_from_metadata (line 444) | def _get_video_dimensions_from_metadata(
  function _get_video_dimensions_from_frame (line 464) | def _get_video_dimensions_from_frame(cap: cv2.VideoCapture) -> tuple[int...
  function get_video_dimensions (line 483) | def get_video_dimensions(file_path: str) -> tuple[int, int]:
  function get_video_frame_count (line 505) | def get_video_frame_count(file_path: str) -> int:
  function validate_video_file (line 521) | def validate_video_file(
  function output_format_to_ext (line 560) | def output_format_to_ext(output_format: str | None) -> str:
  function _consistency_gt_filenames (line 572) | def _consistency_gt_filenames(
  function extract_key_frames_from_video (line 587) | def extract_key_frames_from_video(
  function image_bytes_to_numpy (line 639) | def image_bytes_to_numpy(image_bytes: bytes) -> np.ndarray:

FILE: python/sglang/multimodal_gen/test/unit/test_lora_format_adapter.py
  function download_lora (line 32) | def download_lora(
  function is_diffusers_style_keys (line 59) | def is_diffusers_style_keys(
  function run_single_test (line 130) | def run_single_test(
  function _run_all_tests (line 172) | def _run_all_tests() -> List[Dict]:
  function _print_summary (line 286) | def _print_summary(results: List[Dict]) -> None:
  function main (line 319) | def main() -> None:
  class TestLoRAFormatAdapter (line 327) | class TestLoRAFormatAdapter:
    method test_lora_format_adapter_all_formats (line 328) | def test_lora_format_adapter_all_formats(self):

FILE: python/sglang/multimodal_gen/test/unit/test_sampling_params.py
  class TestSamplingParamsValidate (line 13) | class TestSamplingParamsValidate(unittest.TestCase):
    method test_prompt_path_suffix (line 14) | def test_prompt_path_suffix(self):
    method test_num_outputs_per_prompt_must_be_positive (line 18) | def test_num_outputs_per_prompt_must_be_positive(self):
    method test_fps_must_be_positive_int (line 22) | def test_fps_must_be_positive_int(self):
    method test_num_inference_steps_optional_but_if_set_must_be_positive (line 28) | def test_num_inference_steps_optional_but_if_set_must_be_positive(self):
    method test_guidance_scale_must_be_finite_non_negative_if_set (line 33) | def test_guidance_scale_must_be_finite_non_negative_if_set(self):
    method test_guidance_rescale_must_be_finite_non_negative (line 40) | def test_guidance_rescale_must_be_finite_non_negative(self):
    method test_boundary_ratio_range (line 46) | def test_boundary_ratio_range(self):
  class TestSamplingParamsSubclass (line 54) | class TestSamplingParamsSubclass(unittest.TestCase):
    method test_flux_defaults_resolution_when_not_provided (line 55) | def test_flux_defaults_resolution_when_not_provided(self):
    method test_flux_preserves_user_resolution (line 61) | def test_flux_preserves_user_resolution(self):
    method test_diffusers_generic_calls_base_post_init (line 67) | def test_diffusers_generic_calls_base_post_init(self):
  class TestSamplingParamsCliArgs (line 72) | class TestSamplingParamsCliArgs(unittest.TestCase):
    method _parse_cli_kwargs (line 73) | def _parse_cli_kwargs(self, argv: list[str]) -> dict:
    method _make_qwen_image_params (line 79) | def _make_qwen_image_params(self, argv: list[str]) -> QwenImageSamplin...
    method test_get_cli_args_drops_unset_sampling_params (line 82) | def test_get_cli_args_drops_unset_sampling_params(self):
    method test_get_cli_args_keeps_explicit_sampling_params (line 85) | def test_get_cli_args_keeps_explicit_sampling_params(self):
    method test_qwen_image_cli_path_preserves_model_defaults (line 100) | def test_qwen_image_cli_path_preserves_model_defaults(self):
    method test_qwen_image_cli_path_allows_explicit_override_to_base_defaults (line 106) | def test_qwen_image_cli_path_allows_explicit_override_to_base_defaults...
    method test_merge_allows_explicit_field_matching_base_default (line 119) | def test_merge_allows_explicit_field_matching_base_default(self):

FILE: python/sglang/multimodal_gen/test/unit/test_server_args.py
  class TestServerArgsPathExpansion (line 15) | class TestServerArgsPathExpansion(unittest.TestCase):
    method _from_dict_without_model_resolution (line 16) | def _from_dict_without_model_resolution(self, kwargs):
    method test_tilde_model_path_is_expanded (line 22) | def test_tilde_model_path_is_expanded(self):
    method test_absolute_path_is_unchanged (line 30) | def test_absolute_path_is_unchanged(self):
    method test_component_paths_are_expanded_before_pipeline_resolution (line 36) | def test_component_paths_are_expanded_before_pipeline_resolution(self):
  class TestModelIdResolution (line 49) | class TestModelIdResolution(unittest.TestCase):
    method setUp (line 50) | def setUp(self):
    method test_model_id_overrides_arbitrary_local_path (line 53) | def test_model_id_overrides_arbitrary_local_path(self):
    method test_model_id_works_after_tilde_expansion (line 61) | def test_model_id_works_after_tilde_expansion(self):
    method test_model_id_unknown_falls_back_without_crash (line 68) | def test_model_id_unknown_falls_back_without_crash(self):
  class TestPipelineResolutionCliOverride (line 75) | class TestPipelineResolutionCliOverride(unittest.TestCase):
    method setUp (line 76) | def setUp(self):
    method test_resolution_flag_overrides_qwen_image_layered_pipeline_config (line 79) | def test_resolution_flag_overrides_qwen_image_layered_pipeline_config(...

FILE: python/sglang/multimodal_gen/test/unit/test_storage.py
  function _create_temp_file (line 18) | def _create_temp_file(tmp_path, name="test.png", content=b"\x89PNG\r\n\x...
  function test_upload_file_success (line 28) | def test_upload_file_success(tmp_path):
  function test_upload_and_cleanup (line 56) | def test_upload_and_cleanup(tmp_path):
  function test_upload_failure_preserves_file (line 75) | def test_upload_failure_preserves_file(tmp_path):
  function test_disabled_storage_returns_none (line 94) | def test_disabled_storage_returns_none(tmp_path):
  function test_aws_url_with_region (line 110) | def test_aws_url_with_region(tmp_path):
  function test_aws_url_default_region (line 127) | def test_aws_url_default_region(tmp_path):
  function test_custom_endpoint_url (line 144) | def test_custom_endpoint_url(tmp_path):
  function test_content_type_detection (line 162) | def test_content_type_detection(tmp_path):
  function test_integration_with_moto (line 199) | def test_integration_with_moto(tmp_path):

FILE: python/sglang/multimodal_gen/third_party/pynvml.py
  class NVMLLibraryMismatchError (line 1206) | class NVMLLibraryMismatchError(Exception):
  class NVMLError (line 1211) | class NVMLError(Exception):
    method __new__ (line 1237) | def __new__(typ, value):
    method __str__ (line 1248) | def __str__(self):
    method __eq__ (line 1258) | def __eq__(self, other):
  function nvmlExceptionClass (line 1262) | def nvmlExceptionClass(nvmlErrorCode):
  function _extractNVMLErrorsAsClasses (line 1268) | def _extractNVMLErrorsAsClasses():
  function _nvmlCheckReturn (line 1303) | def _nvmlCheckReturn(ret):
  function _nvmlGetFunctionPointer (line 1315) | def _nvmlGetFunctionPointer(name):
  class nvmlFriendlyObject (line 1340) | class nvmlFriendlyObject(object):
    method __init__ (line 1341) | def __init__(self, dictionary):
    method __str__ (line 1345) | def __str__(self):
  function nvmlStructToFriendlyObject (line 1349) | def nvmlStructToFriendlyObject(struct):
  function nvmlFriendlyObjectToStruct (line 1361) | def nvmlFriendlyObjectToStruct(obj, model):
  class struct_c_nvmlUnit_t (line 1374) | class struct_c_nvmlUnit_t(Structure):
  class _PrintableStructure (line 1381) | class _PrintableStructure(Structure):
    method __str__ (line 1403) | def __str__(self):
    method __getattribute__ (line 1416) | def __getattribute__(self, name):
    method __setattr__ (line 1428) | def __setattr__(self, name, value):
  class c_nvmlUnitInfo_t (line 1436) | class c_nvmlUnitInfo_t(_PrintableStructure):
  class c_nvmlC2cModeInfo_v1_t (line 1445) | class c_nvmlC2cModeInfo_v1_t(_PrintableStructure):
  class c_nvmlLedState_t (line 1452) | class c_nvmlLedState_t(_PrintableStructure):
  class c_nvmlPSUInfo_t (line 1459) | class c_nvmlPSUInfo_t(_PrintableStructure):
  class c_nvmlUnitFanInfo_t (line 1468) | class c_nvmlUnitFanInfo_t(_PrintableStructure):
  class c_nvmlUnitFanSpeeds_t (line 1475) | class c_nvmlUnitFanSpeeds_t(_PrintableStructure):
  class struct_c_nvmlDevice_t (line 1480) | class struct_c_nvmlDevice_t(Structure):
  class nvmlPciInfoExt_v1_t (line 1487) | class nvmlPciInfoExt_v1_t(_PrintableStructure):
  class nvmlPciInfo_v2_t (line 1515) | class nvmlPciInfo_v2_t(_PrintableStructure):
  class nvmlPciInfo_t (line 1538) | class nvmlPciInfo_t(_PrintableStructure):
  class c_nvmlSystemDriverBranchInfo_v1_t (line 1561) | class c_nvmlSystemDriverBranchInfo_v1_t(_PrintableStructure):
  class c_nvmlExcludedDeviceInfo_t (line 1571) | class c_nvmlExcludedDeviceInfo_t(_PrintableStructure):
  class nvmlNvLinkUtilizationControl_t (line 1575) | class nvmlNvLinkUtilizationControl_t(_PrintableStructure):
  class c_nvmlMemory_t (line 1582) | class c_nvmlMemory_t(_PrintableStructure):
  class c_nvmlMemory_v2_t (line 1591) | class c_nvmlMemory_v2_t(_PrintableStructure):
  class c_nvmlBAR1Memory_t (line 1605) | class c_nvmlBAR1Memory_t(_PrintableStructure):
  class nvmlClkMonFaultInfo_t (line 1614) | class nvmlClkMonFaultInfo_t(Structure):
  class nvmlClkMonStatus_t (line 1621) | class nvmlClkMonStatus_t(Structure):
  class c_nvmlProcessInfo_v2_t (line 1640) | class c_nvmlProcessInfo_v2_t(_PrintableStructure):
  class c_nvmlProcessDetail_v1_t (line 1660) | class c_nvmlProcessDetail_v1_t(Structure):
  class c_nvmlProcessDetailList_v1_t (line 1670) | class c_nvmlProcessDetailList_v1_t(_PrintableStructure):
  class c_nvmlBridgeChipInfo_t (line 1685) | class c_nvmlBridgeChipInfo_t(_PrintableStructure):
  class c_nvmlBridgeChipHierarchy_t (line 1692) | class c_nvmlBridgeChipHierarchy_t(_PrintableStructure):
  class c_nvmlEccErrorCounts_t (line 1699) | class c_nvmlEccErrorCounts_t(_PrintableStructure):
  class c_nvmlUtilization_t (line 1708) | class c_nvmlUtilization_t(_PrintableStructure):
  class c_nvmlHwbcEntry_t (line 1717) | class c_nvmlHwbcEntry_t(_PrintableStructure):
  class c_nvmlValue_t (line 1724) | class c_nvmlValue_t(Union):
  class c_nvmlSample_t (line 1736) | class c_nvmlSample_t(_PrintableStructure):
  class c_nvmlViolationTime_t (line 1743) | class c_nvmlViolationTime_t(_PrintableStructure):
  class c_nvmlFieldValue_t (line 1750) | class c_nvmlFieldValue_t(_PrintableStructure):
  class c_nvmlNvlinkSupportedBwModes_v1_t (line 1767) | class c_nvmlNvlinkSupportedBwModes_v1_t(_PrintableStructure):
    method __init__ (line 1774) | def __init__(self):
  class c_nvmlNvlinkGetBwMode_v1_t (line 1783) | class c_nvmlNvlinkGetBwMode_v1_t(_PrintableStructure):
    method __init__ (line 1786) | def __init__(self):
  class c_nvmlNvlinkSetBwMode_v1_t (line 1793) | class c_nvmlNvlinkSetBwMode_v1_t(_PrintableStructure):
    method __init__ (line 1796) | def __init__(self):
  class c_nvmlVgpuHeterogeneousMode_v1_t (line 1800) | class c_nvmlVgpuHeterogeneousMode_v1_t(_PrintableStructure):
  class c_nvmlVgpuPlacementId_v1_t (line 1810) | class c_nvmlVgpuPlacementId_v1_t(_PrintableStructure):
  class c_nvmlVgpuPlacementList_v1_t (line 1820) | class c_nvmlVgpuPlacementList_v1_t(_PrintableStructure):
  class c_nvmlVgpuPlacementList_v2_t (line 1835) | class c_nvmlVgpuPlacementList_v2_t(_PrintableStructure):
  class c_nvmlVgpuTypeBar1Info_v1_t (line 1848) | class c_nvmlVgpuTypeBar1Info_v1_t(_PrintableStructure):
  class c_nvmlVgpuInstanceUtilizationSample_t (line 1858) | class c_nvmlVgpuInstanceUtilizationSample_t(_PrintableStructure):
  class c_nvmlVgpuInstanceUtilizationInfo_v1_t (line 1869) | class c_nvmlVgpuInstanceUtilizationInfo_v1_t(_PrintableStructure):
  class c_nvmlVgpuInstancesUtilizationInfo_v1_t (line 1882) | class c_nvmlVgpuInstancesUtilizationInfo_v1_t(_PrintableStructure):
  class c_nvmlVgpuProcessUtilizationSample_t (line 1895) | class c_nvmlVgpuProcessUtilizationSample_t(_PrintableStructure):
  class c_nvmlVgpuProcessUtilizationInfo_v1_t (line 1908) | class c_nvmlVgpuProcessUtilizationInfo_v1_t(_PrintableStructure):
  class c_nvmlVgpuProcessesUtilizationInfo_v1_t (line 1923) | class c_nvmlVgpuProcessesUtilizationInfo_v1_t(_PrintableStructure):
  class nvmlVgpuRuntimeState_v1_t (line 1935) | class nvmlVgpuRuntimeState_v1_t(_PrintableStructure):
  class c_nvmlVgpuLicenseExpiry_t (line 1945) | class c_nvmlVgpuLicenseExpiry_t(_PrintableStructure):
  class c_nvmlVgpuLicenseInfo_t (line 1965) | class c_nvmlVgpuLicenseInfo_t(_PrintableStructure):
  class c_nvmlEncoderSession_t (line 1973) | class c_nvmlEncoderSession_t(_PrintableStructure):
  class c_nvmlProcessUtilizationSample_t (line 1986) | class c_nvmlProcessUtilizationSample_t(_PrintableStructure):
  class c_nvmlProcessUtilizationInfo_v1_t (line 1997) | class c_nvmlProcessUtilizationInfo_v1_t(_PrintableStructure):
  class c_nvmlProcessesUtilizationInfo_v1_t (line 2010) | class c_nvmlProcessesUtilizationInfo_v1_t(_PrintableStructure):
  class c_nvmlGridLicenseExpiry_t (line 2022) | class c_nvmlGridLicenseExpiry_t(_PrintableStructure):
  class c_nvmlGridLicensableFeature_v4_t (line 2034) | class c_nvmlGridLicensableFeature_v4_t(_PrintableStructure):
  class c_nvmlGridLicensableFeatures_v4_t (line 2045) | class c_nvmlGridLicensableFeatures_v4_t(_PrintableStructure):
  class c_nvmlGridLicensableFeature_v3_t (line 2056) | class c_nvmlGridLicensableFeature_v3_t(_PrintableStructure):
  class c_nvmlGridLicensableFeatures_v3_t (line 2066) | class c_nvmlGridLicensableFeatures_v3_t(_PrintableStructure):
  class c_nvmlGridLicensableFeature_v2_t (line 2077) | class c_nvmlGridLicensableFeature_v2_t(_PrintableStructure):
  class c_nvmlGridLicensableFeatures_v2_t (line 2086) | class c_nvmlGridLicensableFeatures_v2_t(_PrintableStructure):
  class c_nvmlGridLicensableFeature_t (line 2097) | class c_nvmlGridLicensableFeature_t(_PrintableStructure):
  class c_nvmlGridLicensableFeatures_t (line 2105) | class c_nvmlGridLicensableFeatures_t(_PrintableStructure):
  class c_nvmlMarginTemperature_v1_t (line 2116) | class c_nvmlMarginTemperature_v1_t(_PrintableStructure):
  class struct_c_nvmlEventSet_t (line 2127) | class struct_c_nvmlEventSet_t(Structure):
  class c_nvmlEventData_t (line 2217) | class c_nvmlEventData_t(_PrintableStructure):
  class c_nvmlAccountingStats_t (line 2228) | class c_nvmlAccountingStats_t(_PrintableStructure):
  class c_nvmlVgpuVersion_t (line 2240) | class c_nvmlVgpuVersion_t(Structure):
  class c_nvmlVgpuMetadata_t (line 2244) | class c_nvmlVgpuMetadata_t(_PrintableStructure):
  class c_nvmlVgpuPgpuMetadata_t (line 2259) | class c_nvmlVgpuPgpuMetadata_t(_PrintableStructure):
  class c_nvmlVgpuPgpuCompatibility_t (line 2272) | class c_nvmlVgpuPgpuCompatibility_t(Structure):
  class c_nvmlVgpuSchedDataWithARR_t (line 2295) | class c_nvmlVgpuSchedDataWithARR_t(_PrintableStructure):
  class c_nvmlVgpuSchedData_t (line 2302) | class c_nvmlVgpuSchedData_t(_PrintableStructure):
  class c_nvmlVgpuSchedulerParams_t (line 2308) | class c_nvmlVgpuSchedulerParams_t(Union):
  class c_nvmlVgpuSchedulerLogEntry_t (line 2315) | class c_nvmlVgpuSchedulerLogEntry_t(_PrintableStructure):
  class c_nvmlVgpuSchedulerLog_t (line 2326) | class c_nvmlVgpuSchedulerLog_t(_PrintableStructure):
  class c_nvmlVgpuSchedulerGetState_t (line 2340) | class c_nvmlVgpuSchedulerGetState_t(_PrintableStructure):
  class c_nvmlVgpuSchedSetDataWithARR_t (line 2348) | class c_nvmlVgpuSchedSetDataWithARR_t(_PrintableStructure):
  class c_nvmlVgpuSchedSetData_t (line 2355) | class c_nvmlVgpuSchedSetData_t(_PrintableStructure):
  class c_nvmlVgpuSchedulerSetParams_t (line 2361) | class c_nvmlVgpuSchedulerSetParams_t(Union):
  class c_nvmlVgpuSchedulerSetState_t (line 2368) | class c_nvmlVgpuSchedulerSetState_t(_PrintableStructure):
  class c_nvmlVgpuSchedulerCapabilities_t (line 2376) | class c_nvmlVgpuSchedulerCapabilities_t(_PrintableStructure):
  class c_nvmlFBCStats_t (line 2389) | class c_nvmlFBCStats_t(Structure):
  class c_nvmlFBCSession_t (line 2397) | class c_nvmlFBCSession_t(_PrintableStructure):
  class c_nvmlGpuInstancePlacement_t (line 2433) | class c_nvmlGpuInstancePlacement_t(Structure):
  class c_nvmlGpuInstanceProfileInfo_t (line 2437) | class c_nvmlGpuInstanceProfileInfo_t(Structure):
  class c_nvmlGpuInstanceProfileInfo_v2_t (line 2456) | class c_nvmlGpuInstanceProfileInfo_v2_t(_PrintableStructure):
    method __init__ (line 2473) | def __init__(self):
  class c_nvmlGpuInstanceInfo_t (line 2479) | class c_nvmlGpuInstanceInfo_t(Structure):
  class struct_c_nvmlGpuInstance_t (line 2488) | class struct_c_nvmlGpuInstance_t(Structure):
  class c_nvmlComputeInstancePlacement_t (line 2508) | class c_nvmlComputeInstancePlacement_t(Structure):
  class c_nvmlComputeInstanceProfileInfo_t (line 2512) | class c_nvmlComputeInstanceProfileInfo_t(Structure):
  class c_nvmlComputeInstanceProfileInfo_v2_t (line 2529) | class c_nvmlComputeInstanceProfileInfo_v2_t(_PrintableStructure):
    method __init__ (line 2544) | def __init__(self):
  class c_nvmlComputeInstanceInfo_t (line 2550) | class c_nvmlComputeInstanceInfo_t(Structure):
  class c_nvmlGpuDynamicPstatesUtilization_t (line 2567) | class c_nvmlGpuDynamicPstatesUtilization_t(Structure):
  class c_nvmlGpuDynamicPstatesInfo_t (line 2576) | class c_nvmlGpuDynamicPstatesInfo_t(Structure):
  class c_nvmlGpuThermalSensor_t (line 2620) | class c_nvmlGpuThermalSensor_t(Structure):
  class c_nvmlGpuThermalSettings_t (line 2630) | class c_nvmlGpuThermalSettings_t(Structure):
  class c_nvmlCoolerInfo_t (line 2655) | class c_nvmlCoolerInfo_t(_PrintableStructure):
  function nvmlDeviceGetCoolerInfo (line 2667) | def nvmlDeviceGetCoolerInfo(handle):
  class struct_c_nvmlComputeInstance_t (line 2677) | class struct_c_nvmlComputeInstance_t(Structure):
  class c_nvmlDeviceAttributes (line 2684) | class c_nvmlDeviceAttributes(Structure):
  class c_nvmlRowRemapperHistogramValues (line 2698) | class c_nvmlRowRemapperHistogramValues(Structure):
  class c_nvmlConfComputeSystemState_t (line 2717) | class c_nvmlConfComputeSystemState_t(Structure):
  class c_nvmlSystemConfComputeSettings_v1_t (line 2728) | class c_nvmlSystemConfComputeSettings_v1_t(Structure):
    method __init__ (line 2737) | def __init__(self):
  class c_nvmlConfComputeSystemCaps_t (line 2743) | class c_nvmlConfComputeSystemCaps_t(Structure):
  class c_nvmlConfComputeMemSizeInfo_t (line 2750) | class c_nvmlConfComputeMemSizeInfo_t(Structure):
  class c_nvmlConfComputeGpuCertificate_t (line 2757) | class c_nvmlConfComputeGpuCertificate_t(Structure):
  class c_nvmlConfComputeGpuAttestationReport_t (line 2766) | class c_nvmlConfComputeGpuAttestationReport_t(Structure):
  class c_nvmlConfComputeSetKeyRotationThresholdInfo_t (line 2777) | class c_nvmlConfComputeSetKeyRotationThresholdInfo_t(Structure):
  class c_nvmlConfComputeGetKeyRotationThresholdInfo_t (line 2787) | class c_nvmlConfComputeGetKeyRotationThresholdInfo_t(Structure):
  function convertStrBytes (line 2798) | def convertStrBytes(func):
  function throwOnVersionMismatch (line 2827) | def throwOnVersionMismatch(func):
  function nvmlInitWithFlags (line 2844) | def nvmlInitWithFlags(flags):
  function nvmlInit (line 2862) | def nvmlInit():
  function _LoadNvmlLibrary (line 2867) | def _LoadNvmlLibrary():
  function nvmlShutdown (line 2912) | def nvmlShutdown():
  function nvmlErrorString (line 2931) | def nvmlErrorString(result):
  function nvmlSystemGetNVMLVersion (line 2940) | def nvmlSystemGetNVMLVersion():
  function nvmlSystemGetCudaDriverVersion (line 2948) | def nvmlSystemGetCudaDriverVersion():
  function nvmlSystemGetCudaDriverVersion_v2 (line 2956) | def nvmlSystemGetCudaDriverVersion_v2():
  function nvmlSystemGetProcessName (line 2966) | def nvmlSystemGetProcessName(pid):
  function nvmlSystemGetDriverVersion (line 2975) | def nvmlSystemGetDriverVersion():
  function nvmlSystemGetHicVersion (line 2984) | def nvmlSystemGetHicVersion():
  function nvmlSystemGetDriverBranch (line 3007) | def nvmlSystemGetDriverBranch():
  function nvmlUnitGetCount (line 3017) | def nvmlUnitGetCount():
  function nvmlUnitGetHandleByIndex (line 3025) | def nvmlUnitGetHandleByIndex(index):
  function nvmlUnitGetUnitInfo (line 3034) | def nvmlUnitGetUnitInfo(unit):
  function nvmlUnitGetLedState (line 3042) | def nvmlUnitGetLedState(unit):
  function nvmlUnitGetPsuInfo (line 3050) | def nvmlUnitGetPsuInfo(unit):
  function nvmlUnitGetTemperature (line 3058) | def nvmlUnitGetTemperature(unit, type):
  function nvmlUnitGetFanSpeedInfo (line 3066) | def nvmlUnitGetFanSpeedInfo(unit):
  function nvmlUnitGetDeviceCount (line 3075) | def nvmlUnitGetDeviceCount(unit):
  function nvmlUnitGetDevices (line 3086) | def nvmlUnitGetDevices(unit):
  function nvmlDeviceGetCount (line 3097) | def nvmlDeviceGetCount():
  function nvmlDeviceGetHandleByIndex (line 3105) | def nvmlDeviceGetHandleByIndex(index):
  function nvmlDeviceGetHandleBySerial (line 3115) | def nvmlDeviceGetHandleBySerial(serial):
  function nvmlDeviceGetHandleByUUID (line 3125) | def nvmlDeviceGetHandleByUUID(uuid):
  function nvmlDeviceGetHandleByPciBusId (line 3135) | def nvmlDeviceGetHandleByPciBusId(pciBusId):
  function nvmlDeviceGetName (line 3145) | def nvmlDeviceGetName(handle):
  class c_nvmlDevicePerfModes_v1_t (line 3153) | class c_nvmlDevicePerfModes_v1_t(_PrintableStructure):
  function nvmlDeviceGetPerformanceModes (line 3164) | def nvmlDeviceGetPerformanceModes(handle):
  class c_nvmlDeviceCurrentClockFreqs_v1_t (line 3173) | class c_nvmlDeviceCurrentClockFreqs_v1_t(_PrintableStructure):
  function nvmlDeviceGetCurrentClockFreqs (line 3184) | def nvmlDeviceGetCurrentClockFreqs(handle):
  function nvmlDeviceGetBoardId (line 3193) | def nvmlDeviceGetBoardId(handle):
  function nvmlDeviceGetMultiGpuBoard (line 3201) | def nvmlDeviceGetMultiGpuBoard(handle):
  function nvmlDeviceGetBrand (line 3209) | def nvmlDeviceGetBrand(handle):
  function nvmlDeviceGetC2cModeInfoV1 (line 3217) | def nvmlDeviceGetC2cModeInfoV1(handle):
  function nvmlDeviceGetC2cModeInfoV (line 3225) | def nvmlDeviceGetC2cModeInfoV(handle):
  function nvmlDeviceGetBoardPartNumber (line 3230) | def nvmlDeviceGetBoardPartNumber(handle):
  function nvmlDeviceGetSerial (line 3239) | def nvmlDeviceGetSerial(handle):
  function nvmlDeviceGetModuleId (line 3247) | def nvmlDeviceGetModuleId(handle, moduleId=c_uint()):
  function nvmlDeviceGetMemoryAffinity (line 3259) | def nvmlDeviceGetMemoryAffinity(handle, nodeSetSize, scope):
  function nvmlDeviceGetCpuAffinityWithinScope (line 3268) | def nvmlDeviceGetCpuAffinityWithinScope(handle, cpuSetSize, scope):
  function nvmlDeviceGetCpuAffinity (line 3277) | def nvmlDeviceGetCpuAffinity(handle, cpuSetSize):
  function nvmlDeviceSetCpuAffinity (line 3286) | def nvmlDeviceSetCpuAffinity(handle):
  function nvmlDeviceClearCpuAffinity (line 3293) | def nvmlDeviceClearCpuAffinity(handle):
  function nvmlDeviceGetNumaNodeId (line 3300) | def nvmlDeviceGetNumaNodeId(handle):
  function nvmlDeviceGetMinorNumber (line 3308) | def nvmlDeviceGetMinorNumber(handle):
  function nvmlDeviceGetUUID (line 3317) | def nvmlDeviceGetUUID(handle):
  function nvmlDeviceGetInforomVersion (line 3326) | def nvmlDeviceGetInforomVersion(handle, infoRomObject):
  function nvmlDeviceGetInforomImageVersion (line 3341) | def nvmlDeviceGetInforomImageVersion(handle):
  function nvmlDeviceGetInforomConfigurationChecksum (line 3350) | def nvmlDeviceGetInforomConfigurationChecksum(handle):
  function nvmlDeviceValidateInforom (line 3359) | def nvmlDeviceValidateInforom(handle):
  function nvmlDeviceGetLastBBXFlushTime (line 3366) | def nvmlDeviceGetLastBBXFlushTime(handle):
  function nvmlDeviceGetDisplayMode (line 3375) | def nvmlDeviceGetDisplayMode(handle):
  function nvmlDeviceGetDisplayActive (line 3383) | def nvmlDeviceGetDisplayActive(handle):
  function nvmlDeviceGetPersistenceMode (line 3391) | def nvmlDeviceGetPersistenceMode(handle):
  function nvmlDeviceGetPciInfoExt (line 3399) | def nvmlDeviceGetPciInfoExt(handle, c_info):
  function nvmlDeviceGetPciInfo_v3 (line 3406) | def nvmlDeviceGetPciInfo_v3(handle):
  function nvmlDeviceGetPciInfo (line 3414) | def nvmlDeviceGetPciInfo(handle):
  function nvmlDeviceGetClockInfo (line 3418) | def nvmlDeviceGetClockInfo(handle, type):
  function nvmlDeviceGetMaxClockInfo (line 3427) | def nvmlDeviceGetMaxClockInfo(handle, type):
  function nvmlDeviceGetApplicationsClock (line 3436) | def nvmlDeviceGetApplicationsClock(handle, type):
  function nvmlDeviceGetMaxCustomerBoostClock (line 3444) | def nvmlDeviceGetMaxCustomerBoostClock(handle, type):
  function nvmlDeviceGetClock (line 3452) | def nvmlDeviceGetClock(handle, type, id):
  function nvmlDeviceGetDefaultApplicationsClock (line 3461) | def nvmlDeviceGetDefaultApplicationsClock(handle, type):
  function nvmlDeviceGetSupportedMemoryClocks (line 3470) | def nvmlDeviceGetSupportedMemoryClocks(handle):
  function nvmlDeviceGetSupportedGraphicsClocks (line 3499) | def nvmlDeviceGetSupportedGraphicsClocks(handle, memoryClockMHz):
  function nvmlDeviceGetFanSpeed (line 3527) | def nvmlDeviceGetFanSpeed(handle):
  function nvmlDeviceGetFanSpeed_v2 (line 3535) | def nvmlDeviceGetFanSpeed_v2(handle, fan):
  class c_nvmlFanSpeedInfo_t (line 3543) | class c_nvmlFanSpeedInfo_t(_PrintableStructure):
  function nvmlDeviceGetFanSpeedRPM (line 3554) | def nvmlDeviceGetFanSpeedRPM(handle):
  function nvmlDeviceGetTargetFanSpeed (line 3564) | def nvmlDeviceGetTargetFanSpeed(handle, fan):
  function nvmlDeviceGetNumFans (line 3572) | def nvmlDeviceGetNumFans(device):
  function nvmlDeviceSetDefaultFanSpeed_v2 (line 3580) | def nvmlDeviceSetDefaultFanSpeed_v2(handle, index):
  function nvmlDeviceGetMinMaxFanSpeed (line 3587) | def nvmlDeviceGetMinMaxFanSpeed(handle, minSpeed=c_uint(), maxSpeed=c_ui...
  function nvmlDeviceGetFanControlPolicy_v2 (line 3597) | def nvmlDeviceGetFanControlPolicy_v2(handle, fan, fanControlPolicy=c_uin...
  function nvmlDeviceSetFanControlPolicy (line 3606) | def nvmlDeviceSetFanControlPolicy(handle, fan, fanControlPolicy):
  class c_nvmlTemperature_v1_t (line 3613) | class c_nvmlTemperature_v1_t(_PrintableStructure):
  function nvmlDeviceGetTemperatureV1 (line 3624) | def nvmlDeviceGetTemperatureV1(handle, sensor):
  function nvmlDeviceGetTemperatureV (line 3634) | def nvmlDeviceGetTemperatureV(handle, sensor, version=nvmlTemperature_v1):
  function nvmlDeviceGetTemperature (line 3642) | def nvmlDeviceGetTemperature(handle, sensor):
  function nvmlDeviceGetTemperatureThreshold (line 3650) | def nvmlDeviceGetTemperatureThreshold(handle, threshold):
  function nvmlDeviceSetTemperatureThreshold (line 3658) | def nvmlDeviceSetTemperatureThreshold(handle, threshold, temp):
  function nvmlDeviceGetMarginTemperature (line 3667) | def nvmlDeviceGetMarginTemperature(handle):
  function nvmlDeviceGetPowerState (line 3677) | def nvmlDeviceGetPowerState(handle):
  function nvmlDeviceGetPerformanceState (line 3685) | def nvmlDeviceGetPerformanceState(handle):
  function nvmlDeviceGetPowerManagementMode (line 3693) | def nvmlDeviceGetPowerManagementMode(handle):
  function nvmlDeviceGetPowerManagementLimit (line 3701) | def nvmlDeviceGetPowerManagementLimit(handle):
  function nvmlDeviceGetPowerManagementLimitConstraints (line 3710) | def nvmlDeviceGetPowerManagementLimitConstraints(handle):
  function nvmlDeviceGetPowerManagementDefaultLimit (line 3720) | def nvmlDeviceGetPowerManagementDefaultLimit(handle):
  function nvmlDeviceGetEnforcedPowerLimit (line 3729) | def nvmlDeviceGetEnforcedPowerLimit(handle):
  function nvmlDeviceGetPowerUsage (line 3737) | def nvmlDeviceGetPowerUsage(handle):
  function nvmlDeviceGetTotalEnergyConsumption (line 3745) | def nvmlDeviceGetTotalEnergyConsumption(handle):
  function nvmlDeviceGetGpuOperationMode (line 3754) | def nvmlDeviceGetGpuOperationMode(handle):
  function nvmlDeviceGetCurrentGpuOperationMode (line 3764) | def nvmlDeviceGetCurrentGpuOperationMode(handle):
  function nvmlDeviceGetPendingGpuOperationMode (line 3769) | def nvmlDeviceGetPendingGpuOperationMode(handle):
  function nvmlDeviceGetMemoryInfo (line 3773) | def nvmlDeviceGetMemoryInfo(handle, version=None):
  function nvmlDeviceGetBAR1MemoryInfo (line 3786) | def nvmlDeviceGetBAR1MemoryInfo(handle):
  function nvmlDeviceGetComputeMode (line 3794) | def nvmlDeviceGetComputeMode(handle):
  function nvmlDeviceGetCudaComputeCapability (line 3802) | def nvmlDeviceGetCudaComputeCapability(handle):
  function nvmlDeviceGetEccMode (line 3811) | def nvmlDeviceGetEccMode(handle):
  function nvmlDeviceGetCurrentEccMode (line 3821) | def nvmlDeviceGetCurrentEccMode(handle):
  function nvmlDeviceGetPendingEccMode (line 3826) | def nvmlDeviceGetPendingEccMode(handle):
  function nvmlDeviceGetDefaultEccMode (line 3830) | def nvmlDeviceGetDefaultEccMode(handle):
  function nvmlDeviceGetTotalEccErrors (line 3838) | def nvmlDeviceGetTotalEccErrors(handle, errorType, counterType):
  function nvmlDeviceGetDetailedEccErrors (line 3852) | def nvmlDeviceGetDetailedEccErrors(handle, errorType, counterType):
  function nvmlDeviceGetMemoryErrorCounter (line 3866) | def nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, loca...
  function nvmlDeviceGetUtilizationRates (line 3880) | def nvmlDeviceGetUtilizationRates(handle):
  function nvmlDeviceGetEncoderUtilization (line 3888) | def nvmlDeviceGetEncoderUtilization(handle):
  function nvmlDeviceGetDecoderUtilization (line 3897) | def nvmlDeviceGetDecoderUtilization(handle):
  function nvmlDeviceGetJpgUtilization (line 3906) | def nvmlDeviceGetJpgUtilization(handle):
  function nvmlDeviceGetOfaUtilization (line 3915) | def nvmlDeviceGetOfaUtilization(handle):
  function nvmlDeviceGetPcieReplayCounter (line 3924) | def nvmlDeviceGetPcieReplayCounter(handle):
  function nvmlDeviceGetDriverModel (line 3932) | def nvmlDeviceGetDriverModel(handle):
  function nvmlDeviceGetCurrentDriverModel (line 3942) | def nvmlDeviceGetCurrentDriverModel(handle):
  function nvmlDeviceGetPendingDriverModel (line 3947) | def nvmlDeviceGetPendingDriverModel(handle):
  function nvmlDeviceGetVbiosVersion (line 3953) | def nvmlDeviceGetVbiosVersion(handle):
  function nvmlDeviceGetComputeRunningProcesses_v2 (line 3962) | def nvmlDeviceGetComputeRunningProcesses_v2(handle):
  function nvmlDeviceGetComputeRunningProcesses_v3 (line 3994) | def nvmlDeviceGetComputeRunningProcesses_v3(handle):
  function nvmlDeviceGetComputeRunningProcesses (line 4030) | def nvmlDeviceGetComputeRunningProcesses(handle):
  function nvmlDeviceGetGraphicsRunningProcesses_v2 (line 4034) | def nvmlDeviceGetGraphicsRunningProcesses_v2(handle):
  function nvmlDeviceGetGraphicsRunningProcesses_v3 (line 4065) | def nvmlDeviceGetGraphicsRunningProcesses_v3(handle):
  function nvmlDeviceGetGraphicsRunningProcesses (line 4101) | def nvmlDeviceGetGraphicsRunningProcesses(handle):
  function nvmlDeviceGetMPSComputeRunningProcesses (line 4106) | def nvmlDeviceGetMPSComputeRunningProcesses(handle):
  function nvmlDeviceGetMPSComputeRunningProcesses_v2 (line 4110) | def nvmlDeviceGetMPSComputeRunningProcesses_v2(handle):
  function nvmlDeviceGetMPSComputeRunningProcesses_v3 (line 4145) | def nvmlDeviceGetMPSComputeRunningProcesses_v3(handle):
  function nvmlDeviceGetRunningProcessDetailList (line 4180) | def nvmlDeviceGetRunningProcessDetailList(handle, version, mode):
  function nvmlDeviceGetAutoBoostedClocksEnabled (line 4218) | def nvmlDeviceGetAutoBoostedClocksEnabled(handle):
  function nvmlUnitSetLedState (line 4229) | def nvmlUnitSetLedState(unit, color):
  function nvmlDeviceSetPersistenceMode (line 4236) | def nvmlDeviceSetPersistenceMode(handle, mode):
  function nvmlDeviceSetComputeMode (line 4243) | def nvmlDeviceSetComputeMode(handle, mode):
  function nvmlDeviceSetEccMode (line 4250) | def nvmlDeviceSetEccMode(handle, mode):
  function nvmlDeviceClearEccErrorCounts (line 4257) | def nvmlDeviceClearEccErrorCounts(handle, counterType):
  function nvmlDeviceSetDriverModel (line 4264) | def nvmlDeviceSetDriverModel(handle, model):
  function nvmlDeviceSetAutoBoostedClocksEnabled (line 4271) | def nvmlDeviceSetAutoBoostedClocksEnabled(handle, enabled):
  function nvmlDeviceSetDefaultAutoBoostedClocksEnabled (line 4279) | def nvmlDeviceSetDefaultAutoBoostedClocksEnabled(handle, enabled, flags):
  function nvmlDeviceSetGpuLockedClocks (line 4287) | def nvmlDeviceSetGpuLockedClocks(handle, minGpuClockMHz, maxGpuClockMHz):
  function nvmlDeviceResetGpuLockedClocks (line 4294) | def nvmlDeviceResetGpuLockedClocks(handle):
  function nvmlDeviceSetMemoryLockedClocks (line 4301) | def nvmlDeviceSetMemoryLockedClocks(handle, minMemClockMHz, maxMemClockM...
  function nvmlDeviceResetMemoryLockedClocks (line 4308) | def nvmlDeviceResetMemoryLockedClocks(handle):
  function nvmlDeviceGetClkMonStatus (line 4315) | def nvmlDeviceGetClkMonStatus(handle, c_clkMonInfo=nvmlClkMonStatus_t()):
  function nvmlDeviceSetApplicationsClocks (line 4325) | def nvmlDeviceSetApplicationsClocks(handle, maxMemClockMHz, maxGraphicsC...
  function nvmlDeviceResetApplicationsClocks (line 4333) | def nvmlDeviceResetApplicationsClocks(handle):
  function nvmlDeviceSetPowerManagementLimit (line 4341) | def nvmlDeviceSetPowerManagementLimit(handle, limit):
  function nvmlDeviceSetGpuOperationMode (line 4349) | def nvmlDeviceSetGpuOperationMode(handle, mode):
  function nvmlEventSetCreate (line 4357) | def nvmlEventSetCreate():
  function nvmlDeviceRegisterEvents (line 4366) | def nvmlDeviceRegisterEvents(handle, eventTypes, eventSet):
  function nvmlDeviceGetSupportedEventTypes (line 4374) | def nvmlDeviceGetSupportedEventTypes(handle):
  function nvmlEventSetWait_v2 (line 4383) | def nvmlEventSetWait_v2(eventSet, timeoutms):
  function nvmlEventSetWait (line 4391) | def nvmlEventSetWait(eventSet, timeoutms):
  function nvmlEventSetFree (line 4396) | def nvmlEventSetFree(eventSet):
  function nvmlDeviceOnSameBoard (line 4404) | def nvmlDeviceOnSameBoard(handle1, handle2):
  function nvmlDeviceGetCurrPcieLinkGeneration (line 4413) | def nvmlDeviceGetCurrPcieLinkGeneration(handle):
  function nvmlDeviceGetMaxPcieLinkGeneration (line 4422) | def nvmlDeviceGetMaxPcieLinkGeneration(handle):
  function nvmlDeviceGetCurrPcieLinkWidth (line 4431) | def nvmlDeviceGetCurrPcieLinkWidth(handle):
  function nvmlDeviceGetMaxPcieLinkWidth (line 4440) | def nvmlDeviceGetMaxPcieLinkWidth(handle):
  function nvmlDeviceGetGpuMaxPcieLinkGeneration (line 4448) | def nvmlDeviceGetGpuMaxPcieLinkGeneration(handle):
  function nvmlDeviceGetSupportedClocksThrottleReasons (line 4457) | def nvmlDeviceGetSupportedClocksThrottleReasons(handle):
  function nvmlDeviceGetSupportedClocksEventReasons (line 4465) | def nvmlDeviceGetSupportedClocksEventReasons(handle):
  function nvmlDeviceGetCurrentClocksThrottleReasons (line 4474) | def nvmlDeviceGetCurrentClocksThrottleReasons(handle):
  function nvmlDeviceGetCurrentClocksEventReasons (line 4482) | def nvmlDeviceGetCurrentClocksEventReasons(handle):
  function nvmlDeviceGetIndex (line 4491) | def nvmlDeviceGetIndex(handle):
  function nvmlDeviceGetAccountingMode (line 4500) | def nvmlDeviceGetAccountingMode(handle):
  function nvmlDeviceSetAccountingMode (line 4508) | def nvmlDeviceSetAccountingMode(handle, mode):
  function nvmlDeviceClearAccountingPids (line 4515) | def nvmlDeviceClearAccountingPids(handle):
  function nvmlDeviceGetAccountingStats (line 4522) | def nvmlDeviceGetAccountingStats(handle, pid):
  function nvmlDeviceGetAccountingPids (line 4533) | def nvmlDeviceGetAccountingPids(handle):
  function nvmlDeviceGetAccountingBufferSize (line 4542) | def nvmlDeviceGetAccountingBufferSize(handle):
  function nvmlDeviceGetRetiredPages (line 4550) | def nvmlDeviceGetRetiredPages(device, sourceFilter):
  function nvmlDeviceGetRetiredPages_v2 (line 4573) | def nvmlDeviceGetRetiredPages_v2(device, sourceFilter):
  function nvmlDeviceGetRetiredPagesPendingStatus (line 4601) | def nvmlDeviceGetRetiredPagesPendingStatus(device):
  function nvmlDeviceGetAPIRestriction (line 4609) | def nvmlDeviceGetAPIRestriction(device, apiType):
  function nvmlDeviceSetAPIRestriction (line 4617) | def nvmlDeviceSetAPIRestriction(handle, apiType, isRestricted):
  function nvmlDeviceGetBridgeChipInfo (line 4624) | def nvmlDeviceGetBridgeChipInfo(handle):
  function nvmlDeviceGetSamples (line 4632) | def nvmlDeviceGetSamples(device, sampling_type, timeStamp):
  function nvmlDeviceGetViolationStatus (line 4667) | def nvmlDeviceGetViolationStatus(device, perfPolicyType):
  function nvmlDeviceGetPcieThroughput (line 4678) | def nvmlDeviceGetPcieThroughput(device, counter):
  function nvmlSystemGetTopologyGpuSet (line 4686) | def nvmlSystemGetTopologyGpuSet(cpuNumber):
  function nvmlDeviceGetTopologyNearestGpus (line 4703) | def nvmlDeviceGetTopologyNearestGpus(device, level):
  function nvmlDeviceGetTopologyCommonAncestor (line 4721) | def nvmlDeviceGetTopologyCommonAncestor(device1, device2):
  function nvmlDeviceGetNvLinkUtilizationCounter (line 4729) | def nvmlDeviceGetNvLinkUtilizationCounter(device, link, counter):
  function nvmlDeviceFreezeNvLinkUtilizationCounter (line 4738) | def nvmlDeviceFreezeNvLinkUtilizationCounter(device, link, counter, free...
  function nvmlDeviceResetNvLinkUtilizationCounter (line 4745) | def nvmlDeviceResetNvLinkUtilizationCounter(device, link, counter):
  function nvmlDeviceSetNvLinkUtilizationControl (line 4752) | def nvmlDeviceSetNvLinkUtilizationControl(device, link, counter, control...
  function nvmlDeviceGetNvLinkUtilizationControl (line 4759) | def nvmlDeviceGetNvLinkUtilizationControl(device, link, counter):
  function nvmlDeviceGetNvLinkCapability (line 4767) | def nvmlDeviceGetNvLinkCapability(device, link, capability):
  function nvmlDeviceGetNvLinkErrorCounter (line 4775) | def nvmlDeviceGetNvLinkErrorCounter(device, link, counter):
  function nvmlDeviceResetNvLinkErrorCounters (line 4783) | def nvmlDeviceResetNvLinkErrorCounters(device, link):
  function nvmlDeviceGetNvLinkRemotePciInfo (line 4790) | def nvmlDeviceGetNvLinkRemotePciInfo(device, link):
  function nvmlDeviceGetNvLinkRemoteDeviceType (line 4798) | def nvmlDeviceGetNvLinkRemoteDeviceType(handle, link):
  function nvmlDeviceGetNvLinkState (line 4806) | def nvmlDeviceGetNvLinkState(device, link):
  function nvmlDeviceGetNvLinkVersion (line 4814) | def nvmlDeviceGetNvLinkVersion(device, link):
  function nvmlDeviceModifyDrainState (line 4822) | def nvmlDeviceModifyDrainState(pciInfo, newState):
  function nvmlDeviceQueryDrainState (line 4829) | def nvmlDeviceQueryDrainState(pciInfo):
  function nvmlDeviceRemoveGpu (line 4837) | def nvmlDeviceRemoveGpu(pciInfo):
  function nvmlDeviceDiscoverGpus (line 4844) | def nvmlDeviceDiscoverGpus(pciInfo):
  function nvmlDeviceGetFieldValues (line 4851) | def nvmlDeviceGetFieldValues(handle, fieldIds):
  function nvmlDeviceClearFieldValues (line 4867) | def nvmlDeviceClearFieldValues(handle, fieldIds):
  function nvmlDeviceGetVirtualizationMode (line 4883) | def nvmlDeviceGetVirtualizationMode(handle):
  function nvmlDeviceSetVirtualizationMode (line 4891) | def nvmlDeviceSetVirtualizationMode(handle, virtualization_mode):
  function nvmlDeviceGetVgpuHeterogeneousMode (line 4896) | def nvmlDeviceGetVgpuHeterogeneousMode(handle):
  function nvmlDeviceSetVgpuHeterogeneousMode (line 4905) | def nvmlDeviceSetVgpuHeterogeneousMode(handle, heterogeneous_mode):
  function nvmlVgpuInstanceGetPlacementId (line 4915) | def nvmlVgpuInstanceGetPlacementId(vgpuInstance):
  function nvmlDeviceGetVgpuTypeSupportedPlacements (line 4924) | def nvmlDeviceGetVgpuTypeSupportedPlacements(handle, vgpuTypeId, mode=0,...
  function nvmlDeviceGetVgpuTypeCreatablePlacements (line 4949) | def nvmlDeviceGetVgpuTypeCreatablePlacements(handle, vgpuTypeId, version...
  function nvmlGetVgpuDriverCapabilities (line 4971) | def nvmlGetVgpuDriverCapabilities(capability):
  function nvmlDeviceGetVgpuCapabilities (line 4979) | def nvmlDeviceGetVgpuCapabilities(handle, capability):
  function nvmlDeviceSetVgpuCapabilities (line 4987) | def nvmlDeviceSetVgpuCapabilities(handle, capability, state):
  function nvmlDeviceGetSupportedVgpus (line 4994) | def nvmlDeviceGetSupportedVgpus(handle):
  function nvmlDeviceGetCreatableVgpus (line 5021) | def nvmlDeviceGetCreatableVgpus(handle):
  function nvmlVgpuTypeGetGpuInstanceProfileId (line 5048) | def nvmlVgpuTypeGetGpuInstanceProfileId(vgpuTypeId):
  function nvmlVgpuTypeGetClass (line 5057) | def nvmlVgpuTypeGetClass(vgpuTypeId):
  function nvmlVgpuTypeGetName (line 5067) | def nvmlVgpuTypeGetName(vgpuTypeId):
  function nvmlVgpuTypeGetDeviceID (line 5076) | def nvmlVgpuTypeGetDeviceID(vgpuTypeId):
  function nvmlVgpuTypeGetFramebufferSize (line 5085) | def nvmlVgpuTypeGetFramebufferSize(vgpuTypeId):
  function nvmlVgpuTypeGetNumDisplayHeads (line 5093) | def nvmlVgpuTypeGetNumDisplayHeads(vgpuTypeId):
  function nvmlVgpuTypeGetResolution (line 5101) | def nvmlVgpuTypeGetResolution(vgpuTypeId):
  function nvmlVgpuTypeGetLicense (line 5111) | def nvmlVgpuTypeGetLicense(vgpuTypeId):
  function nvmlVgpuTypeGetFrameRateLimit (line 5120) | def nvmlVgpuTypeGetFrameRateLimit(vgpuTypeId):
  function nvmlVgpuTypeGetGspHeapSize (line 5128) | def nvmlVgpuTypeGetGspHeapSize(vgpuTypeId):
  function nvmlVgpuTypeGetFbReservation (line 5136) | def nvmlVgpuTypeGetFbReservation(vgpuTypeId):
  function nvmlVgpuInstanceGetRuntimeStateSize (line 5144) | def nvmlVgpuInstanceGetRuntimeStateSize(vgpuInstance):
  function nvmlVgpuTypeGetMaxInstances (line 5153) | def nvmlVgpuTypeGetMaxInstances(handle, vgpuTypeId):
  function nvmlVgpuTypeGetMaxInstancesPerVm (line 5161) | def nvmlVgpuTypeGetMaxInstancesPerVm(vgpuTypeId):
  function nvmlVgpuTypeGetBAR1Info (line 5169) | def nvmlVgpuTypeGetBAR1Info(vgpuTypeId):
  function nvmlDeviceGetActiveVgpus (line 5178) | def nvmlDeviceGetActiveVgpus(handle):
  function nvmlVgpuInstanceGetVmID (line 5206) | def nvmlVgpuInstanceGetVmID(vgpuInstance):
  function nvmlVgpuInstanceGetUUID (line 5217) | def nvmlVgpuInstanceGetUUID(vgpuInstance):
  function nvmlVgpuInstanceGetMdevUUID (line 5227) | def nvmlVgpuInstanceGetMdevUUID(vgpuInstance):
  function nvmlVgpuInstanceGetVmDriverVersion (line 5237) | def nvmlVgpuInstanceGetVmDriverVersion(vgpuInstance):
  function nvmlVgpuInstanceGetLicenseStatus (line 5246) | def nvmlVgpuInstanceGetLicenseStatus(vgpuInstance):
  function nvmlVgpuInstanceGetLicenseInfo_v2 (line 5254) | def nvmlVgpuInstanceGetLicenseInfo_v2(vgpuInstance):
  function nvmlVgpuInstanceGetLicenseInfo (line 5262) | def nvmlVgpuInstanceGetLicenseInfo(vgpuInstance):
  function nvmlVgpuInstanceGetFrameRateLimit (line 5266) | def nvmlVgpuInstanceGetFrameRateLimit(vgpuInstance):
  function nvmlVgpuInstanceGetEccMode (line 5274) | def nvmlVgpuInstanceGetEccMode(vgpuInstance):
  function nvmlVgpuInstanceGetType (line 5282) | def nvmlVgpuInstanceGetType(vgpuInstance):
  function nvmlVgpuInstanceGetEncoderCapacity (line 5290) | def nvmlVgpuInstanceGetEncoderCapacity(vgpuInstance):
  function nvmlVgpuInstanceSetEncoderCapacity (line 5298) | def nvmlVgpuInstanceSetEncoderCapacity(vgpuInstance, encoder_capacity):
  function nvmlVgpuInstanceGetFbUsage (line 5303) | def nvmlVgpuInstanceGetFbUsage(vgpuInstance):
  function nvmlVgpuTypeGetCapabilities (line 5311) | def nvmlVgpuTypeGetCapabilities(vgpuTypeId, capability):
  function nvmlVgpuInstanceGetGpuInstanceId (line 5319) | def nvmlVgpuInstanceGetGpuInstanceId(vgpuInstance):
  function nvmlVgpuInstanceGetGpuPciId (line 5328) | def nvmlVgpuInstanceGetGpuPciId(vgpuInstance):
  function nvmlDeviceGetVgpuUtilization (line 5338) | def nvmlDeviceGetVgpuUtilization(handle, timeStamp):
  function nvmlDeviceGetVgpuInstancesUtilizationInfo (line 5373) | def nvmlDeviceGetVgpuInstancesUtilizationInfo(handle, timeStamp):
  function nvmlDeviceGetP2PStatus (line 5406) | def nvmlDeviceGetP2PStatus(device1, device2, p2pIndex):
  function nvmlDeviceGetGridLicensableFeatures_v4 (line 5414) | def nvmlDeviceGetGridLicensableFeatures_v4(handle):
  function nvmlDeviceGetGridLicensableFeatures (line 5423) | def nvmlDeviceGetGridLicensableFeatures(handle):
  function nvmlDeviceGetGspFirmwareVersion (line 5427) | def nvmlDeviceGetGspFirmwareVersion(handle, version=None):
  function nvmlDeviceGetGspFirmwareMode (line 5437) | def nvmlDeviceGetGspFirmwareMode(handle, isEnabled=c_uint(), defaultMode...
  function nvmlDeviceGetEncoderCapacity (line 5447) | def nvmlDeviceGetEncoderCapacity(handle, encoderQueryType):
  function nvmlDeviceGetVgpuProcessUtilization (line 5457) | def nvmlDeviceGetVgpuProcessUtilization(handle, timeStamp):
  function nvmlDeviceGetVgpuProcessesUtilizationInfo (line 5483) | def nvmlDeviceGetVgpuProcessesUtilizationInfo(handle, timeStamp):
  function nvmlDeviceGetEncoderStats (line 5515) | def nvmlDeviceGetEncoderStats(handle):
  function nvmlDeviceGetEncoderSessions (line 5525) | def nvmlDeviceGetEncoderSessions(handle):
  function nvmlDeviceGetFBCStats (line 5552) | def nvmlDeviceGetFBCStats(handle):
  function nvmlDeviceGetFBCSessions (line 5560) | def nvmlDeviceGetFBCSessions(handle):
  function nvmlVgpuInstanceGetEncoderStats (line 5587) | def nvmlVgpuInstanceGetEncoderStats(vgpuInstance):
  function nvmlVgpuInstanceGetEncoderSessions (line 5599) | def nvmlVgpuInstanceGetEncoderSessions(vgpuInstance):
  function nvmlVgpuInstanceGetFBCStats (line 5626) | def nvmlVgpuInstanceGetFBCStats(vgpuInstance):
  function nvmlVgpuInstanceGetFBCSessions (line 5634) | def nvmlVgpuInstanceGetFBCSessions(vgpuInstance):
  function nvmlDeviceGetProcessUtilization (line 5661) | def nvmlDeviceGetProcessUtilization(handle, timeStamp):
  function nvmlDeviceGetProcessesUtilizationInfo (line 5684) | def nvmlDeviceGetProcessesUtilizationInfo(handle, timeStamp):
  function nvmlVgpuInstanceGetMetadata (line 5713) | def nvmlVgpuInstanceGetMetadata(vgpuInstance):
  function nvmlDeviceGetVgpuMetadata (line 5728) | def nvmlDeviceGetVgpuMetadata(handle):
  function nvmlGetVgpuCompatibility (line 5743) | def nvmlGetVgpuCompatibility(vgpuMetadata, pgpuMetadata):
  function nvmlDeviceGetPgpuMetadataString (line 5752) | def nvmlDeviceGetPgpuMetadataString(handle):
  function nvmlDeviceGetVgpuSchedulerLog (line 5767) | def nvmlDeviceGetVgpuSchedulerLog(handle):
  function nvmlDeviceGetVgpuSchedulerState (line 5775) | def nvmlDeviceGetVgpuSchedulerState(handle):
  function nvmlDeviceGetVgpuSchedulerCapabilities (line 5783) | def nvmlDeviceGetVgpuSchedulerCapabilities(handle):
  function nvmlDeviceSetVgpuSchedulerState (line 5791) | def nvmlDeviceSetVgpuSchedulerState(handle, sched_state):
  function nvmlSetVgpuVersion (line 5798) | def nvmlSetVgpuVersion(vgpuVersion):
  function nvmlGetVgpuVersion (line 5805) | def nvmlGetVgpuVersion(supported=None, current=None):
  function nvmlVgpuInstanceGetAccountingMode (line 5823) | def nvmlVgpuInstanceGetAccountingMode(vgpuInstance):
  function nvmlVgpuInstanceGetAccountingPids (line 5831) | def nvmlVgpuInstanceGetAccountingPids(vgpuInstance):
  function nvmlVgpuInstanceGetAccountingStats (line 5845) | def nvmlVgpuInstanceGetAccountingStats(vgpuInstance, pid):
  function nvmlVgpuInstanceClearAccountingPids (line 5853) | def nvmlVgpuInstanceClearAccountingPids(vgpuInstance):
  function nvmlGetExcludedDeviceCount (line 5860) | def nvmlGetExcludedDeviceCount():
  function nvmlGetExcludedDeviceInfoByIndex (line 5868) | def nvmlGetExcludedDeviceInfoByIndex(index):
  function nvmlDeviceGetHostVgpuMode (line 5877) | def nvmlDeviceGetHostVgpuMode(handle):
  function nvmlDeviceSetMigMode (line 5885) | def nvmlDeviceSetMigMode(device, mode):
  function nvmlDeviceGetMigMode (line 5893) | def nvmlDeviceGetMigMode(device):
  function nvmlDeviceGetGpuInstanceProfileInfo (line 5902) | def nvmlDeviceGetGpuInstanceProfileInfo(device, profile, version=2):
  function nvmlDeviceGetGpuInstanceRemainingCapacity (line 5920) | def nvmlDeviceGetGpuInstanceRemainingCapacity(device, profileId):
  function nvmlDeviceGetGpuInstancePossiblePlacements (line 5928) | def nvmlDeviceGetGpuInstancePossiblePlacements(
  function nvmlDeviceCreateGpuInstance (line 5937) | def nvmlDeviceCreateGpuInstance(device, profileId):
  function nvmlDeviceCreateGpuInstanceWithPlacement (line 5945) | def nvmlDeviceCreateGpuInstanceWithPlacement(device, profileId, placement):
  function nvmlGpuInstanceDestroy (line 5953) | def nvmlGpuInstanceDestroy(gpuInstance):
  function nvmlDeviceGetGpuInstances (line 5960) | def nvmlDeviceGetGpuInstances(device, profileId, gpuInstancesRef, countR...
  function nvmlDeviceGetGpuInstanceById (line 5967) | def nvmlDeviceGetGpuInstanceById(device, gpuInstanceId):
  function nvmlGpuInstanceGetInfo (line 5975) | def nvmlGpuInstanceGetInfo(gpuInstance):
  function nvmlGpuInstanceGetComputeInstanceProfileInfo (line 5983) | def nvmlGpuInstanceGetComputeInstanceProfileInfo(
  function nvmlGpuInstanceGetComputeInstanceRemainingCapacity (line 6005) | def nvmlGpuInstanceGetComputeInstanceRemainingCapacity(gpuInstance, prof...
  function nvmlGpuInstanceGetComputeInstancePossiblePlacements (line 6013) | def nvmlGpuInstanceGetComputeInstancePossiblePlacements(
  function nvmlGpuInstanceCreateComputeInstance (line 6022) | def nvmlGpuInstanceCreateComputeInstance(gpuInstance, profileId):
  function nvmlGpuInstanceCreateComputeInstanceWithPlacement (line 6030) | def nvmlGpuInstanceCreateComputeInstanceWithPlacement(
  function nvmlComputeInstanceDestroy (line 6040) | def nvmlComputeInstanceDestroy(computeInstance):
  function nvmlGpuInstanceGetComputeInstances (line 6047) | def nvmlGpuInstanceGetComputeInstances(
  function nvmlGpuInstanceGetComputeInstanceById (line 6056) | def nvmlGpuInstanceGetComputeInstanceById(gpuInstance, computeInstanceId):
  function nvmlComputeInstanceGetInfo_v2 (line 6064) | def nvmlComputeInstanceGetInfo_v2(computeInstance):
  function nvmlComputeInstanceGetInfo (line 6072) | def nvmlComputeInstanceGetInfo(computeInstance):
  function nvmlDeviceIsMigDeviceHandle (line 6076) | def nvmlDeviceIsMigDeviceHandle(device):
  function nvmlDeviceGetGpuInstanceId (line 6084) | def nvmlDeviceGetGpuInstanceId(device):
  function nvmlDeviceGetComputeInstanceId (line 6092) | def nvmlDeviceGetComputeInstanceId(device):
  function nvmlDeviceGetMaxMigDeviceCount (line 6100) | def nvmlDeviceGetMaxMigDeviceCount(device):
  function nvmlDeviceGetMigDeviceHandleByIndex (line 6108) | def nvmlDeviceGetMigDeviceHandleByIndex(device, index):
  function nvmlDeviceGetDeviceHandleFromMigDeviceHandle (line 6117) | def nvmlDeviceGetDeviceHandleFromMigDeviceHandle(migDevice):
  function nvmlDeviceGetAttributes_v2 (line 6125) | def nvmlDeviceGetAttributes_v2(device):
  function nvmlDeviceGetAttributes (line 6133) | def nvmlDeviceGetAttributes(device):
  function nvmlDeviceGetRemappedRows (line 6137) | def nvmlDeviceGetRemappedRows(device):
  function nvmlDeviceGetRowRemapperHistogram (line 6148) | def nvmlDeviceGetRowRemapperHistogram(device):
  function nvmlDeviceGetArchitecture (line 6156) | def nvmlDeviceGetArchitecture(device):
  function nvmlDeviceGetBusType (line 6164) | def nvmlDeviceGetBusType(device):
  function nvmlDeviceGetIrqNum (line 6172) | def nvmlDeviceGetIrqNum(device):
  function nvmlDeviceGetNumGpuCores (line 6180) | def nvmlDeviceGetNumGpuCores(device):
  function nvmlDeviceGetPowerSource (line 6188) | def nvmlDeviceGetPowerSource(device):
  function nvmlDeviceGetMemoryBusWidth (line 6196) | def nvmlDeviceGetMemoryBusWidth(device):
  function nvmlDeviceGetPcieLinkMaxSpeed (line 6204) | def nvmlDeviceGetPcieLinkMaxSpeed(device):
  function nvmlDeviceGetAdaptiveClockInfoStatus (line 6212) | def nvmlDeviceGetAdaptiveClockInfoStatus(device):
  function nvmlDeviceGetPcieSpeed (line 6220) | def nvmlDeviceGetPcieSpeed(device):
  function nvmlDeviceGetDynamicPstatesInfo (line 6228) | def nvmlDeviceGetDynamicPstatesInfo(
  function nvmlDeviceSetFanSpeed_v2 (line 6242) | def nvmlDeviceSetFanSpeed_v2(handle, index, speed):
  function nvmlDeviceGetThermalSettings (line 6249) | def nvmlDeviceGetThermalSettings(
  function nvmlDeviceGetMinMaxClockOfPState (line 6260) | def nvmlDeviceGetMinMaxClockOfPState(
  class c_nvmlClockOffset_t (line 6278) | class c_nvmlClockOffset_t(_PrintableStructure):
  function nvmlDeviceGetClockOffsets (line 6292) | def nvmlDeviceGetClockOffsets(device, info):
  function nvmlDeviceSetClockOffsets (line 6298) | def nvmlDeviceSetClockOffsets(device, info):
  function nvmlDeviceGetSupportedPerformanceStates (line 6304) | def nvmlDeviceGetSupportedPerformanceStates(device):
  function nvmlDeviceGetGpcClkVfOffset (line 6324) | def nvmlDeviceGetGpcClkVfOffset(device):
  function nvmlDeviceSetGpcClkVfOffset (line 6332) | def nvmlDeviceSetGpcClkVfOffset(device, offset):
  function nvmlDeviceGetGpcClkMinMaxVfOffset (line 6340) | def nvmlDeviceGetGpcClkMinMaxVfOffset(device, minOffset=c_int(), maxOffs...
  function nvmlDeviceGetMemClkVfOffset (line 6350) | def nvmlDeviceGetMemClkVfOffset(device):
  function nvmlDeviceSetMemClkVfOffset (line 6358) | def nvmlDeviceSetMemClkVfOffset(device, offset):
  function nvmlDeviceGetMemClkMinMaxVfOffset (line 6366) | def nvmlDeviceGetMemClkMinMaxVfOffset(device, minOffset=c_int(), maxOffs...
  function nvmlSystemSetConfComputeGpusReadyState (line 6377) | def nvmlSystemSetConfComputeGpusReadyState(state):
  function nvmlSystemGetConfComputeGpusReadyState (line 6385) | def nvmlSystemGetConfComputeGpusReadyState():
  function nvmlSystemGetConfComputeCapabilities (line 6393) | def nvmlSystemGetConfComputeCapabilities():
  function nvmlSystemGetConfComputeState (line 6401) | def nvmlSystemGetConfComputeState():
  function nvmlSystemGetConfComputeSettings (line 6409) | def nvmlSystemGetConfComputeSettings(settings):
  function nvmlDeviceSetConfComputeUnprotectedMemSize (line 6414) | def nvmlDeviceSetConfComputeUnprotectedMemSize(device, c_ccMemSize):
  function nvmlDeviceGetConfComputeMemSizeInfo (line 6421) | def nvmlDeviceGetConfComputeMemSizeInfo(device):
  function nvmlDeviceGetConfComputeProtectedMemoryUsage (line 6429) | def nvmlDeviceGetConfComputeProtectedMemoryUsage(device):
  function nvmlDeviceGetConfComputeGpuCertificate (line 6437) | def nvmlDeviceGetConfComputeGpuCertificate(device):
  function nvmlDeviceGetConfComputeGpuAttestationReport (line 6445) | def nvmlDeviceGetConfComputeGpuAttestationReport(device, c_nonce):
  function nvmlSystemSetConfComputeKeyRotationThresholdInfo (line 6455) | def nvmlSystemSetConfComputeKeyRotationThresholdInfo(max_atk_adv):
  function nvmlSystemGetConfComputeKeyRotationThresholdInfo (line 6465) | def nvmlSystemGetConfComputeKeyRotationThresholdInfo():
  class c_nvmlUnitInfo_t (line 6633) | class c_nvmlUnitInfo_t(_PrintableStructure):
  class struct_c_nvmlGpmSample_t (line 6642) | class struct_c_nvmlGpmSample_t(Structure):
  class c_metricInfo_t (line 6649) | class c_metricInfo_t(Structure):
  class c_nvmlGpmMetric_t (line 6657) | class c_nvmlGpmMetric_t(_PrintableStructure):
  class c_nvmlGpmMetricsGet_t (line 6666) | class c_nvmlGpmMetricsGet_t(_PrintableStructure):
  class c_nvmlGpmSupport_t (line 6679) | class c_nvmlGpmSupport_t(_PrintableStructure):
  function nvmlGpmMetricsGet (line 6691) | def nvmlGpmMetricsGet(metricsGet):
  function nvmlGpmSampleFree (line 6698) | def nvmlGpmSampleFree(gpmSample):
  function nvmlGpmSampleAlloc (line 6705) | def nvmlGpmSampleAlloc():
  function nvmlGpmSampleGet (line 6713) | def nvmlGpmSampleGet(device, gpmSample):
  function nvmlGpmMigSampleGet (line 6720) | def nvmlGpmMigSampleGet(device, gpuInstanceId, gpmSample):
  function nvmlGpmQueryDeviceSupport (line 6727) | def nvmlGpmQueryDeviceSupport(device):
  function nvmlGpmSetStreamingEnabled (line 6736) | def nvmlGpmSetStreamingEnabled(device, state):
  function nvmlGpmQueryIfStreamingEnabled (line 6744) | def nvmlGpmQueryIfStreamingEnabled(device):
  class c_nvmlNvLinkPowerThres_t (line 6763) | class c_nvmlNvLinkPowerThres_t(Structure):
  function nvmlDeviceSetNvLinkDeviceLowPowerThreshold (line 6769) | def nvmlDeviceSetNvLinkDeviceLowPowerThreshold(device, l1threshold):
  class c_nvmlGpuFabricInfo_t (line 6787) | class c_nvmlGpuFabricInfo_t(_PrintableStructure):
  class c_nvmlGpuFabricInfoV_t (line 6823) | class c_nvmlGpuFabricInfoV_t(_PrintableStructure):
    method __init__ (line 6833) | def __init__(self):
  function nvmlDeviceGetGpuFabricInfo (line 6837) | def nvmlDeviceGetGpuFabricInfo(device, gpuFabricInfo):
  function nvmlDeviceGetGpuFabricInfoV (line 6844) | def nvmlDeviceGetGpuFabricInfoV(device, gpuFabricInfo):
  function nvmlSystemSetNvlinkBwMode (line 6862) | def nvmlSystemSetNvlinkBwMode(mode):
  function nvmlSystemGetNvlinkBwMode (line 6869) | def nvmlSystemGetNvlinkBwMode():
  class c_nvmlPowerValue_v2_t (line 6883) | class c_nvmlPowerValue_v2_t(_PrintableStructure):
  function nvmlDeviceSetPowerManagementLimit_v2 (line 6895) | def nvmlDeviceSetPowerManagementLimit_v2(
  class c_nvmlEccSramErrorStatus_v1_t (line 6908) | class c_nvmlEccSramErrorStatus_v1_t(_PrintableStructure):
    method __init__ (line 6925) | def __init__(self):
  function nvmlDeviceGetSramEccErrorStatus (line 6934) | def nvmlDeviceGetSramEccErrorStatus(device, status):
  class c_nvmlDeviceCapabilities_v1_t (line 6945) | class c_nvmlDeviceCapabilities_v1_t(_PrintableStructure):
    method __init__ (line 6951) | def __init__(self):
  function nvmlDeviceGetCapabilities (line 6957) | def nvmlDeviceGetCapabilities(device, caps):
  class c_nvmlPlatformInfo_v1_t (line 6962) | class c_nvmlPlatformInfo_v1_t(_PrintableStructure):
    method __init__ (line 6974) | def __init__(self):
  function nvmlDeviceGetPlatformInfo (line 6981) | def nvmlDeviceGetPlatformInfo(device, platformInfo):
  class c_nvmlMask255_t (line 6988) | class c_nvmlMask255_t(_PrintableStructure):
  class c_nvmlWorkloadPowerProfileInfo_v1_t (line 7015) | class c_nvmlWorkloadPowerProfileInfo_v1_t(_PrintableStructure):
    method __init__ (line 7023) | def __init__(self):
  class c_nvmlWorkloadPowerProfileProfilesInfo_v1_t (line 7032) | class c_nvmlWorkloadPowerProfileProfilesInfo_v1_t(_PrintableStructure):
    method __init__ (line 7042) | def __init__(self):
  class c_nvmlWorkloadPowerProfileCurrentProfiles_v1_t (line 7051) | class c_nvmlWorkloadPowerProfileCurrentProfiles_v1_t(_PrintableStructure):
    method __init__ (line 7059) | def __init__(self):
  class c_nvmlWorkloadPowerProfileRequestedProfiles_v1_t (line 7068) | class c_nvmlWorkloadPowerProfileRequestedProfiles_v1_t(_PrintableStructu...
    method __init__ (line 7074) | def __init__(self):
  function nvmlDeviceWorkloadPowerProfileGetProfilesInfo (line 7080) | def nvmlDeviceWorkloadPowerProfileGetProfilesInfo(device, profilesInfo):
  function nvmlDeviceWorkloadPowerProfileGetCurrentProfiles (line 7087) | def nvmlDeviceWorkloadPowerProfileGetCurrentProfiles(device, currentProf...
  function nvmlDeviceWorkloadPowerProfileSetRequestedProfiles (line 7094) | def nvmlDeviceWorkloadPowerProfileSetRequestedProfiles(device, requested...
  function nvmlDeviceWorkloadPowerProfileClearRequestedProfiles (line 7101) | def nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(device, request...
  function nvmlDeviceGetNvlinkSupportedBwModes (line 7108) | def nvmlDeviceGetNvlinkSupportedBwModes(device, supportedBwModes):
  function nvmlDeviceGetNvlinkBwMode (line 7115) | def nvmlDeviceGetNvlinkBwMode(device, getBwMode):
  function nvmlDeviceSetNvlinkBwMode (line 7122) | def nvmlDeviceSetNvlinkBwMode(device, setBwMode):
  class c_nvmlDramEncryptionInfo_t (line 7132) | class c_nvmlDramEncryptionInfo_t(_PrintableStructure):
    method __init__ (line 7138) | def __init__(self):
  function nvmlDeviceGetDramEncryptionMode (line 7144) | def nvmlDeviceGetDramEncryptionMode(handle):
  function nvmlDeviceGetCurrentDramEncryptionMode (line 7154) | def nvmlDeviceGetCurrentDramEncryptionMode(handle):
  function nvmlDeviceGetPendingDramEncryptionMode (line 7159) | def nvmlDeviceGetPendingDramEncryptionMode(handle):
  function nvmlDeviceSetDramEncryptionMode (line 7163) | def nvmlDeviceSetDramEncryptionMode(handle, mode):
  class c_nvmlPowerSmoothingState_v1_t (line 7183) | class c_nvmlPowerSmoothingState_v1_t(_PrintableStructure):
    method __init__ (line 7189) | def __init__(self):
  class c_nvmlPowerSmoothingProfile_v1_t (line 7198) | class c_nvmlPowerSmoothingProfile_v1_t(_PrintableStructure):
    method __init__ (line 7206) | def __init__(self):
  function nvmlDevicePowerSmoothingActivatePresetProfile (line 7212) | def nvmlDevicePowerSmoothingActivatePresetProfile(device, profile):
  function nvmlDevicePowerSmoothingUpdatePresetProfileParam (line 7218) | def nvmlDevicePowerSmoothingUpdatePresetProfileParam(device, profile):
  function nvmlDevicePowerSmoothingSetState (line 7224) | def nvmlDevicePowerSmoothingSetState(device, state):

FILE: python/sglang/multimodal_gen/tools/convert_hf_to_fp8.py
  function ceildiv (line 44) | def ceildiv(a, b):
  function block_fp8 (line 48) | def block_fp8(weight, block_size):
  function channel_fp8 (line 82) | def channel_fp8(weight):
  function tensor_fp8 (line 90) | def tensor_fp8(weight):
  function quant_fp8 (line 98) | def quant_fp8(weight, strategy, block_size=None):
  class ConversionResult (line 107) | class ConversionResult:
    method __init__ (line 108) | def __init__(self):
    method add_result (line 114) | def add_result(self, filename, q_weights, module_names):
  function process_file (line 122) | def process_file(
  function convert_fp8 (line 180) | def convert_fp8(input_path, output_path, strategy, block_size=None, max_...

FILE: python/sglang/multimodal_gen/tools/wan_repack.py
  function get_transformer_config (line 53) | def get_transformer_config(model_type: str) -> Tuple[Dict[str, Any], ...]:
  function update_dict_ (line 59) | def update_dict_(dict: Dict[str, Any], old_key: str, new_key: str) -> Di...
  function load_sharded_safetensors (line 63) | def load_sharded_safetensors(path: pathlib.Path):
  function convert_transformer (line 70) | def convert_transformer(model_type: str, model_dir: str, output_dir: str):
  function get_args (line 96) | def get_args():

FILE: python/sglang/multimodal_gen/utils.py
  function _expand_path_value (line 38) | def _expand_path_value(field_name: str, value: Any) -> Any:
  function expand_path_kwargs (line 49) | def expand_path_kwargs(kwargs: dict[str, Any]) -> dict[str, Any]:
  function expand_path_fields (line 53) | def expand_path_fields(obj) -> None:
  function find_nccl_library (line 71) | def find_nccl_library() -> str:
  function _patched_set_stream (line 104) | def _patched_set_stream(stream: torch.cuda.Stream | None) -> None:
  function current_stream (line 114) | def current_stream() -> torch.cuda.Stream | None:
  class StoreBoolean (line 146) | class StoreBoolean(argparse.Action):
    method __init__ (line 148) | def __init__(self, option_strings, dest, default=False, required=False...
    method __call__ (line 159) | def __call__(self, parser, namespace, values, option_string=None):
  class FlexibleArgumentParser (line 175) | class FlexibleArgumentParser(argparse.ArgumentParser):
    method __init__ (line 178) | def __init__(self, *args, **kwargs) -> None:
    method parse_args (line 184) | def parse_args(  # type: ignore[override]
    method _pull_args_from_config (line 238) | def _pull_args_from_config(self, args: list[str]) -> list[str]:
    method _load_config_file (line 326) | def _load_config_file(self, file_path: str) -> list[str]:
  function warn_for_unimplemented_methods (line 396) | def warn_for_unimplemented_methods(cls: type[T]) -> type[T]:
  function align_to (line 439) | def align_to(value: int, alignment: int) -> int:
  function resolve_obj_by_qualname (line 452) | def resolve_obj_by_qualname(qualname: str) -> Any:
  function import_pynvml (line 462) | def import_pynvml():
  function update_environment_variables (line 492) | def update_environment_variables(envs: dict[str, str]):
  function run_method (line 504) | def run_method(
  function shallow_asdict (line 528) | def shallow_asdict(obj) -> dict[str, Any]:
  function kill_itself_when_parent_died (line 535) | def kill_itself_when_parent_died() -> None:
  function get_exception_traceback (line 551) | def get_exception_traceback() -> str:
  class TypeBasedDispatcher (line 557) | class TypeBasedDispatcher:
    method __init__ (line 559) | def __init__(self, mapping: list[tuple[type, Callable]]):
    method __call__ (line 562) | def __call__(self, obj: Any):
  class MixedPrecisionState (line 570) | class MixedPrecisionState:
  function get_mixed_precision_state (line 582) | def get_mixed_precision_state() -> MixedPrecisionState:
  function set_mixed_precision_policy (line 589) | def set_mixed_precision_policy(
  function get_compute_dtype (line 611) | def get_compute_dtype() -> torch.dtype:
  function dict_to_3d_list (line 620) | def dict_to_3d_list(
  function set_random_seed (line 681) | def set_random_seed(seed: int) -> None:
  function is_vsa_available (line 688) | def is_vsa_available() -> bool:
  function is_vmoba_available (line 693) | def is_vmoba_available() -> bool:
  function masks_like (line 705) | def masks_like(
  function best_output_size (line 771) | def best_output_size(w, h, dw, dh, expected_area):
  function calculate_dimensions (line 796) | def calculate_dimensions(target_area, ratio):

FILE: python/sglang/profiler.py
  function run_profile (line 21) | def run_profile(

FILE: python/sglang/srt/batch_invariant_ops/batch_invariant_ops.py
  function _matmul_launch_metadata (line 40) | def _matmul_launch_metadata(
  function _compute_pid (line 60) | def _compute_pid(tile_id, num_pid_in_group, num_pid_m, GROUP_SIZE_M, NUM...
  function matmul_kernel_persistent (line 70) | def matmul_kernel_persistent(
  function _matmul_persistent_triton (line 163) | def _matmul_persistent_triton(
  function _matmul_persistent_deepgemm (line 240) | def _matmul_persistent_deepgemm(
  function matmul_persistent (line 265) | def matmul_persistent(
  function _log_softmax_kernel (line 309) | def _log_softmax_kernel(
  function log_softmax (line 381) | def log_softmax(input: torch.Tensor, dim: int = -1) -> torch.Tensor:
  function mean_kernel (line 425) | def mean_kernel(
  function mean_dim (line 474) | def mean_dim(
  function mm_batch_invariant (line 567) | def mm_batch_invariant(a, b):
  function addmm_batch_invariant (line 571) | def addmm_batch_invariant(bias, a, b):
  function _log_softmax_batch_invariant (line 575) | def _log_softmax_batch_invariant(input, dim, _half_to_float):
  function mean_batch_invariant (line 580) | def mean_batch_invariant(input, dim, keepdim=False, dtype: torch.dtype |...
  function bmm_kernel_persistent (line 597) | def bmm_kernel_persistent(
  function bmm_batch_invariant (line 715) | def bmm_batch_invariant(a, b, *, out=None):
  function _rms_norm_kernel (line 813) | def _rms_norm_kernel(
  function rms_norm (line 863) | def rms_norm(
  function rms_norm_batch_invariant (line 910) | def rms_norm_batch_invariant(
  function is_batch_invariant_mode_enabled (line 937) | def is_batch_invariant_mode_enabled():
  function enable_batch_invariant_mode (line 941) | def enable_batch_invariant_mode(
  function disable_batch_invariant_mode (line 965) | def disable_batch_invariant_mode():
  function set_batch_invariant_mode (line 977) | def set_batch_invariant_mode(enabled: bool = True):
  function get_batch_invariant_attention_block_size (line 993) | def get_batch_invariant_attention_block_size() -> AttentionBlockSize:

FILE: python/sglang/srt/batch_overlap/operations.py
  function execute_operations (line 21) | def execute_operations(inputs, operations):
  function execute_overlapped_operations (line 30) | def execute_overlapped_operations(
  class YieldOperation (line 61) | class YieldOperation:
  class ExecutionOperation (line 66) | class ExecutionOperation:
  class _StageExecutor (line 75) | class _StageExecutor:
    method __init__ (line 76) | def __init__(self, debug_name: str, stages: List[Stage], inputs: dict):
    method next (line 90) | def next(self):
    method output (line 118) | def output(self):
    method done (line 123) | def done(self):
    method num_stages (line 127) | def num_stages(self):
  function _annotate_region (line 132) | def _annotate_region(debug_name):
  class _StateDict (line 141) | class _StateDict:
    method __init__ (line 142) | def __init__(self):
    method __setattr__ (line 145) | def __setattr__(self, key, value):
    method __getattr__ (line 154) | def __getattr__(self, item):
    method __delattr__ (line 157) | def __delattr__(self, item):
    method pop (line 160) | def pop(self, item):
    method update (line 163) | def update(self, values: Dict[str, Any]):
    method get (line 167) | def get(self, item):
    method clear (line 170) | def clear(self, expect_keys: Sequence[str]):
  function _convert_operations_to_stages (line 179) | def _convert_operations_to_stages(operations: List[Operation]) -> List[S...
  function _chunk_by_separator (line 188) | def _chunk_by_separator(
  function _decorate_operations (line 202) | def _decorate_operations(operations: List[Operation], debug_name_prefix:...
  function _decorate_operation (line 206) | def _decorate_operation(operation: Operation, debug_name_prefix: str):

FILE: python/sglang/srt/batch_overlap/operations_strategy.py
  class OperationsStrategy (line 16) | class OperationsStrategy:
    method concat (line 22) | def concat(cls, items: List["OperationsStrategy"]) -> "OperationsStrat...
    method init_new_tbo (line 34) | def init_new_tbo(
  function _assert_all_same (line 70) | def _assert_all_same(items: List):
  function _compute_moe_deepseek_layer_operations_strategy_tbo (line 79) | def _compute_moe_deepseek_layer_operations_strategy_tbo(
  function _compute_moe_deepseek_blog_prefill (line 94) | def _compute_moe_deepseek_blog_prefill(layer):
  function _compute_moe_deepseek_blog_decode (line 125) | def _compute_moe_deepseek_blog_decode(layer):
  function _compute_moe_qwen3_layer_operations_strategy_tbo (line 158) | def _compute_moe_qwen3_layer_operations_strategy_tbo(
  function _compute_moe_qwen3_prefill (line 173) | def _compute_moe_qwen3_prefill(layer):
  function _compute_moe_qwen3_decode (line 203) | def _compute_moe_qwen3_decode(layer):
  function _compute_moe_mimov2_layer_operations_strategy_tbo (line 235) | def _compute_moe_mimov2_layer_operations_strategy_tbo(
  function _compute_moe_mimov2_prefill (line 250) | def _compute_moe_mimov2_prefill(layer):
  function _compute_moe_mimov2_decode (line 278) | def _compute_moe_mimov2_decode(layer):

FILE: python/sglang/srt/batch_overlap/single_batch_overlap.py
  class SboFlags (line 28) | class SboFlags:
    method enable_combine_down_gemm_two_stream_overlap (line 32) | def enable_combine_down_gemm_two_stream_overlap(cls):
    method enable_combine_shared_two_stream_overlap (line 43) | def enable_combine_shared_two_stream_overlap(cls):
    method enable_dispatch_shared_one_stream_overlap (line 51) | def enable_dispatch_shared_one_stream_overlap(cls):
    method fuse_shared_experts_inside_sbo (line 55) | def fuse_shared_experts_inside_sbo(cls):
  class CombineOverlapArgs (line 63) | class CombineOverlapArgs:
  class DownGemmOverlapArgs (line 75) | class DownGemmOverlapArgs:
  function compute_overlap_args (line 81) | def compute_overlap_args(dispatch_output, alt_stream):

FILE: python/sglang/srt/batch_overlap/two_batch_overlap.py
  function get_token_num_per_seq (line 62) | def get_token_num_per_seq(
  function compute_split_seq_index (line 78) | def compute_split_seq_index(
  function _is_two_chunk_split_enabled (line 97) | def _is_two_chunk_split_enabled(extend_lens: Sequence[int]) -> bool:
  function _split_extend_seqs (line 111) | def _split_extend_seqs(arr: Sequence[int]) -> int:
  function _split_array_by_cum_less_than_half (line 118) | def _split_array_by_cum_less_than_half(arr: Sequence[int]) -> int:
  function _split_array_by_balanced_sum (line 133) | def _split_array_by_balanced_sum(arr: Sequence[int]) -> int:
  function _update_device_and_sum_field_from_cpu_field (line 152) | def _update_device_and_sum_field_from_cpu_field(
  function _compute_mask_offset (line 180) | def _compute_mask_offset(seq_index: int, spec_info: Optional[EagleVerify...
  function split_spec_info (line 193) | def split_spec_info(
  function compute_split_token_index (line 265) | def compute_split_token_index(
  function compute_split_indices_for_cuda_graph_replay (line 286) | def compute_split_indices_for_cuda_graph_replay(
  class TboCudaGraphRunnerPlugin (line 315) | class TboCudaGraphRunnerPlugin:
    method __init__ (line 316) | def __init__(self):
    method capture_one_batch_size (line 319) | def capture_one_batch_size(self, batch: ForwardBatch, num_tokens: int):
    method replay_prepare (line 344) | def replay_prepare(
  class TboDPAttentionPreparer (line 370) | class TboDPAttentionPreparer:
    method prepare_all_gather (line 371) | def prepare_all_gather(
    method compute_output (line 419) | def compute_output(self, partial_global_info):
    method _compute_local_forward_mode (line 440) | def _compute_local_forward_mode(local_batch):
    method _compute_global_forward_mode (line 446) | def _compute_global_forward_mode(forward_modes):
    method _is_all_same (line 468) | def _is_all_same(x):
  class TboForwardBatchPreparer (line 472) | class TboForwardBatchPreparer:
    method prepare (line 474) | def prepare(cls, batch: ForwardBatch, is_draft_worker: bool = False):
    method prepare_raw (line 486) | def prepare_raw(
    method derive_fields_related_to_seq_len_for_two_chunk (line 551) | def derive_fields_related_to_seq_len_for_two_chunk(
    method filter_batch (line 613) | def filter_batch(
    method compute_tbo_children_num_token_non_padded (line 774) | def compute_tbo_children_num_token_non_padded(cls, batch: ForwardBatch):
    method compute_tbo_children_num_token_non_padded_raw (line 781) | def compute_tbo_children_num_token_non_padded_raw(
    method _compute_split_token_index (line 792) | def _compute_split_token_index(cls, batch: ForwardBatch):
  function _compute_extend_num_tokens (line 804) | def _compute_extend_num_tokens(input_ids, forward_mode: ForwardMode):
  function model_forward_maybe_tbo (line 819) | def model_forward_maybe_tbo(
  function _model_forward_tbo (line 851) | def _model_forward_tbo(
  function _model_forward_non_tbo (line 883) | def _model_forward_non_tbo(inputs, operations_strategy: OperationsStrate...
  function _model_forward_tbo_split_inputs (line 888) | def _model_forward_tbo_split_inputs(
  function _model_forward_tbo_split_inputs_raw (line 938) | def _model_forward_tbo_split_inputs_raw(
  function _model_forward_filter_inputs (line 966) | def _model_forward_filter_inputs(
  function _model_forward_tbo_merge_outputs (line 1000) | def _model_forward_tbo_merge_outputs(output_a, output_b, original_len):
  class MaybeTboDeepEPDispatcher (line 1024) | class MaybeTboDeepEPDispatcher(BaseDispatcher):
    method __init__ (line 1025) | def __init__(self, **kwargs):
    method _execute (line 1046) | def _execute(self, name, tbo_subbatch_index: Optional[int] = None, **k...
    method dispatch (line 1049) | def dispatch(self, **kwargs) -> DispatchOutput:
    method dispatch_a (line 1052) | def dispatch_a(self, **kwargs):
    method dispatch_b (line 1055) | def dispatch_b(self, **kwargs):
    method combine (line 1058) | def combine(self, **kwargs) -> torch.Tensor:
    method combine_a (line 1061) | def combine_a(self, **kwargs):
    method combine_b (line 1064) | def combine_b(self, **kwargs):
    method register_deepep_dispatch_hook (line 1067) | def register_deepep_dispatch_hook(self, hook):
    method set_quant_config (line 1073) | def set_quant_config(self, quant_config: dict):
    method set_overlap_args (line 1078) | def set_overlap_args(
    method clear_overlap_args (line 1085) | def clear_overlap_args(self):

FILE: python/sglang/srt/checkpoint_engine/checkpoint_engine_worker.py
  class SGLangCheckpointEngineWorkerExtension (line 36) | class SGLangCheckpointEngineWorkerExtension:
    method __init__ (line 42) | def __init__(self):
    method get_device_uuid (line 45) | def get_device_uuid(self) -> str:
    method get_device_id (line 53) | def get_device_id(self) -> int:
    method get_model_loader (line 59) | def get_model_loader(self) -> Callable:
    method get_post_hook (line 65) | def get_post_hook(self) -> Optional[Callable]:
    method update_weights_from_ipc (line 69) | def update_weights_from_ipc(self, zmq_handles: Dict[str, str]):
  class SGLangCheckpointEngineWorkerExtensionImpl (line 92) | class SGLangCheckpointEngineWorkerExtensionImpl(SGLangCheckpointEngineWo...
    method __init__ (line 98) | def __init__(self, model_runner):
    method get_device_uuid (line 102) | def get_device_uuid(self) -> str:
    method get_device_id (line 111) | def get_device_id(self) -> int:
    method get_model_loader (line 115) | def get_model_loader(self) -> Callable:
    method get_post_hook (line 119) | def get_post_hook(self) -> Optional[Callable]:

FILE: python/sglang/srt/checkpoint_engine/update.py
  function timer (line 42) | def timer(msg: str):
  function check_sglang_ready (line 49) | def check_sglang_ready(
  function split_checkpoint_files (line 74) | def split_checkpoint_files(
  function split_tensors (line 87) | def split_tensors(
  function req_inference (line 108) | def req_inference(
  function update_weights (line 137) | def update_weights(
  function join (line 175) | def join(
  function run_with_torchrun (line 199) | def run_with_torchrun():
  function main (line 240) | def main():

FILE: python/sglang/srt/compilation/backend.py
  function make_compiler (line 29) | def make_compiler(config: CompilationConfig):
  function make_backend (line 38) | def make_backend(
  class CompilerManager (line 64) | class CompilerManager:
    method __init__ (line 65) | def __init__(
    method compute_hash (line 73) | def compute_hash(self):
    method initialize_cache (line 76) | def initialize_cache(
    method save_to_file (line 91) | def save_to_file(self):
    method load (line 99) | def load(
    method compile (line 128) | def compile(
  class SplitItem (line 206) | class SplitItem:
  function split_graph (line 213) | def split_graph(
  class PiecewiseCompileInterpreter (line 265) | class PiecewiseCompileInterpreter(torch.fx.Interpreter):
    method __init__ (line 266) | def __init__(
    method run (line 287) | def run(self, *args):
    method call_module (line 295) | def call_module(
  function set_model_tag (line 343) | def set_model_tag(tag: str):
  class SGLangBackend (line 357) | class SGLangBackend:
    method __init__ (line 373) | def __init__(
    method configure_post_pass (line 391) | def configure_post_pass(self):
    method __call__ (line 395) | def __call__(self, graph: fx.GraphModule, example_inputs) -> Callable:

FILE: python/sglang/srt/compilation/compilation_config.py
  function register_split_op (line 8) | def register_split_op(op_name: Optional[str] = None):
  class CompilationConfig (line 18) | class CompilationConfig:
    method __init__ (line 19) | def __init__(
    method add_split_op (line 32) | def add_split_op(self, op: str):
    method add_traced_file (line 35) | def add_traced_file(self, file_path: str):
    method get_traced_files (line 38) | def get_traced_files(self):
    method get_capture_sizes (line 41) | def get_capture_sizes(self):
    method get_enable_debug_mode (line 44) | def get_enable_debug_mode(self):

FILE: python/sglang/srt/compilation/compilation_counter.py
  class CompilationCounter (line 9) | class CompilationCounter:
    method clone (line 32) | def clone(self) -> "CompilationCounter":
    method expect (line 36) | def expect(self, **kwargs):

FILE: python/sglang/srt/compilation/compile.py
  class IntermediateTensors (line 18) | class IntermediateTensors:
    method __init__ (line 32) | def __init__(self, tensors):
    method __getitem__ (line 39) | def __getitem__(self, key: Union[str, slice]):
    method __setitem__ (line 45) | def __setitem__(self, key: str, value: torch.Tensor):
    method items (line 48) | def items(self):
    method __len__ (line 51) | def __len__(self):
    method __eq__ (line 54) | def __eq__(self, other: object):
    method __repr__ (line 57) | def __repr__(self) -> str:
  function _normalize_dims (line 61) | def _normalize_dims(dims, ndim: int):
  class _MaybeIntermediateTensors (line 66) | class _MaybeIntermediateTensors:
    method __init__ (line 69) | def __init__(self, obj):
  function _mark_dynamic_on_value (line 76) | def _mark_dynamic_on_value(val, dims):
  function _infer_dynamic_arg_dims_from_annotations (line 87) | def _infer_dynamic_arg_dims_from_annotations(forward_fn):
  function install_torch_compiled (line 111) | def install_torch_compiled(

FILE: python/sglang/srt/compilation/compiler_interface.py
  class CompilerInterface (line 20) | class CompilerInterface:
    method initialize_cache (line 29) | def initialize_cache(
    method compute_hash (line 47) | def compute_hash(self) -> str:
    method compile (line 59) | def compile(
    method load (line 93) | def load(
  function get_inductor_factors (line 110) | def get_inductor_factors() -> list[Any]:
  class AlwaysHitShapeEnv (line 126) | class AlwaysHitShapeEnv:
    method __init__ (line 151) | def __init__(self) -> None:
    method evaluate_guards_expression (line 154) | def evaluate_guards_expression(self, *args, **kwargs):
    method get_pruned_guards (line 157) | def get_pruned_guards(self, *args, **kwargs):
    method produce_guards_expression (line 160) | def produce_guards_expression(self, *args, **kwargs):
  class InductorAdaptor (line 164) | class InductorAdaptor(CompilerInterface):
    method compute_hash (line 171) | def compute_hash(self) -> str:
    method initialize_cache (line 178) | def initialize_cache(
    method compile (line 197) | def compile(
    method load (line 374) | def load(
    method metrics_context (line 452) | def metrics_context(self) -> contextlib.AbstractContextManager:
  function set_inductor_config (line 473) | def set_inductor_config(config, runtime_shape):
  class EagerAdapter (line 481) | class EagerAdapter(CompilerInterface):
    method compile (line 484) | def compile(
    method load (line 495) | def load(

FILE: python/sglang/srt/compilation/cuda_piecewise_backend.py
  class ConcreteSizeEntry (line 24) | class ConcreteSizeEntry:
  class CUDAPiecewiseBackend (line 40) | class CUDAPiecewiseBackend:
    method __init__ (line 42) | def __init__(
    method check_for_ending_compilation (line 101) | def check_for_ending_compilation(self):
    method __call__ (line 107) | def __call__(self, *args) -> Any:

FILE: python/sglang/srt/compilation/fix_functionalization.py
  class FixFunctionalizationPass (line 17) | class FixFunctionalizationPass(SGLangInductorPass):
    method __call__ (line 26) | def __call__(self, graph: torch.fx.Graph):
    method _remove (line 50) | def _remove(self, node_or_nodes: Union[torch.fx.Node, Iterable[torch.f...
    method defunctionalize (line 59) | def defunctionalize(
    method replace_users_with_mutated_args (line 75) | def replace_users_with_mutated_args(
    method getitem_users (line 91) | def getitem_users(self, node: torch.fx.Node) -> dict[int, torch.fx.Node]:
    method insert_defunctionalized (line 103) | def insert_defunctionalized(

FILE: python/sglang/srt/compilation/fx_utils.py
  function is_func (line 12) | def is_func(node: fx.Node, target) -> bool:
  function is_auto_func (line 16) | def is_auto_func(node: fx.Node, op: OpOverload) -> bool:
  function find_specified_fn_maybe (line 21) | def find_specified_fn_maybe(
  function find_specified_fn (line 31) | def find_specified_fn(nodes: Iterable[fx.Node], op: OpOverload) -> fx.Node:
  function find_auto_fn_maybe (line 38) | def find_auto_fn_maybe(nodes: Iterable[fx.Node], op: OpOverload) -> Opti...
  function find_auto_fn (line 46) | def find_auto_fn(nodes: Iterable[fx.Node], op: OpOverload) -> fx.Node:
  function find_getitem_maybe (line 54) | def find_getitem_maybe(node: fx.Node, idx: int) -> Optional[fx.Node]:
  function find_getitem (line 62) | def find_getitem(node: fx.Node, idx: int) -> fx.Node:
  function find_op_nodes (line 69) | def find_op_nodes(op: OpOverload, graph: fx.Graph) -> Iterator[fx.Node]:
  function get_only_user (line 81) | def get_only_user(node: fx.Node) -> fx.Node:

FILE: python/sglang/srt/compilation/inductor_pass.py
  class PassContext (line 22) | class PassContext:
    method __init__ (line 24) | def __init__(self, runtime_shape: Optional[int]):
  function get_pass_context (line 28) | def get_pass_context() -> PassContext:
  function pass_context (line 35) | def pass_context(runtime_shape: Optional[int]):
  class InductorPass (line 48) | class InductorPass(CustomGraphPass):
    method uuid (line 54) | def uuid(self) -> Any:
    method hash_source (line 64) | def hash_source(*srcs: Union[str, Any]):
    method hash_dict (line 83) | def hash_dict(dict_: dict[Any, Any]):
    method is_applicable_for_shape (line 91) | def is_applicable_for_shape(self, shape: Optional[int]):
  class CallableInductorPass (line 95) | class CallableInductorPass(InductorPass):
    method __init__ (line 101) | def __init__(
    method __call__ (line 107) | def __call__(self, graph: torch.fx.Graph):
    method uuid (line 110) | def uuid(self) -> Any:
  class SGLangInductorPass (line 114) | class SGLangInductorPass(InductorPass):
    method __init__ (line 116) | def __init__(
    method dump_graph (line 121) | def dump_graph(self, graph: torch.fx.Graph, stage: str):
    method begin (line 124) | def begin(self):
    method end_and_log (line 127) | def end_and_log(self):
  class PrinterInductorPass (line 133) | class PrinterInductorPass(SGLangInductorPass):
    method __init__ (line 135) | def __init__(self, name: str):
    method __call__ (line 139) | def __call__(self, graph: torch.fx.Graph):

FILE: python/sglang/srt/compilation/npu_piecewise_backend.py
  class NPUPiecewiseBackend (line 16) | class NPUPiecewiseBackend(CUDAPiecewiseBackend):
    method __init__ (line 17) | def __init__(
    method __call__ (line 41) | def __call__(self, *args):

FILE: python/sglang/srt/compilation/pass_manager.py
  class PostGradPassManager (line 18) | class PostGradPassManager(CustomGraphPass):
    method __init__ (line 33) | def __init__(self):
    method __call__ (line 36) | def __call__(self, graph: fx.Graph):
    method configure (line 45) | def configure(
    method add (line 51) | def add(self, pass_: InductorPass):
    method uuid (line 55) | def uuid(self):

FILE: python/sglang/srt/compilation/piecewise_context_manager.py
  function is_in_piecewise_cuda_graph (line 21) | def is_in_piecewise_cuda_graph():
  function is_in_pcg_torch_compile (line 25) | def is_in_pcg_torch_compile():
  function get_pcg_capture_stream (line 29) | def get_pcg_capture_stream():
  function enable_piecewise_cuda_graph_compile (line 34) | def enable_piecewise_cuda_graph_compile():
  function enable_piecewise_cuda_graph (line 42) | def enable_piecewise_cuda_graph():
  function set_pcg_capture_stream (line 59) | def set_pcg_capture_stream(stream: torch.cuda.Stream):
  class ForwardContext (line 67) | class ForwardContext:
    method __init__ (line 68) | def __init__(self):
    method set_forward_batch (line 75) | def set_forward_batch(self, forward_batch: ForwardBatch):
    method set_attention_layers (line 78) | def set_attention_layers(self, layers: List[Any]):
    method set_quant_config (line 81) | def set_quant_config(self, quant_config: Any):
    method set_moe_layers (line 84) | def set_moe_layers(self, layers: List[Any]):
    method set_moe_fusions (line 87) | def set_moe_fusions(self, fusions: List[Any]):
  function get_forward_context (line 94) | def get_forward_context() -> Optional[ForwardContext]:
  function set_forward_context (line 101) | def set_forward_context(

FILE: python/sglang/srt/compilation/weak_ref_tensor.py
  function weak_ref_tensors (line 15) | def weak_ref_tensors(

FILE: python/sglang/srt/configs/afmoe.py
  class AfmoeConfig (line 6) | class AfmoeConfig(PretrainedConfig):
    method __init__ (line 9) | def __init__(

FILE: python/sglang/srt/configs/bailing_hybrid.py
  class HybridLayerType (line 27) | class HybridLayerType(enum.Enum):
  class BailingHybridConfig (line 32) | class BailingHybridConfig(PretrainedConfig):
    method __init__ (line 37) | def __init__(
    method layers_block_type (line 144) | def layers_block_type(self):
    method linear_layer_ids (line 159) | def linear_layer_ids(self):
    method full_attention_layer_ids (line 167) | def full_attention_layer_ids(self):
    method mamba2_cache_params (line 175) | def mamba2_cache_params(self) -> Mamba2CacheParams:

FILE: python/sglang/srt/configs/chatglm.py
  class ChatGLMConfig (line 12) | class ChatGLMConfig(PretrainedConfig):
    method __init__ (line 19) | def __init__(

FILE: python/sglang/srt/configs/dbrx.py
  class DbrxAttentionConfig (line 16) | class DbrxAttentionConfig(PretrainedConfig):
    method __init__ (line 34) | def __init__(
    method from_pretrained (line 55) | def from_pretrained(
  class DbrxFFNConfig (line 83) | class DbrxFFNConfig(PretrainedConfig):
    method __init__ (line 106) | def __init__(
    method from_pretrained (line 137) | def from_pretrained(
  class DbrxConfig (line 165) | class DbrxConfig(PretrainedConfig):
    method __init__ (line 229) | def __init__(

FILE: python/sglang/srt/configs/deepseek_ocr.py
  function get_default_ngram_custom_params (line 40) | def get_default_ngram_custom_params() -> Dict[str, Any]:
  class DictOutput (line 53) | class DictOutput(object):
    method items (line 54) | def items(self):
    method keys (line 57) | def keys(self):
    method __getitem__ (line 60) | def __getitem__(self, item):
    method __contains__ (line 63) | def __contains__(self, key):
    method __setitem__ (line 66) | def __setitem__(self, key, value):
  class VLChatProcessorOutput (line 71) | class VLChatProcessorOutput(DictOutput):
    method __len__ (line 81) | def __len__(self):
  class ImageTransform (line 85) | class ImageTransform(object):
    method __init__ (line 86) | def __init__(
    method __call__ (line 113) | def __call__(self, pil_img: Image.Image):
  function find_closest_aspect_ratio (line 118) | def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height...
  function dynamic_preprocess (line 134) | def dynamic_preprocess(
  class DeepseekOCRProcessor (line 180) | class DeepseekOCRProcessor(ProcessorMixin):
    method __init__ (line 184) | def __init__(
    method format_messages_v2 (line 254) | def format_messages_v2(self, messages: str, pil_images, max_req_input_...
    method bos_id (line 295) | def bos_id(self):
    method eos_id (line 299) | def eos_id(self):
    method pad_id (line 303) | def pad_id(self):
    method encode (line 306) | def encode(self, text: str, bos: bool = True, eos: bool = False):
    method decode (line 316) | def decode(self, t: List[int], **kwargs) -> str:
    method process_one (line 319) | def process_one(
    method __call__ (line 393) | def __call__(
    method find_all_indices (line 421) | def find_all_indices(self, messages, target_value):
    method tokenize_with_images (line 428) | def tokenize_with_images(
  class VisionEncoderConfig (line 599) | class VisionEncoderConfig(PretrainedConfig):
    method __init__ (line 618) | def __init__(
  class MlpProjectorConfig (line 650) | class MlpProjectorConfig(PretrainedConfig):
    method __init__ (line 660) | def __init__(
  class DeepseekV2Config (line 680) | class DeepseekV2Config(PretrainedConfig):
    method __init__ (line 684) | def __init__(
  class DeepseekVLV2Config (line 781) | class DeepseekVLV2Config(PretrainedConfig):
    method __init__ (line 792) | def __init__(

FILE: python/sglang/srt/configs/deepseekvl2.py
  function select_best_resolution (line 15) | def select_best_resolution(image_size, candidate_resolutions):
  class DictOutput (line 43) | class DictOutput(object):
    method items (line 44) | def items(self):
    method keys (line 47) | def keys(self):
    method __getitem__ (line 50) | def __getitem__(self, item):
    method __contains__ (line 53) | def __contains__(self, key):
    method __setitem__ (line 56) | def __setitem__(self, key, value):
  class VLChatProcessorOutput (line 61) | class VLChatProcessorOutput(DictOutput):
    method __len__ (line 70) | def __len__(self):
  class ImageTransform (line 74) | class ImageTransform(object):
    method __init__ (line 75) | def __init__(
    method __call__ (line 102) | def __call__(self, pil_img: Image.Image):
  class DeepseekVLV2Processor (line 107) | class DeepseekVLV2Processor(ProcessorMixin):
    method __init__ (line 111) | def __init__(
    method format_messages_v2 (line 179) | def format_messages_v2(self, messages, pil_images, max_req_input_len=-1):
    method bos_id (line 221) | def bos_id(self):
    method eos_id (line 225) | def eos_id(self):
    method pad_id (line 229) | def pad_id(self):
    method encode (line 232) | def encode(self, text: str, bos: bool = True, eos: bool = False):
    method decode (line 242) | def decode(self, t: List[int], **kwargs) -> str:
    method process_one (line 245) | def process_one(
    method __call__ (line 333) | def __call__(
    method find_all_indices (line 357) | def find_all_indices(self, messages, target_value):
    method tokenize_with_images (line 364) | def tokenize_with_images(
  class DeepseekVL2VisionEncoderConfig (line 468) | class DeepseekVL2VisionEncoderConfig(PretrainedConfig):
    method __init__ (line 487) | def __init__(
  class DeepseekVL2MlpProjectorConfig (line 519) | class DeepseekVL2MlpProjectorConfig(PretrainedConfig):
    method __init__ (line 529) | def __init__(
  class DeepseekV2Config (line 549) | class DeepseekV2Config(PretrainedConfig):
    method __init__ (line 554) | def __init__(
  class DeepseekVL2Config (line 650) | class DeepseekVL2Config(PretrainedConfig):
    method __init__ (line 660) | def __init__(

FILE: python/sglang/srt/configs/device_config.py
  class DeviceConfig (line 11) | class DeviceConfig:
    method __init__ (line 15) | def __init__(self, device: str = "cuda", gpu_id: int = -1) -> None:

FILE: python/sglang/srt/configs/dots_ocr.py
  class DotsOCRConfig (line 10) | class DotsOCRConfig(Qwen2Config):
    method __init__ (line 13) | def __init__(
    method save_pretrained (line 26) | def save_pretrained(self, save_directory, **kwargs):
  class DummyVideoProcessor (line 31) | class DummyVideoProcessor(BaseImageProcessor):
    method __call__ (line 34) | def __call__(self, *args, **kwargs):
  class DotsVLProcessor (line 38) | class DotsVLProcessor(Qwen2_5_VLProcessor):
    method __init__ (line 39) | def __init__(

FILE: python/sglang/srt/configs/dots_vlm.py
  class DotsVisionConfig (line 14) | class DotsVisionConfig(PretrainedConfig):
    method __init__ (line 17) | def __init__(
  class DotsVLMConfig (line 58) | class DotsVLMConfig(PretrainedConfig):
    method __init__ (line 61) | def __init__(self, **kwargs):
  class DotsVLMProcessorKwargs (line 71) | class DotsVLMProcessorKwargs(ProcessingKwargs, total=False):
  class DotsVLMProcessor (line 79) | class DotsVLMProcessor(Qwen2_5_VLProcessor):
    method __init__ (line 100) | def __init__(

FILE: python/sglang/srt/configs/exaone.py
  class ExaoneConfig (line 30) | class ExaoneConfig(PretrainedConfig):
    method __init__ (line 144) | def __init__(

FILE: python/sglang/srt/configs/falcon_h1.py
  class FalconH1Config (line 29) | class FalconH1Config(PretrainedConfig):
    method __init__ (line 139) | def __init__(
    method layers_block_type (line 287) | def layers_block_type(self):
    method full_attention_layer_ids (line 291) | def full_attention_layer_ids(self):
    method linear_layer_ids (line 296) | def linear_layer_ids(self):
    method mamba2_cache_params (line 301) | def mamba2_cache_params(self):

FILE: python/sglang/srt/configs/granitemoehybrid.py
  class GraniteMoeHybridConfig (line 28) | class GraniteMoeHybridConfig(PretrainedConfig):
    method __init__ (line 129) | def __init__(
    method mamba_layer_ids (line 269) | def mamba_layer_ids(self):
    method attention_layer_ids (line 276) | def attention_layer_ids(self):
    method full_attention_layer_ids (line 283) | def full_attention_layer_ids(self):
    method mamba2_cache_params (line 288) | def mamba2_cache_params(self):

FILE: python/sglang/srt/configs/internvl.py
  class InternLM2Config (line 29) | class InternLM2Config(PretrainedConfig):
    method __init__ (line 80) | def __init__(  # pylint: disable=W0102
    method _rope_scaling_validation (line 134) | def _rope_scaling_validation(self):
  class InternVisionConfig (line 164) | class InternVisionConfig(PretrainedConfig):
    method __init__ (line 212) | def __init__(
    method from_pretrained (line 254) | def from_pretrained(
  class InternVLChatConfig (line 277) | class InternVLChatConfig(PretrainedConfig):
    method __init__ (line 281) | def __init__(
    method to_dict (line 351) | def to_dict(self):
  class InternLM2Tokenizer (line 486) | class InternLM2Tokenizer(PreTrainedTokenizer):
    method __init__ (line 500) | def __init__(
    method no_prefix_space_tokens (line 533) | def no_prefix_space_tokens(self):
    method vocab_size (line 542) | def vocab_size(self):
    method bos_token_id (line 547) | def bos_token_id(self) -> Optional[int]:
    method eos_token_id (line 551) | def eos_token_id(self) -> Optional[int]:
    method get_vocab (line 554) | def get_vocab(self):
    method _tokenize (line 560) | def _tokenize(self, text):
    method _convert_token_to_id (line 564) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 568) | def _convert_id_to_token(self, index):
    method _maybe_add_prefix_space (line 573) | def _maybe_add_prefix_space(self, tokens, decoded):
    method convert_tokens_to_string (line 579) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 599) | def save_vocabulary(
    method build_inputs_with_special_tokens (line 632) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method get_special_tokens_mask (line 648) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 680) | def create_token_type_ids_from_sequences(

FILE: python/sglang/srt/configs/janus_pro.py
  class DictToObject (line 25) | class DictToObject(dict):
    method __init__ (line 26) | def __init__(self, dictionary):
  class VisionConfig (line 35) | class VisionConfig(PretrainedConfig):
    method __init__ (line 40) | def __init__(self, **kwargs):
  class GenAlignerConfig (line 50) | class GenAlignerConfig(PretrainedConfig):
    method __init__ (line 55) | def __init__(self, **kwargs):
  class GenHeadConfig (line 65) | class GenHeadConfig(PretrainedConfig):
    method __init__ (line 70) | def __init__(self, **kwargs):
  class AlignerConfig (line 80) | class AlignerConfig(PretrainedConfig):
    method __init__ (line 85) | def __init__(self, **kwargs):
  class GenVisionConfig (line 95) | class GenVisionConfig(PretrainedConfig):
    method __init__ (line 100) | def __init__(self, **kwargs):
  class SigLIPVisionCfg (line 111) | class SigLIPVisionCfg:
  class MultiModalityConfig (line 124) | class MultiModalityConfig(PretrainedConfig):
    method __init__ (line 135) | def __init__(self, **kwargs):
  class VLMImageProcessor (line 159) | class VLMImageProcessor(BaseImageProcessor):
    method __init__ (line 162) | def __init__(
    method resize (line 194) | def resize(self, pil_img: Image) -> np.ndarray:
    method preprocess (line 249) | def preprocess(self, images, return_tensors: str = "pt", **kwargs) -> ...
    method default_shape (line 282) | def default_shape(self):
  class DictOutput (line 286) | class DictOutput(object):
    method items (line 287) | def items(self):
    method keys (line 290) | def keys(self):
    method __getitem__ (line 293) | def __getitem__(self, item):
    method __contains__ (line 296) | def __contains__(self, key):
    method __setitem__ (line 299) | def __setitem__(self, key, value):
  class VLChatProcessorOutput (line 304) | class VLChatProcessorOutput(DictOutput):
    method __len__ (line 310) | def __len__(self):
  class BatchedVLChatProcessorOutput (line 315) | class BatchedVLChatProcessorOutput(DictOutput):
  class VLChatProcessor (line 326) | class VLChatProcessor(ProcessorMixin):
    method __init__ (line 332) | def __init__(
    method image_token (line 374) | def image_token(self):
    method image_id (line 378) | def image_id(self) -> int:
    method image_start_id (line 383) | def image_start_id(self):
    method image_end_id (line 388) | def image_end_id(self):
    method image_start_token (line 393) | def image_start_token(self):
    method image_end_token (line 397) | def image_end_token(self):
    method pad_id (line 401) | def pad_id(self):
    method add_image_token (line 405) | def add_image_token(
    method process_one (line 450) | def process_one(
    method __call__ (line 497) | def __call__(
    method batchify (line 532) | def batchify(
  class VLMImageProcessorConfig (line 596) | class VLMImageProcessorConfig(PretrainedConfig):
    method __init__ (line 605) | def __init__(

FILE: python/sglang/srt/configs/jet_nemotron.py
  class JetBlockConfig (line 14) | class JetBlockConfig:
  class JetNemotronConfig (line 25) | class JetNemotronConfig(PretrainedConfig):
    method full_attention_layer_ids (line 42) | def full_attention_layer_ids(self) -> list[int]:
    method linear_layer_ids (line 50) | def linear_layer_ids(self) -> list[int]:
    method mamba2_cache_params (line 58) | def mamba2_cache_params(self) -> Mamba2CacheParams:

FILE: python/sglang/srt/configs/jet_vlm.py
  class JetVLMConfig (line 10) | class JetVLMConfig(PretrainedConfig):
    method __init__ (line 18) | def __init__(
    method full_attention_layer_ids (line 44) | def full_attention_layer_ids(self) -> list[int]:
    method linear_layer_ids (line 48) | def linear_layer_ids(self) -> list[int]:
    method mamba2_cache_params (line 52) | def mamba2_cache_params(self) -> Mamba2CacheParams:

FILE: python/sglang/srt/configs/kimi_k25.py
  class KimiK25VisionConfig (line 9) | class KimiK25VisionConfig(PretrainedConfig):
    method __init__ (line 36) | def __init__(
  class KimiK25Config (line 84) | class KimiK25Config(PretrainedConfig):
    method __init__ (line 128) | def __init__(
    method hidden_size (line 164) | def hidden_size(self) -> int:
    method vocab_size (line 169) | def vocab_size(self) -> int:

FILE: python/sglang/srt/configs/kimi_linear.py
  class KimiLinearConfig (line 7) | class KimiLinearConfig(PretrainedConfig):
    method __init__ (line 11) | def __init__(
    method is_mla (line 111) | def is_mla(self):
    method is_moe (line 122) | def is_moe(self):
    method is_linear_attn (line 126) | def is_linear_attn(self) -> bool:
    method is_kda_layer (line 136) | def is_kda_layer(self, layer_idx: int):
    method linear_layer_ids (line 143) | def linear_layer_ids(self):
    method full_attention_layer_ids (line 147) | def full_attention_layer_ids(self):
    method mamba2_cache_params (line 151) | def mamba2_cache_params(self) -> KimiLinearCacheParams:

FILE: python/sglang/srt/configs/kimi_vl.py
  class KimiVLConfig (line 11) | class KimiVLConfig(PretrainedConfig):
    method __init__ (line 14) | def __init__(

FILE: python/sglang/srt/configs/kimi_vl_moonvit.py
  class MoonViTConfig (line 6) | class MoonViTConfig(PretrainedConfig):
    method __init__ (line 9) | def __init__(

FILE: python/sglang/srt/configs/lfm2.py
  class Lfm2Config (line 32) | class Lfm2Config(HFLfm2Config):
    method full_attention_layer_ids (line 41) | def full_attention_layer_ids(self) -> List[int]:
    method linear_layer_ids (line 46) | def linear_layer_ids(self) -> List[int]:
    method mamba_chunk_size (line 53) | def mamba_chunk_size(self) -> int:
    method mamba2_cache_params (line 58) | def mamba2_cache_params(self) -> Optional[Mamba2CacheParams]:

FILE: python/sglang/srt/configs/lfm2_moe.py
  class Lfm2MoeConfig (line 28) | class Lfm2MoeConfig(PretrainedConfig):
    method __init__ (line 45) | def __init__(
    method full_attention_layer_ids (line 125) | def full_attention_layer_ids(self) -> List[int]:
    method linear_layer_ids (line 132) | def linear_layer_ids(self) -> List[int]:
    method mamba_chunk_size (line 141) | def mamba_chunk_size(self) -> int:
    method mamba2_cache_params (line 146) | def mamba2_cache_params(self) -> Optional[Mamba2CacheParams]:

FILE: python/sglang/srt/configs/load_config.py
  class LoadFormat (line 15) | class LoadFormat(str, enum.Enum):
  class LoadConfig (line 37) | class LoadConfig:
    method __post_init__ (line 98) | def __post_init__(self):
    method _verify_load_format (line 120) | def _verify_load_format(self) -> None:

FILE: python/sglang/srt/configs/longcat_flash.py
  class LongcatFlashConfig (line 9) | class LongcatFlashConfig(PretrainedConfig):
    method __init__ (line 13) | def __init__(

FILE: python/sglang/srt/configs/mamba_utils.py
  function extra_groups_for_head_shards (line 29) | def extra_groups_for_head_shards(ngroups: int, tp_size: int):
  class Mamba2StateDType (line 42) | class Mamba2StateDType:
  function mamba2_state_dtype (line 47) | def mamba2_state_dtype(config=None) -> Mamba2StateDType:
  class BaseLinearStateParams (line 111) | class BaseLinearStateParams(ABC):
    method mamba_cache_per_req (line 116) | def mamba_cache_per_req(self) -> int:
  class Mamba2StateShape (line 129) | class Mamba2StateShape:
    method create (line 142) | def create(
  class Mamba2CacheParams (line 181) | class Mamba2CacheParams(BaseLinearStateParams):
  class KimiLinearStateShape (line 186) | class KimiLinearStateShape:
    method create (line 198) | def create(
  class KimiLinearCacheParams (line 238) | class KimiLinearCacheParams(BaseLinearStateParams):

FILE: python/sglang/srt/configs/model_config.py
  class AttentionArch (line 42) | class AttentionArch(IntEnum):
  class ModelImpl (line 47) | class ModelImpl(str, Enum):
  function is_deepseek_nsa (line 54) | def is_deepseek_nsa(config) -> bool:
  function get_nsa_index_head_dim (line 80) | def get_nsa_index_head_dim(config: PretrainedConfig) -> int:
  function get_nsa_index_topk (line 85) | def get_nsa_index_topk(config: PretrainedConfig) -> int:
  function get_nsa_index_n_heads (line 90) | def get_nsa_index_n_heads(config: PretrainedConfig) -> int:
  class ModelConfig (line 95) | class ModelConfig:
    method __init__ (line 96) | def __init__(
    method from_server_args (line 248) | def from_server_args(
    method _config_draft_model (line 287) | def _config_draft_model(self):
    method _derive_hybrid_model (line 354) | def _derive_hybrid_model(self):
    method _derive_context_length (line 374) | def _derive_context_length(self, context_length: int):
    method _derive_model_shapes (line 408) | def _derive_model_shapes(self):
    method get_total_num_attention_heads (line 589) | def get_total_num_attention_heads(self) -> int:
    method get_num_attention_heads (line 592) | def get_num_attention_heads(self, tensor_parallel_size) -> int:
    method get_total_num_kv_heads (line 597) | def get_total_num_kv_heads(self) -> int:
    method get_num_kv_heads (line 660) | def get_num_kv_heads(self, tensor_parallel_size) -> int:
    method get_swa_num_kv_heads (line 669) | def get_swa_num_kv_heads(self, tensor_parallel_size) -> int:
    method _parse_quant_hf_config (line 683) | def _parse_quant_hf_config(self):
    method _find_quant_modelslim_config (line 781) | def _find_quant_modelslim_config(self):
    method _parse_modelopt_quant_config (line 793) | def _parse_modelopt_quant_config(self, quant_config_dict: dict) -> Opt...
    method get_quantization_config_log_str (line 812) | def get_quantization_config_log_str(self) -> Optional[str]:
    method _is_already_quantized (line 834) | def _is_already_quantized(self) -> bool:
    method _get_modelopt_quant_type (line 847) | def _get_modelopt_quant_type(self) -> str:
    method _get_sliding_window_size (line 871) | def _get_sliding_window_size(self) -> Optional[int]:
    method _validate_quantize_and_serve_config (line 877) | def _validate_quantize_and_serve_config(self):
    method _verify_quantization (line 908) | def _verify_quantization(self) -> None:
    method _verify_dual_chunk_attention_config (line 1053) | def _verify_dual_chunk_attention_config(self) -> None:
    method _verify_transformers_version (line 1070) | def _verify_transformers_version(self):
    method _get_hf_eos_token_id (line 1105) | def _get_hf_eos_token_id(self) -> Optional[Set[int]]:
    method get_default_sampling_params (line 1125) | def get_default_sampling_params(self) -> dict[str, Any]:
    method _maybe_pull_model_tokenizer_from_remote (line 1157) | def _maybe_pull_model_tokenizer_from_remote(self) -> None:
  function _get_and_verify_dtype (line 1192) | def _get_and_verify_dtype(
  function is_generation_model (line 1256) | def is_generation_model(model_architectures: List[str], is_embedding: bo...
  function is_multimodal_model (line 1351) | def is_multimodal_model(model_architectures: List[str]):
  function is_multimodal_gen_model (line 1361) | def is_multimodal_gen_model(model_architectures: List[str]):
  function is_image_gen_model (line 1365) | def is_image_gen_model(model_architectures: List[str]):
  function is_audio_model (line 1369) | def is_audio_model(model_architectures: List[str]):
  function is_encoder_decoder_model (line 1376) | def is_encoder_decoder_model(model_architectures: List[str]):
  function is_local_attention_model (line 1384) | def is_local_attention_model(model_architectures: List[str]):
  function is_multimodal_chunked_prefill_supported (line 1388) | def is_multimodal_chunked_prefill_supported(model_architectures: List[st...
  function is_piecewise_cuda_graph_disabled_model (line 1403) | def is_piecewise_cuda_graph_disabled_model(model_architectures: List[str]):
  function yarn_get_mscale (line 1410) | def yarn_get_mscale(scale: float = 1, mscale: float = 1) -> float:
  function compute_mla_mscale_scaling (line 1416) | def compute_mla_mscale_scaling(rope_scaling: dict, base_scaling: float) ...
  function is_hybrid_swa_model (line 1433) | def is_hybrid_swa_model(model_architectures: List[str]):
  function get_hybrid_layer_ids (line 1446) | def get_hybrid_layer_ids(

FILE: python/sglang/srt/configs/modelopt_config.py
  class ModelOptConfig (line 7) | class ModelOptConfig:
    method __post_init__ (line 27) | def __post_init__(self):

FILE: python/sglang/srt/configs/nano_nemotron_vl.py
  function float_triplet (line 25) | def float_triplet(seq: Any):
  class NemotronH_Nano_VL_V2_Config (line 33) | class NemotronH_Nano_VL_V2_Config(PretrainedConfig):
    method __init__ (line 37) | def __init__(
    method create_radio_config (line 101) | def create_radio_config(self):

FILE: python/sglang/srt/configs/nemotron_h.py
  class NemotronHConfig (line 38) | class NemotronHConfig(PretrainedConfig):
    method _validate_layers_block_type (line 161) | def _validate_layers_block_type(
    method _resolve_layers_block_type (line 189) | def _resolve_layers_block_type(
    method _resolve_mtp_layers_block_type (line 206) | def _resolve_mtp_layers_block_type(mtp_layers_block_type, kwargs) -> l...
    method _resolve_mamba_chunk_size (line 218) | def _resolve_mamba_chunk_size(mamba_chunk_size, kwargs) -> int:
    method __init__ (line 239) | def __init__(
    method mamba_layer_ids (line 402) | def mamba_layer_ids(self):
    method full_attention_layer_ids (line 410) | def full_attention_layer_ids(self):
    method mamba2_cache_params (line 418) | def mamba2_cache_params(self) -> Mamba2CacheParams:
    method num_hidden_layers (line 436) | def num_hidden_layers(self) -> int:
    method num_hidden_layers (line 444) | def num_hidden_layers(self, value):
    method hybrid_override_pattern (line 452) | def hybrid_override_pattern(self) -> str:
    method hybrid_override_pattern (line 460) | def hybrid_override_pattern(self, value):
    method mtp_hybrid_override_pattern (line 467) | def mtp_hybrid_override_pattern(self) -> str:
    method mtp_hybrid_override_pattern (line 475) | def mtp_hybrid_override_pattern(self, value):
    method _list_to_pattern (line 480) | def _list_to_pattern(layers_list: list[str]) -> str:
    method _pattern_to_list (line 491) | def _pattern_to_list(pattern: str) -> list[str]:

FILE: python/sglang/srt/configs/olmo3.py
  class Olmo3LayerType (line 25) | class Olmo3LayerType(enum.Enum):
  class Olmo3Config (line 30) | class Olmo3Config(PretrainedConfig):
    method __init__ (line 35) | def __init__(

FILE: python/sglang/srt/configs/points_v15_chat.py
  class POINTSV15ChatConfig (line 7) | class POINTSV15ChatConfig(PretrainedConfig):
    method __init__ (line 10) | def __init__(

FILE: python/sglang/srt/configs/qwen3_5.py
  class Qwen3_5VisionConfig (line 7) | class Qwen3_5VisionConfig(Qwen3VLVisionConfig):
  class Qwen3_5TextConfig (line 12) | class Qwen3_5TextConfig(Qwen3NextConfig):
    method __init__ (line 16) | def __init__(
  class Qwen3_5Config (line 34) | class Qwen3_5Config(PretrainedConfig):
    method __init__ (line 81) | def __init__(
  class Qwen3_5MoeVisionConfig (line 109) | class Qwen3_5MoeVisionConfig(Qwen3_5VisionConfig):
  class Qwen3_5MoeTextConfig (line 113) | class Qwen3_5MoeTextConfig(Qwen3_5TextConfig):
  class Qwen3_5MoeConfig (line 117) | class Qwen3_5MoeConfig(Qwen3_5Config):

FILE: python/sglang/srt/configs/qwen3_next.py
  class HybridLayerType (line 34) | class HybridLayerType(enum.Enum):
  class Qwen3NextConfig (line 39) | class Qwen3NextConfig(PretrainedConfig):
    method __init__ (line 180) | def __init__(
    method layers_block_type (line 255) | def layers_block_type(self):
    method linear_layer_ids (line 267) | def linear_layer_ids(self):
    method full_attention_layer_ids (line 275) | def full_attention_layer_ids(self):
    method mamba2_cache_params (line 283) | def mamba2_cache_params(self) -> Mamba2CacheParams:

FILE: python/sglang/srt/configs/qwen3_omni.py
  class Qwen3OmniMoeAudioEncoderConfig (line 7) | class Qwen3OmniMoeAudioEncoderConfig(PretrainedConfig):
    method __init__ (line 10) | def __init__(
  class Qwen3OmniMoeVisionEncoderConfig (line 55) | class Qwen3OmniMoeVisionEncoderConfig(PretrainedConfig):
    method __init__ (line 59) | def __init__(
  class Qwen3OmniMoeTextConfig (line 93) | class Qwen3OmniMoeTextConfig(PretrainedConfig):
    method __init__ (line 116) | def __init__(
  class Qwen3OmniMoeThinkerConfig (line 182) | class Qwen3OmniMoeThinkerConfig(PretrainedConfig):
    method __init__ (line 195) | def __init__(
  class Qwen3OmniMoeTalkerCodePredictorConfig (line 237) | class Qwen3OmniMoeTalkerCodePredictorConfig(PretrainedConfig):
    method __init__ (line 258) | def __init__(
  class Qwen3OmniMoeTalkerTextConfig (line 327) | class Qwen3OmniMoeTalkerTextConfig(PretrainedConfig):
    method __init__ (line 351) | def __init__(
  class Qwen3OmniMoeTalkerConfig (line 417) | class Qwen3OmniMoeTalkerConfig(PretrainedConfig):
    method __init__ (line 424) | def __init__(
  class Qwen3OmniMoeCode2WavConfig (line 488) | class Qwen3OmniMoeCode2WavConfig(PretrainedConfig):
    method __init__ (line 490) | def __init__(
    method layer_types (line 533) | def layer_types(self):
  class Qwen3OmniMoeConfig (line 540) | class Qwen3OmniMoeConfig(PretrainedConfig):
    method __init__ (line 549) | def __init__(
    method get_text_config (line 597) | def get_text_config(self, decoder=False) -> "PretrainedConfig":

FILE: python/sglang/srt/configs/qwen3_vl.py
  class Qwen3VLVisionConfig (line 4) | class Qwen3VLVisionConfig(PretrainedConfig):
    method __init__ (line 8) | def __init__(
  class Qwen3VLTextConfig (line 42) | class Qwen3VLTextConfig(PretrainedConfig):
    method __init__ (line 146) | def __init__(
  class Qwen3VLConfig (line 192) | class Qwen3VLConfig(PretrainedConfig):
    method __init__ (line 239) | def __init__(
  class Qwen3VLMoeTextConfig (line 267) | class Qwen3VLMoeTextConfig(PretrainedConfig):
    method __init__ (line 401) | def __init__(
  class Qwen3VLMoeVisionConfig (line 461) | class Qwen3VLMoeVisionConfig(PretrainedConfig):
    method __init__ (line 465) | def __init__(
  class Qwen3VLMoeConfig (line 499) | class Qwen3VLMoeConfig(PretrainedConfig):
    method __init__ (line 546) | def __init__(

FILE: python/sglang/srt/configs/radio.py
  class RadioConfig (line 34) | class RadioConfig(PretrainedConfig):
    method __init__ (line 62) | def __init__(

FILE: python/sglang/srt/configs/step3_vl.py
  class Step3VisionEncoderConfig (line 6) | class Step3VisionEncoderConfig(PretrainedConfig):
    method __init__ (line 9) | def __init__(
  class Step3TextConfig (line 36) | class Step3TextConfig(PretrainedConfig):
    method __init__ (line 40) | def __init__(
  class Step3VLConfig (line 143) | class Step3VLConfig(PretrainedConfig):
    method __init__ (line 146) | def __init__(

FILE: python/sglang/srt/configs/step3p5.py
  class Step3p5Config (line 6) | class Step3p5Config(PretrainedConfig):
    method __init__ (line 10) | def __init__(

FILE: python/sglang/srt/configs/update_config.py
  function may_get_weight_block_size (line 13) | def may_get_weight_block_size(model_config, load_config):
  function get_moe_padding_size (line 23) | def get_moe_padding_size(weight_block_size):
  function get_num_heads_padding_size (line 38) | def get_num_heads_padding_size(tp_size, weight_block_size, head_dim):
  function adjust_tp_num_heads_if_necessary (line 51) | def adjust_tp_num_heads_if_necessary(model_config, tp_size, is_post_upda...
  function update_intermediate_size (line 89) | def update_intermediate_size(model_config, attr_name, intermediate_paddi...
  function adjust_config_with_unaligned_cpu_tp (line 112) | def adjust_config_with_unaligned_cpu_tp(

FILE: python/sglang/srt/configs/utils.py
  function register_image_processor (line 12) | def register_image_processor(
  function register_processor (line 23) | def register_processor(config: Type[PretrainedConfig], processor: Type[P...

FILE: python/sglang/srt/connector/__init__.py
  class ConnectorType (line 19) | class ConnectorType(str, enum.Enum):
  function create_remote_connector (line 25) | def create_remote_connector(url, device=None, **kwargs) -> BaseConnector:
  function get_connector_type (line 37) | def get_connector_type(client: BaseConnector) -> ConnectorType:

FILE: python/sglang/srt/connector/base_connector.py
  class BaseConnector (line 13) | class BaseConnector(ABC):
    method __init__ (line 23) | def __init__(self, url: str):
    method get_local_dir (line 31) | def get_local_dir(self):
    method weight_iterator (line 35) | def weight_iterator(
    method pull_files (line 41) | def pull_files(
    method close (line 48) | def close(self):
    method __enter__ (line 56) | def __enter__(self):
    method __exit__ (line 59) | def __exit__(self, exc_type, exc_value, traceback):
    method __del__ (line 62) | def __del__(self):
    method _close_by_signal (line 65) | def _close_by_signal(self, existing_handler=None):
  class BaseKVConnector (line 75) | class BaseKVConnector(BaseConnector):
    method get (line 78) | def get(self, key: str) -> Optional[torch.Tensor]:
    method getstr (line 82) | def getstr(self, key: str) -> Optional[str]:
    method set (line 86) | def set(self, key: str, obj: torch.Tensor) -> None:
    method setstr (line 90) | def setstr(self, key: str, obj: str) -> None:
    method list (line 94) | def list(self, prefix: str) -> List[str]:
  class BaseFileConnector (line 98) | class BaseFileConnector(BaseConnector):
    method glob (line 110) | def glob(self, allow_pattern: str) -> List[str]:

FILE: python/sglang/srt/connector/redis.py
  class RedisConnector (line 16) | class RedisConnector(BaseKVConnector):
    method __init__ (line 18) | def __init__(self, url: str):
    method get (line 28) | def get(self, key: str) -> Optional[torch.Tensor]:
    method getstr (line 37) | def getstr(self, key: str) -> Optional[str]:
    method set (line 45) | def set(self, key: str, tensor: torch.Tensor) -> None:
    method setstr (line 49) | def setstr(self, key: str, obj: str) -> None:
    method list (line 52) | def list(self, prefix: str) -> List[str]:
    method weight_iterator (line 67) | def weight_iterator(
    method pull_files (line 76) | def pull_files(
    method close (line 83) | def close(self):

FILE: python/sglang/srt/connector/remote_instance.py
  class RemoteInstanceConnector (line 16) | class RemoteInstanceConnector(BaseConnector):
    method __init__ (line 18) | def __init__(self, url: str, device: torch.device = "cpu"):
    method build_group (line 26) | def build_group(
    method pull_files (line 71) | def pull_files(
    method weight_iterator (line 79) | def weight_iterator(

FILE: python/sglang/srt/connector/s3.py
  function _filter_allow (line 13) | def _filter_allow(paths: list[str], patterns: list[str]) -> list[str]:
  function _filter_ignore (line 21) | def _filter_ignore(paths: list[str], patterns: list[str]) -> list[str]:
  function list_files (line 29) | def list_files(
  class S3Connector (line 69) | class S3Connector(BaseFileConnector):
    method __init__ (line 71) | def __init__(self, url: str) -> None:
    method glob (line 77) | def glob(self, allow_pattern: Optional[list[str]] = None) -> list[str]:
    method pull_files (line 83) | def pull_files(
    method weight_iterator (line 109) | def weight_iterator(
    method close (line 120) | def close(self):

FILE: python/sglang/srt/connector/serde/__init__.py
  function create_serde (line 12) | def create_serde(serde_type: str) -> Tuple[Serializer, Deserializer]:

FILE: python/sglang/srt/connector/serde/safe_serde.py
  class SafeSerializer (line 11) | class SafeSerializer(Serializer):
    method __init__ (line 13) | def __init__(self):
    method to_bytes (line 16) | def to_bytes(self, t: torch.Tensor) -> bytes:
  class SafeDeserializer (line 20) | class SafeDeserializer(Deserializer):
    method __init__ (line 22) | def __init__(self):
    method from_bytes_normal (line 26) | def from_bytes_normal(self, b: Union[bytearray, bytes]) -> torch.Tensor:
    method from_bytes (line 29) | def from_bytes(self, b: Union[bytearray, bytes]) -> torch.Tensor:

FILE: python/sglang/srt/connector/serde/serde.py
  class Serializer (line 9) | class Serializer(ABC):
    method to_bytes (line 12) | def to_bytes(self, t: torch.Tensor) -> bytes:
  class Deserializer (line 27) | class Deserializer(metaclass=abc.ABCMeta):
    method __init__ (line 29) | def __init__(self, dtype):
    method from_bytes (line 33) | def from_bytes(self, bs: bytes) -> torch.Tensor:

FILE: python/sglang/srt/connector/utils.py
  function parse_model_name (line 11) | def parse_model_name(url: str) -> str:
  function pull_files_from_db (line 20) | def pull_files_from_db(

FILE: python/sglang/srt/constrained/base_grammar_backend.py
  class GrammarStats (line 30) | class GrammarStats:
  class BaseGrammarObject (line 41) | class BaseGrammarObject:
    method __init__ (line 43) | def __init__(self):
    method maybe_init_reasoning (line 48) | def maybe_init_reasoning(self, reasoning: bool):
    method accept_token (line 51) | def accept_token(self, token: int) -> None:
    method rollback (line 57) | def rollback(self, k: int):
    method is_terminated (line 60) | def is_terminated(self):
    method allocate_vocab_mask (line 63) | def allocate_vocab_mask(
    method fill_vocab_mask (line 68) | def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None:
    method move_vocab_mask (line 72) | def move_vocab_mask(vocab_mask: torch.Tensor, device) -> torch.Tensor:
    method apply_vocab_mask (line 76) | def apply_vocab_mask(logits: torch.Tensor, vocab_mask: torch.Tensor) -...
    method copy (line 79) | def copy(self) -> "BaseGrammarObject":
    method finished (line 83) | def finished(self):
    method finished (line 87) | def finished(self, finished):
    method try_jump_forward (line 90) | def try_jump_forward(self, tokenizer) -> Optional[Tuple[List[int], str]]:
    method jump_forward_str_state (line 100) | def jump_forward_str_state(self, helper: Tuple[List[int], str]) -> Tup...
    method jump_and_retokenize (line 110) | def jump_and_retokenize(
  class InvalidGrammarObject (line 119) | class InvalidGrammarObject(BaseGrammarObject):
    method __init__ (line 122) | def __init__(self, error_message: str = "Unknown grammar error"):
    method __repr__ (line 126) | def __repr__(self):
  class BaseGrammarBackend (line 130) | class BaseGrammarBackend:
    method __init__ (line 131) | def __init__(self):
    method _not_supported (line 135) | def _not_supported(self, key_type: str, key_string: str) -> BaseGramma...
    method dispatch_fallback (line 139) | def dispatch_fallback(self, key_type: str, key_string: str) -> BaseGra...
    method dispatch_json (line 145) | def dispatch_json(self, key_string: str) -> BaseGrammarObject:
    method dispatch_regex (line 148) | def dispatch_regex(self, key_string: str) -> BaseGrammarObject:
    method dispatch_ebnf (line 151) | def dispatch_ebnf(self, key_string: str) -> BaseGrammarObject:
    method dispatch_structural_tag (line 154) | def dispatch_structural_tag(self, key_string: str) -> BaseGrammarObject:
    method _init_value_dispatch (line 157) | def _init_value_dispatch(
    method get_cached_or_future_value (line 177) | def get_cached_or_future_value(
    method set_cache (line 188) | def set_cache(self, key: Tuple[str, str], value: BaseGrammarObject):
    method reset (line 191) | def reset(self):
  function register_grammar_backend (line 198) | def register_grammar_backend(name, init_func):
  function create_grammar_backend (line 202) | def create_grammar_backend(

FILE: python/sglang/srt/constrained/grammar_manager.py
  class GrammarManager (line 24) | class GrammarManager:
    method __init__ (line 25) | def __init__(self, scheduler: Scheduler):
    method __len__ (line 49) | def __len__(self):
    method clear (line 52) | def clear(self):
    method has_waiting_grammars (line 56) | def has_waiting_grammars(self) -> bool:
    method abort_requests (line 59) | def abort_requests(self, recv_req: AbortReq):
    method process_req_with_grammar (line 67) | def process_req_with_grammar(self, req: Req) -> bool:
    method get_ready_grammar_requests (line 111) | def get_ready_grammar_requests(self) -> List[Req]:

FILE: python/sglang/srt/constrained/llguidance_backend.py
  class GuidanceGrammar (line 40) | class GuidanceGrammar(BaseGrammarObject):
    method __init__ (line 42) | def __init__(self, llguidance_tokenizer: LLTokenizer, serialized_gramm...
    method accept_token (line 57) | def accept_token(self, token: int):
    method rollback (line 66) | def rollback(self, num_tokens: int) -> None:
    method is_terminated (line 76) | def is_terminated(self):
    method fill_vocab_mask (line 79) | def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None:
    method allocate_vocab_mask (line 83) | def allocate_vocab_mask(
    method move_vocab_mask (line 98) | def move_vocab_mask(vocab_mask: torch.Tensor, device) -> torch.Tensor:
    method apply_vocab_mask (line 102) | def apply_vocab_mask(logits: torch.Tensor, vocab_mask: torch.Tensor) -...
    method copy (line 105) | def copy(self):
    method try_jump_forward (line 111) | def try_jump_forward(self, tokenizer) -> Optional[Tuple[List[int], str]]:
    method jump_forward_str_state (line 118) | def jump_forward_str_state(self, helper: Tuple[List[int], str]) -> Tup...
    method jump_and_retokenize (line 121) | def jump_and_retokenize(
    method _check_err (line 126) | def _check_err(self) -> None:
  class GuidanceBackend (line 131) | class GuidanceBackend(BaseGrammarBackend):
    method __init__ (line 133) | def __init__(
    method _from_serialized (line 147) | def _from_serialized(self, serialized_grammar) -> BaseGrammarObject:
    method dispatch_json (line 157) | def dispatch_json(self, key_string: str) -> BaseGrammarObject:
    method dispatch_regex (line 171) | def dispatch_regex(self, key_string: str) -> BaseGrammarObject:
    method dispatch_ebnf (line 175) | def dispatch_ebnf(self, key_string: str) -> BaseGrammarObject:
    method dispatch_structural_tag (line 183) | def dispatch_structural_tag(self, key_string: str) -> BaseGrammarObject:

FILE: python/sglang/srt/constrained/outlines_backend.py
  class OutlinesGrammar (line 42) | class OutlinesGrammar(BaseGrammarObject):
    method __init__ (line 43) | def __init__(
    method accept_token (line 53) | def accept_token(self, token: int):
    method allocate_vocab_mask (line 56) | def allocate_vocab_mask(
    method move_vocab_mask (line 62) | def move_vocab_mask(vocab_mask: torch.Tensor, device) -> torch.Tensor:
    method fill_vocab_mask (line 65) | def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None:
    method apply_vocab_mask (line 74) | def apply_vocab_mask(logits: torch.Tensor, vocab_mask: torch.Tensor):
    method copy (line 77) | def copy(self):
    method try_jump_forward (line 80) | def try_jump_forward(self, tokenizer) -> Optional[Tuple]:
    method jump_forward_str_state (line 104) | def jump_forward_str_state(self, helper: Tuple[List[int], str]) -> Tup...
    method jump_and_retokenize (line 108) | def jump_and_retokenize(
  class OutlinesGrammarBackend (line 114) | class OutlinesGrammarBackend(BaseGrammarBackend):
    method __init__ (line 115) | def __init__(
    method _compile_regex (line 145) | def _compile_regex(self, regex: str) -> BaseGrammarObject:
    method dispatch_ebnf (line 160) | def dispatch_ebnf(self, key_string: str):
    method dispatch_structural_tag (line 163) | def dispatch_structural_tag(self, key_string: str):
    method dispatch_json (line 166) | def dispatch_json(self, key_string: str):
    method dispatch_regex (line 177) | def dispatch_regex(self, key_string: str):
  function build_regex_from_object (line 181) | def build_regex_from_object(

FILE: python/sglang/srt/constrained/outlines_jump_forward.py
  class JumpEdge (line 47) | class JumpEdge:
  function disk_cache (line 54) | def disk_cache(expire: Optional[float] = None, typed=False, ignore=()):
  function init_state_to_jump_forward (line 62) | def init_state_to_jump_forward(regex_string):
  class OutlinesJumpForwardMap (line 142) | class OutlinesJumpForwardMap:
    method __init__ (line 143) | def __init__(self, regex_string):
    method jump_forward_symbol (line 146) | def jump_forward_symbol(self, state):
    method jump_forward_byte (line 159) | def jump_forward_byte(self, state):
    method is_jump_forward_symbol_state (line 174) | def is_jump_forward_symbol_state(self, state):
  function test_main (line 181) | def test_main(regex_string):

FILE: python/sglang/srt/constrained/reasoner_grammar_backend.py
  class ReasonerGrammarObject (line 27) | class ReasonerGrammarObject(BaseGrammarObject):
    method __init__ (line 28) | def __init__(self, grammar: BaseGrammarObject, think_end_id: int):
    method maybe_init_reasoning (line 37) | def maybe_init_reasoning(self, reasoning: bool):
    method transfer_state (line 40) | def transfer_state(self, token: int) -> int:
    method rollback_state (line 46) | def rollback_state(self):
    method accept_token (line 52) | def accept_token(self, token: int):
    method is_terminated (line 57) | def is_terminated(self):
    method rollback (line 60) | def rollback(self, k):
    method allocate_vocab_mask (line 68) | def allocate_vocab_mask(
    method fill_vocab_mask (line 73) | def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None:
    method move_vocab_mask (line 77) | def move_vocab_mask(self, vocab_mask: torch.Tensor, device) -> torch.T...
    method apply_vocab_mask (line 81) | def apply_vocab_mask(self):
    method copy (line 84) | def copy(self) -> BaseGrammarObject:
    method finished (line 88) | def finished(self):
    method finished (line 92) | def finished(self, finished):
    method try_jump_forward (line 95) | def try_jump_forward(self, tokenizer):
    method jump_forward_str_state (line 98) | def jump_forward_str_state(self, helper):
    method jump_and_retokenize (line 101) | def jump_and_retokenize(
  class ReasonerGrammarBackend (line 109) | class ReasonerGrammarBackend(BaseGrammarBackend):
    method __init__ (line 110) | def __init__(self, grammar_backend: BaseGrammarBackend, think_end_id):
    method _init_value_dispatch (line 115) | def _init_value_dispatch(

FILE: python/sglang/srt/constrained/triton_ops/bitmask_ops.py
  function apply_token_bitmask_inplace_kernel (line 14) | def apply_token_bitmask_inplace_kernel(
  function apply_token_bitmask_inplace_triton (line 84) | def apply_token_bitmask_inplace_triton(

FILE: python/sglang/srt/constrained/utils.py
  function is_legacy_structural_tag (line 4) | def is_legacy_structural_tag(obj: Dict) -> bool:

FILE: python/sglang/srt/constrained/xgrammar_backend.py
  class XGrammarGrammar (line 53) | class XGrammarGrammar(BaseGrammarObject):
    method __init__ (line 55) | def __init__(
    method accept_token (line 73) | def accept_token(self, token: int):
    method rollback (line 87) | def rollback(self, k: int):
    method is_terminated (line 91) | def is_terminated(self):
    method allocate_vocab_mask (line 94) | def allocate_vocab_mask(
    method fill_vocab_mask (line 99) | def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None:
    method move_vocab_mask (line 103) | def move_vocab_mask(vocab_mask: torch.Tensor, device) -> torch.Tensor:
    method apply_vocab_mask (line 106) | def apply_vocab_mask(self, logits: torch.Tensor, vocab_mask: torch.Ten...
    method copy (line 119) | def copy(self):
    method try_jump_forward (line 138) | def try_jump_forward(self, tokenizer) -> Optional[Tuple[List[int], str]]:
    method jump_forward_str_state (line 144) | def jump_forward_str_state(self, helper: Tuple[List[int], str]) -> Tup...
    method jump_and_retokenize (line 148) | def jump_and_retokenize(
    method __repr__ (line 165) | def __repr__(self):
  class TokenizerNotSupportedError (line 169) | class TokenizerNotSupportedError(Exception):
  class XGrammarGrammarBackend (line 175) | class XGrammarGrammarBackend(BaseGrammarBackend):
    method __init__ (line 176) | def __init__(
    method _sanitize_structural_format (line 213) | def _sanitize_structural_format(structural_format):
    method _sanitize_structural_tag_structures (line 235) | def _sanitize_structural_tag_structures(structural_tag: Dict) -> None:
    method _from_context (line 240) | def _from_context(
    method dispatch_json (line 257) | def dispatch_json(self, key_string: str) -> BaseGrammarObject:
    method dispatch_ebnf (line 272) | def dispatch_ebnf(self, key_string: str) -> BaseGrammarObject:
    method dispatch_regex (line 280) | def dispatch_regex(self, key_string: str) -> BaseGrammarObject:
    method dispatch_structural_tag (line 288) | def dispatch_structural_tag(self, key_string: str) -> BaseGrammarObject:
    method reset (line 319) | def reset(self):
  function demo_test (line 323) | def demo_test():

FILE: python/sglang/srt/debug_utils/comparator/aligner/axis_aligner.py
  class AxisAlignerPlan (line 20) | class AxisAlignerPlan(_FrozenBase):
  function compute_axis_aligner_plan (line 27) | def compute_axis_aligner_plan(
  function _semantic_names_match (line 54) | def _semantic_names_match(specs_pair: Pair[list[DimSpec]]) -> bool:
  function _expand_and_skip_squeeze (line 77) | def _expand_and_skip_squeeze(specs: list[DimSpec]) -> list[str]:
  function _build_canonical_order (line 87) | def _build_canonical_order(specs_pair: Pair[list[DimSpec]]) -> Optional[...
  function _build_side_pattern (line 145) | def _build_side_pattern(
  function execute_axis_aligner_plan (line 179) | def execute_axis_aligner_plan(

FILE: python/sglang/srt/debug_utils/comparator/aligner/entrypoint/executor.py
  class StepPlansResult (line 44) | class StepPlansResult(NamedTuple):
  class SubPlansResult (line 50) | class SubPlansResult(NamedTuple):
  class AlignerResult (line 57) | class AlignerResult:
  function execute_aligner_plan (line 64) | def execute_aligner_plan(
  function _execute_step_plans (line 132) | def _execute_step_plans(
  function execute_sub_plans (line 171) | def execute_sub_plans(
  function execute_sub_plan (line 202) | def execute_sub_plan(

FILE: python/sglang/srt/debug_utils/comparator/aligner/entrypoint/planner.py
  function compute_aligner_plan (line 36) | def compute_aligner_plan(
  function _compute_per_step_plans (line 69) | def _compute_per_step_plans(
  function compute_per_step_sub_plans (line 99) | def compute_per_step_sub_plans(

FILE: python/sglang/srt/debug_utils/comparator/aligner/entrypoint/traced_types.py
  class TracedSubPlan (line 20) | class TracedSubPlan(_StrictBase):
  class TracedStepPlan (line 25) | class TracedStepPlan(_StrictBase):
  class TracedSidePlan (line 31) | class TracedSidePlan(_StrictBase):
  class TracedAlignerPlan (line 35) | class TracedAlignerPlan(_StrictBase):

FILE: python/sglang/srt/debug_utils/comparator/aligner/entrypoint/types.py
  class AlignerPerStepPlan (line 21) | class AlignerPerStepPlan(_FrozenBase):
  class AlignerPlan (line 27) | class AlignerPlan(_FrozenBase):

FILE: python/sglang/srt/debug_utils/comparator/aligner/reorderer/executor.py
  function execute_reorderer_plan (line 16) | def execute_reorderer_plan(
  function _reorder_zigzag_to_natural_thd (line 42) | def _reorder_zigzag_to_natural_thd(
  function _reorder_zigzag_to_natural (line 81) | def _reorder_zigzag_to_natural(

FILE: python/sglang/srt/debug_utils/comparator/aligner/reorderer/planner.py
  function compute_reorderer_plans (line 20) | def compute_reorderer_plans(

FILE: python/sglang/srt/debug_utils/comparator/aligner/reorderer/types.py
  class ZigzagToNaturalParams (line 8) | class ZigzagToNaturalParams(_FrozenBase):
  class ZigzagToNaturalThdParams (line 14) | class ZigzagToNaturalThdParams(_FrozenBase):
  class ReordererPlan (line 27) | class ReordererPlan(_FrozenBase):

FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/concat_steps/executor.py
  function execute_token_aligner_concat_steps (line 16) | def execute_token_aligner_concat_steps(
  function _resolve_token_dim (line 30) | def _resolve_token_dim(tensor: torch.Tensor) -> int:
  function _concat_steps (line 44) | def _concat_steps(tensor_of_step: dict[int, torch.Tensor], *, dim: int) ...

FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/concat_steps/thd_seq_lens_loader.py
  function load_thd_seq_lens_only (line 17) | def load_thd_seq_lens_only(

FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/entrypoint.py
  class TokenAlignerResult (line 38) | class TokenAlignerResult:
  function compute_maybe_token_aligner_result (line 46) | def compute_maybe_token_aligner_result(
  function _build_smart_result (line 81) | def _build_smart_result(
  function _load_thd_seq_lens_pair (line 123) | def _load_thd_seq_lens_pair(

FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/aux_loader.py
  function load_and_normalize_aux (line 46) | def load_and_normalize_aux(
  function has_aux_tensors (line 90) | def has_aux_tensors(df: pl.DataFrame) -> bool:
  function _detect_plugin (line 96) | def _detect_plugin(df: pl.DataFrame, dump_path: Path) -> Optional[_AuxFr...
  function _load_step_data (line 113) | def _load_step_data(
  function _load_non_tensor_aux (line 167) | def _load_non_tensor_aux(
  function _load_and_align_aux_tensor (line 198) | def _load_and_align_aux_tensor(
  function _ensure_dims_in_metas (line 261) | def _ensure_dims_in_metas(

FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/aux_plugins.py
  class _AuxFrameworkPlugin (line 21) | class _AuxFrameworkPlugin(ABC):
    method name (line 24) | def name(self) -> str: ...
    method tensor_names (line 28) | def tensor_names(self) -> frozenset[str]: ...
    method non_tensor_names (line 32) | def non_tensor_names(self) -> frozenset[str]: ...
    method cp_sharded_names (line 35) | def cp_sharded_names(self) -> frozenset[str]:
    method discriminating_names (line 39) | def discriminating_names(self) -> frozenset[str]:
    method detect_layout (line 44) | def detect_layout(self, raw: dict[int, dict[str, object]]) -> TokenLay...
    method compute_step_aux (line 47) | def compute_step_aux(
    method has_required_names (line 52) | def has_required_names(self, names: set[str]) -> bool:
    method all_names (line 57) | def all_names(self) -> frozenset[str]:
    method extract_global_seq_lens (line 60) | def extract_global_seq_lens(
    method infer_cp_sharded_dims (line 69) | def infer_cp_sharded_dims(self, name: str, ndim: int) -> str:
  class _SGLangPlugin (line 79) | class _SGLangPlugin(_AuxFrameworkPlugin):
    method name (line 81) | def name(self) -> str:
    method tensor_names (line 85) | def tensor_names(self) -> frozenset[str]:
    method non_tensor_names (line 89) | def non_tensor_names(self) -> frozenset[str]:
    method cp_sharded_names (line 93) | def cp_sharded_names(self) -> frozenset[str]:
    method discriminating_names (line 97) | def discriminating_names(self) -> frozenset[str]:
    method has_required_names (line 100) | def has_required_names(self, names: set[str]) -> bool:
    method detect_layout (line 103) | def detect_layout(self, raw: dict[int, dict[str, object]]) -> TokenLay...
    method extract_global_seq_lens (line 106) | def extract_global_seq_lens(
    method infer_cp_sharded_dims (line 118) | def infer_cp_sharded_dims(self, name: str, ndim: int) -> str:
    method compute_step_aux (line 130) | def compute_step_aux(
  class _MegatronPlugin (line 168) | class _MegatronPlugin(_AuxFrameworkPlugin):
    method name (line 170) | def name(self) -> str:
    method tensor_names (line 174) | def tensor_names(self) -> frozenset[str]:
    method non_tensor_names (line 178) | def non_tensor_names(self) -> frozenset[str]:
    method cp_sharded_names (line 182) | def cp_sharded_names(self) -> frozenset[str]:
    method discriminating_names (line 186) | def discriminating_names(self) -> frozenset[str]:
    method has_required_names (line 189) | def has_required_names(self, names: set[str]) -> bool:
    method extract_global_seq_lens (line 192) | def extract_global_seq_lens(
    method infer_cp_sharded_dims (line 204) | def infer_cp_sharded_dims(self, name: str, ndim: int) -> str:
    method detect_layout (line 218) | def detect_layout(self, raw: dict[int, dict[str, object]]) -> TokenLay...
    method compute_step_aux (line 241) | def compute_step_aux(
  function _infer_positions (line 290) | def _infer_positions(*, seq_lens: torch.Tensor) -> torch.Tensor:

FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/executor.py
  function execute_token_aligner (line 23) | def execute_token_aligner(
  function _collapse_bs_to_t (line 55) | def _collapse_bs_to_t(
  function _build_bs_collapse_pattern (line 94) | def _build_bs_collapse_pattern(
  function _resolve_dim_or_fallback (line 123) | def _resolve_dim_or_fallback(tensor: torch.Tensor, name: str) -> int:
  function _make_empty (line 129) | def _make_empty(*, tensor_of_step: dict[int, torch.Tensor]) -> torch.Ten...
  function _extract_and_stack_tokens (line 137) | def _extract_and_stack_tokens(

FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/planner.py
  function compute_token_aligner_plan (line 16) | def compute_token_aligner_plan(
  function _match_sequences (line 60) | def _match_sequences(
  function _find_matching_x_exact (line 92) | def _find_matching_x_exact(
  class _PrefixCandidate (line 107) | class _PrefixCandidate(NamedTuple):
  function _find_matching_x_prefix (line 112) | def _find_matching_x_prefix(
  function _is_prefix_pair (line 132) | def _is_prefix_pair(a: list[int], b: list[int]) -> bool:

FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/seq_info_builder.py
  class _SeqInfoAccumulator (line 16) | class _SeqInfoAccumulator:
    method extend (line 24) | def extend(
    method build (line 37) | def build(self) -> TokenAlignerSeqInfo:
  function build_seqs_info (line 48) | def build_seqs_info(global_aux: TokenAlignerGlobalAux) -> TokenAlignerSe...
  function _build_token_aligner_seq_infos (line 56) | def _build_token_aligner_seq_infos(

FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/types.py
  class SGLangSeqId (line 16) | class SGLangSeqId(NamedTuple):
  class PositionalSeqId (line 20) | class PositionalSeqId(NamedTuple):
  class TokenAlignerStepAux (line 29) | class TokenAlignerStepAux:
    method __post_init__ (line 37) | def __post_init__(self) -> None:
  class TokenAlignerGlobalAux (line 49) | class TokenAlignerGlobalAux:
  class TokenLocator (line 58) | class TokenLocator(_FrozenBase):
    method __add__ (line 67) | def __add__(self, other: TokenLocator) -> TokenLocator:
  class TokenAlignerSeqInfo (line 74) | class TokenAlignerSeqInfo(_FrozenBase):
    method _validate_fields (line 83) | def _validate_fields(self) -> TokenAlignerSeqInfo:
    method __add__ (line 99) | def __add__(self, other: TokenAlignerSeqInfo) -> TokenAlignerSeqInfo:
  class TokenAlignerSeqsInfo (line 107) | class TokenAlignerSeqsInfo(_FrozenBase):
  class TokenAlignerPlan (line 114) | class TokenAlignerPlan(_FrozenBase):
    method _validate_fields (line 121) | def _validate_fields(self) -> TokenAlignerPlan:

FILE: python/sglang/srt/debug_utils/comparator/aligner/unsharder/executor.py
  class UnsharderResult (line 25) | class UnsharderResult:
  function execute_unsharder_plan (line 30) | def execute_unsharder_plan(
  function _apply_unshard (line 51) | def _apply_unshard(
  function _verify_replicated_group (line 92) | def _verify_replicated_group(
  function _check_replicated_pair (line 112) | def _check_replicated_pair(
  function _thd_concat (line 144) | def _thd_concat(

FILE: python/sglang/srt/debug_utils/comparator/aligner/unsharder/parallel_info.py
  function _is_error_sentinel (line 9) | def _is_error_sentinel(value: dict) -> bool:
  function normalize_parallel_info (line 14) | def normalize_parallel_info(meta: dict) -> dict[ParallelAxis, AxisInfo]:

FILE: python/sglang/srt/debug_utils/comparator/aligner/unsharder/planner.py
  class _GroupResult (line 26) | class _GroupResult(NamedTuple):
  function compute_unsharder_plan (line 31) | def compute_unsharder_plan(
  function _validate_explicit_replicated (line 114) | def _validate_explicit_replicated(
  function _validate (line 145) | def _validate(
  function _group_and_project (line 180) | def _group_and_project(
  function _resolve_unshard_params (line 205) | def _resolve_unshard_params(

FILE: python/sglang/srt/debug_utils/comparator/aligner/unsharder/types.py
  class AxisInfo (line 11) | class AxisInfo(_FrozenBase):
    method _validate_bounds (line 16) | def _validate_bounds(self) -> AxisInfo:
  class ConcatParams (line 26) | class ConcatParams(_FrozenBase):
  class CpThdConcatParams (line 31) | class CpThdConcatParams(_FrozenBase):
  class PickParams (line 37) | class PickParams(_FrozenBase):
  class ReduceSumParams (line 41) | class ReduceSumParams(_FrozenBase):
  class UnsharderPlan (line 51) | class UnsharderPlan(_FrozenBase):

FILE: python/sglang/srt/debug_utils/comparator/bundle_comparator.py
  function _collect_bundle_side_info (line 49) | def _collect_bundle_side_info(
  function compare_bundle_pair (line 80) | def compare_bundle_pair(
  function _compare_bundle_pair_inner (line 113) | def _compare_bundle_pair_inner(
  function _extract_dp_alias_from_items (line 184) | def _extract_dp_alias_from_items(items: list[ValueWithMeta]) -> Optional...
  function _compare_bundle_pair_tensor_type (line 194) | def _compare_bundle_pair_tensor_type(
  function _try_generate_viz (line 284) | def _try_generate_viz(
  function _resolve_seq_dim (line 317) | def _resolve_seq_dim(tensor: torch.Tensor) -> Optional[int]:
  function _compare_bundle_pair_non_tensor_type (line 330) | def _compare_bundle_pair_non_tensor_type(
  function _apply_dim_names_from_meta (line 353) | def _apply_dim_names_from_meta(
  function _load_all_values (line 369) | def _load_all_values(filenames: list[str], base_path: Path) -> list[Valu...

FILE: python/sglang/srt/debug_utils/comparator/bundle_matcher.py
  class TensorFileInfo (line 14) | class TensorFileInfo:
  function match_bundles (line 23) | def match_bundles(
  function _rows_to_tensor_infos (line 41) | def _rows_to_tensor_infos(rows: list[dict[str, Any]]) -> list[TensorFile...

FILE: python/sglang/srt/debug_utils/comparator/dims_spec/comment_parser.py
  class _CommentSuffix (line 15) | class _CommentSuffix(NamedTuple):
  function _parse_comment_suffix (line 20) | def _parse_comment_suffix(declaration_part: str) -> _CommentSuffix:

FILE: python/sglang/srt/debug_utils/comparator/dims_spec/dim_parser.py
  function parse_dim (line 22) | def parse_dim(token: str) -> DimSpec:
  function _parse_single_dim (line 33) | def _parse_single_dim(token: str) -> DimSpec:
  function _parse_fused_dim (line 45) | def _parse_fused_dim(*, token: str, fused_match: re.Match[str]) -> DimSpec:

FILE: python/sglang/srt/debug_utils/comparator/dims_spec/dims_parser.py
  class _SingletonDimUtil (line 18) | class _SingletonDimUtil:
    method is_squeeze (line 24) | def is_squeeze(spec: DimSpec) -> bool:
    method filter_out (line 28) | def filter_out(dim_specs: list[DimSpec]) -> list[DimSpec]:
    method make_name (line 32) | def make_name(index: int) -> str:
    method is_singleton_name (line 36) | def is_singleton_name(name: str) -> bool:
    method sanitize_names (line 43) | def sanitize_names(names: list[str]) -> list[str]:
  function parse_dims (line 58) | def parse_dims(dims_str: str) -> DimsSpec:
  function resolve_dim_names (line 109) | def resolve_dim_names(dims_str: str) -> list[str]:

FILE: python/sglang/srt/debug_utils/comparator/dims_spec/modifier_parser.py
  function _parse_modifier_token (line 15) | def _parse_modifier_token(modifier_token: str, dim_token: str) -> Parall...
  function _parse_modifiers (line 66) | def _parse_modifiers(

FILE: python/sglang/srt/debug_utils/comparator/dims_spec/tensor_naming.py
  function find_dim_index (line 10) | def find_dim_index(dim_specs: list[DimSpec], name: str) -> Optional[int]:
  function resolve_dim_by_name (line 18) | def resolve_dim_by_name(tensor: torch.Tensor, name: str) -> int:
  function apply_dim_names (line 29) | def apply_dim_names(tensor: torch.Tensor, dim_names: list[str]) -> torch...
  function strip_dim_names (line 39) | def strip_dim_names(tensor: torch.Tensor) -> torch.Tensor:

FILE: python/sglang/srt/debug_utils/comparator/dims_spec/types.py
  class TokenLayout (line 14) | class TokenLayout(Enum):
  class ParallelAxis (line 19) | class ParallelAxis(Enum):
  class Ordering (line 27) | class Ordering(Enum):
  class Reduction (line 32) | class Reduction(Enum):
  class ParallelModifier (line 36) | class ParallelModifier(_FrozenBase):
  class DimSpec (line 51) | class DimSpec(_FrozenBase):
    method sub_dims (line 56) | def sub_dims(self) -> list[str]:
    method is_fused (line 61) | def is_fused(self) -> bool:
    method sanitized_name (line 65) | def sanitized_name(self) -> str:
  class DimsSpec (line 72) | class DimsSpec(_FrozenBase):

FILE: python/sglang/srt/debug_utils/comparator/display.py
  function emit_display_records (line 20) | def emit_display_records(
  function _render_polars_as_text (line 40) | def _render_polars_as_text(df: pl.DataFrame, *, title: Optional[str] = N...
  function _collect_rank_info (line 55) | def _collect_rank_info(
  function _collect_input_ids_and_positions (line 78) | def _collect_input_ids_and_positions(
  function _extract_parallel_info (line 122) | def _extract_parallel_info(row_data: dict[str, Any], info: dict[str, Any...

FILE: python/sglang/srt/debug_utils/comparator/dp_utils.py
  function filter_to_non_empty_dp_rank (line 18) | def filter_to_non_empty_dp_rank(
  function _extract_dp_info (line 71) | def _extract_dp_info(
  function _group_has_data (line 97) | def _group_has_data(group: list[ValueWithMeta]) -> bool:

FILE: python/sglang/srt/debug_utils/comparator/entrypoint.py
  function main (line 52) | def main() -> None:
  function run (line 57) | def run(args: argparse.Namespace) -> int:
  function _resolve_report_path (line 161) | def _resolve_report_path(
  function _maybe_load_tokenizer (line 169) | def _maybe_load_tokenizer(*, tokenizer_arg: Optional[str], dir_pair: Pai...
  function _read_df (line 189) | def _read_df(
  function _compare_bundle_pairs (line 209) | def _compare_bundle_pairs(
  function _consume_comparison_records (line 271) | def _consume_comparison_records(
  function parse_args (line 315) | def parse_args(argv: list[str]) -> argparse.Namespace:

FILE: python/sglang/srt/debug_utils/comparator/log_sink.py
  class LogSink (line 9) | class LogSink:
    method __init__ (line 10) | def __init__(self) -> None:
    method context (line 14) | def context(self) -> Generator[list[BaseLog], None, None]:
    method add (line 23) | def add(self, log: BaseLog) -> None:

FILE: python/sglang/srt/debug_utils/comparator/meta_overrider.py
  class MetaOverrideRule (line 18) | class MetaOverrideRule(_StrictBase):
  class MetaOverrideConfig (line 29) | class MetaOverrideConfig(_StrictBase):
  class MetaOverrider (line 35) | class MetaOverrider:
    method __init__ (line 38) | def __init__(self, rules: list[MetaOverrideRule]) -> None:
    method is_empty (line 42) | def is_empty(self) -> bool:
    method from_args_and_config (line 46) | def from_args_and_config(
    method apply_to_meta (line 71) | def apply_to_meta(
  function _parse_cli_override_arg (line 88) | def _parse_cli_override_arg(raw: str) -> tuple[str, str]:
  function _load_yaml_rules (line 98) | def _load_yaml_rules(path: Path) -> list[MetaOverrideRule]:

FILE: python/sglang/srt/debug_utils/comparator/output_formatter.py
  function _render_record_rich (line 48) | def _render_record_rich(
  function _render_record_text (line 66) | def _render_record_text(record: _OutputRecord) -> str:
  function _format_log_lines_rich (line 77) | def _format_log_lines_rich(
  function _format_log_lines_text (line 90) | def _format_log_lines_text(*, errors: list[ErrorLog], infos: list[InfoLo...
  function _format_config_body (line 104) | def _format_config_body(record: ConfigRecord) -> str:
  function _format_config_rich_body (line 108) | def _format_config_rich_body(
  function _format_skip_body (line 118) | def _format_skip_body(record: ComparisonSkipRecord) -> str:
  function _format_skip_rich_body (line 122) | def _format_skip_rich_body(
  function _format_error_body (line 134) | def _format_error_body(record: ComparisonErrorRecord) -> str:
  function _format_error_rich_body (line 142) | def _format_error_rich_body(
  function _format_table_body (line 158) | def _format_table_body(record: _TableRecord) -> str:
  function _format_table_rich_body (line 168) | def _format_table_rich_body(
  function _format_tensor_comparison_body (line 185) | def _format_tensor_comparison_body(record: ComparisonTensorRecord) -> str:
  function _format_tensor_comparison_rich_body (line 194) | def _format_tensor_comparison_rich_body(
  function _format_non_tensor_body (line 209) | def _format_non_tensor_body(record: ComparisonNonTensorRecord) -> str:
  function _format_non_tensor_rich_body (line 220) | def _format_non_tensor_rich_body(
  function _format_summary_body (line 243) | def _format_summary_body(record: SummaryRecord) -> str:
  function _format_summary_rich_body (line 253) | def _format_summary_rich_body(
  function _format_log_body (line 270) | def _format_log_body(record: LogRecord) -> str:
  function _format_aligner_plan (line 277) | def _format_aligner_plan(traced_plan: TracedAlignerPlan) -> str:
  function _format_sub_plan_text (line 302) | def _format_sub_plan_text(traced_sub: TracedSubPlan) -> str:
  function _format_cross_side_plan_text (line 316) | def _format_cross_side_plan_text(plan: AlignerPlan) -> list[str]:

FILE: python/sglang/srt/debug_utils/comparator/output_types.py
  class BaseLog (line 45) | class BaseLog(_StrictBase):
    method to_text (line 49) | def to_text(self) -> str:
  class ErrorLog (line 53) | class ErrorLog(BaseLog):
  class InfoLog (line 57) | class InfoLog(BaseLog):
  function _split_logs (line 64) | def _split_logs(logs: list[BaseLog]) -> tuple[list[ErrorLog], list[InfoL...
  class ReplicatedCheckResult (line 70) | class ReplicatedCheckResult(_StrictBase):
  class BundleFileInfo (line 80) | class BundleFileInfo(_StrictBase):
  class BundleSideInfo (line 89) | class BundleSideInfo(_StrictBase):
  class ShapeSnapshot (line 95) | class ShapeSnapshot(_StrictBase):
  class _OutputRecord (line 100) | class _OutputRecord(_StrictBase):
    method _format_body (line 105) | def _format_body(self) -> str: ...
    method _format_rich_body (line 107) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render...
    method to_rich (line 110) | def to_rich(self, verbosity: Verbosity = "normal") -> RenderableType:
    method to_text (line 113) | def to_text(self) -> str:
  class RecordLocation (line 117) | class RecordLocation(_StrictBase):
  class _BaseComparisonRecord (line 121) | class _BaseComparisonRecord(_OutputRecord):
    method _format_location_prefix (line 124) | def _format_location_prefix(self) -> str:
    method _format_location_prefix_rich (line 129) | def _format_location_prefix_rich(self) -> str:
    method _format_location_suffix (line 134) | def _format_location_suffix(self) -> str:
  class ConfigRecord (line 140) | class ConfigRecord(_OutputRecord):
    method _format_body (line 144) | def _format_body(self) -> str:
    method _format_rich_body (line 147) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render...
  class ComparisonSkipRecord (line 151) | class ComparisonSkipRecord(_BaseComparisonRecord):
    method category (line 157) | def category(self) -> str:
    method _format_body (line 162) | def _format_body(self) -> str:
    method _format_rich_body (line 165) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render...
  class ComparisonErrorRecord (line 169) | class ComparisonErrorRecord(_BaseComparisonRecord):
    method category (line 176) | def category(self) -> str:
    method _format_body (line 179) | def _format_body(self) -> str:
    method _format_rich_body (line 182) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render...
  class _TableRecord (line 186) | class _TableRecord(_OutputRecord):
    method _table_title (line 191) | def _table_title(self) -> str: ...
    method _format_body (line 193) | def _format_body(self) -> str:
    method _format_rich_body (line 196) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render...
  class RankInfoRecord (line 200) | class RankInfoRecord(_TableRecord):
    method _table_title (line 203) | def _table_title(self) -> str:
  class InputIdsRecord (line 207) | class InputIdsRecord(_TableRecord):
    method _table_title (line 210) | def _table_title(self) -> str:
  class ComparisonTensorRecord (line 214) | class ComparisonTensorRecord(TensorComparisonInfo, _BaseComparisonRecord):
    method category (line 223) | def category(self) -> str:
    method _format_body (line 230) | def _format_body(self) -> str:
    method _format_rich_body (line 233) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render...
  class ComparisonNonTensorRecord (line 237) | class ComparisonNonTensorRecord(_BaseComparisonRecord):
    method category (line 247) | def category(self) -> str:
    method _format_body (line 252) | def _format_body(self) -> str:
    method _format_rich_body (line 255) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render...
  class SummaryRecord (line 259) | class SummaryRecord(_OutputRecord):
    method _validate_totals (line 268) | def _validate_totals(self) -> "SummaryRecord":
    method _format_body (line 277) | def _format_body(self) -> str:
    method _format_rich_body (line 280) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render...
  class LogRecord (line 284) | class LogRecord(_OutputRecord):
    method _format_body (line 287) | def _format_body(self) -> str:
  function _get_any_record_adapter (line 307) | def _get_any_record_adapter() -> TypeAdapter:
  function parse_record_json (line 311) | def parse_record_json(json_str: str | bytes) -> AnyRecord:

FILE: python/sglang/srt/debug_utils/comparator/per_token_visualizer.py
  function generate_per_token_heatmap (line 15) | def generate_per_token_heatmap(
  function _collect_per_token_data (line 32) | def _collect_per_token_data(
  function _render_heatmap (line 44) | def _render_heatmap(

FILE: python/sglang/srt/debug_utils/comparator/preset.py
  function expand_preset (line 23) | def expand_preset(argv: list[str], presets: dict[str, list[str]]) -> lis...
  function _expand_flag (line 38) | def _expand_flag(

FILE: python/sglang/srt/debug_utils/comparator/report_sink.py
  class ReportSink (line 14) | class ReportSink:
    method __init__ (line 17) | def __init__(self) -> None:
    method verbosity (line 25) | def verbosity(self) -> Verbosity:
    method configure (line 28) | def configure(
    method add (line 49) | def add(self, record: _OutputRecord) -> None:
    method close (line 57) | def close(self) -> None:
    method report_path (line 63) | def report_path(self) -> Optional[Path]:
    method _reset (line 66) | def _reset(self) -> None:
    method _get_console (line 73) | def _get_console(self) -> Console:
    method _print_to_stdout (line 78) | def _print_to_stdout(self, record: _OutputRecord) -> None:

FILE: python/sglang/srt/debug_utils/comparator/tensor_comparator/comparator.py
  function compare_tensor_pair (line 26) | def compare_tensor_pair(
  function _compute_tensor_stats (line 95) | def _compute_tensor_stats(x: torch.Tensor) -> TensorStats:
  function _compute_percentiles (line 117) | def _compute_percentiles(x: torch.Tensor, *, include: bool) -> dict[int,...
  function compute_diff (line 124) | def compute_diff(

FILE: python/sglang/srt/debug_utils/comparator/tensor_comparator/formatter.py
  function _esc_shape (line 32) | def _esc_shape(shape: Optional[list[int]]) -> str:
  function _strip_torch_prefix (line 36) | def _strip_torch_prefix(dtype: str) -> str:
  function _fmt_val (line 45) | def _fmt_val(value: float) -> str:
  function _fmt_diff_colored (line 49) | def _fmt_diff_colored(diff: float, *, threshold: float = 1e-2) -> str:
  function _category_marker (line 61) | def _category_marker(category: str) -> tuple[bool, str, str]:
  function _format_stat_line (line 73) | def _format_stat_line(stat_name: str, val_b: float, val_t: float, diff: ...
  function format_comparison (line 85) | def format_comparison(info: TensorComparisonInfo) -> str:
  function format_replicated_checks (line 134) | def format_replicated_checks(checks: list[ReplicatedCheckResult]) -> str:
  function _format_stats_comparison (line 158) | def _format_stats_comparison(baseline: TensorStats, target: TensorStats)...
  function _format_diff (line 182) | def _format_diff(diff: DiffInfo, prefix_text: str = "") -> list[str]:
  function format_comparison_rich (line 209) | def format_comparison_rich(
  function _format_comparison_minimal (line 222) | def _format_comparison_minimal(record: ComparisonTensorRecord) -> str:
  function _format_comparison_normal_or_verbose (line 234) | def _format_comparison_normal_or_verbose(
  function _format_bundle_section (line 348) | def _format_bundle_section(
  function _format_plan_section_rich (line 395) | def _format_plan_section_rich(
  function _format_sub_plan_rich (line 421) | def _format_sub_plan_rich(traced_sub: TracedSubPlan) -> str:
  function _format_cross_side_plan_rich (line 445) | def _format_cross_side_plan_rich(plan: AlignerPlan) -> list[str]:
  function _format_stats_rich (line 466) | def _format_stats_rich(
  function _format_abs_diff_percentiles_rich (line 503) | def _format_abs_diff_percentiles_rich(diff: DiffInfo) -> str:

FILE: python/sglang/srt/debug_utils/comparator/tensor_comparator/types.py
  class TensorStats (line 8) | class TensorStats(_StrictBase):
  class TensorInfo (line 17) | class TensorInfo(_StrictBase):
  class DiffInfo (line 24) | class DiffInfo(_StrictBase):
  class TensorComparisonInfo (line 37) | class TensorComparisonInfo(_StrictBase):

FILE: python/sglang/srt/debug_utils/comparator/utils.py
  function _check_equal_lengths (line 15) | def _check_equal_lengths(**named_lists: list) -> None:
  function auto_descend_dir (line 23) | def auto_descend_dir(directory: Path, label: str) -> Path:
  class _StrictBase (line 63) | class _StrictBase(BaseModel):
  class _FrozenBase (line 67) | class _FrozenBase(BaseModel):
  class Pair (line 71) | class Pair(_FrozenBase, Generic[_T]):
    method map (line 75) | def map(self, fn: Callable[[_T], _U]) -> Pair[_U]:
  function argmax_coord (line 79) | def argmax_coord(x: torch.Tensor) -> Tuple[int, ...]:
  function compute_smaller_dtype (line 84) | def compute_smaller_dtype(
  function try_unify_shape (line 94) | def try_unify_shape(x: torch.Tensor, target_shape: torch.Size) -> torch....
  function calc_rel_diff (line 106) | def calc_rel_diff(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
  function calc_per_token_rel_diff (line 113) | def calc_per_token_rel_diff(
  function compute_exit_code (line 137) | def compute_exit_code(
  function _is_all_match_pattern (line 161) | def _is_all_match_pattern(*, pattern: Optional[str], strings: list[str])...

FILE: python/sglang/srt/debug_utils/comparator/visualizer/figure.py
  class _PanelContext (line 18) | class _PanelContext:
  class _Panel (line 26) | class _Panel:
  function _build_panels (line 32) | def _build_panels() -> list[_Panel]:
  function generate_comparison_figure (line 54) | def generate_comparison_figure(

FILE: python/sglang/srt/debug_utils/comparator/visualizer/panels.py
  function _draw_baseline_heatmap (line 21) | def _draw_baseline_heatmap(
  function _draw_target_heatmap (line 30) | def _draw_target_heatmap(
  function _draw_diff_heatmap (line 39) | def _draw_diff_heatmap(
  function _draw_diff_histogram (line 47) | def _draw_diff_histogram(
  function _draw_hist2d (line 57) | def _draw_hist2d(axes: np.ndarray, row_idx: int, ctx: _PanelContext) -> ...
  function _draw_sampled (line 68) | def _draw_sampled(axes: np.ndarray, row_idx: int, ctx: _PanelContext) ->...
  function _draw_heatmap_pair (line 82) | def _draw_heatmap_pair(
  function _draw_histogram_pair (line 104) | def _draw_histogram_pair(
  function _draw_scatter_hist2d (line 130) | def _draw_scatter_hist2d(
  function _draw_scatter_sampled (line 190) | def _draw_scatter_sampled(

FILE: python/sglang/srt/debug_utils/comparator/visualizer/preprocessing.py
  function _preprocess_tensor (line 15) | def _preprocess_tensor(tensor: torch.Tensor) -> torch.Tensor:
  function _reshape_to_balanced_aspect (line 27) | def _reshape_to_balanced_aspect(
  function _to_log10 (line 54) | def _to_log10(t: torch.Tensor) -> torch.Tensor:
  function _format_log_ticks (line 58) | def _format_log_ticks(ax: object, axis: str = "both") -> None:
  function _format_stats (line 70) | def _format_stats(name: str, t: torch.Tensor) -> str:
  function _safe_hist (line 78) | def _safe_hist(
  function _maybe_downsample_numpy (line 88) | def _maybe_downsample_numpy(
  function _sanitize_filename (line 100) | def _sanitize_filename(name: str) -> str:

FILE: python/sglang/srt/debug_utils/cuda_coredump.py
  function is_enabled (line 27) | def is_enabled() -> bool:
  function get_dump_dir (line 31) | def get_dump_dir() -> str:
  function _inject_env (line 35) | def _inject_env():
  function cleanup_dump_dir (line 58) | def cleanup_dump_dir():
  function report (line 65) | def report():

FILE: python/sglang/srt/debug_utils/dump_comparator.py
  function main (line 20) | def main(args):
  function check_tensor_pair (line 87) | def check_tensor_pair(
  function _compute_and_print_diff (line 188) | def _compute_and_print_diff(
  function _argmax_coord (line 215) | def _argmax_coord(x: torch.Tensor) -> tuple:
  function _compute_smaller_dtype (line 220) | def _compute_smaller_dtype(dtype_a, dtype_b):
  function _try_unify_shape (line 228) | def _try_unify_shape(x: torch.Tensor, target_shape):
  function _calc_rel_diff (line 242) | def _calc_rel_diff(x: torch.Tensor, y: torch.Tensor):
  function _load_object (line 249) | def _load_object(path):
  function _comparison_preprocessor (line 265) | def _comparison_preprocessor(x_baseline, x_target, name):
  class TensorDimDesc (line 271) | class TensorDimDesc:
  function _get_tensor_dim_descs (line 278) | def _get_tensor_dim_descs() -> List[TensorDimDesc]:

FILE: python/sglang/srt/debug_utils/dump_loader.py
  function parse_meta_from_filename (line 13) | def parse_meta_from_filename(path: Path) -> Dict[str, Any]:
  class ValueWithMeta (line 27) | class ValueWithMeta:
    method load (line 32) | def load(path: Path) -> "ValueWithMeta":
  function _unwrap_dict_format (line 51) | def _unwrap_dict_format(obj: Any) -> Tuple[Any, Dict[str, Any]]:
  class DumpLoader (line 59) | class DumpLoader:
    method __init__ (line 60) | def __init__(self):
    method enable (line 69) | def enable(self):
    method load (line 72) | def load(self, name, **kwargs):
  function read_meta (line 95) | def read_meta(directory):
  function _add_duplicate_index (line 123) | def _add_duplicate_index(df: pl.DataFrame) -> pl.DataFrame:
  function filter_rows (line 132) | def filter_rows(df: pl.DataFrame, conditions: Dict[str, Any]) -> list[di...
  function find_row (line 147) | def find_row(df: pl.DataFrame, conditions: Dict[str, Any]):
  function _cast_to_polars_dtype (line 155) | def _cast_to_polars_dtype(value, target_dtype):
  function read_tokenizer_path (line 168) | def read_tokenizer_path(directory: Path) -> Optional[str]:

FILE: python/sglang/srt/debug_utils/dumper.py
  class _BaseConfig (line 27) | class _BaseConfig(ABC):
    method __post_init__ (line 28) | def __post_init__(self) -> None:
    method _verify_types (line 31) | def _verify_types(self) -> None:
    method _env_prefix (line 47) | def _env_prefix(cls) -> str: ...
    method _env_name (line 50) | def _env_name(cls, field_name: str) -> str:
    method from_env (line 54) | def from_env(cls) -> "_BaseConfig":
    method with_defaults (line 62) | def with_defaults(self, **kwargs) -> "_BaseConfig":
    method _unwrap_type (line 72) | def _unwrap_type(hint) -> type:
    method _parse_env_field (line 79) | def _parse_env_field(cls, env_name: str, default):
    method _parse_env_value (line 83) | def _parse_env_value(raw, default):
    method from_kv_pairs (line 93) | def from_kv_pairs(cls, pairs: Optional[List[str]]) -> "_BaseConfig":
    method _kv_pairs_to_dict (line 97) | def _kv_pairs_to_dict(cls, pairs: Optional[List[str]]) -> dict:
  class DumperConfig (line 127) | class DumperConfig(_BaseConfig):
    method _env_prefix (line 145) | def _env_prefix(cls) -> str:
    method server_port_parsed (line 150) | def server_port_parsed(self) -> Optional[Union[int, Literal["reuse"]]]:
  class _DumperState (line 164) | class _DumperState:
  class _Dumper (line 172) | class _Dumper:
    method __init__ (line 202) | def __init__(self, *, config: DumperConfig):
    method may_enable (line 210) | def may_enable(self) -> bool:
    method step (line 213) | def step(self):
    method dump (line 227) | def dump(
    method dump_model (line 259) | def dump_model(
    method dump_dict (line 279) | def dump_dict(self, name_prefix, data, save: bool = True, **kwargs):
    method set_ctx (line 284) | def set_ctx(self, **kwargs):
    method ctx (line 297) | def ctx(
    method apply_source_patches (line 330) | def apply_source_patches(self) -> None:
    method register_non_intrusive_dumper (line 349) | def register_non_intrusive_dumper(
    method configure (line 363) | def configure(self, **kwargs) -> None:
    method configure_default (line 366) | def configure_default(self, **kwargs) -> None:
    method reset (line 369) | def reset(self) -> None:
    method capture_output (line 376) | def capture_output(self):
    method get_state (line 384) | def get_state(self) -> dict:
    method _http_manager (line 392) | def _http_manager(self) -> Optional["_DumperHttpManager"]:
    method _dump_inner (line 399) | def _dump_inner(
    method _register_dump_grad_hook (line 467) | def _register_dump_grad_hook(
    method _dump_single (line 500) | def _dump_single(
    method _static_meta (line 564) | def _static_meta(self) -> dict:
    method _ensure_exp_name (line 567) | def _ensure_exp_name(self):
  class _NonIntrusiveDumper (line 579) | class _NonIntrusiveDumper:
    method __init__ (line 583) | def __init__(
    method remove (line 612) | def remove(self) -> None:
    method _detect_module_ctx (line 618) | def _detect_module_ctx(
    method _register_ctx_hooks (line 630) | def _register_ctx_hooks(self, module: "torch.nn.Module", *, ctx: dict)...
    method _make_forward_pre_hook (line 645) | def _make_forward_pre_hook(self, *, module_name: str, is_root: bool):
    method _make_forward_hook (line 661) | def _make_forward_hook(self, *, module_name: str, is_root: bool):
    method _dump_value (line 668) | def _dump_value(
    method _convert_value (line 682) | def _convert_value(value, *, skip_forward_batch: bool = False) -> dict...
  function _register_forward_hook_or_replace_fn (line 700) | def _register_forward_hook_or_replace_fn(
  function _torch_save (line 748) | def _torch_save(value, path: str):
  function _map_tensor (line 764) | def _map_tensor(value, fn: Callable[[torch.Tensor], torch.Tensor]):
  function _clone_if_view (line 772) | def _clone_if_view(value):
  function _strip_parameter (line 781) | def _strip_parameter(value):
  function _collective_with_timeout (line 790) | def _collective_with_timeout(fn, operation_name: str, timeout_seconds: i...
  function _get_default_exp_name (line 810) | def _get_default_exp_name(timeout_seconds: int = 60):
  function _cleanup_old_dumps (line 837) | def _cleanup_old_dumps(base_dir: Path, exp_name: Optional[str] = None) -...
  function _get_rank (line 857) | def _get_rank():
  function _get_world_size (line 864) | def _get_world_size():
  function _obj_to_dict (line 871) | def _obj_to_dict(obj):
  function _materialize_value (line 888) | def _materialize_value(value):
  function _format_tags (line 894) | def _format_tags(kwargs: dict) -> str:
  class _DefaultNoneDict (line 898) | class _DefaultNoneDict(dict):
    method __missing__ (line 901) | def __missing__(self, key: str):
  function _evaluate_filter (line 908) | def _evaluate_filter(filter_expr: str, tags: dict[str, Any]) -> bool:
  function _deepcopy_or_clone (line 919) | def _deepcopy_or_clone(x):
  function _compute_static_meta (line 928) | def _compute_static_meta():
  class _DumperHttpManager (line 950) | class _DumperHttpManager:
    method __init__ (line 951) | def __init__(self, dumper: "_Dumper"):
    method handle_request (line 974) | def handle_request(self, *, method: str, body: dict[str, Any]) -> list...
    method _handle_request_inner (line 979) | def _handle_request_inner(self, *, method: str, body: dict[str, Any]) ...
  function _start_http_server (line 995) | def _start_http_server(*, prefix: str, target: object, http_port: int):
  function _make_http_handler (line 1002) | def _make_http_handler(*, prefix: str, target):
  function _create_zmq_rpc_broadcast (line 1034) | def _create_zmq_rpc_broadcast(
  class _ZmqRpcHandle (line 1086) | class _ZmqRpcHandle:
    method __init__ (line 1089) | def __init__(self, socket, debug_name: str):
    method __getattr__ (line 1093) | def __getattr__(self, method_name: str):
  class _RpcBroadcastBase (line 1112) | class _RpcBroadcastBase:
    method __getattr__ (line 1115) | def __getattr__(self, method_name: str):
    method __init__ (line 1118) | def __init__(self, handles: List[_ZmqRpcHandle]):
  class _ZmqRpcBroadcast (line 1122) | class _ZmqRpcBroadcast(_RpcBroadcastBase):
    method __init__ (line 1128) | def __init__(self, handles: List[_ZmqRpcHandle]):
    method __getattr__ (line 1131) | def __getattr__(self, method_name: str):
  function _get_local_ip_by_remote (line 1144) | def _get_local_ip_by_remote() -> Optional[str]:
  class _RecomputeStatus (line 1176) | class _RecomputeStatus(enum.Enum):
    method to_pseudo_parallel_meta (line 1181) | def to_pseudo_parallel_meta(self) -> dict[str, Any]:
  class _FrameworkPlugin (line 1190) | class _FrameworkPlugin(ABC):
    method name (line 1193) | def name(self) -> str: ...
    method collect_parallel_info (line 1196) | def collect_parallel_info(self) -> dict: ...
    method convert_value (line 1199) | def convert_value(
    method detect_layer_id (line 1206) | def detect_layer_id(self, module: "torch.nn.Module") -> Optional[int]:
    method core_fields (line 1210) | def core_fields(self) -> frozenset[str]:
    method get_tokenizer_path (line 1213) | def get_tokenizer_path(self) -> Optional[str]:
    method detect_recompute_status (line 1216) | def detect_recompute_status(self) -> _RecomputeStatus:
  class _SGLangPlugin (line 1220) | class _SGLangPlugin(_FrameworkPlugin):
    method name (line 1234) | def name(self) -> str:
    method collect_parallel_info (line 1237) | def collect_parallel_info(self) -> dict:
    method convert_value (line 1272) | def convert_value(
    method detect_layer_id (line 1297) | def detect_layer_id(self, module: "torch.nn.Module") -> Optional[int]:
    method core_fields (line 1302) | def core_fields(self) -> frozenset[str]:
    method get_tokenizer_path (line 1307) | def get_tokenizer_path(self) -> Optional[str]:
  class _MegatronPlugin (line 1323) | class _MegatronPlugin(_FrameworkPlugin):
    method name (line 1332) | def name(self) -> str:
    method collect_parallel_info (line 1335) | def collect_parallel_info(self) -> dict:
    method convert_value (line 1385) | def convert_value(
    method detect_layer_id (line 1398) | def detect_layer_id(self, module: "torch.nn.Module") -> Optional[int]:
    method core_fields (line 1403) | def core_fields(self) -> frozenset[str]:
    method detect_recompute_status (line 1408) | def detect_recompute_status(self) -> _RecomputeStatus:
  function _detect_recompute_status (line 1426) | def _detect_recompute_status() -> _RecomputeStatus:
  function get_truncated_value (line 1443) | def get_truncated_value(value):
  function get_tensor_info (line 1460) | def get_tensor_info(x):

FILE: python/sglang/srt/debug_utils/log_parser.py
  function parse (line 19) | def parse(lines):

FILE: python/sglang/srt/debug_utils/model_truncator.py
  function main (line 14) | def main(args):
  function _maybe_snapshot_download (line 54) | def _maybe_snapshot_download(path):
  function _transform_json (line 60) | def _transform_json(dir_input, dir_output, filename, fn):
  function _transform_config (line 67) | def _transform_config(args, config_json):
  function _transform_safetensors_index (line 71) | def _transform_safetensors_index(args, safetensors_index):
  function _transform_safetensors_file (line 79) | def _transform_safetensors_file(
  function _filter_tensor_name (line 88) | def _filter_tensor_name(args, tensor_name: str):

FILE: python/sglang/srt/debug_utils/schedule_simulator/data_source/data_loader.py
  function load_from_request_logger (line 8) | def load_from_request_logger(file_path: Union[str, Path]) -> List[SimReq...

FILE: python/sglang/srt/debug_utils/schedule_simulator/data_source/data_synthesis.py
  function generate_random_requests (line 7) | def generate_random_requests(
  function generate_gsp_requests (line 36) | def generate_gsp_requests(
  function _random_len (line 77) | def _random_len(full_len: int, range_ratio: float) -> int:

FILE: python/sglang/srt/debug_utils/schedule_simulator/entrypoint.py
  function create_arg_parser (line 31) | def create_arg_parser() -> argparse.ArgumentParser:
  function _load_requests (line 90) | def _load_requests(args: argparse.Namespace) -> List[SimRequest]:
  function _create_router (line 115) | def _create_router(name: str, total_gpus: int):
  function _create_scheduler (line 125) | def _create_scheduler(name: str):
  function main (line 131) | def main(args: argparse.Namespace) -> SimulationResult:

FILE: python/sglang/srt/debug_utils/schedule_simulator/gpu_state.py
  class StepRecord (line 8) | class StepRecord:
  class GPUState (line 19) | class GPUState:
    method batch_size (line 25) | def batch_size(self) -> int:
    method total_attention_compute (line 28) | def total_attention_compute(self) -> int:
    method total_seq_len (line 31) | def total_seq_len(self, extra_reqs: Optional[List[SimRequest]] = None)...
    method is_valid (line 41) | def is_valid(self) -> bool:
    method start_request (line 44) | def start_request(self, req: SimRequest) -> None:
    method evict_request (line 49) | def evict_request(self, req: SimRequest) -> None:
    method execute_step (line 54) | def execute_step(self) -> None:
    method get_step_record (line 61) | def get_step_record(self, step: int) -> StepRecord:

FILE: python/sglang/srt/debug_utils/schedule_simulator/metrics.py
  class MetricRecorder (line 7) | class MetricRecorder(ABC):
    method on_step_end (line 9) | def on_step_end(self, step: int, gpu_states: List[GPUState]) -> None: ...
    method get_summary (line 12) | def get_summary(self) -> Dict[str, Any]: ...
  class BalancednessRecorder (line 15) | class BalancednessRecorder(MetricRecorder):
    method __init__ (line 16) | def __init__(self, name: str, value_fn: Callable[[GPUState], float]):
    method on_step_end (line 21) | def on_step_end(self, step: int, gpu_states: List[GPUState]) -> None:
    method get_summary (line 28) | def get_summary(self) -> Dict[str, Any]:
  function BatchSizeBalancednessRecorder (line 38) | def BatchSizeBalancednessRecorder() -> BalancednessRecorder:
  function AttentionComputeBalancednessRecorder (line 42) | def AttentionComputeBalancednessRecorder() -> BalancednessRecorder:
  class AvgBatchSizeRecorder (line 48) | class AvgBatchSizeRecorder(MetricRecorder):
    method __init__ (line 49) | def __init__(self):
    method on_step_end (line 53) | def on_step_end(self, step: int, gpu_states: List[GPUState]) -> None:
    method get_summary (line 58) | def get_summary(self) -> Dict[str, Any]:

FILE: python/sglang/srt/debug_utils/schedule_simulator/request.py
  class SimRequest (line 6) | class SimRequest:
    method seq_len (line 14) | def seq_len(self) -> int:
    method is_finished (line 17) | def is_finished(self) -> bool:

FILE: python/sglang/srt/debug_utils/schedule_simulator/routers/base.py
  class RouterPolicy (line 6) | class RouterPolicy(ABC):
    method route (line 8) | def route(self, incoming_request: SimRequest) -> int: ...

FILE: python/sglang/srt/debug_utils/schedule_simulator/routers/random_router.py
  class RandomRouter (line 7) | class RandomRouter(RouterPolicy):
    method __init__ (line 8) | def __init__(self, num_gpus: int):
    method route (line 11) | def route(self, incoming_request: SimRequest) -> int:

FILE: python/sglang/srt/debug_utils/schedule_simulator/routers/round_robin_router.py
  class RoundRobinRouter (line 5) | class RoundRobinRouter(RouterPolicy):
    method __init__ (line 6) | def __init__(self, num_gpus: int):
    method route (line 10) | def route(self, incoming_request: SimRequest) -> int:

FILE: python/sglang/srt/debug_utils/schedule_simulator/routers/sticky_router.py
  class StickyRouter (line 8) | class StickyRouter(RouterPolicy):
    method __init__ (line 9) | def __init__(self, num_gpus: int):
    method _assign_gpu (line 13) | def _assign_gpu(self) -> int:
    method route (line 16) | def route(self, incoming_request: SimRequest) -> int:

FILE: python/sglang/srt/debug_utils/schedule_simulator/schedulers/base.py
  class SchedulerPolicy (line 8) | class SchedulerPolicy(ABC):
    method schedule (line 10) | def schedule(self, gpu_state: "GPUState") -> None: ...

FILE: python/sglang/srt/debug_utils/schedule_simulator/schedulers/fifo_scheduler.py
  class FIFOScheduler (line 9) | class FIFOScheduler(SchedulerPolicy):
    method schedule (line 10) | def schedule(self, gpu_state: "GPUState") -> None:

FILE: python/sglang/srt/debug_utils/schedule_simulator/simulator.py
  class SimulationResult (line 12) | class SimulationResult:
  class Simulator (line 17) | class Simulator:
    method __init__ (line 18) | def __init__(
    method run (line 40) | def run(self, requests: List[SimRequest]) -> SimulationResult:
    method _should_stop (line 65) | def _should_stop(self) -> bool:
    method _route_requests (line 76) | def _route_requests(self, incoming_requests: List[SimRequest]) -> None:
    method _schedule_all_gpus (line 82) | def _schedule_all_gpus(self) -> None:
    method _execute_step (line 90) | def _execute_step(self) -> None:
    method _log_step (line 94) | def _log_step(self) -> None:
    method _record_metrics (line 108) | def _record_metrics(self) -> None:
    method _get_summary (line 112) | def _get_summary(self) -> Dict[str, Any]:
  function _format_ids (line 116) | def _format_ids(requests: List[SimRequest], limit: int = 5) -> str:

FILE: python/sglang/srt/debug_utils/source_patcher/code_patcher.py
  function apply_patches_from_config (line 19) | def apply_patches_from_config(
  class CodePatcher (line 41) | class CodePatcher:
    method __init__ (line 44) | def __init__(self, *, patches: list[PatchSpec]) -> None:
    method __enter__ (line 48) | def __enter__(self) -> "CodePatcher":
    method __exit__ (line 52) | def __exit__(
  function patch_function (line 63) | def patch_function(
  function _apply_specs (line 101) | def _apply_specs(specs: list[PatchSpec]) -> list[PatchState]:
  function _inject_preamble (line 113) | def _inject_preamble(*, config: PatchConfig, extra_imports: list[str]) -...
  function _insert_preamble (line 130) | def _insert_preamble(*, source: str, preamble: str) -> str:
  function _find_signature_end (line 150) | def _find_signature_end(lines: list[str]) -> int:
  function _resolve_target (line 158) | def _resolve_target(qualified_name: str) -> Callable[..., Any]:

FILE: python/sglang/srt/debug_utils/source_patcher/source_editor.py
  function apply_edits (line 4) | def apply_edits(*, source: str, edits: list[EditSpec]) -> str:
  function _apply_single_edit (line 15) | def _apply_single_edit(*, source: str, edit: EditSpec) -> str:
  function _resolve_replacement (line 44) | def _resolve_replacement(*, edit: EditSpec, match_text: str) -> str:
  function _find_match (line 53) | def _find_match(*, source_lines: list[str], match_lines: list[str]) -> int:
  function _realign_replacement (line 81) | def _realign_replacement(
  function _leading_spaces (line 107) | def _leading_spaces(line: str) -> int:

FILE: python/sglang/srt/debug_utils/source_patcher/types.py
  class PatchApplicationError (line 8) | class PatchApplicationError(Exception):
  class _StrictBase (line 12) | class _StrictBase(BaseModel):
  class EditSpec (line 16) | class EditSpec(_StrictBase):
    method _check_modes_mutually_exclusive (line 31) | def _check_modes_mutually_exclusive(self) -> "EditSpec":
  class PatchSpec (line 45) | class PatchSpec(_StrictBase):
  class PatchConfig (line 51) | class PatchConfig(_StrictBase):
  class PatchState (line 55) | class PatchState:
    method __init__ (line 56) | def __init__(
    method restore (line 62) | def restore(self) -> None:

FILE: python/sglang/srt/debug_utils/tensor_dump_forward_hook.py
  class TensorDumper (line 26) | class TensorDumper:
    method __init__ (line 27) | def __init__(
    method get_dump_dir (line 46) | def get_dump_dir(self):
    method add_tensor (line 49) | def add_tensor(self, name, tensor_item):
    method dump_current_tensors (line 78) | def dump_current_tensors(self):
    method _add_hook_recursive (line 89) | def _add_hook_recursive(
    method _dump_hook (line 127) | def _dump_hook(self, tensor_name, do_dump):
  function register_forward_hook_for_model (line 141) | def register_forward_hook_for_model(

FILE: python/sglang/srt/debug_utils/text_comparator.py
  function main (line 16) | def main(args):
  function _compute_df_input_mode_simple_evals (line 74) | def _compute_df_input_mode_simple_evals(args):
  function _compute_df_input_one_mode_simple_evals (line 83) | def _compute_df_input_one_mode_simple_evals(path, category, trial_index):
  function _compute_id_from_object (line 107) | def _compute_id_from_object(obj):
  function _compute_df_raw (line 114) | def _compute_df_raw(args):
  function _get_file_infos (line 127) | def _get_file_infos(args):
  function _read_df_raw (line 138) | def _read_df_raw(path: str, category: str, trial_index: int):
  function _transform_df_input (line 144) | def _transform_df_input(df: pl.DataFrame):
  function _compute_df_meta (line 173) | def _compute_df_meta(df_input: pl.DataFrame):
  function _handle_one_prompt (line 188) | def _handle_one_prompt(df_one_prompt: pl.DataFrame):
  function _compute_str_prefix_len (line 216) | def _compute_str_prefix_len(a: str, b: str) -> int:

FILE: python/sglang/srt/disaggregation/ascend/conn.py
  class AscendKVManager (line 21) | class AscendKVManager(MooncakeKVManager):
    method init_engine (line 22) | def init_engine(self):
    method register_buffer_to_engine (line 31) | def register_buffer_to_engine(self):
    method send_kvcache (line 43) | def send_kvcache(
  class AscendKVSender (line 134) | class AscendKVSender(MooncakeKVSender):
  class AscendKVReceiver (line 138) | class AscendKVReceiver(MooncakeKVReceiver):
  class AscendKVBootstrapServer (line 142) | class AscendKVBootstrapServer(MooncakeKVBootstrapServer):

FILE: python/sglang/srt/disaggregation/ascend/transfer_engine.py
  class AscendTransferEngine (line 24) | class AscendTransferEngine(MooncakeTransferEngine):
    method __init__ (line 26) | def __init__(
    method initialize (line 56) | def initialize(self) -> None:
    method batch_register (line 84) | def batch_register(self, ptrs: List[int], lengths: List[int]):
    method _get_transfer_protocol (line 94) | def _get_transfer_protocol():

FILE: python/sglang/srt/disaggregation/base/conn.py
  class KVArgs (line 15) | class KVArgs:
  class KVPoll (line 42) | class KVPoll:
  class BaseKVManager (line 50) | class BaseKVManager(ABC):
    method __init__ (line 54) | def __init__(
    method register_to_bootstrap (line 63) | def register_to_bootstrap(self):
  class BaseKVSender (line 68) | class BaseKVSender(ABC):
    method __init__ (line 71) | def __init__(
    method init (line 81) | def init(self, num_kv_indices: int, aux_index: Optional[int] = None):
    method send (line 88) | def send(
    method poll (line 99) | def poll(self) -> KVPoll:
    method failure_exception (line 106) | def failure_exception(self):
  class BaseKVReceiver (line 113) | class BaseKVReceiver(ABC):
    method __init__ (line 116) | def __init__(
    method init (line 124) | def init(
    method poll (line 136) | def poll(self) -> KVPoll:
    method failure_exception (line 143) | def failure_exception(self):
    method clear (line 149) | def clear(self):
    method abort (line 155) | def abort(self):
  class BaseKVBootstrapServer (line 162) | class BaseKVBootstrapServer(ABC):
    method __init__ (line 164) | def __init__(self, host: str, port: int): ...

FILE: python/sglang/srt/disaggregation/common/conn.py
  class PrefillServerInfo (line 48) | class PrefillServerInfo:
    method __post_init__ (line 66) | def __post_init__(self):
  class PrefillRankInfo (line 79) | class PrefillRankInfo:
    method __post_init__ (line 83) | def __post_init__(self):
  class CommonKVManager (line 88) | class CommonKVManager(BaseKVManager):
    method __init__ (line 89) | def __init__(
    method check_status (line 177) | def check_status(self, bootstrap_room: int) -> KVPoll:
    method update_status (line 180) | def update_status(self, bootstrap_room: int, status: KVPoll):
    method record_failure (line 191) | def record_failure(self, bootstrap_room: int, failure_reason: str):
    method try_ensure_parallel_info (line 195) | def try_ensure_parallel_info(self, bootstrap_addr: str) -> bool:
    method _resolve_rank_mapping (line 240) | def _resolve_rank_mapping(self, info: PrefillServerInfo) -> None:
    method register_to_bootstrap (line 319) | def register_to_bootstrap(self):
    method _connect (line 363) | def _connect(self, endpoint: str, is_ipv6: bool = False):
    method get_mha_kv_ptrs_with_pp (line 370) | def get_mha_kv_ptrs_with_pp(
    method get_mla_kv_ptrs_with_pp (line 403) | def get_mla_kv_ptrs_with_pp(
  class CommonKVSender (line 417) | class CommonKVSender(BaseKVSender):
    method __init__ (line 418) | def __init__(
    method _register_prefill_dp_rank (line 444) | def _register_prefill_dp_rank(self):
    method init (line 460) | def init(self, num_kv_indices: int, aux_index: Optional[int] = None):
    method send (line 467) | def send(
    method poll (line 474) | def poll(self) -> KVPoll:
    method failure_exception (line 477) | def failure_exception(self):
  class CommonKVReceiver (line 481) | class CommonKVReceiver(BaseKVReceiver):
    method __init__ (line 487) | def __init__(
    method _setup_bootstrap_infos (line 529) | def _setup_bootstrap_infos(self):
    method _get_bootstrap_info_from_server (line 583) | def _get_bootstrap_info_from_server(
    method query_prefill_dp_ranks (line 603) | def query_prefill_dp_ranks(
    method _connect (line 626) | def _connect(cls, endpoint: str, is_ipv6: bool = False):
    method _connect_to_bootstrap_server (line 638) | def _connect_to_bootstrap_server(cls, bootstrap_info: dict):
    method _register_kv_args (line 645) | def _register_kv_args(self):
    method failure_exception (line 648) | def failure_exception(self):
  class CommonKVBootstrapServer (line 652) | class CommonKVBootstrapServer(BaseKVBootstrapServer):
    method __init__ (line 653) | def __init__(self, host: str, port: int):
    method run (line 680) | def run(self):
    method _is_ready (line 683) | def _is_ready(self) -> bool:
    method _setup_routes (line 697) | def _setup_routes(self):
    method _handle_health_check (line 703) | async def _handle_health_check(self, request):
    method _handle_route (line 706) | async def _handle_route(self, request: web.Request):
    method _handle_route_put (line 717) | async def _handle_route_put(self, request: web.Request):
    method _handle_route_get (line 784) | async def _handle_route_get(self, request: web.Request):
    method _handle_register_dp_rank (line 846) | async def _handle_register_dp_rank(self, request: web.Request):
    method _handle_query_dp_ranks (line 858) | async def _handle_query_dp_ranks(self, request: web.Request):
    method _cleanup_expired_entries (line 869) | async def _cleanup_expired_entries(self):
    method _run_server (line 887) | def _run_server(self):
    method close (line 915) | def close(self):
    method poll (line 925) | def poll(self) -> KVPoll: ...

FILE: python/sglang/srt/disaggregation/common/utils.py
  class FastQueue (line 9) | class FastQueue:
    method __init__ (line 10) | def __init__(self):
    method put (line 14) | def put(self, item):
    method get (line 20) | def get(self):
  function group_concurrent_contiguous (line 28) | def group_concurrent_contiguous(

FILE: python/sglang/srt/disaggregation/decode.py
  function _is_fake_transfer (line 81) | def _is_fake_transfer(req: Req, server_args: ServerArgs) -> bool:
  function _bootstrap_addr (line 88) | def _bootstrap_addr(req: Req) -> str:
  class DecodeReqToTokenPool (line 93) | class DecodeReqToTokenPool:
    method __init__ (line 105) | def __init__(
    method write (line 130) | def write(self, indices, values):
    method available_size (line 133) | def available_size(self):
    method alloc (line 136) | def alloc(self, reqs: List["Req"]) -> Optional[List[int]]:
    method free (line 159) | def free(self, req: "Req"):
    method clear (line 164) | def clear(self):
  class HybridMambaDecodeReqToTokenPool (line 168) | class HybridMambaDecodeReqToTokenPool(HybridReqToTokenPool):
    method __init__ (line 170) | def __init__(
    method clear (line 207) | def clear(self):
  class DecodeRequest (line 213) | class DecodeRequest:
    method seqlen (line 220) | def seqlen(self) -> int:
  class DecodePreallocQueue (line 224) | class DecodePreallocQueue:
    method __init__ (line 229) | def __init__(
    method _init_kv_manager (line 287) | def _init_kv_manager(self) -> CommonKVManager:
    method add (line 356) | def add(self, req: Req, is_retracted: bool = False) -> None:
    method _resolve_prefill_dp_rank (line 377) | def _resolve_prefill_dp_rank(self, req: Req) -> Optional[int]:
    method _create_receiver_and_enqueue (line 393) | def _create_receiver_and_enqueue(self, req: Req, prefill_dp_rank: int)...
    method _check_if_req_exceed_kv_capacity (line 412) | def _check_if_req_exceed_kv_capacity(self, req: Req) -> bool:
    method extend (line 421) | def extend(self, reqs: List[Req], is_retracted: bool = False) -> None:
    method resume_retracted_reqs (line 426) | def resume_retracted_reqs(
    method _update_handshake_waiters (line 468) | def _update_handshake_waiters(
    method _ensure_prefill_info (line 507) | def _ensure_prefill_info(
    method _resolve_pending_reqs (line 554) | def _resolve_pending_reqs(self) -> None:
    method pop_preallocated (line 596) | def pop_preallocated(
    method num_tokens_pre_allocated (line 737) | def num_tokens_pre_allocated(self):
    method _allocatable_tokens (line 742) | def _allocatable_tokens(
    method _pre_alloc (line 792) | def _pre_alloc(self, req: Req) -> torch.Tensor:
  class DecodeTransferQueue (line 830) | class DecodeTransferQueue:
    method __init__ (line 835) | def __init__(
    method add (line 853) | def add(self, decode_req: DecodeRequest) -> None:
    method extend (line 856) | def extend(self, decode_reqs: List[DecodeRequest]) -> None:
    method _commit_transfer_to_req (line 859) | def _commit_transfer_to_req(self, decode_req: DecodeRequest) -> bool:
    method pop_transferred (line 940) | def pop_transferred(self, rids_to_check: Optional[List[str]] = None) -...
  class SchedulerDisaggregationDecodeMixin (line 1010) | class SchedulerDisaggregationDecodeMixin:
    method event_loop_normal_disagg_decode (line 1013) | def event_loop_normal_disagg_decode(self: Scheduler):
    method event_loop_overlap_disagg_decode (line 1039) | def event_loop_overlap_disagg_decode(self: Scheduler):
    method _run_batch_prebuilt (line 1075) | def _run_batch_prebuilt(
    method get_next_disagg_decode_batch_to_run (line 1086) | def get_next_disagg_decode_batch_to_run(
    method get_new_prebuilt_batch (line 1114) | def get_new_prebuilt_batch(self: Scheduler) -> Optional[ScheduleBatch]:
    method process_decode_queue (line 1166) | def process_decode_queue(self: Scheduler):

FILE: python/sglang/srt/disaggregation/decode_kvcache_offload_manager.py
  class DecodeKVCacheOffloadManager (line 34) | class DecodeKVCacheOffloadManager:
    method __init__ (line 37) | def __init__(
    method offload_kv_cache (line 109) | def offload_kv_cache(self, req) -> bool:
    method check_offload_progress (line 182) | def check_offload_progress(self):
    method _check_offload_progress (line 202) | def _check_offload_progress(self, finish_count):
    method _release_finished_req (line 237) | def _release_finished_req(self, req: Req, start_offset: int):
    method _check_backup_progress (line 261) | def _check_backup_progress(self, finish_count):
    method _trigger_backup (line 275) | def _trigger_backup(
    method _compute_prefix_hash (line 288) | def _compute_prefix_hash(self, tokens, prior_hash=""):
    method finalize_release_on_finish (line 297) | def finalize_release_on_finish(self, req: Req):

FILE: python/sglang/srt/disaggregation/decode_schedule_batch_mixin.py
  class ScheduleBatchDisaggregationDecodeMixin (line 20) | class ScheduleBatchDisaggregationDecodeMixin:
    method prepare_for_prebuilt (line 22) | def prepare_for_prebuilt(self: ScheduleBatch):
    method process_prebuilt (line 103) | def process_prebuilt(

FILE: python/sglang/srt/disaggregation/encode_grpc_server.py
  class EncoderHealthServicer (line 41) | class EncoderHealthServicer(health_pb2_grpc.HealthServicer):
    method __init__ (line 50) | def __init__(self):
    method set_serving (line 53) | def set_serving(self):
    method set_not_serving (line 56) | def set_not_serving(self):
    method Check (line 59) | async def Check(self, request, context) -> health_pb2.HealthCheckRespo...
    method Watch (line 68) | async def Watch(self, request, context):
  class SGLangEncoderServer (line 72) | class SGLangEncoderServer(SGLangEncoderServicer):
    method __init__ (line 77) | def __init__(
    method Encode (line 87) | async def Encode(
    method Send (line 164) | async def Send(
    method SchedulerReceiveUrl (line 187) | async def SchedulerReceiveUrl(
  function serve_grpc_encoder (line 208) | async def serve_grpc_encoder(server_args: ServerArgs):

FILE: python/sglang/srt/disaggregation/encode_receiver.py
  function _grpc_target (line 40) | def _grpc_target(url: str) -> str:
  function _normalize_embedding_ports (line 48) | def _normalize_embedding_ports(embedding_port):
  function _grpc_scheduler_receive_url (line 56) | def _grpc_scheduler_receive_url(target, req_id, receive_url, receive_cou...
  function _grpc_encode_request (line 76) | def _grpc_encode_request(target, encode_request):
  function _grpc_send_request (line 102) | def _grpc_send_request(target, request_json):
  class EmbeddingData (line 124) | class EmbeddingData:
    method __init__ (line 125) | def __init__(
    method get_grid (line 156) | def get_grid(self):
    method get_embedding (line 160) | def get_embedding(self):
    method __repr__ (line 163) | def __repr__(self):
    method copy_without_embedding (line 166) | def copy_without_embedding(self):
  function _cat_grid (line 194) | def _cat_grid(dims, flatten_items=False):
  class MultiModalEmbeddingData (line 204) | class MultiModalEmbeddingData(EmbeddingData):
    method __init__ (line 205) | def __init__(
    method _set_part_grid (line 246) | def _set_part_grid(self, part_idx, modality, grid):
    method _set_video_meta_for_part (line 255) | def _set_video_meta_for_part(self, part_idx, source):
    method from_embedding_data (line 267) | def from_embedding_data(cls, embedding_data: EmbeddingData):
    method __repr__ (line 288) | def __repr__(self):
    method get_embedding (line 291) | def get_embedding(self, is_concat=False):
    method ready (line 304) | def ready(self):
    method get_mm_extra_meta (line 307) | def get_mm_extra_meta(self):
    method add (line 325) | def add(self, embedding_data: EmbeddingData):
  class WaitingImageRequestStatus (line 338) | class WaitingImageRequestStatus(IntEnum):
  function create_part_req_id (line 345) | def create_part_req_id(original_req_id: str, part_idx: int) -> str:
  function extract_original_req_id (line 350) | def extract_original_req_id(part_req_id: str) -> str:
  function calculate_modality_num_parts (line 357) | def calculate_modality_num_parts(modalities, num_items_assigned):
  class WaitingImageRequest (line 381) | class WaitingImageRequest:
    method __init__ (line 382) | def __init__(
    method send_encode_request (line 412) | def send_encode_request(self):
    method _try_recv_mm_data (line 482) | def _try_recv_mm_data(self):
  class WaitingImageRequestGrpc (line 534) | class WaitingImageRequestGrpc(WaitingImageRequest):
    method send_encode_request (line 535) | def send_encode_request(self):
  function _determine_tensor_transport_mode (line 581) | def _determine_tensor_transport_mode(server_args):
  class MMReceiverBase (line 591) | class MMReceiverBase(ABC):
    method __init__ (line 592) | def __init__(
    method process_waiting_requests (line 673) | def process_waiting_requests(self, recv_reqs):
    method recv_mm_data (line 676) | async def recv_mm_data(
    method _cleanup_mooncake_buffer (line 699) | def _cleanup_mooncake_buffer(self, req_id):
    method _recv_mm_data (line 714) | async def _recv_mm_data(self, req_id, recv_socket, mm_processor, prompt):
    method send_encode_request (line 800) | def send_encode_request(self, obj):
    method _send_encode_request (line 803) | def _send_encode_request(self, obj):
    method _process_waiting_requests (line 829) | def _process_waiting_requests(self, recv_reqs, waiting_cls):
    method _run_encode_in_thread (line 902) | def _run_encode_in_thread(
    method create_req (line 919) | def create_req(self, recv_req: TokenizedGenerateReqInput):
    method allocate_embedding_buffer (line 955) | async def allocate_embedding_buffer(self, req_id, total_bytes):
    method _assign_items_by_modality (line 964) | def _assign_items_by_modality(
    method _extract_url_data (line 1010) | def _extract_url_data(self, request_obj) -> List[Dict]:
  class MMReceiverHTTP (line 1035) | class MMReceiverHTTP(MMReceiverBase):
    method __init__ (line 1036) | def __init__(
    method process_waiting_requests (line 1057) | def process_waiting_requests(self, recv_reqs):
    method encode (line 1060) | async def encode(
  class MMReceiverGrpc (line 1194) | class MMReceiverGrpc(MMReceiverBase):
    method __init__ (line 1195) | def __init__(
    method build_and_send_encode_request (line 1215) | def build_and_send_encode_request(self, image_urls, rid):
    method process_waiting_requests (line 1224) | def process_waiting_requests(self, recv_reqs):
    method encode (line 1227) | async def encode(
  function _validate_transport_mode (line 1349) | def _validate_transport_mode(transport_mode: str, encoder_urls):
  function create_mm_receiver (line 1375) | def create_mm_receiver(

FILE: python/sglang/srt/disaggregation/encode_server.py
  class MMError (line 73) | class MMError(Exception):
    method __init__ (line 74) | def __init__(self, message, code=HTTPStatus.INTERNAL_SERVER_ERROR):
  class BadRequestError (line 80) | class BadRequestError(MMError):
    method __init__ (line 81) | def __init__(self, message):
  class InternalError (line 85) | class InternalError(MMError):
    method __init__ (line 86) | def __init__(self, message):
  class TensorWrapper (line 90) | class TensorWrapper:
    method __init__ (line 93) | def __init__(self, tensor):
    method __buffer__ (line 105) | def __buffer__(self):
  function _convert (line 113) | def _convert(data):
  function _get_mm_grid_dim (line 139) | def _get_mm_grid_dim(mm_inputs, modality):
  function _get_mm_feature (line 146) | def _get_mm_feature(mm_inputs, modality):
  function _build_mm_aux_data (line 155) | def _build_mm_aux_data(mm_inputs):
  class MMEncoder (line 166) | class MMEncoder:
    method __init__ (line 167) | def __init__(
    method _infer_embedding_dims (line 289) | def _infer_embedding_dims(self) -> dict:
    method _build_vision_config (line 328) | def _build_vision_config(self, mm_process_config):
    method _load_mm_processor (line 373) | def _load_mm_processor(self, server_args: ServerArgs):
    method _load_single_item (line 422) | def _load_single_item(
    method submit_data_loading_tasks (line 450) | def submit_data_loading_tasks(self, items, modalities):
    method _get_feat_extract_output_lengths (line 466) | def _get_feat_extract_output_lengths(self, feature_lens):
    method _flatten_and_load_videos (line 490) | async def _flatten_and_load_videos(self, mm_items):
    method _flatten_and_load_data_by_modality (line 519) | async def _flatten_and_load_data_by_modality(self, mm_items, modality):
    method get_num_patches (line 566) | def get_num_patches(
    method get_num_tokens (line 575) | def get_num_tokens(
    method slice_embedding (line 586) | def slice_embedding(
    method _calculate_hashes_from_features (line 597) | def _calculate_hashes_from_features(
    method _encode_missing (line 612) | async def _encode_missing(
    method encode_with_global_cache (line 662) | async def encode_with_global_cache(
    method _flatten_and_load_audios (line 816) | async def _flatten_and_load_audios(self, mm_items):
    method _flatten_and_load_images (line 822) | async def _flatten_and_load_images(self, mm_items):
    method _calculate_timestamps (line 828) | def _calculate_timestamps(self, indices, video_fps: float, merge_size:...
    method _process_mm_items (line 846) | async def _process_mm_items(self, mm_items, modality):
    method _encode (line 939) | async def _encode(self, mm_items, modality: Modality) -> torch.Tensor:
    method _send (line 990) | async def _send(
    method encode (line 1045) | async def encode(self, mm_items, modality: Modality, req_id, num_parts...
    method send (line 1086) | async def send(
    method send_with_url (line 1100) | async def send_with_url(
    method get_embedding_port (line 1179) | async def get_embedding_port(self, prefill_url):
  class EncoderProfiler (line 1191) | class EncoderProfiler:
    method __init__ (line 1192) | def __init__(self, rank: int):
    method start (line 1200) | def start(self, obj: ProfileReq):
    method step (line 1234) | def step(self):
    method stop (line 1243) | def stop(self):
  function run_encoder (line 1261) | async def run_encoder(
  function launch_encoder (line 1294) | def launch_encoder(server_args, schedule_path, dist_init_method, rank):
  function launch_server (line 1303) | def launch_server(server_args: ServerArgs):
  function get_condition (line 1330) | async def get_condition(rid):
  function handle_encode_request (line 1338) | async def handle_encode_request(request: dict):
  function handle_send_request (line 1441) | async def handle_send_request(request: dict):
  function handle_scheduler_receive_url_request (line 1455) | async def handle_scheduler_receive_url_request(request: dict):
  function health_generate (line 1471) | async def health_generate():
  function start_profile_async (line 1482) | async def start_profile_async(obj: Optional[ProfileReqInput] = None):
  function stop_profile_async (line 1520) | async def stop_profile_async():

FILE: python/sglang/srt/disaggregation/fake/conn.py
  class FakeKVManager (line 21) | class FakeKVManager(BaseKVManager):
    method __init__ (line 22) | def __init__(
    method register_to_bootstrap (line 31) | def register_to_bootstrap(self):
  class FakeKVSender (line 35) | class FakeKVSender(BaseKVSender):
    method __init__ (line 36) | def __init__(
    method poll (line 46) | def poll(self) -> KVPoll:
    method init (line 55) | def init(
    method send (line 65) | def send(
    method failure_exception (line 75) | def failure_exception(self):
  class FakeKVReceiver (line 79) | class FakeKVReceiver(BaseKVReceiver):
    method __init__ (line 80) | def __init__(
    method poll (line 89) | def poll(self) -> KVPoll:
    method init (line 98) | def init(
    method failure_exception (line 109) | def failure_exception(self):

FILE: python/sglang/srt/disaggregation/kv_events.py
  class EventBatch (line 38) | class EventBatch(
  class KVCacheEvent (line 49) | class KVCacheEvent(
  class OffloadedState (line 64) | class OffloadedState:
    method __init__ (line 73) | def __init__(
  class BlockStored (line 81) | class BlockStored(KVCacheEvent):
  class BlockRemoved (line 90) | class BlockRemoved(KVCacheEvent):
  class AllBlocksCleared (line 95) | class AllBlocksCleared(KVCacheEvent):
  class KVEventBatch (line 99) | class KVEventBatch(EventBatch):
  class EventPublisher (line 103) | class EventPublisher(ABC):
    method __init__ (line 117) | def __init__(self, attn_dp_rank: int = 0):
    method publish (line 121) | def publish(self, events: EventBatch) -> None:
    method shutdown (line 129) | def shutdown(self) -> None:
  class NullEventPublisher (line 133) | class NullEventPublisher(EventPublisher):
    method publish (line 136) | def publish(self, events) -> None:
    method shutdown (line 139) | def shutdown(self) -> None:
  class ZmqEventPublisher (line 143) | class ZmqEventPublisher(EventPublisher):
    method __init__ (line 170) | def __init__(
    method publish (line 212) | def publish(self, events: EventBatch) -> None:
    method shutdown (line 219) | def shutdown(self) -> None:
    method _socket_setup (line 250) | def _socket_setup(self) -> None:
    method _publisher_thread (line 283) | def _publisher_thread(self) -> None:
    method _service_replay (line 320) | def _service_replay(self) -> None:
    method offset_endpoint_port (line 344) | def offset_endpoint_port(
  class KVEventsConfig (line 377) | class KVEventsConfig(BaseModel):
    method from_cli (line 412) | def from_cli(cls, cli_value: str) -> "KVEventsConfig":
  class EventPublisherFactory (line 417) | class EventPublisherFactory:
    method register_publisher (line 424) | def register_publisher(cls, name: str, ctor: Callable[..., EventPublis...
    method create (line 430) | def create(cls, config: Optional[str], attn_dp_rank: int = 0) -> Event...

FILE: python/sglang/srt/disaggregation/mooncake/conn.py
  class KVTransferError (line 43) | class KVTransferError(Exception):
    method __init__ (line 44) | def __init__(self, bootstrap_room: int, failure_reason: str):
    method __str__ (line 49) | def __str__(self):
  class TransferKVChunk (line 55) | class TransferKVChunk:
  class TransferInfo (line 66) | class TransferInfo:
    method from_zmq (line 78) | def from_zmq(cls, msg: List[bytes]):
  class KVArgsRegisterInfo (line 107) | class KVArgsRegisterInfo:
    method from_zmq (line 123) | def from_zmq(cls, msg: List[bytes]):
  class AuxDataCodec (line 148) | class AuxDataCodec:
    method serialize_data_from_buffer (line 152) | def serialize_data_from_buffer(src_addr, data_length):
    method deserialize_data_to_buffer (line 158) | def deserialize_data_to_buffer(kv_args, buffer_index, aux_index, data):
  class MooncakeKVManager (line 168) | class MooncakeKVManager(CommonKVManager):
    method __init__ (line 171) | def __init__(
    method init_engine (line 217) | def init_engine(self):
    method register_buffer_to_engine (line 220) | def register_buffer_to_engine(self):
    method _transfer_data (line 239) | def _transfer_data(self, mooncake_session_id, transfer_blocks):
    method _send_kvcache_generic (line 248) | def _send_kvcache_generic(
    method send_kvcache (line 356) | def send_kvcache(
    method send_kvcache_slice (line 374) | def send_kvcache_slice(
    method send_aux (line 497) | def send_aux(
    method send_aux_tcp (line 521) | def send_aux_tcp(
    method send_aux_data_to_endpoint (line 546) | def send_aux_data_to_endpoint(
    method _handle_aux_data (line 569) | def _handle_aux_data(self, msg: List[bytes]):
    method maybe_send_extra (line 589) | def maybe_send_extra(
    method _send_mamba_state (line 653) | def _send_mamba_state(
    method _send_mamba_state_slice (line 674) | def _send_mamba_state_slice(
    method sync_status_to_decode_endpoint (line 760) | def sync_status_to_decode_endpoint(
    method transfer_worker (line 772) | def transfer_worker(
    method start_prefill_thread (line 924) | def start_prefill_thread(self):
    method start_decode_thread (line 960) | def start_decode_thread(self):
    method add_transfer_request (line 1048) | def add_transfer_request(
    method get_session_id (line 1093) | def get_session_id(self):
    method _handle_node_failure (line 1096) | def _handle_node_failure(self, failed_bootstrap_addr):
  class MooncakeKVSender (line 1128) | class MooncakeKVSender(CommonKVSender):
    method __init__ (line 1130) | def __init__(
    method send (line 1142) | def send(
    method poll (line 1182) | def poll(self) -> KVPoll:
    method clear (line 1208) | def clear(self) -> None:
    method failure_exception (line 1212) | def failure_exception(self):
    method abort (line 1225) | def abort(self):
  class MooncakeKVReceiver (line 1234) | class MooncakeKVReceiver(CommonKVReceiver):
    method __init__ (line 1235) | def __init__(
    method _register_kv_args (line 1250) | def _register_kv_args(self):
    method init (line 1298) | def init(
    method poll (line 1338) | def poll(self) -> KVPoll:
    method clear (line 1364) | def clear(self) -> None:
    method failure_exception (line 1374) | def failure_exception(self):
    method abort (line 1387) | def abort(self):
  class MooncakeKVBootstrapServer (line 1396) | class MooncakeKVBootstrapServer(CommonKVBootstrapServer):

FILE: python/sglang/srt/disaggregation/mooncake/utils.py
  function init_mooncake_custom_mem_pool (line 29) | def init_mooncake_custom_mem_pool(
  function check_mooncake_custom_mem_pool_enabled (line 92) | def check_mooncake_custom_mem_pool_enabled() -> Tuple[bool, Optional[str]]:

FILE: python/sglang/srt/disaggregation/mori/conn.py
  function _pack_mem_desc_list (line 47) | def _pack_mem_desc_list(mems: List[MemoryDesc]) -> bytes:
  function _unpack_mem_desc_list (line 54) | def _unpack_mem_desc_list(blob: bytes) -> List[MemoryDesc]:
  class TransferInfo (line 62) | class TransferInfo:
    method from_zmq (line 73) | def from_zmq(cls, payload: List[bytes]) -> TransferInfo:
  class KVArgsRegisterInfo (line 106) | class KVArgsRegisterInfo:
    method engine_key (line 119) | def engine_key(self) -> str:
    method from_zmq (line 123) | def from_zmq(cls, payload: List[bytes]) -> KVArgsRegisterInfo:
  class AuxDataCodec (line 148) | class AuxDataCodec:
    method serialize_data_from_buffer (line 150) | def serialize_data_from_buffer(src_addr, data_length):
    method deserialize_data_to_buffer (line 155) | def deserialize_data_to_buffer(kv_args, buffer_index, aux_index, data):
  class TPSliceConfig (line 165) | class TPSliceConfig:
  class MoriKVManager (line 176) | class MoriKVManager(CommonKVManager):
    method __init__ (line 179) | def __init__(
    method _init_engine (line 200) | def _init_engine(self) -> IOEngine:
    method _register_local_buffers (line 256) | def _register_local_buffers(self) -> None:
    method _handle_register_message (line 284) | def _handle_register_message(self, payload: List[bytes]) -> None:
    method _handle_transfer_message (line 291) | def _handle_transfer_message(self, payload: List[bytes]) -> None:
    method _validate_message (line 307) | def _validate_message(self, msg: List[bytes]) -> Optional[List[bytes]]:
    method _start_bootstrap_thread (line 316) | def _start_bootstrap_thread(self) -> None:
    method _cleanup_room_tracking (line 335) | def _cleanup_room_tracking(self, bootstrap_room: int) -> None:
    method _start_decode_thread (line 344) | def _start_decode_thread(self) -> None:
    method notify_decode_status (line 398) | def notify_decode_status(
    method _add_remote_peer (line 428) | def _add_remote_peer(self, register_info: KVArgsRegisterInfo) -> None:
    method _get_mha_mem_desc_slices (line 442) | def _get_mha_mem_desc_slices(
    method _get_mla_mem_desc_slices (line 468) | def _get_mla_mem_desc_slices(
    method _issue_layer_transfers (line 482) | def _issue_layer_transfers(
    method _build_tp_slice_config (line 508) | def _build_tp_slice_config(self, peer_info: KVArgsRegisterInfo) -> TPS...
    method _issue_tp_slice_transfers (line 562) | def _issue_tp_slice_transfers(
    method send_kvcache (line 623) | def send_kvcache(
    method send_aux (line 707) | def send_aux(
    method send_aux_tcp (line 716) | def send_aux_tcp(
    method send_aux_data_to_endpoint (line 742) | def send_aux_data_to_endpoint(
    method _handle_aux_data (line 765) | def _handle_aux_data(self, msg: List[bytes]):
    method add_transfer_request (line 785) | def add_transfer_request(
  class MoriKVSender (line 838) | class MoriKVSender(CommonKVSender):
    method __init__ (line 839) | def __init__(
    method send (line 855) | def send(
    method poll (line 889) | def poll(self) -> KVPoll:
    method _all_transfers_finished (line 924) | def _all_transfers_finished(self) -> bool:
    method _has_transfer_error (line 931) | def _has_transfer_error(self) -> bool:
    method _collect_failure_reason (line 934) | def _collect_failure_reason(self) -> str:
    method _notify_decode (line 940) | def _notify_decode(
    method _finalize_failure (line 951) | def _finalize_failure(self, failure_reason: Optional[str] = None) -> N...
    method clear (line 961) | def clear(self) -> None:
    method failure_exception (line 964) | def failure_exception(self):
    method abort (line 974) | def abort(self):
  class MoriKVReceiver (line 981) | class MoriKVReceiver(CommonKVReceiver):
    method __init__ (line 983) | def __init__(
    method _register_kv_args (line 1000) | def _register_kv_args(self):
    method init (line 1032) | def init(
    method poll (line 1066) | def poll(self) -> KVPoll:
    method clear (line 1086) | def clear(self) -> None:
    method failure_exception (line 1094) | def failure_exception(self):
    method abort (line 1105) | def abort(self):
  class MoriKVBootstrapServer (line 1115) | class MoriKVBootstrapServer(CommonKVBootstrapServer):

FILE: python/sglang/srt/disaggregation/nixl/conn.py
  class TransferInfo (line 36) | class TransferInfo:
    method is_dummy (line 48) | def is_dummy(self):
    method from_zmq (line 52) | def from_zmq(cls, msg: List[bytes]):
  class KVArgsRegisterInfo (line 72) | class KVArgsRegisterInfo:
    method from_zmq (line 89) | def from_zmq(cls, msg: List[bytes]):
  class TransferStatus (line 113) | class TransferStatus:
    method is_done (line 133) | def is_done(self):
    method is_failed (line 153) | def is_failed(self):
  class NixlKVManager (line 157) | class NixlKVManager(CommonKVManager):
    method __init__ (line 158) | def __init__(
    method _start_heartbeat_checker_thread (line 204) | def _start_heartbeat_checker_thread(self):
    method _handle_node_failure (line 256) | def _handle_node_failure(self, failed_bootstrap_addr):
    method register_buffer_to_engine (line 290) | def register_buffer_to_engine(self):
    method _add_remote_peer (line 326) | def _add_remote_peer(self, decode_kv_args: KVArgsRegisterInfo):
    method _send_kvcache_generic (line 334) | def _send_kvcache_generic(
    method send_kvcache (line 444) | def send_kvcache(
    method send_kvcache_slice (line 464) | def send_kvcache_slice(
    method send_aux (line 589) | def send_aux(
    method _send_mamba_state (line 627) | def _send_mamba_state(
    method maybe_send_extra (line 674) | def maybe_send_extra(
    method add_transfer_request (line 727) | def add_transfer_request(
    method update_transfer_status (line 811) | def update_transfer_status(self):
    method check_transfer_done (line 845) | def check_transfer_done(self, room: int):
    method _start_bootstrap_thread (line 850) | def _start_bootstrap_thread(self):
  class NixlKVSender (line 888) | class NixlKVSender(CommonKVSender):
    method __init__ (line 889) | def __init__(
    method send (line 902) | def send(
    method poll (line 940) | def poll(self) -> KVPoll:
    method failure_exception (line 950) | def failure_exception(self):
  class NixlKVReceiver (line 954) | class NixlKVReceiver(CommonKVReceiver):
    method __init__ (line 955) | def __init__(
    method init (line 973) | def init(
    method poll (line 1021) | def poll(self) -> KVPoll:
    method _register_kv_args (line 1060) | def _register_kv_args(self):
    method failure_exception (line 1092) | def failure_exception(self):
  class NixlKVBootstrapServer (line 1096) | class NixlKVBootstrapServer(CommonKVBootstrapServer):

FILE: python/sglang/srt/disaggregation/prefill.py
  function release_req_to_metadata_buffer (line 65) | def release_req_to_metadata_buffer(
  class PrefillBootstrapQueue (line 86) | class PrefillBootstrapQueue:
    method __init__ (line 91) | def __init__(
    method _init_kv_manager (line 133) | def _init_kv_manager(self) -> CommonKVManager:
    method add (line 206) | def add(self, req: Req, num_kv_heads: int) -> None:
    method extend (line 229) | def extend(self, reqs: List[Req], num_kv_heads: int) -> None:
    method _check_if_req_exceed_kv_capacity (line 233) | def _check_if_req_exceed_kv_capacity(self, req: Req) -> bool:
    method _process_req (line 243) | def _process_req(self, req: Req) -> None:
    method pop_bootstrapped (line 249) | def pop_bootstrapped(
  class SchedulerDisaggregationPrefillMixin (line 334) | class SchedulerDisaggregationPrefillMixin:
    method get_next_disagg_prefill_batch_to_run (line 339) | def get_next_disagg_prefill_batch_to_run(
    method event_loop_normal_disagg_prefill (line 357) | def event_loop_normal_disagg_prefill(self: Scheduler) -> None:
    method event_loop_overlap_disagg_prefill (line 385) | def event_loop_overlap_disagg_prefill(self: Scheduler) -> None:
    method process_batch_result_disagg_prefill (line 424) | def process_batch_result_disagg_prefill(
    method process_disagg_prefill_inflight_queue (line 545) | def process_disagg_prefill_inflight_queue(
    method get_transferred_rids (line 660) | def get_transferred_rids(self: Scheduler) -> List[str]:
    method process_prefill_chunk (line 678) | def process_prefill_chunk(self: Scheduler) -> None:
    method send_kv_chunk (line 706) | def send_kv_chunk(

FILE: python/sglang/srt/disaggregation/utils.py
  class DisaggregationMode (line 33) | class DisaggregationMode(Enum):
  function poll_and_all_reduce (line 47) | def poll_and_all_reduce(pollers, gloo_group: dist.ProcessGroup):
  function poll_and_all_reduce_attn_cp_tp_group (line 63) | def poll_and_all_reduce_attn_cp_tp_group(
  class ReqToMetadataIdxAllocator (line 88) | class ReqToMetadataIdxAllocator:
    method __init__ (line 91) | def __init__(
    method available_size (line 98) | def available_size(self):
    method alloc (line 101) | def alloc(self) -> Optional[int]:
    method free (line 107) | def free(self, free_index: int):
  class MetadataBuffers (line 111) | class MetadataBuffers:
    method __init__ (line 112) | def __init__(
    method get_buf_infos (line 173) | def get_buf_infos(self):
    method get_buf (line 212) | def get_buf(self, idx: int):
    method set_buf (line 226) | def set_buf(self, req: Req):
  class TransferBackend (line 277) | class TransferBackend(Enum):
  class KVClassType (line 285) | class KVClassType(Enum):
  function get_kv_class (line 294) | def get_kv_class(
  function get_kv_class (line 298) | def get_kv_class(
  function get_kv_class (line 302) | def get_kv_class(
  function get_kv_class (line 306) | def get_kv_class(
  function get_kv_class (line 310) | def get_kv_class(
  function get_kv_class (line 315) | def get_kv_class(
  function kv_to_page_indices (line 412) | def kv_to_page_indices(kv_indices: np.ndarray, page_size: int):
  function kv_to_page_num (line 422) | def kv_to_page_num(num_kv_indices: int, page_size: int):
  function page_indices_to_cp_rank_page_indices (line 427) | def page_indices_to_cp_rank_page_indices(
  function filter_kv_indices_for_cp_rank (line 473) | def filter_kv_indices_for_cp_rank(
  function is_mla_backend (line 513) | def is_mla_backend(target_kv_pool) -> bool:
  function prepare_abort (line 519) | def prepare_abort(req: Req, error_message: str, status_code=None):

FILE: python/sglang/srt/distributed/communication_op.py
  function tensor_model_parallel_all_reduce (line 11) | def tensor_model_parallel_all_reduce(input_: torch.Tensor) -> torch.Tensor:
  function tensor_model_parallel_fused_allreduce_rmsnorm (line 16) | def tensor_model_parallel_fused_allreduce_rmsnorm(
  function tensor_model_parallel_all_gather (line 31) | def tensor_model_parallel_all_gather(
  function tensor_model_parallel_gather (line 38) | def tensor_model_parallel_gather(
  function broadcast_tensor_dict (line 45) | def broadcast_tensor_dict(

FILE: python/sglang/srt/distributed/device_communicators/cuda_wrapper.py
  class cudaIpcMemHandle_t (line 30) | class cudaIpcMemHandle_t(ctypes.Structure):
  class Function (line 35) | class Function:
  function find_loaded_library (line 41) | def find_loaded_library(lib_name) -> Optional[str]:
  class CudaRTLibrary (line 68) | class CudaRTLibrary:
    method __init__ (line 118) | def __init__(self, so_file: Optional[str] = None):
    method CUDART_CHECK (line 137) | def CUDART_CHECK(self, result: cudaError_t) -> None:
    method cudaGetErrorString (line 142) | def cudaGetErrorString(self, error: cudaError_t) -> str:
    method cudaSetDevice (line 145) | def cudaSetDevice(self, device: int) -> None:
    method cudaDeviceSynchronize (line 148) | def cudaDeviceSynchronize(self) -> None:
    method cudaDeviceReset (line 151) | def cudaDeviceReset(self) -> None:
    method cudaMalloc (line 154) | def cudaMalloc(self, size: int) -> ctypes.c_void_p:
    method cudaFree (line 159) | def cudaFree(self, devPtr: ctypes.c_void_p) -> None:
    method cudaMemset (line 162) | def cudaMemset(self, devPtr: ctypes.c_void_p, value: int, count: int) ...
    method cudaMemcpy (line 165) | def cudaMemcpy(
    method cudaIpcGetMemHandle (line 172) | def cudaIpcGetMemHandle(self, devPtr: ctypes.c_void_p) -> cudaIpcMemHa...
    method cudaIpcOpenMemHandle (line 179) | def cudaIpcOpenMemHandle(self, handle: cudaIpcMemHandle_t) -> ctypes.c...

FILE: python/sglang/srt/distributed/device_communicators/custom_all_reduce.py
  function _can_p2p (line 39) | def _can_p2p(rank: int, world_size: int) -> bool:
  class CustomAllreduce (line 53) | class CustomAllreduce:
    method __init__ (line 64) | def __init__(
    method create_shared_buffer (line 216) | def create_shared_buffer(
    method free_shared_buffer (line 243) | def free_shared_buffer(
    method capture (line 251) | def capture(self):
    method _get_ipc_meta (line 265) | def _get_ipc_meta(self, inp: torch.Tensor):
    method _gather_ipc_meta (line 275) | def _gather_ipc_meta(self, shard_data):
    method register_buffer (line 299) | def register_buffer(self, inp: torch.Tensor):
    method register_graph_buffers (line 303) | def register_graph_buffers(self):
    method should_custom_ar (line 329) | def should_custom_ar(self, inp: torch.Tensor):
    method all_reduce_reg (line 354) | def all_reduce_reg(self, inp: torch.Tensor, out: torch.Tensor = None):
    method all_reduce_unreg (line 361) | def all_reduce_unreg(self, inp: torch.Tensor, out: torch.Tensor = None):
    method all_reduce (line 367) | def all_reduce(
    method deterministic_all_reduce (line 390) | def deterministic_all_reduce(
    method custom_all_reduce (line 407) | def custom_all_reduce(self, input: torch.Tensor) -> Optional[torch.Ten...
    method close (line 444) | def close(self):
    method __del__ (line 452) | def __del__(self):
  function dispatch_custom_allreduce (line 456) | def dispatch_custom_allreduce():

FILE: python/sglang/srt/distributed/device_communicators/custom_all_reduce_ops.py
  function init_custom_ar (line 37) | def init_custom_ar(
  function all_reduce (line 45) | def all_reduce(
  function dispose (line 54) | def dispose(fa: int) -> None:
  function meta_size (line 57) | def meta_size() -> int:
  function register_buffer (line 60) | def register_buffer(fa: int, ipc_tensors: List[int]) -> None:
  function get_graph_buffer_ipc_meta (line 63) | def get_graph_buffer_ipc_meta(fa: int) -> Tuple[List[int], List[int]]:
  function register_graph_buffers (line 66) | def register_graph_buffers(
  function init_custom_ar (line 74) | def init_custom_ar(
  function all_reduce_reg (line 86) | def all_reduce_reg(fa: int, inp: torch.Tensor, out: torch.Tensor) -> None:
  function all_reduce_unreg (line 89) | def all_reduce_unreg(
  function deterministic_all_reduce_reg (line 94) | def deterministic_all_reduce_reg(
  function deterministic_all_reduce_unreg (line 99) | def deterministic_all_reduce_unreg(
  function dispose (line 104) | def dispose(fa: int) -> None:
  function meta_size (line 107) | def meta_size() -> int:
  function register_buffer (line 110) | def register_buffer(
  function get_graph_buffer_ipc_meta (line 115) | def get_graph_buffer_ipc_meta(fa: int) -> Tuple[torch.Tensor, List[int]]:
  function register_graph_buffers (line 118) | def register_graph_buffers(
  function allocate_meta_buffer (line 123) | def allocate_meta_buffer(size: int) -> torch.Tensor:
  function get_meta_buffer_ipc_handle (line 126) | def get_meta_buffer_ipc_handle(inp: torch.Tensor) -> torch.Tensor:
  function init_custom_qr (line 140) | def init_custom_qr(
  function qr_get_handle (line 145) | def qr_get_handle(fa: int) -> torch.Tensor:
  function qr_open_handles (line 148) | def qr_open_handles(fa: int, handles: list[torch.Tensor]) -> None:
  function qr_all_reduce (line 151) | def qr_all_reduce(
  function qr_destroy (line 160) | def qr_destroy(fa: int) -> None:
  function qr_max_size (line 163) | def qr_max_size() -> int:
  function mscclpp_generate_unique_id (line 176) | def mscclpp_generate_unique_id() -> bytes:
  function mscclpp_init_context (line 179) | def mscclpp_init_context(
  function mscclpp_allreduce (line 202) | def mscclpp_allreduce(

FILE: python/sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py
  function update_environment_variables (line 57) | def update_environment_variables(envs: Dict[str, str]):
  function producer (line 69) | def producer(
  function consumer (line 103) | def consumer(
  function can_actually_p2p (line 144) | def can_actually_p2p(
  function gpu_p2p_access_check (line 244) | def gpu_p2p_access_check(src: int, tgt: int) -> bool:
  function with_nvml_context (line 319) | def with_nvml_context(fn: Callable[_P, _R]) -> Callable[_P, _R]:
  function is_full_nvlink (line 339) | def is_full_nvlink(physical_device_ids: List[int], world_size: int) -> b...
  function is_weak_contiguous (line 380) | def is_weak_contiguous(inp: torch.Tensor):

FILE: python/sglang/srt/distributed/device_communicators/hpu_communicator.py
  class HpuCommunicator (line 13) | class HpuCommunicator:
    method __init__ (line 15) | def __init__(self, group: ProcessGroup):
    method all_reduce (line 23) | def all_reduce(self, x: torch.Tensor) -> torch.Tensor:
    method all_gather (line 31) | def all_gather(self, x: torch.Tensor, dim: int = -1) -> torch.Tensor:

FILE: python/sglang/srt/distributed/device_communicators/mooncake_transfer_engine.py
  function get_ib_devices_for_gpu (line 15) | def get_ib_devices_for_gpu(ib_device_str: Optional[str], gpu_id: int) ->...
  class MooncakeTransferEngine (line 93) | class MooncakeTransferEngine:
    method __init__ (line 96) | def __init__(
    method register (line 124) | def register(self, ptr, length):
    method deregister (line 134) | def deregister(self, ptr):
    method batch_register (line 144) | def batch_register(self, ptrs: List[int], lengths: List[int]) -> int:
    method batch_deregister (line 161) | def batch_deregister(self, ptrs: List[int]) -> int:
    method initialize (line 173) | def initialize(
    method transfer_sync (line 202) | def transfer_sync(
    method batch_transfer_sync (line 223) | def batch_transfer_sync(
    method get_session_id (line 254) | def get_session_id(self):
    method get_engine (line 257) | def get_engine(self):
    method get_ib_device (line 260) | def get_ib_device(self):
  function init_mooncake_transfer_engine (line 264) | def init_mooncake_transfer_engine(
  function get_mooncake_transfer_engine (line 284) | def get_mooncake_transfer_engine() -> Optional[MooncakeTransferEngine]:

FILE: python/sglang/srt/distributed/device_communicators/npu_communicator.py
  class NpuCommunicator (line 8) | class NpuCommunicator:
    method __init__ (line 10) | def __init__(self, group: ProcessGroup):
    method all_reduce (line 18) | def all_reduce(self, x: torch.Tensor) -> torch.Tensor:
    method all_gather (line 22) | def all_gather(self, x: torch.Tensor, dim: int = -1) -> torch.Tensor:

FILE: python/sglang/srt/distributed/device_communicators/pymscclpp.py
  class MscclContextSelection (line 21) | class MscclContextSelection(IntEnum):
  function mscclpp_is_weak_contiguous (line 26) | def mscclpp_is_weak_contiguous(inp: torch.Tensor):
  function mscclpp_convert_to_bytes (line 33) | def mscclpp_convert_to_bytes(size_str):
  function mscclpp_bench_time (line 74) | def mscclpp_bench_time(func, test_niter: int = 10, warmup_niter: int = 2):
  class PyMscclppCommunicator (line 91) | class PyMscclppCommunicator:
    method __init__ (line 98) | def __init__(
    method pre_tune_config (line 230) | def pre_tune_config(self, dtype=torch.bfloat16) -> bool:
    method should_mscclpp_allreduce (line 260) | def should_mscclpp_allreduce(
    method all_reduce (line 276) | def all_reduce(self, tensor: torch.Tensor, op: ReduceOp = ReduceOp.SUM):
    method change_state (line 289) | def change_state(

FILE: python/sglang/srt/distributed/device_communicators/pynccl.py
  class PyNcclCommunicator (line 27) | class PyNcclCommunicator:
    method __init__ (line 29) | def __init__(
    method _resolve_stream (line 129) | def _resolve_stream(self, stream: Optional[torch.cuda.Stream]):
    method all_reduce (line 144) | def all_reduce(
    method outplace_all_reduce (line 167) | def outplace_all_reduce(
    method all_gather (line 196) | def all_gather(
    method cp_all_gather_into_tensor (line 241) | def cp_all_gather_into_tensor(
    method reduce_scatter (line 269) | def reduce_scatter(
    method send (line 317) | def send(self, tensor: torch.Tensor, dst: int, stream=None):
    method recv (line 334) | def recv(self, tensor: torch.Tensor, src: int, stream=None):
    method broadcast (line 351) | def broadcast(self, tensor: torch.Tensor, src: int, stream=None):
    method register_comm_window_raw (line 377) | def register_comm_window_raw(self, ptr: int, size: int):
    method deregister_comm_window (line 380) | def deregister_comm_window(self, window):
    method group_start (line 383) | def group_start(self):
    method group_end (line 386) | def group_end(self):
    method change_state (line 390) | def change_state(

FILE: python/sglang/srt/distributed/device_communicators/pynccl_allocator.py
  function is_symmetric_memory_enabled (line 77) | def is_symmetric_memory_enabled():
  function set_graph_pool_id (line 84) | def set_graph_pool_id(graph_pool_id):
  function disable_symmetric_memory_context (line 89) | def disable_symmetric_memory_context():
  function restore_symmetric_memory_context (line 97) | def restore_symmetric_memory_context(saved_context):
  function get_nccl_mem_pool (line 102) | def get_nccl_mem_pool():
  class SymmetricMemoryContext (line 138) | class SymmetricMemoryContext:
    method __init__ (line 148) | def __init__(
    method __enter__ (line 157) | def __enter__(self):
    method __exit__ (line 190) | def __exit__(self, exc_type, exc_val, exc_tb):
  function use_symmetric_memory (line 207) | def use_symmetric_memory(group_coordinator: GroupCoordinator, disabled: ...

FILE: python/sglang/srt/distributed/device_communicators/pynccl_wrapper.py
  function find_nccl_library (line 37) | def find_nccl_library() -> str:
  class ncclUniqueId (line 75) | class ncclUniqueId(ctypes.Structure):
  class ncclDataTypeEnum (line 85) | class ncclDataTypeEnum:
    method from_torch (line 104) | def from_torch(cls, dtype: torch.dtype) -> int:
  class ncclRedOpTypeEnum (line 127) | class ncclRedOpTypeEnum:
    method from_torch (line 136) | def from_torch(cls, op: ReduceOp) -> int:
  class Function (line 151) | class Function:
  class NCCLLibrary (line 157) | class NCCLLibrary:
    method __init__ (line 334) | def __init__(self, so_file: Optional[str] = None):
    method ncclGetErrorString (line 370) | def ncclGetErrorString(self, result: ncclResult_t) -> str:
    method NCCL_CHECK (line 373) | def NCCL_CHECK(self, result: ncclResult_t) -> None:
    method ncclGetRawVersion (line 378) | def ncclGetRawVersion(self) -> int:
    method ncclGetVersion (line 384) | def ncclGetVersion(self) -> str:
    method ncclGetUniqueId (line 392) | def ncclGetUniqueId(self) -> ncclUniqueId:
    method ncclCommInitRank (line 397) | def ncclCommInitRank(
    method ncclAllReduce (line 408) | def ncclAllReduce(
    method ncclReduce (line 429) | def ncclReduce(
    method ncclReduceScatter (line 451) | def ncclReduceScatter(
    method ncclAllGather (line 472) | def ncclAllGather(
    method ncclSend (line 491) | def ncclSend(
    method ncclRecv (line 504) | def ncclRecv(
    method ncclBroadcast (line 517) | def ncclBroadcast(
    method ncclCommDestroy (line 533) | def ncclCommDestroy(self, comm: ncclComm_t) -> None:
    method ncclCommWindowRegister (line 536) | def ncclCommWindowRegister(
    method ncclCommWindowDeregister (line 547) | def ncclCommWindowDeregister(self, comm: ncclComm_t, window: ncclWindo...
    method ncclGroupStart (line 550) | def ncclGroupStart(self) -> None:
    method ncclGroupEnd (line 553) | def ncclGroupEnd(self) -> None:

FILE: python/sglang/srt/distributed/device_communicators/quick_all_reduce.py
  function qr_rocm_arch_available (line 28) | def qr_rocm_arch_available():
  class QuickReduceRegime (line 41) | class QuickReduceRegime(Enum):
  class QuickAllReduce (line 52) | class QuickAllReduce:
    method __init__ (line 67) | def __init__(
    method init_quick_all_reduce (line 169) | def init_quick_all_reduce(self):
    method create_shared_buffer (line 213) | def create_shared_buffer(self):
    method should_quick_allreduce (line 224) | def should_quick_allreduce(self, inp: torch.Tensor):
    method quick_all_reduce (line 248) | def quick_all_reduce(self, inp: torch.Tensor, *, out: torch.Tensor = N...
    method close (line 259) | def close(self):
    method __del__ (line 266) | def __del__(self):

FILE: python/sglang/srt/distributed/device_communicators/shm_broadcast.py
  class ShmRingBuffer (line 29) | class ShmRingBuffer:
    method __init__ (line 31) | def __init__(
    method __reduce__ (line 127) | def __reduce__(self):
    method __del__ (line 138) | def __del__(self):
    method get_data (line 145) | def get_data(self, current_idx: int):
    method get_metadata (line 152) | def get_metadata(self, current_idx: int):
  class Handle (line 160) | class Handle:
  class MessageQueue (line 169) | class MessageQueue:
    method __init__ (line 171) | def __init__(
    method export_handle (line 254) | def export_handle(self) -> Handle:
    method create_from_handle (line 258) | def create_from_handle(handle: Handle, rank) -> "MessageQueue":
    method wait_until_ready (line 300) | def wait_until_ready(self):
    method acquire_write (line 334) | def acquire_write(self):
    method acquire_read (line 387) | def acquire_read(self):
    method enqueue (line 430) | def enqueue(self, obj):
    method dequeue (line 445) | def dequeue(self):
    method broadcast_object (line 464) | def broadcast_object(self, obj=None):
    method create_from_process_group (line 472) | def create_from_process_group(

FILE: python/sglang/srt/distributed/device_communicators/torch_symm_mem.py
  class TorchSymmMemCommunicator (line 30) | class TorchSymmMemCommunicator:
    method __init__ (line 52) | def __init__(self, group: ProcessGroup, device: Union[int, str, torch....
    method should_torch_symm_mem_allreduce (line 110) | def should_torch_symm_mem_allreduce(self, inp: torch.Tensor):
    method all_reduce (line 133) | def all_reduce(

FILE: python/sglang/srt/distributed/device_communicators/xpu_communicator.py
  class XpuCommunicator (line 10) | class XpuCommunicator:
    method __init__ (line 12) | def __init__(self, group: ProcessGroup):
    method all_reduce (line 20) | def all_reduce(self, x: torch.Tensor) -> torch.Tensor:
    method gather (line 24) | def gather(

FILE: python/sglang/srt/distributed/naive_distributed.py
  class NaiveDistributed (line 12) | class NaiveDistributed:
    method __init__ (line 13) | def __init__(self, rank: int, world_size: int, rendezvous: str):
    method get_rank (line 24) | def get_rank(self):
    method get_world_size (line 27) | def get_world_size(self):
    method scatter (line 30) | def scatter(
    method all_gather_object (line 68) | def all_gather_object(self, obj: Any) -> List[Any]:
    method barrier (line 96) | def barrier(self):
  function get_naive_distributed (line 105) | def get_naive_distributed():
  function set_naive_distributed (line 110) | def set_naive_distributed(instance: NaiveDistributed):

FILE: python/sglang/srt/distributed/parallel_state.py
  function get_torch_distributed_pg_options (line 73) | def get_torch_distributed_pg_options(group_name=None):
  class GraphCaptureContext (line 92) | class GraphCaptureContext:
  class P2PWork (line 97) | class P2PWork:
  function _split_tensor_dict (line 102) | def _split_tensor_dict(
  function _get_unique_name (line 131) | def _get_unique_name(name: str) -> str:
  function _register_group (line 147) | def _register_group(group: "GroupCoordinator") -> None:
  function inplace_all_reduce (line 153) | def inplace_all_reduce(tensor: torch.Tensor, group_name: str) -> None:
  function outplace_all_reduce (line 162) | def outplace_all_reduce(
  function reg_all_gather_into_tensor (line 173) | def reg_all_gather_into_tensor(
  function reg_reduce_scatter_tensor (line 184) | def reg_reduce_scatter_tensor(
  class GroupCoordinator (line 194) | class GroupCoordinator:
    method __init__ (line 235) | def __init__(
    method __repr__ (line 443) | def __repr__(self):
    method first_rank (line 451) | def first_rank(self):
    method last_rank (line 456) | def last_rank(self):
    method is_first_rank (line 461) | def is_first_rank(self):
    method is_last_rank (line 466) | def is_last_rank(self):
    method next_rank (line 471) | def next_rank(self):
    method prev_rank (line 478) | def prev_rank(self):
    method graph_capture (line 485) | def graph_capture(
    method all_reduce (line 549) | def all_reduce(self, input_: torch.Tensor) -> torch.Tensor:
    method fused_allreduce_rmsnorm (line 649) | def fused_allreduce_rmsnorm(
    method _all_reduce_out_place (line 701) | def _all_reduce_out_place(
    method _all_reduce_in_place (line 730) | def _all_reduce_in_place(self, input_: torch.Tensor) -> None:
    method _reduce_scatter_tensor (line 740) | def _reduce_scatter_tensor(
    method reduce_scatter_tensor (line 759) | def reduce_scatter_tensor(self, output: torch.Tensor, input: torch.Ten...
    method reduce_scatter (line 765) | def reduce_scatter(
    method reduce_scatterv (line 774) | def reduce_scatterv(
    method _all_gather_into_tensor (line 809) | def _all_gather_into_tensor(self, output: torch.Tensor, input: torch.T...
    method all_gather_into_tensor (line 823) | def all_gather_into_tensor(self, output: torch.Tensor, input: torch.Te...
    method cp_all_gather_into_tensor_async (line 829) | def cp_all_gather_into_tensor_async(
    method all_gather (line 847) | def all_gather(
    method all_gatherv (line 921) | def all_gatherv(
    method gather (line 977) | def gather(
    method broadcast (line 1012) | def broadcast(self, input_: torch.Tensor, src: int = 0):
    method broadcast_object (line 1027) | def broadcast_object(self, obj: Optional[Any] = None, src: int = 0):
    method broadcast_object_list (line 1051) | def broadcast_object_list(
    method all_gather_object (line 1068) | def all_gather_object(self, obj: Any) -> List[Any]:
    method send_object (line 1073) | def send_object(
    method recv_object (line 1122) | def recv_object(
    method broadcast_tensor_dict (line 1158) | def broadcast_tensor_dict(
    method send_tensor_dict (line 1240) | def send_tensor_dict(
    method recv_tensor_dict (line 1295) | def recv_tensor_dict(
    method barrier (line 1355) | def barrier(self):
    method send (line 1364) | def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None:
    method recv (line 1376) | def recv(
    method destroy (line 1392) | def destroy(self):
  function get_world_group (line 1410) | def get_world_group() -> GroupCoordinator:
  function init_world_group (line 1415) | def init_world_group(
  function init_model_parallel_group (line 1433) | def init_model_parallel_group(
  function set_pdmux_status (line 1482) | def set_pdmux_status(enable_prefill_multiplexing: bool):
  function get_tp_group (line 1487) | def get_tp_group() -> GroupCoordinator:
  function get_attn_tp_group (line 1497) | def get_attn_tp_group() -> GroupCoordinator:
  function get_attn_cp_group (line 1504) | def get_attn_cp_group() -> GroupCoordinator:
  function get_moe_dp_group (line 1516) | def get_moe_dp_group() -> GroupCoordinator:
  function get_moe_ep_group (line 1521) | def get_moe_ep_group() -> GroupCoordinator:
  function get_moe_tp_group (line 1526) | def get_moe_tp_group() -> GroupCoordinator:
  function get_pp_group (line 1537) | def get_pp_group() -> GroupCoordinator:
  function get_mooncake_transfer_engine (line 1546) | def get_mooncake_transfer_engine():
  function graph_capture (line 1559) | def graph_capture(stream: Optional[torch.cuda.Stream] = None):
  function set_custom_all_reduce (line 1586) | def set_custom_all_reduce(enable: bool):
  function set_mscclpp_all_reduce (line 1591) | def set_mscclpp_all_reduce(enable: bool):
  function set_torch_symm_mem_all_reduce (line 1596) | def set_torch_symm_mem_all_reduce(enable: bool):
  function get_default_distributed_backend (line 1611) | def get_default_distributed_backend(device: str) -> str:
  function _create_global_tcp_store (line 1615) | def _create_global_tcp_store(rank: int, world_size: int) -> None:
  function init_distributed_environment (line 1670) | def init_distributed_environment(
  function initialize_model_parallel (line 1744) | def initialize_model_parallel(
  function create_custom_parallel_group (line 2019) | def create_custom_parallel_group(
  function ensure_model_parallel_initialized (line 2068) | def ensure_model_parallel_initialized(
  function model_parallel_is_initialized (line 2101) | def model_parallel_is_initialized():
  function patch_tensor_parallel_group (line 2110) | def patch_tensor_parallel_group(tp_group: GroupCoordinator):
  function get_world_size (line 2134) | def get_world_size():
  function get_world_rank (line 2139) | def get_world_rank():
  function get_tensor_model_parallel_world_size (line 2144) | def get_tensor_model_parallel_world_size():
  function get_tensor_model_parallel_rank (line 2149) | def get_tensor_model_parallel_rank():
  function get_attn_tensor_model_parallel_world_size (line 2155) | def get_attn_tensor_model_parallel_world_size():
  function get_attn_tensor_model_parallel_rank (line 2160) | def get_attn_tensor_model_parallel_rank():
  function get_attn_context_model_parallel_world_size (line 2166) | def get_attn_context_model_parallel_world_size():
  function get_attn_context_model_parallel_rank (line 2171) | def get_attn_context_model_parallel_rank():
  function get_pipeline_model_parallel_world_size (line 2176) | def get_pipeline_model_parallel_world_size():
  function get_pipeline_model_parallel_rank (line 2181) | def get_pipeline_model_parallel_rank():
  function get_moe_data_parallel_world_size (line 2187) | def get_moe_data_parallel_world_size():
  function get_moe_data_parallel_rank (line 2192) | def get_moe_data_parallel_rank():
  function get_moe_expert_parallel_world_size (line 2198) | def get_moe_expert_parallel_world_size():
  function get_moe_expert_parallel_rank (line 2203) | def get_moe_expert_parallel_rank():
  function get_moe_tensor_parallel_world_size (line 2209) | def get_moe_tensor_parallel_world_size():
  function get_moe_tensor_parallel_rank (line 2214) | def get_moe_tensor_parallel_rank():
  function destroy_model_parallel (line 2219) | def destroy_model_parallel():
  function destroy_distributed_environment (line 2262) | def destroy_distributed_environment():
  function cleanup_dist_env_and_memory (line 2271) | def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
  function in_the_same_node_as (line 2298) | def in_the_same_node_as(pg: ProcessGroup, source_rank: int = 0) -> List[...
  function monkey_patch_vllm_parallel_state (line 2369) | def monkey_patch_vllm_parallel_state(reverse: bool = False):

FILE: python/sglang/srt/distributed/utils.py
  function set_global_tcp_store (line 25) | def set_global_tcp_store(store: TCPStore) -> None:
  function get_global_tcp_store (line 36) | def get_global_tcp_store() -> Optional[TCPStore]:
  function ensure_divisibility (line 57) | def ensure_divisibility(numerator, denominator):
  function divide (line 64) | def divide(numerator, denominator):
  function split_tensor_along_last_dim (line 71) | def split_tensor_along_last_dim(
  function get_pp_indices (line 99) | def get_pp_indices(
  class StatelessProcessGroup (line 139) | class StatelessProcessGroup:
    method __post_init__ (line 160) | def __post_init__(self):
    method send_obj (line 166) | def send_obj(self, obj: Any, dst: int):
    method expire_data (line 174) | def expire_data(self):
    method recv_obj (line 185) | def recv_obj(self, src: int) -> Any:
    method broadcast_obj (line 193) | def broadcast_obj(self, obj: Optional[Any], src: int) -> Any:
    method all_gather_obj (line 211) | def all_gather_obj(self, obj: Any) -> list[Any]:
    method barrier (line 223) | def barrier(self):
    method create (line 232) | def create(

FILE: python/sglang/srt/dllm/algorithm/__init__.py
  function import_algorithms (line 10) | def import_algorithms():
  function get_algorithm (line 31) | def get_algorithm(config: DllmConfig):

FILE: python/sglang/srt/dllm/algorithm/base.py
  class DllmAlgorithm (line 6) | class DllmAlgorithm:
    method __init__ (line 8) | def __init__(
    method from_server_args (line 16) | def from_server_args(server_args: ServerArgs):

FILE: python/sglang/srt/dllm/algorithm/joint_threshold.py
  class JointThreshold (line 12) | class JointThreshold(DllmAlgorithm):
    method __init__ (line 14) | def __init__(
    method run (line 26) | def run(

FILE: python/sglang/srt/dllm/algorithm/low_confidence.py
  class LowConfidence (line 14) | class LowConfidence(DllmAlgorithm):
    method __init__ (line 16) | def __init__(
    method run (line 23) | def run(

FILE: python/sglang/srt/dllm/config.py
  class DllmConfig (line 7) | class DllmConfig:
    method __init__ (line 8) | def __init__(
    method from_server_args (line 23) | def from_server_args(

FILE: python/sglang/srt/dllm/mixin/req.py
  class DllmReqPhase (line 12) | class DllmReqPhase(str, enum.Enum):
  class ReqDllmMixin (line 19) | class ReqDllmMixin:
    method init_diffusion_llm (line 20) | def init_diffusion_llm(self: Req, dllm_config: DllmConfig):
    method is_dllm (line 31) | def is_dllm(self: Req) -> bool:
    method is_dllm_prefill (line 34) | def is_dllm_prefill(self: Req) -> bool:
    method determine_dllm_phase (line 40) | def determine_dllm_phase(self: Req):
    method _init_fill_ids_for_dllm (line 56) | def _init_fill_ids_for_dllm(self: Req):
    method _update_block_offset_for_dllm (line 68) | def _update_block_offset_for_dllm(self):

FILE: python/sglang/srt/dllm/mixin/scheduler.py
  class SchedulerDllmMixin (line 20) | class SchedulerDllmMixin:
    method init_diffusion_llm (line 21) | def init_diffusion_llm(self: Scheduler):
    method get_new_batch_dllm (line 29) | def get_new_batch_dllm(self: Scheduler) -> Optional[ScheduleBatch]:
    method process_batch_result_dllm (line 63) | def process_batch_result_dllm(
    method _fetch_waiting_reqs (line 102) | def _fetch_waiting_reqs(self: Scheduler):
    method _should_skip_prefill (line 114) | def _should_skip_prefill(self: Scheduler) -> bool:
    method _create_dllm_prefill_adder (line 132) | def _create_dllm_prefill_adder(self: Scheduler, running_bs: int) -> Pr...
    method _process_dllm_batches (line 148) | def _process_dllm_batches(self: Scheduler, adder: PrefillAdder) -> For...
    method _process_batch_by_phase (line 173) | def _process_batch_by_phase(
    method _update_state_for_batch (line 191) | def _update_state_for_batch(
    method _create_dllm_batch (line 208) | def _create_dllm_batch(
    method process_dllm_incoming_reqs (line 235) | def process_dllm_incoming_reqs(
    method process_dllm_staging_reqs (line 269) | def process_dllm_staging_reqs(
  class DllmManager (line 281) | class DllmManager:
    method __init__ (line 290) | def __init__(self, dllm_config: Optional[DllmConfig] = None):
    method get_prefill_requests (line 298) | def get_prefill_requests(self) -> List[Req]:
    method get_decode_requests (line 302) | def get_decode_requests(self) -> List[Req]:
    method add_waiting_reqs (line 306) | def add_waiting_reqs(self, reqs: Union[Req, List[Req]]) -> None:
    method add_staging_reqs (line 318) | def add_staging_reqs(self, reqs: Union[Req, List[Req]]) -> None:
    method _has_duplicate_reqs (line 323) | def _has_duplicate_reqs(self, reqs: List[Req]) -> bool:
    method any_staging_reqs (line 328) | def any_staging_reqs(self) -> bool:
    method is_empty (line 332) | def is_empty(self) -> bool:
    method increment_chunked_count (line 338) | def increment_chunked_count(self) -> None:
    method filter_finished_reqs (line 343) | def filter_finished_reqs(self) -> None:
    method init_next_round (line 348) | def init_next_round(self) -> None:

FILE: python/sglang/srt/elastic_ep/elastic_ep.py
  class ElasticEPState (line 13) | class ElasticEPState:
    method is_active_equal_last (line 18) | def is_active_equal_last(self) -> bool:
    method sync_active_to_cpu (line 21) | def sync_active_to_cpu(self):
    method snapshot_active_to_last (line 25) | def snapshot_active_to_last(self):
  class ElasticEPStateManager (line 30) | class ElasticEPStateManager:
    method instance (line 34) | def instance(cls) -> ElasticEPState:
    method init (line 38) | def init(cls, server_args: ServerArgs):
    method _select_device (line 47) | def _select_device() -> torch.device:
    method _build_state (line 56) | def _build_state(
    method healthy_rank_state (line 67) | def healthy_rank_state(

FILE: python/sglang/srt/elastic_ep/expert_backup_client.py
  function extract_layer_and_expert_id (line 23) | def extract_layer_and_expert_id(param_name):
  class ExpertBackupClient (line 31) | class ExpertBackupClient:
    method __init__ (line 32) | def __init__(self, server_args: ServerArgs, model_runner):
    method _receive_loop (line 73) | def _receive_loop(self):
    method start_transfer_client (line 85) | def start_transfer_client(self):
    method update_weights (line 101) | def update_weights(self, weight_name_filter=None):

FILE: python/sglang/srt/elastic_ep/expert_backup_manager.py
  function extract_expert_id (line 26) | def extract_expert_id(param_name):
  class ExpertBackupManager (line 34) | class ExpertBackupManager:
    method __init__ (line 35) | def __init__(self, server_args: ServerArgs, port_args: PortArgs):
    method backup_weights_from_disk (line 80) | def backup_weights_from_disk(self):
    method start_transfer_server (line 143) | def start_transfer_server(self):
  function run_expert_backup_manager_process (line 158) | def run_expert_backup_manager_process(
  function run_expert_backup_manager (line 177) | def run_expert_backup_manager(

FILE: python/sglang/srt/entrypoints/EngineBase.py
  class EngineBase (line 7) | class EngineBase(ABC):
    method generate (line 14) | def generate(
    method flush_cache (line 41) | def flush_cache(self):
    method update_weights_from_tensor (line 46) | def update_weights_from_tensor(
    method load_lora_adapter (line 55) | def load_lora_adapter(self, lora_name: str, lora_path: str):
    method unload_lora_adapter (line 59) | def unload_lora_adapter(self, lora_name: str):
    method release_memory_occupation (line 64) | def release_memory_occupation(self):
    method resume_memory_occupation (line 69) | def resume_memory_occupation(self):
    method shutdown (line 74) | def shutdown(self):

FILE: python/sglang/srt/entrypoints/anthropic/protocol.py
  class AnthropicError (line 9) | class AnthropicError(BaseModel):
  class AnthropicErrorResponse (line 16) | class AnthropicErrorResponse(BaseModel):
  class AnthropicUsage (line 23) | class AnthropicUsage(BaseModel):
  class AnthropicContentBlock (line 32) | class AnthropicContentBlock(BaseModel):
  class AnthropicMessage (line 53) | class AnthropicMessage(BaseModel):
  class AnthropicTool (line 60) | class AnthropicTool(BaseModel):
    method validate_input_schema (line 69) | def validate_input_schema(cls, v):
  class AnthropicToolChoice (line 77) | class AnthropicToolChoice(BaseModel):
  class AnthropicCountTokensRequest (line 84) | class AnthropicCountTokensRequest(BaseModel):
  class AnthropicCountTokensResponse (line 94) | class AnthropicCountTokensResponse(BaseModel):
  class AnthropicMessagesRequest (line 100) | class AnthropicMessagesRequest(BaseModel):
    method validate_model (line 118) | def validate_model(cls, v):
    method validate_max_tokens (line 125) | def validate_max_tokens(cls, v):
  class AnthropicDelta (line 131) | class AnthropicDelta(BaseModel):
  class AnthropicStreamEvent (line 145) | class AnthropicStreamEvent(BaseModel):
  class AnthropicMessagesResponse (line 166) | class AnthropicMessagesResponse(BaseModel):

FILE: python/sglang/srt/entrypoints/anthropic/serving.py
  function _wrap_sse_event (line 53) | def _wrap_sse_event(data: str, event_type: str) -> str:
  class AnthropicServing (line 58) | class AnthropicServing:
    method __init__ (line 65) | def __init__(self, openai_serving_chat: OpenAIServingChat):
    method handle_messages (line 68) | async def handle_messages(
    method _convert_to_chat_completion_request (line 89) | def _convert_to_chat_completion_request(
    method _handle_non_streaming (line 309) | async def _handle_non_streaming(
    method _handle_streaming (line 365) | async def _handle_streaming(
    method _generate_anthropic_stream (line 415) | async def _generate_anthropic_stream(
    method _convert_response (line 640) | def _convert_response(
    method _error_response (line 692) | def _error_response(
    method handle_count_tokens (line 707) | async def handle_count_tokens(

FILE: python/sglang/srt/entrypoints/context.py
  class ConversationContext (line 26) | class ConversationContext(ABC):
    method append_output (line 29) | def append_output(self, output) -> None:
    method call_tool (line 33) | async def call_tool(self) -> list[Message]:
    method need_builtin_tool_call (line 37) | def need_builtin_tool_call(self) -> bool:
    method render_for_completion (line 41) | def render_for_completion(self) -> list[int]:
  class SimpleContext (line 45) | class SimpleContext(ConversationContext):
    method __init__ (line 47) | def __init__(self):
    method append_output (line 50) | def append_output(self, output) -> None:
    method need_builtin_tool_call (line 53) | def need_builtin_tool_call(self) -> bool:
    method call_tool (line 56) | async def call_tool(self) -> list[Message]:
    method render_for_completion (line 59) | def render_for_completion(self) -> list[int]:
  class HarmonyContext (line 63) | class HarmonyContext(ConversationContext):
    method __init__ (line 65) | def __init__(
    method append_output (line 83) | def append_output(self, output) -> None:
    method messages (line 107) | def messages(self) -> list:
    method need_builtin_tool_call (line 110) | def need_builtin_tool_call(self) -> bool:
    method call_tool (line 119) | async def call_tool(self) -> list[Message]:
    method render_for_completion (line 135) | def render_for_completion(self) -> list[int]:
    method call_search_tool (line 138) | async def call_search_tool(
    method call_python_tool (line 151) | async def call_python_tool(
  class StreamingHarmonyContext (line 175) | class StreamingHarmonyContext(HarmonyContext):
    method __init__ (line 177) | def __init__(self, *args, **kwargs):
    method messages (line 187) | def messages(self) -> list:
    method append_output (line 190) | def append_output(self, output) -> None:
    method is_expecting_start (line 229) | def is_expecting_start(self) -> bool:
    method is_assistant_action_turn (line 232) | def is_assistant_action_turn(self) -> bool:
    method render_for_completion (line 235) | def render_for_completion(self) -> list[int]:

FILE: python/sglang/srt/entrypoints/engine.py
  class SchedulerInitResult (line 110) | class SchedulerInitResult:
  function init_tokenizer_manager (line 118) | def init_tokenizer_manager(
  class Engine (line 139) | class Engine(EngineBase):
    method __init__ (line 160) | def __init__(self, **kwargs):
    method _resolve_routed_dp_rank (line 229) | def _resolve_routed_dp_rank(
    method generate (line 260) | def generate(
    method async_generate (line 350) | async def async_generate(
    method encode (line 430) | def encode(
    method async_encode (line 459) | async def async_encode(
    method rerank (line 489) | def rerank(
    method _launch_scheduler_processes (line 503) | def _launch_scheduler_processes(
    method _launch_subprocesses (line 602) | def _launch_subprocesses(
    method shutdown (line 697) | def shutdown(self):
    method __enter__ (line 701) | def __enter__(self):
    method __exit__ (line 704) | def __exit__(self, exc_type, exc_value, traceback):
    method flush_cache (line 708) | def flush_cache(self):
    method open_session (line 711) | def open_session(
    method close_session (line 741) | def close_session(self, session_id: str) -> None:
    method start_profile (line 750) | def start_profile(self, **kwargs):
    method stop_profile (line 753) | def stop_profile(self):
    method start_expert_distribution_record (line 756) | def start_expert_distribution_record(self):
    method stop_expert_distribution_record (line 761) | def stop_expert_distribution_record(self):
    method dump_expert_distribution_record (line 766) | def dump_expert_distribution_record(self):
    method get_server_info (line 771) | def get_server_info(self):
    method init_weights_update_group (line 782) | def init_weights_update_group(
    method destroy_weights_update_group (line 804) | def destroy_weights_update_group(
    method update_weights_from_distributed (line 816) | def update_weights_from_distributed(
    method update_weights_from_tensor (line 838) | def update_weights_from_tensor(
    method update_weights_from_disk (line 862) | def update_weights_from_disk(
    method update_weights_from_ipc (line 882) | def update_weights_from_ipc(
    method get_weights_by_name (line 896) | def get_weights_by_name(self, name: str, truncate_size: int = 100):
    method load_lora_adapter_from_tensors (line 903) | def load_lora_adapter_from_tensors(
    method load_lora_adapter (line 926) | def load_lora_adapter(self, lora_name: str, lora_path: str, pinned: bo...
    method unload_lora_adapter (line 939) | def unload_lora_adapter(self, lora_name: str):
    method async_load_lora_adapter (line 948) | async def async_load_lora_adapter(
    method async_unload_lora_adapter (line 965) | async def async_unload_lora_adapter(self, lora_name: str):
    method release_memory_occupation (line 976) | def release_memory_occupation(self, tags: Optional[List[str]] = None):
    method resume_memory_occupation (line 982) | def resume_memory_occupation(self, tags: Optional[List[str]] = None):
    method freeze_gc (line 988) | def freeze_gc(self):
    method collective_rpc (line 1007) | def collective_rpc(self, method: str, **kwargs):
    method save_remote_model (line 1014) | def save_remote_model(self, **kwargs):
    method save_sharded_model (line 1017) | def save_sharded_model(self, **kwargs):
    method score (line 1020) | def score(
    method async_score (line 1071) | async def async_score(
  function _set_envs_and_config (line 1094) | def _set_envs_and_config(server_args: ServerArgs):
  function _wait_for_scheduler_ready (line 1182) | def _wait_for_scheduler_ready(
  function _calculate_rank_ranges (line 1207) | def _calculate_rank_ranges(
  function _compute_parallelism_ranks (line 1242) | def _compute_parallelism_ranks(

FILE: python/sglang/srt/entrypoints/grpc_server.py
  function serve_grpc (line 6) | async def serve_grpc(server_args, model_info=None):

FILE: python/sglang/srt/entrypoints/harmony_utils.py
  function get_encoding (line 54) | def get_encoding():
  function get_system_message (line 61) | def get_system_message(
  function get_developer_message (line 86) | def get_developer_message(
  function get_user_message (line 118) | def get_user_message(content: str) -> Message:
  function parse_response_input (line 122) | def parse_response_input(
  function parse_response_output (line 174) | def parse_response_output(output: ResponseOutputItem) -> Message:
  function parse_chat_input (line 190) | def parse_chat_input(chat_msg) -> Message:
  function render_for_completion (line 202) | def render_for_completion(messages: list[Message]) -> list[int]:
  function get_stop_tokens_for_assistant_actions (line 210) | def get_stop_tokens_for_assistant_actions() -> list[int]:
  function get_streamable_parser_for_assistant (line 214) | def get_streamable_parser_for_assistant() -> StreamableParser:
  function parse_output_message (line 218) | def parse_output_message(message: Message):
  function parse_remaining_state (line 324) | def parse_remaining_state(parser: StreamableParser):
  function parse_output_into_messages (line 364) | def parse_output_into_messages(token_ids: Iterable[int]):

FILE: python/sglang/srt/entrypoints/http_server.py
  class _GlobalState (line 193) | class _GlobalState:
  function set_global_state (line 211) | def set_global_state(global_state: _GlobalState):
  function get_global_state (line 216) | def get_global_state() -> _GlobalState:
  function init_multi_tokenizer (line 220) | async def init_multi_tokenizer() -> ServerArgs:
  function lifespan (line 272) | async def lifespan(fast_api_app: FastAPI):
  function validation_exception_handler (line 405) | async def validation_exception_handler(request: Request, exc: HTTPExcept...
  function validation_exception_handler (line 434) | async def validation_exception_handler(request: Request, exc: RequestVal...
  function validate_json_request (line 469) | async def validate_json_request(raw_request: Request):
  function health_generate (line 490) | async def health_generate(request: Request) -> Response:
  function get_model_info (line 568) | async def get_model_info():
  function model_info (line 578) | async def model_info():
  function weight_version (line 599) | async def weight_version():
  function get_server_info (line 608) | async def get_server_info():
  function server_info (line 618) | async def server_info():
  function get_load (line 638) | async def get_load():
  function set_internal_state (line 651) | async def set_internal_state(obj: SetInternalStateReq, request: Request):
  function _dumper_control_handler (line 661) | async def _dumper_control_handler(method: str, request: Request):
  function generate_request (line 678) | async def generate_request(obj: GenerateReqInput, request: Request):
  function encode_request (line 711) | async def encode_request(obj: EmbeddingReqInput, request: Request):
  function classify_request (line 723) | async def classify_request(obj: EmbeddingReqInput, request: Request):
  function flush_cache (line 736) | async def flush_cache():
  function clear_hicache_storage_backend_deprecated (line 748) | async def clear_hicache_storage_backend_deprecated():
  function clear_hicache_storage_backend (line 764) | async def clear_hicache_storage_backend():
  function attach_hicache_storage_backend (line 784) | async def attach_hicache_storage_backend(obj: AttachHiCacheStorageReqInp...
  function detach_hicache_storage_backend (line 816) | async def detach_hicache_storage_backend():
  function hicache_storage_backend_status (line 843) | async def hicache_storage_backend_status():
  function pin_prefix (line 858) | async def pin_prefix(obj: PinPrefixReqInput):
  function start_profile_async (line 877) | async def start_profile_async(obj: Optional[ProfileReqInput] = None):
  function stop_profile_async (line 902) | async def stop_profile_async():
  function set_trace_level (line 912) | def set_trace_level(level: int = Query(..., ge=0)):
  function freeze_gc_async (line 923) | async def freeze_gc_async():
  function start_expert_distribution_record_async (line 936) | async def start_expert_distribution_record_async():
  function stop_expert_distribution_record_async (line 947) | async def stop_expert_distribution_record_async():
  function dump_expert_distribution_record_async (line 958) | async def dump_expert_distribution_record_async():
  function update_weights_from_disk (line 969) | async def update_weights_from_disk(obj: UpdateWeightFromDiskReqInput, re...
  function init_weights_send_group_for_remote_instance (line 994) | async def init_weights_send_group_for_remote_instance(
  function send_weights_to_remote_instance (line 1011) | async def send_weights_to_remote_instance(
  function get_remote_instance_transfer_engine_info (line 1028) | async def get_remote_instance_transfer_engine_info(rank: int = None):
  function init_weights_update_group (line 1053) | async def init_weights_update_group(
  function destroy_weights_update_group (line 1069) | async def destroy_weights_update_group(
  function update_weights_from_tensor (line 1084) | async def update_weights_from_tensor(
  function update_weights_from_distributed (line 1106) | async def update_weights_from_distributed(
  function update_weights_from_ipc (line 1125) | async def update_weights_from_ipc(obj: UpdateWeightsFromIPCReqInput, req...
  function update_weight_version (line 1142) | async def update_weight_version(obj: UpdateWeightVersionReqInput, reques...
  function get_weights_by_name (line 1173) | async def get_weights_by_name(obj: GetWeightsByNameReqInput, request: Re...
  function release_memory_occupation (line 1187) | async def release_memory_occupation(
  function resume_memory_occupation (line 1199) | async def resume_memory_occupation(
  function check_weights (line 1211) | async def check_weights(obj: CheckWeightsReqInput, request: Request):
  function slow_down (line 1221) | async def slow_down(obj: SlowDownReqInput, request: Request):
  function load_lora_adapter (line 1235) | async def load_lora_adapter(obj: LoadLoRAAdapterReqInput, request: Reque...
  function load_lora_adapter_from_tensors (line 1252) | async def load_lora_adapter_from_tensors(
  function unload_lora_adapter (line 1268) | async def unload_lora_adapter(obj: UnloadLoRAAdapterReqInput, request: R...
  function open_session (line 1285) | async def open_session(obj: OpenSessionReqInput, request: Request):
  function close_session (line 1299) | async def close_session(obj: CloseSessionReqInput, request: Request):
  function configure_logging (line 1310) | async def configure_logging(obj: ConfigureLoggingReq, request: Request):
  function abort_request (line 1318) | async def abort_request(obj: AbortReq, request: Request):
  function parse_function_call_request (line 1330) | async def parse_function_call_request(obj: ParseFunctionCallReq, request...
  function separate_reasoning_request (line 1352) | async def separate_reasoning_request(obj: SeparateReasoningReqInput, req...
  function pause_generation (line 1373) | async def pause_generation(obj: PauseGenerationReqInput, request: Request):
  function continue_generation (line 1384) | async def continue_generation(obj: ContinueGenerationReqInput, request: ...
  function openai_v1_completions (line 1397) | async def openai_v1_completions(request: CompletionRequest, raw_request:...
  function openai_v1_chat_completions (line 1405) | async def openai_v1_chat_completions(
  function openai_v1_embeddings (line 1419) | async def openai_v1_embeddings(request: EmbeddingRequest, raw_request: R...
  function openai_v1_classify (line 1431) | async def openai_v1_classify(request: ClassifyRequest, raw_request: Requ...
  function openai_v1_tokenize (line 1449) | async def openai_v1_tokenize(request: TokenizeRequest, raw_request: Requ...
  function openai_v1_detokenize (line 1467) | async def openai_v1_detokenize(request: DetokenizeRequest, raw_request: ...
  function openai_v1_audio_transcriptions (line 1475) | async def openai_v1_audio_transcriptions(
  function available_models (line 1507) | async def available_models():
  function retrieve_model (line 1539) | async def retrieve_model(model: str):
  function v1_score_request (line 1564) | async def v1_score_request(request: ScoringRequest, raw_request: Request):
  function v1_responses_request (line 1572) | async def v1_responses_request(request: dict, raw_request: Request):
  function v1_retrieve_responses (line 1592) | async def v1_retrieve_responses(response_id: str, raw_request: Request):
  function v1_cancel_responses (line 1600) | async def v1_cancel_responses(response_id: str, raw_request: Request):
  function v1_rerank_request (line 1610) | async def v1_rerank_request(request: V1RerankReqInput, raw_request: Requ...
  function ollama_root (line 1624) | async def ollama_root():
  function sglang_root (line 1632) | async def sglang_root():
  function ollama_chat (line 1638) | async def ollama_chat(request: OllamaChatRequest, raw_request: Request):
  function ollama_generate (line 1644) | async def ollama_generate(request: OllamaGenerateRequest, raw_request: R...
  function ollama_tags (line 1652) | async def ollama_tags(raw_request: Request):
  function ollama_show (line 1658) | async def ollama_show(request: OllamaShowRequest, raw_request: Request):
  function anthropic_v1_messages (line 1667) | async def anthropic_v1_messages(
  function anthropic_v1_count_tokens (line 1677) | async def anthropic_v1_count_tokens(
  function sagemaker_health (line 1688) | async def sagemaker_health() -> Response:
  function sagemaker_chat_completions (line 1694) | async def sagemaker_chat_completions(
  function vertex_generate (line 1705) | async def vertex_generate(vertex_req: VertexGenerateReqInput, raw_reques...
  function _create_error_response (line 1731) | def _create_error_response(e):
  function _admin_api_key_missing_response (line 1744) | def _admin_api_key_missing_response(
  function _execute_server_warmup (line 1762) | def _execute_server_warmup(server_args: ServerArgs):
  function _wait_and_warmup (line 1921) | def _wait_and_warmup(
  function _wait_weights_ready (line 1949) | def _wait_weights_ready():
  function _setup_and_run_http_server (line 1970) | def _setup_and_run_http_server(
  function launch_server (line 2144) | def launch_server(

FILE: python/sglang/srt/entrypoints/http_server_engine.py
  function launch_server_process (line 14) | def launch_server_process(server_args: ServerArgs) -> multiprocessing.Pr...
  class HttpServerEngineAdapter (line 49) | class HttpServerEngineAdapter(EngineBase):
    method __init__ (line 56) | def __init__(self, **kwargs):
    method _make_request (line 63) | def _make_request(self, endpoint: str, payload: Optional[dict] = None):
    method update_weights_from_tensor (line 78) | def update_weights_from_tensor(
    method shutdown (line 102) | def shutdown(self):
    method generate (line 105) | def generate(
    method release_memory_occupation (line 137) | def release_memory_occupation(self):
    method resume_memory_occupation (line 140) | def resume_memory_occupation(self):
    method flush_cache (line 143) | def flush_cache(self):

FILE: python/sglang/srt/entrypoints/ollama/protocol.py
  class OllamaMessage (line 13) | class OllamaMessage(BaseModel):
  class OllamaChatRequest (line 21) | class OllamaChatRequest(BaseModel):
  class OllamaChatResponse (line 33) | class OllamaChatResponse(BaseModel):
  class OllamaChatStreamResponse (line 49) | class OllamaChatStreamResponse(BaseModel):
  class OllamaGenerateRequest (line 59) | class OllamaGenerateRequest(BaseModel):
  class OllamaGenerateResponse (line 77) | class OllamaGenerateResponse(BaseModel):
  class OllamaGenerateStreamResponse (line 94) | class OllamaGenerateStreamResponse(BaseModel):
  class OllamaModelInfo (line 104) | class OllamaModelInfo(BaseModel):
  class OllamaTagsResponse (line 115) | class OllamaTagsResponse(BaseModel):
  class OllamaShowRequest (line 121) | class OllamaShowRequest(BaseModel):
  class OllamaShowResponse (line 127) | class OllamaShowResponse(BaseModel):

FILE: python/sglang/srt/entrypoints/ollama/serving.py
  class OllamaServing (line 31) | class OllamaServing:
    method __init__ (line 34) | def __init__(self, tokenizer_manager):
    method _get_timestamp (line 37) | def _get_timestamp(self) -> str:
    method _convert_options_to_sampling_params (line 41) | def _convert_options_to_sampling_params(self, options: dict = None) ->...
    method handle_chat (line 68) | async def handle_chat(
    method _generate_chat_response (line 105) | async def _generate_chat_response(
    method _stream_chat_response (line 132) | async def _stream_chat_response(
    method handle_generate (line 173) | async def handle_generate(
    method _generate_generate_response (line 223) | async def _generate_generate_response(
    method _stream_generate_response (line 249) | async def _stream_generate_response(
    method get_tags (line 289) | def get_tags(self) -> OllamaTagsResponse:
    method get_show (line 310) | def get_show(self, model: str) -> OllamaShowResponse:

FILE: python/sglang/srt/entrypoints/ollama/smart_router.py
  class SmartRouter (line 23) | class SmartRouter:
    method __init__ (line 39) | def __init__(
    method _classify_with_llm (line 69) | def _classify_with_llm(
    method should_use_remote (line 104) | def should_use_remote(self, prompt: str, verbose: bool = False) -> tup...
    method chat (line 117) | def chat(
    method chat_stream (line 197) | def chat_stream(
  function main (line 244) | def main():

FILE: python/sglang/srt/entrypoints/openai/encoding_dsv32.py
  class DS32EncodingError (line 8) | class DS32EncodingError(Exception):
  function to_json (line 62) | def to_json(value: Any) -> str:
  function tools_from_openai_format (line 69) | def tools_from_openai_format(tools):
  function tool_calls_from_openai_format (line 73) | def tool_calls_from_openai_format(tool_calls):
  function tool_calls_to_openai_format (line 83) | def tool_calls_to_openai_format(tool_calls):
  function encode_arguments_to_dsml (line 96) | def encode_arguments_to_dsml(tool_call: Dict[str, str]) -> str:
  function decode_dsml_to_arguments (line 115) | def decode_dsml_to_arguments(
  function render_tools (line 133) | def render_tools(tools: List[Dict[str, Union[str, Dict[str, Any]]]]) -> ...
  function find_last_user_index (line 144) | def find_last_user_index(messages: List[Dict[str, Any]]) -> int:
  function render_message (line 153) | def render_message(
  function drop_thinking_messages (line 289) | def drop_thinking_messages(
  function encode_messages (line 310) | def encode_messages(
  function _read_until_stop (line 333) | def _read_until_stop(
  function parse_tool_calls (line 353) | def parse_tool_calls(index: int, text: str):
  function parse_message_from_completion_text (line 414) | def parse_message_from_completion_text(text: str, thinking_mode: str):

FILE: python/sglang/srt/entrypoints/openai/protocol.py
  class ModelCard (line 53) | class ModelCard(BaseModel):
  class ModelList (line 65) | class ModelList(BaseModel):
  class ErrorResponse (line 72) | class ErrorResponse(BaseModel):
  class LogProbs (line 80) | class LogProbs(BaseModel):
  class TopLogprob (line 87) | class TopLogprob(BaseModel):
  class ChatCompletionTokenLogprob (line 93) | class ChatCompletionTokenLogprob(BaseModel):
  class ChoiceLogprobs (line 100) | class ChoiceLogprobs(BaseModel):
  class CachedTokensDetails (line 105) | class CachedTokensDetails(BaseModel):
    method _serialize (line 115) | def _serialize(self, handler):
  class PromptTokensDetails (line 125) | class PromptTokensDetails(BaseModel):
  class UsageInfo (line 131) | class UsageInfo(BaseModel):
  class StreamOptions (line 140) | class StreamOptions(BaseModel):
  class JsonSchemaResponseFormat (line 145) | class JsonSchemaResponseFormat(BaseModel):
  class ResponseFormat (line 153) | class ResponseFormat(BaseModel):
  class StructuresResponseFormat (line 158) | class StructuresResponseFormat(BaseModel):
  class LegacyStructuralTagResponseFormat (line 165) | class LegacyStructuralTagResponseFormat(BaseModel):
  class FileRequest (line 181) | class FileRequest(BaseModel):
  class FileResponse (line 189) | class FileResponse(BaseModel):
  class FileDeleteResponse (line 198) | class FileDeleteResponse(BaseModel):
  class BatchRequest (line 204) | class BatchRequest(BaseModel):
  class BatchResponse (line 213) | class BatchResponse(BaseModel):
  function _migrate_deprecated_dp_rank (line 236) | def _migrate_deprecated_dp_rank(values: dict) -> dict:
  class CompletionRequest (line 250) | class CompletionRequest(BaseModel):
    method _handle_deprecated_dp_rank (line 323) | def _handle_deprecated_dp_rank(cls, values):
    method validate_max_tokens_positive (line 328) | def validate_max_tokens_positive(cls, v):
  class SglExt (line 334) | class SglExt(BaseModel):
    method _serialize (line 345) | def _serialize(self, handler):
  class CompletionResponseChoice (line 351) | class CompletionResponseChoice(BaseModel):
    method _serialize (line 360) | def _serialize(self, handler):
  class CompletionResponse (line 367) | class CompletionResponse(BaseModel):
    method _serialize (line 378) | def _serialize(self, handler):
  class CompletionResponseStreamChoice (line 385) | class CompletionResponseStreamChoice(BaseModel):
    method _serialize (line 394) | def _serialize(self, handler):
  class CompletionStreamResponse (line 401) | class CompletionStreamResponse(BaseModel):
    method _serialize (line 411) | def _serialize(self, handler):
  class ChatCompletionMessageContentTextPart (line 418) | class ChatCompletionMessageContentTextPart(BaseModel):
  class ChatCompletionMessageContentImageURL (line 423) | class ChatCompletionMessageContentImageURL(BaseModel):
  class ChatCompletionMessageContentVideoURL (line 430) | class ChatCompletionMessageContentVideoURL(BaseModel):
  class ChatCompletionMessageContentAudioURL (line 436) | class ChatCompletionMessageContentAudioURL(BaseModel):
  class ChatCompletionMessageContentImagePart (line 440) | class ChatCompletionMessageContentImagePart(BaseModel):
  class ChatCompletionMessageContentVideoPart (line 446) | class ChatCompletionMessageContentVideoPart(BaseModel):
  class ChatCompletionMessageContentAudioPart (line 451) | class ChatCompletionMessageContentAudioPart(BaseModel):
  class FunctionResponse (line 473) | class FunctionResponse(BaseModel):
  class ToolCall (line 480) | class ToolCall(BaseModel):
  class ChatCompletionMessageGenericParam (line 489) | class ChatCompletionMessageGenericParam(BaseModel):
    method _normalize_role (line 502) | def _normalize_role(cls, v):
  class ChatCompletionMessageUserParam (line 513) | class ChatCompletionMessageUserParam(BaseModel):
  class Function (line 523) | class Function(BaseModel):
  class Tool (line 532) | class Tool(BaseModel):
  class ToolChoiceFuncName (line 539) | class ToolChoiceFuncName(BaseModel):
  class ToolChoice (line 545) | class ToolChoice(BaseModel):
  class ChatCompletionRequest (line 552) | class ChatCompletionRequest(BaseModel):
    method _handle_deprecated_dp_rank (line 659) | def _handle_deprecated_dp_rank(cls, values):
    method set_tool_choice_default (line 664) | def set_tool_choice_default(cls, values):
    method normalize_reasoning_inputs (line 674) | def normalize_reasoning_inputs(cls, values: Dict):
    method set_json_schema (line 711) | def set_json_schema(cls, values):
    method to_sampling_params (line 741) | def to_sampling_params(
  class ChatMessage (line 829) | class ChatMessage(BaseModel):
  class ChatCompletionResponseChoice (line 836) | class ChatCompletionResponseChoice(BaseModel):
    method _serialize (line 849) | def _serialize(self, handler):
  class ChatCompletionResponse (line 856) | class ChatCompletionResponse(BaseModel):
    method _serialize (line 867) | def _serialize(self, handler):
  class DeltaMessage (line 874) | class DeltaMessage(BaseModel):
    method _serialize (line 882) | def _serialize(self, handler):
  class ChatCompletionResponseStreamChoice (line 889) | class ChatCompletionResponseStreamChoice(BaseModel):
  class ChatCompletionStreamResponse (line 901) | class ChatCompletionStreamResponse(BaseModel):
    method _serialize (line 911) | def _serialize(self, handler):
  class MultimodalEmbeddingInput (line 918) | class MultimodalEmbeddingInput(BaseModel):
  class EmbeddingRequest (line 929) | class EmbeddingRequest(BaseModel):
  class EmbeddingObject (line 946) | class EmbeddingObject(BaseModel):
  class ClassifyRequest (line 955) | class ClassifyRequest(BaseModel):
  class ClassifyData (line 967) | class ClassifyData(BaseModel):
  class ClassifyResponse (line 974) | class ClassifyResponse(BaseModel):
  class EmbeddingResponse (line 983) | class EmbeddingResponse(BaseModel):
  class ScoringRequest (line 990) | class ScoringRequest(BaseModel):
  class ScoringResponse (line 1005) | class ScoringResponse(BaseModel):
  class V1RerankReqInput (line 1014) | class V1RerankReqInput(BaseModel):
    method validate_top_n (line 1041) | def validate_top_n(cls, v):
    method is_multimodal (line 1046) | def is_multimodal(self) -> bool:
  class RerankResponse (line 1056) | class RerankResponse(BaseModel):
    method _serialize (line 1063) | def _serialize(self, handler):
  class TokenizeRequest (line 1071) | class TokenizeRequest(BaseModel):
  class TokenizeResponse (line 1082) | class TokenizeResponse(BaseModel):
  class DetokenizeRequest (line 1090) | class DetokenizeRequest(BaseModel):
  class DetokenizeResponse (line 1101) | class DetokenizeResponse(BaseModel):
  class ResponseReasoningParam (line 1120) | class ResponseReasoningParam(BaseModel):
  class ResponseTool (line 1129) | class ResponseTool(BaseModel):
  class ResponsesRequest (line 1144) | class ResponsesRequest(BaseModel):
    method to_sampling_params (line 1212) | def to_sampling_params(
  class PromptTokenUsageInfo (line 1259) | class PromptTokenUsageInfo(BaseModel):
  class ResponsesResponse (line 1265) | class ResponsesResponse(BaseModel):
    method from_request (line 1303) | def from_request(
  class RequestResponseMetadata (line 1381) | class RequestResponseMetadata(BaseModel):
  class MessageProcessingResult (line 1389) | class MessageProcessingResult:
  class ToolCallProcessingResult (line 1416) | class ToolCallProcessingResult(NamedTuple):
  class ResponseReasoningTextContent (line 1426) | class ResponseReasoningTextContent(BaseModel):
  class TranscriptionRequest (line 1439) | class TranscriptionRequest(BaseModel):
  class TranscriptionUsage (line 1452) | class TranscriptionUsage(BaseModel):
  class TranscriptionResponse (line 1459) | class TranscriptionResponse(BaseModel):
  class TranscriptionStreamChoice (line 1466) | class TranscriptionStreamChoice(BaseModel):
  class TranscriptionStreamResponse (line 1473) | class TranscriptionStreamResponse(BaseModel):

FILE: python/sglang/srt/entrypoints/openai/serving_base.py
  class OpenAIServingBase (line 26) | class OpenAIServingBase(ABC):
    method __init__ (line 29) | def __init__(self, tokenizer_manager: TokenizerManager):
    method _parse_model_parameter (line 40) | def _parse_model_parameter(self, model: str) -> Tuple[str, Optional[st...
    method _resolve_lora_path (line 55) | def _resolve_lora_path(
    method handle_request (line 73) | async def handle_request(
    method _request_id_prefix (line 136) | def _request_id_prefix(self) -> str:
    method _generate_request_id_base (line 140) | def _generate_request_id_base(self, request: OpenAIServingRequest) -> ...
    method _compute_extra_key (line 151) | def _compute_extra_key(self, request: OpenAIServingRequest) -> Optiona...
    method _convert_to_internal_request (line 165) | def _convert_to_internal_request(
    method _handle_streaming_request (line 173) | async def _handle_streaming_request(
    method _handle_non_streaming_request (line 189) | async def _handle_non_streaming_request(
    method _validate_request (line 205) | def _validate_request(self, _: OpenAIServingRequest) -> Optional[str]:
    method create_error_response (line 209) | def create_error_response(
    method create_streaming_error_response (line 227) | def create_streaming_error_response(
    method extract_custom_labels (line 243) | def extract_custom_labels(self, raw_request):
    method extract_routing_key (line 272) | def extract_routing_key(self, raw_request):
    method extract_routed_dp_rank_from_header (line 277) | def extract_routed_dp_rank_from_header(

FILE: python/sglang/srt/entrypoints/openai/serving_chat.py
  function _extract_max_dynamic_patch (line 62) | def _extract_max_dynamic_patch(request: ChatCompletionRequest):
  class OpenAIServingChat (line 88) | class OpenAIServingChat(OpenAIServingBase):
    method __init__ (line 93) | def __init__(
    method _handle_last_assistant_message (line 125) | def _handle_last_assistant_message(
    method _append_assistant_prefix_to_prompt_ids (line 163) | def _append_assistant_prefix_to_prompt_ids(
    method _use_dpsk_v32_encoding (line 181) | def _use_dpsk_v32_encoding(self) -> bool:
    method _request_id_prefix (line 190) | def _request_id_prefix(self) -> str:
    method _validate_request (line 193) | def _validate_request(self, request: ChatCompletionRequest) -> Optiona...
    method _convert_to_internal_request (line 241) | def _convert_to_internal_request(
    method _process_messages (line 328) | def _process_messages(
    method _apply_jinja_template (line 375) | def _apply_jinja_template(
    method _apply_conversation_template (line 535) | def _apply_conversation_template(
    method _handle_streaming_request (line 600) | async def _handle_streaming_request(
    method _generate_chat_stream (line 613) | async def _generate_chat_stream(
    method _handle_non_streaming_request (line 890) | async def _handle_non_streaming_request(
    method _build_chat_response (line 915) | def _build_chat_response(
    method _process_logprobs_tokens (line 1025) | def _process_logprobs_tokens(
    method _process_response_logprobs (line 1067) | def _process_response_logprobs(self, ret_item: Dict[str, Any]) -> Choi...
    method _process_tool_call_id (line 1077) | def _process_tool_call_id(
    method _process_tool_calls (line 1097) | def _process_tool_calls(
    method _process_streaming_logprobs (line 1176) | def _process_streaming_logprobs(
    method _process_reasoning_stream (line 1192) | def _process_reasoning_stream(
    method _get_history_tool_calls_cnt (line 1215) | def _get_history_tool_calls_cnt(self, request: ChatCompletionRequest) ...
    method _patch_mistral_skip_special_tokens (line 1235) | def _patch_mistral_skip_special_tokens(
    method _get_reasoning_from_request (line 1247) | def _get_reasoning_from_request(self, request: ChatCompletionRequest) ...
    method _process_tool_call_stream (line 1280) | async def _process_tool_call_stream(
    method _check_for_unstreamed_tool_args (line 1386) | def _check_for_unstreamed_tool_args(

FILE: python/sglang/srt/entrypoints/openai/serving_classify.py
  class OpenAIServingClassify (line 28) | class OpenAIServingClassify(OpenAIServingBase):
    method __init__ (line 31) | def __init__(
    method _request_id_prefix (line 47) | def _request_id_prefix(self) -> str:
    method _convert_to_internal_request (line 50) | def _convert_to_internal_request(
    method _validate_request (line 79) | def _validate_request(self, request: ClassifyRequest) -> Optional[str]:
    method _get_id2label_mapping (line 111) | def _get_id2label_mapping(self) -> Optional[Dict[int, str]]:
    method _handle_non_streaming_request (line 129) | async def _handle_non_streaming_request(
    method _build_classify_response (line 151) | def _build_classify_response(self, ret: List[Dict[str, Any]]) -> Class...

FILE: python/sglang/srt/entrypoints/openai/serving_completions.py
  class OpenAIServingCompletion (line 41) | class OpenAIServingCompletion(OpenAIServingBase):
    method __init__ (line 44) | def __init__(
    method _request_id_prefix (line 52) | def _request_id_prefix(self) -> str:
    method _validate_request (line 55) | def _validate_request(self, request: CompletionRequest) -> Optional[str]:
    method _convert_to_internal_request (line 63) | def _convert_to_internal_request(
    method _build_sampling_params (line 134) | def _build_sampling_params(self, request: CompletionRequest) -> Dict[s...
    method _handle_streaming_request (line 178) | async def _handle_streaming_request(
    method _generate_completion_stream (line 191) | async def _generate_completion_stream(
    method _handle_non_streaming_request (line 391) | async def _handle_non_streaming_request(
    method _build_completion_response (line 417) | def _build_completion_response(
    method _get_echo_text (line 510) | def _get_echo_text(self, request: CompletionRequest, index: int) -> str:
    method _prepare_echo_prompts (line 534) | def _prepare_echo_prompts(self, request: CompletionRequest) -> List[str]:

FILE: python/sglang/srt/entrypoints/openai/serving_embedding.py
  class OpenAIServingEmbedding (line 25) | class OpenAIServingEmbedding(OpenAIServingBase):
    method __init__ (line 28) | def __init__(
    method _request_id_prefix (line 36) | def _request_id_prefix(self) -> str:
    method _validate_request (line 39) | def _validate_request(self, request: EmbeddingRequest) -> Optional[str]:
    method _convert_to_internal_request (line 74) | def _convert_to_internal_request(
    method _handle_non_streaming_request (line 143) | async def _handle_non_streaming_request(
    method _build_embedding_response (line 163) | def _build_embedding_response(self, ret: List[Dict[str, Any]]) -> Embe...

FILE: python/sglang/srt/entrypoints/openai/serving_rerank.py
  function _get_yes_no_token_ids (line 22) | def _get_yes_no_token_ids(tokenizer) -> tuple[int, int]:
  function _is_qwen3_reranker_template (line 50) | def _is_qwen3_reranker_template(chat_template: str) -> bool:
  function _is_qwen3_vl_reranker_template (line 60) | def _is_qwen3_vl_reranker_template(chat_template: str) -> bool:
  function _is_qwen3_vl_model (line 78) | def _is_qwen3_vl_model(model_path: str) -> bool:
  function _detect_rerank_backend (line 86) | def _detect_rerank_backend(
  function _qwen3_rerank_score (line 112) | def _qwen3_rerank_score(p_yes: float, p_no: float) -> float:
  function _get_jinja_env (line 119) | def _get_jinja_env():
  function _render_jinja_chat_template (line 135) | def _render_jinja_chat_template(
  function _render_vl_jinja_template (line 165) | def _render_vl_jinja_template(
  function _extract_text_from_content (line 189) | def _extract_text_from_content(content: RerankContent) -> str:
  class OpenAIServingRerank (line 202) | class OpenAIServingRerank(OpenAIServingBase):
    method __init__ (line 205) | def __init__(self, tokenizer_manager, template_manager=None):
    method _request_id_prefix (line 219) | def _request_id_prefix(self) -> str:
    method _validate_request (line 222) | def _validate_request(self, request: V1RerankReqInput) -> Optional[str]:
    method _convert_to_internal_request (line 242) | def _convert_to_internal_request(
    method _handle_non_streaming_request (line 278) | async def _handle_non_streaming_request(
    method _handle_rerank_paths (line 316) | async def _handle_rerank_paths(
    method _handle_text_reranker_request (line 351) | async def _handle_text_reranker_request(
    method _handle_vl_reranker_request (line 395) | async def _handle_vl_reranker_request(
    method _build_vl_reranker_content (line 461) | def _build_vl_reranker_content(
    method _content_to_template_list (line 483) | def _content_to_template_list(
    method _extract_score_from_logprobs (line 532) | def _extract_score_from_logprobs(self, ret: Dict[str, Any]) -> float:
    method _build_rerank_response (line 557) | def _build_rerank_response(

FILE: python/sglang/srt/entrypoints/openai/serving_responses.py
  class OpenAIServingResponses (line 70) | class OpenAIServingResponses(OpenAIServingChat):
    method __init__ (line 73) | def __init__(
    method create_error_response (line 129) | def create_error_response(
    method create_streaming_error_response (line 144) | def create_streaming_error_response(
    method _request_id_prefix (line 161) | def _request_id_prefix(self) -> str:
    method create_responses (line 164) | async def create_responses(
    method _make_request (line 373) | async def _make_request(
    method _make_request_with_harmony (line 417) | def _make_request_with_harmony(
    method responses_full_generator (line 431) | async def responses_full_generator(
    method _make_response_output_items (line 527) | def _make_response_output_items(
    method _make_response_output_items_with_harmony (line 577) | def _make_response_output_items_with_harmony(
    method _construct_input_messages (line 591) | def _construct_input_messages(
    method _construct_input_messages_with_harmony (line 632) | def _construct_input_messages_with_harmony(
    method _run_background_request (line 707) | async def _run_background_request(
    method retrieve_responses (line 752) | async def retrieve_responses(
    method cancel_responses (line 766) | async def cancel_responses(
    method _make_invalid_id_error (line 799) | def _make_invalid_id_error(self, response_id: str):
    method _make_not_found_error (line 809) | def _make_not_found_error(self, response_id: str):
    method responses_stream_generator (line 817) | async def responses_stream_generator(
    method _generate_with_builtin_tools (line 1260) | async def _generate_with_builtin_tools(

FILE: python/sglang/srt/entrypoints/openai/serving_score.py
  class OpenAIServingScore (line 17) | class OpenAIServingScore(OpenAIServingBase):
    method _request_id_prefix (line 23) | def _request_id_prefix(self) -> str:
    method _convert_to_internal_request (line 26) | def _convert_to_internal_request(
    method _handle_non_streaming_request (line 37) | async def _handle_non_streaming_request(

FILE: python/sglang/srt/entrypoints/openai/serving_tokenize.py
  class OpenAIServingTokenize (line 19) | class OpenAIServingTokenize(OpenAIServingBase):
    method _request_id_prefix (line 22) | def _request_id_prefix(self) -> str:
    method _convert_to_internal_request (line 25) | def _convert_to_internal_request(
    method _handle_non_streaming_request (line 30) | async def _handle_non_streaming_request(
  class OpenAIServingDetokenize (line 73) | class OpenAIServingDetokenize(OpenAIServingBase):
    method _request_id_prefix (line 76) | def _request_id_prefix(self) -> str:
    method _convert_to_internal_request (line 79) | def _convert_to_internal_request(
    method _handle_non_streaming_request (line 84) | async def _handle_non_streaming_request(

FILE: python/sglang/srt/entrypoints/openai/serving_transcription.py
  class OpenAIServingTranscription (line 48) | class OpenAIServingTranscription(OpenAIServingBase):
    method __init__ (line 51) | def __init__(self, tokenizer_manager: TokenizerManager):
    method _request_id_prefix (line 54) | def _request_id_prefix(self) -> str:
    method _validate_request (line 57) | def _validate_request(self, request: TranscriptionRequest) -> Optional...
    method _convert_to_internal_request (line 62) | def _convert_to_internal_request(
    method _get_audio_duration (line 87) | def _get_audio_duration(self, audio_data: bytes) -> float:
    method create_transcription (line 99) | async def create_transcription(
    method _handle_non_streaming_request (line 127) | async def _handle_non_streaming_request(
    method _handle_streaming_request (line 152) | async def _handle_streaming_request(
    method _generate_transcription_stream (line 165) | async def _generate_transcription_stream(

FILE: python/sglang/srt/entrypoints/openai/tool_server.py
  function list_server_and_tools (line 20) | async def list_server_and_tools(server_url: str):
  function trim_schema (line 30) | def trim_schema(schema: dict) -> dict:
  function post_process_tools_description (line 55) | def post_process_tools_description(
  class ToolServer (line 73) | class ToolServer(ABC):
    method has_tool (line 76) | def has_tool(self, tool_name: str):
    method get_tool_description (line 80) | def get_tool_description(self, tool_name: str):
    method get_tool_session (line 84) | def get_tool_session(self, tool_name: str) -> AbstractAsyncContextMana...
  class MCPToolServer (line 87) | class MCPToolServer(ToolServer):
    method __init__ (line 89) | def __init__(self):
    method add_tool_server (line 92) | async def add_tool_server(self, server_url: str):
    method has_tool (line 124) | def has_tool(self, tool_name: str):
    method get_tool_description (line 127) | def get_tool_description(self, tool_name: str):
    method get_tool_session (line 131) | async def get_tool_session(self, tool_name: str):
  class DemoToolServer (line 143) | class DemoToolServer(ToolServer):
    method __init__ (line 145) | def __init__(self):
    method has_tool (line 160) | def has_tool(self, tool_name: str):
    method get_tool_description (line 163) | def get_tool_description(self, tool_name: str):
    method get_tool_session (line 174) | async def get_tool_session(self, tool_name: str):

FILE: python/sglang/srt/entrypoints/openai/usage_processor.py
  class UsageProcessor (line 9) | class UsageProcessor:
    method _details_if_cached (line 13) | def _details_if_cached(count: int) -> Optional[PromptTokensDetails]:
    method calculate_response_usage (line 18) | def calculate_response_usage(
    method calculate_streaming_usage (line 47) | def calculate_streaming_usage(
    method calculate_token_usage (line 75) | def calculate_token_usage(

FILE: python/sglang/srt/entrypoints/openai/utils.py
  function to_openai_style_logprobs (line 14) | def to_openai_style_logprobs(
  function process_hidden_states_from_ret (line 51) | def process_hidden_states_from_ret(
  function process_routed_experts_from_ret (line 76) | def process_routed_experts_from_ret(
  function process_cached_tokens_details_from_ret (line 89) | def process_cached_tokens_details_from_ret(

FILE: python/sglang/srt/entrypoints/ssl_utils.py
  class SSLCertRefresher (line 13) | class SSLCertRefresher:
    method __init__ (line 22) | def __init__(
    method _watch_cert_key (line 44) | async def _watch_cert_key(self) -> None:
    method _watch_ca (line 64) | async def _watch_ca(self) -> None:
    method stop (line 84) | def stop(self) -> None:

FILE: python/sglang/srt/entrypoints/tool.py
  class Tool (line 16) | class Tool(ABC):
    method get_result (line 19) | async def get_result(self, context: "ConversationContext") -> Any:
  class HarmonyBrowserTool (line 23) | class HarmonyBrowserTool(Tool):
    method __init__ (line 25) | def __init__(self):
    method get_result (line 45) | async def get_result(self, context: "ConversationContext") -> Any:
    method tool_config (line 56) | def tool_config(self) -> Any:
  class HarmonyPythonTool (line 60) | class HarmonyPythonTool(Tool):
    method __init__ (line 62) | def __init__(self):
    method get_result (line 75) | async def get_result(self, context: "ConversationContext") -> Any:
    method tool_config (line 86) | def tool_config(self) -> Any:

FILE: python/sglang/srt/entrypoints/v1_loads.py
  function _get_tokenizer_manager (line 49) | def _get_tokenizer_manager():
  function _loads_dict_factory (line 56) | def _loads_dict_factory(items):
  function _compute_aggregate (line 61) | def _compute_aggregate(load_dicts: list) -> dict:
  function _format_loads_prometheus (line 86) | def _format_loads_prometheus(load_results) -> Response:
  function get_loads (line 131) | async def get_loads(

FILE: python/sglang/srt/entrypoints/warmup.py
  function warmup (line 20) | def warmup(name: str):
  function execute_warmups (line 28) | async def execute_warmups(
  function voice_chat (line 42) | async def voice_chat(disaggregation_mode: str, tokenizer_manager: Tokeni...

FILE: python/sglang/srt/environ.py
  function temp_set_env (line 10) | def temp_set_env(*, allow_sglang: bool = False, **env_vars: Any):
  class EnvField (line 38) | class EnvField:
    method __init__ (line 41) | def __init__(self, default: Any):
    method __set_name__ (line 47) | def __set_name__(self, owner, name):
    method parse (line 51) | def parse(self, value: str) -> Any:
    method get (line 54) | def get(self) -> Any:
    method is_set (line 74) | def is_set(self):
    method set (line 77) | def set(self, value: Any):
    method override (line 82) | def override(self, value: Any):
    method clear (line 94) | def clear(self):
    method __bool__ (line 98) | def __bool__(self):
    method __len__ (line 103) | def __len__(self):
  class EnvTuple (line 109) | class EnvTuple(EnvField):
    method parse (line 110) | def parse(self, value: str) -> tuple[str, ...]:
  class EnvStr (line 114) | class EnvStr(EnvField):
    method parse (line 115) | def parse(self, value: str) -> str:
  class EnvBool (line 119) | class EnvBool(EnvField):
    method parse (line 120) | def parse(self, value: str) -> bool:
  class EnvInt (line 129) | class EnvInt(EnvField):
    method parse (line 130) | def parse(self, value: str) -> int:
  class EnvFloat (line 137) | class EnvFloat(EnvField):
    method parse (line 138) | def parse(self, value: str) -> float:
  class ToolStrictLevel (line 145) | class ToolStrictLevel(IntEnum):
  class Envs (line 159) | class Envs:
  function _print_deprecated_env (line 530) | def _print_deprecated_env(new_name: str, old_name: str):
  function _warn_deprecated_env_to_cli_flag (line 538) | def _warn_deprecated_env_to_cli_flag(env_name: str, suggestion: str):
  function _convert_SGL_to_SGLANG (line 547) | def _convert_SGL_to_SGLANG():
  function example_with_exit_stack (line 618) | def example_with_exit_stack():
  function example_with_subprocess (line 627) | def example_with_subprocess():
  function example_with_implicit_bool_avoidance (line 642) | def example_with_implicit_bool_avoidance():
  function examples (line 666) | def examples():

FILE: python/sglang/srt/eplb/eplb_algorithms/__init__.py
  class EplbAlgorithm (line 10) | class EplbAlgorithm(Enum):
  function rebalance_experts (line 20) | def rebalance_experts(
  function compute_algorithm (line 74) | def compute_algorithm(

FILE: python/sglang/srt/eplb/eplb_algorithms/deepseek.py
  function balanced_packing (line 7) | def balanced_packing(
  function replicate_experts (line 52) | def replicate_experts(
  function rebalance_experts_hierarchical (line 83) | def rebalance_experts_hierarchical(
  function rebalance_experts (line 168) | def rebalance_experts(

FILE: python/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py
  function pack_groups (line 7) | def pack_groups(tokens_per_group: torch.Tensor, num_nodes: int) -> torch...
  function make_redundant_experts_chunkwise (line 35) | def make_redundant_experts_chunkwise(
  function decode_rebalance_experts (line 184) | def decode_rebalance_experts(
  function prefill_rebalance_experts (line 197) | def prefill_rebalance_experts(
  function rebalance_experts (line 255) | def rebalance_experts(

FILE: python/sglang/srt/eplb/eplb_algorithms/elasticity_aware.py
  function rebalance_experts (line 8) | def rebalance_experts(

FILE: python/sglang/srt/eplb/eplb_manager.py
  class EPLBManager (line 16) | class EPLBManager:
    method __init__ (line 17) | def __init__(self, model_runner: "ModelRunner"):
    method on_forward_pass_end (line 41) | def on_forward_pass_end(self):
    method _entrypoint (line 45) | def _entrypoint(self):
    method rebalance (line 52) | def rebalance(self):
    method _check_rebalance_needed (line 93) | def _check_rebalance_needed(self, average_utilization_rate_over_window):
    method _compute_update_layer_ids_chunks (line 108) | def _compute_update_layer_ids_chunks(self) -> List[List[int]]:
  function _chunk_list (line 116) | def _chunk_list(items: List, chunk_size):

FILE: python/sglang/srt/eplb/eplb_simulator/reader.py
  function read_mode_per_pass (line 16) | def read_mode_per_pass(dir_data: Path):

FILE: python/sglang/srt/eplb/expert_distribution.py
  class ExpertDistributionMetrics (line 48) | class ExpertDistributionMetrics:
    method copy_to_cpu (line 51) | def copy_to_cpu(self):
  class ExpertDistributionRecorder (line 55) | class ExpertDistributionRecorder(ABC):
    method init_new (line 59) | def init_new(
    method with_current_layer (line 77) | def with_current_layer(self, layer_idx):
    method with_debug_name (line 81) | def with_debug_name(self, debug_name):
    method disable_this_region (line 85) | def disable_this_region(self):
    method with_forward_pass (line 89) | def with_forward_pass(self, forward_pass_id: int, forward_batch: Forwa...
    method on_select_experts (line 92) | def on_select_experts(self, topk_ids: torch.Tensor):
    method on_deepep_dispatch_normal (line 95) | def on_deepep_dispatch_normal(
    method on_deepep_dispatch_low_latency (line 104) | def on_deepep_dispatch_low_latency(
    method start_record (line 109) | def start_record(self):
    method stop_record (line 112) | def stop_record(self):
    method dump_record (line 115) | def dump_record(self, output_mode: _OutputMode = "file"):
    method recording (line 119) | def recording(self):
    method _on_not_implemented (line 122) | def _on_not_implemented(self):
  class _ExpertDistributionRecorderNoop (line 128) | class _ExpertDistributionRecorderNoop(ExpertDistributionRecorder):
  class _ExpertDistributionRecorderReal (line 132) | class _ExpertDistributionRecorderReal(ExpertDistributionRecorder):
    method __init__ (line 133) | def __init__(
    method with_current_layer (line 161) | def with_current_layer(self, layer_idx):
    method with_debug_name (line 164) | def with_debug_name(self, debug_name):
    method with_forward_pass (line 168) | def with_forward_pass(self, forward_pass_id: int, forward_batch: Forwa...
    method disable_this_region (line 178) | def disable_this_region(self):
    method _on_forward_pass_start (line 187) | def _on_forward_pass_start(self, forward_batch: ForwardBatch):
    method _on_forward_pass_end (line 194) | def _on_forward_pass_end(self, forward_pass_id: int, outputs: Dict[str...
    method on_select_experts (line 203) | def on_select_experts(self, topk_ids: torch.Tensor):
    method on_deepep_dispatch_normal (line 206) | def on_deepep_dispatch_normal(
    method on_deepep_dispatch_low_latency (line 221) | def on_deepep_dispatch_low_latency(
    method _on_hook (line 229) | def _on_hook(self, hook_name: str, **kwargs):
    method _reset (line 243) | def _reset(self):
    method start_record (line 253) | def start_record(self):
    method stop_record (line 262) | def stop_record(self):
    method dump_record (line 270) | def dump_record(self, output_mode: _OutputMode = "file"):
    method recording (line 277) | def recording(self):
  function get_global_expert_distribution_recorder (line 286) | def get_global_expert_distribution_recorder():
  function set_global_expert_distribution_recorder (line 290) | def set_global_expert_distribution_recorder(value):
  class _SinglePassGatherer (line 298) | class _SinglePassGatherer(ABC):
    method init_new (line 300) | def init_new(
    method __init__ (line 330) | def __init__(self, expert_location_metadata: ExpertLocationMetadata, r...
    method on_forward_pass_start (line 334) | def on_forward_pass_start(self, forward_batch: ForwardBatch):
    method on_select_experts (line 337) | def on_select_experts(self, layer_idx: int, topk_ids: torch.Tensor):
    method on_deepep_dispatch_normal (line 340) | def on_deepep_dispatch_normal(
    method on_deepep_dispatch_low_latency (line 350) | def on_deepep_dispatch_low_latency(
    method reset (line 355) | def reset(self):
    method collect (line 358) | def collect(self) -> Dict:
  class _DetailSinglePassGatherer (line 362) | class _DetailSinglePassGatherer(_SinglePassGatherer):
    method __init__ (line 366) | def __init__(
    method on_forward_pass_start (line 390) | def on_forward_pass_start(self, forward_batch: ForwardBatch):
    method on_select_experts (line 401) | def on_select_experts(self, layer_idx: int, topk_ids: torch.Tensor):
    method on_deepep_dispatch_normal (line 406) | def on_deepep_dispatch_normal(
    method reset (line 423) | def reset(self):
    method collect (line 428) | def collect(self) -> Dict:
  class _LayerBasedCpuSinglePassGatherer (line 446) | class _LayerBasedCpuSinglePassGatherer(_SinglePassGatherer):
    method __init__ (line 447) | def __init__(self, *args, **kwargs):
    method _on_layer_data (line 451) | def _on_layer_data(self, layer_idx: int, objects: List[int]):
    method reset (line 460) | def reset(self):
    method _collect_objects (line 463) | def _collect_objects(self, pad_len: int) -> torch.Tensor:
  function _list_sum (line 471) | def _list_sum(a: List, b: List) -> List:
  class _LayerBasedGpuSinglePassGatherer (line 475) | class _LayerBasedGpuSinglePassGatherer(_SinglePassGatherer):
    method __init__ (line 476) | def __init__(self, *args, enable_global_physical_experts: bool, **kwar...
    method reset (line 492) | def reset(self):
    method collect (line 495) | def collect(self) -> Dict:
  class _SelectExpertsSinglePassGatherer (line 510) | class _SelectExpertsSinglePassGatherer(_LayerBasedGpuSinglePassGatherer):
    method __init__ (line 511) | def __init__(self, *args, **kwargs):
    method on_select_experts (line 515) | def on_select_experts(self, layer_idx: int, topk_ids: torch.Tensor):
  class _DeepepNormalSinglePassGatherer (line 523) | class _DeepepNormalSinglePassGatherer(_LayerBasedCpuSinglePassGatherer):
    method __init__ (line 524) | def __init__(self, *args, **kwargs):
    method on_deepep_dispatch_normal (line 532) | def on_deepep_dispatch_normal(
    method collect (line 543) | def collect(self) -> Dict:
  class _DeepepLowLatencySinglePassGatherer (line 556) | class _DeepepLowLatencySinglePassGatherer(_LayerBasedGpuSinglePassGather...
    method __init__ (line 557) | def __init__(self, *args, **kwargs):
    method on_deepep_dispatch_low_latency (line 560) | def on_deepep_dispatch_low_latency(
  function _convert_per_token_to_global_physical_count (line 567) | def _convert_per_token_to_global_physical_count(
  function _convert_local_to_global_physical_count (line 588) | def _convert_local_to_global_physical_count(
  class _Accumulator (line 610) | class _Accumulator(ABC):
    method init_new (line 612) | def init_new(
    method get_class (line 622) | def get_class(server_args: ServerArgs) -> Type["_Accumulator"]:
    method __init__ (line 630) | def __init__(
    method get_single_pass_gatherer_keys (line 640) | def get_single_pass_gatherer_keys(self):
    method get_single_pass_gatherer_key (line 643) | def get_single_pass_gatherer_key(self, debug_name: Optional[str]):
    method append (line 646) | def append(
    method reset (line 655) | def reset(self):
    method dump (line 658) | def dump(self, output_mode: _OutputMode):
  class _UtilizationRateAccumulatorMixin (line 662) | class _UtilizationRateAccumulatorMixin(_Accumulator):
    method __init__ (line 663) | def __init__(self, *args, **kwargs):
    method append (line 677) | def append(
    method reset (line 690) | def reset(self):
    method _append_utilization_rate (line 695) | def _append_utilization_rate(
    method _handle_metric_eplb_heatmap (line 738) | def _handle_metric_eplb_heatmap(self, gpu_physical_count: torch.Tensor):
  class _DequeCollection (line 761) | class _DequeCollection:
    method __init__ (line 762) | def __init__(self, maxlens: List[int]):
    method append (line 765) | def append(self, value):
    method clear (line 769) | def clear(self):
    method mean (line 773) | def mean(self) -> Dict[int, float]:
  class _DetailAccumulator (line 777) | class _DetailAccumulator(_UtilizationRateAccumulatorMixin):
    method __init__ (line 778) | def __init__(self, *args, **kwargs):
    method get_single_pass_gatherer_keys (line 782) | def get_single_pass_gatherer_keys(self):
    method get_single_pass_gatherer_key (line 787) | def get_single_pass_gatherer_key(self, debug_name: Optional[str]):
    method append (line 792) | def append(
    method reset (line 819) | def reset(self):
    method dump (line 823) | def dump(self, output_mode: _OutputMode):
  class _StatAccumulator (line 835) | class _StatAccumulator(_UtilizationRateAccumulatorMixin):
    method __init__ (line 836) | def __init__(self, *args, **kwargs):
    method append (line 850) | def append(
    method reset (line 863) | def reset(self):
    method dump (line 867) | def dump(self, output_mode: _OutputMode):
    method _get_global_average_utilization_rate (line 897) | def _get_global_average_utilization_rate(self):
  function _dump_to_file (line 923) | def _dump_to_file(name, data):
  class _Buffer (line 932) | class _Buffer:
    method init_new (line 934) | def init_new(item_shape: Tuple, buffer_size: int, dtype, device):
    method append (line 940) | def append(self, value: torch.Tensor):
    method get_all (line 943) | def get_all(self) -> torch.Tensor:
    method reset (line 946) | def reset(self):
  class _CircularBuffer (line 950) | class _CircularBuffer(_Buffer):
    method __init__ (line 951) | def __init__(self, item_shape: Tuple, buffer_size: int, dtype, device):
    method append (line 957) | def append(self, value: torch.Tensor):
    method get_all (line 961) | def get_all(self) -> torch.Tensor:
    method reset (line 964) | def reset(self):
  class _InfiniteBuffer (line 968) | class _InfiniteBuffer(_Buffer):
    method __init__ (line 969) | def __init__(self, item_shape: Tuple, dtype, device):
    method append (line 974) | def append(self, value: torch.Tensor):
    method get_all (line 989) | def get_all(self) -> torch.Tensor:
    method reset (line 992) | def reset(self):
  function _convert_global_physical_count_to_logical_count (line 997) | def _convert_global_physical_count_to_logical_count(
  function compute_gpu_physical_count (line 1020) | def compute_gpu_physical_count(
  function compute_utilization_rate (line 1033) | def compute_utilization_rate(

FILE: python/sglang/srt/eplb/expert_location.py
  class ExpertLocationMetadata (line 39) | class ExpertLocationMetadata:
    method num_layers (line 51) | def num_layers(self) -> int:
    method num_physical_experts (line 55) | def num_physical_experts(self) -> int:
    method num_local_physical_experts (line 59) | def num_local_physical_experts(self) -> int:
    method num_logical_experts (line 65) | def num_logical_experts(self) -> int:
    method ep_size (line 69) | def ep_size(self):
    method __post_init__ (line 73) | def __post_init__(self):
    method init_trivial (line 88) | def init_trivial(
    method init_by_mapping (line 115) | def init_by_mapping(
    method init_by_eplb (line 147) | def init_by_eplb(
    method _init_common (line 191) | def _init_common(server_args: ServerArgs, model_config: ModelConfig):
    method _init_raw (line 215) | def _init_raw(
    method update (line 255) | def update(
    method logical_to_all_physical (line 286) | def logical_to_all_physical(
  function get_global_expert_location_metadata (line 312) | def get_global_expert_location_metadata():
  function set_global_expert_location_metadata (line 316) | def set_global_expert_location_metadata(value):
  function _compute_logical_to_all_physical_map (line 322) | def _compute_logical_to_all_physical_map(
  function _pad_nested_array (line 382) | def _pad_nested_array(arr, pad_value):
  function compute_logical_to_rank_dispatch_physical_map (line 392) | def compute_logical_to_rank_dispatch_physical_map(
  function _logical_to_all_physical_raw (line 446) | def _logical_to_all_physical_raw(
  function _compute_gpu_id_of_physical_expert (line 458) | def _compute_gpu_id_of_physical_expert(
  function _compute_node_id_of_physical_expert (line 464) | def _compute_node_id_of_physical_expert(
  function _find_nearest_expert (line 470) | def _find_nearest_expert(
  function _fair_choices (line 510) | def _fair_choices(arr: List, k: int, r: random.Random) -> List:
  class ModelConfigForExpertLocation (line 518) | class ModelConfigForExpertLocation:
    method from_model_config (line 524) | def from_model_config(model_config: ModelConfig):
  function compute_initial_expert_location_metadata (line 534) | def compute_initial_expert_location_metadata(

FILE: python/sglang/srt/eplb/expert_location_dispatch.py
  class ExpertLocationDispatchInfo (line 25) | class ExpertLocationDispatchInfo:
    method init_new (line 36) | def init_new(cls, layer_id: int):
  function transform_select_experts_inputs (line 64) | def transform_select_experts_inputs(
  function topk_ids_logical_to_physical (line 76) | def topk_ids_logical_to_physical(
  function _topk_ids_logical_to_physical_static (line 89) | def _topk_ids_logical_to_physical_static(
  function _topk_ids_logical_to_physical_dynamic (line 95) | def _topk_ids_logical_to_physical_dynamic(

FILE: python/sglang/srt/eplb/expert_location_updater.py
  class ExpertLocationUpdater (line 37) | class ExpertLocationUpdater:
    method __init__ (line 38) | def __init__(self):
    method update (line 41) | def update(
  function _update_expert_weights (line 78) | def _update_expert_weights(**kwargs):
  function _update_expert_weights_with_canary (line 86) | def _update_expert_weights_with_canary(
  function _update_expert_weights_raw (line 135) | def _update_expert_weights_raw(
  function create_temp_buffers (line 178) | def create_temp_buffers(sample_tensors):
  function update_expert_weights_single_layer (line 182) | def update_expert_weights_single_layer(
  class _ChunkUtils (line 516) | class _ChunkUtils:
    method __init__ (line 517) | def __init__(self, *, chunk_values: List, element_values: List):
    method chunk_value_from_element_value (line 521) | def chunk_value_from_element_value(self, element_value):
    method element_values_from_chunk_value (line 529) | def element_values_from_chunk_value(self, chunk_value) -> List:
    method _chunk_index_from_element_index (line 540) | def _chunk_index_from_element_index(
    method _element_slice_from_chunk_index (line 554) | def _element_slice_from_chunk_index(
  function _deduplicate_ordered (line 563) | def _deduplicate_ordered(arr: List[int]):
  function _log_p2p_op_metrics (line 571) | def _log_p2p_op_metrics(
  function _get_direction_from_op (line 606) | def _get_direction_from_op(op: P2POp):
  function _group_by (line 614) | def _group_by(items, keyfunc):

FILE: python/sglang/srt/function_call/base_format_detector.py
  class BaseFormatDetector (line 26) | class BaseFormatDetector(ABC):
    method __init__ (line 29) | def __init__(self):
    method _get_tool_indices (line 53) | def _get_tool_indices(self, tools: List[Tool]) -> Dict[str, int]:
    method parse_base_json (line 71) | def parse_base_json(self, action: Any, tools: List[Tool]) -> List[Tool...
    method detect_and_parse (line 98) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method _ends_with_partial_token (line 106) | def _ends_with_partial_token(self, buffer: str, bot_token: str) -> int:
    method parse_streaming_increment (line 119) | def parse_streaming_increment(
    method has_tool_call (line 324) | def has_tool_call(self, text: str) -> bool:
    method supports_structural_tag (line 330) | def supports_structural_tag(self) -> bool:
    method structure_info (line 335) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/core_types.py
  class ToolCallItem (line 7) | class ToolCallItem(BaseModel):
  class StreamingParseResult (line 15) | class StreamingParseResult(BaseModel):
  class StructureInfo (line 23) | class StructureInfo:

FILE: python/sglang/srt/function_call/deepseekv31_detector.py
  class DeepSeekV31Detector (line 19) | class DeepSeekV31Detector(BaseFormatDetector):
    method __init__ (line 45) | def __init__(self):
    method has_tool_call (line 56) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 60) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 90) | def parse_streaming_increment(
    method structure_info (line 201) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/deepseekv32_detector.py
  class DeepSeekV32Detector (line 20) | class DeepSeekV32Detector(BaseFormatDetector):
    method __init__ (line 72) | def __init__(self):
    method has_tool_call (line 91) | def has_tool_call(self, text: str) -> bool:
    method _parse_parameters_from_xml (line 95) | def _parse_parameters_from_xml(
    method detect_and_parse (line 168) | def detect_and_parse(self, text: str, tools: list[Tool]) -> StreamingP...
    method parse_streaming_increment (line 212) | def parse_streaming_increment(
    method structure_info (line 348) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/deepseekv3_detector.py
  class DeepSeekV3Detector (line 19) | class DeepSeekV3Detector(BaseFormatDetector):
    method __init__ (line 45) | def __init__(self):
    method has_tool_call (line 54) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 58) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 88) | def parse_streaming_increment(
    method structure_info (line 204) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/function_call_parser.py
  class FunctionCallParser (line 39) | class FunctionCallParser:
    method __init__ (line 74) | def __init__(self, tools: List[Tool], tool_call_parser: str):
    method has_tool_call (line 85) | def has_tool_call(self, text: str) -> bool:
    method parse_non_stream (line 100) | def parse_non_stream(self, full_text: str) -> Tuple[str, list[ToolCall...
    method parse_stream_chunk (line 121) | def parse_stream_chunk(self, chunk_text: str) -> Tuple[str, list[ToolC...
    method get_structure_tag (line 147) | def get_structure_tag(self) -> LegacyStructuralTagResponseFormat:
    method get_structure_constraint (line 186) | def get_structure_constraint(

FILE: python/sglang/srt/function_call/gigachat3_detector.py
  class GigaChat3Detector (line 37) | class GigaChat3Detector(BaseFormatDetector):
    method __init__ (line 38) | def __init__(self) -> None:
    method has_tool_call (line 46) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 50) | def detect_and_parse(
    method parse_streaming_increment (line 96) | def parse_streaming_increment(
    method supports_structural_tag (line 194) | def supports_structural_tag(self) -> bool:
    method structure_info (line 198) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/glm47_moe_detector.py
  class StreamState (line 20) | class StreamState(str, Enum):
  function get_argument_type (line 30) | def get_argument_type(
  function _convert_to_number (line 76) | def _convert_to_number(value: str) -> Any:
  function parse_arguments (line 94) | def parse_arguments(
  class Glm47MoeDetector (line 145) | class Glm47MoeDetector(BaseFormatDetector):
    method __init__ (line 152) | def __init__(self):
    method _reset_streaming_state (line 174) | def _reset_streaming_state(self) -> None:
    method has_tool_call (line 188) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 192) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method _get_value_type (line 247) | def _get_value_type(self, func_name: str, key: str, tools: List[Tool])...
    method _format_value_complete (line 300) | def _format_value_complete(self, value: str, value_type: str) -> str:
    method _process_xml_to_json_streaming (line 327) | def _process_xml_to_json_streaming(
    method _extract_match_groups (line 447) | def _extract_match_groups(self, match: re.Match) -> tuple[str, str, str]:
    method _send_tool_name_if_needed (line 461) | def _send_tool_name_if_needed(
    method _process_arguments_streaming (line 504) | def _process_arguments_streaming(
    method _finalize_tool_call (line 547) | def _finalize_tool_call(
    method parse_streaming_increment (line 620) | def parse_streaming_increment(
    method _parse_argument_pairs (line 746) | def _parse_argument_pairs(
    method supports_structural_tag (line 784) | def supports_structural_tag(self) -> bool:
    method structure_info (line 787) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/glm4_moe_detector.py
  class StreamState (line 20) | class StreamState(str, Enum):
  function get_argument_type (line 30) | def get_argument_type(
  function _convert_to_number (line 65) | def _convert_to_number(value: str) -> Any:
  function parse_arguments (line 83) | def parse_arguments(
  class Glm4MoeDetector (line 134) | class Glm4MoeDetector(BaseFormatDetector):
    method __init__ (line 151) | def __init__(self):
    method _reset_streaming_state (line 169) | def _reset_streaming_state(self) -> None:
    method has_tool_call (line 181) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 185) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method _get_value_type (line 221) | def _get_value_type(self, func_name: str, key: str, tools: List[Tool])...
    method _format_value_complete (line 274) | def _format_value_complete(self, value: str, value_type: str) -> str:
    method _process_xml_to_json_streaming (line 301) | def _process_xml_to_json_streaming(
    method parse_streaming_increment (line 421) | def parse_streaming_increment(
    method _parse_argument_pairs (line 600) | def _parse_argument_pairs(
    method supports_structural_tag (line 638) | def supports_structural_tag(self) -> bool:
    method structure_info (line 641) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/gpt_oss_detector.py
  class GptOssDetector (line 19) | class GptOssDetector(BaseFormatDetector):
    method __init__ (line 27) | def __init__(self):
    method has_tool_call (line 39) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 43) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 76) | def parse_streaming_increment(
    method _extract_tool_call_from_event (line 197) | def _extract_tool_call_from_event(
    method structure_info (line 240) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/hermes_detector.py
  class HermesDetector (line 17) | class HermesDetector(BaseFormatDetector):
    method __init__ (line 25) | def __init__(self):
    method has_tool_call (line 34) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 37) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method _clean_normal_text (line 62) | def _clean_normal_text(self, text: str) -> str:
    method parse_streaming_increment (line 85) | def parse_streaming_increment(
    method structure_info (line 115) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/internlm_detector.py
  class InternlmDetector (line 21) | class InternlmDetector(BaseFormatDetector):
    method __init__ (line 48) | def __init__(self):
    method has_tool_call (line 54) | def has_tool_call(self, text: str) -> bool:
    method get_arguments (line 59) | def get_arguments(self, obj):
    method detect_and_parse (line 67) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 155) | def parse_streaming_increment(
    method structure_info (line 233) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/json_array_parser.py
  class JsonArrayParser (line 8) | class JsonArrayParser(BaseFormatDetector):
    method __init__ (line 16) | def __init__(self):
    method has_tool_call (line 23) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 29) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 37) | def parse_streaming_increment(
    method structure_info (line 45) | def structure_info(self) -> callable:

FILE: python/sglang/srt/function_call/kimik2_detector.py
  function _strip_special_tokens (line 27) | def _strip_special_tokens(text: str) -> str:
  class KimiK2Detector (line 34) | class KimiK2Detector(BaseFormatDetector):
    method __init__ (line 48) | def __init__(self):
    method has_tool_call (line 76) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 80) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 127) | def parse_streaming_increment(
    method structure_info (line 245) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/lfm2_detector.py
  class Lfm2Detector (line 39) | class Lfm2Detector(BaseFormatDetector):
    method __init__ (line 56) | def __init__(self):
    method has_tool_call (line 65) | def has_tool_call(self, text: str) -> bool:
    method _get_parameter_value (line 69) | def _get_parameter_value(self, val: ast.AST) -> Any:
    method _parse_pythonic_call (line 109) | def _parse_pythonic_call(
    method _parse_pythonic_content (line 158) | def _parse_pythonic_content(
    method _parse_json_content (line 210) | def _parse_json_content(
    method _parse_tool_calls_content (line 238) | def _parse_tool_calls_content(
    method detect_and_parse (line 266) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method _strip_special_tokens (line 287) | def _strip_special_tokens(self, text: str) -> str:
    method parse_streaming_increment (line 291) | def parse_streaming_increment(
    method supports_structural_tag (line 367) | def supports_structural_tag(self) -> bool:
    method structure_info (line 376) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/llama32_detector.py
  class Llama32Detector (line 18) | class Llama32Detector(BaseFormatDetector):
    method __init__ (line 28) | def __init__(self):
    method _convert_python_dict_to_json (line 37) | def _convert_python_dict_to_json(self, text: str) -> str:
    method has_tool_call (line 47) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 53) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 115) | def parse_streaming_increment(
    method structure_info (line 139) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/mimo_detector.py
  function _get_param_type (line 30) | def _get_param_type(func_name: str, param_name: str, tools: List[Tool]) ...
  function _convert_param_value (line 40) | def _convert_param_value(
  class MiMoDetector (line 137) | class MiMoDetector(BaseFormatDetector):
    method __init__ (line 149) | def __init__(self):
    method has_tool_call (line 159) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 162) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 195) | def parse_streaming_increment(
    method _parse_tool_call (line 250) | def _parse_tool_call(
    method supports_structural_tag (line 277) | def supports_structural_tag(self) -> bool:
    method structure_info (line 280) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/minimax_m2.py
  class MinimaxM2Detector (line 17) | class MinimaxM2Detector(BaseFormatDetector):
    method __init__ (line 29) | def __init__(self):
    method has_tool_call (line 56) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 59) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method _convert_param_value (line 63) | def _convert_param_value(self, value: str, param_type: str) -> Any:
    method _extract_types_from_schema (line 67) | def _extract_types_from_schema(self, schema: Any) -> list[str]:
    method _convert_param_value_with_types (line 127) | def _convert_param_value_with_types(
    method _get_param_types_from_config (line 203) | def _get_param_types_from_config(
    method parse_streaming_increment (line 226) | def parse_streaming_increment(
    method _parse_and_stream_parameters (line 357) | def _parse_and_stream_parameters(
    method _reset_streaming_state (line 449) | def _reset_streaming_state(self, still_in_tool_call: bool = False):
    method _extract (line 458) | def _extract(self, text: str, tools: List[Tool]) -> Tuple[str, List[To...
    method _parse_block (line 477) | def _parse_block(self, block: str, tools: List[Tool]) -> List[ToolCall...
    method _parse_parameter (line 504) | def _parse_parameter(
    method supports_structural_tag (line 518) | def supports_structural_tag(self) -> bool:
    method structure_info (line 521) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/mistral_detector.py
  class MistralDetector (line 18) | class MistralDetector(BaseFormatDetector):
    method __init__ (line 34) | def __init__(self):
    method has_tool_call (line 44) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 48) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 115) | def parse_streaming_increment(
    method _try_parse_compact_args_format (line 198) | def _try_parse_compact_args_format(
    method _extract_json_value (line 253) | def _extract_json_value(
    method _extract_json_array (line 294) | def _extract_json_array(self, text: str) -> str:
    method structure_info (line 338) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/pythonic_detector.py
  class PythonicDetector (line 19) | class PythonicDetector(BaseFormatDetector):
    method __init__ (line 34) | def __init__(self):
    method _text_strip (line 42) | def _text_strip(text: str) -> str:
    method has_tool_call (line 49) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 52) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method _find_matching_bracket (line 113) | def _find_matching_bracket(self, buffer: str, start: int) -> int:
    method _strip_and_split_buffer (line 135) | def _strip_and_split_buffer(self, buffer: str) -> tuple[str, str]:
    method parse_streaming_increment (line 159) | def parse_streaming_increment(
    method _get_parameter_value (line 207) | def _get_parameter_value(self, val):
    method supports_structural_tag (line 220) | def supports_structural_tag(self) -> bool:
    method structure_info (line 223) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/qwen25_detector.py
  class Qwen25Detector (line 17) | class Qwen25Detector(BaseFormatDetector):
    method __init__ (line 33) | def __init__(self):
    method has_tool_call (line 43) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 47) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 75) | def parse_streaming_increment(
    method structure_info (line 115) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/qwen3_coder_detector.py
  class Qwen3CoderDetector (line 18) | class Qwen3CoderDetector(BaseFormatDetector):
    method __init__ (line 19) | def __init__(self):
    method has_tool_call (line 57) | def has_tool_call(self, text: str) -> bool:
    method _get_arguments_config (line 60) | def _get_arguments_config(
    method _convert_param_value (line 89) | def _convert_param_value(
    method detect_and_parse (line 172) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 240) | def parse_streaming_increment(
    method supports_structural_tag (line 470) | def supports_structural_tag(self) -> bool:
    method structure_info (line 473) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/step3_detector.py
  function get_argument_type (line 18) | def get_argument_type(func_name: str, arg_key: str, defined_tools: List[...
  function parse_arguments (line 31) | def parse_arguments(value: str) -> tuple[Any, bool]:
  class Step3Detector (line 43) | class Step3Detector(BaseFormatDetector):
    method __init__ (line 61) | def __init__(self):
    method has_tool_call (line 85) | def has_tool_call(self, text: str) -> bool:
    method _parse_steptml_invoke (line 89) | def _parse_steptml_invoke(
    method detect_and_parse (line 120) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 169) | def parse_streaming_increment(
    method _parse_partial_tool_call (line 256) | def _parse_partial_tool_call(self, tools: List[Tool]) -> StreamingPars...
    method _reset_streaming_state (line 395) | def _reset_streaming_state(self):
    method supports_structural_tag (line 402) | def supports_structural_tag(self) -> bool:
    method structure_info (line 406) | def structure_info(self) -> _GetInfoFunc:

FILE: python/sglang/srt/function_call/trinity_detector.py
  class TrinityDetector (line 11) | class TrinityDetector(Qwen25Detector):
    method _strip_think_tags (line 21) | def _strip_think_tags(self, text: str) -> str:
    method has_tool_call (line 25) | def has_tool_call(self, text: str) -> bool:
    method detect_and_parse (line 29) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP...
    method parse_streaming_increment (line 35) | def parse_streaming_increment(

FILE: python/sglang/srt/function_call/utils.py
  function _find_common_prefix (line 12) | def _find_common_prefix(s1: str, s2: str) -> str:
  function _partial_json_loads (line 23) | def _partial_json_loads(input_str: str, flags: Allow) -> Tuple[Any, int]:
  function _is_complete_json (line 52) | def _is_complete_json(input_str: str) -> bool:
  function _get_tool_schema_defs (line 60) | def _get_tool_schema_defs(tools: List[Tool]) -> dict:
  function _get_tool_schema (line 90) | def _get_tool_schema(tool: Tool) -> dict:
  function infer_type_from_json_schema (line 104) | def infer_type_from_json_schema(schema: Dict[str, Any]) -> Optional[str]:
  function get_json_schema_constraint (line 207) | def get_json_schema_constraint(

FILE: python/sglang/srt/hardware_backend/npu/allocator_npu.py
  class NPUPagedTokenToKVPoolAllocator (line 15) | class NPUPagedTokenToKVPoolAllocator(PagedTokenToKVPoolAllocator):
    method __init__ (line 16) | def __init__(
    method alloc_extend (line 28) | def alloc_extend(
    method alloc_decode (line 96) | def alloc_decode(
    method free (line 135) | def free(self, free_index: torch.Tensor):

FILE: python/sglang/srt/hardware_backend/npu/attention/ascend_backend.py
  function _reshape_kv_for_fia_nz (line 38) | def _reshape_kv_for_fia_nz(
  class ForwardMetadata (line 49) | class ForwardMetadata:
  class AscendAttnMaskBuilder (line 68) | class AscendAttnMaskBuilder:
    method __init__ (line 69) | def __init__(self, model_runner: ModelRunner, device, use_fia, use_mla):
    method generate_mask_flag (line 107) | def generate_mask_flag(max_seq_len):
    method generate_attn_mask (line 121) | def generate_attn_mask(max_seq_len, mode, dtype=torch.float16):
    method get_attention_mask_id (line 145) | def get_attention_mask_id(seq_lens, extend_lens):
    method update_attn_cache (line 163) | def update_attn_cache(
    method get_splitfuse_attn_mask (line 188) | def get_splitfuse_attn_mask(
  class AscendAttnBackend (line 206) | class AscendAttnBackend(AttentionBackend):
    method __init__ (line 208) | def __init__(self, model_runner: ModelRunner):
    method get_verify_buffers_to_fill_after_draft (line 263) | def get_verify_buffers_to_fill_after_draft(self):
    method update_verify_buffers_to_fill_after_draft (line 271) | def update_verify_buffers_to_fill_after_draft(
    method init_forward_metadata (line 276) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 342) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 351) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 414) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 447) | def get_cuda_graph_seq_len_fill_value(self):
    method _generate_alibi_bias (line 450) | def _generate_alibi_bias(
    method generate_alibi_bias (line 465) | def generate_alibi_bias(
    method attn_alibi (line 495) | def attn_alibi(
    method do_cp_balance_attn (line 565) | def do_cp_balance_attn(
    method forward_sparse (line 635) | def forward_sparse(
    method forward_extend (line 747) | def forward_extend(
    method forward_dllm (line 1130) | def forward_dllm(
    method forward_mtp (line 1189) | def forward_mtp(
    method forward_decode_graph (line 1370) | def forward_decode_graph(
    method forward_decode (line 1528) | def forward_decode(
    method forward_mixed (line 1780) | def forward_mixed(
  class AscendAttnMultiStepDraftBackend (line 1836) | class AscendAttnMultiStepDraftBackend:
    method __init__ (line 1842) | def __init__(
    method common_template (line 1855) | def common_template(self, forward_batch: ForwardBatch, call_fn: int):
    method init_forward_metadata (line 1861) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 1868) | def init_cuda_graph_state(self, max_bs, max_num_tokens):
    method init_forward_metadata_capture_cuda_graph (line 1872) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw...
    method init_forward_metadata_replay_cuda_graph (line 1886) | def init_forward_metadata_replay_cuda_graph(

FILE: python/sglang/srt/hardware_backend/npu/attention/ascend_torch_native_backend.py
  class AscendTorchNativeAttnBackend (line 9) | class AscendTorchNativeAttnBackend:
    method __init__ (line 10) | def __init__(self):
    method scaled_dot_product_attention_with_softcapping (line 13) | def scaled_dot_product_attention_with_softcapping(
    method run_sdpa_forward_extend (line 56) | def run_sdpa_forward_extend(
    method run_sdpa_forward_decode (line 176) | def run_sdpa_forward_decode(
    method support_triton (line 281) | def support_triton(self):

FILE: python/sglang/srt/hardware_backend/npu/attention/mla_preprocess.py
  function is_mla_preprocess_enabled (line 16) | def is_mla_preprocess_enabled() -> bool:
  function is_fia_nz (line 21) | def is_fia_nz() -> bool:
  function round_up (line 30) | def round_up(val: int, align: int) -> int:
  function transdata (line 36) | def transdata(nd_mat, block_size: tuple = (16, 16)):
  function trans_rope_weight (line 55) | def trans_rope_weight(weight, rope_dim):
  class NPUFusedMLAPreprocess (line 63) | class NPUFusedMLAPreprocess(torch.nn.Module):
    method __init__ (line 64) | def __init__(
    method preprocess_weights (line 102) | def preprocess_weights(self, hidden_states):
    method mlaprolog_preprocess_weight (line 239) | def mlaprolog_preprocess_weight(self):
    method get_sin_cos (line 248) | def get_sin_cos(self, positions):
    method get_kv_cache_and_cache_idx (line 255) | def get_kv_cache_and_cache_idx(self, forward_batch):
    method forward_absorb_prepare_npu_rms_norm_cache (line 260) | def forward_absorb_prepare_npu_rms_norm_cache(
    method forward_mlapo (line 341) | def forward_mlapo(self, positions, hidden_states, forward_batch, zero_...
    method forward_mlaprolog (line 427) | def forward_mlaprolog(self, positions, hidden_states, forward_batch):
    method forward (line 468) | def forward(self, positions, hidden_states, forward_batch, zero_alloca...

FILE: python/sglang/srt/hardware_backend/npu/cmo.py
  function get_cmo_stream (line 6) | def get_cmo_stream():
  function set_cmo_stream (line 16) | def set_cmo_stream(stream):
  function prepare_weight_cache (line 21) | def prepare_weight_cache(handle, cache, PREFETCH_MAX_SIZE=1000000000):
  function wait_cmo_stream (line 50) | def wait_cmo_stream():

FILE: python/sglang/srt/hardware_backend/npu/graph_runner/eagle_draft_extend_npu_graph_runner.py
  class EAGLEDraftExtendNpuGraphRunner (line 33) | class EAGLEDraftExtendNpuGraphRunner(EAGLEDraftExtendCudaGraphRunner):
    method __init__ (line 34) | def __init__(self, eagle_worker: EAGLEWorker):
    method _create_graph (line 37) | def _create_graph(self):
    method _cache_loc_dtype (line 40) | def _cache_loc_dtype(self):
    method _capture_init (line 43) | def _capture_init(self, run_once_fn):
    method _capture_graph (line 49) | def _capture_graph(self, graph, pool, stream, run_once_fn):
    method _replay_update (line 56) | def _replay_update(self, seq_lens):
    method _replay (line 61) | def _replay(self, forward_batch: ForwardBatch):

FILE: python/sglang/srt/hardware_backend/npu/graph_runner/eagle_draft_npu_graph_runner.py
  class EAGLEDraftNpuGraphRunner (line 47) | class EAGLEDraftNpuGraphRunner(EAGLEDraftCudaGraphRunner):
    method __init__ (line 48) | def __init__(self, eagle_worker: EAGLEWorker):
    method _init_arch_map (line 54) | def _init_arch_map(self):
    method _create_graph (line 64) | def _create_graph(self):
    method _capture_init (line 67) | def _capture_init(self, run_once_fn):
    method _capture_graph (line 73) | def _capture_graph(self, graph, pool, stream, run_once_fn):
    method _get_update_attr_name (line 80) | def _get_update_attr_name(self):
    method _get_update_attr_type (line 83) | def _get_update_attr_type(self):
    method _replay_update (line 86) | def _replay_update(self, seq_lens):
    method _replay (line 94) | def _replay(self, forward_batch: ForwardBatch):
    method _cache_loc_dtype (line 108) | def _cache_loc_dtype(self):

FILE: python/sglang/srt/hardware_backend/npu/graph_runner/npu_graph_runner.py
  function patch_model_npu (line 55) | def patch_model_npu(
  class NPUGraphRunner (line 73) | class NPUGraphRunner(CudaGraphRunner):
    method __init__ (line 76) | def __init__(self, model_runner: ModelRunner):
    method _init_arch_map (line 85) | def _init_arch_map(self):
    method _create_device_graph (line 101) | def _create_device_graph(self):
    method _capture_graph (line 104) | def _capture_graph(self, graph, pool, stream, run_once_fn):
    method _get_update_attr_name (line 119) | def _get_update_attr_name(self):
    method _get_update_attr_type (line 122) | def _get_update_attr_type(self):
    method _update_inputs (line 125) | def _update_inputs(self, seq_lens):
    method _cache_loc_dtype (line 133) | def _cache_loc_dtype(self):
    method _init_profile_context_and_memory_record (line 136) | def _init_profile_context_and_memory_record(self):
    method _post_process_after_profile (line 160) | def _post_process_after_profile(self, prof_context):
    method replay (line 164) | def replay(

FILE: python/sglang/srt/hardware_backend/npu/graph_runner/vit_npu_graph_runner.py
  class ViTNpuGraphRunner (line 33) | class ViTNpuGraphRunner(ViTCudaGraphRunner):
    method __init__ (line 46) | def __init__(
    method device (line 58) | def device(self) -> torch.device:
    method dtype (line 62) | def dtype(self) -> torch.dtype:
    method _create_graph (line 65) | def _create_graph(
    method create_graph (line 128) | def create_graph(
    method replay (line 177) | def replay(
    method run (line 204) | def run(

FILE: python/sglang/srt/hardware_backend/npu/memory_pool_npu.py
  class NPUMHATokenToKVPool (line 18) | class NPUMHATokenToKVPool(MHATokenToKVPool):
    method __init__ (line 20) | def __init__(
    method _create_buffers (line 51) | def _create_buffers(self):
    method get_contiguous_buf_infos (line 86) | def get_contiguous_buf_infos(self):
    method set_kv_buffer (line 112) | def set_kv_buffer(
  class NPUMLATokenToKVPool (line 167) | class NPUMLATokenToKVPool(MLATokenToKVPool):
    method __init__ (line 169) | def __init__(
    method get_kv_size_bytes (line 240) | def get_kv_size_bytes(self):
    method get_kv_buffer (line 254) | def get_kv_buffer(self, layer_id: int):
    method get_key_buffer (line 262) | def get_key_buffer(self, layer_id: int):
    method get_value_buffer (line 270) | def get_value_buffer(self, layer_id: int):
    method get_index_k_buffer (line 278) | def get_index_k_buffer(self, layer_id: int):
    method get_contiguous_buf_infos (line 287) | def get_contiguous_buf_infos(self):
    method set_kv_buffer (line 310) | def set_kv_buffer(
    method set_index_k_buffer (line 344) | def set_index_k_buffer(

FILE: python/sglang/srt/hardware_backend/npu/modules/deepseek_v2_attention_mla_npu.py
  function forward_mha_prepare_npu (line 27) | def forward_mha_prepare_npu(
  function forward_mha_core_npu (line 132) | def forward_mha_core_npu(
  function forward_mla_prepare_npu (line 149) | def forward_mla_prepare_npu(
  function forward_mla_core_npu (line 255) | def forward_mla_core_npu(
  function forward_dsa_prepare_npu (line 297) | def forward_dsa_prepare_npu(
  function forward_dsa_core_npu (line 396) | def forward_dsa_core_npu(
  function npu_mla_preprocess (line 448) | def npu_mla_preprocess(

FILE: python/sglang/srt/hardware_backend/npu/modules/qwen_vl_processor.py
  function npu_wrapper_preprocess (line 19) | def npu_wrapper_preprocess(func):
  function npu_apply_qwen_image_preprocess_patch (line 144) | def npu_apply_qwen_image_preprocess_patch():

FILE: python/sglang/srt/hardware_backend/npu/moe/topk.py
  function fused_topk_npu (line 16) | def fused_topk_npu(

FILE: python/sglang/srt/hardware_backend/npu/quantization/fused_moe_method_npu.py
  function npu_fused_experts_w4a4 (line 17) | def npu_fused_experts_w4a4(
  function npu_fused_experts (line 103) | def npu_fused_experts(
  function npu_fused_experts_w8a8_decode (line 205) | def npu_fused_experts_w8a8_decode(
  function npu_fused_moe_without_routing_weights_bf16 (line 277) | def npu_fused_moe_without_routing_weights_bf16(
  function fused_moe_npu (line 308) | def fused_moe_npu(
  class _NPUFusedMoEMethodBase (line 387) | class _NPUFusedMoEMethodBase(FusedMoEMethodBase):
    method __init__ (line 389) | def __init__(
  class NPUW4A4Int4DynamicMoEMethod (line 396) | class NPUW4A4Int4DynamicMoEMethod(_NPUFusedMoEMethodBase):
    method process_weights_after_loading (line 398) | def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
    method _pack_to_int32 (line 429) | def _pack_to_int32(self, weight: torch.Tensor):
    method apply (line 438) | def apply(
  class NPUW8A8Int8DynamicMoEMethod (line 464) | class NPUW8A8Int8DynamicMoEMethod(_NPUFusedMoEMethodBase):
    method process_weights_after_loading (line 466) | def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
    method apply (line 493) | def apply(
    method apply_without_routing_weights (line 538) | def apply_without_routing_weights(
  class NPUW4A8Int8DynamicMoEMethod (line 586) | class NPUW4A8Int8DynamicMoEMethod(_NPUFusedMoEMethodBase):
    method _process_scale (line 588) | def _process_scale(
    method _update_bias (line 623) | def _update_bias(self, layer, w13_bias, w2_bias):
    method _pack_to_int32 (line 631) | def _pack_to_int32(self, weight: torch.Tensor):
    method process_weights_after_loading (line 638) | def process_weights_after_loading(
    method _process_weights_without_clip (line 659) | def _process_weights_without_clip(
    method _process_weights_with_clip (line 693) | def _process_weights_with_clip(self, layer: torch.nn.Module) -> None:
    method apply (line 707) | def apply(
    method apply_without_routing_weights (line 791) | def apply_without_routing_weights(
  class NPUW4A16Int4DynamicMoEMethod (line 835) | class NPUW4A16Int4DynamicMoEMethod(_NPUFusedMoEMethodBase):
    method _pack_to_int32 (line 837) | def _pack_to_int32(self, weight: torch.Tensor):
    method _unpack_from_int32 (line 858) | def _unpack_from_int32(
    method process_weights_after_loading (line 922) | def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
    method apply (line 963) | def apply(
    method apply_without_routing_weights (line 991) | def apply_without_routing_weights(

FILE: python/sglang/srt/hardware_backend/npu/quantization/linear_method_npu.py
  class _NPULinearMethodBase (line 12) | class _NPULinearMethodBase(LinearMethodBase):
    method __init__ (line 14) | def __init__(
  class NPUW8A8Int8LinearMethod (line 21) | class NPUW8A8Int8LinearMethod(_NPULinearMethodBase):
    method process_weights_after_loading (line 23) | def process_weights_after_loading(self, layer: torch.nn.Module):
    method apply (line 46) | def apply(
  class NPUW8A8Int8DynamicLinearMethod (line 79) | class NPUW8A8Int8DynamicLinearMethod(_NPULinearMethodBase):
    method process_weights_after_loading (line 81) | def process_weights_after_loading(self, layer: torch.nn.Module):
    method apply (line 90) | def apply(
  class NPU_W4A4DynamicLinearMethod (line 114) | class NPU_W4A4DynamicLinearMethod(_NPULinearMethodBase):
    method process_weights_after_loading (line 116) | def process_weights_after_loading(self, layer):
    method apply (line 125) | def apply(

FILE: python/sglang/srt/hardware_backend/npu/utils.py
  class NPUACLFormat (line 19) | class NPUACLFormat(IntEnum):
  class FusedMoEMode (line 25) | class FusedMoEMode(IntEnum):
  function _call_once (line 30) | def _call_once(fn: Callable):
  function set_default_server_args (line 44) | def set_default_server_args(args: "ServerArgs"):
  function init_npu_backend (line 92) | def init_npu_backend():
  function npu_format_cast (line 110) | def npu_format_cast(
  function get_indexer_weight_stream (line 142) | def get_indexer_weight_stream():

FILE: python/sglang/srt/layers/activation.py
  class SiluAndMul (line 63) | class SiluAndMul(MultiPlatformOp):
    method __init__ (line 64) | def __init__(self, *args, **kwargs):
    method forward_native (line 69) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    method forward_cuda (line 73) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
    method forward_cpu (line 80) | def forward_cpu(self, x: torch.Tensor) -> torch.Tensor:
    method forward_npu (line 87) | def forward_npu(self, x: torch.Tensor) -> torch.Tensor:
    method forward_xpu (line 91) | def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
  class GeluAndMul (line 99) | class GeluAndMul(MultiPlatformOp):
    method __init__ (line 100) | def __init__(self, approximate="tanh"):
    method _forward_impl (line 104) | def _forward_impl(self, x: torch.Tensor) -> torch.Tensor:
    method forward_native (line 116) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    method forward_cpu (line 120) | def forward_cpu(self, x: torch.Tensor) -> torch.Tensor:
    method forward_cuda (line 128) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
    method forward_xpu (line 131) | def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
    method forward_npu (line 134) | def forward_npu(self, x: torch.Tensor) -> torch.Tensor:
  class NewGELU (line 146) | class NewGELU(MultiPlatformOp):
    method forward_native (line 147) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    method forward_cuda (line 151) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
  class ReLU2 (line 156) | class ReLU2(nn.Module):
    method forward (line 162) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class QuickGELU (line 167) | class QuickGELU(MultiPlatformOp):
    method forward_native (line 168) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    method forward_cuda (line 171) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
    method forward_hip (line 174) | def forward_hip(self, x: torch.Tensor) -> torch.Tensor:
    method forward_npu (line 179) | def forward_npu(self, x: torch.Tensor) -> torch.Tensor:
  class XIELU (line 183) | class XIELU(MultiPlatformOp):
    method __init__ (line 190) | def __init__(
    method _xielu_python (line 244) | def _xielu_python(self, x: torch.Tensor) -> torch.Tensor:
    method _xielu_cuda (line 253) | def _xielu_cuda(self, x: torch.Tensor) -> torch.Tensor:
    method forward (line 282) | def forward(self, input: torch.Tensor) -> torch.Tensor:
  class ScaledActivation (line 293) | class ScaledActivation(nn.Module):
    method __init__ (line 299) | def __init__(
    method forward (line 321) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method weight_loader (line 324) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens...
  function get_act_fn (line 344) | def get_act_fn(
  function get_cross_encoder_activation_function (line 369) | def get_cross_encoder_activation_function(config: PretrainedConfig):

FILE: python/sglang/srt/layers/amx_utils.py
  class CPUQuantMethod (line 12) | class CPUQuantMethod(IntEnum):
  function amx_process_weight_after_loading (line 19) | def amx_process_weight_after_loading(weight, is_conv=False):
  function dim_is_supported (line 35) | def dim_is_supported(weight):
  function dtype_is_supported (line 46) | def dtype_is_supported(weight):
  function is_dim_conv_weight (line 55) | def is_dim_conv_weight(weight):
  function _init_amx_conv_state (line 59) | def _init_amx_conv_state(conv_state):
  function _amx_process_weight_after_loading (line 76) | def _amx_process_weight_after_loading(
  class PackWeightMethod (line 131) | class PackWeightMethod:
    method __init__ (line 132) | def __init__(self, weight_names, transpose_dims=None):
    method process_weights_after_loading (line 136) | def process_weights_after_loading(self, module) -> None:

FILE: python/sglang/srt/layers/attention/aiter_backend.py
  class WrapperDispatch (line 79) | class WrapperDispatch(Enum):
  class ForwardMetadata (line 85) | class ForwardMetadata:
  class AiterAttnBackend (line 112) | class AiterAttnBackend(AttentionBackend):
    method __init__ (line 113) | def __init__(
    method make_mla_decode_meta_data_buffer (line 268) | def make_mla_decode_meta_data_buffer(self, max_seqlen_qo, batch_size):
    method make_mla_meta_data (line 331) | def make_mla_meta_data(
    method make_mla_prefill_ps_meta_data_buffer (line 375) | def make_mla_prefill_ps_meta_data_buffer(
    method make_mla_prefill_ps_meta_data (line 419) | def make_mla_prefill_ps_meta_data(
    method _transform_table_1_to_real (line 464) | def _transform_table_1_to_real(self, page_table: torch.Tensor) -> torc...
    method _resolve_v2_num_draft_tokens (line 474) | def _resolve_v2_num_draft_tokens(
    method _get_kv_indices_scratch (line 509) | def _get_kv_indices_scratch(
    method _set_uniform_qo_indptr (line 522) | def _set_uniform_qo_indptr(
    method _ensure_spec_v2_topk_supported (line 535) | def _ensure_spec_v2_topk_supported(self):
    method mla_fp8_prefill_attn (line 542) | def mla_fp8_prefill_attn(
    method init_forward_metadata (line 617) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 1146) | def init_cuda_graph_state(
    method init_forward_metadata_capture_cuda_graph (line 1207) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 1585) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 1954) | def get_cuda_graph_seq_len_fill_value(self):
    method update_verify_buffers_to_fill_after_draft (line 1957) | def update_verify_buffers_to_fill_after_draft(
    method forward_extend (line 1965) | def forward_extend(
    method forward_decode (line 2366) | def forward_decode(
  class AiterIndicesUpdaterPrefill (line 2527) | class AiterIndicesUpdaterPrefill:
    method __init__ (line 2528) | def __init__(self, model_runner: ModelRunner, attn_backend: AttentionB...
    method update (line 2553) | def update(
    method update_single_wrapper (line 2565) | def update_single_wrapper(
  class AiterMlaIndicesUpdaterPrefill (line 2632) | class AiterMlaIndicesUpdaterPrefill:
    method __init__ (line 2633) | def __init__(self, model_runner: ModelRunner, attn_backend: AttentionB...
    method update (line 2648) | def update(
    method update_single_wrapper (line 2661) | def update_single_wrapper(
  class AiterMultiStepDraftBackend (line 2714) | class AiterMultiStepDraftBackend:
    method __init__ (line 2720) | def __init__(
    method common_template (line 2759) | def common_template(
    method init_forward_metadata (line 2791) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 2812) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 2823) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw...
    method init_forward_metadata_replay_cuda_graph (line 2837) | def init_forward_metadata_replay_cuda_graph(

FILE: python/sglang/srt/layers/attention/attention_registry.py
  function register_attention_backend (line 15) | def register_attention_backend(name):
  function create_flashinfer_backend (line 24) | def create_flashinfer_backend(runner):
  function create_trtllm_mla_backend (line 49) | def create_trtllm_mla_backend(runner):
  function create_aiter_backend (line 58) | def create_aiter_backend(runner):
  function create_wave_backend (line 65) | def create_wave_backend(runner):
  function create_ascend_backend (line 72) | def create_ascend_backend(runner):
  function create_nsa_backend (line 81) | def create_nsa_backend(runner):
  function create_triton_backend (line 88) | def create_triton_backend(runner):
  function create_torch_native_backend (line 106) | def create_torch_native_backend(runner):
  function create_flex_attention_backend (line 113) | def create_flex_attention_backend(runner):
  function create_flashmla_backend (line 120) | def create_flashmla_backend(runner):
  function create_flashattention_v3_backend (line 127) | def create_flashattention_v3_backend(runner):
  function create_flashattention_v4_backend (line 142) | def create_flashattention_v4_backend(runner):
  function create_cutlass_mla_backend (line 149) | def create_cutlass_mla_backend(runner):
  function create_trtllm_mha_backend (line 156) | def create_trtllm_mha_backend(runner):
  function create_intel_amx_backend (line 165) | def create_intel_amx_backend(runner):
  function create_dual_chunk_flash_attn_backend (line 172) | def create_dual_chunk_flash_attn_backend(runner):
  function attn_backend_wrapper (line 180) | def attn_backend_wrapper(runner: "ModelRunner", full_attn_backend: "Atte...
  function create_intel_xpu_backend (line 240) | def create_intel_xpu_backend(runner):

FILE: python/sglang/srt/layers/attention/base_attn_backend.py
  class AttentionBackend (line 17) | class AttentionBackend(ABC):
    method init_forward_metadata (line 21) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 25) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 29) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 42) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 56) | def get_cuda_graph_seq_len_fill_value(self):
    method get_verify_buffers_to_fill_after_draft (line 60) | def get_verify_buffers_to_fill_after_draft(self):
    method update_verify_buffers_to_fill_after_draft (line 68) | def update_verify_buffers_to_fill_after_draft(
    method forward (line 79) | def forward(
    method forward_decode (line 123) | def forward_decode(
    method forward_extend (line 135) | def forward_extend(
    method forward_mixed (line 147) | def forward_mixed(
    method support_triton (line 159) | def support_triton(self):
    method get_indexer_metadata (line 163) | def get_indexer_metadata(

FILE: python/sglang/srt/layers/attention/cutlass_mla_backend.py
  class CutlassMLADecodeMetadata (line 35) | class CutlassMLADecodeMetadata:
    method __init__ (line 39) | def __init__(
  class CutlassMLABackend (line 48) | class CutlassMLABackend(FlashInferMLAAttnBackend):
    method __init__ (line 51) | def __init__(
    method init_forward_metadata (line 82) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 122) | def init_cuda_graph_state(
    method init_forward_metadata_capture_cuda_graph (line 146) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 185) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 223) | def get_cuda_graph_seq_len_fill_value(self):
    method forward_decode (line 226) | def forward_decode(

FILE: python/sglang/srt/layers/attention/double_sparsity_backend.py
  class DoubleSparseAttnBackend (line 16) | class DoubleSparseAttnBackend(AttentionBackend):
    method __init__ (line 17) | def __init__(self, model_runner: ModelRunner):
    method init_forward_metadata (line 52) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method forward_extend (line 113) | def forward_extend(
    method forward_decode (line 167) | def forward_decode(

FILE: python/sglang/srt/layers/attention/dual_chunk_flashattention_backend.py
  class DualChunkFlashAttentionMetadata (line 33) | class DualChunkFlashAttentionMetadata:
  class DualChunkFlashAttentionBackend (line 101) | class DualChunkFlashAttentionBackend(AttentionBackend):
    method __init__ (line 102) | def __init__(
    method get_sparse_attention_config (line 160) | def get_sparse_attention_config(self, layer_idx) -> List[Dict[str, Any]]:
    method init_forward_metadata (line 168) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method forward_extend (line 296) | def forward_extend(
    method forward_decode (line 409) | def forward_decode(
    method init_cuda_graph_state (line 486) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 532) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 580) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 670) | def get_cuda_graph_seq_len_fill_value(self):
    method _dual_chunk_flash_attn_prefill (line 674) | def _dual_chunk_flash_attn_prefill(
    method _dual_chunk_flash_attn_prefill_func (line 831) | def _dual_chunk_flash_attn_prefill_func(
    method _do_flash_attn (line 1329) | def _do_flash_attn(
    method _merge_attn_outputs (line 1424) | def _merge_attn_outputs(
    method _dual_chunk_flash_attn_decoding (line 1466) | def _dual_chunk_flash_attn_decoding(
    method _dual_chunk_flash_attn_decoding_with_exp_sums (line 1560) | def _dual_chunk_flash_attn_decoding_with_exp_sums(
  function _vertical_slash_sparse_attention (line 1586) | def _vertical_slash_sparse_attention(
  function _sum_all_diagonal_matrix (line 1682) | def _sum_all_diagonal_matrix(mat: torch.tensor):
  function _get_block (line 1697) | def _get_block(block_table: torch.Tensor, block_size: int, begin: int, e...

FILE: python/sglang/srt/layers/attention/fla/chunk.py
  function chunk_gated_delta_rule_fwd (line 26) | def chunk_gated_delta_rule_fwd(
  class ChunkGatedDeltaRuleFunction (line 75) | class ChunkGatedDeltaRuleFunction(torch.autograd.Function):
    method forward (line 80) | def forward(
  function chunk_gated_delta_rule (line 115) | def chunk_gated_delta_rule(

FILE: python/sglang/srt/layers/attention/fla/chunk_delta_h.py
  function chunk_gated_delta_rule_fwd_kernel_h_blockdim64 (line 33) | def chunk_gated_delta_rule_fwd_kernel_h_blockdim64(
  function chunk_gated_delta_rule_fwd_h (line 274) | def chunk_gated_delta_rule_fwd_h(

FILE: python/sglang/srt/layers/attention/fla/chunk_o.py
  function chunk_fwd_kernel_o (line 30) | def chunk_fwd_kernel_o(
  function chunk_fwd_o (line 126) | def chunk_fwd_o(

FILE: python/sglang/srt/layers/attention/fla/chunk_scaled_dot_kkt.py
  function chunk_scaled_dot_kkt_fwd_kernel (line 25) | def chunk_scaled_dot_kkt_fwd_kernel(
  function chunk_scaled_dot_kkt_fwd (line 89) | def chunk_scaled_dot_kkt_fwd(

FILE: python/sglang/srt/layers/attention/fla/cumsum.py
  function chunk_local_cumsum_scalar_kernel (line 22) | def chunk_local_cumsum_scalar_kernel(
  function chunk_local_cumsum_vector_kernel (line 80) | def chunk_local_cumsum_vector_kernel(
  function chunk_local_cumsum_scalar (line 158) | def chunk_local_cumsum_scalar(
  function chunk_local_cumsum_vector (line 201) | def chunk_local_cumsum_vector(
  function chunk_local_cumsum (line 253) | def chunk_local_cumsum(

FILE: python/sglang/srt/layers/attention/fla/fused_gdn_gating.py
  function fused_gdn_gating_kernel (line 11) | def fused_gdn_gating_kernel(
  function fused_gdn_gating (line 42) | def fused_gdn_gating(

FILE: python/sglang/srt/layers/attention/fla/fused_norm_gate.py
  function layer_norm_gated_fwd_kernel (line 26) | def layer_norm_gated_fwd_kernel(
  function layer_norm_gated_fwd_kernel1 (line 106) | def layer_norm_gated_fwd_kernel1(
  function layer_norm_gated_fwd (line 173) | def layer_norm_gated_fwd(
  class LayerNormGatedFunction (line 266) | class LayerNormGatedFunction(torch.autograd.Function):
    method forward (line 268) | def forward(
  function rms_norm_gated (line 318) | def rms_norm_gated(
  class FusedRMSNormGated (line 343) | class FusedRMSNormGated(nn.Module):
    method __init__ (line 344) | def __init__(
    method forward (line 370) | def forward(

FILE: python/sglang/srt/layers/attention/fla/fused_recurrent.py
  function fused_recurrent_gated_delta_rule_fwd_kernel (line 16) | def fused_recurrent_gated_delta_rule_fwd_kernel(
  function fused_recurrent_gated_delta_rule_fwd (line 124) | def fused_recurrent_gated_delta_rule_fwd(
  function fused_recurrent_gated_delta_rule_packed_decode_kernel (line 186) | def fused_recurrent_gated_delta_rule_packed_decode_kernel(
  function fused_recurrent_gated_delta_rule_packed_decode (line 268) | def fused_recurrent_gated_delta_rule_packed_decode(
  class FusedRecurrentFunction (line 405) | class FusedRecurrentFunction(torch.autograd.Function):
    method forward (line 409) | def forward(
    method backward (line 439) | def backward(ctx, do, dht):
  function fused_recurrent_gated_delta_rule (line 447) | def fused_recurrent_gated_delta_rule(
  function fused_recurrent_gated_delta_rule_update_fwd_kernel (line 565) | def fused_recurrent_gated_delta_rule_update_fwd_kernel(
  function fused_recurrent_gated_delta_rule_update_fwd (line 743) | def fused_recurrent_gated_delta_rule_update_fwd(
  class FusedRecurrentUpdateFunction (line 829) | class FusedRecurrentUpdateFunction(torch.autograd.Function):
    method forward (line 833) | def forward(
    method backward (line 875) | def backward(ctx, do, dht):
  function fused_recurrent_gated_delta_rule_update (line 883) | def fused_recurrent_gated_delta_rule_update(

FILE: python/sglang/srt/layers/attention/fla/fused_sigmoid_gating_recurrent.py
  function fused_sigmoid_gating_delta_rule_update_kernel (line 9) | def fused_sigmoid_gating_delta_rule_update_kernel(
  function fused_sigmoid_gating_delta_rule_update (line 243) | def fused_sigmoid_gating_delta_rule_update(

FILE: python/sglang/srt/layers/attention/fla/index.py
  function prepare_lens (line 12) | def prepare_lens(cu_seqlens: torch.LongTensor) -> torch.LongTensor:
  function prepare_chunk_indices (line 17) | def prepare_chunk_indices(
  function prepare_chunk_offsets (line 30) | def prepare_chunk_offsets(

FILE: python/sglang/srt/layers/attention/fla/kda.py
  function cdiv (line 27) | def cdiv(a: int, b: int) -> int:
  function next_power_of_2 (line 32) | def next_power_of_2(n: int) -> int:
  function fused_recurrent_kda_fwd (line 39) | def fused_recurrent_kda_fwd(
  function fused_recurrent_kda (line 120) | def fused_recurrent_kda(
  function rms_norm_gated (line 159) | def rms_norm_gated(
  function chunk_kda_scaled_dot_kkt_fwd_kernel_intra_sub_inter (line 207) | def chunk_kda_scaled_dot_kkt_fwd_kernel_intra_sub_inter(
  function chunk_kda_scaled_dot_kkt_fwd_kernel_intra_sub_intra (line 312) | def chunk_kda_scaled_dot_kkt_fwd_kernel_intra_sub_intra(
  function chunk_kda_scaled_dot_kkt_fwd (line 402) | def chunk_kda_scaled_dot_kkt_fwd(
  function recompute_w_u_fwd_kernel (line 497) | def recompute_w_u_fwd_kernel(
  function recompute_w_u_fwd (line 640) | def recompute_w_u_fwd(
  function chunk_gla_fwd_kernel_o (line 701) | def chunk_gla_fwd_kernel_o(
  function chunk_gla_fwd_o_gk (line 809) | def chunk_gla_fwd_o_gk(
  function chunk_kda_fwd (line 853) | def chunk_kda_fwd(
  function chunk_kda (line 912) | def chunk_kda(
  function kda_gate_fwd_kernel (line 956) | def kda_gate_fwd_kernel(
  function fused_kda_gate (line 1018) | def fused_kda_gate(

FILE: python/sglang/srt/layers/attention/fla/l2norm.py
  function l2norm_fwd_kernel1 (line 24) | def l2norm_fwd_kernel1(
  function l2norm_fwd_kernel (line 55) | def l2norm_fwd_kernel(
  function l2norm_fwd (line 74) | def l2norm_fwd(
  class L2NormFunction (line 125) | class L2NormFunction(torch.autograd.Function):
    method forward (line 129) | def forward(ctx, x, eps=1e-6, output_dtype=None):
  function l2norm (line 133) | def l2norm(
  class L2Norm (line 142) | class L2Norm(nn.Module):
    method __init__ (line 144) | def __init__(self, eps: float = 1e-6, output_dtype: Optional[torch.dty...
    method forward (line 149) | def forward(self, x: torch.Tensor) -> torch.Tensor:

FILE: python/sglang/srt/layers/attention/fla/layernorm_gated.py
  function rms_norm_ref (line 34) | def rms_norm_ref(
  function _layer_norm_fwd_1pass_kernel (line 68) | def _layer_norm_fwd_1pass_kernel(
  function _get_sm_count (line 173) | def _get_sm_count(device: torch.device) -> int:
  function calc_rows_per_block (line 179) | def calc_rows_per_block(M: int, device: torch.device) -> int:
  function _layer_norm_fwd (line 194) | def _layer_norm_fwd(
  function rms_norm_gated (line 274) | def rms_norm_gated(
  class LayerNormFn (line 317) | class LayerNormFn(torch.autograd.Function):
    method forward (line 320) | def forward(
  function layernorm_fn (line 345) | def layernorm_fn(
  class LayerNorm (line 361) | class LayerNorm(torch.nn.Module):
    method __init__ (line 363) | def __init__(
    method reset_parameters (line 385) | def reset_parameters(self):
    method forward (line 389) | def forward(self, x, z=None):
  class RMSNorm (line 403) | class RMSNorm(torch.nn.Module):
    method __init__ (line 405) | def __init__(
    method reset_parameters (line 428) | def reset_parameters(self):
    method forward (line 431) | def forward(self, x, z=None):

FILE: python/sglang/srt/layers/attention/fla/op.py
  function safe_exp (line 26) | def safe_exp(x):
  function gather (line 33) | def gather(src, index, axis, _builder=None):
  function make_tensor_descriptor (line 59) | def make_tensor_descriptor(

FILE: python/sglang/srt/layers/attention/fla/solve_tril.py
  function solve_tril_16x16_kernel (line 24) | def solve_tril_16x16_kernel(
  function merge_16x16_to_32x32_inverse_kernel (line 81) | def merge_16x16_to_32x32_inverse_kernel(
  function merge_16x16_to_64x64_inverse_kernel (line 160) | def merge_16x16_to_64x64_inverse_kernel(
  function solve_tril (line 394) | def solve_tril(

FILE: python/sglang/srt/layers/attention/fla/utils.py
  function check_environments (line 26) | def check_environments():
  function get_abs_err (line 63) | def get_abs_err(x, y):
  function get_err_ratio (line 67) | def get_err_ratio(x, y):
  function assert_close (line 73) | def assert_close(prefix, ref, tri, ratio, warning=False, err_atol=1e-6):
  function tensor_cache (line 92) | def tensor_cache(fn: Callable[..., torch.Tensor]) -> Callable[..., torch...
  function input_guard (line 134) | def input_guard(fn: Callable[..., torch.Tensor]) -> Callable[..., torch....
  function require_version (line 174) | def require_version(version, hint):
  function checkpoint (line 202) | def checkpoint(fn):
  function _cpu_device_warning (line 209) | def _cpu_device_warning():
  function get_multiprocessor_count (line 218) | def get_multiprocessor_count(tensor_idx: int = 0) -> int:
  function get_available_device (line 229) | def get_available_device() -> str:
  function _check_platform (line 238) | def _check_platform() -> Literal["nvidia", "amd", "intel", "musa"]:
  function get_all_max_shared_mem (line 272) | def get_all_max_shared_mem():
  class Backend (line 285) | class Backend(Enum):
    method get_shared_memory (line 292) | def get_shared_memory(cls, arch: str) -> int:
  function check_shared_mem (line 300) | def check_shared_mem(arch: str = "none", tensor_idx: int = 0) -> bool:
  function custom_device_ctx (line 314) | def custom_device_ctx(index: int):
  function custom_device_ctx (line 324) | def custom_device_ctx(index: int):

FILE: python/sglang/srt/layers/attention/fla/wy_fast.py
  function recompute_w_u_fwd_kernel (line 23) | def recompute_w_u_fwd_kernel(
  function recompute_w_u_fwd (line 111) | def recompute_w_u_fwd(

FILE: python/sglang/srt/layers/attention/flashattention_backend.py
  class FlashAttentionMetadata (line 40) | class FlashAttentionMetadata:
    class LocalAttentionMetadata (line 75) | class LocalAttentionMetadata:
  function make_local_attention_virtual_batches (line 142) | def make_local_attention_virtual_batches(
  function cdiv (line 294) | def cdiv(a: int, b: int) -> int:
  function merge_state_v2_wrapper (line 301) | def merge_state_v2_wrapper(o, s_a, o_exp, s_b):
  class FlashAttentionBackend (line 305) | class FlashAttentionBackend(AttentionBackend):
    method __init__ (line 323) | def __init__(
    method init_forward_metadata (line 400) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method forward_extend (line 735) | def forward_extend(
    method forward_decode (line 1075) | def forward_decode(
    method init_cuda_graph_state (line 1358) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 1645) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 1897) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 2258) | def get_cuda_graph_seq_len_fill_value(self):
    method _maybe_init_local_attn_metadata (line 2262) | def _maybe_init_local_attn_metadata(
    method _maybe_update_local_attn_metadata_for_capture (line 2306) | def _maybe_update_local_attn_metadata_for_capture(
    method _maybe_update_local_attn_metadata_for_replay (line 2365) | def _maybe_update_local_attn_metadata_for_replay(
    method _init_sliding_window_attn_spec_metadata (line 2445) | def _init_sliding_window_attn_spec_metadata(
  function _prepare_swa_spec_page_table_kernel (line 2498) | def _prepare_swa_spec_page_table_kernel(
  function prepare_swa_spec_page_table_triton (line 2559) | def prepare_swa_spec_page_table_triton(
  class FlashAttentionMultiStepBackend (line 2599) | class FlashAttentionMultiStepBackend:
    method __init__ (line 2601) | def __init__(
    method init_forward_metadata (line 2618) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 2622) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 2626) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 2644) | def init_forward_metadata_replay_cuda_graph(
  function normal_decode_set_metadata (line 2669) | def normal_decode_set_metadata(
  function draft_decode_set_expand_metadata (line 2698) | def draft_decode_set_expand_metadata(

FILE: python/sglang/srt/layers/attention/flashinfer_backend.py
  class WrapperDispatch (line 57) | class WrapperDispatch(Enum):
  class MultiItemScoringParams (line 63) | class MultiItemScoringParams:
    method is_enabled (line 87) | def is_enabled(self) -> bool:
  class DecodeMetadata (line 93) | class DecodeMetadata:
  class PrefillMetadata (line 98) | class PrefillMetadata:
  class FlashInferAttnBackend (line 113) | class FlashInferAttnBackend(AttentionBackend):
    method __init__ (line 116) | def __init__(
    method _process_multi_item_scoring (line 303) | def _process_multi_item_scoring(
    method init_forward_metadata (line 425) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 514) | def init_cuda_graph_state(
    method init_forward_metadata_capture_cuda_graph (line 548) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 686) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 748) | def get_cuda_graph_seq_len_fill_value(self):
    method forward_extend (line 751) | def forward_extend(
    method forward_decode (line 865) | def forward_decode(
    method _get_wrapper_idx (line 903) | def _get_wrapper_idx(self, layer: RadixAttention):
  class FlashInferIndicesUpdaterDecode (line 915) | class FlashInferIndicesUpdaterDecode:
    method __init__ (line 916) | def __init__(self, model_runner: ModelRunner, attn_backend: FlashInfer...
    method update (line 945) | def update(
    method update_single_wrapper (line 960) | def update_single_wrapper(
    method update_sliding_window (line 986) | def update_sliding_window(
    method update_cross_attention (line 1036) | def update_cross_attention(
    method call_begin_forward (line 1070) | def call_begin_forward(
  class FlashInferIndicesUpdaterPrefill (line 1175) | class FlashInferIndicesUpdaterPrefill:
    method __init__ (line 1176) | def __init__(self, model_runner: ModelRunner, attn_backend: FlashInfer...
    method update (line 1207) | def update(
    method update_single_wrapper (line 1223) | def update_single_wrapper(
    method update_sliding_window (line 1263) | def update_sliding_window(
    method update_cross_attention (line 1312) | def update_cross_attention(
    method call_begin_forward (line 1354) | def call_begin_forward(
  class FlashInferMultiStepDraftBackend (line 1463) | class FlashInferMultiStepDraftBackend:
    method __init__ (line 1469) | def __init__(
    method common_template (line 1510) | def common_template(
    method init_forward_metadata (line 1555) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 1576) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 1588) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw...
    method init_forward_metadata_replay_cuda_graph (line 1602) | def init_forward_metadata_replay_cuda_graph(
  function should_use_tensor_core (line 1620) | def should_use_tensor_core(

FILE: python/sglang/srt/layers/attention/flashinfer_mla_backend.py
  class DecodeMetadata (line 56) | class DecodeMetadata:
  class PrefillMetadata (line 61) | class PrefillMetadata:
  class FlashInferMhaChunkKVRunner (line 70) | class FlashInferMhaChunkKVRunner:
    method __init__ (line 71) | def __init__(
    method update_prefix_chunks (line 93) | def update_prefix_chunks(self, num_prefix_chunks: int):
    method update_wrapper (line 100) | def update_wrapper(
    method forward (line 153) | def forward(
  class FlashInferMLAAttnBackend (line 191) | class FlashInferMLAAttnBackend(AttentionBackend):
    method __init__ (line 194) | def __init__(
    method init_forward_metadata (line 288) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 342) | def init_cuda_graph_state(
    method init_forward_metadata_capture_cuda_graph (line 373) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 453) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 510) | def get_cuda_graph_seq_len_fill_value(self):
    method init_mha_chunk_metadata (line 513) | def init_mha_chunk_metadata(
    method forward_extend (line 519) | def forward_extend(
    method forward_decode (line 595) | def forward_decode(
  class FlashInferMLAIndicesUpdaterDecode (line 656) | class FlashInferMLAIndicesUpdaterDecode:
    method __init__ (line 657) | def __init__(self, model_runner: ModelRunner, attn_backend: AttentionB...
    method update (line 674) | def update(
    method call_begin_forward (line 697) | def call_begin_forward(
  class FlashInferMLAIndicesUpdaterPrefill (line 765) | class FlashInferMLAIndicesUpdaterPrefill:
    method __init__ (line 766) | def __init__(self, model_runner: ModelRunner, attn_backend: AttentionB...
    method update (line 786) | def update(
    method call_begin_forward (line 817) | def call_begin_forward(
  class FlashInferMLAMultiStepDraftBackend (line 898) | class FlashInferMLAMultiStepDraftBackend:
    method __init__ (line 904) | def __init__(
    method common_template (line 950) | def common_template(
    method init_forward_metadata (line 988) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 1009) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 1021) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw...
    method init_forward_metadata_replay_cuda_graph (line 1035) | def init_forward_metadata_replay_cuda_graph(
  function fast_mla_decode_plan (line 1053) | def fast_mla_decode_plan(

FILE: python/sglang/srt/layers/attention/flashmla_backend.py
  class FlashMLADecodeMetadata (line 30) | class FlashMLADecodeMetadata:
    method __init__ (line 35) | def __init__(
  class FlashMLABackend (line 46) | class FlashMLABackend(FlashInferMLAAttnBackend):
    method __init__ (line 47) | def __init__(
    method init_forward_metadata (line 87) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 153) | def init_cuda_graph_state(
    method init_forward_metadata_capture_cuda_graph (line 186) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 285) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 396) | def get_cuda_graph_seq_len_fill_value(self):
    method forward_decode (line 399) | def forward_decode(
    method forward_extend (line 471) | def forward_extend(
  class FlashMLAMultiStepDraftBackend (line 547) | class FlashMLAMultiStepDraftBackend:
    method __init__ (line 548) | def __init__(
    method common_template (line 581) | def common_template(
    method init_forward_metadata (line 591) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 598) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 604) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw...
    method init_forward_metadata_replay_cuda_graph (line 622) | def init_forward_metadata_replay_cuda_graph(

FILE: python/sglang/srt/layers/attention/hybrid_attn_backend.py
  class HybridAttnBackend (line 13) | class HybridAttnBackend(AttentionBackend):
    method __init__ (line 16) | def __init__(
    method _select_backend (line 27) | def _select_backend(self, forward_mode: ForwardMode) -> AttentionBackend:
    method init_forward_metadata (line 53) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 57) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 67) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 88) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 111) | def get_cuda_graph_seq_len_fill_value(self):
    method forward (line 114) | def forward(
    method forward_decode (line 142) | def forward_decode(
    method forward_extend (line 156) | def forward_extend(
    method get_indexer_metadata (line 171) | def get_indexer_metadata(
    method forward (line 177) | def forward(

FILE: python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py
  function track_mamba_state_if_needed_kernel (line 37) | def track_mamba_state_if_needed_kernel(
  function track_mamba_states_if_needed (line 96) | def track_mamba_states_if_needed(
  class MambaAttnBackendBase (line 137) | class MambaAttnBackendBase(AttentionBackend):
    method __init__ (line 138) | def __init__(self, model_runner: ModelRunner):
    method _forward_metadata (line 153) | def _forward_metadata(self, forward_batch: ForwardBatch):
    method init_forward_metadata (line 233) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method _init_track_conv_indices (line 236) | def _init_track_conv_indices(
    method _init_track_ssm_indices (line 278) | def _init_track_ssm_indices(
    method init_forward_metadata_capture_cuda_graph (line 358) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 372) | def init_forward_metadata_replay_cuda_graph(
    method init_forward_metadata_capture_cpu_graph (line 387) | def init_forward_metadata_capture_cpu_graph(
    method init_cuda_graph_state (line 401) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_cpu_graph_state (line 441) | def init_cpu_graph_state(self, max_bs: int, max_num_tokens: int):
    method _capture_metadata (line 458) | def _capture_metadata(
    method _replay_metadata (line 496) | def _replay_metadata(
    method get_cuda_graph_seq_len_fill_value (line 561) | def get_cuda_graph_seq_len_fill_value(self):
    method get_cpu_graph_seq_len_fill_value (line 564) | def get_cpu_graph_seq_len_fill_value(self):
    method _track_mamba_state_decode (line 567) | def _track_mamba_state_decode(
    method _track_mamba_state_extend (line 597) | def _track_mamba_state_extend(
  class Mamba2AttnBackend (line 632) | class Mamba2AttnBackend(MambaAttnBackendBase):
    method __init__ (line 635) | def __init__(self, model_runner: ModelRunner):
    method init_forward_metadata (line 641) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_forward_metadata_capture_cuda_graph (line 649) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 668) | def init_forward_metadata_replay_cuda_graph(
    method forward (line 690) | def forward(
    method forward_decode (line 710) | def forward_decode(self, *args, **kwargs):
    method forward_extend (line 715) | def forward_extend(self, *args, **kwargs):
  class HybridLinearAttnBackend (line 721) | class HybridLinearAttnBackend(AttentionBackend):
    method __init__ (line 724) | def __init__(
    method init_forward_metadata (line 735) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 739) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_cpu_graph_state (line 743) | def init_cpu_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 747) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_capture_cpu_graph (line 768) | def init_forward_metadata_capture_cpu_graph(
    method init_forward_metadata_replay_cuda_graph (line 789) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 812) | def get_cuda_graph_seq_len_fill_value(self):
    method get_cpu_graph_seq_len_fill_value (line 815) | def get_cpu_graph_seq_len_fill_value(self):
    method forward_decode (line 818) | def forward_decode(
    method forward_extend (line 850) | def forward_extend(
    method forward (line 882) | def forward(
    method update_mamba_state_after_mtp_verify (line 931) | def update_mamba_state_after_mtp_verify(

FILE: python/sglang/srt/layers/attention/intel_amx_backend.py
  class IntelAMXAttnBackend (line 15) | class IntelAMXAttnBackend(AttentionBackend):
    method __init__ (line 16) | def __init__(self, model_runner: ModelRunner):
    method init_forward_metadata (line 40) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method get_cpu_graph_seq_len_fill_value (line 60) | def get_cpu_graph_seq_len_fill_value(self):
    method init_forward_metadata_capture_cpu_graph (line 63) | def init_forward_metadata_capture_cpu_graph(
    method init_cpu_graph_state (line 86) | def init_cpu_graph_state(self, max_bs: int, max_num_tokens: int):
    method forward_extend (line 89) | def forward_extend(
    method forward_decode (line 128) | def forward_decode(
    method support_triton (line 164) | def support_triton(self):

FILE: python/sglang/srt/layers/attention/linear/gdn_backend.py
  class GDNKernelDispatcher (line 53) | class GDNKernelDispatcher:
    method __init__ (line 56) | def __init__(
    method packed_decode (line 125) | def packed_decode(
    method decode (line 158) | def decode(
    method extend (line 187) | def extend(
    method target_verify (line 212) | def target_verify(
  class GDNAttnBackend (line 242) | class GDNAttnBackend(MambaAttnBackendBase):
    method __init__ (line 245) | def __init__(self, model_runner: ModelRunner):
    method forward_decode (line 259) | def forward_decode(
    method forward_extend (line 334) | def forward_extend(

FILE: python/sglang/srt/layers/attention/linear/kda_backend.py
  class KDAKernelDispatcher (line 35) | class KDAKernelDispatcher:
    method __init__ (line 38) | def __init__(
    method decode (line 66) | def decode(
    method extend (line 95) | def extend(
  class KDAAttnBackend (line 121) | class KDAAttnBackend(MambaAttnBackendBase):
    method __init__ (line 124) | def __init__(self, model_runner: ModelRunner):
    method forward_decode (line 130) | def forward_decode(
    method forward_extend (line 170) | def forward_extend(

FILE: python/sglang/srt/layers/attention/linear/kernels/gdn_cutedsl.py
  class CuteDSLGDNKernel (line 9) | class CuteDSLGDNKernel(LinearAttnKernelBase):
    method decode (line 12) | def decode(
    method extend (line 43) | def extend(self, *args, **kwargs):
    method target_verify (line 46) | def target_verify(self, *args, **kwargs):

FILE: python/sglang/srt/layers/attention/linear/kernels/gdn_flashinfer.py
  function _get_flashinfer_gdn_kernels (line 32) | def _get_flashinfer_gdn_kernels():
  class FlashInferGDNKernel (line 73) | class FlashInferGDNKernel(LinearAttnKernelBase):
    method __init__ (line 83) | def __init__(self):
    method decode (line 112) | def decode(
    method extend (line 176) | def extend(
    method target_verify (line 251) | def target_verify(

FILE: python/sglang/srt/layers/attention/linear/kernels/gdn_triton.py
  class TritonGDNKernel (line 34) | class TritonGDNKernel(LinearAttnKernelBase):
    method packed_decode (line 39) | def packed_decode(
    method decode (line 94) | def decode(
    method extend (line 125) | def extend(
    method target_verify (line 156) | def target_verify(

FILE: python/sglang/srt/layers/attention/linear/kernels/kda_triton.py
  class TritonKDAKernel (line 15) | class TritonKDAKernel(LinearAttnKernelBase):
    method decode (line 18) | def decode(
    method extend (line 50) | def extend(

FILE: python/sglang/srt/layers/attention/linear/kernels/kernel_backend.py
  class LinearAttnKernelBase (line 6) | class LinearAttnKernelBase(ABC):
    method decode (line 14) | def decode(
    method extend (line 31) | def extend(
    method target_verify (line 45) | def target_verify(

FILE: python/sglang/srt/layers/attention/linear/lightning_attn.py
  function _fwd_diag_kernel (line 12) | def _fwd_diag_kernel(
  function _fwd_kv_parallel (line 141) | def _fwd_kv_parallel(
  function _fwd_kv_reduce (line 247) | def _fwd_kv_reduce(
  function _fwd_none_diag_kernel (line 312) | def _fwd_none_diag_kernel(
  class _attention (line 394) | class _attention(torch.autograd.Function):
    method forward (line 397) | def forward(ctx, q, k, v, s, kv_history):
  function lightning_attention (line 530) | def lightning_attention(q, k, v, ed, block_size=256, kv_history=None):
  function _linear_attn_decode_kernel (line 582) | def _linear_attn_decode_kernel(
  function linear_decode_forward_triton (line 663) | def linear_decode_forward_triton(
  class BailingLinearKernel (line 730) | class BailingLinearKernel:
    method jit_linear_forward_prefix (line 742) | def jit_linear_forward_prefix(

FILE: python/sglang/srt/layers/attention/linear/lightning_backend.py
  class LightningAttentionBackend (line 22) | class LightningAttentionBackend(MambaAttnBackendBase):
    method __init__ (line 39) | def __init__(self, model_runner: ModelRunner):
    method init_forward_metadata (line 70) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_forward_metadata_capture_cuda_graph (line 78) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 93) | def init_forward_metadata_replay_cuda_graph(
    method _build_slope_tensor (line 112) | def _build_slope_tensor(
    method _prefill_and_mix_infer (line 157) | def _prefill_and_mix_infer(
    method _decode_infer (line 218) | def _decode_infer(self, q, k, v, kv_cache, state_indices_tensor, metad...
    method _linear_attention_entry (line 235) | def _linear_attention_entry(
    method forward_extend (line 272) | def forward_extend(
    method forward_decode (line 337) | def forward_decode(

FILE: python/sglang/srt/layers/attention/linear/linear_metadata.py
  class BailingLinearMetadata (line 10) | class BailingLinearMetadata(ForwardMetadata):
    method prepare_decode (line 19) | def prepare_decode(
    method prepare_mixed (line 38) | def prepare_mixed(

FILE: python/sglang/srt/layers/attention/linear/seg_la.py
  class SegLaMeta (line 18) | class SegLaMeta:
  function seg_la_kernel (line 34) | def seg_la_kernel(
  function seg_la_p_kernel (line 207) | def seg_la_p_kernel(
  function seg_la_s_kernel (line 348) | def seg_la_s_kernel(
  function seg_la_d_kernel (line 483) | def seg_la_d_kernel(
  function seg_la_mtp_kernel (line 554) | def seg_la_mtp_kernel(
  function seg_la_sum_kernel (line 646) | def seg_la_sum_kernel(T, O, DIM: tl.constexpr, NUM_BLOCK: tl.constexpr):
  function seg_la_fwd (line 657) | def seg_la_fwd(

FILE: python/sglang/srt/layers/attention/linear/utils.py
  class LinearAttnKernelBackend (line 15) | class LinearAttnKernelBackend(Enum):
    method is_triton (line 20) | def is_triton(self):
    method is_cutedsl (line 23) | def is_cutedsl(self):
    method is_flashinfer (line 26) | def is_flashinfer(self):
  function initialize_linear_attn_config (line 34) | def initialize_linear_attn_config(server_args: ServerArgs):
  function get_linear_attn_decode_backend (line 51) | def get_linear_attn_decode_backend() -> LinearAttnKernelBackend:
  function get_linear_attn_prefill_backend (line 61) | def get_linear_attn_prefill_backend() -> LinearAttnKernelBackend:

FILE: python/sglang/srt/layers/attention/mamba/causal_conv1d.py
  function _get_seq_lens_cpu (line 25) | def _get_seq_lens_cpu(query_start_loc, x):
  function causal_conv1d_fn (line 31) | def causal_conv1d_fn(
  function causal_conv1d_update (line 112) | def causal_conv1d_update(

FILE: python/sglang/srt/layers/attention/mamba/causal_conv1d_triton.py
  function _causal_conv1d_fwd_kernel (line 15) | def _causal_conv1d_fwd_kernel(  # continuous batching
  function causal_conv1d_fn (line 378) | def causal_conv1d_fn(
  function _causal_conv1d_update_kernel (line 571) | def _causal_conv1d_update_kernel(
  function causal_conv1d_update (line 980) | def causal_conv1d_update(

FILE: python/sglang/srt/layers/attention/mamba/mamba.py
  function mamba_v2_sharded_weight_loader (line 56) | def mamba_v2_sharded_weight_loader(
  class MambaMixer2 (line 155) | class MambaMixer2(torch.nn.Module):
    method __init__ (line 166) | def __init__(
    method forward (line 390) | def forward(
    method mamba_type (line 696) | def mamba_type(self) -> str:

FILE: python/sglang/srt/layers/attention/mamba/mamba2_metadata.py
  class ForwardMetadata (line 27) | class ForwardMetadata:
  class Mamba2Metadata (line 46) | class Mamba2Metadata(ForwardMetadata):
    class MixedMetadata (line 54) | class MixedMetadata:
    method _query_start_loc_to_chunk_indices_offsets (line 69) | def _query_start_loc_to_chunk_indices_offsets(
    method prepare_decode (line 153) | def prepare_decode(
    method prepare_mixed (line 175) | def prepare_mixed(

FILE: python/sglang/srt/layers/attention/mamba/mamba_state_scatter_triton.py
  function _fused_mamba_state_scatter_with_mask_kernel (line 15) | def _fused_mamba_state_scatter_with_mask_kernel(
  function fused_mamba_state_scatter_with_mask (line 90) | def fused_mamba_state_scatter_with_mask(

FILE: python/sglang/srt/layers/attention/mamba/mixer2_rms_norm_gated.py
  class Mixer2RMSNormGated (line 19) | class Mixer2RMSNormGated(MultiPlatformOp):
    method __init__ (line 20) | def __init__(
    method forward_native (line 48) | def forward_native(
    method forward_cuda (line 99) | def forward_cuda(

FILE: python/sglang/srt/layers/attention/mamba/ops/layernorm_gated.py
  function _layer_norm_fwd_1pass_kernel (line 14) | def _layer_norm_fwd_1pass_kernel(
  function _layer_norm_fwd (line 77) | def _layer_norm_fwd(
  function rms_norm_gated (line 145) | def rms_norm_gated(

FILE: python/sglang/srt/layers/attention/mamba/ops/mamba_ssm.py
  function softplus (line 21) | def softplus(dt):
  function softplus (line 28) | def softplus(dt):
  function _selective_scan_update_kernel (line 68) | def _selective_scan_update_kernel(
  function selective_state_update (line 300) | def selective_state_update(

FILE: python/sglang/srt/layers/attention/mamba/ops/ssd_bmm.py
  function _bmm_chunk_fwd_kernel (line 19) | def _bmm_chunk_fwd_kernel(
  function _bmm_chunk_fwd (line 131) | def _bmm_chunk_fwd(a, b, chunk_size, seq_idx=None, causal=False, output_...

FILE: python/sglang/srt/layers/attention/mamba/ops/ssd_chunk_scan.py
  function _chunk_scan_fwd_kernel (line 20) | def _chunk_scan_fwd_kernel(
  function _chunk_scan_fwd (line 422) | def _chunk_scan_fwd(

FILE: python/sglang/srt/layers/attention/mamba/ops/ssd_chunk_state.py
  function _chunk_cumsum_fwd_kernel (line 21) | def _chunk_cumsum_fwd_kernel(
  function _chunk_state_fwd_kernel (line 113) | def _chunk_state_fwd_kernel(
  function _chunk_state_varlen_kernel (line 263) | def _chunk_state_varlen_kernel(
  function _chunk_cumsum_fwd (line 444) | def _chunk_cumsum_fwd(
  function _chunk_state_fwd (line 496) | def _chunk_state_fwd(
  function chunk_state_varlen (line 568) | def chunk_state_varlen(

FILE: python/sglang/srt/layers/attention/mamba/ops/ssd_combined.py
  function is_int_pow_2 (line 24) | def is_int_pow_2(n):
  function _mamba_chunk_scan_combined_fwd (line 28) | def _mamba_chunk_scan_combined_fwd(
  function mamba_chunk_scan_combined (line 181) | def mamba_chunk_scan_combined(

FILE: python/sglang/srt/layers/attention/mamba/ops/ssd_state_passing.py
  function _state_passing_fwd_kernel (line 17) | def _state_passing_fwd_kernel(
  function _state_passing_fwd (line 171) | def _state_passing_fwd(

FILE: python/sglang/srt/layers/attention/mamba/ops/ssu_dispatch.py
  class MambaSSUBackend (line 15) | class MambaSSUBackend(ABC):
    method name (line 18) | def name(self) -> str:
    method __call__ (line 22) | def __call__(
  class TritonSSUBackend (line 45) | class TritonSSUBackend(MambaSSUBackend):
    method __init__ (line 48) | def __init__(self) -> None:
    method name (line 56) | def name(self) -> str:
    method __call__ (line 59) | def __call__(
  class FlashInferSSUBackend (line 102) | class FlashInferSSUBackend(MambaSSUBackend):
    method __init__ (line 105) | def __init__(self) -> None:
    method name (line 111) | def name(self) -> str:
    method __call__ (line 114) | def __call__(
  function initialize_mamba_selective_state_update_backend (line 170) | def initialize_mamba_selective_state_update_backend(server_args: ServerA...
  function selective_state_update (line 207) | def selective_state_update(

FILE: python/sglang/srt/layers/attention/merge_state.py
  function _supported_dtypes (line 15) | def _supported_dtypes(o: torch.Tensor) -> bool:
  function _supported_headdim (line 19) | def _supported_headdim(o: torch.Tensor) -> bool:
  function merge_state (line 26) | def merge_state(

FILE: python/sglang/srt/layers/attention/nsa/dequant_k_cache.py
  function dequantize_k_cache (line 6) | def dequantize_k_cache(quant_k_cache):
  function _dequantize_k_cache_ref (line 10) | def _dequantize_k_cache_ref(
  function _dequantize_k_cache_fast_wrapped (line 53) | def _dequantize_k_cache_fast_wrapped(
  function _dequantize_k_cache_fast (line 76) | def _dequantize_k_cache_fast(quant_k_cache, group_size: int = 128):
  function _dequantize_k_cache_fast_kernel (line 121) | def _dequantize_k_cache_fast_kernel(
  function dequantize_k_cache_paged (line 168) | def dequantize_k_cache_paged(
  function _dequantize_k_cache_paged_kernel (line 235) | def _dequantize_k_cache_paged_kernel(

FILE: python/sglang/srt/layers/attention/nsa/index_buf_accessor.py
  class GetK (line 22) | class GetK:
    method execute (line 24) | def execute(cls, *args, **kwargs):
    method slow (line 28) | def slow(
    method torch_fast (line 47) | def torch_fast(
    method triton (line 77) | def triton(
  class GetS (line 94) | class GetS:
    method execute (line 96) | def execute(cls, *args, **kwargs):
    method slow (line 100) | def slow(
    method torch_fast (line 119) | def torch_fast(
    method triton (line 146) | def triton(
  class GetKAndS (line 163) | class GetKAndS:
    method execute (line 165) | def execute(cls, *args, **kwargs):
    method triton (line 169) | def triton(
  class SetK (line 199) | class SetK:
    method execute (line 201) | def execute(cls, *args, buf, **kwargs):
    method slow (line 205) | def slow(
    method torch_fast (line 221) | def torch_fast(
  class SetS (line 249) | class SetS:
    method execute (line 251) | def execute(cls, *args, buf, **kwargs):
    method slow (line 255) | def slow(
    method torch_fast (line 271) | def torch_fast(
  class SetKAndS (line 301) | class SetKAndS:
    method execute (line 303) | def execute(cls, *args, buf, **kwargs):
    method vanilla (line 324) | def vanilla(cls, pool, buf, loc, index_k, index_k_scale):
    method triton (line 329) | def triton(cls, pool, buf, loc, index_k, index_k_scale):
  function _set_k_and_s_triton (line 339) | def _set_k_and_s_triton(
  function _set_k_and_s_triton_kernel (line 413) | def _set_k_and_s_triton_kernel(
  function _get_k_triton (line 455) | def _get_k_triton(
  function _get_k_triton_kernel (line 494) | def _get_k_triton_kernel(
  function _get_s_triton (line 533) | def _get_s_triton(
  function _get_s_triton_kernel (line 572) | def _get_s_triton_kernel(
  function _get_k_and_s_triton (line 610) | def _get_k_and_s_triton(
  function _get_k_and_s_triton_kernel (line 677) | def _get_k_and_s_triton_kernel(

FILE: python/sglang/srt/layers/attention/nsa/nsa_backend_mtp_precompute.py
  class PrecomputedMetadata (line 22) | class PrecomputedMetadata:
  function compute_cu_seqlens (line 53) | def compute_cu_seqlens(seqlens: torch.Tensor) -> torch.Tensor:
  class NativeSparseAttnBackendMTPPrecomputeMixin (line 61) | class NativeSparseAttnBackendMTPPrecomputeMixin:
    method _precompute_replay_metadata (line 68) | def _precompute_replay_metadata(
    method _precompute_decode_mode (line 115) | def _precompute_decode_mode(
    method _precompute_target_verify_mode (line 170) | def _precompute_target_verify_mode(
    method _precompute_draft_extend_mode (line 249) | def _precompute_draft_extend_mode(

FILE: python/sglang/srt/layers/attention/nsa/nsa_indexer.py
  class BaseIndexerMetadata (line 63) | class BaseIndexerMetadata(ABC):
    method get_seqlens_int32 (line 65) | def get_seqlens_int32(self) -> torch.Tensor:
    method get_page_table_64 (line 71) | def get_page_table_64(self) -> torch.Tensor:
    method get_page_table_1 (line 78) | def get_page_table_1(self) -> torch.Tensor:
    method get_seqlens_expanded (line 85) | def get_seqlens_expanded(self) -> torch.Tensor:
    method get_indexer_kvcache_range (line 90) | def get_indexer_kvcache_range(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method get_indexer_seq_len_cpu (line 95) | def get_indexer_seq_len_cpu(self) -> torch.Tensor:
    method get_indexer_seq_len (line 100) | def get_indexer_seq_len(self) -> torch.Tensor:
    method get_nsa_extend_len_cpu (line 105) | def get_nsa_extend_len_cpu(self) -> List[int]:
    method get_token_to_batch_idx (line 110) | def get_token_to_batch_idx(self) -> torch.Tensor:
    method topk_transform (line 116) | def topk_transform(
  function rotate_activation (line 134) | def rotate_activation(x: torch.Tensor) -> torch.Tensor:
  class Indexer (line 149) | class Indexer(MultiPlatformOp):
    method __init__ (line 150) | def __init__(
    method _with_real_sm_count (line 230) | def _with_real_sm_count(self):
    method _weights_proj_bf16_in_fp32_out (line 244) | def _weights_proj_bf16_in_fp32_out(self, x: torch.Tensor) -> torch.Ten...
    method _project_and_scale_head_gates (line 261) | def _project_and_scale_head_gates(self, x: torch.Tensor):
    method _get_logits_head_gate (line 267) | def _get_logits_head_gate(self, x: torch.Tensor, q_scale: torch.Tensor):
    method _get_q_k_bf16 (line 273) | def _get_q_k_bf16(
    method _get_k_bf16 (line 346) | def _get_k_bf16(
    method _get_topk_paged (line 365) | def _get_topk_paged(
    method _should_chunk_mqa_logits (line 478) | def _should_chunk_mqa_logits(
    method _get_topk_ragged (line 497) | def _get_topk_ragged(
    method _forward_cuda_k_only (line 672) | def _forward_cuda_k_only(
    method _get_topk_ragged_with_cp (line 714) | def _get_topk_ragged_with_cp(
    method forward_indexer (line 863) | def forward_indexer(
    method _store_index_k_cache (line 946) | def _store_index_k_cache(
    method forward_cuda (line 998) | def forward_cuda(
    method forward_npu (line 1215) | def forward_npu(
    method do_npu_cp_balance_indexer (line 1427) | def do_npu_cp_balance_indexer(
  function scattered_to_tp_attn_full (line 1483) | def scattered_to_tp_attn_full(

FILE: python/sglang/srt/layers/attention/nsa/nsa_mtp_verification.py
  function verify_single_backend_fused_metadata_copy (line 11) | def verify_single_backend_fused_metadata_copy(
  function verify_multi_backend_fused_metadata_copy (line 207) | def verify_multi_backend_fused_metadata_copy(

FILE: python/sglang/srt/layers/attention/nsa/quant_k_cache.py
  function quantize_k_cache (line 6) | def quantize_k_cache(cache_k):
  function quantize_k_cache_separate (line 10) | def quantize_k_cache_separate(
  function _quantize_k_cache_ref (line 59) | def _quantize_k_cache_ref(
  function _quantize_k_cache_fast_wrapped (line 112) | def _quantize_k_cache_fast_wrapped(
  function _quantize_k_cache_fast (line 133) | def _quantize_k_cache_fast(k_nope, k_rope, group_size: int = 128):
  function _quantize_k_cache_fast_separate (line 190) | def _quantize_k_cache_fast_separate(k_nope, k_rope, group_size: int = 128):
  function _quantize_k_cache_fast_kernel (line 268) | def _quantize_k_cache_fast_kernel(
  function run_ans (line 445) | def run_ans():

FILE: python/sglang/srt/layers/attention/nsa/tilelang_kernel.py
  function fast_log2_ceil (line 31) | def fast_log2_ceil(x):
  function fast_pow2 (line 38) | def fast_pow2(x):
  function fast_round_scale (line 43) | def fast_round_scale(amax, fp8_max_inv):
  function act_quant_kernel (line 48) | def act_quant_kernel(
  function act_quant (line 98) | def act_quant(
  function fp8_index_kernel (line 129) | def fp8_index_kernel(h: int, d: int, clear_accum=True):
  function fp8_index (line 185) | def fp8_index(
  function sparse_attention_fwd_kernel_v1 (line 218) | def sparse_attention_fwd_kernel_v1(
  function sparse_attention_fwd_kernel_v2 (line 395) | def sparse_attention_fwd_kernel_v2(
  function sparse_mla_fwd_decode_partial (line 783) | def sparse_mla_fwd_decode_partial(
  function sparse_mla_fwd_decode_combine (line 933) | def sparse_mla_fwd_decode_combine(
  function tilelang_sparse_fwd (line 1012) | def tilelang_sparse_fwd(

FILE: python/sglang/srt/layers/attention/nsa/transform_index.py
  function transform_index_page_table_prefill (line 8) | def transform_index_page_table_prefill(**kwargs):
  function transform_index_page_table_decode (line 12) | def transform_index_page_table_decode(**kwargs):
  function transform_index_page_table_decode_kernel (line 17) | def transform_index_page_table_decode_kernel(
  function transform_index_page_table_decode_fast (line 38) | def transform_index_page_table_decode_fast(
  function transform_index_page_table_prefill_fast (line 72) | def transform_index_page_table_prefill_fast(
  function transform_index_page_table_decode_ref (line 94) | def transform_index_page_table_decode_ref(
  function transform_index_page_table_prefill_ref (line 115) | def transform_index_page_table_prefill_ref(

FILE: python/sglang/srt/layers/attention/nsa/triton_kernel.py
  function _act_quant_kernel (line 10) | def _act_quant_kernel(
  function act_quant (line 86) | def act_quant(
  function _get_valid_kv_indices_kernel (line 140) | def _get_valid_kv_indices_kernel(
  function get_valid_kv_indices (line 173) | def get_valid_kv_indices(

FILE: python/sglang/srt/layers/attention/nsa/utils.py
  function compute_nsa_seqlens (line 30) | def compute_nsa_seqlens(original_seq_lens, nsa_index_topk: int):
  function is_nsa_enable_prefill_cp (line 34) | def is_nsa_enable_prefill_cp():
  function is_nsa_prefill_cp_in_seq_split (line 38) | def is_nsa_prefill_cp_in_seq_split():
  function is_nsa_prefill_cp_round_robin_split (line 45) | def is_nsa_prefill_cp_round_robin_split():
  function can_nsa_prefill_cp_round_robin_split (line 52) | def can_nsa_prefill_cp_round_robin_split(forward_batch: "ForwardBatch"):
  function nsa_cp_round_robin_split_data (line 65) | def nsa_cp_round_robin_split_data(input_: Union[torch.Tensor, List]):
  function cal_padded_tokens (line 96) | def cal_padded_tokens(forward_batch: "ForwardBatch"):
  function pad_nsa_cache_seqlens (line 118) | def pad_nsa_cache_seqlens(forward_batch: "ForwardBatch", nsa_cache_seqle...
  class NSAContextParallelMetadata (line 139) | class NSAContextParallelMetadata:
  function can_cp_split (line 158) | def can_cp_split(seq_len: int, cp_size: int, use_nsa: bool, forward_batch):
  function cp_split_and_rebuild_data (line 182) | def cp_split_and_rebuild_data(forward_batch, input_: torch.Tensor):
  function cp_split_and_rebuild_position (line 199) | def cp_split_and_rebuild_position(forward_batch, positions: torch.Tensor):
  function nsa_cp_round_robin_split_q_seqs_kernel (line 219) | def nsa_cp_round_robin_split_q_seqs_kernel(
  function nsa_cp_round_robin_split_q_seqs_cpu (line 240) | def nsa_cp_round_robin_split_q_seqs_cpu(extend_seqs):
  function nsa_cp_round_robin_split_q_seqs (line 255) | def nsa_cp_round_robin_split_q_seqs(
  function nsa_use_prefill_cp (line 284) | def nsa_use_prefill_cp(forward_batch, nsa_enable_prefill_cp=None):
  function cp_attn_tp_all_gather_reorganazied_into_tensor (line 297) | def cp_attn_tp_all_gather_reorganazied_into_tensor(
  function cp_all_gather_rerange_output (line 341) | def cp_all_gather_rerange_output(input_tensor, cp_size, forward_batch, s...
  function calculate_cp_seq_idx (line 407) | def calculate_cp_seq_idx(cp_chunks_len, seqs_len):
  function prepare_input_dp_with_cp_dsa (line 455) | def prepare_input_dp_with_cp_dsa(

FILE: python/sglang/srt/layers/attention/nsa_backend.py
  class NSAFlashMLAMetadata (line 76) | class NSAFlashMLAMetadata:
    method slice (line 82) | def slice(self, sli):
    method copy_ (line 88) | def copy_(self, other: "NSAFlashMLAMetadata"):
  class NSAMetadata (line 94) | class NSAMetadata:
  class TopkTransformMethod (line 147) | class TopkTransformMethod(IntEnum):
  function _compiled_cat (line 155) | def _compiled_cat(tensors: list[torch.Tensor], dim: int = -1) -> torch.T...
  function _cat (line 159) | def _cat(tensors: list[torch.Tensor], dim: int = -1) -> torch.Tensor:
  class NSAIndexerMetadata (line 176) | class NSAIndexerMetadata(BaseIndexerMetadata):
    method get_seqlens_int32 (line 181) | def get_seqlens_int32(self) -> torch.Tensor:
    method get_page_table_64 (line 184) | def get_page_table_64(self) -> torch.Tensor:
    method get_page_table_1 (line 187) | def get_page_table_1(self) -> torch.Tensor:
    method get_seqlens_expanded (line 190) | def get_seqlens_expanded(self) -> torch.Tensor:
    method get_cu_seqlens_k (line 193) | def get_cu_seqlens_k(self) -> torch.Tensor:
    method get_indexer_kvcache_range (line 196) | def get_indexer_kvcache_range(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method get_indexer_seq_len (line 199) | def get_indexer_seq_len(self) -> torch.Tensor:
    method get_indexer_seq_len_cpu (line 202) | def get_indexer_seq_len_cpu(self) -> torch.Tensor:
    method get_nsa_extend_len_cpu (line 205) | def get_nsa_extend_len_cpu(self) -> List[int]:
    method get_token_to_batch_idx (line 208) | def get_token_to_batch_idx(self) -> torch.Tensor:
    method topk_transform (line 211) | def topk_transform(
  class NativeSparseAttnBackend (line 278) | class NativeSparseAttnBackend(
    method __init__ (line 281) | def __init__(
    method get_device_int32_arange (line 362) | def get_device_int32_arange(self, l: int) -> torch.Tensor:
    method _transform_table_1_to_real (line 370) | def _transform_table_1_to_real(self, page_table: torch.Tensor) -> torc...
    method init_forward_metadata (line 380) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method _cal_indexer_k_start_end (line 656) | def _cal_indexer_k_start_end(
    method init_cuda_graph_state (line 732) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 772) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 927) | def init_forward_metadata_replay_cuda_graph(
    method init_forward_metadata_replay_cuda_graph_from_precomputed (line 1093) | def init_forward_metadata_replay_cuda_graph_from_precomputed(
    method forward_extend (line 1247) | def forward_extend(
    method forward_decode (line 1446) | def forward_decode(
    method _forward_fa3 (line 1587) | def _forward_fa3(
    method _forward_flashmla_sparse (line 1625) | def _forward_flashmla_sparse(
    method _forward_flashmla_kv (line 1674) | def _forward_flashmla_kv(
    method _forward_standard_mha (line 1719) | def _forward_standard_mha(
    method _forward_tilelang (line 1783) | def _forward_tilelang(
    method _forward_aiter (line 1801) | def _forward_aiter(
    method _forward_aiter_extend (line 1841) | def _forward_aiter_extend(
    method _forward_trtllm (line 1890) | def _forward_trtllm(
    method _pad_topk_indices (line 2007) | def _pad_topk_indices(
    method get_cuda_graph_seq_len_fill_value (line 2028) | def get_cuda_graph_seq_len_fill_value(self):
    method set_nsa_prefill_impl (line 2032) | def set_nsa_prefill_impl(self, forward_batch: Optional[ForwardBatch] =...
    method get_topk_transform_method (line 2081) | def get_topk_transform_method(self) -> TopkTransformMethod:
    method get_indexer_metadata (line 2096) | def get_indexer_metadata(
    method _compute_flashmla_metadata (line 2105) | def _compute_flashmla_metadata(self, cache_seqlens: torch.Tensor, seq_...
  class NativeSparseAttnMultiStepBackend (line 2125) | class NativeSparseAttnMultiStepBackend:
    method __init__ (line 2127) | def __init__(
    method init_forward_metadata (line 2144) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 2148) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 2152) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw...
    method init_forward_metadata_replay_cuda_graph (line 2164) | def init_forward_metadata_replay_cuda_graph(

FILE: python/sglang/srt/layers/attention/tbo_backend.py
  class TboAttnBackend (line 13) | class TboAttnBackend(AttentionBackend):
    method __init__ (line 14) | def __init__(self, primary: AttentionBackend, children: List[Attention...
    method init_new (line 20) | def init_new(cls, creator: Callable[[], AttentionBackend]):
    method init_forward_metadata (line 26) | def init_forward_metadata(self, forward_batch: "ForwardBatch"):
    method init_cuda_graph_state (line 35) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 41) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 72) | def init_forward_metadata_replay_cuda_graph(
    method _init_forward_metadata_cuda_graph_children (line 106) | def _init_forward_metadata_cuda_graph_children(
    method get_cuda_graph_seq_len_fill_value (line 176) | def get_cuda_graph_seq_len_fill_value(self):
    method forward_extend (line 182) | def forward_extend(self, *args, **kwargs):
    method forward_decode (line 185) | def forward_decode(self, *args, **kwargs):
    method get_indexer_metadata (line 188) | def get_indexer_metadata(self, layer_id: int, forward_batch: "ForwardB...
  function _init_forward_metadata_cuda_graph_split (line 192) | def _init_forward_metadata_cuda_graph_split(

FILE: python/sglang/srt/layers/attention/torch_flex_backend.py
  class TorchFlexAttnBackend (line 17) | class TorchFlexAttnBackend(AttentionBackend):
    method __init__ (line 18) | def __init__(self, model_runner: ModelRunner):
    method init_forward_metadata (line 26) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method _causal_mask (line 69) | def _causal_mask(self, b, h, q_idx, kv_idx):
    method _decode_mask (line 72) | def _decode_mask(self, b, h, q_idx, kv_idx):
    method _run_flex_forward_extend (line 75) | def _run_flex_forward_extend(
    method _run_flex_forward_decode (line 165) | def _run_flex_forward_decode(
    method forward_extend (line 236) | def forward_extend(
    method forward_decode (line 282) | def forward_decode(
    method support_triton (line 324) | def support_triton(self):

FILE: python/sglang/srt/layers/attention/torch_native_backend.py
  class TorchNativeAttnBackend (line 17) | class TorchNativeAttnBackend(AttentionBackend):
    method __init__ (line 18) | def __init__(self, model_runner: ModelRunner):
    method init_forward_metadata (line 23) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method _run_sdpa_forward_extend (line 27) | def _run_sdpa_forward_extend(
    method _run_sdpa_forward_decode (line 117) | def _run_sdpa_forward_decode(
    method forward_extend (line 192) | def forward_extend(
    method forward_decode (line 239) | def forward_decode(
    method support_triton (line 285) | def support_triton(self):

FILE: python/sglang/srt/layers/attention/triton_backend.py
  function logit_capping_mod (line 30) | def logit_capping_mod(logit_capping_method, logit_cap):
  class ForwardMetadata (line 39) | class ForwardMetadata:
  class TritonAttnBackend (line 56) | class TritonAttnBackend(AttentionBackend):
    method __init__ (line 57) | def __init__(
    method get_num_kv_splits (line 184) | def get_num_kv_splits(
    method init_forward_metadata (line 236) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 441) | def init_cuda_graph_state(
    method init_forward_metadata_capture_cuda_graph (line 508) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 664) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 790) | def get_cuda_graph_seq_len_fill_value(self):
    method get_verify_buffers_to_fill_after_draft (line 793) | def get_verify_buffers_to_fill_after_draft(self):
    method update_verify_buffers_to_fill_after_draft (line 801) | def update_verify_buffers_to_fill_after_draft(
    method forward_extend (line 806) | def forward_extend(
    method _forward_extend_unified (line 908) | def _forward_extend_unified(
    method forward_decode (line 1038) | def forward_decode(
  class TritonMultiStepDraftBackend (line 1113) | class TritonMultiStepDraftBackend:
    method __init__ (line 1119) | def __init__(
    method common_template (line 1154) | def common_template(
    method init_forward_metadata (line 1195) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 1216) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 1237) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw...
    method init_forward_metadata_replay_cuda_graph (line 1251) | def init_forward_metadata_replay_cuda_graph(
  function get_num_kv_splits_triton (line 1270) | def get_num_kv_splits_triton(
  function update_sliding_window_buffer (line 1321) | def update_sliding_window_buffer(
  function update_sliding_window_buffer_cuda_graph (line 1361) | def update_sliding_window_buffer_cuda_graph(

FILE: python/sglang/srt/layers/attention/triton_ops/decode_attention.py
  function tanh (line 39) | def tanh(x):
  function _fwd_kernel_stage1 (line 45) | def _fwd_kernel_stage1(
  function _decode_att_m_fwd (line 182) | def _decode_att_m_fwd(
  function _fwd_grouped_kernel_stage1 (line 253) | def _fwd_grouped_kernel_stage1(
  function _decode_grouped_att_m_fwd (line 426) | def _decode_grouped_att_m_fwd(
  function _fwd_kernel_stage2 (line 516) | def _fwd_kernel_stage2(
  function _decode_softmax_reducev_fwd (line 586) | def _decode_softmax_reducev_fwd(
  function decode_attention_fwd_normal (line 636) | def decode_attention_fwd_normal(
  function decode_attention_fwd_grouped (line 681) | def decode_attention_fwd_grouped(
  function decode_attention_fwd (line 726) | def decode_attention_fwd(

FILE: python/sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py
  function tanh (line 23) | def tanh(x):
  function _fwd_kernel_flash_decode_stage1 (line 29) | def _fwd_kernel_flash_decode_stage1(
  function _fwd_kernel_flash_decode_stage2 (line 140) | def _fwd_kernel_flash_decode_stage2(
  function flash_decode_stage1 (line 192) | def flash_decode_stage1(
  function flash_decode_stage2 (line 255) | def flash_decode_stage2(mid_out, mid_out_logexpsum, B_Seqlen, O, block_s...
  function flash_decode_attention_fwd (line 284) | def flash_decode_attention_fwd(
  function _sparse_fwd_kernel_flash_decode_stage1 (line 329) | def _sparse_fwd_kernel_flash_decode_stage1(  # Double Sparsity's approxi...
  function _sparse_fwd_kernel_flash_decode_stage2 (line 401) | def _sparse_fwd_kernel_flash_decode_stage2(
  function _sparse_fwd_kernel_flash_decode_stage3 (line 517) | def _sparse_fwd_kernel_flash_decode_stage3(
  function sparse_flash_decode_stage1 (line 561) | def sparse_flash_decode_stage1(
  function sparse_flash_decode_stage2 (line 613) | def sparse_flash_decode_stage2(
  function sparse_flash_decode_stage3 (line 674) | def sparse_flash_decode_stage3(Seqlen, mid_out, mid_out_logexpsum, O, bl...
  function flash_decode_sparse_attention_fwd (line 700) | def flash_decode_sparse_attention_fwd(
  function _fwd_kernel (line 782) | def _fwd_kernel(
  function extend_attention_fwd (line 994) | def extend_attention_fwd(

FILE: python/sglang/srt/layers/attention/triton_ops/extend_attention.py
  function _get_block_sizes_for_extend_attention (line 35) | def _get_block_sizes_for_extend_attention(Lq: int, Lv: int):
  function tanh (line 108) | def tanh(x):
  function _copy_unified_indices_kernel (line 114) | def _copy_unified_indices_kernel(
  function build_unified_kv_indices (line 171) | def build_unified_kv_indices(
  function _fwd_kernel (line 220) | def _fwd_kernel(
  function extend_attention_fwd (line 552) | def extend_attention_fwd(
  function redundant_attention (line 660) | def redundant_attention(
  function _fwd_kernel_unified (line 698) | def _fwd_kernel_unified(
  function extend_attention_fwd_unified (line 950) | def extend_attention_fwd_unified(

FILE: python/sglang/srt/layers/attention/triton_ops/merge_state.py
  function merge_state_kernel (line 9) | def merge_state_kernel(
  function merge_state_triton (line 66) | def merge_state_triton(

FILE: python/sglang/srt/layers/attention/triton_ops/prefill_attention.py
  function _fwd_kernel (line 35) | def _fwd_kernel(
  function context_attention_fwd (line 170) | def context_attention_fwd(

FILE: python/sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py
  function is_hip (line 31) | def is_hip():
  function tanh (line 39) | def tanh(x):
  function _fwd_grouped_kernel_stage1_rope (line 45) | def _fwd_grouped_kernel_stage1_rope(
  function _decode_grouped_att_m_fwd_rope (line 310) | def _decode_grouped_att_m_fwd_rope(
  function decode_attention_fwd_grouped_rope (line 402) | def decode_attention_fwd_grouped_rope(

FILE: python/sglang/srt/layers/attention/triton_ops/trtllm_fp8_kv_kernel.py
  function _process_kv_tensor (line 26) | def _process_kv_tensor(
  function _fused_fp8_set_kv_buffer_kernel (line 88) | def _fused_fp8_set_kv_buffer_kernel(
  function fused_fp8_set_kv_buffer (line 204) | def fused_fp8_set_kv_buffer(
  function _naive_fp8_set_kv_buffer (line 420) | def _naive_fp8_set_kv_buffer(

FILE: python/sglang/srt/layers/attention/trtllm_mha_backend.py
  class TRTLLMMHAMetadata (line 48) | class TRTLLMMHAMetadata:
  class TRTLLMHAAttnBackend (line 65) | class TRTLLMHAAttnBackend(FlashInferAttnBackend):
    method __init__ (line 68) | def __init__(
    method _maybe_translate_swa (line 152) | def _maybe_translate_swa(
    method _alloc_swa_page_table (line 163) | def _alloc_swa_page_table(
    method _copy_swa_page_table (line 171) | def _copy_swa_page_table(
    method _get_layer_cache_loc (line 183) | def _get_layer_cache_loc(
    method _bind_swa_page_table (line 195) | def _bind_swa_page_table(
    method _get_layer_page_table (line 203) | def _get_layer_page_table(
    method init_cuda_graph_state (line 214) | def init_cuda_graph_state(
    method init_forward_metadata_capture_cuda_graph (line 306) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 443) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 548) | def get_cuda_graph_seq_len_fill_value(self) -> int:
    method _should_use_fused_fp8_path (line 552) | def _should_use_fused_fp8_path(self, save_kv_cache: bool, k: torch.Ten...
    method _fused_fp8_set_kv_buffer (line 556) | def _fused_fp8_set_kv_buffer(
    method init_forward_metadata (line 582) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method forward_decode (line 695) | def forward_decode(
    method forward_extend (line 781) | def forward_extend(
  class TRTLLMHAAttnMultiStepDraftBackend (line 883) | class TRTLLMHAAttnMultiStepDraftBackend(FlashInferMultiStepDraftBackend):
    method __init__ (line 886) | def __init__(
    method init_forward_metadata (line 899) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 903) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int):
    method init_forward_metadata_capture_cuda_graph (line 907) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 925) | def init_forward_metadata_replay_cuda_graph(

FILE: python/sglang/srt/layers/attention/trtllm_mla_backend.py
  function pad_draft_extend_query_kernel (line 56) | def pad_draft_extend_query_kernel(
  function unpad_draft_extend_output_kernel (line 129) | def unpad_draft_extend_output_kernel(
  function _quantize_fp8_qkv (line 199) | def _quantize_fp8_qkv(q, k, v, layer):
  class TRTLLMMLAPrefillMetadata (line 233) | class TRTLLMMLAPrefillMetadata:
  class TRTLLMMLADecodeMetadata (line 243) | class TRTLLMMLADecodeMetadata:
  class TRTLLMMLABackend (line 255) | class TRTLLMMLABackend(FlashInferMLAAttnBackend):
    method __init__ (line 258) | def __init__(
    method _calc_padded_blocks (line 318) | def _calc_padded_blocks(self, max_seq_len: int) -> int:
    method _create_block_kv_indices (line 341) | def _create_block_kv_indices(
    method init_cuda_graph_state (line 379) | def init_cuda_graph_state(
    method init_forward_metadata_capture_cuda_graph (line 426) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 509) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 574) | def get_cuda_graph_seq_len_fill_value(self) -> int:
    method init_forward_metadata (line 578) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_mha_chunk_metadata (line 673) | def init_mha_chunk_metadata(self, forward_batch: ForwardBatch):
    method pad_draft_extend_query (line 676) | def pad_draft_extend_query(
    method unpad_draft_extend_output (line 708) | def unpad_draft_extend_output(
    method forward_decode (line 756) | def forward_decode(
    method forward_extend (line 884) | def forward_extend(
  class TRTLLMMLAMultiStepDraftBackend (line 1150) | class TRTLLMMLAMultiStepDraftBackend(FlashInferMLAMultiStepDraftBackend):
    method __init__ (line 1153) | def __init__(

FILE: python/sglang/srt/layers/attention/utils.py
  function create_flashinfer_kv_indices_triton (line 17) | def create_flashinfer_kv_indices_triton(
  function get_num_page_per_block_flashmla (line 55) | def get_num_page_per_block_flashmla(page_size: int = 64) -> int:
  function create_flashmla_kv_indices_triton (line 61) | def create_flashmla_kv_indices_triton(
  function concat_and_cast_mha_k_kernel (line 115) | def concat_and_cast_mha_k_kernel(
  function concat_and_cast_mha_k_triton (line 153) | def concat_and_cast_mha_k_triton(
  function pad_sequence_with_mask_kernel (line 192) | def pad_sequence_with_mask_kernel(
  function pad_sequence_with_mask (line 245) | def pad_sequence_with_mask(
  function seqlens_expand_kernel (line 290) | def seqlens_expand_kernel(
  function seqlens_expand_triton (line 316) | def seqlens_expand_triton(
  function canonicalize_stride (line 357) | def canonicalize_stride(tensor: torch.Tensor) -> torch.Tensor:
  function mla_quantize_and_rope_for_fp8 (line 385) | def mla_quantize_and_rope_for_fp8(
  function concat_mla_absorb_q_general (line 470) | def concat_mla_absorb_q_general(q_nope, q_rope):
  function reshape_and_cache_flash (line 478) | def reshape_and_cache_flash(
  function launch_reshape_and_cache_flash (line 601) | def launch_reshape_and_cache_flash(

FILE: python/sglang/srt/layers/attention/vision.py
  function flash_attn_func (line 48) | def flash_attn_func(*args, ver: int = 3, **kwargs):
  class SingletonCache (line 105) | class SingletonCache:
    method set_data (line 108) | def set_data(self, value: Any) -> None:
    method get_data (line 111) | def get_data(self) -> Optional[Any]:
    method empty (line 114) | def empty(self) -> bool:
  function _get_cu_seqlens_for_shape (line 120) | def _get_cu_seqlens_for_shape(batch_size: int, seqlen: int, device) -> t...
  function resolve_seqlens (line 135) | def resolve_seqlens(
  class VisionSdpaAttention (line 156) | class VisionSdpaAttention(nn.Module):
    method __init__ (line 162) | def __init__(
    method _generate_mask_cache (line 183) | def _generate_mask_cache(
    method generate_patch_attention_mask (line 215) | def generate_patch_attention_mask(
    method forward (line 237) | def forward(
  class VisionTritonAttention (line 309) | class VisionTritonAttention(nn.Module):
    method __init__ (line 314) | def __init__(
    method forward (line 324) | def forward(
  class VisionFlash3Attention (line 380) | class VisionFlash3Attention(nn.Module):
    method __init__ (line 381) | def __init__(
    method forward (line 393) | def forward(
  class VisionFlash4Attention (line 439) | class VisionFlash4Attention(nn.Module):
    method __init__ (line 440) | def __init__(
    method forward (line 448) | def forward(
  class VisionFlashInferAttention (line 491) | class VisionFlashInferAttention(nn.Module):
    method __init__ (line 492) | def __init__(
    method forward (line 503) | def forward(
  class VisionAiterAttention (line 613) | class VisionAiterAttention(nn.Module):
    method __init__ (line 614) | def __init__(
    method forward (line 630) | def forward(
  class VisionAscendAttention (line 657) | class VisionAscendAttention(nn.Module):
    method __init__ (line 659) | def __init__(
    method forward (line 667) | def forward(
  class VisionAttention (line 724) | class VisionAttention(nn.Module):
    method __init__ (line 737) | def __init__(
    method _init_qk_norm (line 860) | def _init_qk_norm(
    method _determine_attention_backend (line 885) | def _determine_attention_backend(self, passed_backend: Optional[str]) ...
    method _apply_qk_norm_head_size (line 919) | def _apply_qk_norm_head_size(self, q: torch.Tensor, k: torch.Tensor):
    method _apply_qk_norm (line 929) | def _apply_qk_norm(self, q: torch.Tensor, k: torch.Tensor):
    method forward (line 967) | def forward(

FILE: python/sglang/srt/layers/attention/vision_utils.py
  function update_vit_attn_dummy_heads_config (line 8) | def update_vit_attn_dummy_heads_config(config):
  function pad_vit_attn_dummy_heads (line 26) | def pad_vit_attn_dummy_heads(config, name: str, loaded_weight: torch.Ten...

FILE: python/sglang/srt/layers/attention/wave_backend.py
  function get_num_kv_splits_triton (line 26) | def get_num_kv_splits_triton(
  class ForwardMetadata (line 78) | class ForwardMetadata:
  class WaveAttnBackend (line 90) | class WaveAttnBackend(AttentionBackend):
    method __init__ (line 91) | def __init__(
    method get_num_kv_splits (line 162) | def get_num_kv_splits(
    method init_forward_metadata (line 195) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method init_cuda_graph_state (line 344) | def init_cuda_graph_state(
    method init_forward_metadata_capture_cuda_graph (line 388) | def init_forward_metadata_capture_cuda_graph(
    method init_forward_metadata_replay_cuda_graph (line 472) | def init_forward_metadata_replay_cuda_graph(
    method get_cuda_graph_seq_len_fill_value (line 540) | def get_cuda_graph_seq_len_fill_value(self):
    method forward_extend (line 543) | def forward_extend(
    method forward_decode (line 589) | def forward_decode(

FILE: python/sglang/srt/layers/attention/wave_ops/decode_attention.py
  function get_wave_kernel (line 27) | def get_wave_kernel(
  function decode_attention_intermediate_arrays_shapes (line 90) | def decode_attention_intermediate_arrays_shapes(
  function decode_attention_wave (line 105) | def decode_attention_wave(
  function decode_attention_fwd (line 157) | def decode_attention_fwd(

FILE: python/sglang/srt/layers/attention/wave_ops/extend_attention.py
  function get_wave_kernel (line 23) | def get_wave_kernel(
  function extend_attention_wave (line 81) | def extend_attention_wave(

FILE: python/sglang/srt/layers/attention/wave_ops/prefill_attention.py
  function prefill_attention_wave (line 22) | def prefill_attention_wave(

FILE: python/sglang/srt/layers/attention/xpu_backend.py
  class XPUAttentionBackend (line 26) | class XPUAttentionBackend(AttentionBackend):
    method __init__ (line 36) | def __init__(
    method init_forward_metadata (line 93) | def init_forward_metadata(self, forward_batch: ForwardBatch):
    method forward_extend (line 380) | def forward_extend(
    method forward_decode (line 672) | def forward_decode(
    method get_cuda_graph_seq_len_fill_value (line 931) | def get_cuda_graph_seq_len_fill_value(self):
    method _init_local_attn_metadata (line 935) | def _init_local_attn_metadata(
    method _init_sliding_window_attn_spec_metadata (line 979) | def _init_sliding_window_attn_spec_metadata(

FILE: python/sglang/srt/layers/communicator.py
  function apply_flashinfer_allreduce_fusion (line 94) | def apply_flashinfer_allreduce_fusion(batch_size: int):
  function apply_aiter_all_reduce_fusion (line 108) | def apply_aiter_all_reduce_fusion(input_tensor: torch.Tensor):
  class ScatterMode (line 122) | class ScatterMode(Enum):
    method model_input_output (line 136) | def model_input_output():
  class AttentionInputs (line 143) | class AttentionInputs:
    method __init__ (line 145) | def __init__(
    method tp_all_gather_hidden_states (line 157) | def tp_all_gather_hidden_states(self, hidden_states, forward_batch):
    method fetch_qkv_latent (line 163) | def fetch_qkv_latent(self):
    method fetch_hidden_states (line 176) | def fetch_hidden_states(self):
  class AttnTpContext (line 187) | class AttnTpContext:
    method __init__ (line 188) | def __init__(self):
    method init_context (line 193) | def init_context(self, q_lora_rank, is_nsa):
    method use_input_scattered (line 214) | def use_input_scattered(self, forward_batch: ForwardBatch):
    method input_scattered (line 225) | def input_scattered(self):
    method set_attn_inputs (line 228) | def set_attn_inputs(self, attn_inputs: AttentionInputs):
    method fetch_qkv_latent (line 231) | def fetch_qkv_latent(self):
    method fetch_hidden_states (line 235) | def fetch_hidden_states(self):
    method maybe_input_scattered (line 240) | def maybe_input_scattered(self, forward_batch: ForwardBatch):
  function get_attn_tp_context (line 252) | def get_attn_tp_context():
  class _LayerModeComputationContext (line 257) | class _LayerModeComputationContext:
    method previous_layer (line 264) | def previous_layer(self):
  class LayerScatterModes (line 276) | class LayerScatterModes:
    method init_new (line 285) | def init_new(cls, **kwargs):
    method _compute_layer_input_mode (line 296) | def _compute_layer_input_mode(cls, context: _LayerModeComputationConte...
    method _compute_mlp_mode (line 302) | def _compute_mlp_mode(cls, context: _LayerModeComputationContext):
    method _should_gather_for_tbo (line 321) | def _should_gather_for_tbo(cls, context: _LayerModeComputationContext):
    method _compute_middle_residual_mode (line 330) | def _compute_middle_residual_mode(cls, context: _LayerModeComputationC...
    method _compute_layer_output_mode (line 339) | def _compute_layer_output_mode(cls, context: _LayerModeComputationCont...
  function enable_moe_dense_fully_dp (line 352) | def enable_moe_dense_fully_dp():
  class LayerCommunicator (line 356) | class LayerCommunicator:
    method __init__ (line 357) | def __init__(
    method _post_init_communicate (line 380) | def _post_init_communicate(self):
    method prepare_attn_and_capture_last_layer_outputs (line 404) | def prepare_attn_and_capture_last_layer_outputs(
    method prepare_attn (line 430) | def prepare_attn(
    method _tp_reduce_scatter (line 544) | def _tp_reduce_scatter(
    method prepare_mlp (line 563) | def prepare_mlp(
    method postprocess_layer (line 581) | def postprocess_layer(
    method should_use_reduce_scatter (line 595) | def should_use_reduce_scatter(self, forward_batch: ForwardBatch):
    method should_fuse_mlp_allreduce_with_next_layer (line 611) | def should_fuse_mlp_allreduce_with_next_layer(
  class CommunicateContext (line 646) | class CommunicateContext:
    method is_same_group_size (line 657) | def is_same_group_size(self, a: ScatterMode, b: ScatterMode):
    method init_new (line 661) | def init_new(cls):
  class CommunicateSimpleFn (line 687) | class CommunicateSimpleFn:
    method get_fn (line 689) | def get_fn(
    method _trivial (line 707) | def _trivial(
    method _scattered_to_tp_attn_full (line 715) | def _scattered_to_tp_attn_full(
  class CommunicateWithAllReduceAndLayerNormFn (line 753) | class CommunicateWithAllReduceAndLayerNormFn:
    method get_fn (line 760) | def get_fn(
    method _simple (line 808) | def _simple(
    method _gather_hidden_states_and_residual (line 821) | def _gather_hidden_states_and_residual(
    method _scatter_hidden_states_and_residual (line 885) | def _scatter_hidden_states_and_residual(
    method _tp_all_reduce_with_scattered_residual (line 906) | def _tp_all_reduce_with_scattered_residual(
  class CommunicateSummableTensorPairFn (line 922) | class CommunicateSummableTensorPairFn:
    method execute (line 926) | def execute(
    method get_fn (line 942) | def get_fn(
    method _trivial (line 979) | def _trivial(
    method _scatter_hidden_states (line 989) | def _scatter_hidden_states(
    method _gather (line 1008) | def _gather(
    method _scatter (line 1028) | def _scatter(

FILE: python/sglang/srt/layers/communicator_nsa_cp.py
  function nsa_enable_prefill_cp (line 42) | def nsa_enable_prefill_cp():
  class NSACPLayerCommunicator (line 49) | class NSACPLayerCommunicator(LayerCommunicator):
    method __init__ (line 50) | def __init__(
    method _post_init_communicate (line 69) | def _post_init_communicate(self):
  class NSACPCommunicateSimpleFn (line 95) | class NSACPCommunicateSimpleFn(CommunicateSimpleFn):
    method get_fn (line 97) | def get_fn(
  class NSACPCommunicateWithAllReduceAndLayerNormFn (line 108) | class NSACPCommunicateWithAllReduceAndLayerNormFn(
    method get_fn (line 117) | def get_fn(
    method _gather_hidden_states_and_residual (line 141) | def _gather_hidden_states_and_residual(
  class NSACPCommunicateSummableTensorPairFn (line 167) | class NSACPCommunicateSummableTensorPairFn(CommunicateSummableTensorPair...
    method get_fn (line 171) | def get_fn(
    method _scatter_hidden_states (line 194) | def _scatter_hidden_states(

FILE: python/sglang/srt/layers/conv.py
  function _tuplify (line 23) | def _tuplify(val, n: int) -> tuple:
  function _check_enable_linear (line 30) | def _check_enable_linear(
  function _reverse_repeat_tuple (line 46) | def _reverse_repeat_tuple(t: tuple) -> tuple:
  function _compute_same_padding_for_pad (line 51) | def _compute_same_padding_for_pad(kernel_size: tuple, dilation: tuple) -...
  function _validate_conv_args (line 65) | def _validate_conv_args(
  class Conv2dLayer (line 94) | class Conv2dLayer(MultiPlatformOp):
    method __init__ (line 97) | def __init__(
    method _reset_parameters (line 153) | def _reset_parameters(self):
    method _forward_mulmat (line 160) | def _forward_mulmat(self, x: torch.Tensor) -> torch.Tensor:
    method _forward_conv (line 168) | def _forward_conv(self, x: torch.Tensor) -> torch.Tensor:
    method forward_native (line 189) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    method forward_cuda (line 194) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
  class Conv3dLayer (line 200) | class Conv3dLayer(MultiPlatformOp):
    method __init__ (line 203) | def __init__(
    method _reset_parameters (line 256) | def _reset_parameters(self):
    method _forward_mulmat (line 263) | def _forward_mulmat(self, x: torch.Tensor) -> torch.Tensor:
    method _forward_conv (line 271) | def _forward_conv(self, x: torch.Tensor) -> torch.Tensor:
    method forward_native (line 292) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    method forward_cuda (line 297) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:

FILE: python/sglang/srt/layers/deep_gemm_wrapper/compile_utils.py
  function update_deep_gemm_config (line 43) | def update_deep_gemm_config(gpu_id: int, server_args: ServerArgs):
  class DeepGemmKernelType (line 95) | class DeepGemmKernelType(IntEnum):
  function _maybe_compile_deep_gemm_one_type_all (line 106) | def _maybe_compile_deep_gemm_one_type_all(
  function _compile_deep_gemm_one_type_all (line 151) | def _compile_deep_gemm_one_type_all(
  class _BaseWarmupExecutor (line 213) | class _BaseWarmupExecutor:
    method create (line 215) | def create(kernel_type: DeepGemmKernelType, **kwargs):
    method get_memory_requirement (line 224) | def get_memory_requirement(
    method execute (line 246) | def execute(self, m):
  function _empty_token_fp8 (line 250) | def _empty_token_fp8(size):
  function _empty_block_fp8 (line 260) | def _empty_block_fp8(size):
  class _NormalWarmupExecutor (line 275) | class _NormalWarmupExecutor(_BaseWarmupExecutor):
    method __init__ (line 276) | def __init__(self, max_m: int, n: int, k: int, num_groups: int):
    method execute (line 281) | def execute(self, m):
  class _GroupedContWarmupExecutor (line 289) | class _GroupedContWarmupExecutor(_BaseWarmupExecutor):
    method __init__ (line 290) | def __init__(self, max_m: int, n: int, k: int, num_groups: int):
    method execute (line 296) | def execute(self, m):
  class _GroupedMaskedWarmupExecutor (line 305) | class _GroupedMaskedWarmupExecutor(_BaseWarmupExecutor):
    method __init__ (line 306) | def __init__(self, max_m: int, n: int, k: int, num_groups: int):
    method execute (line 314) | def execute(self, m):
  class _BF16F32WarmupExecutor (line 325) | class _BF16F32WarmupExecutor(_BaseWarmupExecutor):
    method __init__ (line 326) | def __init__(self, max_m: int, n: int, k: int, num_groups: int):
    method execute (line 331) | def execute(self, m):
  function deep_gemm_execution_hook (line 336) | def deep_gemm_execution_hook(

FILE: python/sglang/srt/layers/deep_gemm_wrapper/configurer.py
  function _compute_enable_deep_gemm (line 9) | def _compute_enable_deep_gemm():

FILE: python/sglang/srt/layers/deep_gemm_wrapper/entrypoint.py
  function grouped_gemm_nt_f8f8bf16_masked (line 26) | def grouped_gemm_nt_f8f8bf16_masked(
  function grouped_gemm_nt_f8f8bf16_contig (line 67) | def grouped_gemm_nt_f8f8bf16_contig(
  function gemm_nt_f8f8bf16 (line 84) | def gemm_nt_f8f8bf16(
  function gemm_nt_bf16bf16f32 (line 105) | def gemm_nt_bf16bf16f32(
  function update_deep_gemm_config (line 119) | def update_deep_gemm_config(gpu_id: int, server_args: ServerArgs):
  function configure_deep_gemm_num_sms (line 124) | def configure_deep_gemm_num_sms(num_sms):
  function _sanity_check_input (line 136) | def _sanity_check_input(x_fp8: Tuple[torch.Tensor, torch.Tensor]):

FILE: python/sglang/srt/layers/dp_attention.py
  class DpPaddingMode (line 50) | class DpPaddingMode(IntEnum):
    method is_max_len (line 57) | def is_max_len(self):
    method is_sum_len (line 60) | def is_sum_len(self):
    method get_dp_padding_mode (line 64) | def get_dp_padding_mode(
    method get_default_mode_in_cuda_graph (line 86) | def get_default_mode_in_cuda_graph(cls) -> DpPaddingMode:
  class _DpGatheredBufferWrapper (line 95) | class _DpGatheredBufferWrapper:
    method set_metadata (line 107) | def set_metadata(cls, hidden_size: int, dtype: torch.dtype, device: to...
    method set_dp_buffer_len (line 113) | def set_dp_buffer_len(
    method get_global_dp_buffer (line 126) | def get_global_dp_buffer(cls) -> torch.Tensor:
    method get_local_dp_buffer (line 136) | def get_local_dp_buffer(cls) -> torch.Tensor:
    method get_global_dp_buffer_len (line 146) | def get_global_dp_buffer_len(cls) -> int:
    method get_local_dp_buffer_len (line 150) | def get_local_dp_buffer_len(cls) -> int:
    method get_dp_global_num_tokens (line 154) | def get_dp_global_num_tokens(cls) -> List[int]:
    method get_dp_hidden_size (line 158) | def get_dp_hidden_size(cls) -> int:
    method get_dp_dtype (line 162) | def get_dp_dtype(cls) -> torch.dtype:
    method get_dp_device (line 166) | def get_dp_device(cls) -> torch.device:
    method set_is_extend_in_batch (line 170) | def set_is_extend_in_batch(cls, is_extend_in_batch: bool):
    method get_is_extend_in_batch (line 174) | def get_is_extend_in_batch(cls) -> bool:
    method is_dp_max_padding (line 178) | def is_dp_max_padding(cls) -> bool:
  function set_dp_buffer_len (line 182) | def set_dp_buffer_len(
  function get_global_dp_buffer (line 193) | def get_global_dp_buffer() -> torch.Tensor:
  function get_local_dp_buffer (line 197) | def get_local_dp_buffer() -> torch.Tensor:
  function get_global_dp_buffer_len (line 201) | def get_global_dp_buffer_len() -> int:
  function get_local_dp_buffer_len (line 205) | def get_local_dp_buffer_len() -> int:
  function get_dp_global_num_tokens (line 209) | def get_dp_global_num_tokens() -> List[int]:
  function get_dp_hidden_size (line 213) | def get_dp_hidden_size() -> int:
  function get_dp_dtype (line 217) | def get_dp_dtype() -> torch.dtype:
  function get_dp_device (line 221) | def get_dp_device() -> torch.device:
  function set_is_extend_in_batch (line 225) | def set_is_extend_in_batch(is_extend_in_batch: bool):
  function get_is_extend_in_batch (line 229) | def get_is_extend_in_batch() -> bool:
  function is_dp_max_padding (line 233) | def is_dp_max_padding() -> bool:
  function compute_dp_attention_world_info (line 237) | def compute_dp_attention_world_info(
  function compute_dp_attention_local_info (line 254) | def compute_dp_attention_local_info(
  function initialize_dp_attention (line 271) | def initialize_dp_attention(
  function is_dp_attention_enabled (line 311) | def is_dp_attention_enabled() -> bool:
  function is_allocation_symmetric (line 315) | def is_allocation_symmetric() -> bool:
  function get_attention_tp_group (line 319) | def get_attention_tp_group() -> GroupCoordinator:
  function get_attention_tp_rank (line 323) | def get_attention_tp_rank() -> int:
  function get_attention_tp_size (line 327) | def get_attention_tp_size() -> int:
  function get_attention_cp_group (line 331) | def get_attention_cp_group() -> GroupCoordinator:
  function get_attention_cp_rank (line 335) | def get_attention_cp_rank() -> int:
  function get_attention_cp_size (line 339) | def get_attention_cp_size() -> int:
  function get_attention_dp_rank (line 343) | def get_attention_dp_rank() -> int:
  function get_attention_dp_size (line 348) | def get_attention_dp_size() -> int:
  function get_local_attention_dp_rank (line 353) | def get_local_attention_dp_rank() -> int:
  function get_local_attention_dp_size (line 358) | def get_local_attention_dp_size() -> int:
  function disable_dp_size (line 364) | def disable_dp_size():
  function get_dp_local_info (line 384) | def get_dp_local_info(forward_batch: ForwardBatch) -> Tuple[torch.Tensor...
  function memcpy_triton_kernel (line 403) | def memcpy_triton_kernel(
  function prod (line 428) | def prod(x):
  function memcpy_triton (line 432) | def memcpy_triton(dst, src, dim, offset, sz, offset_src):
  function _dp_gather_via_all_reduce (line 443) | def _dp_gather_via_all_reduce(
  function _dp_gather_via_all_gather (line 478) | def _dp_gather_via_all_gather(
  function _dp_gather (line 498) | def _dp_gather(
  function dp_gather_partial (line 514) | def dp_gather_partial(
  function dp_gather_replicate (line 522) | def dp_gather_replicate(
  function dp_scatter (line 530) | def dp_scatter(
  function dp_reduce_scatter_tensor (line 552) | def dp_reduce_scatter_tensor(output: torch.Tensor, input: torch.Tensor):
  function attn_tp_reduce_scatter_tensor (line 563) | def attn_tp_reduce_scatter_tensor(output: torch.Tensor, input: torch.Ten...
  function attn_cp_reduce_scatter_tensor (line 567) | def attn_cp_reduce_scatter_tensor(output: torch.Tensor, input: torch.Ten...
  function attn_tp_all_reduce (line 571) | def attn_tp_all_reduce(input: torch.Tensor):
  function attn_tp_all_gather_into_tensor (line 575) | def attn_tp_all_gather_into_tensor(output: torch.Tensor, input: torch.Te...
  function attn_cp_all_gather_into_tensor (line 579) | def attn_cp_all_gather_into_tensor(output: torch.Tensor, input: torch.Te...
  function attn_tp_all_gather (line 583) | def attn_tp_all_gather(output_list: List[torch.Tensor], input: torch.Ten...

FILE: python/sglang/srt/layers/elementwise.py
  function fused_softcap_kernel (line 38) | def fused_softcap_kernel(
  function fused_softcap (line 62) | def fused_softcap(x, softcap_const, autotune=False):
  class Softcap (line 76) | class Softcap:
    method __init__ (line 77) | def __init__(self, softcap_const: float):
    method __call__ (line 80) | def __call__(self, *args, **kwargs):
    method forward (line 83) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method forward_native (line 89) | def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    method forward_cuda (line 92) | def forward_cuda(self, x: torch.Tensor, autotune=False) -> torch.Tensor:
  function fused_dual_residual_rmsnorm_kernel (line 139) | def fused_dual_residual_rmsnorm_kernel(
  function fused_dual_residual_rmsnorm (line 189) | def fused_dual_residual_rmsnorm(x, residual, weight1, weight2, eps, auto...
  function fused_rmsnorm_kernel (line 225) | def fused_rmsnorm_kernel(
  function fused_rmsnorm (line 255) | def fused_rmsnorm(x, weight, eps, autotune=False, inplace=False):
  class FusedDualResidualRMSNorm (line 276) | class FusedDualResidualRMSNorm:
    method __init__ (line 282) | def __init__(self, rmsnorm1, rmsnorm2) -> None:  # the one after rmsnorm1
    method __call__ (line 289) | def __call__(self, *args, **kwargs):
    method forward (line 292) | def forward(
    method forward_cuda (line 300) | def forward_cuda(
    method forward_flashinfer (line 312) | def forward_flashinfer(
    method forward_native (line 321) | def forward_native(
  function experts_combine_kernel (line 332) | def experts_combine_kernel(
  function experts_combine_triton (line 363) | def experts_combine_triton(
  function gelu_and_mul_kernel (line 408) | def gelu_and_mul_kernel(
  function gelu_and_mul_triton (line 445) | def gelu_and_mul_triton(
  function silu_and_mul_kernel (line 502) | def silu_and_mul_kernel(
  function silu_and_mul_triton (line 539) | def silu_and_mul_triton(

FILE: python/sglang/srt/layers/flashinfer_comm_fusion.py
  function is_flashinfer_allreduce_unavailable (line 41) | def is_flashinfer_allreduce_unavailable() -> bool:
  class FlashInferWorkspaceManager (line 45) | class FlashInferWorkspaceManager:
    method __init__ (line 46) | def __init__(self):
    method initialize (line 55) | def initialize(
    method is_buffer_size_sufficient (line 106) | def is_buffer_size_sufficient(
    method cleanup (line 127) | def cleanup(self):
  function ensure_workspace_initialized (line 147) | def ensure_workspace_initialized(
  function fake_flashinfer_allreduce_residual_rmsnorm (line 191) | def fake_flashinfer_allreduce_residual_rmsnorm(
  function flashinfer_allreduce_residual_rmsnorm (line 210) | def flashinfer_allreduce_residual_rmsnorm(
  function cleanup_flashinfer_workspace (line 286) | def cleanup_flashinfer_workspace():

FILE: python/sglang/srt/layers/int4fp8_utils.py
  function quantize_fp8_scale_tensorwise (line 13) | def quantize_fp8_scale_tensorwise(w: torch.Tensor) -> Tuple[torch.Tensor...
  function quantize_int4_scale_columnwise (line 20) | def quantize_int4_scale_columnwise(
  function pack_int4_to_int32 (line 30) | def pack_int4_to_int32(to_pack: torch.Tensor, reorder: bool = True) -> t...

FILE: python/sglang/srt/layers/layernorm.py
  function _forward_with_allreduce_fusion (line 89) | def _forward_with_allreduce_fusion(
  class RMSNorm (line 136) | class RMSNorm(MultiPlatformOp):
    method __init__ (line 137) | def __init__(
    method forward_cuda (line 165) | def forward_cuda(
    method forward_npu (line 198) | def forward_npu(
    method forward_aiter (line 213) | def forward_aiter(
    method forward_hip (line 235) | def forward_hip(
    method forward_native (line 261) | def forward_native(
    method forward_cpu (line 311) | def forward_cpu(
    method forward_xpu (line 331) | def forward_xpu(
    method forward_with_allreduce_fusion (line 347) | def forward_with_allreduce_fusion(
  class LayerNorm (line 359) | class LayerNorm(MultiPlatformOp):
    method __init__ (line 360) | def __init__(
    method forward_cuda (line 378) | def forward_cuda(
    method forward_native (line 391) | def forward_native(
    method forward_hip (line 407) | def forward_hip(
    method forward_npu (line 413) | def forward_npu(
    method forward_cpu (line 419) | def forward_cpu(
  class GemmaRMSNorm (line 432) | class GemmaRMSNorm(MultiPlatformOp):
    method __init__ (line 433) | def __init__(
    method _forward_impl (line 445) | def _forward_impl(
    method forward_native (line 461) | def forward_native(
    method forward_cuda (line 481) | def forward_cuda(
    method forward_cpu (line 489) | def forward_cpu(
    method forward_npu (line 508) | def forward_npu(
    method forward_xpu (line 525) | def forward_xpu(
    method forward_with_allreduce_fusion (line 533) | def forward_with_allreduce_fusion(
  class Gemma3RMSNorm (line 546) | class Gemma3RMSNorm(MultiPlatformOp):
    method __init__ (line 547) | def __init__(self, dim: int, eps: float = 1e-6):
    method _norm (line 553) | def _norm(self, x):
    method forward_native (line 556) | def forward_native(self, x):
    method forward_cpu (line 563) | def forward_cpu(self, x):
    method forward_cuda (line 568) | def forward_cuda(self, x):
    method forward_npu (line 571) | def forward_npu(self, x):
    method extra_repr (line 575) | def extra_repr(self):

FILE: python/sglang/srt/layers/linear.py
  function adjust_marlin_shard (line 81) | def adjust_marlin_shard(param, shard_size, shard_offset):
  function adjust_bitsandbytes_4bit_shard (line 89) | def adjust_bitsandbytes_4bit_shard(
  function adjust_scalar_to_fused_array (line 104) | def adjust_scalar_to_fused_array(param, loaded_weight, shard_id):
  function adjust_shard_offsets (line 127) | def adjust_shard_offsets(shard_offsets, loaded_weight, dim):
  class LinearBase (line 141) | class LinearBase(torch.nn.Module):
    method __init__ (line 153) | def __init__(
    method forward (line 179) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class ReplicatedLinear (line 183) | class ReplicatedLinear(LinearBase):
    method __init__ (line 197) | def __init__(
    method weight_loader (line 242) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor):
    method forward (line 264) | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Optional[tor...
    method extra_repr (line 271) | def extra_repr(self) -> str:
  class ColumnParallelLinear (line 278) | class ColumnParallelLinear(LinearBase):
    method __init__ (line 302) | def __init__(
    method weight_loader (line 371) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor):
    method weight_loader_v2 (line 420) | def weight_loader_v2(self, param: Parameter, loaded_weight: torch.Tens...
    method forward (line 447) | def forward(self, input_):
    method extra_repr (line 461) | def extra_repr(self) -> str:
  class MergedColumnParallelLinear (line 470) | class MergedColumnParallelLinear(ColumnParallelLinear):
    method __init__ (line 493) | def __init__(
    method weight_loader (line 530) | def weight_loader(
    method _load_fused_module_from_checkpoint (line 708) | def _load_fused_module_from_checkpoint(
    method _load_merged_block_scale (line 748) | def _load_merged_block_scale(
    method weight_loader_v2 (line 793) | def weight_loader_v2(
  class QKVParallelLinear (line 858) | class QKVParallelLinear(ColumnParallelLinear):
    method __init__ (line 884) | def __init__(
    method _get_shard_offset_mapping (line 953) | def _get_shard_offset_mapping(self, loaded_shard_id: str):
    method _get_shard_size_mapping (line 963) | def _get_shard_size_mapping(self, loaded_shard_id: str):
    method _load_fused_module_from_checkpoint (line 971) | def _load_fused_module_from_checkpoint(
    method _load_qkv_block_scale (line 1016) | def _load_qkv_block_scale(
    method weight_loader_v2 (line 1045) | def weight_loader_v2(
    method weight_loader (line 1087) | def weight_loader(
  class RowParallelLinear (line 1304) | class RowParallelLinear(LinearBase):
    method __init__ (line 1330) | def __init__(
    method weight_loader (line 1391) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor):
    method weight_loader_v2 (line 1454) | def weight_loader_v2(self, param: BasevLLMParameter, loaded_weight: to...
    method forward (line 1484) | def forward(self, input_, skip_all_reduce=False):
    method extra_repr (line 1515) | def extra_repr(self) -> str:
  class MergedColumnParallelRepeatedLinear (line 1524) | class MergedColumnParallelRepeatedLinear(LinearBase):
    method __init__ (line 1537) | def __init__(
    method forward (line 1576) | def forward(self, input_: torch.Tensor) -> torch.Tensor:
    method weight_loader (line 1579) | def weight_loader(
  class ColumnParallelBatchedLinear (line 1594) | class ColumnParallelBatchedLinear(nn.Module):
    method __init__ (line 1605) | def __init__(
    method forward (line 1617) | def forward(self, input: torch.Tensor) -> torch.Tensor:
    method weight_loader (line 1620) | def weight_loader(

FILE: python/sglang/srt/layers/model_parallel.py
  function _shard_tensor (line 24) | def _shard_tensor(
  class ColwiseParallelSharded (line 68) | class ColwiseParallelSharded(ColwiseParallel):
    method _partition_linear_fn (line 76) | def _partition_linear_fn(self, name, module, device_mesh):
  class RowwiseParallelMaybeWait (line 86) | class RowwiseParallelMaybeWait(RowwiseParallel):
    method _partition_linear_fn (line 94) | def _partition_linear_fn(self, name, module, device_mesh):
    method _prepare_output_fn (line 112) | def _prepare_output_fn(output_layouts, use_local_output, mod, outputs,...
  function tensor_parallel (line 121) | def tensor_parallel(

FILE: python/sglang/srt/layers/moe/cutlass_moe.py
  function cutlass_fused_experts_fp8 (line 29) | def cutlass_fused_experts_fp8(
  function cutlass_moe_fp4 (line 348) | def cutlass_moe_fp4(

FILE: python/sglang/srt/layers/moe/cutlass_moe_params.py
  class CutlassMoEType (line 8) | class CutlassMoEType(Enum):
  class CutlassMoEParams (line 19) | class CutlassMoEParams:
    method __init__ (line 95) | def __init__(
    method to_gemm1_args (line 155) | def to_gemm1_args(self) -> dict:
    method to_gemm2_args (line 172) | def to_gemm2_args(self) -> dict:

FILE: python/sglang/srt/layers/moe/cutlass_w4a8_moe.py
  function cutlass_w4a8_moe (line 35) | def cutlass_w4a8_moe(
  function cutlass_w4a8_moe_deepep_normal (line 223) | def cutlass_w4a8_moe_deepep_normal(
  function cutlass_w4a8_moe_deepep_ll (line 413) | def cutlass_w4a8_moe_deepep_ll(

FILE: python/sglang/srt/layers/moe/ep_moe/kernels.py
  function _get_launch_config_1d (line 19) | def _get_launch_config_1d(device, numel):
  function _get_launch_config_2d (line 46) | def _get_launch_config_2d(device, m, n):
  function deepep_permute_triton_kernel (line 74) | def deepep_permute_triton_kernel(
  function deepep_post_reorder_triton_kernel (line 105) | def deepep_post_reorder_triton_kernel(
  function compute_src2dst_triton_kernel (line 138) | def compute_src2dst_triton_kernel(
  function deepep_compute_src2dst_triton_kernel (line 149) | def deepep_compute_src2dst_triton_kernel(
  function deepep_run_moe_deep_preprocess (line 160) | def deepep_run_moe_deep_preprocess(topk_ids: torch.Tensor, num_experts: ...
  function compute_seg_indptr_triton_kernel (line 183) | def compute_seg_indptr_triton_kernel(reorder_topk_ids, seg_indptr, num_t...
  function cutlass_w4_run_moe_ep_preproess (line 199) | def cutlass_w4_run_moe_ep_preproess(topk_ids: torch.Tensor):
  function pre_reorder_triton_kernel_for_cutlass_moe (line 213) | def pre_reorder_triton_kernel_for_cutlass_moe(
  function pre_reorder_for_cutlass_moe (line 257) | def pre_reorder_for_cutlass_moe(
  function _silu_and_mul_post_quant_kernel (line 287) | def _silu_and_mul_post_quant_kernel(
  function silu_and_mul_masked_post_quant_fwd (line 364) | def silu_and_mul_masked_post_quant_fwd(
  function silu_mul_static_tensorwise_quant_triton_kernel_for_cutlass_moe (line 433) | def silu_mul_static_tensorwise_quant_triton_kernel_for_cutlass_moe(
  function silu_mul_static_tensorwise_quant_for_cutlass_moe (line 465) | def silu_mul_static_tensorwise_quant_for_cutlass_moe(
  function post_reorder_triton_kernel_for_cutlass_moe (line 489) | def post_reorder_triton_kernel_for_cutlass_moe(
  function post_reorder_for_cutlass_moe (line 538) | def post_reorder_for_cutlass_moe(
  function post_reorder_triton_kernel (line 569) | def post_reorder_triton_kernel(
  function _fwd_kernel_ep_scatter_1 (line 609) | def _fwd_kernel_ep_scatter_1(
  function _fwd_kernel_ep_scatter_2 (line 642) | def _fwd_kernel_ep_scatter_2(
  function ep_scatter (line 715) | def ep_scatter(
  function _fwd_kernel_ep_gather (line 791) | def _fwd_kernel_ep_gather(
  function ep_gather (line 857) | def ep_gather(
  function get_tma_aligned_size (line 896) | def get_tma_aligned_size(x: int, element_size: int) -> int:
  function _tma_align_input_scale_kernel (line 916) | def _tma_align_input_scale_kernel(
  function tma_align_input_scale (line 946) | def tma_align_input_scale(input_scale: torch.Tensor):
  function compute_masked_m_triton_kernel (line 972) | def compute_masked_m_triton_kernel(seg_indptr, masked_m):
  function deepgemm_compute_src2dst_triton_kernel (line 980) | def deepgemm_compute_src2dst_triton_kernel(
  function fill_gateup_input_triton_kernel (line 1001) | def fill_gateup_input_triton_kernel(
  function moe_ep_deepgemm_preprocess (line 1041) | def moe_ep_deepgemm_preprocess(
  function compute_identity_kernel (line 1119) | def compute_identity_kernel(
  function zero_experts_compute_triton (line 1157) | def zero_experts_compute_triton(
  function compute_problem_sizes_w4a8_kernel (line 1193) | def compute_problem_sizes_w4a8_kernel(
  function compute_problem_sizes_w4a8 (line 1230) | def compute_problem_sizes_w4a8(
  function deepep_ll_get_cutlass_w4a8_moe_mm_data (line 1247) | def deepep_ll_get_cutlass_w4a8_moe_mm_data(
  function _silu_and_mul_post_per_tensor_quant_kernel (line 1265) | def _silu_and_mul_post_per_tensor_quant_kernel(
  function silu_and_mul_masked_post_per_tensor_quant_fwd (line 1330) | def silu_and_mul_masked_post_per_tensor_quant_fwd(

FILE: python/sglang/srt/layers/moe/flashinfer_cutedsl_moe.py
  function get_cute_dtype (line 11) | def get_cute_dtype(input: torch.Tensor) -> str:
  function flashinfer_cutedsl_moe_masked (line 22) | def flashinfer_cutedsl_moe_masked(

FILE: python/sglang/srt/layers/moe/flashinfer_trtllm_moe.py
  function _fake_fp8_block_scale_moe (line 8) | def _fake_fp8_block_scale_moe(
  function trtllm_fp8_block_scale_moe_wrapper (line 38) | def trtllm_fp8_block_scale_moe_wrapper(
  function _fake_fp8_block_scale_routed_moe (line 100) | def _fake_fp8_block_scale_routed_moe(
  function trtllm_fp8_block_scale_routed_moe_wrapper (line 130) | def trtllm_fp8_block_scale_routed_moe_wrapper(
  function _fake_fp8_per_tensor_scale_moe (line 192) | def _fake_fp8_per_tensor_scale_moe(
  function trtllm_fp8_per_tensor_scale_moe_wrapper (line 220) | def trtllm_fp8_per_tensor_scale_moe_wrapper(

FILE: python/sglang/srt/layers/moe/fused_moe_native.py
  function fused_moe_forward_native (line 18) | def fused_moe_forward_native(
  function moe_forward_native (line 49) | def moe_forward_native(

FILE: python/sglang/srt/layers/moe/fused_moe_triton/__init__.py
  function override_config (line 21) | def override_config(config):
  function get_config (line 29) | def get_config() -> Optional[Dict[str, Any]]: