SYMBOL INDEX (12024 symbols across 1049 files) FILE: .github/update_ci_permission.py function github_api_get (line 64) | def github_api_get(endpoint, params=None): function get_write_access_users (line 103) | def get_write_access_users(): function get_top_contributors (line 120) | def get_top_contributors(days=90, limit=50): function load_existing_permissions (line 139) | def load_existing_permissions(): function sort_permissions_file (line 149) | def sort_permissions_file(): function main (line 165) | def main(): FILE: 3rdparty/amd/tuning/benchmark_moe_rocm.py function main (line 21) | def main(model, tp_size, dtype: str, batches): function prune_configs (line 28) | def prune_configs(M, N, K, configs): function union_of_list_of_dicts (line 103) | def union_of_list_of_dicts(l1, l2): function run_grid (line 114) | def run_grid(bs, model, method, tp_size, dtype: str): function run_timing (line 268) | def run_timing( FILE: benchmark/asr/bench_sglang.py function to_bytes (line 19) | def to_bytes(y, sr): function run_asr_chat (line 26) | async def run_asr_chat(client, model_name, y, sr): function run_asr_transcription_sync (line 55) | def run_asr_transcription_sync(client, model_name, y, sr, language=None): function run_asr_transcription_stream_sync (line 75) | def run_asr_transcription_stream_sync( function run_asr_transcription (line 125) | async def run_asr_transcription( function bound_asr (line 153) | async def bound_asr( function process_dataset (line 199) | async def process_dataset( function run_evaluation (line 259) | def run_evaluation(args): FILE: benchmark/bench_attention_sink/bench_attention_sink_triton.py function benchmark_decode (line 36) | def benchmark_decode(B, S, H_Q, H_KV, D): function benchmark_extend (line 139) | def benchmark_extend(B, S, H_Q, H_KV, D): FILE: benchmark/bench_in_batch_prefix/bench_in_batch_prefix.py function generate_random_string (line 18) | def generate_random_string(token_length: int) -> str: function generate_unique_prefix (line 35) | def generate_unique_prefix(base_text, index): function text_qa (line 40) | def text_qa(s, question, gen_len): function prepare_prompts (line 45) | def prepare_prompts(num_prefix, num_samples_per_prefix, prefix_length, s... function test_batch_by_batch (line 62) | def test_batch_by_batch(all_prompts, gen_len): function test_batch_by_batch_with_hint (line 76) | def test_batch_by_batch_with_hint(all_prompts, gen_len): function test_send_all (line 92) | def test_send_all(all_prompts, gen_len): FILE: benchmark/bench_linear_attention/bench_gdn_decode.py function make_inputs (line 44) | def make_inputs( function run_baseline (line 94) | def run_baseline(inp): function run_packed (line 132) | def run_packed(inp): function check_correctness (line 160) | def check_correctness(B, H, HV, K, V, pool_size, device, dtype, seed=42): function bench_shape (line 211) | def bench_shape(B, H, HV, K, V, pool_size, device, dtype): function run_correctness (line 304) | def run_correctness(device, dtype): function run_benchmark (line 359) | def run_benchmark(device, dtype, args): function main (line 424) | def main(): FILE: benchmark/bench_linear_attention/bench_gdn_prefill.py function make_k_contiguous (line 42) | def make_k_contiguous(t: torch.Tensor) -> torch.Tensor: function gdn_flops (line 50) | def gdn_flops( function gdn_bytes (line 68) | def gdn_bytes( function make_inputs (line 101) | def make_inputs( function run_triton (line 166) | def run_triton(inp): function run_flashinfer (line 185) | def run_flashinfer(inp): function check_shape (line 252) | def check_shape( function bench_shape (line 355) | def bench_shape(B, H, T_per_seq, K, V, pool_size, device, dtype): function run_correctness (line 453) | def run_correctness(device, dtype): function run_benchmark (line 520) | def run_benchmark(device, dtype, args): function main (line 570) | def main(): FILE: benchmark/bench_rope/benchmark_rope_index.py class DummyVisionConfig (line 25) | class DummyVisionConfig: class DummyHFConfig (line 30) | class DummyHFConfig: function calculate_stats (line 42) | def calculate_stats(times: list[float]) -> dict[str, float]: function _sync (line 54) | def _sync(device: torch.device): function _approx_hw (line 59) | def _approx_hw(patches: int, merge: int) -> tuple[int, int]: function generate_test_data (line 66) | def generate_test_data( function benchmark_rope_index (line 224) | def benchmark_rope_index( FILE: benchmark/benchmark_batch/benchmark_batch.py function generate_random_prompt (line 30) | def generate_random_prompt(index, tokenizer_dir, num_tokens): function prepare_all_prompts (line 43) | def prepare_all_prompts(num_requests, batch_size, num_tokens, tokenizer_... function send_batch_request (line 75) | def send_batch_request(endpoint, prompts, gen_tokens, request_id): function run_benchmark (line 101) | def run_benchmark(endpoint, batched_prompts, batch_size, gen_tokens): function process_results (line 130) | def process_results(results, total_latency, num_requests): function main (line 167) | def main(): FILE: benchmark/benchmark_batch/benchmark_tokenizer.py function main (line 11) | def main(): function run_benchmark (line 65) | def run_benchmark( function benchmark (line 86) | def benchmark(*, data, batch_size, sequential_fn, batch_fn, num_runs, ba... function print_results (line 119) | def print_results(*, results, func_name, batch_mode): function print_runs (line 163) | def print_runs(*, label, runs, avg): function measure_times (line 170) | def measure_times(*, fn, num_runs): function generate_random_token_ids (line 179) | def generate_random_token_ids(*, num_prompts, num_tokens, tokenizer): function parse_args (line 188) | def parse_args(): FILE: benchmark/boolq/bench_sglang.py function get_example (line 15) | def get_example(lines, i, answer): function few_shot_examples (line 22) | def few_shot_examples(lines, k): function main (line 29) | def main(args): FILE: benchmark/boolq/convert_parquet_to_json.py function convert_parquet_to_json (line 6) | def convert_parquet_to_json(input_file, output_file): FILE: benchmark/ceval/bench_sglang.py function get_one_example (line 20) | def get_one_example(line, include_answer): function get_few_shot_examples (line 32) | def get_few_shot_examples(lines): function get_answer_value (line 39) | def get_answer_value(response): function main (line 49) | def main(args): FILE: benchmark/dspy/bench_dspy_intro.py class BasicQA (line 12) | class BasicQA(dspy.Signature): class GenerateAnswer (line 19) | class GenerateAnswer(dspy.Signature): class RAG (line 27) | class RAG(dspy.Module): method __init__ (line 28) | def __init__(self, num_passages=3): method forward (line 34) | def forward(self, question): function main (line 40) | def main(args): FILE: benchmark/fla/benchmark_layernorm_gated.py function benchmark_layer_norm_fwd (line 15) | def benchmark_layer_norm_fwd( function main (line 264) | def main(): FILE: benchmark/generative_agents/agent_functions.py function poignancy_event (line 8) | def poignancy_event(s, persona_name, persona_iss, event): function poignancy_event_prompt (line 18) | def poignancy_event_prompt(persona_name, persona_iss, event): function generate_event_triple (line 31) | def generate_event_triple(s, persona_name, action): function generate_event_triple_prompt (line 56) | def generate_event_triple_prompt(persona_name, action): function generate_pronunciatio (line 83) | def generate_pronunciatio(s, action): function generate_pronunciatio_prompt (line 89) | def generate_pronunciatio_prompt(action): function action_location_sector (line 98) | def action_location_sector( function action_location_sector_prompt (line 158) | def action_location_sector_prompt( function action_location_object (line 219) | def action_location_object( function action_location_object_prompt (line 260) | def action_location_object_prompt( FILE: benchmark/generative_agents/bench_other.py function main (line 18) | def main(args): FILE: benchmark/generative_agents/bench_sglang.py function main (line 21) | def main(args): FILE: benchmark/gsm8k/bench_other.py function get_one_example (line 19) | def get_one_example(lines, i, include_answer): function get_few_shot_examples (line 26) | def get_few_shot_examples(lines, k): function get_answer_value (line 33) | def get_answer_value(answer_str): function main (line 44) | def main(args): FILE: benchmark/gsm8k/bench_sglang.py function get_one_example (line 22) | def get_one_example(lines, i, include_answer): function get_few_shot_examples (line 29) | def get_few_shot_examples(lines, k): function get_answer_value (line 36) | def get_answer_value(answer_str): function main (line 47) | def main(args): FILE: benchmark/hellaswag/bench_other.py function get_one_example (line 14) | def get_one_example(lines, i, include_answer): function get_few_shot_examples (line 21) | def get_few_shot_examples(lines, k): function main (line 28) | def main(args): FILE: benchmark/hellaswag/bench_sglang.py function get_one_example (line 16) | def get_one_example(lines, i, include_answer): function get_few_shot_examples (line 23) | def get_few_shot_examples(lines, k): function main (line 30) | def main(args): FILE: benchmark/hf3fs/bench_client.py function print_stats (line 13) | def print_stats(x: List[int]): function test (line 26) | def test(): function bench (line 66) | def bench(): function main (line 155) | def main(): FILE: benchmark/hf3fs/bench_storage.py function print_stats (line 17) | def print_stats(x: List[int]): function test (line 30) | def test(): function bench (line 129) | def bench(): function allclose (line 195) | def allclose(): function main (line 250) | def main(): FILE: benchmark/hicache/bench_long_context.py class ContextWorkloadGenerator (line 18) | class ContextWorkloadGenerator(WorkloadGenerator): method __init__ (line 19) | def __init__(self, args): method response_handler (line 68) | def response_handler(self): FILE: benchmark/hicache/bench_mix.py function write_debug_log (line 28) | def write_debug_log(data): function parse_args (line 42) | def parse_args(): function load_config (line 92) | def load_config(): class UserData (line 125) | class UserData: function synchronized (line 134) | def synchronized(): class UserGenerator (line 146) | class UserGenerator: method __init__ (line 147) | def __init__(self, config, model_path, dataset_path): method gen (line 195) | def gen(self): method push (line 230) | def push(self, user_data, generated_text, len_itl): method pop (line 270) | def pop(self): function gen_payload (line 279) | def gen_payload(prompt, output_len): function async_request_sglang_generate (line 299) | async def async_request_sglang_generate( class AtomicCounter (line 371) | class AtomicCounter: method __init__ (line 372) | def __init__(self, initial_value=0): method increment (line 377) | def increment(self, amount=1): method get (line 381) | def get(self): class WorkloadGenerator (line 385) | class WorkloadGenerator: method __init__ (line 386) | def __init__(self, args): method handle_request (line 417) | async def handle_request(self, user_data): method request_sender (line 427) | def request_sender(self): method response_handler (line 448) | def response_handler(self): method run (line 475) | def run(self): function main (line 544) | def main(): FILE: benchmark/hicache/bench_multiturn.py function parse_args (line 25) | def parse_args(): function log_to_jsonl_file (line 171) | def log_to_jsonl_file(data, file_path="performance_metrics.jsonl", tag=""): class ReadyQueue (line 183) | class ReadyQueue: method __init__ (line 188) | def __init__(self, init_requests=None, policy="random"): method append (line 193) | def append(self, item): method pop (line 197) | def pop(self): class WorkloadGenerator (line 211) | class WorkloadGenerator: method __init__ (line 212) | def __init__(self, args): method handle_request (line 388) | async def handle_request(self, item): method request_sender (line 402) | def request_sender(self): method response_handler (line 435) | def response_handler(self): method run (line 544) | def run(self): FILE: benchmark/hicache/bench_serving.py class RequestFuncInput (line 43) | class RequestFuncInput: class RequestFuncOutput (line 56) | class RequestFuncOutput: function async_request_openai_completions (line 69) | async def async_request_openai_completions( function async_request_profile (line 202) | async def async_request_profile(api_url: str) -> RequestFuncOutput: class BenchmarkMetrics (line 228) | class BenchmarkMetrics: function get_requests (line 261) | async def get_requests( function calculate_metrics (line 284) | def calculate_metrics( function benchmark (line 372) | async def benchmark( function run_benchmark (line 685) | def run_benchmark(args_: argparse.Namespace): FILE: benchmark/hicache/data_processing.py function common_filter_chat (line 37) | def common_filter_chat( function sample_sharegpt_requests (line 94) | def sample_sharegpt_requests( function sample_ultrachat_requests (line 150) | def sample_ultrachat_requests( function sample_loogle_requests (line 198) | def sample_loogle_requests( function sample_nextqa_requests (line 267) | def sample_nextqa_requests( function sample_random_requests (line 347) | def sample_random_requests( function sample_generated_shared_prefix_requests (line 434) | def sample_generated_shared_prefix_requests( function get_dataset (line 520) | def get_dataset(args, tokenizer): FILE: benchmark/hicache/nextqa.py function find_video_files (line 9) | def find_video_files(video_dir) -> List[str]: function video_frames (line 24) | def video_frames(video_path, max_frames) -> int: class Video (line 30) | class Video: method __init__ (line 31) | def __init__(self, video_path, num_frames): method __str__ (line 35) | def __str__(self): method __iter__ (line 38) | def __iter__(self): class VideoPrompt (line 42) | class VideoPrompt(Video): method __init__ (line 43) | def __init__(self, video_path, num_frames, prompt): method __str__ (line 47) | def __str__(self): method __iter__ (line 50) | def __iter__(self): class VideoLoader (line 54) | class VideoLoader: class VideoFileLoader (line 58) | class VideoFileLoader(VideoLoader): method __init__ (line 63) | def __init__(self, video_dir, batch_size=1, max_frames=sys.maxsize): method __iter__ (line 71) | def __iter__(self): # (file, number of frames) class NExTQALoader (line 85) | class NExTQALoader(VideoLoader): method __init__ (line 91) | def __init__( method get_video_prompt (line 114) | def get_video_prompt(self, entry, max_frames) -> VideoPrompt: method __iter__ (line 126) | def __iter__(self): FILE: benchmark/hicache/perf.py function jit_hicache_impl (line 8) | def jit_hicache_impl( function ref_hicache_impl (line 33) | def ref_hicache_impl( class HicacheBenchArgs (line 57) | class HicacheBenchArgs(NamedTuple): function perf (line 63) | def perf(f: Callable[[], Any], loop: int = 100) -> float: function test_hicache_kernel (line 79) | def test_hicache_kernel(args: HicacheBenchArgs) -> None: function main (line 210) | def main() -> None: FILE: benchmark/json_decode_regex/bench_other.py function json_decode (line 17) | def json_decode(document, generate): function main (line 38) | def main(args): FILE: benchmark/json_decode_regex/bench_sglang.py function json_warm_up (line 17) | def json_warm_up(s): function json_decode (line 32) | def json_decode(s, document): function main (line 47) | def main(args): FILE: benchmark/json_decode_regex/build_dataset.py function get_content (line 35) | def get_content(city_name): FILE: benchmark/json_jump_forward/bench_other.py function character_gen (line 44) | def character_gen(name, generate): function city_gen (line 51) | def city_gen(document, generate): function character_maker (line 61) | def character_maker(lm, name): function call_generate_lmql (line 85) | async def call_generate_lmql( function city_maker (line 109) | def city_maker(lm, document): function bench_character (line 132) | def bench_character(args): function bench_city_doc (line 210) | def bench_city_doc(args): function main (line 254) | def main(args): FILE: benchmark/json_jump_forward/bench_sglang.py function character_gen (line 44) | def character_gen(s, name): function city_gen (line 51) | def city_gen(s, document): function bench_city_doc (line 59) | def bench_city_doc(args): function bench_character (line 82) | def bench_character(args): function main (line 106) | def main(args): FILE: benchmark/json_jump_forward/build_dataset.py function get_content (line 35) | def get_content(city_name): FILE: benchmark/json_schema/bench_sglang.py function schema_gen (line 20) | def schema_gen(s, message: Tuple[str, str], json_schema: str): function contains_formats (line 29) | def contains_formats(schema, formats: List[str]): function convert_dataset (line 43) | def convert_dataset(path: str): function bench_schema (line 76) | def bench_schema(args): function main (line 111) | def main(args): FILE: benchmark/kernels/all_reduce/benchmark_aiter.py function parse_args (line 19) | def parse_args(): function get_env_rank_world (line 55) | def get_env_rank_world() -> Tuple[int, int, int]: function init_dist (line 62) | def init_dist(backend: str): function get_device (line 73) | def get_device(local_rank: int) -> torch.device: function human_size (line 78) | def human_size(num_bytes: int) -> str: function get_message_sizes (line 87) | def get_message_sizes() -> List[int]: function run_once (line 105) | def run_once(comm, inp: torch.Tensor) -> Optional[torch.Tensor]: function bench_impl (line 114) | def bench_impl( function main (line 201) | def main(): FILE: benchmark/kernels/all_reduce/benchmark_all_reduce.py function parse_args (line 26) | def parse_args(): function get_env_rank_world (line 62) | def get_env_rank_world() -> Tuple[int, int, int]: function init_dist (line 69) | def init_dist(backend: str): function get_device (line 92) | def get_device(local_rank: int) -> torch.device: function human_size (line 97) | def human_size(num_bytes: int) -> str: function get_message_sizes (line 106) | def get_message_sizes() -> List[int]: function run_once (line 124) | def run_once(comm, inp: torch.Tensor) -> Optional[torch.Tensor]: function bench_impl (line 133) | def bench_impl( function main (line 220) | def main(): FILE: benchmark/kernels/all_reduce/benchmark_fused_ar_rms_amd.py function parse_shapes (line 48) | def parse_shapes(raw: str) -> List[Shape]: function dtype_from_name (line 64) | def dtype_from_name(name: str) -> torch.dtype: function check_close (line 76) | def check_close( function _measure_us (line 92) | def _measure_us( function _barrier (line 128) | def _barrier(device: torch.device): function _mean_across_ranks (line 135) | def _mean_across_ranks(value: float, device: torch.device) -> float: function _all_true_across_ranks (line 142) | def _all_true_across_ranks(value: bool, device: torch.device) -> bool: function _make_inputs (line 148) | def _make_inputs( function _split_reference (line 171) | def _split_reference( function bench_eager (line 185) | def bench_eager( function bench_graph (line 234) | def bench_graph( function _shape_bytes (line 313) | def _shape_bytes(shape: Shape, dtype: torch.dtype) -> int: function parse_args (line 318) | def parse_args(): function main (line 367) | def main(): FILE: benchmark/kernels/all_reduce/benchmark_mscclpp.py function torch_allreduce (line 34) | def torch_allreduce(torch_input: torch.Tensor, group: ProcessGroup) -> t... function msccl_allreduce (line 39) | def msccl_allreduce( function pynccl_allreduce (line 45) | def pynccl_allreduce( function _bench_graph_time (line 52) | def _bench_graph_time(func, inp_randn, warmup_loop=2, graph_loop=10, tes... function _bench_eager_time (line 84) | def _bench_eager_time(func, inp_randn, warmup_loop=2, test_loop=10): function get_torch_prof_ctx (line 106) | def get_torch_prof_ctx(do_prof: bool): function human_readable_size (line 122) | def human_readable_size(size, decimal_places=1): function print_markdown_table (line 137) | def print_markdown_table(data): FILE: benchmark/kernels/all_reduce/benchmark_torch_symm_mem.py function torch_allreduce (line 55) | def torch_allreduce(torch_input: torch.Tensor, group: ProcessGroup) -> t... function torch_symm_mem_allreduce (line 60) | def torch_symm_mem_allreduce( function pynccl_allreduce (line 66) | def pynccl_allreduce( function _bench_graph_time (line 73) | def _bench_graph_time(func, inp_randn, warmup_loop=2, graph_loop=10, tes... function _bench_eager_time (line 105) | def _bench_eager_time(func, inp_randn, warmup_loop=2, test_loop=10): function get_torch_prof_ctx (line 127) | def get_torch_prof_ctx(do_prof: bool): function human_readable_size (line 143) | def human_readable_size(size, decimal_places=1): function print_markdown_table (line 158) | def print_markdown_table(data): FILE: benchmark/kernels/decoding_attention_triton/triton_flashinfer_cudnn.py function benchmark_forward (line 13) | def benchmark_forward( function time_fwd (line 34) | def time_fwd(func, *args, **kwargs): function decode_attention_sglang (line 39) | def decode_attention_sglang( function decode_attention_flashinfer (line 98) | def decode_attention_flashinfer(dtype, head_num_q, head_num_kv): function convert_to_cudnn_type (line 159) | def convert_to_cudnn_type(torch_type): function decode_attention_cudnn (line 174) | def decode_attention_cudnn( function calculate_diff (line 287) | def calculate_diff(): FILE: benchmark/kernels/deepep/deepep_utils.py function init_dist (line 12) | def init_dist(local_rank: int, num_local_ranks: int, args): function calc_diff (line 36) | def calc_diff(x: torch.Tensor, y: torch.Tensor): function per_token_cast_to_fp8 (line 43) | def per_token_cast_to_fp8(x: torch.Tensor): function per_token_cast_back (line 53) | def per_token_cast_back(x_fp8: torch.Tensor, x_scales: torch.Tensor): function inplace_unique (line 59) | def inplace_unique(x: torch.Tensor, num_slots: int): function create_grouped_scores (line 74) | def create_grouped_scores( function bench (line 84) | def bench(fn, num_warmups: int = 20, num_tests: int = 30, post_fn=None): class empty_suppress (line 114) | class empty_suppress: method __enter__ (line 115) | def __enter__(self): method __exit__ (line 118) | def __exit__(self, *_): class suppress_stdout_stderr (line 122) | class suppress_stdout_stderr: method __enter__ (line 123) | def __enter__(self): method __exit__ (line 143) | def __exit__(self, *_): function bench_kineto (line 157) | def bench_kineto( function hash_tensor (line 217) | def hash_tensor(t: torch.Tensor): FILE: benchmark/kernels/deepep/tuning_deepep.py function test_main (line 30) | def test_main( function _write_output (line 421) | def _write_output(args, output_data): function test_loop (line 429) | def test_loop(local_rank: int, num_local_ranks: int, args): FILE: benchmark/kernels/deepseek/benchmark_deepgemm_fp8_gemm.py function tl_gemm (line 21) | def tl_gemm( function per_token_cast_to_fp8 (line 98) | def per_token_cast_to_fp8(x: torch.Tensor) -> Tuple[torch.Tensor, torch.... function per_block_cast_to_fp8 (line 108) | def per_block_cast_to_fp8(x: torch.Tensor) -> Tuple[torch.Tensor, torch.... function fp8_gemm_deepgemm (line 123) | def fp8_gemm_deepgemm( function fp8_gemm_sglang (line 140) | def fp8_gemm_sglang( function fp8_gemm_vllm (line 159) | def fp8_gemm_vllm( function calculate_diff (line 178) | def calculate_diff(m: int, n: int, k: int): function get_weight_shapes (line 236) | def get_weight_shapes(tp_size): function create_benchmark_configs (line 269) | def create_benchmark_configs(tp_size): function get_benchmark (line 281) | def get_benchmark(tp_size): FILE: benchmark/kernels/deepseek/benchmark_deepgemm_fp8_gemm_blackwell.py function per_block_cast_to_fp8 (line 19) | def per_block_cast_to_fp8(x: torch.Tensor) -> Tuple[torch.Tensor, torch.... function get_weight_shapes (line 35) | def get_weight_shapes(tp_size): function create_benchmark_configs (line 68) | def create_benchmark_configs(tp_size): function fp8_gemm_flashinfer (line 80) | def fp8_gemm_flashinfer( function fp8_gemm_deepgemm_blackwell (line 98) | def fp8_gemm_deepgemm_blackwell( function check_accuracy (line 112) | def check_accuracy(a, b, atol, rtol, percent): function calculate_diff (line 136) | def calculate_diff(m: int, n: int, k: int): function _benchmark (line 178) | def _benchmark(m, n, k, tp_size, provider): function get_benchmark_plot_friendly (line 231) | def get_benchmark_plot_friendly(tp_size): function get_benchmark (line 256) | def get_benchmark(tp_size): FILE: benchmark/kernels/deepseek/benchmark_deepgemm_fp8_group_gemm.py function construct_grouped_and_flat_fp8 (line 18) | def construct_grouped_and_flat_fp8( function fp8_gemm_group_triton_kernel (line 87) | def fp8_gemm_group_triton_kernel( function fp8_gemm_group_triton (line 183) | def fp8_gemm_group_triton(a_tuple, b_tuple, c, num_groups): function fp8_gemm_group_deepgemm (line 244) | def fp8_gemm_group_deepgemm(x_fp8_grouped, y_fp8_grouped, out, m_indices): function calculate_diff (line 254) | def calculate_diff(m: int, n: int, k: int, num_groups: int): function get_weight_shapes (line 321) | def get_weight_shapes(tp_size): function create_benchmark_configs (line 354) | def create_benchmark_configs(tp_size): function get_benchmark (line 367) | def get_benchmark(tp_size): FILE: benchmark/kernels/elementwise/benchmark_concat_mla.py function create_data (line 15) | def create_data(num_tokens): function fn_torch (line 36) | def fn_torch(k, k_nope, k_rope): function fn_hack_non_strided (line 41) | def fn_hack_non_strided(k, k_nope, k_rope): function fn_torch_compiled (line 50) | def fn_torch_compiled(k, k_nope, k_rope): function fn_cuda (line 54) | def fn_cuda(k, k_nope, k_rope): function fn_triton_kernel (line 59) | def fn_triton_kernel( function fn_triton (line 109) | def fn_triton(k, k_nope, k_rope): function execute_and_get_output (line 130) | def execute_and_get_output(f, data): function benchmark (line 182) | def benchmark(num_tokens, provider): FILE: benchmark/kernels/flashinfer_allreduce_fusion/benchmark_fused_collective.py function setup_flashinfer_workspace (line 85) | def setup_flashinfer_workspace( function cleanup_flashinfer_workspace (line 122) | def cleanup_flashinfer_workspace(ipc_handles): class FlashInferFusedAllReduceParams (line 134) | class FlashInferFusedAllReduceParams: method __init__ (line 137) | def __init__( method get_trtllm_fused_allreduce_kwargs (line 152) | def get_trtllm_fused_allreduce_kwargs(self): function flashinfer_fused_allreduce_rmsnorm (line 162) | def flashinfer_fused_allreduce_rmsnorm( function flashinfer_fused_allreduce_rmsnorm_fp8_quant (line 202) | def flashinfer_fused_allreduce_rmsnorm_fp8_quant( function flashinfer_fused_allreduce_rmsnorm_fp4_quant (line 244) | def flashinfer_fused_allreduce_rmsnorm_fp4_quant( function standard_allreduce_rmsnorm (line 287) | def standard_allreduce_rmsnorm( function standard_allreduce_rmsnorm_fp8_quant (line 316) | def standard_allreduce_rmsnorm_fp8_quant( function standard_allreduce_rmsnorm_fp4_quant (line 353) | def standard_allreduce_rmsnorm_fp4_quant( function standard_allreduce_rmsnorm_native (line 397) | def standard_allreduce_rmsnorm_native( function standard_allreduce_rmsnorm_fp8_quant_native (line 415) | def standard_allreduce_rmsnorm_fp8_quant_native( function standard_allreduce_rmsnorm_fp4_quant_native (line 443) | def standard_allreduce_rmsnorm_fp4_quant_native( function standard_allreduce_rmsnorm_native_compiled (line 478) | def standard_allreduce_rmsnorm_native_compiled( function standard_allreduce_rmsnorm_fp8_quant_native_compiled (line 491) | def standard_allreduce_rmsnorm_fp8_quant_native_compiled( function standard_allreduce_rmsnorm_fp4_quant_native_compiled (line 511) | def standard_allreduce_rmsnorm_fp4_quant_native_compiled( function create_test_tensors (line 532) | def create_test_tensors( function benchmark_operation (line 566) | def benchmark_operation( function run_benchmarks (line 605) | def run_benchmarks( function prepare_results_with_speedups (line 894) | def prepare_results_with_speedups(results_dict): function print_results (line 993) | def print_results(results_dict, seq_len, hidden_dim, dtype, use_residual... function format_results_markdown (line 1019) | def format_results_markdown( function save_results_to_file (line 1064) | def save_results_to_file( function main (line 1087) | def main(): FILE: benchmark/kernels/fused_moe_triton/benchmark_sglang_fused_moe_triton.py function fused_moe_triton_api (line 31) | def fused_moe_triton_api( function fused_moe_sglang_api (line 61) | def fused_moe_sglang_api( function benchmark (line 115) | def benchmark( function main (line 190) | def main(): FILE: benchmark/kernels/fused_moe_triton/benchmark_torch_compile_fused_moe.py function get_model_config (line 16) | def get_model_config(model_name: str, tp_size: int): function fused_topk_native (line 77) | def fused_topk_native( function fused_moe_torch (line 97) | def fused_moe_torch( function fused_moe_torch_compile (line 127) | def fused_moe_torch_compile( function fused_moe_sglang_api (line 153) | def fused_moe_sglang_api( function benchmark (line 203) | def benchmark(batch_size, provider, model_config, use_fp8_w8a8=False): function main (line 281) | def main(): FILE: benchmark/kernels/fused_moe_triton/benchmark_vllm_vs_sglang_fused_moe_triton.py function fused_moe_vllm_api (line 22) | def fused_moe_vllm_api( function fused_moe_sglang_api (line 68) | def fused_moe_sglang_api( function benchmark (line 120) | def benchmark(batch_size, provider, model_config, use_fp8_w8a8=False): function main (line 214) | def main(): FILE: benchmark/kernels/fused_moe_triton/common_utils.py class BenchmarkConfig (line 14) | class BenchmarkConfig(TypedDict): function calculate_shard_intermediate_size (line 23) | def calculate_shard_intermediate_size( function get_model_config (line 32) | def get_model_config( function get_rocm_configs_compute_bound (line 161) | def get_rocm_configs_compute_bound() -> List[Dict[str, int]]: function get_configs_compute_bound (line 184) | def get_configs_compute_bound() -> List[Dict[str, int]]: function sort_config (line 208) | def sort_config(config: BenchmarkConfig) -> BenchmarkConfig: function save_configs (line 223) | def save_configs( function get_config_filename (line 233) | def get_config_filename( function get_default_batch_sizes (line 271) | def get_default_batch_sizes() -> List[int]: FILE: benchmark/kernels/fused_moe_triton/tuning_client.py function read_long_prompt (line 21) | def read_long_prompt(): function openai_stream_test (line 31) | def openai_stream_test(model, ip, port): FILE: benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py function benchmark_config (line 40) | def benchmark_config( class BenchmarkWorker (line 236) | class BenchmarkWorker: method __init__ (line 238) | def __init__(self, seed: int, server_args: ServerArgs) -> None: method benchmark (line 247) | def benchmark( method tune (line 315) | def tune( function main (line 365) | def main(args: argparse.Namespace): FILE: benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton_sep.py class MoeInputs (line 45) | class MoeInputs: class KernelWrapper (line 52) | class KernelWrapper: method __init__ (line 53) | def __init__(self, moe_inputs, use_cuda_graph=True, inner_iter=10, **k... method cuda_graph_wrapper (line 64) | def cuda_graph_wrapper(self): method forward_cost (line 95) | def forward_cost(self, try_cnt=2): function load_topk_ids (line 119) | def load_topk_ids(topk_ids_dir, i: int): function benchmark_config (line 128) | def benchmark_config( class BestConfigTrace (line 408) | class BestConfigTrace: method __init__ (line 409) | def __init__(self, name, down_moe=False): method update (line 414) | def update(self, config, time_cost_all): method time_cost (line 426) | def time_cost(self, block_m): method config_dict (line 432) | def config_dict(self, block_m): class BenchmarkWorker (line 445) | class BenchmarkWorker: method __init__ (line 447) | def __init__(self, seed: int, server_args: ServerArgs) -> None: method benchmark (line 456) | def benchmark( method tune (line 494) | def tune( method cmp_configs (line 563) | def cmp_configs( function save_configs_sep (line 621) | def save_configs_sep( function main (line 659) | def main(args: argparse.Namespace): FILE: benchmark/kernels/quantization/bench_fp4_quant.py function _test_accuracy_once (line 17) | def _test_accuracy_once(E, M, K, input_dtype, device): function benchmark (line 53) | def benchmark(M, K, provider): function test_accuracy (line 114) | def test_accuracy(): FILE: benchmark/kernels/quantization/bench_int8_quant.py function torch_int8_quant (line 12) | def torch_int8_quant(x): function _test_accuracy_once (line 23) | def _test_accuracy_once(M, K, input_dtype, device): function test_accuracy (line 35) | def test_accuracy(): function benchmark (line 59) | def benchmark(batch_size, provider): FILE: benchmark/kernels/quantization/tuning_block_wise_kernel.py function w8a8_block_matmul (line 46) | def w8a8_block_matmul( function get_rocm_configs_compute_bound (line 138) | def get_rocm_configs_compute_bound(): function get_configs_compute_bound (line 161) | def get_configs_compute_bound(): function get_weight_shapes (line 185) | def get_weight_shapes(tp_size): function benchmark_config (line 218) | def benchmark_config( function tune (line 245) | def tune(M, N, K, block_size, out_dtype, search_space, input_type): function save_configs (line 318) | def save_configs( function get_available_gpu_count (line 354) | def get_available_gpu_count(): function tune_on_gpu (line 359) | def tune_on_gpu(args_dict): function distribute_batch_sizes (line 405) | def distribute_batch_sizes(batch_sizes, num_gpus): function main (line 415) | def main(args): FILE: benchmark/kernels/scheduler_batch/benchmark_get_last_loc_triton.py function get_last_loc_torch (line 11) | def get_last_loc_torch( function get_last_loc_kernel (line 24) | def get_last_loc_kernel( function get_last_loc_triton (line 47) | def get_last_loc_triton( function test_get_last_loc (line 69) | def test_get_last_loc(): function get_benchmark (line 94) | def get_benchmark(): function run_benchmark (line 144) | def run_benchmark(save_path: str = "./configs/benchmark_ops/get_last_loc... FILE: benchmark/kernels/scheduler_batch/benchmark_write_req_to_token_pool_triton.py function write_req_to_token_pool_triton (line 12) | def write_req_to_token_pool_triton( function write_req_to_token_pool_triton_optimize (line 49) | def write_req_to_token_pool_triton_optimize( function write_req_to_token_pool_reference (line 91) | def write_req_to_token_pool_reference( function test_write_req_to_token_pool (line 114) | def test_write_req_to_token_pool(): function get_benchmark (line 231) | def get_benchmark(): function run_benchmark (line 315) | def run_benchmark(save_path: str = "./configs/benchmark_ops/write_req_to... FILE: benchmark/kernels/sliding_window_attention_triton/bench_triton_swa_kernel.py function extend_attention_fwd_torch (line 11) | def extend_attention_fwd_torch( function _build_batch (line 85) | def _build_batch( function _run_triton (line 177) | def _run_triton(inputs): function _run_torch_ref (line 196) | def _run_torch_ref(inputs): function bench (line 241) | def bench( FILE: benchmark/line_retrieval/bench_sglang.py function line_retrieval (line 17) | def line_retrieval(s, prefix, suffix, body_0, body_1, body_2, body_3): function eval_model (line 30) | def eval_model(args, line_obj, num_hoops, src_indices, dst_percents): function main (line 131) | def main(args): FILE: benchmark/line_retrieval/gen_data.py function generate_lines (line 16) | def generate_lines(random_words, num_lines, redirect_ratio): FILE: benchmark/llava_bench/bench_sglang.py function image_qa (line 17) | def image_qa(s, image_file, question): function main (line 22) | def main(args): FILE: benchmark/llm_judge/bench_other.py function multi_dimension_judge (line 24) | def multi_dimension_judge(article, generate): function multi_dimension_judge_async (line 52) | async def multi_dimension_judge_async(article, generate): function main (line 80) | def main(args): FILE: benchmark/llm_judge/bench_sglang.py function multi_dimension_judge (line 25) | def multi_dimension_judge(s, article): function main (line 54) | def main(args): FILE: benchmark/long_json_decode/bench_other.py function json_decode (line 13) | def json_decode(document, generate): function main (line 30) | def main(args): FILE: benchmark/long_json_decode/bench_sglang.py function json_decode (line 14) | def json_decode(s, document): function main (line 34) | def main(args): FILE: benchmark/lora/launch_server.py function launch_server (line 11) | def launch_server(args): FILE: benchmark/lora/lora_bench.py function async_request_openai_completions (line 46) | async def async_request_openai_completions( function benchmark (line 147) | async def benchmark( function run_benchmark (line 343) | def run_benchmark(args_: argparse.Namespace): function set_ulimit (line 401) | def set_ulimit(target_soft_limit=65535): FILE: benchmark/mmlu/bench_other.py function format_subject (line 20) | def format_subject(subject): function format_example (line 28) | def format_example(df, idx, include_answer=True): function gen_prompt (line 39) | def gen_prompt(train_df, subject, k=-1): function evaluate (line 50) | def evaluate(args, subject, dev_df, test_df, call_generate): function main (line 115) | def main(args): FILE: benchmark/mmlu/bench_sglang.py function format_subject (line 25) | def format_subject(subject): function format_example (line 33) | def format_example(df, idx, include_answer=True): function gen_prompt (line 44) | def gen_prompt(train_df, subject, k=-1): function download_data (line 55) | def download_data(data_dir): function main (line 77) | def main(args): FILE: benchmark/mmmu/bench_hf.py function eval_mmmu (line 18) | def eval_mmmu(args): FILE: benchmark/mmmu/bench_sglang.py class RequestFuncOutput (line 42) | class RequestFuncOutput: function async_request_profile (line 54) | async def async_request_profile(api_url: str) -> RequestFuncOutput: function _get_prefix_suffix (line 72) | def _get_prefix_suffix(prompt: str) -> Tuple[str, str]: function process_sample (line 79) | async def process_sample( function process_sample_with_semaphore (line 127) | async def process_sample_with_semaphore( function eval_mmmu (line 143) | async def eval_mmmu(args) -> None: function parse_args (line 236) | def parse_args(): function main (line 249) | def main(): FILE: benchmark/mmmu/data_utils.py function get_multi_choice_info (line 78) | def get_multi_choice_info(options): function load_yaml (line 94) | def load_yaml(file_path): function parse_img_path (line 104) | def parse_img_path(text): function process_single_sample (line 109) | def process_single_sample(data): function save_json (line 138) | def save_json(filename, ds): function save_jsonl (line 145) | def save_jsonl(filename, data): function save_args (line 163) | def save_args(args, path_dir): function construct_prompt (line 173) | def construct_prompt(sample, config): FILE: benchmark/mmmu/eval_utils.py class EvalArgs (line 28) | class EvalArgs: method add_cli_args (line 46) | def add_cli_args(parser: argparse.ArgumentParser): method from_cli_args (line 133) | def from_cli_args(cls, args: argparse.Namespace): function set_seed (line 138) | def set_seed(seed_value): function prepare_samples (line 154) | def prepare_samples(eval_args: EvalArgs): function get_sampling_params (line 258) | def get_sampling_params(eval_args): function parse_multi_choice_response (line 276) | def parse_multi_choice_response(response, all_choices, index2ans): function check_is_number (line 332) | def check_is_number(string): function normalize_str (line 344) | def normalize_str(string): function extract_numbers (line 369) | def extract_numbers(string): function parse_open_response (line 392) | def parse_open_response(response): function eval_multi_choice (line 468) | def eval_multi_choice(gold_i, pred_i): function eval_open (line 497) | def eval_open(gold_i, pred_i): function evaluate (line 526) | def evaluate(samples): function calculate_ins_level_acc (line 553) | def calculate_ins_level_acc(results: Dict): function process_result (line 565) | def process_result(response, sample, answer_dict, out_samples): function eval_result (line 589) | def eval_result(model_answer_path, answer_dict, eval_output_path=None): FILE: benchmark/mtbench/bench_other.py function load_questions (line 15) | def load_questions(filename): function write_answers (line 24) | def write_answers(filename, model_id, questions, answers): function main (line 40) | def main(args): FILE: benchmark/mtbench/bench_sglang.py function load_questions (line 15) | def load_questions(filename): function write_answers (line 24) | def write_answers(filename, model_id, questions, answers): function answer_mt_bench (line 41) | def answer_mt_bench(s, question_1, question_2): function main (line 49) | def main(args): FILE: benchmark/mtbench/bench_sglang_eagle.py function load_questions (line 24) | def load_questions(filename): function write_answers (line 33) | def write_answers(filename, model_id, questions, answers): function answer_mt_bench (line 50) | def answer_mt_bench(s, question_1, question_2): function main (line 60) | def main(args): FILE: benchmark/multi_chain_reasoning/bench_other.py function get_answer_value (line 18) | def get_answer_value(answer_str): function multi_chain_gsm8k (line 39) | def multi_chain_gsm8k(question, num_chains, call_generate): function multi_chain_gsm8k_async (line 65) | async def multi_chain_gsm8k_async(question, num_chains, call_generate): function main (line 91) | def main(args): FILE: benchmark/multi_chain_reasoning/bench_sglang.py function get_answer_value (line 18) | def get_answer_value(answer_str): function main (line 39) | def main(args): FILE: benchmark/multi_document_qa/bench_other.py function multi_document_qa (line 18) | def multi_document_qa(docs, question, generate): function main (line 37) | def main(args): FILE: benchmark/multi_document_qa/bench_sglang.py function multi_document_qa (line 14) | def multi_document_qa(s, docs, question): function main (line 33) | def main(args): FILE: benchmark/multi_turn_chat/bench_other.py function multi_turns (line 15) | def multi_turns(generate, qas): function main (line 24) | def main(args): FILE: benchmark/multi_turn_chat/bench_sglang.py function multi_turns (line 17) | def multi_turns(s, qas): function main (line 23) | def main(args): FILE: benchmark/multi_turn_chat/data_gen.py function gen_prompt (line 7) | def gen_prompt(tokenizer, token_num): function gen_arguments (line 15) | def gen_arguments(args, tokenizer): FILE: benchmark/multi_turn_chat/long_prompt_multi_turn.py function gen_prompt (line 18) | def gen_prompt(tokenizer, token_num): function get_cache_path (line 25) | def get_cache_path(args): function gen_arguments (line 34) | def gen_arguments(args, tokenizer): function multi_turns (line 72) | def multi_turns(s, system_prompt, qas): function main (line 80) | def main(args): FILE: benchmark/prefill_only/bench_embeddings.py function build_embeddings_request (line 81) | def build_embeddings_request(index: int, item_count: int) -> tuple: function validate_embeddings_response (line 100) | def validate_embeddings_response(response_data: dict) -> bool: function build_warmup_embeddings_request (line 110) | def build_warmup_embeddings_request() -> dict: function run_benchmark (line 122) | async def run_benchmark(rps, duration_secs, item_count): function main (line 137) | async def main(): FILE: benchmark/prefill_only/bench_score.py function create_score_request_builder (line 65) | def create_score_request_builder(): function validate_score_response (line 115) | def validate_score_response(response_data: dict) -> bool: function build_warmup_score_request (line 120) | def build_warmup_score_request() -> dict: function run_benchmark (line 155) | async def run_benchmark(rps, duration_secs, item_count): function main (line 173) | async def main(): FILE: benchmark/prefill_only/util.py class BenchmarkConfig (line 22) | class BenchmarkConfig: method __init__ (line 25) | def __init__(self): function generate_text_with_token_count (line 46) | def generate_text_with_token_count( function setup_profiler (line 87) | def setup_profiler(config: BenchmarkConfig, benchmark_name: str) -> None: function prepare_all_requests_parallel (line 106) | def prepare_all_requests_parallel( function sleep_with_distribution (line 187) | async def sleep_with_distribution(distribution: str, rps: float) -> None: function build_http_request_json (line 208) | def build_http_request_json(request_data: Any) -> str: function make_http_call (line 221) | async def make_http_call( function send_profile_request (line 284) | async def send_profile_request( function call_freeze_gc_http (line 331) | async def call_freeze_gc_http(session: aiohttp.ClientSession, http_url: ... function send_warmup_requests (line 359) | async def send_warmup_requests( function perform_global_warmup_and_freeze (line 398) | async def perform_global_warmup_and_freeze( function process_results (line 430) | async def process_results( function print_csv_results (line 573) | def print_csv_results(all_results: List[Dict[str, Any]]) -> None: function run_benchmark_main (line 628) | async def run_benchmark_main( function run_generic_benchmark (line 696) | async def run_generic_benchmark( FILE: benchmark/react/bench_other.py function get_prompt (line 12) | def get_prompt(question): function main (line 86) | def main(args): FILE: benchmark/react/bench_sglang.py function webthink (line 14) | def webthink(s, question, triplets): function main (line 109) | def main(args): FILE: benchmark/reasoning_benchmark/answer_extraction.py function _fix_fracs (line 8) | def _fix_fracs(string): function _fix_a_slash_b (line 40) | def _fix_a_slash_b(string): function _fix_sqrt (line 57) | def _fix_sqrt(string): function _fix_tan (line 63) | def _fix_tan(string): function strip_string (line 69) | def strip_string(string): function extract_boxed_answers (line 182) | def extract_boxed_answers(text): function extract_program_output (line 200) | def extract_program_output(pred_str): function extract_answer (line 214) | def extract_answer(pred_str, exhaust=False): function extract_math_answer (line 253) | def extract_math_answer(question, reasoning, task): FILE: benchmark/reasoning_benchmark/bench_sglang.py function reasoning_gen (line 19) | def reasoning_gen(s, question: str): function convert_dataset (line 31) | def convert_dataset(path: str, question_key: str, answer_key: str, num_t... function main (line 44) | def main(args): FILE: benchmark/reasoning_benchmark/eval_utils.py function parse_digits (line 11) | def parse_digits(num): function is_digit (line 28) | def is_digit(num): function symbolic_equal (line 33) | def symbolic_equal(a, b): function math_equal (line 59) | def math_equal(prediction, reference, include_percentage=True, is_close=... FILE: benchmark/tip_suggestion/bench_other.py function expand_tip (line 15) | def expand_tip(topic, tip, generate): function suggest_tips (line 34) | def suggest_tips(topic, generate): function main (line 57) | def main(args): FILE: benchmark/tip_suggestion/bench_sglang.py function expand_tip (line 16) | def expand_tip(s, topic, tip): function suggest_tips (line 36) | def suggest_tips(s, topic): function main (line 54) | def main(args): FILE: benchmark/tip_suggestion/lmql_funcs.py function expand_tip_async (line 4) | async def expand_tip_async(topic, tip, generate): function suggest_tips_async (line 23) | async def suggest_tips_async(topic, generate): FILE: benchmark/tree_of_thought_deep/bench_other.py function get_answer_value (line 18) | def get_answer_value(answer_str): function most_frequent_number (line 29) | def most_frequent_number(numbers): function propose_plan (line 47) | def propose_plan(s, question, num_branches, call_generate): function execute_plan (line 62) | def execute_plan(s, num_branches, call_generate): function reflect_solution (line 75) | def reflect_solution(s, num_branches, call_generate): function get_final_answer (line 88) | def get_final_answer(s, num_branches, call_generate): function tree_search (line 101) | def tree_search(question, num_branches, call_generate): function main (line 122) | def main(args): FILE: benchmark/tree_of_thought_deep/bench_sglang.py function get_answer_value (line 20) | def get_answer_value(answer_str): function most_frequent_number (line 31) | def most_frequent_number(numbers): function propose_plan (line 44) | def propose_plan(s, question, num_branches): function execute_plan (line 54) | def execute_plan(s, num_branches): function reflect_solution (line 63) | def reflect_solution(s, num_branches): function get_final_answer (line 72) | def get_final_answer(s, num_branches): function tree_search (line 82) | def tree_search(s, question, num_branches): function main (line 104) | def main(args): FILE: benchmark/tree_of_thought_deep/lmql_funcs.py function propose_plan_async (line 10) | async def propose_plan_async(s, question, num_branches, call_generate): function execute_plan_async (line 25) | async def execute_plan_async(s, num_branches, call_generate): function reflect_solution_async (line 38) | async def reflect_solution_async(s, num_branches, call_generate): function get_final_answer_async (line 51) | async def get_final_answer_async(s, num_branches, call_generate): function tree_search_async (line 64) | async def tree_search_async(question, num_branches, call_generate): FILE: benchmark/tree_of_thought_v0/bench_other.py function get_answer_value (line 18) | def get_answer_value(answer_str): function most_frequent_number (line 29) | def most_frequent_number(numbers): function propose_plan (line 47) | def propose_plan(s, question, num_branches, call_generate): function execute_plan (line 62) | def execute_plan(s, num_branches, call_generate): function reflect_solution (line 75) | def reflect_solution(s, num_branches, call_generate): function tree_search (line 88) | def tree_search(question, num_branches, call_generate): function main (line 102) | def main(args): FILE: benchmark/tree_of_thought_v0/bench_sglang.py function get_answer_value (line 20) | def get_answer_value(answer_str): function most_frequent_number (line 31) | def most_frequent_number(numbers): function propose_plan (line 44) | def propose_plan(s, question, num_branches): function execute_plan (line 54) | def execute_plan(s, num_branches): function reflect_solution (line 63) | def reflect_solution(s, num_branches): function tree_search (line 73) | def tree_search(s, question, num_branches): function main (line 93) | def main(args): FILE: docs/conf.py function setup (line 136) | def setup(app): FILE: docs/deploy.py function run_cmd (line 7) | def run_cmd(cmd): FILE: docs/performance_dashboard/app.js constant GITHUB_REPO (line 3) | const GITHUB_REPO = 'sgl-project/sglang'; constant WORKFLOW_NAME (line 4) | const WORKFLOW_NAME = 'nightly-test-nvidia.yml'; constant ARTIFACT_PREFIX (line 5) | const ARTIFACT_PREFIX = 'consolidated-metrics-'; function init (line 40) | async function init() { function loadData (line 58) | async function loadData() { function fetchWorkflowRuns (line 93) | async function fetchWorkflowRuns() { function fetchMetricsForRun (line 112) | async function fetchMetricsForRun(run) { function isDiffusionResult (line 151) | function isDiffusionResult(result) { function populateFilters (line 156) | function populateFilters() { function formatIoLenLabel (line 227) | function formatIoLenLabel(ioKey) { function sortIoLengths (line 237) | function sortIoLengths(ioLengths) { function populateSelectWithLabels (line 247) | function populateSelectWithLabels(selectId, options, labelFormatter) { function updateIoLenFilter (line 258) | function updateIoLenFilter() { function updateVariantFilter (line 308) | function updateVariantFilter() { function populateSelect (line 344) | function populateSelect(selectId, options) { function populateSelectNoAll (line 354) | function populateSelectNoAll(selectId, options) { function createMetricTabs (line 368) | function createMetricTabs() { function detectCurrentDataType (line 396) | function detectCurrentDataType() { function selectMetricTab (line 416) | function selectMetricTab(metricKey, tabElement) { function handleModelFilterChange (line 429) | function handleModelFilterChange(model) { function handleGpuFilterChange (line 441) | function handleGpuFilterChange() { function updateStats (line 452) | function updateStats() { function updateCharts (line 498) | function updateCharts() { function prepareChartData (line 512) | function prepareChartData(gpuFilter, modelFilter, variantFilter, ioLenFi... function prepareChartDataByBatch (line 581) | function prepareChartDataByBatch(gpuFilter, modelFilter, variantFilter, ... function updateMetricChart (line 741) | function updateMetricChart(chartDataByBatch, metricType) { function getChartOptions (line 826) | function getChartOptions(yAxisLabel) { function escapeHtml (line 880) | function escapeHtml(text) { function updateRunsTable (line 887) | function updateRunsTable() { function refreshData (line 937) | async function refreshData() { function formatNumber (line 944) | function formatNumber(num) { function getAuthHeaders (line 955) | function getAuthHeaders() { function checkAuthAndInit (line 964) | async function checkAuthAndInit() { function handleLogin (line 1014) | async function handleLogin(event) { FILE: docs/performance_dashboard/fetch_metrics.py function get_github_token (line 31) | def get_github_token() -> Optional[str]: function get_headers (line 55) | def get_headers(token: Optional[str]) -> dict: function fetch_workflow_runs (line 65) | def fetch_workflow_runs( function fetch_run_artifacts (line 97) | def fetch_run_artifacts(token: Optional[str], run_id: int) -> list: function download_artifact (line 107) | def download_artifact(token: Optional[str], artifact_id: int) -> Optiona... function extract_metrics_from_zip (line 128) | def extract_metrics_from_zip(zip_content: bytes) -> Optional[dict]: function fetch_metrics_for_run (line 144) | def fetch_metrics_for_run(token: Optional[str], run: dict) -> Optional[d... function fetch_single_run (line 184) | def fetch_single_run(token: Optional[str], run_id: int) -> Optional[dict]: function main (line 195) | def main(): FILE: docs/performance_dashboard/server.py function hash_password (line 64) | def hash_password(password): function create_auth_token (line 69) | def create_auth_token(): function verify_auth_token (line 82) | def verify_auth_token(token): function get_github_token (line 95) | def get_github_token(): function fetch_metrics_from_github (line 117) | def fetch_metrics_from_github(days=30): function update_cache_async (line 219) | def update_cache_async(): function start_periodic_refresh (line 237) | def start_periodic_refresh(interval_hours): class DashboardHandler (line 252) | class DashboardHandler(http.server.SimpleHTTPRequestHandler): method __init__ (line 255) | def __init__(self, *args, directory=None, **kwargs): method _send_json (line 258) | def _send_json(self, data, status=200): method _check_auth (line 266) | def _check_auth(self): method do_GET (line 278) | def do_GET(self): method do_POST (line 297) | def do_POST(self): method handle_auth_check (line 305) | def handle_auth_check(self): method handle_login (line 309) | def handle_login(self): method handle_metrics_api (line 335) | def handle_metrics_api(self, parsed): method handle_refresh_api (line 351) | def handle_refresh_api(self): method log_message (line 356) | def log_message(self, format, *args): function main (line 361) | def main(): FILE: docs/release_lookup/generate_index.py function run_git (line 14) | def run_git(cmd): function is_stable_release (line 23) | def is_stable_release(tag_name): function get_tags (line 31) | def get_tags(): function extract_pr_num (line 60) | def extract_pr_num(message): function process_tag_line (line 75) | def process_tag_line(tags, commit_map, pr_map, tag_type, tag_to_idx): function main (line 129) | def main(): FILE: docs/wrap_run_llm.py function insert_runllm_widget (line 5) | def insert_runllm_widget(html_content): function process_html_files (line 16) | def process_html_files(build_dir): function main (line 34) | def main(): FILE: examples/checkpoint_engine/update.py function timer (line 29) | def timer(msg: str): function check_sglang_ready (line 36) | def check_sglang_ready( function split_checkpoint_files (line 60) | def split_checkpoint_files( function split_tensors (line 73) | def split_tensors( function req_inference (line 94) | def req_inference( function update_weights (line 123) | def update_weights( function join (line 161) | def join( FILE: examples/frontend_language/quick_start/anthropic_example_chat.py function multi_turn_question (line 11) | def multi_turn_question(s, question_1, question_2): function single (line 18) | def single(): function stream (line 30) | def stream(): function batch (line 42) | def batch(): FILE: examples/frontend_language/quick_start/anthropic_example_complete.py function few_shot_qa (line 11) | def few_shot_qa(s, question): function single (line 24) | def single(): function stream (line 33) | def stream(): function batch (line 43) | def batch(): FILE: examples/frontend_language/quick_start/azure_openai_example_chat.py function multi_turn_question (line 13) | def multi_turn_question(s, question_1, question_2): function single (line 21) | def single(): function stream (line 33) | def stream(): function batch (line 45) | def batch(): FILE: examples/frontend_language/quick_start/gemini_example_chat.py function multi_turn_question (line 11) | def multi_turn_question(s, question_1, question_2): function single (line 18) | def single(): function stream (line 30) | def stream(): function batch (line 42) | def batch(): FILE: examples/frontend_language/quick_start/gemini_example_complete.py function few_shot_qa (line 11) | def few_shot_qa(s, question): function single (line 24) | def single(): function stream (line 33) | def stream(): function batch (line 43) | def batch(): FILE: examples/frontend_language/quick_start/gemini_example_multimodal_chat.py function image_qa (line 11) | def image_qa(s, image_file1, image_file2, question): FILE: examples/frontend_language/quick_start/local_example_chat.py function multi_turn_question (line 10) | def multi_turn_question(s, question_1, question_2): function single (line 17) | def single(): function stream (line 29) | def stream(): function batch (line 41) | def batch(): FILE: examples/frontend_language/quick_start/local_example_complete.py function few_shot_qa (line 10) | def few_shot_qa(s, question): function single (line 23) | def single(): function stream (line 32) | def stream(): function batch (line 42) | def batch(): FILE: examples/frontend_language/quick_start/local_example_llava_next.py function image_qa (line 10) | def image_qa(s, image_path, question): function single (line 15) | def single(): function stream (line 22) | def stream(): function batch (line 35) | def batch(): FILE: examples/frontend_language/quick_start/openai_example_chat.py function multi_turn_question (line 11) | def multi_turn_question(s, question_1, question_2): function single (line 19) | def single(): function stream (line 31) | def stream(): function batch (line 43) | def batch(): FILE: examples/frontend_language/quick_start/openai_example_complete.py function few_shot_qa (line 11) | def few_shot_qa(s, question): function single (line 24) | def single(): function stream (line 33) | def stream(): function batch (line 43) | def batch(): FILE: examples/frontend_language/quick_start/openai_example_n.py function multi_turn_question (line 11) | def multi_turn_question(s, question_1, question_2): function single (line 24) | def single(): function batch (line 40) | def batch(): FILE: examples/frontend_language/quick_start/openai_example_o1.py function multi_turn_question (line 11) | def multi_turn_question(s, question_1, question_2): function single (line 19) | def single(): function batch (line 31) | def batch(): FILE: examples/frontend_language/quick_start/openrouter_example_chat.py function multi_turn_question (line 13) | def multi_turn_question(s, question_1, question_2): function single (line 21) | def single(): function stream (line 33) | def stream(): function batch (line 45) | def batch(): FILE: examples/frontend_language/quick_start/together_example_chat.py function multi_turn_question (line 13) | def multi_turn_question(s, question_1, question_2): function single (line 21) | def single(): function stream (line 33) | def stream(): function batch (line 45) | def batch(): FILE: examples/frontend_language/quick_start/together_example_complete.py function few_shot_qa (line 13) | def few_shot_qa(s, question): function single (line 26) | def single(): function stream (line 35) | def stream(): function batch (line 45) | def batch(): FILE: examples/frontend_language/usage/chinese_regex.py function character_gen (line 22) | def character_gen(s, name): function main (line 45) | def main(): FILE: examples/frontend_language/usage/choices_logprob.py function tool_use (line 11) | def tool_use(s, question): function main (line 16) | def main(): FILE: examples/frontend_language/usage/cot_decoding.py function cot_decoding (line 13) | def cot_decoding(s, question, get_top_k, is_chat_model, verbose): FILE: examples/frontend_language/usage/json_decode.py function character_gen (line 33) | def character_gen(s, name): function driver_character_gen (line 44) | def driver_character_gen(): class Weapon (line 49) | class Weapon(str, Enum): class Wizard (line 58) | class Wizard(BaseModel): function pydantic_wizard_gen (line 65) | def pydantic_wizard_gen(s): function driver_pydantic_wizard_gen (line 75) | def driver_pydantic_wizard_gen(): FILE: examples/frontend_language/usage/json_logprobs.py function openai_api_request (line 15) | def openai_api_request(name): function srt_api_request (line 38) | def srt_api_request(name): function pretty_print (line 70) | def pretty_print(res): FILE: examples/frontend_language/usage/llava_video/srt_example_llava_v.py function video_qa (line 20) | def video_qa(s, num_frames, video_path, question): function single (line 25) | def single(path, num_frames=16): function split_into_chunks (line 36) | def split_into_chunks(lst, num_chunks): function save_batch_results (line 50) | def save_batch_results(batch_video_files, states, cur_chunk, batch_idx, ... function compile_and_cleanup_final_results (line 60) | def compile_and_cleanup_final_results(cur_chunk, num_batches, save_dir): function find_video_files (line 75) | def find_video_files(video_dir): function batch (line 90) | def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, bat... FILE: examples/frontend_language/usage/openai_chat_speculative.py function gen_character_spec (line 24) | def gen_character_spec(s): function gen_character_spec_no_few_shot (line 42) | def gen_character_spec_no_few_shot(s): function gen_character_normal (line 55) | def gen_character_normal(s): function multi_turn_question (line 62) | def multi_turn_question(s, question_1, question_2): function test_spec_single_turn (line 80) | def test_spec_single_turn(): function test_inaccurate_spec_single_turn (line 93) | def test_inaccurate_spec_single_turn(): function test_normal_single_turn (line 103) | def test_normal_single_turn(): function test_spec_multi_turn (line 109) | def test_spec_multi_turn(): function test_spec_multi_turn_stream (line 122) | def test_spec_multi_turn_stream(): FILE: examples/frontend_language/usage/openai_speculative.py function gen_character_spec (line 10) | def gen_character_spec(s): function gen_character_no_spec (line 19) | def gen_character_no_spec(s): function gen_character_spec_no_few_shot (line 28) | def gen_character_spec_no_few_shot(s): FILE: examples/frontend_language/usage/parallel_sample.py function parallel_sample (line 10) | def parallel_sample(s, question, n): FILE: examples/frontend_language/usage/readme_examples.py function tool_use (line 11) | def tool_use(s, question): function tip_suggestion (line 26) | def tip_suggestion(s): function regular_expression_gen (line 43) | def regular_expression_gen(s): function text_qa (line 53) | def text_qa(s, question): function driver_tool_use (line 58) | def driver_tool_use(): function driver_tip_suggestion (line 64) | def driver_tip_suggestion(): function driver_regex (line 70) | def driver_regex(): function driver_batching (line 76) | def driver_batching(): function driver_stream (line 91) | def driver_stream(): FILE: examples/frontend_language/usage/sgl_gen_min_tokens.py function long_answer (line 12) | def long_answer(s): function short_answer (line 18) | def short_answer(s): FILE: examples/frontend_language/usage/streaming.py function multi_turn_question (line 12) | def multi_turn_question(s, question_1, question_2): function stream_a_variable (line 23) | def stream_a_variable(): function async_stream (line 35) | async def async_stream(): FILE: examples/frontend_language/usage/triton/models/character_generation/1/model.py class Character (line 12) | class Character(BaseModel): function character_gen (line 19) | def character_gen(s, name): class TritonPythonModel (line 29) | class TritonPythonModel: method initialize (line 30) | def initialize(self, args): method execute (line 33) | def execute(self, requests): FILE: examples/profiler/nsys_profile_tools/gputrc2graph.py function load_engine_model (line 18) | def load_engine_model(): class GPUTrace2Graph (line 34) | class GPUTrace2Graph: method __init__ (line 39) | def __init__(self): method gen_nonoverlapped_sum_from_gputrace (line 45) | def gen_nonoverlapped_sum_from_gputrace(self, in_file, out_file): method sum_non_overlapping_intervals (line 66) | def sum_non_overlapping_intervals(self, df): method make_html (line 107) | def make_html(self, df, output_dir, title): method anno_gpu_kernname (line 165) | def anno_gpu_kernname(self, df, mapping): method make_nongpu_row (line 175) | def make_nongpu_row(self, df, nongpu_sec): method is_valid_file (line 183) | def is_valid_file(self, base_file): method should_gen_file (line 189) | def should_gen_file(self, new_file, base_file): method gen_sum_file (line 203) | def gen_sum_file(self, file, nsys_cmd): method gen_graph (line 250) | def gen_graph(self, in_file, out_dir, title, nsys_cmd, engine_model): function parse_tuple (line 290) | def parse_tuple(s): function main (line 294) | def main(): FILE: examples/runtime/engine/custom_server.py function generate (line 14) | async def generate(request): function generate_stream (line 26) | async def generate_stream(request): function run_server (line 46) | def run_server(): FILE: examples/runtime/engine/embedding.py function main (line 4) | def main(): FILE: examples/runtime/engine/fastapi_engine_inference.py function lifespan (line 26) | async def lifespan(app: FastAPI): function generate_text (line 47) | async def generate_text(request: Request): function start_server (line 78) | def start_server(args, timeout=60): function send_requests (line 129) | def send_requests(server_url, prompts, max_new_tokens, temperature): FILE: examples/runtime/engine/launch_engine.py function main (line 8) | def main(): FILE: examples/runtime/engine/offline_batch_inference.py function main (line 13) | def main( FILE: examples/runtime/engine/offline_batch_inference_async.py class InferenceEngine (line 19) | class InferenceEngine: method __init__ (line 20) | def __init__(self, **kwargs): method generate (line 23) | async def generate(self, prompt, sampling_params): function run_server (line 28) | async def run_server(server_args): FILE: examples/runtime/engine/offline_batch_inference_eagle.py function main (line 4) | def main(): FILE: examples/runtime/engine/offline_batch_inference_qwen_1m.py function load_prompt (line 11) | def load_prompt() -> str: function process_requests (line 29) | def process_requests(llm: sgl.Engine, prompts: list[str]) -> None: function initialize_engine (line 50) | def initialize_engine() -> sgl.Engine: function main (line 67) | def main(): FILE: examples/runtime/engine/offline_batch_inference_vlm.py function main (line 14) | def main( FILE: examples/runtime/engine/save_remote_state.py function main (line 45) | def main(args): FILE: examples/runtime/engine/save_sharded_state.py function main (line 50) | def main(args): FILE: examples/runtime/hidden_states/hidden_states_engine.py function main (line 15) | def main(): FILE: examples/runtime/hidden_states/hidden_states_server.py function main (line 23) | def main(): FILE: examples/runtime/lora.py function main (line 16) | def main(): FILE: examples/runtime/multimodal/llama3_llava_server.py function send_request (line 27) | async def send_request(url, data, delay=0): function test_concurrent (line 35) | async def test_concurrent(args): function test_streaming (line 68) | def test_streaming(args): FILE: examples/runtime/multimodal/llava_onevision_server.py function download_video (line 27) | def download_video(url, cache_dir): function create_openai_client (line 41) | def create_openai_client(base_url): function image_stream_request_test (line 45) | def image_stream_request_test(client): function multi_image_stream_request_test (line 82) | def multi_image_stream_request_test(client): function video_stream_request_test (line 129) | def video_stream_request_test(client, video_path): function image_speed_test (line 152) | def image_speed_test(client): function video_speed_test (line 184) | def video_speed_test(client, video_path): function prepare_video_messages (line 202) | def prepare_video_messages(video_path): function print_speed_test_results (line 236) | def print_speed_test_results(request, start_time, end_time): function main (line 250) | def main(): FILE: examples/runtime/multimodal/pixtral_server.py function send_request (line 28) | async def send_request(url, data, delay=0): function test_concurrent (line 36) | async def test_concurrent(args): function test_streaming (line 73) | def test_streaming(args): FILE: examples/runtime/multimodal/qwen_llava_server.py function send_request (line 27) | async def send_request(url, data, delay=0): function test_concurrent (line 35) | async def test_concurrent(args): function test_streaming (line 68) | def test_streaming(args): FILE: examples/runtime/qwen3_vl_reranker.py function rerank_text_only (line 25) | def rerank_text_only(): function rerank_with_images (line 51) | def rerank_with_images(): function rerank_multimodal_query (line 106) | def rerank_multimodal_query(): function main (line 156) | def main(): FILE: examples/runtime/token_in_token_out/token_in_token_out_llm_engine.py function main (line 11) | def main(): FILE: examples/runtime/token_in_token_out/token_in_token_out_llm_server.py function main (line 23) | def main(): FILE: examples/runtime/token_in_token_out/token_in_token_out_vlm_engine.py function get_input_ids (line 14) | def get_input_ids( function token_in_out_example (line 37) | def token_in_out_example( FILE: examples/runtime/token_in_token_out/token_in_token_out_vlm_server.py function get_input_ids (line 26) | def get_input_ids() -> Tuple[list[int], list]: function main (line 45) | def main(): FILE: examples/runtime/vertex_predict.py class VertexPrediction (line 33) | class VertexPrediction: class LocalVertexEndpoint (line 37) | class LocalVertexEndpoint: method __init__ (line 38) | def __init__(self) -> None: method predict (line 41) | def predict(self, instances: List[dict], parameters: Optional[dict] = ... FILE: examples/usage/modelopt_quantize_and_export.py function _validate_export (line 26) | def _validate_export(export_dir: str) -> bool: function _get_export_info (line 56) | def _get_export_info(export_dir: str) -> Optional[dict]: function quantize_and_export_model (line 78) | def quantize_and_export_model( function deploy_exported_model (line 175) | def deploy_exported_model( function main (line 224) | def main(): FILE: python/sglang/_mps_stub.py class Stream (line 17) | class Stream: method __init__ (line 24) | def __init__(self, device: Any = None, priority: int = 0) -> None: method synchronize (line 27) | def synchronize(self) -> None: method wait_stream (line 30) | def wait_stream(self, stream: Any) -> None: method wait_event (line 33) | def wait_event(self, event: Any) -> None: method record_event (line 36) | def record_event(self, event: Any = None) -> Any: method query (line 39) | def query(self) -> bool: method __enter__ (line 43) | def __enter__(self) -> "Stream": method __exit__ (line 46) | def __exit__(self, *args: Any) -> None: class Event (line 50) | class Event: method __init__ (line 53) | def __init__(self, enable_timing: bool = False) -> None: method record (line 56) | def record(self, stream: Any = None) -> None: method wait (line 59) | def wait(self, stream: Any = None) -> None: method query (line 62) | def query(self) -> bool: method synchronize (line 65) | def synchronize(self) -> None: method elapsed_time (line 68) | def elapsed_time(self, end_event: Any) -> float: function current_stream (line 75) | def current_stream(device: Any = None) -> Stream: function stream (line 80) | def stream(s: Any) -> Stream: function set_device (line 85) | def set_device(device: Any) -> None: # noqa: ARG001 function current_device (line 90) | def current_device() -> int: function device_count (line 95) | def device_count() -> int: class _MPSDeviceProperties (line 101) | class _MPSDeviceProperties: method __getattr__ (line 114) | def __getattr__(self, name: str) -> Any: function get_device_properties (line 125) | def get_device_properties(device: Any = 0) -> _MPSDeviceProperties: # n... class _MPSMemoryTracker (line 137) | class _MPSMemoryTracker: method __init__ (line 145) | def __init__(self) -> None: method memory_allocated (line 149) | def memory_allocated(self, device: Any = None) -> int: # noqa: ARG002 method memory_reserved (line 157) | def memory_reserved(self, device: Any = None) -> int: # noqa: ARG002 method max_memory_allocated (line 165) | def max_memory_allocated(self, device: Any = None) -> int: # noqa: AR... method max_memory_reserved (line 169) | def max_memory_reserved(self, device: Any = None) -> int: # noqa: ARG002 method reset_peak_memory_stats (line 173) | def reset_peak_memory_stats(self, device: Any = None) -> None: # noqa... function _patch_non_blocking (line 183) | def _patch_non_blocking() -> None: function install (line 226) | def install() -> None: FILE: python/sglang/_triton_stub.py class _StubBase (line 19) | class _StubBase: method __init_subclass__ (line 25) | def __init_subclass__(cls, **kwargs): class _MockModule (line 29) | class _MockModule(types.ModuleType): method __init__ (line 37) | def __init__(self, name: str): method __getattr__ (line 48) | def __getattr__(self, name: str): method __call__ (line 66) | def __call__(self, *args, **kwargs): method __instancecheck__ (line 77) | def __instancecheck__(self, instance): method __contains__ (line 81) | def __contains__(self, item): method __iter__ (line 85) | def __iter__(self): method __len__ (line 88) | def __len__(self): method __bool__ (line 91) | def __bool__(self): method __repr__ (line 94) | def __repr__(self): function _cdiv (line 98) | def _cdiv(a: int, b: int) -> int: function _next_power_of_2 (line 103) | def _next_power_of_2(n: int) -> int: class _Config (line 108) | class _Config: method __init__ (line 111) | def __init__(self, kwargs=None, num_warps=4, num_stages=2, **extra): class _TritonFinder (line 117) | class _TritonFinder: method find_module (line 128) | def find_module(self, fullname, path=None): method load_module (line 133) | def load_module(self, fullname): function _make_mock (line 148) | def _make_mock(name: str) -> _MockModule: function install (line 155) | def install() -> None: FILE: python/sglang/bench_offline_throughput.py class BenchArgs (line 35) | class BenchArgs: method add_cli_args (line 63) | def add_cli_args(parser: argparse.ArgumentParser): method from_cli_args (line 201) | def from_cli_args(cls, args: argparse.Namespace): function throughput_test_once (line 206) | def throughput_test_once( function monitor_trace_file (line 294) | def monitor_trace_file(known_files, directory, interval=1): function _create_ray_engine_backend (line 326) | def _create_ray_engine_backend(server_args: ServerArgs): function throughput_test (line 394) | def throughput_test( FILE: python/sglang/bench_one_batch.py function start_profile (line 93) | def start_profile(profile_activities, profile_record_shapes=False, rank_... function stop_profile (line 124) | def stop_profile( class BenchArgs (line 158) | class BenchArgs: method add_cli_args (line 178) | def add_cli_args(parser: argparse.ArgumentParser): method from_cli_args (line 245) | def from_cli_args(cls, args: argparse.Namespace): function load_model (line 259) | def load_model(server_args, port_args, gpu_id, tp_rank): function prepare_inputs_for_correctness_test (line 289) | def prepare_inputs_for_correctness_test(bench_args, tokenizer, custom_pr... function prepare_extend_inputs_for_correctness_test (line 334) | def prepare_extend_inputs_for_correctness_test( function prepare_synthetic_inputs_for_latency_test (line 348) | def prepare_synthetic_inputs_for_latency_test( class TreeCacheNamespace (line 377) | class TreeCacheNamespace(SimpleNamespace): method supports_swa (line 378) | def supports_swa(self) -> bool: method supports_mamba (line 381) | def supports_mamba(self) -> bool: method is_chunk_cache (line 384) | def is_chunk_cache(self) -> bool: method is_tree_cache (line 387) | def is_tree_cache(self) -> bool: method evict (line 390) | def evict(self, params: EvictParams): function extend (line 395) | def extend(reqs, model_runner): function decode (line 422) | def decode(input_token_ids, batch, model_runner): function _maybe_prepare_mlp_sync_batch (line 433) | def _maybe_prepare_mlp_sync_batch(batch: ScheduleBatch, model_runner): function _read_prompts_from_file (line 448) | def _read_prompts_from_file(prompt_file, rank_print): function _get_torch_profiler_output_dir (line 461) | def _get_torch_profiler_output_dir(): function _create_torch_profiler_filename (line 465) | def _create_torch_profiler_filename( function _save_profile_trace_results (line 473) | def _save_profile_trace_results(profiler, filename): function correctness_test (line 484) | def correctness_test( function synchronize (line 533) | def synchronize(device): function latency_test_run_once (line 537) | def latency_test_run_once( function latency_test (line 676) | def latency_test( function main (line 791) | def main(server_args, bench_args): FILE: python/sglang/bench_one_batch_server.py function run_benchmark (line 25) | def run_benchmark(server_args: ServerArgs, bench_args: BenchArgs): FILE: python/sglang/bench_serving.py function _get_bool_env_var (line 60) | def _get_bool_env_var(name: str, default: str = "false") -> bool: function _create_bench_client_session (line 65) | def _create_bench_client_session(): class RequestFuncInput (line 79) | class RequestFuncInput: class RequestFuncOutput (line 93) | class RequestFuncOutput: method init_new (line 106) | def init_new(request_func_input: RequestFuncInput): function get_auth_headers (line 112) | def get_auth_headers() -> Dict[str, str]: function get_request_headers (line 123) | def get_request_headers() -> Dict[str, str]: function wait_for_endpoint (line 130) | def wait_for_endpoint(url: str, timeout_sec: int = 60) -> bool: function async_request_trt_llm (line 153) | async def async_request_trt_llm( function async_request_openai_completions (line 224) | async def async_request_openai_completions( function async_request_openai_chat_completions (line 338) | async def async_request_openai_chat_completions( function async_request_truss (line 518) | async def async_request_truss( function async_request_sglang_generate (line 598) | async def async_request_sglang_generate( function async_request_openai_embeddings (line 704) | async def async_request_openai_embeddings( function async_request_gserver (line 754) | async def async_request_gserver( function async_request_profile (line 761) | async def async_request_profile(api_url: str) -> RequestFuncOutput: function _build_profile_urls (line 816) | def _build_profile_urls( function _call_profile_pd (line 835) | async def _call_profile_pd(profile_urls: List[Tuple[str, str]], mode: st... class BenchmarkMetrics (line 875) | class BenchmarkMetrics: function get_request (line 912) | async def get_request( function calculate_metrics (line 952) | def calculate_metrics( function wrap_multi_turn_request_func (line 1130) | def wrap_multi_turn_request_func(request_func: Callable, backend: str) -... function benchmark (line 1163) | async def benchmark( function check_chat_template (line 1638) | def check_chat_template(model_path): function set_global_args (line 1647) | def set_global_args(args_: argparse.Namespace): function run_benchmark (line 1653) | def run_benchmark(args_: argparse.Namespace): class LoRAPathAction (line 1890) | class LoRAPathAction(argparse.Action): method __call__ (line 1891) | def __call__(self, parser, namespace, values, option_string=None): FILE: python/sglang/benchmark/bench_utils.py function run_bench (line 7) | def run_bench( FILE: python/sglang/benchmark/datasets/__init__.py function get_dataset (line 30) | def get_dataset(args, tokenizer, model_id=None): FILE: python/sglang/benchmark/datasets/common.py class DatasetRow (line 22) | class DatasetRow: method __post_init__ (line 33) | def __post_init__(self): class BaseDataset (line 43) | class BaseDataset(ABC): method from_args (line 46) | def from_args(cls, args: Namespace) -> "BaseDataset": ... method load (line 49) | def load( function compute_random_lens (line 56) | def compute_random_lens(full_len: int, range_ratio: float, num: int) -> ... function get_available_tokens (line 68) | def get_available_tokens(tokenizer): function gen_prompt (line 73) | def gen_prompt(tokenizer, token_num): function gen_mm_prompt (line 80) | def gen_mm_prompt(tokenizer, image_pad_id, token_num): FILE: python/sglang/benchmark/datasets/custom.py class CustomDataset (line 20) | class CustomDataset(BaseDataset): method from_args (line 29) | def from_args(cls, args: Namespace) -> "CustomDataset": method load (line 40) | def load( function sample_custom_requests (line 54) | def sample_custom_requests( FILE: python/sglang/benchmark/datasets/generated_shared_prefix.py class GeneratedSharedPrefixDataset (line 23) | class GeneratedSharedPrefixDataset(BaseDataset): method from_args (line 37) | def from_args(cls, args: Namespace) -> "GeneratedSharedPrefixDataset": method load (line 53) | def load( function get_gen_prefix_cache_path (line 72) | def get_gen_prefix_cache_path( function sample_generated_shared_prefix_requests (line 92) | def sample_generated_shared_prefix_requests( FILE: python/sglang/benchmark/datasets/image.py class ImageDataset (line 22) | class ImageDataset(BaseDataset): method from_args (line 35) | def from_args(cls, args: Namespace) -> "ImageDataset": method load (line 49) | def load(self, tokenizer=None, model_id=None) -> List[DatasetRow]: function parse_image_resolution (line 66) | def parse_image_resolution(image_resolution: str) -> Tuple[int, int]: function create_mm_data_row (line 96) | def create_mm_data_row( function sample_image_requests (line 170) | def sample_image_requests( FILE: python/sglang/benchmark/datasets/mmmu.py class MMMUDataset (line 17) | class MMMUDataset(BaseDataset): method from_args (line 23) | def from_args(cls, args: Namespace) -> "MMMUDataset": method load (line 30) | def load(self, tokenizer=None, model_id=None) -> List[DatasetRow]: function sample_mmmu_requests (line 40) | def sample_mmmu_requests( FILE: python/sglang/benchmark/datasets/mooncake.py class MooncakeDataset (line 20) | class MooncakeDataset(BaseDataset): method from_args (line 26) | def from_args(cls, args: Namespace) -> "MooncakeDataset": method load (line 33) | def load(self, tokenizer=None, model_id=None) -> List[Dict]: function get_mooncake_request_over_time (line 50) | async def get_mooncake_request_over_time( FILE: python/sglang/benchmark/datasets/openai_dataset.py class OpenAIDataset (line 13) | class OpenAIDataset(BaseDataset): method from_args (line 19) | def from_args(cls, args: Namespace) -> "OpenAIDataset": method load (line 26) | def load( function sample_openai_requests (line 37) | def sample_openai_requests( FILE: python/sglang/benchmark/datasets/random.py class RandomDataset (line 21) | class RandomDataset(BaseDataset): method from_args (line 31) | def from_args(cls, args: Namespace) -> "RandomDataset": method load (line 42) | def load( function sample_random_requests (line 57) | def sample_random_requests( FILE: python/sglang/benchmark/datasets/sharegpt.py class ShareGPTDataset (line 25) | class ShareGPTDataset(BaseDataset): method from_args (line 34) | def from_args(cls, args: Namespace) -> "ShareGPTDataset": method load (line 45) | def load( function sample_sharegpt_requests (line 59) | def sample_sharegpt_requests( FILE: python/sglang/benchmark/utils.py function remove_prefix (line 17) | def remove_prefix(text: str, prefix: str) -> str: function remove_suffix (line 21) | def remove_suffix(text: str, suffix: str) -> str: function parse_custom_headers (line 25) | def parse_custom_headers(header_list: List[str]) -> Dict[str, str]: function get_model (line 29) | def get_model(pretrained_model_name_or_path: str) -> str: function get_tokenizer (line 44) | def get_tokenizer( function get_processor (line 67) | def get_processor( function download_and_cache_hf_file (line 90) | def download_and_cache_hf_file( function download_and_cache_file (line 101) | def download_and_cache_file(url: str, filename: Optional[str] = None): function is_file_valid_json (line 135) | def is_file_valid_json(path): function set_ulimit (line 151) | def set_ulimit(target_soft_limit=65535): FILE: python/sglang/check_env.py function is_cuda_v2 (line 16) | def is_cuda_v2(): class BaseEnv (line 56) | class BaseEnv: method __init__ (line 59) | def __init__(self): method get_info (line 63) | def get_info(self) -> dict: method get_topology (line 70) | def get_topology(self) -> dict: method get_package_versions (line 73) | def get_package_versions(self) -> dict: method get_device_info (line 87) | def get_device_info(self): method get_hypervisor_vendor (line 113) | def get_hypervisor_vendor(self) -> dict: method get_ulimit_soft (line 123) | def get_ulimit_soft(self) -> dict: method check_env (line 127) | def check_env(self): class GPUEnv (line 144) | class GPUEnv(BaseEnv): method get_info (line 147) | def get_info(self): method _get_cuda_version_info (line 156) | def _get_cuda_version_info(self): method _get_nvcc_info (line 170) | def _get_nvcc_info(self): method _get_cuda_driver_version (line 193) | def _get_cuda_driver_version(self): method get_topology (line 214) | def get_topology(self): class HIPEnv (line 235) | class HIPEnv(BaseEnv): method get_info (line 238) | def get_info(self): method _get_cuda_version_info (line 247) | def _get_cuda_version_info(self): method _get_hipcc_info (line 258) | def _get_hipcc_info(self): method _get_rocm_driver_version (line 276) | def _get_rocm_driver_version(self): method get_topology (line 294) | def get_topology(self): class NPUEnv (line 310) | class NPUEnv(BaseEnv): method __init__ (line 319) | def __init__(self): method get_info (line 323) | def get_info(self): method get_device_info (line 331) | def get_device_info(self): method _get_cann_version_info (line 346) | def _get_cann_version_info(self): method _get_cann_info (line 365) | def _get_cann_info(self, CANN_HOME: str): method _get_ascend_driver_version (line 384) | def _get_ascend_driver_version(self): method get_topology (line 407) | def get_topology(self): class MUSAEnv (line 425) | class MUSAEnv(BaseEnv): method get_info (line 428) | def get_info(self): method _get_musa_version_info (line 437) | def _get_musa_version_info(self): method _get_mcc_info (line 451) | def _get_mcc_info(self): method _get_musa_driver_version (line 472) | def _get_musa_driver_version(self): method get_topology (line 494) | def get_topology(self): FILE: python/sglang/cli/generate.py function generate (line 6) | def generate(args, extra_argv): FILE: python/sglang/cli/main.py function version (line 7) | def version(args, extra_argv): function main (line 12) | def main(): FILE: python/sglang/cli/serve.py function _extract_model_type_override (line 16) | def _extract_model_type_override(extra_argv): function serve (line 49) | def serve(args, extra_argv): FILE: python/sglang/cli/utils.py function _is_diffusers_model_dir (line 12) | def _is_diffusers_model_dir(model_dir: str) -> bool: function get_is_diffusion_model (line 24) | def get_is_diffusion_model(model_path: str) -> bool: function get_model_path (line 65) | def get_model_path(extra_argv): function get_git_commit_hash (line 94) | def get_git_commit_hash() -> str: FILE: python/sglang/compile_deep_gemm.py class CompileArgs (line 40) | class CompileArgs: method add_cli_args (line 44) | def add_cli_args(parser: argparse.ArgumentParser): method from_cli_args (line 48) | def from_cli_args(cls, args: argparse.Namespace): function warm_up_compile (line 57) | async def warm_up_compile( function launch_server_internal (line 76) | def launch_server_internal(server_args): function launch_server_process_and_send_one_request (line 85) | def launch_server_process_and_send_one_request( function refine_server_args (line 145) | def refine_server_args(server_args: ServerArgs, compile_args: CompileArgs): function run_compile (line 156) | def run_compile(server_args: ServerArgs, compile_args: CompileArgs): FILE: python/sglang/eval/llama3_eval.py function fetch_responses (line 39) | async def fetch_responses( class CustomAsyncHTTPXClient (line 79) | class CustomAsyncHTTPXClient(httpx.AsyncClient): method send (line 80) | async def send(self, request: httpx.Request, *args, **kwargs) -> httpx... function get_client (line 87) | def get_client(provider): function benchmark (line 103) | async def benchmark(args): function get_mmlu_answer (line 144) | def get_mmlu_answer(response): function get_mmlu_cot_answer (line 150) | def get_mmlu_cot_answer(response): function get_answer_gsm8k (line 172) | def get_answer_gsm8k(response): function get_dataset_from_task (line 190) | def get_dataset_from_task(task, response_path, model_size): function analyze (line 221) | def analyze(task, response_path, model_size): FILE: python/sglang/eval/loogle_eval.py function get_client (line 15) | def get_client(api_url: str) -> openai.AsyncOpenAI: function get_dataset (line 21) | def get_dataset(): function fetch_response (line 25) | async def fetch_response( function benchmark (line 66) | async def benchmark(args): function analyse (line 98) | def analyse(args): FILE: python/sglang/global_config.py class GlobalConfig (line 6) | class GlobalConfig: method __init__ (line 11) | def __init__(self): FILE: python/sglang/jit_kernel/__main__.py function generate_clangd (line 4) | def generate_clangd(): FILE: python/sglang/jit_kernel/add_constant.py function _jit_add_constant_module (line 14) | def _jit_add_constant_module(constant: int) -> Module: function add_constant (line 24) | def add_constant(src: torch.Tensor, constant: int) -> torch.Tensor: FILE: python/sglang/jit_kernel/awq_dequantize.py function _jit_awq_dequantize_module (line 14) | def _jit_awq_dequantize_module(dtype: torch.dtype) -> Module: function awq_dequantize (line 24) | def awq_dequantize( FILE: python/sglang/jit_kernel/awq_marlin_repack.py function _jit_awq_marlin_repack_module (line 14) | def _jit_awq_marlin_repack_module() -> Module: function awq_marlin_repack (line 22) | def awq_marlin_repack( function awq_marlin_moe_repack (line 40) | def awq_marlin_moe_repack( FILE: python/sglang/jit_kernel/benchmark/bench_awq_dequantize.py function check_correctness (line 29) | def check_correctness(): function benchmark (line 85) | def benchmark(qweight_row, qweight_col, provider): FILE: python/sglang/jit_kernel/benchmark/bench_awq_marlin_moe_repack.py function awq_pack (line 24) | def awq_pack(q_w, num_bits, size_k, size_n): function make_moe_weights (line 37) | def make_moe_weights(num_experts, size_k, size_n, num_bits, group_size): function check_correctness (line 54) | def check_correctness(): function benchmark (line 102) | def benchmark(num_experts, size_k, size_n, num_bits, provider): FILE: python/sglang/jit_kernel/benchmark/bench_awq_marlin_repack.py function awq_pack (line 25) | def awq_pack(q_w, num_bits, size_k, size_n): function check_correctness (line 45) | def check_correctness(): function benchmark (line 86) | def benchmark(size_k, size_n, num_bits, provider): FILE: python/sglang/jit_kernel/benchmark/bench_concat_mla.py function aot_concat_mla_k (line 27) | def aot_concat_mla_k(k, k_nope, k_rope): function jit_concat_mla_k (line 31) | def jit_concat_mla_k(k, k_nope, k_rope): function torch_concat_mla_k (line 35) | def torch_concat_mla_k(k, k_nope, k_rope): function aot_concat_mla_absorb_q (line 41) | def aot_concat_mla_absorb_q(a, b): function jit_concat_mla_absorb_q (line 45) | def jit_concat_mla_absorb_q(a, b): function torch_concat_mla_absorb_q (line 49) | def torch_concat_mla_absorb_q(a, b, out): function _create_concat_mla_k_data (line 65) | def _create_concat_mla_k_data(num_tokens): function bench_concat_mla_k (line 102) | def bench_concat_mla_k(num_tokens: int, provider: str): function bench_concat_mla_absorb_q (line 137) | def bench_concat_mla_absorb_q(dim_0: int, dim_1: int, provider: str): FILE: python/sglang/jit_kernel/benchmark/bench_fused_add_rmsnorm.py function sglang_jit_fused_add_rmsnorm (line 14) | def sglang_jit_fused_add_rmsnorm( function flashinfer_fused_add_rmsnorm (line 20) | def flashinfer_fused_add_rmsnorm( function benchmark (line 56) | def benchmark(hidden_size: int, batch_size: int, provider: str): FILE: python/sglang/jit_kernel/benchmark/bench_fused_norm_scale_shift.py function preprocess_layer (line 37) | def preprocess_layer(layer, affine: bool, D: int, DTYPE: torch.dtype): function bench_fused_norm_scale_shift (line 65) | def bench_fused_norm_scale_shift( function bench_fused_scale_residual_norm_scale_shift (line 100) | def bench_fused_scale_residual_norm_scale_shift( FILE: python/sglang/jit_kernel/benchmark/bench_gptq_marlin.py function _run_gemm (line 29) | def _run_gemm(fn, a): function _run_gemm_aot (line 51) | def _run_gemm_aot(a): function check_correctness (line 73) | def check_correctness(): function benchmark (line 112) | def benchmark(size_m, provider): FILE: python/sglang/jit_kernel/benchmark/bench_gptq_marlin_repack.py function _get_inputs (line 24) | def _get_inputs(size_k): function check_correctness (line 37) | def check_correctness(): function benchmark (line 79) | def benchmark(size_k, provider): FILE: python/sglang/jit_kernel/benchmark/bench_hadamard.py function torch_hadamard_transform (line 47) | def torch_hadamard_transform(x, scale, H, dim, dim_padded): function benchmark (line 85) | def benchmark(batch_size: int, dim: int, provider: str) -> Tuple[float, ... FILE: python/sglang/jit_kernel/benchmark/bench_hicache.py class HiCacheCache (line 44) | class HiCacheCache: method get_slice (line 50) | def get_slice(self, num_layers: int, element_size: int) -> "HiCacheCac... function gen_indices (line 67) | def gen_indices( function sglang_aot_transfer_one (line 79) | def sglang_aot_transfer_one( function sglang_jit_transfer_one (line 100) | def sglang_jit_transfer_one( function sglang_aot_transfer_all (line 121) | def sglang_aot_transfer_all( function sglang_jit_transfer_all (line 144) | def sglang_jit_transfer_all( function pytorch_transfer (line 168) | def pytorch_transfer( function benchmark_one_layer_h2d (line 218) | def benchmark_one_layer_h2d( function _create_ptr_tensor (line 299) | def _create_ptr_tensor(tensors, device="cuda"): function benchmark_all_layer_d2h (line 321) | def benchmark_all_layer_d2h( FILE: python/sglang/jit_kernel/benchmark/bench_moe_wna16_marlin.py function stack_and_dev (line 18) | def stack_and_dev(tensors): function _make_inputs (line 48) | def _make_inputs(size_m): function _run_jit (line 76) | def _run_jit( function _run_aot (line 116) | def _run_aot( function check_correctness (line 156) | def check_correctness(): function benchmark (line 204) | def benchmark(size_m, provider): FILE: python/sglang/jit_kernel/benchmark/bench_norm.py function benchmark_rmsnorm (line 48) | def benchmark_rmsnorm(hidden_size: int, batch_size: int, provider: str): function benchmark_fused_add_rmsnorm (line 72) | def benchmark_fused_add_rmsnorm(hidden_size: int, batch_size: int, provi... FILE: python/sglang/jit_kernel/benchmark/bench_norm_impls.py function effective_rows_from_shape (line 158) | def effective_rows_from_shape(input_shape: list[int]) -> int: function ensure_repo (line 165) | def ensure_repo(repo_name: str, repo_url: str) -> Path: function ensure_python_dep (line 178) | def ensure_python_dep(module_name: str, package_name: str | None = None)... function dtype_from_name (line 189) | def dtype_from_name(name: str) -> torch.dtype: function dtype_name (line 201) | def dtype_name(dtype: torch.dtype) -> str: function normalize_hidden_sizes (line 210) | def normalize_hidden_sizes(text: str) -> list[int]: function normalize_dtypes (line 214) | def normalize_dtypes(text: str) -> list[torch.dtype]: function prewarm (line 218) | def prewarm(fn: Callable[[], object], iters: int = 3) -> None: function benchmark_provider (line 224) | def benchmark_provider( function geometric_mean (line 251) | def geometric_mean(values: list[float]) -> float: function load_flaggems (line 258) | def load_flaggems(): function load_quack (line 272) | def load_quack(): function build_rmsnorm_providers (line 286) | def build_rmsnorm_providers(dtype: torch.dtype, batch_size: int, hidden_... function build_fused_add_rmsnorm_providers (line 318) | def build_fused_add_rmsnorm_providers( function build_layernorm_providers (line 370) | def build_layernorm_providers(dtype: torch.dtype, batch_size: int, hidde... function maybe_benchmark (line 404) | def maybe_benchmark( function write_csv (line 451) | def write_csv(rows: list[dict[str, object]], output_path: Path) -> None: function write_markdown (line 478) | def write_markdown(rows: list[dict[str, object]], output_path: Path) -> ... function run_suite (line 561) | def run_suite( function run_shape_suite (line 620) | def run_shape_suite( function main (line 686) | def main() -> None: FILE: python/sglang/jit_kernel/benchmark/bench_nvfp4_blockwise_moe.py function _round_up (line 22) | def _round_up(x: int, y: int) -> int: function _expert_offsets (line 26) | def _expert_offsets(m_per_expert: list[int], device: torch.device) -> to... function _blockscale_offsets (line 33) | def _blockscale_offsets(m_per_expert: list[int], device: torch.device) -... function _prepare_case (line 40) | def _prepare_case( function _torch_ref_group_mm (line 130) | def _torch_ref_group_mm(case: dict[str, Any]) -> torch.Tensor: function _aot_cutlass_fp4_group_mm (line 143) | def _aot_cutlass_fp4_group_mm(case: dict[str, Any]) -> torch.Tensor: function _probe_legacy_aot_group_mm (line 171) | def _probe_legacy_aot_group_mm() -> tuple[bool, str]: function benchmark (line 227) | def benchmark(total_tokens, n, k, num_experts, provider): FILE: python/sglang/jit_kernel/benchmark/bench_nvfp4_quant.py function _torch_ref_quant (line 23) | def _torch_ref_quant(input: torch.Tensor, input_global_scale: torch.Tens... function _aot_scaled_fp4_quant (line 48) | def _aot_scaled_fp4_quant(input: torch.Tensor, input_global_scale: torch... function _probe_legacy_aot_quant (line 63) | def _probe_legacy_aot_quant() -> tuple[bool, str]: function _probe_flashinfer_quant (line 92) | def _probe_flashinfer_quant() -> tuple[bool, str]: function benchmark (line 157) | def benchmark(m, n, provider): FILE: python/sglang/jit_kernel/benchmark/bench_nvfp4_scaled_mm.py function _dequantize_to_fp16 (line 37) | def _dequantize_to_fp16( function _aot_cutlass_scaled_fp4_mm (line 61) | def _aot_cutlass_scaled_fp4_mm( function _probe_legacy_aot_scaled_mm (line 76) | def _probe_legacy_aot_scaled_mm() -> tuple[bool, str]: function benchmark (line 143) | def benchmark(m, n, k, provider): FILE: python/sglang/jit_kernel/benchmark/bench_per_tensor_quant_fp8.py function vllm_scaled_fp8_quant (line 28) | def vllm_scaled_fp8_quant( function sglang_scaled_fp8_quant (line 37) | def sglang_scaled_fp8_quant( function calculate_diff (line 52) | def calculate_diff(batch_size: int, seq_len: int): function benchmark (line 99) | def benchmark(element_count, provider): FILE: python/sglang/jit_kernel/benchmark/bench_per_token_group_quant_8bit.py function _flatten_to_2d (line 159) | def _flatten_to_2d(t: torch.Tensor) -> torch.Tensor: function _make_sglang_bench_fn (line 166) | def _make_sglang_bench_fn( function benchmark (line 249) | def benchmark( FILE: python/sglang/jit_kernel/benchmark/bench_qknorm.py function sglang_aot_qknorm (line 20) | def sglang_aot_qknorm( function sglang_jit_qknorm (line 39) | def sglang_jit_qknorm( function flashinfer_qknorm (line 49) | def flashinfer_qknorm( function torch_impl_qknorm (line 62) | def torch_impl_qknorm( function benchmark (line 114) | def benchmark( FILE: python/sglang/jit_kernel/benchmark/bench_qknorm_across_heads.py function sglang_jit_qknorm_across_heads (line 18) | def sglang_jit_qknorm_across_heads( function sglang_aot_qknorm_across_heads (line 28) | def sglang_aot_qknorm_across_heads( function flashinfer_qknorm_across_heads (line 43) | def flashinfer_qknorm_across_heads( function torch_impl_qknorm_across_heads (line 56) | def torch_impl_qknorm_across_heads( function benchmark (line 101) | def benchmark( FILE: python/sglang/jit_kernel/benchmark/bench_qwen_image_modulation.py function _make_common_inputs (line 28) | def _make_common_inputs(batch_size: int, seq_len: int, hidden_size: int): function bench_layernorm_scale_shift_gate_select01 (line 55) | def bench_layernorm_scale_shift_gate_select01( function bench_residual_layernorm_scale_shift_gate_select01 (line 116) | def bench_residual_layernorm_scale_shift_gate_select01( FILE: python/sglang/jit_kernel/benchmark/bench_renorm.py function torch_top_k_renorm_probs (line 11) | def torch_top_k_renorm_probs(probs, top_k): function torch_top_p_renorm_probs (line 42) | def torch_top_p_renorm_probs(probs, top_p, eps=1e-5): function torch_top_k_mask_logits (line 81) | def torch_top_k_mask_logits(logits, top_k): function calculate_diff_top_k_renorm (line 106) | def calculate_diff_top_k_renorm(batch_size, vocab_size, k): function calculate_diff_top_p_renorm (line 122) | def calculate_diff_top_p_renorm(batch_size, vocab_size, p): function calculate_diff_top_k_mask (line 138) | def calculate_diff_top_k_mask(batch_size, vocab_size, k): function benchmark_top_k_renorm (line 181) | def benchmark_top_k_renorm(batch_size, vocab_size, k, provider): function benchmark_top_p_renorm (line 214) | def benchmark_top_p_renorm(batch_size, vocab_size, p, provider): function benchmark_top_k_mask (line 243) | def benchmark_top_k_mask(batch_size, vocab_size, k, provider): FILE: python/sglang/jit_kernel/benchmark/bench_rmsnorm.py function sglang_aot_rmsnorm (line 18) | def sglang_aot_rmsnorm( function sglang_jit_rmsnorm (line 25) | def sglang_jit_rmsnorm( function flashinfer_rmsnorm (line 32) | def flashinfer_rmsnorm( function torch_impl_rmsnorm (line 40) | def torch_impl_rmsnorm( function benchmark (line 79) | def benchmark(hidden_size: int, batch_size: int, provider: str): FILE: python/sglang/jit_kernel/benchmark/bench_rope.py function create_cos_sin_cache (line 20) | def create_cos_sin_cache( function flashinfer_rope (line 48) | def flashinfer_rope( function sglang_pos_enc_rope (line 67) | def sglang_pos_enc_rope( function sglang_fused_rope (line 86) | def sglang_fused_rope( function jit_rope_then_store (line 102) | def jit_rope_then_store( function jit_fused_rope_store (line 134) | def jit_fused_rope_store( function benchmark (line 198) | def benchmark(batch_size: int, num_q_k_heads: str, is_neox: bool, provid... function benchmark_store (line 255) | def benchmark_store(batch_size: int, num_q_k_heads: str, is_neox: bool, ... FILE: python/sglang/jit_kernel/benchmark/bench_store_cache.py function sglang_jit_store_cache (line 17) | def sglang_jit_store_cache( function torch_compile_store_cache (line 28) | def torch_compile_store_cache( function torch_streams_store_cache (line 42) | def torch_streams_store_cache( function benchmark (line 89) | def benchmark( FILE: python/sglang/jit_kernel/benchmark/utils.py function is_in_ci (line 16) | def is_in_ci() -> bool: function get_benchmark_range (line 21) | def get_benchmark_range(full_range: List, ci_range: List) -> List: function run_benchmark (line 26) | def run_benchmark( function run_benchmark_no_cudagraph (line 43) | def run_benchmark_no_cudagraph( FILE: python/sglang/jit_kernel/concat_mla.py function _jit_concat_mla_k_module (line 14) | def _jit_concat_mla_k_module() -> Module: function _jit_concat_mla_absorb_q_module (line 23) | def _jit_concat_mla_absorb_q_module() -> Module: function concat_mla_k (line 31) | def concat_mla_k(k: torch.Tensor, k_nope: torch.Tensor, k_rope: torch.Te... function concat_mla_absorb_q (line 47) | def concat_mla_absorb_q(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: FILE: python/sglang/jit_kernel/csrc/fast-hadamard-transform/code_gen.py function string_to_array (line 161) | def string_to_array(string): function array_code_gen (line 172) | def array_code_gen(arr): function main (line 185) | def main(): FILE: python/sglang/jit_kernel/csrc/fast-hadamard-transform/fast_hadamard_transform.h type HadamardParamsBase (line 11) | struct HadamardParamsBase { FILE: python/sglang/jit_kernel/csrc/fast-hadamard-transform/fast_hadamard_transform_common.h type uint8 (line 16) | struct uint8 { type BytesToType (line 25) | struct BytesToType type BytesToType (line 31) | struct BytesToType type BytesToType (line 37) | struct BytesToType type BytesToType (line 43) | struct BytesToType type BytesToType (line 49) | struct BytesToType type BytesToType (line 55) | struct BytesToType function __device__ (line 64) | __device__ inline T operator()(T const& x, T const& y) { function T (line 73) | inline T run(T x, Operator& op) { type Allreduce (line 81) | struct Allreduce function T (line 83) | inline T run(T x, Operator& op) { function cilog2 (line 92) | constexpr int cilog2(int val) { function hadamard_mult_thread (line 99) | void hadamard_mult_thread(float x[kNChunks][1 << kLogN]) { function __device__ (line 141) | inline __device__ void load_input(input_t* x, float x_vals[kNChunks][kNE... FILE: python/sglang/jit_kernel/csrc/fast-hadamard-transform/fast_hadamard_transform_special.h function hadamard_mult_thread_12 (line 12) | void hadamard_mult_thread_12(float x[12]) { function hadamard_mult_thread_20 (line 32) | void hadamard_mult_thread_20(float x[20]) { function hadamard_mult_thread_28 (line 80) | void hadamard_mult_thread_28(float x[28]) { function hadamard_mult_thread_40 (line 172) | void hadamard_mult_thread_40(float x[40]) { FILE: python/sglang/jit_kernel/csrc/gemm/marlin/dequant.h function namespace (line 68) | namespace device::marlin { FILE: python/sglang/jit_kernel/csrc/gemm/marlin/kernel.h function namespace (line 13) | namespace device::marlin { FILE: python/sglang/jit_kernel/csrc/gemm/marlin/marlin_template.h function namespace (line 32) | namespace device::marlin { function __device__ (line 254) | __device__ inline void wait_negative_and_add(int* lock) { function transform_a (line 612) | auto transform_a = [&](int i) { function init_same_group (line 834) | auto init_same_group = [&](int pipe) { function matmul (line 1053) | auto matmul = [&](int k) { FILE: python/sglang/jit_kernel/csrc/gemm/marlin_moe/kernel.h function namespace (line 16) | namespace device::marlin_moe { FILE: python/sglang/jit_kernel/csrc/gemm/marlin_moe/marlin_template.h function namespace (line 33) | namespace device::marlin_moe { function __device__ (line 265) | __device__ inline void wait_negative_and_add(int* lock) { function else (line 351) | else if constexpr (std::is_same::value) { function read_moe_block_data (line 444) | auto read_moe_block_data = [&](int block_id) { function transform_a (line 756) | auto transform_a = [&](int i) { function init_same_group (line 996) | auto init_same_group = [&](int pipe) { function matmul (line 1218) | auto matmul = [&](int k) { function write_result (line 1516) | auto write_result = [&](bool last) { FILE: python/sglang/jit_kernel/cutedsl_gdn.py function _define_kernels (line 33) | def _define_kernels(): function _create_jit_functions (line 951) | def _create_jit_functions(): function _get_jit_functions (line 1273) | def _get_jit_functions(): function _get_compiled_kernel (line 1280) | def _get_compiled_kernel(N, H, HV, K, V, pool_size, use_small_batch, is_... function cutedsl_fused_sigmoid_gating_delta_rule_update (line 1373) | def cutedsl_fused_sigmoid_gating_delta_rule_update( FILE: python/sglang/jit_kernel/diffusion/cutedsl/common/norm_fusion.py function apply_norm_cta (line 15) | def apply_norm_cta( function apply_rmsnorm_cta (line 32) | def apply_rmsnorm_cta( function apply_layernorm_cta (line 61) | def apply_layernorm_cta( function broadcast_tensor_for_bsfd (line 130) | def broadcast_tensor_for_bsfd( function tensor_slice_for_bsfd (line 160) | def tensor_slice_for_bsfd( FILE: python/sglang/jit_kernel/diffusion/cutedsl/common/reduce.py function warp_reduce_sum (line 8) | def warp_reduce_sum(val: cute.Numeric, reduce_size: int = 32) -> cute.Nu... function cta_reduce_sum (line 16) | def cta_reduce_sum( FILE: python/sglang/jit_kernel/diffusion/cutedsl/scale_residual_norm_scale_shift.py function to_cute_arg (line 18) | def to_cute_arg( function to_fake_cute_args (line 42) | def to_fake_cute_args(t: torch.Tensor): class ScaleResidualNormScaleShift (line 57) | class ScaleResidualNormScaleShift: method make_hash_key (line 59) | def make_hash_key(cls, *inputs): method __init__ (line 81) | def __init__(self, D: int, norm_type: str): method __call__ (line 88) | def __call__( method kernel (line 135) | def kernel( function validate_x (line 228) | def validate_x(t: torch.Tensor, B: int, S: int, D: int): function validate_weight_bias (line 237) | def validate_weight_bias(t: Optional[torch.Tensor], B: int, S: int, D: i... function validate_scale_shift (line 248) | def validate_scale_shift(t: torch.Tensor, B: int, S: int, D: int): function validate_gate (line 271) | def validate_gate(t: Union[torch.Tensor, int], B: int, S: int, D: int): function fused_norm_scale_shift (line 278) | def fused_norm_scale_shift( function _fused_norm_scale_shift_fake (line 344) | def _fused_norm_scale_shift_fake(x, weight, bias, scale, shift, norm_typ... function fused_scale_residual_norm_scale_shift (line 352) | def fused_scale_residual_norm_scale_shift( function _fused_scale_residual_norm_scale_shift_fake (line 426) | def _fused_scale_residual_norm_scale_shift_fake( FILE: python/sglang/jit_kernel/diffusion/triton/mps_fallback.py function _torch_to_mlx (line 45) | def _torch_to_mlx(tensor: torch.Tensor) -> "mx.array": function _mlx_to_torch (line 53) | def _mlx_to_torch(array: "mx.array", device: torch.device) -> torch.Tensor: function fuse_scale_shift_kernel_native (line 64) | def fuse_scale_shift_kernel_native( function fuse_scale_shift_gate_select01_kernel_native (line 97) | def fuse_scale_shift_gate_select01_kernel_native( function apply_rotary_embedding_native (line 118) | def apply_rotary_embedding_native( function norm_infer_native (line 131) | def norm_infer_native( function triton_one_pass_rms_norm_native (line 160) | def triton_one_pass_rms_norm_native( function rms_norm_fn_native (line 172) | def rms_norm_fn_native( function norm_infer_native (line 223) | def norm_infer_native( # noqa: F811 function triton_one_pass_rms_norm_native (line 250) | def triton_one_pass_rms_norm_native( # noqa: F811 function rms_norm_fn_native (line 262) | def rms_norm_fn_native( # noqa: F811 FILE: python/sglang/jit_kernel/diffusion/triton/norm.py function maybe_contiguous_lastdim (line 10) | def maybe_contiguous_lastdim(x): function maybe_contiguous (line 14) | def maybe_contiguous(x): function triton_autotune_configs (line 18) | def triton_autotune_configs(): function _layer_norm_fwd_1pass_kernel (line 64) | def _layer_norm_fwd_1pass_kernel( function _layer_norm_fwd (line 188) | def _layer_norm_fwd( function _layer_norm_fwd_impl (line 251) | def _layer_norm_fwd_impl( class LayerNormFn (line 373) | class LayerNormFn: method forward (line 376) | def forward( function layer_norm_fn (line 453) | def layer_norm_fn( function _norm_infer_kernel (line 496) | def _norm_infer_kernel( function norm_infer (line 540) | def norm_infer( function rms_norm_fn (line 582) | def rms_norm_fn( FILE: python/sglang/jit_kernel/diffusion/triton/npu_fallback.py function fuse_scale_shift_native (line 5) | def fuse_scale_shift_native( function apply_rotary_embedding_native (line 16) | def apply_rotary_embedding_native( FILE: python/sglang/jit_kernel/diffusion/triton/rmsnorm_onepass.py function _rms_norm_tiled_onepass (line 8) | def _rms_norm_tiled_onepass( function triton_one_pass_rms_norm (line 36) | def triton_one_pass_rms_norm(x: torch.Tensor, w: torch.Tensor, eps: floa... FILE: python/sglang/jit_kernel/diffusion/triton/rotary.py function _rotary_embedding_kernel (line 18) | def _rotary_embedding_kernel( function apply_rotary_embedding (line 67) | def apply_rotary_embedding( FILE: python/sglang/jit_kernel/diffusion/triton/scale_shift.py function _fused_layernorm_scale_shift_gate_select01_kernel (line 9) | def _fused_layernorm_scale_shift_gate_select01_kernel( function _fused_residual_layernorm_scale_shift_gate_select01_kernel (line 116) | def _fused_residual_layernorm_scale_shift_gate_select01_kernel( function _fused_scale_shift_4d_kernel (line 247) | def _fused_scale_shift_4d_kernel( function fuse_scale_shift_kernel_blc_opt (line 292) | def fuse_scale_shift_kernel_blc_opt( function fuse_scale_shift_gate_select01_kernel_blc_opt (line 360) | def fuse_scale_shift_gate_select01_kernel_blc_opt( function fuse_scale_shift_kernel (line 447) | def fuse_scale_shift_kernel( function fuse_scale_shift_gate_select01_kernel (line 566) | def fuse_scale_shift_gate_select01_kernel( function fuse_layernorm_scale_shift_gate_select01_kernel (line 638) | def fuse_layernorm_scale_shift_gate_select01_kernel( function fuse_residual_layernorm_scale_shift_gate_select01_kernel (line 727) | def fuse_residual_layernorm_scale_shift_gate_select01_kernel( FILE: python/sglang/jit_kernel/flash_attention_v4.py function _maybe_contiguous (line 16) | def _maybe_contiguous(x: Optional[torch.Tensor]) -> Optional[torch.Tensor]: function flash_attn_varlen_func (line 20) | def flash_attn_varlen_func( function flash_attn_with_kvcache (line 92) | def flash_attn_with_kvcache( FILE: python/sglang/jit_kernel/fused_metadata_copy.py function _jit_fused_metadata_copy_module (line 29) | def _jit_fused_metadata_copy_module( function _jit_fused_metadata_copy_multi_module (line 62) | def _jit_fused_metadata_copy_multi_module( function fused_metadata_copy_cuda (line 97) | def fused_metadata_copy_cuda( function fused_metadata_copy_multi_cuda (line 199) | def fused_metadata_copy_multi_cuda( FILE: python/sglang/jit_kernel/fused_store_index_cache.py function _jit_nsa_fused_store_module (line 30) | def _jit_nsa_fused_store_module( function can_use_nsa_fused_store (line 55) | def can_use_nsa_fused_store( function fused_store_index_k_cache (line 67) | def fused_store_index_k_cache( FILE: python/sglang/jit_kernel/gptq_marlin.py function _jit_gptq_marlin_module (line 18) | def _jit_gptq_marlin_module(dtype: torch.dtype) -> Module: function _or_empty (line 28) | def _or_empty( function gptq_marlin_gemm (line 34) | def gptq_marlin_gemm( FILE: python/sglang/jit_kernel/gptq_marlin_repack.py function _jit_gptq_marlin_repack_module (line 17) | def _jit_gptq_marlin_repack_module() -> Module: function gptq_marlin_repack (line 25) | def gptq_marlin_repack( FILE: python/sglang/jit_kernel/hadamard.py function _jit_hadamard_module (line 14) | def _jit_hadamard_module(dtype: torch.dtype) -> Module: function _hadamard_transform_impl (line 32) | def _hadamard_transform_impl( function hadamard_transform (line 59) | def hadamard_transform(x: torch.Tensor, scale: float = 1.0) -> torch.Ten... function hadamard_transform_12n (line 64) | def hadamard_transform_12n(x: torch.Tensor, scale: float = 1.0) -> torch... function hadamard_transform_20n (line 69) | def hadamard_transform_20n(x: torch.Tensor, scale: float = 1.0) -> torch... function hadamard_transform_28n (line 74) | def hadamard_transform_28n(x: torch.Tensor, scale: float = 1.0) -> torch... function hadamard_transform_40n (line 79) | def hadamard_transform_40n(x: torch.Tensor, scale: float = 1.0) -> torch... FILE: python/sglang/jit_kernel/hicache.py function _jit_hicache_module (line 16) | def _jit_hicache_module(*, element_size: int, unroll: int, block_quota: ... function can_use_hicache_jit_kernel (line 34) | def can_use_hicache_jit_kernel( function _default_unroll (line 58) | def _default_unroll(element_size: int) -> int: function transfer_hicache_one_layer (line 69) | def transfer_hicache_one_layer( function transfer_hicache_all_layer (line 104) | def transfer_hicache_all_layer( FILE: python/sglang/jit_kernel/include/sgl_kernel/scalar_type.hpp type host (line 9) | namespace host { class ScalarType (line 20) | class ScalarType { type NanRepr (line 22) | enum NanRepr : uint8_t { method ScalarType (line 30) | constexpr ScalarType( method ScalarType (line 44) | static constexpr ScalarType int_(uint8_t size_bits, int32_t bias = 0) { method ScalarType (line 48) | static constexpr ScalarType uint(uint8_t size_bits, int32_t bias = 0) { method ScalarType (line 53) | static constexpr ScalarType float_IEEE754(uint8_t exponent, uint8_t ... method ScalarType (line 59) | static constexpr ScalarType float_(uint8_t exponent, uint8_t mantiss... method member_id_field_width (line 84) | static constexpr size_t member_id_field_width() { method reduce_members_helper (line 90) | static constexpr auto reduce_members_helper(Fn f, Init val, Member m... method reduce_members (line 100) | constexpr auto reduce_members(Fn f, Init init) const { method reduce_member_types (line 106) | static constexpr auto reduce_member_types(Fn f, Init init) { method id_size_bits (line 111) | static constexpr auto id_size_bits() { method Id (line 120) | constexpr Id id() const { method ScalarType (line 134) | static constexpr ScalarType from_id(Id id) { method size_bits (line 148) | constexpr int64_t size_bits() const { method is_signed (line 151) | constexpr bool is_signed() const { method is_integer (line 154) | constexpr bool is_integer() const { method is_floating_point (line 157) | constexpr bool is_floating_point() const { method is_ieee_754 (line 160) | constexpr bool is_ieee_754() const { method has_nans (line 163) | constexpr bool has_nans() const { method has_infs (line 166) | constexpr bool has_infs() const { method has_bias (line 169) | constexpr bool has_bias() const { method _floating_point_max (line 175) | double _floating_point_max() const { method _raw_max (line 208) | constexpr std::variant _raw_max() const { method _raw_min (line 217) | constexpr std::variant _raw_min() const { method max (line 242) | constexpr std::variant max() const { method min (line 248) | constexpr std::variant min() const { method str (line 254) | std::string str() const { FILE: python/sglang/jit_kernel/include/sgl_kernel/source_location.h type source_location_fallback (line 18) | struct source_location_fallback { FILE: python/sglang/jit_kernel/include/sgl_kernel/tensor.h function namespace (line 38) | namespace host { type PrintableDevice (line 129) | struct PrintableDevice { type SymbolicDType (line 253) | struct SymbolicDType { function DLDataType (line 274) | auto unwrap(DebugInfo info = {}) const -> DLDataType { type SymbolicDevice (line 316) | struct SymbolicDevice { function DLDevice (line 341) | auto unwrap(DebugInfo info = {}) const -> DLDevice { function namespace (line 382) | namespace details { function SymbolicDevice (line 432) | struct DeviceRef : BaseRef { FILE: python/sglang/jit_kernel/include/sgl_kernel/utils.h function namespace (line 57) | namespace host { FILE: python/sglang/jit_kernel/kvcache.py function _jit_kvcache_module (line 20) | def _jit_kvcache_module(row_bytes: int) -> Module: function can_use_store_cache (line 31) | def can_use_store_cache(size: int) -> bool: function store_cache (line 49) | def store_cache( FILE: python/sglang/jit_kernel/moe_lora_align.py function _jit_moe_align_module (line 14) | def _jit_moe_align_module(dtype: torch.dtype) -> Module: function moe_lora_align_block_size (line 26) | def moe_lora_align_block_size( FILE: python/sglang/jit_kernel/moe_wna16_marlin.py function _jit_moe_wna16_marlin_module (line 18) | def _jit_moe_wna16_marlin_module(dtype: torch.dtype) -> Module: function _or_empty (line 33) | def _or_empty( function moe_wna16_marlin_gemm (line 39) | def moe_wna16_marlin_gemm( FILE: python/sglang/jit_kernel/ngram_embedding.py function _jit_ngram_embedding_module (line 13) | def _jit_ngram_embedding_module() -> Module: function compute_n_gram_ids (line 24) | def compute_n_gram_ids( function update_token_table (line 69) | def update_token_table( FILE: python/sglang/jit_kernel/norm.py function _jit_qknorm_module (line 20) | def _jit_qknorm_module(head_dim: int, dtype: torch.dtype) -> Module: function _jit_rmsnorm_module (line 31) | def _jit_rmsnorm_module(hidden_size: int, dtype: torch.dtype) -> Module: function _jit_fused_add_rmsnorm_module (line 42) | def _jit_fused_add_rmsnorm_module(dtype: torch.dtype) -> Module: function _jit_qknorm_across_heads_module (line 53) | def _jit_qknorm_across_heads_module(dtype: torch.dtype) -> Module: function can_use_fused_inplace_qknorm (line 66) | def can_use_fused_inplace_qknorm(head_dim: int, dtype: torch.dtype) -> b... function fused_inplace_qknorm (line 79) | def fused_inplace_qknorm( function rmsnorm (line 93) | def rmsnorm( function fused_add_rmsnorm (line 105) | def fused_add_rmsnorm( function fused_inplace_qknorm_across_heads (line 115) | def fused_inplace_qknorm_across_heads( FILE: python/sglang/jit_kernel/nvfp4.py function _find_package_root (line 22) | def _find_package_root(package: str) -> Optional[pathlib.Path]: function _resolve_cutlass_include_paths (line 29) | def _resolve_cutlass_include_paths() -> list[str]: function _nvfp4_cuda_flags (line 59) | def _nvfp4_cuda_flags() -> list[str]: function _get_nvfp4_cuda_arch_list (line 73) | def _get_nvfp4_cuda_arch_list() -> str: function _nvfp4_arch_env (line 90) | def _nvfp4_arch_env(): function _jit_nvfp4_quant_module (line 104) | def _jit_nvfp4_quant_module() -> Module: function _jit_nvfp4_expert_quant_module (line 127) | def _jit_nvfp4_expert_quant_module() -> Module: function _jit_nvfp4_scaled_mm_module (line 154) | def _jit_nvfp4_scaled_mm_module() -> Module: function _jit_nvfp4_blockwise_moe_module (line 176) | def _jit_nvfp4_blockwise_moe_module() -> Module: function cutlass_scaled_fp4_mm (line 198) | def cutlass_scaled_fp4_mm( function cutlass_fp4_group_mm (line 214) | def cutlass_fp4_group_mm( function _scaled_fp4_quant_custom_op (line 283) | def _scaled_fp4_quant_custom_op( function scaled_fp4_quant (line 293) | def scaled_fp4_quant( function _shuffle_rows_torch (line 329) | def _shuffle_rows_torch( function _scaled_fp4_experts_quant_custom_op (line 343) | def _scaled_fp4_experts_quant_custom_op( function scaled_fp4_experts_quant (line 362) | def scaled_fp4_experts_quant( function _scaled_fp4_grouped_quant_custom_op (line 426) | def _scaled_fp4_grouped_quant_custom_op( function scaled_fp4_grouped_quant (line 446) | def scaled_fp4_grouped_quant( function _silu_and_mul_scaled_fp4_grouped_quant_custom_op (line 486) | def _silu_and_mul_scaled_fp4_grouped_quant_custom_op( function silu_and_mul_scaled_fp4_grouped_quant (line 506) | def silu_and_mul_scaled_fp4_grouped_quant( function _cutlass_fp4_group_mm_custom_op (line 557) | def _cutlass_fp4_group_mm_custom_op( function suggest_nvfp4_global_scale (line 602) | def suggest_nvfp4_global_scale(x: torch.Tensor) -> torch.Tensor: FILE: python/sglang/jit_kernel/per_tensor_quant_fp8.py function _jit_per_tensor_quant_fp8_module (line 15) | def _jit_per_tensor_quant_fp8_module(is_static: bool, dtype: torch.dtype... function per_tensor_quant_fp8 (line 29) | def per_tensor_quant_fp8( FILE: python/sglang/jit_kernel/per_token_group_quant_8bit.py function _jit_per_token_group_quant_8bit_module (line 17) | def _jit_per_token_group_quant_8bit_module( function _per_token_group_quant_8bit_custom_op (line 38) | def _per_token_group_quant_8bit_custom_op( function per_token_group_quant_8bit (line 75) | def per_token_group_quant_8bit( FILE: python/sglang/jit_kernel/rope.py function _jit_rotary_embedding_module (line 21) | def _jit_rotary_embedding_module() -> Module: function _jit_fused_rope_module (line 30) | def _jit_fused_rope_module(is_neox: bool, rope_dim: int, dtype: torch.dt... function rotary_embedding_with_key (line 47) | def rotary_embedding_with_key( function rotary_embedding_without_key (line 63) | def rotary_embedding_without_key( function rotary_embedding (line 74) | def rotary_embedding( class FusedSetKVBufferArg (line 94) | class FusedSetKVBufferArg: function apply_rope_inplace (line 113) | def apply_rope_inplace( function apply_rope_inplace_with_kvcache (line 141) | def apply_rope_inplace_with_kvcache( function apply_rope_with_cos_sin_cache_inplace (line 179) | def apply_rope_with_cos_sin_cache_inplace( FILE: python/sglang/jit_kernel/tests/test_add_constant.py function test_add_constant (line 9) | def test_add_constant(size: int, constant: int) -> None: FILE: python/sglang/jit_kernel/tests/test_awq_dequantize.py function reverse_awq_order (line 16) | def reverse_awq_order(t: torch.Tensor): function awq_dequantize_torch (line 35) | def awq_dequantize_torch( function test_awq_dequantize_jit_vs_torch (line 76) | def test_awq_dequantize_jit_vs_torch( function test_awq_dequantize_jit_vs_aot (line 124) | def test_awq_dequantize_jit_vs_aot( FILE: python/sglang/jit_kernel/tests/test_awq_marlin_moe_repack.py function _has_aot_awq_marlin_moe_repack (line 12) | def _has_aot_awq_marlin_moe_repack() -> bool: function awq_pack (line 21) | def awq_pack( function test_awq_marlin_moe_repack_jit_vs_aot (line 46) | def test_awq_marlin_moe_repack_jit_vs_aot( function test_awq_marlin_moe_repack_shape (line 87) | def test_awq_marlin_moe_repack_shape( FILE: python/sglang/jit_kernel/tests/test_awq_marlin_repack.py function _has_aot_awq_marlin_repack (line 13) | def _has_aot_awq_marlin_repack() -> bool: function awq_pack (line 22) | def awq_pack( function test_awq_marlin_repack_jit_vs_aot (line 46) | def test_awq_marlin_repack_jit_vs_aot(num_bits, k_tiles, n_tiles, group_... function test_awq_marlin_repack_correct (line 76) | def test_awq_marlin_repack_correct(num_bits, k_tiles, n_tiles, group_size): FILE: python/sglang/jit_kernel/tests/test_concat_mla.py function torch_concat_mla_k (line 8) | def torch_concat_mla_k( function torch_concat_mla_absorb_q (line 21) | def torch_concat_mla_absorb_q( function sgl_kernel_concat_mla_k (line 33) | def sgl_kernel_concat_mla_k( function sgl_kernel_concat_mla_absorb_q (line 42) | def sgl_kernel_concat_mla_absorb_q( function jit_concat_mla_k (line 52) | def jit_concat_mla_k( function jit_concat_mla_absorb_q (line 61) | def jit_concat_mla_absorb_q( function test_concat_mla_k_jit_vs_torch (line 89) | def test_concat_mla_k_jit_vs_torch(num_tokens: int) -> None: function test_concat_mla_k_jit_vs_aot (line 110) | def test_concat_mla_k_jit_vs_aot(num_tokens: int) -> None: function test_concat_mla_absorb_q_jit_vs_torch (line 138) | def test_concat_mla_absorb_q_jit_vs_torch(dim_0: int, dim_1: int) -> None: function test_concat_mla_absorb_q_jit_vs_aot (line 155) | def test_concat_mla_absorb_q_jit_vs_aot(dim_0: int, dim_1: int) -> None: FILE: python/sglang/jit_kernel/tests/test_cutedsl_gdn.py function run_triton_kernel (line 29) | def run_triton_kernel(A_log, dt_bias, q, k, v, a, b, initial_state, indi... function test_cutedsl_gdn_precision (line 57) | def test_cutedsl_gdn_precision(B: int): function test_cutedsl_gdn_performance (line 114) | def test_cutedsl_gdn_performance(B: int): FILE: python/sglang/jit_kernel/tests/test_flash_attention_4.py function apply_rotary_emb (line 19) | def apply_rotary_emb( function unpad_input (line 81) | def unpad_input(hidden_states, attention_mask, unused_mask=None): function pad_input (line 115) | def pad_input(hidden_states, indices, batch, seqlen): function generate_random_padding_mask (line 133) | def generate_random_padding_mask( function generate_qkv (line 171) | def generate_qkv( function construct_local_mask (line 322) | def construct_local_mask( function construct_chunk_mask (line 363) | def construct_chunk_mask( function attention_ref (line 399) | def attention_ref( function test_flash_attn_varlen_output (line 604) | def test_flash_attn_varlen_output( function test_flash_attn_kvcache (line 997) | def test_flash_attn_kvcache( function _generate_block_kvcache (line 1471) | def _generate_block_kvcache( FILE: python/sglang/jit_kernel/tests/test_fused_add_rmsnorm.py function sglang_jit_fused_add_rmsnorm (line 9) | def sglang_jit_fused_add_rmsnorm( function flashinfer_fused_add_rmsnorm (line 17) | def flashinfer_fused_add_rmsnorm( function test_fused_add_rmsnorm (line 39) | def test_fused_add_rmsnorm(batch_size: int, hidden_size: int) -> None: FILE: python/sglang/jit_kernel/tests/test_fused_metadata_copy.py function create_test_metadata (line 21) | def create_test_metadata( function reference_copy_decode (line 125) | def reference_copy_decode(src, dst, max_len): function reference_copy_target_verify (line 148) | def reference_copy_target_verify(src, dst, max_seqlen_k, seqlens_expande... function reference_copy_draft_extend (line 176) | def reference_copy_draft_extend(src, dst, max_seqlen_k, seqlens_expanded... function test_fused_metadata_copy_dtype_validation (line 209) | def test_fused_metadata_copy_dtype_validation(): function test_fused_metadata_copy (line 322) | def test_fused_metadata_copy(bs, forward_mode, has_real_page_table, has_... function test_fused_metadata_copy_large_batch (line 423) | def test_fused_metadata_copy_large_batch(bs): function create_test_metadata_multi (line 488) | def create_test_metadata_multi( function reference_copy_for_loop (line 594) | def reference_copy_for_loop(src, dst_list, bs, max_len): function test_fused_metadata_copy_multi_dtype_validation (line 618) | def test_fused_metadata_copy_multi_dtype_validation(): function test_fused_metadata_copy_multi (line 713) | def test_fused_metadata_copy_multi(bs, has_real_page_table, has_flashmla): function test_fused_metadata_copy_multi_large_batch (line 915) | def test_fused_metadata_copy_multi_large_batch(bs): FILE: python/sglang/jit_kernel/tests/test_fused_norm_scale_shift.py function _tol (line 39) | def _tol(dtype: torch.dtype): function cuda_setup (line 44) | def cuda_setup(): function _apply_scale_shift (line 50) | def _apply_scale_shift(y: Tensor, scale: Tensor, shift: Tensor) -> Tensor: function fused_norm_scale_shift_ref (line 63) | def fused_norm_scale_shift_ref( function fused_scale_residual_norm_scale_shift_ref (line 83) | def fused_scale_residual_norm_scale_shift_ref( function _make_tensor (line 117) | def _make_tensor(index_mode: str, shape: Tuple, dtype: torch.dtype): function run_norm_scale_shift (line 124) | def run_norm_scale_shift( function run_scale_resi_norm_scale_shift (line 147) | def run_scale_resi_norm_scale_shift( class TestFusedNormScaleShift (line 178) | class TestFusedNormScaleShift: method test_shape_dtype (line 181) | def test_shape_dtype(self, shape, dtype, norm_type): method test_dtype_0 (line 185) | def test_dtype_0(self, dtype, norm_type): method test_dtype_1 (line 189) | def test_dtype_1(self, dtype, norm_type): method test_normtype_affine (line 193) | def test_normtype_affine(self, affine_mode, norm_type): method test_index_mode (line 197) | def test_index_mode(self, index_mode, norm_type): class TestFusedScaleResidualNormScaleShift (line 204) | class TestFusedScaleResidualNormScaleShift: method test_shape_dtype (line 207) | def test_shape_dtype(self, shape, dtype, norm_type): method test_dtype_0 (line 211) | def test_dtype_0(self, dtype, norm_type): method test_dtype_1 (line 215) | def test_dtype_1(self, dtype, norm_type): method test_normtype_affine (line 221) | def test_normtype_affine(self, affine_mode, norm_type): method test_scale_shift_index_mode (line 225) | def test_scale_shift_index_mode(self, index_mode, norm_type): method test_gate_index_mode (line 231) | def test_gate_index_mode(self, index_mode, norm_type): FILE: python/sglang/jit_kernel/tests/test_fused_store_index_cache.py function _skip_if_unavailable (line 56) | def _skip_if_unavailable(page_size: int = PAGE_SIZE): function _num_pages (line 71) | def _num_pages(loc: torch.Tensor, page_size: int, extra: int = 1) -> int: function _make_buffer (line 75) | def _make_buffer(num_pages: int, page_size: int = PAGE_SIZE) -> torch.Te... function _read_token_from_buffer (line 83) | def _read_token_from_buffer( function _write_token_to_buffer (line 109) | def _write_token_to_buffer( function _gather_tokens (line 135) | def _gather_tokens( function _reference_quantize_and_store (line 152) | def _reference_quantize_and_store( function _import_act_quant (line 182) | def _import_act_quant(): function _ref_store_via_act_quant (line 191) | def _ref_store_via_act_quant( function test_fused_kernel_matches_own_algorithm (line 241) | def test_fused_kernel_matches_own_algorithm(num_tokens: int, base_index:... function test_fused_kernel_vs_act_quant_semantic (line 298) | def test_fused_kernel_vs_act_quant_semantic(scale_fmt: Optional[str]): function test_roundtrip_reconstruction (line 368) | def test_roundtrip_reconstruction(num_tokens: int): function test_single_token (line 395) | def test_single_token(): function test_zero_input (line 412) | def test_zero_input(): function test_reference_writes_nonzero (line 436) | def test_reference_writes_nonzero(): FILE: python/sglang/jit_kernel/tests/test_fused_verify_triton_gdn.py function _make_tensors (line 26) | def _make_tensors(N, T, H, HV, K, V, device="cuda", seed=2025): function run_reference (line 42) | def run_reference( function run_fused_mtp (line 91) | def run_fused_mtp( function test_fused_gdn_mtp_precision (line 135) | def test_fused_gdn_mtp_precision(N: int, T: int): function test_mtp_single_step_decode (line 178) | def test_mtp_single_step_decode(N: int): FILE: python/sglang/jit_kernel/tests/test_gptq_marlin.py function test_gptq_marlin_gemm (line 23) | def test_gptq_marlin_gemm( FILE: python/sglang/jit_kernel/tests/test_gptq_marlin_repack.py function test_gptq_marlin_repack (line 34) | def test_gptq_marlin_repack( FILE: python/sglang/jit_kernel/tests/test_hadamard_jit.py function _parse_hadamard_str (line 135) | def _parse_hadamard_str(s): function hadamard_transform_ref (line 154) | def hadamard_transform_ref(x, scale=1.0): function hadamard_transform_mn_ref (line 173) | def hadamard_transform_mn_ref(x, multiple, scale=1.0): function test_hadamard_transform (line 223) | def test_hadamard_transform(dim, dtype): function test_hadamard_transform_non_power_of_two (line 254) | def test_hadamard_transform_non_power_of_two(dim, dtype): function test_hadamard_transform_3d_input (line 277) | def test_hadamard_transform_3d_input(dtype): function test_hadamard_transform_scale_one (line 298) | def test_hadamard_transform_scale_one(dtype): function test_hadamard_transform_12n (line 328) | def test_hadamard_transform_12n(dim, dtype): function test_hadamard_transform_20n (line 352) | def test_hadamard_transform_20n(dim, dtype): function test_hadamard_transform_28n (line 376) | def test_hadamard_transform_28n(dim, dtype): function test_hadamard_transform_40n (line 400) | def test_hadamard_transform_40n(dim, dtype): FILE: python/sglang/jit_kernel/tests/test_moe_lora_align_block_size.py function round_up (line 16) | def round_up(x, base): function CEILDIV (line 20) | def CEILDIV(x, y): function sample_data (line 24) | def sample_data(num_experts, max_loras, num_tokens, topk_num): function test_moe_lora_align_block_size (line 66) | def test_moe_lora_align_block_size( FILE: python/sglang/jit_kernel/tests/test_moe_wna16_marlin.py function _has_aot_moe_wna16_marlin_gemm (line 12) | def _has_aot_moe_wna16_marlin_gemm() -> bool: function stack_and_dev (line 21) | def stack_and_dev(tensors: list[torch.Tensor]): function _get_scalar_type (line 26) | def _get_scalar_type(num_bits: int, has_zp: bool): function _setup_moe_weights (line 34) | def _setup_moe_weights(e, n, k, quant_type, group_size, act_order, dtype): function _run_single_gemm (line 77) | def _run_single_gemm( function _run_single_gemm_aot (line 131) | def _run_single_gemm_aot( function generate_test_cases (line 184) | def generate_test_cases(): function test_moe_wna16_marlin_gemm (line 232) | def test_moe_wna16_marlin_gemm( FILE: python/sglang/jit_kernel/tests/test_norm_jit.py function _jit_rmsnorm (line 15) | def _jit_rmsnorm(input, weight, output, eps): function _fi_rmsnorm (line 21) | def _fi_rmsnorm(input, weight, out, eps): function _jit_fused_add_rmsnorm (line 27) | def _jit_fused_add_rmsnorm(input, residual, weight, eps): function _fi_fused_add_rmsnorm (line 33) | def _fi_fused_add_rmsnorm(input, residual, weight, eps): function test_rmsnorm_jit (line 43) | def test_rmsnorm_jit(batch_size, hidden_size, dtype, specify_out): function test_fused_add_rmsnorm_jit (line 65) | def test_fused_add_rmsnorm_jit(batch_size, hidden_size, dtype): FILE: python/sglang/jit_kernel/tests/test_nvfp4_blockwise_moe.py function _nvfp4_supported (line 14) | def _nvfp4_supported() -> bool: function _round_up (line 18) | def _round_up(x: int, y: int) -> int: function _build_expert_offsets (line 22) | def _build_expert_offsets( function _build_blockscale_offsets (line 31) | def _build_blockscale_offsets( function test_nvfp4_blockwise_moe_grouped_mm (line 44) | def test_nvfp4_blockwise_moe_grouped_mm(dtype: torch.dtype) -> None: FILE: python/sglang/jit_kernel/tests/test_nvfp4_gemm.py function _nvfp4_supported (line 7) | def _nvfp4_supported() -> bool: function e2m1_to_fp32 (line 35) | def e2m1_to_fp32(int4_value: int) -> float: function break_fp4_bytes (line 42) | def break_fp4_bytes(a: torch.Tensor) -> torch.Tensor: function convert_swizzled_to_linear (line 53) | def convert_swizzled_to_linear( function dequantize_to_dtype (line 67) | def dequantize_to_dtype( function get_ref_results (line 84) | def get_ref_results( function test_nvfp4_gemm (line 103) | def test_nvfp4_gemm(dtype: torch.dtype, shape: tuple[int, int, int]) -> ... FILE: python/sglang/jit_kernel/tests/test_nvfp4_quant.py function _nvfp4_supported (line 16) | def _nvfp4_supported() -> bool: function _silu_and_mul_reference (line 20) | def _silu_and_mul_reference(x: torch.Tensor) -> torch.Tensor: function cast_from_fp4 (line 60) | def cast_from_fp4(x: torch.Tensor, m: int, n: int) -> torch.Tensor: function cast_to_fp4 (line 68) | def cast_to_fp4(x: torch.Tensor) -> torch.Tensor: function get_reciprocal (line 82) | def get_reciprocal(x): function ref_nvfp4_quant (line 88) | def ref_nvfp4_quant(x: torch.Tensor, global_scale: torch.Tensor): function recover_swizzled_scales (line 103) | def recover_swizzled_scales(scale: torch.Tensor, m: int, n: int) -> torc... function test_quantize_to_fp4 (line 118) | def test_quantize_to_fp4(dtype: torch.dtype, shape: tuple[int, int]) -> ... function test_quantize_to_fp4_padded (line 139) | def test_quantize_to_fp4_padded(shape: tuple[int, int]) -> None: function test_quantize_to_fp4_grouped (line 160) | def test_quantize_to_fp4_grouped(shape: tuple[int, int, int]) -> None: function test_silu_and_mul_quantize_to_fp4_grouped (line 186) | def test_silu_and_mul_quantize_to_fp4_grouped(shape: tuple[int, int, int... FILE: python/sglang/jit_kernel/tests/test_per_tensor_quant_fp8.py function sglang_scaled_fp8_quant (line 19) | def sglang_scaled_fp8_quant( function torch_scaled_fp8_quant (line 34) | def torch_scaled_fp8_quant(tensor, inv_scale): function test_jit_per_tensor_quant_compare_implementations (line 46) | def test_jit_per_tensor_quant_compare_implementations( function test_jit_per_tensor_quant_supports_3d (line 62) | def test_jit_per_tensor_quant_supports_3d(shape): FILE: python/sglang/jit_kernel/tests/test_per_token_group_quant_8bit.py function test_per_token_group_quant_with_column_major (line 108) | def test_per_token_group_quant_with_column_major( FILE: python/sglang/jit_kernel/tests/test_pos_enc.py function burn_kernel (line 13) | def burn_kernel(out_ptr, iters: tl.constexpr): function triton_burn (line 29) | def triton_burn(ms: float, grid=(256,)): function create_test_inputs (line 36) | def create_test_inputs( function create_cos_sin_cache (line 59) | def create_cos_sin_cache(rotary_dim, max_position_embeddings, base, dtyp... function _apply_rotary_emb (line 86) | def _apply_rotary_emb( class RotaryEmbedding (line 115) | class RotaryEmbedding(torch.nn.Module): method __init__ (line 117) | def __init__( method _compute_inv_freq (line 138) | def _compute_inv_freq(self, base: Union[int, float]) -> torch.Tensor: method _compute_cos_sin_cache (line 147) | def _compute_cos_sin_cache(self) -> torch.Tensor: method forward_native (line 158) | def forward_native( function get_torch_rotary_embedding (line 199) | def get_torch_rotary_embedding( function get_sgl_rotary_embedding (line 213) | def get_sgl_rotary_embedding( function compare_results (line 234) | def compare_results(jit_out, sgl_out, dtype): function test_correctness (line 284) | def test_correctness( function test_performance (line 373) | def test_performance( FILE: python/sglang/jit_kernel/tests/test_qknorm.py function sglang_aot_qknorm (line 10) | def sglang_aot_qknorm( function sglang_jit_qknorm (line 25) | def sglang_jit_qknorm( function flashinfer_qknorm (line 36) | def flashinfer_qknorm( function torch_impl_qknorm (line 49) | def torch_impl_qknorm( function test_qknorm (line 82) | def test_qknorm(batch_size: int, n_k: int, n_q: int, head_dim: int) -> N... FILE: python/sglang/jit_kernel/tests/test_qknorm_across_heads.py function sglang_jit_qknorm_across_heads (line 10) | def sglang_jit_qknorm_across_heads( function sglang_aot_qknorm_across_heads (line 21) | def sglang_aot_qknorm_across_heads( function torch_impl_qknorm_across_heads (line 34) | def torch_impl_qknorm_across_heads( function test_qknorm_across_heads (line 61) | def test_qknorm_across_heads(batch_size: int, hidden_dim: int) -> None: FILE: python/sglang/jit_kernel/tests/test_qwen_image_modulation.py function _tol (line 23) | def _tol(dtype: torch.dtype) -> tuple[float, float]: function _make_modulation_tensors (line 29) | def _make_modulation_tensors(batch_size: int, hidden_size: int, dtype: t... function _baseline_select01_modulation (line 39) | def _baseline_select01_modulation( function _baseline_residual_select01_modulation (line 72) | def _baseline_residual_select01_modulation( function cuda_setup (line 109) | def cuda_setup(): function test_fused_layernorm_scale_shift_gate_select01 (line 119) | def test_fused_layernorm_scale_shift_gate_select01( function test_fused_residual_layernorm_scale_shift_gate_select01 (line 166) | def test_fused_residual_layernorm_scale_shift_gate_select01( FILE: python/sglang/jit_kernel/tests/test_renorm.py function test_top_k_renorm_probs (line 12) | def test_top_k_renorm_probs(batch_size, vocab_size, k): function test_top_p_renorm_probs (line 47) | def test_top_p_renorm_probs(batch_size, vocab_size, p): function test_top_k_mask_logits (line 82) | def test_top_k_mask_logits(batch_size, vocab_size, k, neginf_input): FILE: python/sglang/jit_kernel/tests/test_rmsnorm.py function sglang_jit_rmsnorm (line 10) | def sglang_jit_rmsnorm(input: torch.Tensor, weight: torch.Tensor) -> None: function flashinfer_rmsnorm (line 16) | def flashinfer_rmsnorm(input: torch.Tensor, weight: torch.Tensor) -> None: function test_rmsnorm (line 36) | def test_rmsnorm(batch_size: int, hidden_size: int) -> None: FILE: python/sglang/jit_kernel/tests/test_rope.py function create_cos_sin_cache (line 14) | def create_cos_sin_cache( function sglang_jit_rope (line 40) | def sglang_jit_rope( function flashinfer_rope (line 52) | def flashinfer_rope( function torch_impl_rope (line 75) | def torch_impl_rope( function test_rope (line 110) | def test_rope( function test_rope_position_dtypes (line 138) | def test_rope_position_dtypes(dtype: torch.dtype) -> None: function test_partial_rope (line 163) | def test_partial_rope(batch_size: int, is_neox: bool, rope_dim: int, hea... function test_fused_rope_store (line 190) | def test_fused_rope_store( FILE: python/sglang/jit_kernel/tests/test_store_cache.py function test_store_cache (line 24) | def test_store_cache(batch_size: int, element_dim: int) -> None: function test_store_cache_dtypes (line 49) | def test_store_cache_dtypes( function test_store_cache_int32_indices (line 68) | def test_store_cache_int32_indices(batch_size: int, element_dim: int) ->... function _valid_num_splits (line 82) | def _valid_num_splits(element_dim: int, dtype: torch.dtype) -> list: function test_store_cache_num_split (line 102) | def test_store_cache_num_split( function test_can_use_store_cache (line 119) | def test_can_use_store_cache() -> None: FILE: python/sglang/jit_kernel/tests/test_timestep_embedding.py function get_timestep_embedding_reference (line 37) | def get_timestep_embedding_reference( function test_timestep_embedding_correctness_with_sgld (line 73) | def test_timestep_embedding_correctness_with_sgld(batch_size, dim, dtype): function test_timestep_embedding_correctness_with_diffusers (line 91) | def test_timestep_embedding_correctness_with_diffusers( function test_timestep_embedding_perf (line 115) | def test_timestep_embedding_perf(): FILE: python/sglang/jit_kernel/timestep_embedding.py function _jit_timestep_embedding_module (line 14) | def _jit_timestep_embedding_module(dtype: torch.dtype) -> Module: function timestep_embedding (line 24) | def timestep_embedding( FILE: python/sglang/jit_kernel/utils.py function is_in_ci (line 17) | def is_in_ci() -> bool: function should_run_full_tests (line 22) | def should_run_full_tests() -> bool: function get_ci_test_range (line 26) | def get_ci_test_range(full_range: List[Any], ci_range: List[Any]) -> Lis... function cache_once (line 32) | def cache_once(fn: F) -> F: function _make_wrapper (line 49) | def _make_wrapper(tup: Tuple[str, str]) -> str: function _resolve_kernel_path (line 55) | def _resolve_kernel_path() -> pathlib.Path: class CPPArgList (line 86) | class CPPArgList(list[str]): method __str__ (line 87) | def __str__(self) -> str: function is_hip_runtime (line 104) | def is_hip_runtime() -> bool: function make_cpp_args (line 108) | def make_cpp_args(*args: CPP_TEMPLATE_TYPE) -> CPPArgList: function load_jit (line 121) | def load_jit( function is_arch_support_pdl (line 215) | def is_arch_support_pdl() -> bool: function _get_cuda_arch_value (line 223) | def _get_cuda_arch_value() -> int: function _get_cuda_arch_list (line 231) | def _get_cuda_arch_list() -> str: FILE: python/sglang/lang/api.py function function (line 23) | def function( function Runtime (line 35) | def Runtime(*args, **kwargs): function Engine (line 42) | def Engine(*args, **kwargs): function set_default_backend (line 49) | def set_default_backend(backend: BaseBackend): function flush_cache (line 53) | def flush_cache(backend: Optional[BaseBackend] = None): function get_server_info (line 64) | def get_server_info(backend: Optional[BaseBackend] = None): function gen (line 75) | def gen( function gen_int (line 142) | def gen_int( function gen_string (line 185) | def gen_string( function image (line 228) | def image(expr: SglExpr): function video (line 232) | def video(path: str, num_frames: int): function select (line 236) | def select( function _role_common (line 246) | def _role_common(name: str, expr: Optional[SglExpr] = None): function system (line 253) | def system(expr: Optional[SglExpr] = None): function user (line 257) | def user(expr: Optional[SglExpr] = None): function assistant (line 261) | def assistant(expr: Optional[SglExpr] = None): function system_begin (line 265) | def system_begin(): function system_end (line 269) | def system_end(): function user_begin (line 273) | def user_begin(): function user_end (line 277) | def user_end(): function assistant_begin (line 281) | def assistant_begin(): function assistant_end (line 285) | def assistant_end(): function separate_reasoning (line 289) | def separate_reasoning( FILE: python/sglang/lang/backend/anthropic.py class Anthropic (line 12) | class Anthropic(BaseBackend): method __init__ (line 13) | def __init__(self, model_name, *args, **kwargs): method get_chat_template (line 23) | def get_chat_template(self): method generate (line 26) | def generate( method generate_stream (line 51) | def generate_stream( FILE: python/sglang/lang/backend/base_backend.py class BaseBackend (line 9) | class BaseBackend: method __init__ (line 10) | def __init__(self) -> None: method get_model_name (line 14) | def get_model_name(self): method get_chat_template (line 17) | def get_chat_template(self): method cache_prefix (line 20) | def cache_prefix(self, prefix_str: str): method uncache_prefix (line 23) | def uncache_prefix(self, rid: str): method end_request (line 26) | def end_request(self, rid: Union[str, List[str]]): method begin_program (line 29) | def begin_program(self, s: StreamExecutor): method end_program (line 32) | def end_program(self, s: Union[StreamExecutor, List[StreamExecutor]]): method commit_lazy_operations (line 35) | def commit_lazy_operations(self, s: StreamExecutor): method fork_program (line 38) | def fork_program( method fill_image (line 46) | def fill_image(self, s: StreamExecutor): method generate (line 49) | def generate( method generate_stream (line 56) | def generate_stream( method select (line 63) | def select( method concatenate_and_append (line 72) | def concatenate_and_append(self, src_rids: List[str], dst_rid: str): method shutdown (line 75) | def shutdown(self): method flush_cache (line 78) | def flush_cache(self): method get_server_info (line 81) | def get_server_info(self): FILE: python/sglang/lang/backend/litellm.py class LiteLLM (line 15) | class LiteLLM(BaseBackend): method __init__ (line 16) | def __init__( method get_chat_template (line 47) | def get_chat_template(self): method generate (line 50) | def generate( method generate_stream (line 70) | def generate_stream( FILE: python/sglang/lang/backend/openai.py function create_logit_bias_int (line 25) | def create_logit_bias_int(tokenizer): class TokenUsage (line 48) | class TokenUsage: method reset (line 52) | def reset(self): class OpenAI (line 56) | class OpenAI(BaseBackend): method __init__ (line 57) | def __init__( method get_chat_template (line 106) | def get_chat_template(self): method _prepare_spec_execution (line 109) | def _prepare_spec_execution( method generate (line 140) | def generate( method spec_fill (line 224) | def spec_fill(self, value: str): method spec_pattern_match (line 228) | def spec_pattern_match(self, comp): method role_end_generate (line 248) | def role_end_generate( method generate_stream (line 283) | def generate_stream( method select (line 312) | def select( function openai_completion (line 383) | def openai_completion( function openai_completion_stream (line 425) | def openai_completion_stream( FILE: python/sglang/lang/backend/runtime_endpoint.py class RuntimeEndpoint (line 26) | class RuntimeEndpoint(BaseBackend): method __init__ (line 27) | def __init__( method get_model_name (line 56) | def get_model_name(self): method flush_cache (line 59) | def flush_cache(self): method get_server_info (line 68) | def get_server_info(self): method get_chat_template (line 77) | def get_chat_template(self): method cache_prefix (line 80) | def cache_prefix(self, prefix_str: str): method start_profile (line 89) | def start_profile(self): method stop_profile (line 97) | def stop_profile(self): method commit_lazy_operations (line 105) | def commit_lazy_operations(self, s: StreamExecutor): method fill_image (line 116) | def fill_image(self, s: StreamExecutor): method _handle_dtype_to_regex (line 127) | def _handle_dtype_to_regex(self, sampling_params: SglSamplingParams): method generate (line 159) | def generate( method generate_stream (line 198) | def generate_stream( method select (line 248) | def select( method concatenate_and_append (line 317) | def concatenate_and_append(self, src_rids: List[str], dst_rid: str): method _generate_http_request (line 326) | def _generate_http_request(self, s: StreamExecutor, data): method _add_images (line 337) | def _add_images(self, s: StreamExecutor, data): method _assert_success (line 342) | def _assert_success(self, res): function compute_normalized_prompt_logprobs (line 351) | def compute_normalized_prompt_logprobs(input_logprobs): class Runtime (line 356) | class Runtime: method __init__ (line 366) | def __init__( method shutdown (line 436) | def shutdown(self): method start_profile (line 443) | def start_profile(self): method stop_profile (line 446) | def stop_profile(self): method cache_prefix (line 449) | def cache_prefix(self, prefix: str): method get_tokenizer (line 452) | def get_tokenizer(self): method async_generate (line 462) | async def async_generate( method generate (line 500) | def generate( method encode (line 524) | def encode( method get_server_info (line 532) | async def get_server_info(self): method __del__ (line 543) | def __del__(self): FILE: python/sglang/lang/backend/vertexai.py class VertexAI (line 20) | class VertexAI(BaseBackend): method __init__ (line 21) | def __init__(self, model_name, safety_settings=None): method get_chat_template (line 35) | def get_chat_template(self): method generate (line 38) | def generate( method generate_stream (line 62) | def generate_stream( method text_to_vertexai_input (line 85) | def text_to_vertexai_input(self, text, images): method messages_to_vertexai_input (line 99) | def messages_to_vertexai_input(self, messages): FILE: python/sglang/lang/chat_template.py class ChatTemplateStyle (line 7) | class ChatTemplateStyle(Enum): class ChatTemplate (line 13) | class ChatTemplate: method get_prefix_and_suffix (line 22) | def get_prefix_and_suffix( method get_prompt (line 43) | def get_prompt(self, messages: List[Dict]) -> str: function register_chat_template (line 61) | def register_chat_template(template): function register_chat_template_matching_function (line 65) | def register_chat_template_matching_function(func): function get_chat_template (line 69) | def get_chat_template(name): function get_chat_template_by_model_path (line 73) | def get_chat_template_by_model_path(model_path): function match_deepseek (line 528) | def match_deepseek(model_path: str): function match_orion (line 536) | def match_orion(model_path: str): function match_deepseek_janus_pro (line 542) | def match_deepseek_janus_pro(model_path: str): function match_dbrx (line 548) | def match_dbrx(model_path: str): function match_vicuna (line 556) | def match_vicuna(model_path: str): function match_llama2_chat (line 562) | def match_llama2_chat(model_path: str): function match_mistral (line 572) | def match_mistral(model_path: str): function match_llama3_instruct (line 578) | def match_llama3_instruct(model_path: str): function match_chat_ml (line 584) | def match_chat_ml(model_path: str): function match_chat_yi (line 604) | def match_chat_yi(model_path: str): function match_gemma_it (line 614) | def match_gemma_it(model_path: str): function match_openbmb_minicpm (line 620) | def match_openbmb_minicpm(model_path: str): function match_c4ai_command_r (line 628) | def match_c4ai_command_r(model_path: str): function match_granite_instruct (line 634) | def match_granite_instruct(model_path: str): function match_gemma3_instruct (line 640) | def match_gemma3_instruct(model_path: str): function match_internvl_chat (line 646) | def match_internvl_chat(model_path: str): function match_interns1_chat (line 652) | def match_interns1_chat(model_path: str): FILE: python/sglang/lang/choices.py class ChoicesDecision (line 9) | class ChoicesDecision: class ChoicesSamplingMethod (line 14) | class ChoicesSamplingMethod(ABC): method requires_unconditional_logprobs (line 17) | def requires_unconditional_logprobs(self) -> bool: method __call__ (line 21) | def __call__( class TokenLengthNormalized (line 32) | class TokenLengthNormalized(ChoicesSamplingMethod): method __call__ (line 34) | def __call__( class GreedyTokenSelection (line 56) | class GreedyTokenSelection(ChoicesSamplingMethod): method __call__ (line 58) | def __call__( method _build_logprob_matrix (line 87) | def _build_logprob_matrix(self, input_token_logprobs, max_tokens, num_... method _greedy_selection (line 97) | def _greedy_selection(self, logprob_matrix, num_options, max_tokens): class UnconditionalLikelihoodNormalized (line 110) | class UnconditionalLikelihoodNormalized(ChoicesSamplingMethod): method requires_unconditional_logprobs (line 113) | def requires_unconditional_logprobs(self) -> bool: method __call__ (line 116) | def __call__( method _normalize_logprobs (line 150) | def _normalize_logprobs(self, input_token_logprobs, unconditional_toke... FILE: python/sglang/lang/interpreter.py function run_internal (line 42) | def run_internal(state, program, func_args, func_kwargs, sync): function run_program (line 57) | def run_program( function run_program_batch (line 93) | def run_program_batch( function _run_program_batch_generator (line 184) | def _run_program_batch_generator( function cache_program (line 242) | def cache_program(program, backend): class StreamExecutor (line 250) | class StreamExecutor: method __init__ (line 253) | def __init__( method submit (line 318) | def submit(self, expr: SglExpr): method sync (line 326) | def sync(self): method get_var (line 330) | def get_var(self, name): method set_var (line 335) | def set_var(self, name, value): method get_meta_info (line 338) | def get_meta_info(self, name, timeout=None): method fork (line 346) | def fork( method text (line 380) | def text(self): method messages (line 384) | def messages(self): method error (line 388) | def error(self): method end (line 392) | def end(self): method _thread_worker_func (line 398) | def _thread_worker_func(self): method _execute (line 437) | def _execute(self, other): method _execute_fill (line 481) | def _execute_fill(self, value: str, prefix=False): method _execute_image (line 500) | def _execute_image(self, expr: SglImage): method _execute_video (line 509) | def _execute_video(self, expr: SglVideo): method _spec_gen (line 519) | def _spec_gen(self, sampling_params): method _execute_gen (line 569) | def _execute_gen(self, expr: SglGen): method _execute_select (line 623) | def _execute_select(self, expr: SglSelect): method _execute_variable (line 636) | def _execute_variable(self, expr: SglVariable): method _execute_role_begin (line 641) | def _execute_role_begin(self, expr: SglRoleBegin): method _execute_role_end (line 659) | def _execute_role_end(self, expr: SglRoleEnd): method _execute_var_scope_begin (line 695) | def _execute_var_scope_begin(self, expr: SglVarScopeBegin): method _execute_var_scope_end (line 698) | def _execute_var_scope_end(self, expr: SglVarScopeEnd): method _execute_commit_lazy_operations (line 702) | def _execute_commit_lazy_operations(self, expr: SglCommitLazy): method _execute_concatenate_and_append_text (line 705) | def _execute_concatenate_and_append_text(self, expr: SglConcateAndAppe... method _execute_concatenate_and_append_kv_cache (line 714) | def _execute_concatenate_and_append_kv_cache(self, expr: SglConcateAnd... method _execute_separate_reasoning (line 730) | def _execute_separate_reasoning(self, expr: SglSeparateReasoning): method _init_var_event (line 764) | def _init_var_event(self, expr): method _resolve_sampling_params (line 775) | def _resolve_sampling_params(self, sampling_params): method __del__ (line 824) | def __del__(self): class ProgramState (line 828) | class ProgramState: method __init__ (line 831) | def __init__(self, stream_executor: StreamExecutor): method _role_common (line 834) | def _role_common(self, name: str, expr: Optional[SglExpr] = None): method system (line 849) | def system(self, expr: Optional[SglExpr] = None): method user (line 852) | def user(self, expr: Optional[SglExpr] = None): method assistant (line 855) | def assistant(self, expr: Optional[SglExpr] = None): method var_scope (line 859) | def var_scope(self, name: str): method fork (line 864) | def fork( method copy (line 875) | def copy(self, position_ids_offset: Optional[List[int]] = None): method text (line 882) | def text(self): method messages (line 885) | def messages(self): method sync (line 888) | def sync(self): method error (line 891) | def error(self): method text_iter (line 894) | def text_iter(self, var_name: Optional[str] = None): method text_async_iter (line 932) | async def text_async_iter( method get_var (line 977) | def get_var(self, name): method set_var (line 980) | def set_var(self, name, value): method get_meta_info (line 983) | def get_meta_info(self, name): method __iadd__ (line 986) | def __iadd__(self, other): method __getitem__ (line 992) | def __getitem__(self, name): method __setitem__ (line 995) | def __setitem__(self, name, value): method __contains__ (line 998) | def __contains__(self, name): method __del__ (line 1001) | def __del__(self): method __repr__ (line 1004) | def __repr__(self) -> str: class ProgramStateGroup (line 1008) | class ProgramStateGroup: method __init__ (line 1009) | def __init__( method join (line 1015) | def join(self, mode: str = "gather_variable"): method __getitem__ (line 1041) | def __getitem__(self, i: int): method __setitem__ (line 1044) | def __setitem__(self, i: int, value): method __iadd__ (line 1047) | def __iadd__(self, other): FILE: python/sglang/lang/ir.py class SglSamplingParams (line 18) | class SglSamplingParams: method clone (line 42) | def clone(self): method to_openai_kwargs (line 64) | def to_openai_kwargs(self): method to_vertexai_kwargs (line 79) | def to_vertexai_kwargs(self): method to_anthropic_kwargs (line 93) | def to_anthropic_kwargs(self): method to_litellm_kwargs (line 109) | def to_litellm_kwargs(self): method to_srt_kwargs (line 121) | def to_srt_kwargs(self): class SglFunction (line 141) | class SglFunction: method __init__ (line 142) | def __init__(self, func, num_api_spec_tokens=None, bind_arguments=None): method bind (line 154) | def bind(self, **kwargs): method run (line 160) | def run( method run_batch (line 223) | def run_batch( method trace (line 304) | def trace(self, *, backend=None, **kwargs): method cache (line 310) | def cache(self, backend=None): method __call__ (line 316) | def __call__(self, *args, **kwargs): class SglExpr (line 327) | class SglExpr: method __init__ (line 330) | def __init__(self): method __add__ (line 336) | def __add__(self, other): method __radd__ (line 343) | def __radd__(self, other): method concatenate_ir (line 350) | def concatenate_ir(self, a, b): method print_graph_dfs (line 361) | def print_graph_dfs(self): class SglExprList (line 397) | class SglExprList(SglExpr): method __init__ (line 398) | def __init__(self, expr_list: List[SglExpr]): method __repr__ (line 402) | def __repr__(self): class SglArgument (line 406) | class SglArgument(SglExpr): method __init__ (line 407) | def __init__(self, name: str, value: str): method __repr__ (line 412) | def __repr__(self): method __len__ (line 415) | def __len__(self): method __getitem__ (line 418) | def __getitem__(self, i): method __int__ (line 421) | def __int__(self): method __bool__ (line 424) | def __bool__(self): method __format__ (line 427) | def __format__(self, *args): class SglImage (line 434) | class SglImage(SglExpr): method __init__ (line 435) | def __init__(self, path: str): method __repr__ (line 438) | def __repr__(self) -> str: class SglVideo (line 442) | class SglVideo(SglExpr): method __init__ (line 443) | def __init__(self, path: str, num_frames: int): method __repr__ (line 447) | def __repr__(self) -> str: class SglGen (line 451) | class SglGen(SglExpr): method __init__ (line 452) | def __init__( method __repr__ (line 502) | def __repr__(self): class SglConstantText (line 506) | class SglConstantText(SglExpr): method __init__ (line 507) | def __init__(self, value: str): method __repr__ (line 511) | def __repr__(self): class SglRoleBegin (line 515) | class SglRoleBegin(SglExpr): method __init__ (line 516) | def __init__(self, role: str): method __repr__ (line 520) | def __repr__(self): class SglRoleEnd (line 524) | class SglRoleEnd(SglExpr): method __init__ (line 525) | def __init__(self, role: str): method __repr__ (line 529) | def __repr__(self): class SglSelect (line 533) | class SglSelect(SglExpr): method __init__ (line 535) | def __init__( method __repr__ (line 548) | def __repr__(self): class SglFork (line 552) | class SglFork(SglExpr): method __init__ (line 553) | def __init__(self, number: int, position_ids_offset=None): method __repr__ (line 558) | def __repr__(self): class SglGetForkItem (line 565) | class SglGetForkItem(SglExpr): method __init__ (line 566) | def __init__(self, index: int): method __repr__ (line 570) | def __repr__(self): class SglVariable (line 574) | class SglVariable(SglExpr): method __init__ (line 575) | def __init__(self, name: str, source): method __repr__ (line 580) | def __repr__(self): class SglVarScopeBegin (line 584) | class SglVarScopeBegin(SglExpr): method __init__ (line 585) | def __init__(self, name: str): method __repr__ (line 589) | def __repr__(self): class SglVarScopeEnd (line 593) | class SglVarScopeEnd(SglExpr): method __init__ (line 594) | def __init__(self, name: str): method __repr__ (line 598) | def __repr__(self): class SglConcateAndAppend (line 602) | class SglConcateAndAppend(SglExpr): method __init__ (line 603) | def __init__(self, states): method __repr__ (line 607) | def __repr__(self): class SglCommitLazy (line 611) | class SglCommitLazy(SglExpr): method __init__ (line 612) | def __init__(self): method __repr__ (line 615) | def __repr__(self): class SglSeparateReasoning (line 619) | class SglSeparateReasoning(SglExpr): method __init__ (line 620) | def __init__(self, model_type: str, expr: SglExpr): method process_name_for_reasoning (line 628) | def process_name_for_reasoning(self, name): method _process_expr (line 633) | def _process_expr(self, expr): method __repr__ (line 642) | def __repr__(self): FILE: python/sglang/lang/tracer.py class StopTracing (line 25) | class StopTracing(Exception): function extract_prefix_by_tracing (line 29) | def extract_prefix_by_tracing(program, backend): function trace_program (line 54) | def trace_program(program, arguments, backend): class TracerProgramState (line 75) | class TracerProgramState(ProgramState): method __init__ (line 76) | def __init__(self, backend, arguments, only_trace_prefix): method fork (line 108) | def fork(self, size: int = 1, position_ids_offset: Optional[List[int]]... method _append_node (line 139) | def _append_node(self, other: SglExpr): method _execute (line 144) | def _execute(self, other: SglExpr): method __iadd__ (line 175) | def __iadd__(self, other): method _execute_fill (line 179) | def _execute_fill(self, expr: SglConstantText): method _execute_gen (line 184) | def _execute_gen(self, expr: SglGen): method _execute_select (line 190) | def _execute_select(self, expr: SglSelect): method _execute_role_begin (line 198) | def _execute_role_begin(self, expr: SglRoleBegin): method _execute_role_end (line 217) | def _execute_role_end(self, expr: SglRoleEnd): method _execute_var_scope_end (line 228) | def _execute_var_scope_end(self, expr: SglVarScopeEnd): method get_var (line 232) | def get_var(self, name): method flatten_nodes (line 240) | def flatten_nodes(self): method __del__ (line 253) | def __del__(self): class TracingScope (line 257) | class TracingScope: method __init__ (line 260) | def __init__(self, tracer_state: TracerProgramState): method __enter__ (line 264) | def __enter__(self): method __exit__ (line 268) | def __exit__(self, exc_type, exc_value, traceback): method get_current_scope (line 272) | def get_current_scope(): method add_child_state (line 275) | def add_child_state(self, state: TracerProgramState): FILE: python/sglang/launch_server.py function run_server (line 15) | def run_server(server_args): FILE: python/sglang/multimodal_gen/.claude/skills/diffusion-kernel/scripts/bench_diffusion_denoise.py function required_gpus_for_model (line 230) | def required_gpus_for_model(model_key: str) -> int: function build_sglang_cmd (line 238) | def build_sglang_cmd( function run_benchmark_once (line 284) | def run_benchmark_once( function print_results_table (line 377) | def print_results_table(results: list[dict]): function inject_kernels_example (line 420) | def inject_kernels_example(): function main (line 475) | def main(): FILE: python/sglang/multimodal_gen/.claude/skills/diffusion-kernel/scripts/bench_diffusion_rmsnorm.py function pytorch_rmsnorm (line 52) | def pytorch_rmsnorm( function benchmark_kernel (line 64) | def benchmark_kernel( function run_benchmark (line 86) | def run_benchmark(): FILE: python/sglang/multimodal_gen/.claude/skills/diffusion-kernel/scripts/diffusion_skill_env.py function get_repo_root (line 16) | def get_repo_root() -> Path: function get_assets_dir (line 22) | def get_assets_dir(repo_root: Path | None = None) -> Path: function get_output_dir (line 27) | def get_output_dir(name: str, repo_root: Path | None = None) -> Path: function ensure_dir (line 34) | def ensure_dir(path: Path) -> Path: function check_write_access (line 39) | def check_write_access(repo_root: Path | None = None) -> Path: function _run_nvidia_smi (line 47) | def _run_nvidia_smi(query: str) -> list[list[str]]: function get_gpu_inventory (line 63) | def get_gpu_inventory() -> list[dict[str, int | str]]: function get_busy_gpu_uuids (line 79) | def get_busy_gpu_uuids() -> set[str]: function pick_idle_gpus (line 84) | def pick_idle_gpus( function configure_runtime_env (line 107) | def configure_runtime_env(required_gpus: int = 1) -> str | None: function main (line 116) | def main() -> None: FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/core/generator.py class SGLDiffusionGenerator (line 35) | class SGLDiffusionGenerator: method __init__ (line 38) | def __init__(self): method __del__ (line 57) | def __del__(self): method init_generator (line 60) | def init_generator( method kill_generator (line 77) | def kill_generator(self): method close_generator (line 115) | def close_generator(self): method get_comfyui_model (line 126) | def get_comfyui_model(self, model_path: str, model_options: dict = None): method load_model (line 185) | def load_model( FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/core/model_patcher.py class SGLDModelPatcher (line 10) | class SGLDModelPatcher(ModelPatcher): method __init__ (line 13) | def __init__( method clone (line 32) | def clone(self): method model_size (line 53) | def model_size(self): method load (line 60) | def load( method patch_model (line 70) | def patch_model( method unpatch_model (line 80) | def unpatch_model(self, device_to=None, unpatch_weights=True): FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/core/server_api.py class SGLDiffusionServerAPI (line 16) | class SGLDiffusionServerAPI: method __init__ (line 19) | def __init__(self, base_url: str, api_key: str = "sk-proj-1234567890"): method get_model_info (line 41) | def get_model_info(self) -> Dict[str, Any]: method generate_image (line 63) | def generate_image( method generate_video (line 205) | def generate_video( method _build_image_common_params (line 351) | def _build_image_common_params( method _get_content_type (line 399) | def _get_content_type(self, file_path: str) -> str: method decode_image_from_response (line 410) | def decode_image_from_response( method set_lora (line 442) | def set_lora( method unset_lora (line 489) | def unset_lora( FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/executors/base.py class SGLDiffusionExecutor (line 8) | class SGLDiffusionExecutor(torch.nn.Module): method __init__ (line 11) | def __init__(self, generator, model_path, model, config): method should_suppress_logs (line 21) | def should_suppress_logs(timestep): method set_lora (line 27) | def set_lora(self, lora_nickname=None, lora_path=None, strength=None, ... method _unpack_latents (line 37) | def _unpack_latents(self, latents, height, width, channels): method _pack_latents (line 46) | def _pack_latents(self, latents): FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/executors/flux.py class FluxExecutor (line 18) | class FluxExecutor(SGLDiffusionExecutor): method __init__ (line 21) | def __init__(self, generator, model_path, model, config): method forward (line 24) | def forward(self, x, timestep, context, y=None, guidance=None, **kwargs): FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/executors/qwen_image.py class QwenImageExecutor (line 20) | class QwenImageExecutor(SGLDiffusionExecutor): method __init__ (line 23) | def __init__(self, generator, model_path, model, config): method _pack_latents (line 27) | def _pack_latents(self, x): method _unpack_latents (line 52) | def _unpack_latents(self, latents, num_embeds, orig_shape, x): method forward (line 67) | def forward(self, x, timestep, context, **kwargs): class QwenImageEditExecutor (line 108) | class QwenImageEditExecutor(QwenImageExecutor): method __init__ (line 111) | def __init__(self, generator, model_path, model, config): method forward (line 114) | def forward( FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/executors/zimage.py class ZImageExecutor (line 18) | class ZImageExecutor(SGLDiffusionExecutor): method __init__ (line 21) | def __init__(self, generator, model_path, model, config): method forward (line 24) | def forward(self, x, timesteps, context, **kwargs): FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/nodes.py class SGLDOptions (line 21) | class SGLDOptions: method INPUT_TYPES (line 23) | def INPUT_TYPES(cls): method create_options (line 72) | def create_options( class SGLDLoraLoader (line 113) | class SGLDLoraLoader: method INPUT_TYPES (line 115) | def INPUT_TYPES(cls): method load_lora (line 137) | def load_lora( class SGLDUNETLoader (line 166) | class SGLDUNETLoader: method __init__ (line 167) | def __init__(self): method INPUT_TYPES (line 171) | def INPUT_TYPES(s): method load_unet (line 187) | def load_unet(self, unet_name, weight_dtype, sgld_options: dict = None): class SGLDiffusionServerModel (line 202) | class SGLDiffusionServerModel: method INPUT_TYPES (line 206) | def INPUT_TYPES(cls): method load_server (line 231) | def load_server(self, base_url: str, api_key: str): class SGLDiffusionGenerateImage (line 246) | class SGLDiffusionGenerateImage: method INPUT_TYPES (line 250) | def INPUT_TYPES(cls): method generate_image (line 336) | def generate_image( class SGLDiffusionGenerateVideo (line 397) | class SGLDiffusionGenerateVideo: method INPUT_TYPES (line 401) | def INPUT_TYPES(cls): method generate_video (line 514) | def generate_video( class SGLDiffusionServerSetLora (line 579) | class SGLDiffusionServerSetLora: method INPUT_TYPES (line 583) | def INPUT_TYPES(cls): method set_lora (line 624) | def set_lora( class SGLDiffusionServerUnsetLora (line 650) | class SGLDiffusionServerUnsetLora: method INPUT_TYPES (line 654) | def INPUT_TYPES(cls): method unset_lora (line 681) | def unset_lora( FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/test/test_flux_pipeline.py function test_comfyui_flux_pipeline_direct (line 13) | def test_comfyui_flux_pipeline_direct() -> None: FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/test/test_qwen_image_edit_pipeline.py function test_comfyui_qwen_image_edit_pipeline_direct (line 13) | def test_comfyui_qwen_image_edit_pipeline_direct() -> None: FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/test/test_qwen_image_pipeline.py function test_comfyui_qwen_image_pipeline_direct (line 13) | def test_comfyui_qwen_image_pipeline_direct() -> None: FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/test/test_zimage_pipeline.py function test_comfyui_zimage_pipeline_direct (line 13) | def test_comfyui_zimage_pipeline_direct() -> None: FILE: python/sglang/multimodal_gen/apps/ComfyUI_SGLDiffusion/utils.py function _ensure_dir (line 15) | def _ensure_dir(path: str) -> None: function _to_numpy_image (line 19) | def _to_numpy_image(image: torch.Tensor) -> np.ndarray: function _to_hwc_tensor (line 35) | def _to_hwc_tensor(image: torch.Tensor) -> torch.Tensor: function is_empty_image (line 52) | def is_empty_image(image: torch.Tensor, tolerance: float = 1e-6) -> bool: function get_image_path (line 80) | def get_image_path(image: torch.Tensor) -> str: function convert_b64_to_tensor_image (line 100) | def convert_b64_to_tensor_image(b64_image: str) -> torch.Tensor: class SGLDVideoInput (line 131) | class SGLDVideoInput(VideoInput): method __init__ (line 132) | def __init__(self, video_path: str, height: int, width: int): method get_dimensions (line 139) | def get_dimensions(self) -> tuple[int, int]: method get_components (line 148) | def get_components(self): method save_to (line 155) | def save_to(self, path: str, format=None, codec=None, metadata=None): function convert_video_to_comfy_video (line 169) | def convert_video_to_comfy_video( FILE: python/sglang/multimodal_gen/apps/webui/main.py function add_webui_args (line 20) | def add_webui_args(parser: argparse.ArgumentParser): function run_sgl_diffusion_webui (line 27) | def run_sgl_diffusion_webui(server_args: ServerArgs): FILE: python/sglang/multimodal_gen/benchmarks/bench_offline_throughput.py class BatchOutput (line 50) | class BatchOutput: class BenchArgs (line 63) | class BenchArgs: method add_cli_args (line 91) | def add_cli_args(parser: argparse.ArgumentParser): method from_cli_args (line 171) | def from_cli_args(cls, args: argparse.Namespace): function initialize_engine (line 177) | def initialize_engine(server_args: ServerArgs) -> DiffGenerator: function generate_batch (line 185) | def generate_batch( function calculate_metrics (line 225) | def calculate_metrics( function throughput_test (line 265) | def throughput_test( function display_results (line 349) | def display_results( function save_results (line 390) | def save_results( function main (line 421) | def main(): FILE: python/sglang/multimodal_gen/benchmarks/bench_serving.py function _compute_scale_factor (line 50) | def _compute_scale_factor(req: RequestFuncInput, args) -> Optional[float]: function _compute_expected_latency_ms_from_base (line 66) | def _compute_expected_latency_ms_from_base( function _infer_slo_base_time_ms_from_warmups (line 78) | def _infer_slo_base_time_ms_from_warmups( function _populate_slo_ms_from_warmups (line 99) | def _populate_slo_ms_from_warmups( function async_request_image_sglang (line 129) | async def async_request_image_sglang( function async_request_video_sglang (line 223) | async def async_request_video_sglang( function calculate_metrics (line 380) | def calculate_metrics( function wait_for_service (line 434) | def wait_for_service(base_url: str, timeout: int = 1200) -> None: function benchmark (line 455) | async def benchmark(args): FILE: python/sglang/multimodal_gen/benchmarks/compare_perf.py function calculate_diff (line 9) | def calculate_diff(base: float, new: float) -> Tuple[float, float]: function calculate_upper_bound (line 19) | def calculate_upper_bound(baseline: float, rel_tol: float, min_abs_tol: ... function calculate_lower_bound (line 26) | def calculate_lower_bound(baseline: float, rel_tol: float, min_abs_tol: ... function get_perf_status_emoji (line 33) | def get_perf_status_emoji( function consolidate_steps (line 57) | def consolidate_steps( function _load_benchmark_file (line 106) | def _load_benchmark_file(file_path: str) -> Dict[str, Any]: function _get_status_emoji_from_diff_percent (line 112) | def _get_status_emoji_from_diff_percent(diff_pct): function _print_single_comparison_report (line 121) | def _print_single_comparison_report( function _print_multi_comparison_report (line 168) | def _print_multi_comparison_report( function compare_benchmarks (line 214) | def compare_benchmarks(file_paths: List[str], output_format: str = "mark... FILE: python/sglang/multimodal_gen/benchmarks/datasets.py class RequestFuncInput (line 20) | class RequestFuncInput: class RequestFuncOutput (line 36) | class RequestFuncOutput: function is_dir_not_empty (line 46) | def is_dir_not_empty(path: str) -> bool: class BaseDataset (line 50) | class BaseDataset(ABC): method __init__ (line 51) | def __init__(self, args, api_url: str = "", model: str = ""): method __len__ (line 58) | def __len__(self) -> int: method __getitem__ (line 62) | def __getitem__(self, idx: int) -> RequestFuncInput: method get_requests (line 65) | def get_requests(self) -> List[RequestFuncInput]: class VBenchDataset (line 69) | class VBenchDataset(BaseDataset): method __init__ (line 78) | def __init__(self, args, api_url: str = "", model: str = ""): method _load_data (line 83) | def _load_data(self) -> List[Dict[str, Any]]: method _download_file (line 93) | def _download_file(self, url: str, dest_path: str) -> None: method _load_t2v_prompts (line 101) | def _load_t2v_prompts(self) -> List[Dict[str, Any]]: method _auto_download_i2v_dataset (line 123) | def _auto_download_i2v_dataset(self) -> Optional[str]: method _load_from_i2v_json (line 176) | def _load_from_i2v_json(self, json_path: str) -> List[Dict[str, Any]]: method _scan_directory_for_images (line 197) | def _scan_directory_for_images(self, path: str) -> List[Dict[str, Any]]: method _create_dummy_data (line 216) | def _create_dummy_data(self) -> List[Dict[str, Any]]: method _load_i2v_data (line 229) | def _load_i2v_data(self) -> List[Dict[str, Any]]: method _resize_data (line 256) | def _resize_data(self, data: List[Dict[str, Any]]) -> List[Dict[str, A... method __len__ (line 267) | def __len__(self) -> int: method __getitem__ (line 270) | def __getitem__(self, idx: int) -> RequestFuncInput: class RandomDataset (line 284) | class RandomDataset(BaseDataset): method __init__ (line 285) | def __init__(self, args, api_url: str = "", model: str = ""): method __len__ (line 289) | def __len__(self) -> int: method __getitem__ (line 292) | def __getitem__(self, idx: int) -> RequestFuncInput: FILE: python/sglang/multimodal_gen/configs/models/adapter/base.py class AdapterArchConfig (line 10) | class AdapterArchConfig(ArchConfig): method __post_init__ (line 39) | def __post_init__(self) -> None: class AdapterConfig (line 45) | class AdapterConfig(ModelConfig): method add_cli_args (line 52) | def add_cli_args(parser: Any, prefix: str = "dit-config") -> Any: FILE: python/sglang/multimodal_gen/configs/models/adapter/ltx_2_connector.py class LTX2ConnectorArchConfig (line 10) | class LTX2ConnectorArchConfig(AdapterArchConfig): class LTX2ConnectorConfig (line 29) | class LTX2ConnectorConfig(AdapterConfig): FILE: python/sglang/multimodal_gen/configs/models/base.py class ArchConfig (line 16) | class ArchConfig: method __getattr__ (line 22) | def __getattr__(self, name: str): method __setattr__ (line 31) | def __setattr__(self, key, value): class ModelConfig (line 44) | class ModelConfig: method __getattr__ (line 52) | def __getattr__(self, name): method __getstate__ (line 60) | def __getstate__(self): method __setstate__ (line 66) | def __setstate__(self, state): method update_model_arch (line 71) | def update_model_arch(self, source_model_dict: dict[str, Any]) -> None: method update_model_config (line 83) | def update_model_config(self, source_model_dict: dict[str, Any]) -> None: FILE: python/sglang/multimodal_gen/configs/models/bridges/mova_dual_tower.py function _is_conditioner_block (line 9) | def _is_conditioner_block(name: str, module) -> bool: class MOVADualTowerArchConfig (line 15) | class MOVADualTowerArchConfig(DiTArchConfig): method __post_init__ (line 34) | def __post_init__(self): class MOVADualTowerConfig (line 41) | class MOVADualTowerConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/base.py class DiTArchConfig (line 13) | class DiTArchConfig(ArchConfig): method __post_init__ (line 47) | def __post_init__(self) -> None: class DiTConfig (line 53) | class DiTConfig(ModelConfig): method add_cli_args (line 61) | def add_cli_args(parser: Any, prefix: str = "dit-config") -> Any: FILE: python/sglang/multimodal_gen/configs/models/dits/flux.py class FluxArchConfig (line 11) | class FluxArchConfig(DiTArchConfig): method __post_init__ (line 68) | def __post_init__(self): class FluxConfig (line 76) | class FluxConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/glmimage.py class GlmImageArchConfig (line 7) | class GlmImageArchConfig(DiTArchConfig): method __post_init__ (line 28) | def __post_init__(self): class GlmImageDitConfig (line 36) | class GlmImageDitConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/helios.py function is_blocks (line 7) | def is_blocks(n: str, m) -> bool: class HeliosArchConfig (line 12) | class HeliosArchConfig(DiTArchConfig): method __post_init__ (line 69) | def __post_init__(self): class HeliosConfig (line 77) | class HeliosConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/hunyuan3d.py class Hunyuan3DDiTArchConfig (line 8) | class Hunyuan3DDiTArchConfig(DiTArchConfig): method __post_init__ (line 33) | def __post_init__(self) -> None: class Hunyuan3DDiTConfig (line 40) | class Hunyuan3DDiTConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/hunyuanvideo.py function is_double_block (line 11) | def is_double_block(n: str, m) -> bool: function is_single_block (line 15) | def is_single_block(n: str, m) -> bool: function is_refiner_block (line 19) | def is_refiner_block(n: str, m) -> bool: function is_txt_in (line 23) | def is_txt_in(n: str, m) -> bool: class HunyuanVideoArchConfig (line 28) | class HunyuanVideoArchConfig(DiTArchConfig): method __post_init__ (line 174) | def __post_init__(self): class HunyuanVideoConfig (line 181) | class HunyuanVideoConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/ltx_2.py class LTXModelType (line 8) | class LTXModelType(Enum): method is_video_enabled (line 20) | def is_video_enabled(self) -> bool: method is_audio_enabled (line 23) | def is_audio_enabled(self) -> bool: class LTX2RopeType (line 27) | class LTX2RopeType(str, Enum): class LTX2AttentionFunction (line 39) | class LTX2AttentionFunction(str, Enum): function is_blocks (line 50) | def is_blocks(n: str, m) -> bool: class LTX2ArchConfig (line 55) | class LTX2ArchConfig(DiTArchConfig): method __post_init__ (line 154) | def __post_init__(self): class LTX2Config (line 171) | class LTX2Config(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/mova_audio.py function _is_blocks (line 9) | def _is_blocks(n: str, m) -> bool: class MOVAAudioArchConfig (line 14) | class MOVAAudioArchConfig(DiTArchConfig): method __post_init__ (line 54) | def __post_init__(self): class MOVAAudioConfig (line 65) | class MOVAAudioConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/mova_video.py function _is_blocks (line 9) | def _is_blocks(n: str, m) -> bool: class MOVAVideoArchConfig (line 14) | class MOVAVideoArchConfig(DiTArchConfig): method __post_init__ (line 53) | def __post_init__(self): class MOVAVideoConfig (line 64) | class MOVAVideoConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/qwenimage.py class QwenImageArchConfig (line 11) | class QwenImageArchConfig(DiTArchConfig): method __post_init__ (line 38) | def __post_init__(self): class QwenImageEditPlus_2511_ArchConfig (line 46) | class QwenImageEditPlus_2511_ArchConfig(QwenImageArchConfig): class QwenImageDitConfig (line 51) | class QwenImageDitConfig(DiTConfig): class QwenImageEditPlus_2511_DitConfig (line 58) | class QwenImageEditPlus_2511_DitConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/sana.py class SanaArchConfig (line 21) | class SanaArchConfig(DiTArchConfig): method __post_init__ (line 48) | def __post_init__(self): class SanaConfig (line 55) | class SanaConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/wanvideo.py function is_blocks (line 9) | def is_blocks(n: str, m) -> bool: class WanVideoArchConfig (line 14) | class WanVideoArchConfig(DiTArchConfig): method __post_init__ (line 94) | def __post_init__(self): class WanVideoConfig (line 102) | class WanVideoConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/dits/zimage.py function is_zimage_layer (line 10) | def is_zimage_layer(n: str, m) -> bool: class ZImageArchConfig (line 22) | class ZImageArchConfig(DiTArchConfig): method __post_init__ (line 67) | def __post_init__(self): class ZImageDitConfig (line 75) | class ZImageDitConfig(DiTConfig): FILE: python/sglang/multimodal_gen/configs/models/encoders/base.py class EncoderArchConfig (line 15) | class EncoderArchConfig(ArchConfig): class TextEncoderArchConfig (line 30) | class TextEncoderArchConfig(EncoderArchConfig): method __post_init__ (line 49) | def __post_init__(self) -> None: class ImageEncoderArchConfig (line 58) | class ImageEncoderArchConfig(EncoderArchConfig): class BaseEncoderOutput (line 63) | class BaseEncoderOutput: class EncoderConfig (line 72) | class EncoderConfig(ModelConfig): class TextEncoderConfig (line 81) | class TextEncoderConfig(EncoderConfig): class ImageEncoderConfig (line 91) | class ImageEncoderConfig(EncoderConfig): FILE: python/sglang/multimodal_gen/configs/models/encoders/clip.py function _is_transformer_layer (line 15) | def _is_transformer_layer(n: str, m) -> bool: function _is_embeddings (line 19) | def _is_embeddings(n: str, m) -> bool: class CLIPTextArchConfig (line 24) | class CLIPTextArchConfig(TextEncoderArchConfig): class CLIPVisionArchConfig (line 61) | class CLIPVisionArchConfig(ImageEncoderArchConfig): class CLIPTextConfig (line 87) | class CLIPTextConfig(TextEncoderConfig): class CLIPVisionConfig (line 96) | class CLIPVisionConfig(ImageEncoderConfig): FILE: python/sglang/multimodal_gen/configs/models/encoders/gemma2.py function _is_transformer_layer (line 19) | def _is_transformer_layer(n: str, m) -> bool: function _is_embeddings (line 23) | def _is_embeddings(n: str, m) -> bool: function _is_final_norm (line 27) | def _is_final_norm(n: str, m) -> bool: class Gemma2ArchConfig (line 32) | class Gemma2ArchConfig(TextEncoderArchConfig): class Gemma2Config (line 85) | class Gemma2Config(TextEncoderConfig): FILE: python/sglang/multimodal_gen/configs/models/encoders/gemma_3.py function _is_transformer_layer (line 13) | def _is_transformer_layer(n: str, m) -> bool: function _is_embeddings (line 17) | def _is_embeddings(n: str, m) -> bool: function _is_final_norm (line 21) | def _is_final_norm(n: str, m) -> bool: class Gemma3ArchConfig (line 26) | class Gemma3ArchConfig(TextEncoderArchConfig): class Gemma3Config (line 78) | class Gemma3Config(TextEncoderConfig): FILE: python/sglang/multimodal_gen/configs/models/encoders/llama.py function _is_transformer_layer (line 12) | def _is_transformer_layer(n: str, m) -> bool: function _is_embeddings (line 16) | def _is_embeddings(n: str, m) -> bool: function _is_final_norm (line 20) | def _is_final_norm(n: str, m) -> bool: class LlamaArchConfig (line 25) | class LlamaArchConfig(TextEncoderArchConfig): class LlamaConfig (line 66) | class LlamaConfig(TextEncoderConfig): FILE: python/sglang/multimodal_gen/configs/models/encoders/qwen3.py function _is_transformer_layer (line 12) | def _is_transformer_layer(n: str, m) -> bool: function _is_embeddings (line 16) | def _is_embeddings(n: str, m) -> bool: function _is_final_norm (line 20) | def _is_final_norm(n: str, m) -> bool: class Qwen3TextArchConfig (line 25) | class Qwen3TextArchConfig(TextEncoderArchConfig): method __post_init__ (line 72) | def __post_init__(self) -> None: class Qwen3TextConfig (line 82) | class Qwen3TextConfig(TextEncoderConfig): FILE: python/sglang/multimodal_gen/configs/models/encoders/qwen_image.py function _is_transformer_layer (line 12) | def _is_transformer_layer(n: str, m) -> bool: function _is_embeddings (line 16) | def _is_embeddings(n: str, m) -> bool: function _is_final_norm (line 20) | def _is_final_norm(n: str, m) -> bool: class QwenImageArchConfig (line 25) | class QwenImageArchConfig(TextEncoderArchConfig): class Qwen2_5VLConfig (line 66) | class Qwen2_5VLConfig(TextEncoderConfig): FILE: python/sglang/multimodal_gen/configs/models/encoders/t5.py function _is_transformer_layer (line 13) | def _is_transformer_layer(n: str, m) -> bool: function _is_embeddings (line 17) | def _is_embeddings(n: str, m) -> bool: function _is_final_layernorm (line 21) | def _is_final_layernorm(n: str, m) -> bool: class T5ArchConfig (line 26) | class T5ArchConfig(TextEncoderArchConfig): method __post_init__ (line 65) | def __post_init__(self): class T5Config (line 84) | class T5Config(TextEncoderConfig): method add_cli_args (line 94) | def add_cli_args( FILE: python/sglang/multimodal_gen/configs/models/vaes/base.py class VAEArchConfig (line 16) | class VAEArchConfig(ArchConfig): class VAEConfig (line 25) | class VAEConfig(ModelConfig): method __post_init__ (line 45) | def __post_init__(self): method post_init (line 50) | def post_init(self): method add_cli_args (line 54) | def add_cli_args(parser: Any, prefix: str = "vae-config") -> Any: method get_vae_scale_factor (line 143) | def get_vae_scale_factor(self): method encode_sample_mode (line 146) | def encode_sample_mode(self): method from_cli_args (line 150) | def from_cli_args(cls, args: argparse.Namespace) -> "VAEConfig": FILE: python/sglang/multimodal_gen/configs/models/vaes/dac.py class DacVAEArchConfig (line 11) | class DacVAEArchConfig(ArchConfig): class DacVAEConfig (line 27) | class DacVAEConfig(ModelConfig): FILE: python/sglang/multimodal_gen/configs/models/vaes/flux.py class FluxVAEArchConfig (line 8) | class FluxVAEArchConfig(VAEArchConfig): class Flux2VAEArchConfig (line 30) | class Flux2VAEArchConfig(FluxVAEArchConfig): class FluxVAEConfig (line 35) | class FluxVAEConfig(VAEConfig): method __post_init__ (line 44) | def __post_init__(self): method post_init (line 49) | def post_init(self): class Flux2VAEConfig (line 69) | class Flux2VAEConfig(FluxVAEConfig): FILE: python/sglang/multimodal_gen/configs/models/vaes/glmimage.py class GlmImageVAEArchConfig (line 9) | class GlmImageVAEArchConfig(VAEArchConfig): class GlmImageVAEConfig (line 39) | class GlmImageVAEConfig(VAEConfig): method get_vae_scale_factor (line 48) | def get_vae_scale_factor(self): method __post_init__ (line 51) | def __post_init__(self): method post_init (line 56) | def post_init(self): FILE: python/sglang/multimodal_gen/configs/models/vaes/hunyuan3d.py class Hunyuan3DVAEArchConfig (line 8) | class Hunyuan3DVAEArchConfig(VAEArchConfig): class Hunyuan3DVAEConfig (line 16) | class Hunyuan3DVAEConfig(VAEConfig): FILE: python/sglang/multimodal_gen/configs/models/vaes/hunyuanvae.py class HunyuanVAEArchConfig (line 10) | class HunyuanVAEArchConfig(VAEArchConfig): method __post_init__ (line 35) | def __post_init__(self): class HunyuanVAEConfig (line 40) | class HunyuanVAEConfig(VAEConfig): FILE: python/sglang/multimodal_gen/configs/models/vaes/ltx_audio.py class LTXAudioVAEArchConfig (line 9) | class LTXAudioVAEArchConfig(VAEArchConfig): class LTXAudioVAEConfig (line 29) | class LTXAudioVAEConfig(VAEConfig): FILE: python/sglang/multimodal_gen/configs/models/vaes/ltx_video.py class LTXVideoVAEArchConfig (line 9) | class LTXVideoVAEArchConfig(VAEArchConfig): class LTXVideoVAEConfig (line 55) | class LTXVideoVAEConfig(VAEConfig): FILE: python/sglang/multimodal_gen/configs/models/vaes/qwenimage.py class QwenImageVAEArchConfig (line 10) | class QwenImageVAEArchConfig(VAEArchConfig): class QwenImageVAEConfig (line 32) | class QwenImageVAEConfig(VAEConfig): method get_vae_scale_factor (line 41) | def get_vae_scale_factor(self): method __post_init__ (line 44) | def __post_init__(self): method post_init (line 49) | def post_init(self): FILE: python/sglang/multimodal_gen/configs/models/vaes/sana.py class SanaVAEArchConfig (line 18) | class SanaVAEArchConfig(VAEArchConfig): class SanaVAEConfig (line 28) | class SanaVAEConfig(VAEConfig): method post_init (line 37) | def post_init(self): FILE: python/sglang/multimodal_gen/configs/models/vaes/wanvae.py class WanVAEArchConfig (line 12) | class WanVAEArchConfig(VAEArchConfig): method __post_init__ (line 65) | def __post_init__(self): class WanVAEConfig (line 77) | class WanVAEConfig(VAEConfig): method __post_init__ (line 88) | def __post_init__(self): FILE: python/sglang/multimodal_gen/configs/models/vocoder/base.py class VocoderArchConfig (line 12) | class VocoderArchConfig(ArchConfig): class VocoderConfig (line 19) | class VocoderConfig(ModelConfig): method from_cli_args (line 23) | def from_cli_args(cls, args: argparse.Namespace) -> "VocoderConfig": FILE: python/sglang/multimodal_gen/configs/models/vocoder/ltx_vocoder.py class LTXVocoderArchConfig (line 12) | class LTXVocoderArchConfig(VocoderArchConfig): class LTXVocoderConfig (line 28) | class LTXVocoderConfig(VocoderConfig): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/base.py class ModelTaskType (line 46) | class ModelTaskType(Enum): method is_image_gen (line 58) | def is_image_gen(self) -> bool: method requires_image_input (line 65) | def requires_image_input(self) -> bool: method accepts_image_input (line 72) | def accepts_image_input(self) -> bool: method data_type (line 81) | def data_type(self) -> DataType: class STA_Mode (line 90) | class STA_Mode(str, Enum): function preprocess_text (line 100) | def preprocess_text(prompt: str) -> str: function postprocess_text (line 104) | def postprocess_text(output: BaseEncoderOutput, _text_inputs) -> torch.t... function shard_rotary_emb_for_sp (line 108) | def shard_rotary_emb_for_sp(emb): function maybe_unpad_latents (line 143) | def maybe_unpad_latents(latents, batch): class PipelineConfig (line 160) | class PipelineConfig: method postprocess_image (line 206) | def postprocess_image(self, image): method calculate_condition_image_size (line 235) | def calculate_condition_image_size(self, image, width, height) -> tupl... method prepare_sigmas (line 242) | def prepare_sigmas(self, sigmas, num_inference_steps): method preprocess_condition_image (line 246) | def preprocess_condition_image( method prepare_calculated_size (line 256) | def prepare_calculated_size(self, image): method prepare_image_processor_kwargs (line 259) | def prepare_image_processor_kwargs(self, batch, neg=False): method postprocess_image_latent (line 262) | def postprocess_image_latent(self, latent_condition, batch): method slice_noise_pred (line 292) | def slice_noise_pred(self, noise, latents): method adjust_num_frames (line 295) | def adjust_num_frames(self, num_frames): method tokenize_prompt (line 299) | def tokenize_prompt(self, prompt: list[str], tokenizer, tok_kwargs) ->... method prepare_latent_shape (line 302) | def prepare_latent_shape(self, batch, batch_size, num_frames): method allow_set_num_frames (line 317) | def allow_set_num_frames(self): method get_decode_scale_and_shift (line 320) | def get_decode_scale_and_shift(self, device, dtype, vae): method maybe_pack_latents (line 332) | def maybe_pack_latents(self, latents, batch_size, batch): method maybe_prepare_latent_ids (line 335) | def maybe_prepare_latent_ids(self, latents): method postprocess_vae_encode (line 339) | def postprocess_vae_encode(self, image_latents, vae): method preprocess_decoding (line 343) | def preprocess_decoding(self, latents, server_args=None, vae=None): method gather_latents_for_sp (line 346) | def gather_latents_for_sp(self, latents): method preprocess_vae_image (line 351) | def preprocess_vae_image(self, batch, vae_image_processor): method shard_latents_for_sp (line 354) | def shard_latents_for_sp(self, batch, latents): method get_pos_prompt_embeds (line 383) | def get_pos_prompt_embeds(self, batch): method get_neg_prompt_embeds (line 386) | def get_neg_prompt_embeds(self, batch): method post_denoising_loop (line 389) | def post_denoising_loop(self, latents, batch): method post_decoding (line 393) | def post_decoding(self, frames, server_args): method prepare_pos_cond_kwargs (line 396) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype): method prepare_neg_cond_kwargs (line 399) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype): method add_cli_args (line 403) | def add_cli_args( method update_config_from_dict (line 534) | def update_config_from_dict(self, args: dict[str, Any], prefix: str = ... method from_kwargs (line 553) | def from_kwargs( method check_pipeline_config (line 670) | def check_pipeline_config(self) -> None: method dump_to_json (line 691) | def dump_to_json(self, file_path: str): method load_from_json (line 720) | def load_from_json(self, file_path: str): method update_pipeline_config (line 725) | def update_pipeline_config(self, source_pipeline_dict: dict[str, Any])... class ImagePipelineConfig (line 753) | class ImagePipelineConfig(PipelineConfig): method _prepare_sigmas (line 756) | def _prepare_sigmas(self, sigmas, num_inference_steps): method shard_latents_for_sp (line 764) | def shard_latents_for_sp(self, batch, latents): method gather_latents_for_sp (line 786) | def gather_latents_for_sp(self, latents): method _unpad_and_unpack_latents (line 791) | def _unpad_and_unpack_latents(self, latents, batch): class SpatialImagePipelineConfig (line 807) | class SpatialImagePipelineConfig(ImagePipelineConfig): method shard_latents_for_sp (line 814) | def shard_latents_for_sp(self, batch, latents): method gather_latents_for_sp (line 840) | def gather_latents_for_sp(self, latents): class SlidingTileAttnConfig (line 850) | class SlidingTileAttnConfig(PipelineConfig): function parse_int_list (line 867) | def parse_int_list(value: str) -> list[int]: FILE: python/sglang/multimodal_gen/configs/pipeline_configs/diffusers_generic.py class DiffusersGenericPipelineConfig (line 20) | class DiffusersGenericPipelineConfig(PipelineConfig): method check_pipeline_config (line 64) | def check_pipeline_config(self) -> None: method adjust_size (line 70) | def adjust_size(self, width, height, image): method adjust_num_frames (line 76) | def adjust_num_frames(self, num_frames): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/flux.py function t5_postprocess_text (line 37) | def t5_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> tor... class FluxPipelineConfig (line 42) | class FluxPipelineConfig(ImagePipelineConfig): method prepare_sigmas (line 89) | def prepare_sigmas(self, sigmas, num_inference_steps): method prepare_latent_shape (line 92) | def prepare_latent_shape(self, batch, batch_size, num_frames): method maybe_pack_latents (line 101) | def maybe_pack_latents(self, latents, batch_size, batch): method get_pos_prompt_embeds (line 110) | def get_pos_prompt_embeds(self, batch): method get_neg_prompt_embeds (line 113) | def get_neg_prompt_embeds(self, batch): method _prepare_latent_image_ids (line 116) | def _prepare_latent_image_ids(self, original_height, original_width, d... method get_freqs_cis (line 138) | def get_freqs_cis(self, prompt_embeds, width, height, device, rotary_e... method post_denoising_loop (line 157) | def post_denoising_loop(self, latents, batch): method prepare_pos_cond_kwargs (line 169) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype): method prepare_neg_cond_kwargs (line 184) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype): function _prepare_latent_ids (line 200) | def _prepare_latent_ids( function _unpack_latents_with_ids (line 231) | def _unpack_latents_with_ids( function _patchify_latents (line 260) | def _patchify_latents(latents): function _unpatchify_latents (line 272) | def _unpatchify_latents(latents): function _prepare_text_ids (line 284) | def _prepare_text_ids( function _prepare_image_ids (line 303) | def _prepare_image_ids( function flux2_postprocess_text (line 332) | def flux2_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> ... function flux2_klein_postprocess_text (line 344) | def flux2_klein_postprocess_text( class Flux2MistralTextArchConfig (line 359) | class Flux2MistralTextArchConfig(TextEncoderArchConfig): method __post_init__ (line 372) | def __post_init__(self): class Flux2MistralTextConfig (line 384) | class Flux2MistralTextConfig(TextEncoderConfig): function format_text_input (line 390) | def format_text_input(prompts: List[str], system_message: str = None): function flux_2_preprocess_text (line 408) | def flux_2_preprocess_text(prompt: str): function flux2_pack_latents (line 414) | def flux2_pack_latents(latents): class Flux2PipelineConfig (line 422) | class Flux2PipelineConfig(FluxPipelineConfig): method tokenize_prompt (line 441) | def tokenize_prompt(self, prompts: list[str], tokenizer, tok_kwargs) -... method prepare_latent_shape (line 458) | def prepare_latent_shape(self, batch, batch_size, num_frames): method get_pos_prompt_embeds (line 467) | def get_pos_prompt_embeds(self, batch): method get_neg_prompt_embeds (line 470) | def get_neg_prompt_embeds(self, batch): method calculate_condition_image_size (line 473) | def calculate_condition_image_size( method preprocess_condition_image (line 496) | def preprocess_condition_image( method postprocess_image_latent (line 510) | def postprocess_image_latent(self, latent_condition, batch): method prepare_condition_image_latent_ids (line 523) | def prepare_condition_image_latent_ids(self, image_latents, batch): method get_freqs_cis (line 528) | def get_freqs_cis(self, prompt_embeds, width, height, device, rotary_e... method prepare_pos_cond_kwargs (line 558) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype): method prepare_neg_cond_kwargs (line 570) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype): method maybe_pack_latents (line 573) | def maybe_pack_latents(self, latents, batch_size, batch): method maybe_prepare_latent_ids (line 576) | def maybe_prepare_latent_ids(self, latents): method postprocess_vae_encode (line 579) | def postprocess_vae_encode(self, image_latents, vae): method _check_vae_has_bn (line 584) | def _check_vae_has_bn(self, vae): method preprocess_decoding (line 590) | def preprocess_decoding(self, latents, server_args=None, vae=None): method get_decode_scale_and_shift (line 601) | def get_decode_scale_and_shift(self, device, dtype, vae): method post_denoising_loop (line 630) | def post_denoising_loop(self, latents, batch): method slice_noise_pred (line 636) | def slice_noise_pred(self, noise, latents): class Flux2KleinPipelineConfig (line 643) | class Flux2KleinPipelineConfig(Flux2PipelineConfig): method tokenize_prompt (line 661) | def tokenize_prompt(self, prompts: list[str], tokenizer, tok_kwargs) -... FILE: python/sglang/multimodal_gen/configs/pipeline_configs/flux_finetuned.py class Flux2FinetunedPipelineConfig (line 25) | class Flux2FinetunedPipelineConfig(Flux2PipelineConfig): method preprocess_decoding (line 39) | def preprocess_decoding( method get_decode_scale_and_shift (line 73) | def get_decode_scale_and_shift(self, device, dtype, vae): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/glm_image.py class GlmImagePipelineConfig (line 18) | class GlmImagePipelineConfig(SpatialImagePipelineConfig): method __post_init__ (line 46) | def __post_init__(self): method get_freqs_cis (line 50) | def get_freqs_cis(self, batch, device, rotary_emb, dtype): method prepare_pos_cond_kwargs (line 57) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype): method prepare_neg_cond_kwargs (line 68) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype): method get_decode_scale_and_shift (line 79) | def get_decode_scale_and_shift(self, device, dtype, vae): method post_denoising_loop (line 92) | def post_denoising_loop(self, latents, batch): method post_decoding (line 97) | def post_decoding(self, frames, server_args): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/helios.py function umt5_postprocess_text (line 26) | def umt5_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> t... class HeliosT2VConfig (line 45) | class HeliosT2VConfig(PipelineConfig): method __post_init__ (line 94) | def __post_init__(self): class HeliosMidConfig (line 100) | class HeliosMidConfig(HeliosT2VConfig): class HeliosDistilledConfig (line 111) | class HeliosDistilledConfig(HeliosT2VConfig): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/hunyuan.py class PromptTemplate (line 34) | class PromptTemplate(TypedDict): function llama_preprocess_text (line 45) | def llama_preprocess_text(prompt: str) -> str: function llama_postprocess_text (line 49) | def llama_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> ... function clip_preprocess_text (line 59) | def clip_preprocess_text(prompt: str) -> str: function clip_postprocess_text (line 63) | def clip_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> t... class HunyuanConfig (line 69) | class HunyuanConfig(PipelineConfig): method __post_init__ (line 101) | def __post_init__(self): class FastHunyuanConfig (line 107) | class FastHunyuanConfig(HunyuanConfig): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/hunyuan3d.py class Hunyuan3D2PipelineConfig (line 15) | class Hunyuan3D2PipelineConfig(PipelineConfig): method __post_init__ (line 66) | def __post_init__(self): method prepare_latent_shape (line 70) | def prepare_latent_shape(self, batch, batch_size, num_frames): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/ltx_2.py function pack_text_embeds (line 26) | def pack_text_embeds( function _gemma_postprocess_func (line 96) | def _gemma_postprocess_func( class LTX2PipelineConfig (line 115) | class LTX2PipelineConfig(PipelineConfig): method vae_scale_factor (line 135) | def vae_scale_factor(self): method vae_temporal_compression (line 139) | def vae_temporal_compression(self): method prepare_latent_shape (line 142) | def prepare_latent_shape(self, batch, batch_size, num_frames): method prepare_audio_latent_shape (line 159) | def prepare_audio_latent_shape(self, batch, batch_size, num_frames): method prepare_sigmas (line 199) | def prepare_sigmas(self, sigmas, num_inference_steps): method tokenize_prompt (line 207) | def tokenize_prompt(self, prompt: list[str], tokenizer, tok_kwargs) ->... method maybe_pack_latents (line 229) | def maybe_pack_latents(self, latents, batch_size, batch): method _infer_video_latent_frames_and_tokens_per_frame (line 255) | def _infer_video_latent_frames_and_tokens_per_frame( method shard_latents_for_sp (line 308) | def shard_latents_for_sp(self, batch, latents): method gather_latents_for_sp (line 353) | def gather_latents_for_sp(self, latents): method maybe_pack_audio_latents (line 361) | def maybe_pack_audio_latents(self, latents, batch_size, batch): method get_pos_prompt_embeds (line 375) | def get_pos_prompt_embeds(self, batch): method get_neg_prompt_embeds (line 383) | def get_neg_prompt_embeds(self, batch): method get_decode_scale_and_shift (line 390) | def get_decode_scale_and_shift(self, device, dtype, vae): method _unpack_latents (line 423) | def _unpack_latents( method _denormalize_latents (line 454) | def _denormalize_latents( method _denormalize_audio_latents (line 469) | def _denormalize_audio_latents( method _unpack_audio_latents (line 477) | def _unpack_audio_latents( method _unpad_and_unpack_latents (line 497) | def _unpad_and_unpack_latents(self, latents, audio_latents, batch, vae... class LTX2I2VPipelineConfig (line 582) | class LTX2I2VPipelineConfig(LTX2PipelineConfig): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/mova.py class MOVAPipelineConfig (line 27) | class MOVAPipelineConfig(PipelineConfig): method _center_crop_and_resize (line 55) | def _center_crop_and_resize( method adjust_num_frames (line 104) | def adjust_num_frames(self, num_frames: int) -> int: method preprocess_condition_image (line 123) | def preprocess_condition_image( method prepare_latent_shape (line 129) | def prepare_latent_shape(self, batch, batch_size, num_frames): method prepare_audio_latent_shape (line 141) | def prepare_audio_latent_shape(self, batch_size, num_samples, audio_vae): method normalize_video_latents (line 145) | def normalize_video_latents(self, latents: torch.Tensor, video_vae) ->... method denormalize_video_latents (line 158) | def denormalize_video_latents( class MOVA360PConfig (line 175) | class MOVA360PConfig(MOVAPipelineConfig): class MOVA720PConfig (line 182) | class MOVA720PConfig(MOVAPipelineConfig): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/qwen_image.py function _extract_masked_hidden (line 25) | def _extract_masked_hidden(hidden_states: torch.Tensor, mask: torch.Tens... function qwen_image_preprocess_text (line 34) | def qwen_image_preprocess_text(prompt): function qwen_image_postprocess_text (line 42) | def qwen_image_postprocess_text(outputs, _text_inputs, drop_idx=34): function _normalize_prompt_list (line 59) | def _normalize_prompt_list(prompt): function _normalize_image_list (line 63) | def _normalize_image_list(images): function _build_qwen_edit_image_prompt (line 69) | def _build_qwen_edit_image_prompt(num_images: int) -> str: function _resolve_qwen_edit_per_prompt_images (line 74) | def _resolve_qwen_edit_per_prompt_images(prompt_list, image_list): function _pack_latents (line 91) | def _pack_latents(latents, batch_size, num_channels_latents, height, wid... class QwenImagePipelineConfig (line 104) | class QwenImagePipelineConfig(ImagePipelineConfig): method prepare_sigmas (line 144) | def prepare_sigmas(self, sigmas, num_inference_steps): method prepare_image_processor_kwargs (line 147) | def prepare_image_processor_kwargs(self, batch, neg=False): method get_vae_scale_factor (line 156) | def get_vae_scale_factor(self): method prepare_latent_shape (line 159) | def prepare_latent_shape(self, batch, batch_size, num_frames): method maybe_pack_latents (line 167) | def maybe_pack_latents(self, latents, batch_size, batch): method get_decode_scale_and_shift (line 176) | def get_decode_scale_and_shift(self, device, dtype, vae): method get_freqs_cis (line 189) | def get_freqs_cis(img_shapes, txt_seq_lens, rotary_emb, device, dtype): method _prepare_cond_kwargs (line 203) | def _prepare_cond_kwargs(self, batch, prompt_embeds, rotary_emb, devic... method prepare_pos_cond_kwargs (line 239) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype): method prepare_neg_cond_kwargs (line 244) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype): method post_denoising_loop (line 249) | def post_denoising_loop(self, latents, batch): class QwenImageEditPipelineConfig (line 263) | class QwenImageEditPipelineConfig(QwenImagePipelineConfig): method _prepare_edit_cond_kwargs (line 268) | def _prepare_edit_cond_kwargs( method preprocess_condition_image (line 324) | def preprocess_condition_image( method postprocess_image_latent (line 332) | def postprocess_image_latent(self, latent_condition, batch): method prepare_pos_cond_kwargs (line 359) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype): method prepare_neg_cond_kwargs (line 364) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype): method calculate_condition_image_size (line 369) | def calculate_condition_image_size(self, image, width, height) -> tupl... method slice_noise_pred (line 375) | def slice_noise_pred(self, noise, latents): class QwenImageEditPlusPipelineConfig (line 386) | class QwenImageEditPlusPipelineConfig(QwenImageEditPipelineConfig): method _get_condition_image_sizes (line 389) | def _get_condition_image_sizes(self, batch) -> list[tuple[int, int]]: method prepare_image_processor_kwargs (line 404) | def prepare_image_processor_kwargs(self, batch, neg=False) -> dict: method prepare_calculated_size (line 433) | def prepare_calculated_size(self, image): method resize_condition_image (line 436) | def resize_condition_image(self, images, target_width, target_height): method calculate_condition_image_size (line 444) | def calculate_condition_image_size(self, image, width, height) -> tupl... method calculate_vae_image_size (line 450) | def calculate_vae_image_size(self, image, width, height) -> tuple[int,... method preprocess_vae_image (line 456) | def preprocess_vae_image(self, batch, vae_image_processor): method _prepare_edit_cond_kwargs (line 469) | def _prepare_edit_cond_kwargs( class QwenImageEditPlus_2511_PipelineConfig (line 535) | class QwenImageEditPlus_2511_PipelineConfig(QwenImageEditPlusPipelineCon... class QwenImageLayeredPipelineConfig (line 540) | class QwenImageLayeredPipelineConfig(QwenImageEditPipelineConfig): method _prepare_edit_cond_kwargs (line 544) | def _prepare_edit_cond_kwargs( method _unpad_and_unpack_latents (line 579) | def _unpad_and_unpack_latents(self, latents, batch): method allow_set_num_frames (line 600) | def allow_set_num_frames(self): method post_denoising_loop (line 603) | def post_denoising_loop(self, latents, batch): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/sana.py function sana_postprocess_text (line 37) | def sana_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> t... class SanaPipelineConfig (line 44) | class SanaPipelineConfig(SpatialImagePipelineConfig): method prepare_latent_shape (line 76) | def prepare_latent_shape(self, batch, batch_size, num_frames): method get_pos_prompt_embeds (line 86) | def get_pos_prompt_embeds(self, batch): method get_neg_prompt_embeds (line 90) | def get_neg_prompt_embeds(self, batch): method prepare_pos_cond_kwargs (line 93) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype): method prepare_neg_cond_kwargs (line 104) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype): method post_denoising_loop (line 113) | def post_denoising_loop(self, latents, batch): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/wan.py function t5_postprocess_text (line 26) | def t5_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) -> tor... class WanI2VCommonConfig (line 43) | class WanI2VCommonConfig(PipelineConfig): method adjust_num_frames (line 45) | def adjust_num_frames(self, num_frames): class WanT2V480PConfig (line 59) | class WanT2V480PConfig(PipelineConfig): method __post_init__ (line 90) | def __post_init__(self): class TurboWanT2V480PConfig (line 96) | class TurboWanT2V480PConfig(WanT2V480PConfig): class WanT2V720PConfig (line 106) | class WanT2V720PConfig(WanT2V480PConfig): class WanI2V480PConfig (line 116) | class WanI2V480PConfig(WanT2V480PConfig, WanI2VCommonConfig): method postprocess_image (line 132) | def postprocess_image(self, image): method __post_init__ (line 135) | def __post_init__(self) -> None: class WanI2V720PConfig (line 141) | class WanI2V720PConfig(WanI2V480PConfig): class TurboWanI2V720Config (line 152) | class TurboWanI2V720Config(WanI2V720PConfig): method __post_init__ (line 159) | def __post_init__(self) -> None: class FastWan2_1_T2V_480P_Config (line 164) | class FastWan2_1_T2V_480P_Config(WanT2V480PConfig): class Wan2_2_TI2V_5B_Config (line 177) | class Wan2_2_TI2V_5B_Config(WanT2V480PConfig, WanI2VCommonConfig): method prepare_latent_shape (line 184) | def prepare_latent_shape(self, batch, batch_size, num_frames): method __post_init__ (line 193) | def __post_init__(self) -> None: class FastWan2_2_TI2V_5B_Config (line 200) | class FastWan2_2_TI2V_5B_Config(Wan2_2_TI2V_5B_Config): class Wan2_2_T2V_A14B_Config (line 208) | class Wan2_2_T2V_A14B_Config(WanT2V480PConfig): method __post_init__ (line 212) | def __post_init__(self) -> None: class Wan2_2_I2V_A14B_Config (line 217) | class Wan2_2_I2V_A14B_Config(WanI2V480PConfig): method __post_init__ (line 221) | def __post_init__(self) -> None: class SelfForcingWanT2V480PConfig (line 230) | class SelfForcingWanT2V480PConfig(WanT2V480PConfig): FILE: python/sglang/multimodal_gen/configs/pipeline_configs/zimage.py function zimage_preprocess_text (line 26) | def zimage_preprocess_text(prompt: str): function zimage_postprocess_text (line 33) | def zimage_postprocess_text(outputs: BaseEncoderOutput, _text_inputs) ->... class TransformersModelConfig (line 39) | class TransformersModelConfig(EncoderConfig): class ZImagePipelineConfig (line 44) | class ZImagePipelineConfig(ImagePipelineConfig): method tokenize_prompt (line 64) | def tokenize_prompt(self, prompts: list[str], tokenizer, tok_kwargs) -... method _ceil_to_multiple (line 80) | def _ceil_to_multiple(x: int, m: int) -> int: method _build_zimage_sp_plan (line 85) | def _build_zimage_sp_plan(self, batch) -> dict: method _get_zimage_sp_plan (line 145) | def _get_zimage_sp_plan(self, batch) -> dict: method _shard_cap (line 152) | def _shard_cap(self, cap: torch.Tensor, plan: dict) -> torch.Tensor: method get_pos_prompt_embeds (line 165) | def get_pos_prompt_embeds(self, batch): method shard_latents_for_sp (line 172) | def shard_latents_for_sp(self, batch, latents): method gather_latents_for_sp (line 199) | def gather_latents_for_sp(self, latents): method post_denoising_loop (line 206) | def post_denoising_loop(self, latents, batch): method get_freqs_cis (line 222) | def get_freqs_cis(self, prompt_embeds, width, height, device, rotary_e... method prepare_pos_cond_kwargs (line 306) | def prepare_pos_cond_kwargs(self, batch, device, rotary_emb, dtype): method prepare_neg_cond_kwargs (line 318) | def prepare_neg_cond_kwargs(self, batch, device, rotary_emb, dtype): FILE: python/sglang/multimodal_gen/configs/quantization.py class NunchakuSVDQuantArgs (line 23) | class NunchakuSVDQuantArgs: method _adjust_config (line 36) | def _adjust_config(self) -> None: method validate (line 75) | def validate(self) -> None: method add_cli_args (line 128) | def add_cli_args(parser) -> None: method from_dict (line 164) | def from_dict(cls, kwargs: dict[str, Any]) -> "NunchakuSVDQuantArgs": FILE: python/sglang/multimodal_gen/configs/sample/diffusers_generic.py class DiffusersGenericSamplingParams (line 18) | class DiffusersGenericSamplingParams(SamplingParams): method __post_init__ (line 44) | def __post_init__(self) -> None: FILE: python/sglang/multimodal_gen/configs/sample/flux.py class FluxSamplingParams (line 11) | class FluxSamplingParams(SamplingParams): class Flux2KleinSamplingParams (line 23) | class Flux2KleinSamplingParams(FluxSamplingParams): FILE: python/sglang/multimodal_gen/configs/sample/glmimage.py class GlmImageSamplingParams (line 7) | class GlmImageSamplingParams(SamplingParams): FILE: python/sglang/multimodal_gen/configs/sample/helios.py class HeliosT2VSamplingParams (line 8) | class HeliosT2VSamplingParams(SamplingParams): class HeliosMidSamplingParams (line 39) | class HeliosMidSamplingParams(HeliosT2VSamplingParams): class HeliosDistilledSamplingParams (line 46) | class HeliosDistilledSamplingParams(HeliosT2VSamplingParams): FILE: python/sglang/multimodal_gen/configs/sample/hunyuan.py class HunyuanSamplingParams (line 11) | class HunyuanSamplingParams(SamplingParams): class FastHunyuanSamplingParam (line 54) | class FastHunyuanSamplingParam(HunyuanSamplingParams): FILE: python/sglang/multimodal_gen/configs/sample/hunyuan3d.py class Hunyuan3DSamplingParams (line 10) | class Hunyuan3DSamplingParams(SamplingParams): method __post_init__ (line 21) | def __post_init__(self): FILE: python/sglang/multimodal_gen/configs/sample/ltx_2.py class LTX2SamplingParams (line 7) | class LTX2SamplingParams(SamplingParams): FILE: python/sglang/multimodal_gen/configs/sample/mova.py class MOVASamplingParams (line 8) | class MOVASamplingParams(SamplingParams): class MOVA_360P_SamplingParams (line 33) | class MOVA_360P_SamplingParams(MOVASamplingParams): class MOVA_720P_SamplingParams (line 48) | class MOVA_720P_SamplingParams(MOVASamplingParams): FILE: python/sglang/multimodal_gen/configs/sample/qwenimage.py class QwenImageSamplingParams (line 10) | class QwenImageSamplingParams(SamplingParams): class QwenImage2512SamplingParams (line 19) | class QwenImage2512SamplingParams(QwenImageSamplingParams): class QwenImageEditPlusSamplingParams (line 26) | class QwenImageEditPlusSamplingParams(QwenImageSamplingParams): class QwenImageLayeredSamplingParams (line 34) | class QwenImageLayeredSamplingParams(QwenImageSamplingParams): FILE: python/sglang/multimodal_gen/configs/sample/sampling_params.py function _json_safe (line 28) | def _json_safe(obj: Any): function generate_request_id (line 44) | def generate_request_id() -> str: function _sanitize_filename (line 48) | def _sanitize_filename(name: str, replacement: str = "_", max_length: in... class DataType (line 69) | class DataType(Enum): method get_default_extension (line 74) | def get_default_extension(self) -> str: class SamplingParams (line 83) | class SamplingParams: method _set_output_file_ext (line 187) | def _set_output_file_ext(self): method _set_output_file_name (line 197) | def _set_output_file_name(self): method __post_init__ (line 230) | def __post_init__(self) -> None: method _adjust_output_quality (line 251) | def _adjust_output_quality(self, output_quality: str, data_type: DataT... method _validate (line 258) | def _validate(self): method check_sampling_param (line 344) | def check_sampling_param(self): method _validate_with_pipeline_config (line 348) | def _validate_with_pipeline_config(self, pipeline_config): method _adjust (line 366) | def _adjust( method from_pretrained (line 522) | def from_pretrained(cls, model_path: str, **kwargs) -> "SamplingParams": method from_user_sampling_params_args (line 532) | def from_user_sampling_params_args( method output_size_str (line 590) | def output_size_str(self) -> str: method seconds (line 593) | def seconds(self) -> float: method add_cli_args (line 597) | def add_cli_args(parser: Any) -> Any: method get_cli_args (line 882) | def get_cli_args(cls, args: argparse.Namespace): method output_file_path (line 906) | def output_file_path(self): method _merge_with_user_params (line 911) | def _merge_with_user_params( method n_tokens (line 947) | def n_tokens(self) -> int: class CacheParams (line 962) | class CacheParams: FILE: python/sglang/multimodal_gen/configs/sample/sana.py class SanaSamplingParams (line 13) | class SanaSamplingParams(SamplingParams): FILE: python/sglang/multimodal_gen/configs/sample/teacache.py class TeaCacheParams (line 10) | class TeaCacheParams(CacheParams): class WanTeaCacheParams (line 17) | class WanTeaCacheParams(CacheParams): method coefficients (line 26) | def coefficients(self) -> list[float]: method ret_steps (line 33) | def ret_steps(self) -> int: method get_cutoff_steps (line 39) | def get_cutoff_steps(self, num_inference_steps: int) -> int: FILE: python/sglang/multimodal_gen/configs/sample/wan.py class WanT2V_1_3B_SamplingParams (line 11) | class WanT2V_1_3B_SamplingParams(SamplingParams): class WanT2V_14B_SamplingParams (line 55) | class WanT2V_14B_SamplingParams(SamplingParams): class WanI2V_14B_480P_SamplingParam (line 102) | class WanI2V_14B_480P_SamplingParam(WanT2V_1_3B_SamplingParams): class WanI2V_14B_720P_SamplingParam (line 138) | class WanI2V_14B_720P_SamplingParam(WanT2V_14B_SamplingParams): class FastWanT2V480PConfig (line 176) | class FastWanT2V480PConfig(WanT2V_1_3B_SamplingParams): class Wan2_1_Fun_1_3B_InP_SamplingParams (line 190) | class Wan2_1_Fun_1_3B_InP_SamplingParams(SamplingParams): class Wan2_2_Base_SamplingParams (line 208) | class Wan2_2_Base_SamplingParams(SamplingParams): class Wan2_2_TI2V_5B_SamplingParam (line 222) | class Wan2_2_TI2V_5B_SamplingParam(Wan2_2_Base_SamplingParams): class Wan2_2_T2V_A14B_SamplingParam (line 242) | class Wan2_2_T2V_A14B_SamplingParam(Wan2_2_Base_SamplingParams): class Wan2_2_I2V_A14B_SamplingParam (line 262) | class Wan2_2_I2V_A14B_SamplingParam(Wan2_2_Base_SamplingParams): class Turbo_Wan2_2_I2V_A14B_SamplingParam (line 282) | class Turbo_Wan2_2_I2V_A14B_SamplingParam(Wan2_2_Base_SamplingParams): class SelfForcingWanT2V480PConfig (line 293) | class SelfForcingWanT2V480PConfig(WanT2V_1_3B_SamplingParams): FILE: python/sglang/multimodal_gen/configs/sample/zimage.py class ZImageTurboSamplingParams (line 11) | class ZImageTurboSamplingParams(SamplingParams): class ZImageSamplingParams (line 38) | class ZImageSamplingParams(SamplingParams): FILE: python/sglang/multimodal_gen/configs/utils.py function update_config_from_args (line 7) | def update_config_from_args( function clean_cli_args (line 52) | def clean_cli_args(args: argparse.Namespace) -> dict[str, Any]: FILE: python/sglang/multimodal_gen/csrc/attn/vmoba_attn/tests/test_vmoba_attn.py function generate_test_data (line 9) | def generate_test_data( function test_moba_attn_varlen_forward (line 80) | def test_moba_attn_varlen_forward( FILE: python/sglang/multimodal_gen/csrc/attn/vmoba_attn/vmoba/vmoba.py function _unsupported (line 20) | def _unsupported(*args, **kwargs): function calc_chunks (line 35) | def calc_chunks(cu_seqlen, moba_chunk_size): function _select_threshold_query_head (line 76) | def _select_threshold_query_head( function _select_threshold_block (line 150) | def _select_threshold_block( function _select_threshold_overall (line 231) | def _select_threshold_overall( function _select_threshold_head_global (line 329) | def _select_threshold_head_global( class MixedAttention (line 403) | class MixedAttention(torch.autograd.Function): method forward (line 405) | def forward( method backward (line 512) | def backward(ctx, d_output): function moba_attn_varlen (line 602) | def moba_attn_varlen( function process_moba_input (line 858) | def process_moba_input( function process_moba_output (line 929) | def process_moba_output( function generate_data (line 962) | def generate_data(batch_size, seqlen, num_head, head_dim, dtype): function test_attn_varlen_moba_speed (line 989) | def test_attn_varlen_moba_speed( FILE: python/sglang/multimodal_gen/csrc/render/hunyuan3d_rasterizer/__init__.py function _load_custom_rasterizer (line 20) | def _load_custom_rasterizer(): function rasterize (line 42) | def rasterize( function interpolate (line 65) | def interpolate( FILE: python/sglang/multimodal_gen/csrc/render/hunyuan3d_rasterizer/rasterizer.cpp function rasterizeTriangleCPU (line 7) | void rasterizeTriangleCPU(int idx, float* vt0, float* vt1, float* vt2, i... function barycentricFromImgcoordCPU (line 45) | void barycentricFromImgcoordCPU(float* V, int* F, int* findices, INT64* ... function rasterizeImagecoordsKernelCPU (line 84) | void rasterizeImagecoordsKernelCPU(float* V, int* F, float* d, INT64* zb... function rasterize_image_cpu (line 97) | std::vector rasterize_image_cpu(torch::Tensor V, torch::T... function rasterize_image (line 128) | std::vector rasterize_image(torch::Tensor V, torch::Tenso... function PYBIND11_MODULE (line 138) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: python/sglang/multimodal_gen/csrc/render/hunyuan3d_rasterizer/rasterizer.h function calculateSignedArea2 (line 16) | inline float calculateSignedArea2(float* a, float* b, float* c) { function calculateBarycentricCoordinate (line 20) | inline void calculateBarycentricCoordinate(float* a, float* b, float* c,... function isBarycentricCoordInBounds (line 41) | inline bool isBarycentricCoordInBounds(float* barycentricCoord) { FILE: python/sglang/multimodal_gen/csrc/render/mesh_processor/__init__.py function _load_mesh_processor (line 20) | def _load_mesh_processor(): function meshVerticeInpaint (line 40) | def meshVerticeInpaint( FILE: python/sglang/multimodal_gen/csrc/render/mesh_processor/mesh_processor.cpp function meshVerticeInpaint_smooth (line 17) | std::pair, function meshVerticeInpaint (line 146) | std::pair, py::array_t> meshVerticeInpaint(p... function PYBIND11_MODULE (line 157) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: python/sglang/multimodal_gen/envs.py function get_default_cache_root (line 62) | def get_default_cache_root() -> str: function get_default_config_root (line 69) | def get_default_config_root() -> str: function maybe_convert_int (line 76) | def maybe_convert_int(value: str | None) -> int | None: function _lazy_str (line 81) | def _lazy_str(key: str, default: str | None = None) -> Callable[[], str ... function _lazy_int (line 85) | def _lazy_int(key: str, default: str | int | None = None) -> Callable[[]... function _lazy_float (line 95) | def _lazy_float(key: str, default: str | float) -> Callable[[], float]: function _lazy_bool (line 99) | def _lazy_bool(key: str, default: str = "false") -> Callable[[], bool]: function _lazy_bool_any (line 103) | def _lazy_bool_any(keys: list[str], default: str = "false") -> Callable[... function _lazy_path (line 117) | def _lazy_path( function _create_secondary_getter (line 295) | def _create_secondary_getter(suffix, type_func, default_val): function _secondary_taylorseer_getter (line 314) | def _secondary_taylorseer_getter(): function __getattr__ (line 327) | def __getattr__(name: str): function __dir__ (line 334) | def __dir__(): FILE: python/sglang/multimodal_gen/registry.py function _discover_and_register_pipelines (line 139) | def _discover_and_register_pipelines(): function get_pipeline_config_classes (line 192) | def get_pipeline_config_classes( class ConfigInfo (line 205) | class ConfigInfo: function register_configs (line 223) | def register_configs( function get_model_short_name (line 251) | def get_model_short_name(model_id: str) -> str: function _get_config_info (line 259) | def _get_config_info( class ModelInfo (line 330) | class ModelInfo: function _get_diffusers_model_info (line 341) | def _get_diffusers_model_info( function get_model_info (line 397) | def get_model_info( function _register_configs (line 528) | def _register_configs(): function is_known_non_diffusers_multimodal_model (line 829) | def is_known_non_diffusers_multimodal_model(model_path: str) -> bool: function get_non_diffusers_pipeline_name (line 836) | def get_non_diffusers_pipeline_name(model_path: str) -> Optional[str]: FILE: python/sglang/multimodal_gen/runtime/cache/cache_dit_integration.py function _patch_cache_dit_similarity (line 41) | def _patch_cache_dit_similarity(): function _build_parallelism_config (line 105) | def _build_parallelism_config( function _mark_transformer_parallelized (line 130) | def _mark_transformer_parallelized(transformer, config, sp_group, tp_gro... function get_scm_mask (line 138) | def get_scm_mask( class CacheDitConfig (line 182) | class CacheDitConfig: function enable_cache_on_transformer (line 225) | def enable_cache_on_transformer( function enable_cache_on_dual_transformer (line 339) | def enable_cache_on_dual_transformer( function refresh_context_on_transformer (line 527) | def refresh_context_on_transformer( function refresh_context_on_dual_transformer (line 548) | def refresh_context_on_dual_transformer( FILE: python/sglang/multimodal_gen/runtime/cache/teacache.py class TeaCacheContext (line 33) | class TeaCacheContext: class TeaCacheMixin (line 59) | class TeaCacheMixin: method _init_teacache_state (line 132) | def _init_teacache_state(self) -> None: method reset_teacache_state (line 155) | def reset_teacache_state(self) -> None: method _compute_l1_and_decide (line 171) | def _compute_l1_and_decide( method _compute_teacache_decision (line 218) | def _compute_teacache_decision( method _get_teacache_context (line 259) | def _get_teacache_context(self) -> TeaCacheContext | None: method maybe_cache_states (line 304) | def maybe_cache_states( method should_skip_forward_for_cached_states (line 310) | def should_skip_forward_for_cached_states(self, **kwargs: dict[str, An... method retrieve_cached_states (line 314) | def retrieve_cached_states(self, hidden_states: torch.Tensor) -> torch... FILE: python/sglang/multimodal_gen/runtime/distributed/__init__.py function _get_folding_tp_group (line 61) | def _get_folding_tp_group( FILE: python/sglang/multimodal_gen/runtime/distributed/communication_op.py function tensor_model_parallel_all_reduce (line 16) | def tensor_model_parallel_all_reduce( function tensor_model_parallel_all_gather (line 24) | def tensor_model_parallel_all_gather( function sequence_model_parallel_all_to_all_4D (line 33) | def sequence_model_parallel_all_to_all_4D( function sequence_model_parallel_all_gather (line 40) | def sequence_model_parallel_all_gather( function cfg_model_parallel_all_gather (line 47) | def cfg_model_parallel_all_gather( function cfg_model_parallel_all_reduce (line 54) | def cfg_model_parallel_all_reduce( FILE: python/sglang/multimodal_gen/runtime/distributed/device_communicators/base_device_communicator.py class DistributedAutograd (line 14) | class DistributedAutograd: class AllReduce (line 22) | class AllReduce(torch.autograd.Function): method forward (line 30) | def forward( method backward (line 43) | def backward(ctx: Any, grad_output: Tensor) -> tuple[None, Tensor, N... class AllGather (line 48) | class AllGather(torch.autograd.Function): method forward (line 56) | def forward( method backward (line 82) | def backward(ctx: Any, grad_output: Tensor) -> tuple[None, Tensor, N... class AllToAll4D (line 102) | class AllToAll4D(torch.autograd.Function): method forward (line 114) | def forward( method backward (line 185) | def backward( class DeviceCommunicatorBase (line 198) | class DeviceCommunicatorBase: method __init__ (line 206) | def __init__( method all_reduce (line 224) | def all_reduce( method all_gather (line 230) | def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Ten... method all_to_all_4D (line 238) | def all_to_all_4D( method gather (line 246) | def gather( method send (line 277) | def send(self, tensor: torch.Tensor, dst: int | None = None) -> None: method recv (line 284) | def recv( method destroy (line 296) | def destroy(self) -> None: FILE: python/sglang/multimodal_gen/runtime/distributed/device_communicators/cpu_communicator.py class CpuCommunicator (line 14) | class CpuCommunicator(DeviceCommunicatorBase): method __init__ (line 16) | def __init__( method all_reduce (line 36) | def all_reduce( method gather (line 44) | def gather( method all_gather (line 77) | def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Ten... class _CPUSHMDistributed (line 106) | class _CPUSHMDistributed: method __init__ (line 108) | def __init__(self, communicator: CpuCommunicator): method _init_cpu_shm (line 120) | def _init_cpu_shm(self) -> int: method all_reduce (line 135) | def all_reduce( method gather (line 140) | def gather( method all_gather_into_tensor (line 155) | def all_gather_into_tensor( FILE: python/sglang/multimodal_gen/runtime/distributed/device_communicators/cuda_communicator.py class CudaCommunicator (line 14) | class CudaCommunicator(DeviceCommunicatorBase): method __init__ (line 16) | def __init__( method all_reduce (line 36) | def all_reduce(self, input_, op: torch.distributed.ReduceOp | None = N... method send (line 49) | def send(self, tensor: torch.Tensor, dst: int | None = None) -> None: method recv (line 61) | def recv( method destroy (line 77) | def destroy(self) -> None: FILE: python/sglang/multimodal_gen/runtime/distributed/device_communicators/pynccl.py class PyNcclCommunicator (line 27) | class PyNcclCommunicator: method __init__ (line 29) | def __init__( method all_reduce (line 119) | def all_reduce( method all_gather (line 147) | def all_gather( method reduce_scatter (line 170) | def reduce_scatter( method send (line 198) | def send(self, tensor: torch.Tensor, dst: int, stream=None): method recv (line 216) | def recv(self, tensor: torch.Tensor, src: int, stream=None): method broadcast (line 234) | def broadcast(self, tensor: torch.Tensor, src: int, stream=None): FILE: python/sglang/multimodal_gen/runtime/distributed/device_communicators/pynccl_wrapper.py class ncclUniqueId (line 50) | class ncclUniqueId(ctypes.Structure): class ncclDataTypeEnum (line 60) | class ncclDataTypeEnum: method from_torch (line 79) | def from_torch(cls, dtype: torch.dtype) -> int: class ncclRedOpTypeEnum (line 102) | class ncclRedOpTypeEnum: method from_torch (line 111) | def from_torch(cls, op: ReduceOp) -> int: class Function (line 126) | class Function: class NCCLLibrary (line 132) | class NCCLLibrary: method __init__ (line 268) | def __init__(self, so_file: str | None = None): method ncclGetErrorString (line 301) | def ncclGetErrorString(self, result: ncclResult_t) -> str: method NCCL_CHECK (line 304) | def NCCL_CHECK(self, result: ncclResult_t) -> None: method ncclGetVersion (line 309) | def ncclGetVersion(self) -> str: method ncclGetUniqueId (line 319) | def ncclGetUniqueId(self) -> ncclUniqueId: method ncclCommInitRank (line 324) | def ncclCommInitRank( method ncclAllReduce (line 335) | def ncclAllReduce( method ncclReduceScatter (line 356) | def ncclReduceScatter( method ncclAllGather (line 377) | def ncclAllGather( method ncclSend (line 396) | def ncclSend( method ncclRecv (line 409) | def ncclRecv( method ncclBroadcast (line 422) | def ncclBroadcast( method ncclCommDestroy (line 438) | def ncclCommDestroy(self, comm: ncclComm_t) -> None: FILE: python/sglang/multimodal_gen/runtime/distributed/group_coordinator.py function get_local_torch_device (line 45) | def get_local_torch_device() -> torch.device: function _get_unique_name (line 51) | def _get_unique_name(name: str) -> str: function _split_tensor_dict (line 64) | def _split_tensor_dict( function _update_nested_dict (line 108) | def _update_nested_dict(nested_dict, flattened_key, value): class GraphCaptureContext (line 119) | class GraphCaptureContext: class GroupCoordinator (line 123) | class GroupCoordinator: method __init__ (line 152) | def __init__( method first_rank (line 220) | def first_rank(self): method last_rank (line 225) | def last_rank(self): method is_first_rank (line 230) | def is_first_rank(self): method is_last_rank (line 235) | def is_last_rank(self): method next_rank (line 240) | def next_rank(self): method prev_rank (line 247) | def prev_rank(self): method group_next_rank (line 254) | def group_next_rank(self): method group_prev_rank (line 261) | def group_prev_rank(self): method skip_rank (line 268) | def skip_rank(self): method group_skip_rank (line 275) | def group_skip_rank(self): method graph_capture (line 282) | def graph_capture(self, graph_capture_context: GraphCaptureContext | N... method all_to_all_4D (line 305) | def all_to_all_4D( method all_reduce (line 312) | def all_reduce( method all_gather (line 332) | def all_gather( method gather (line 380) | def gather(self, input_: torch.Tensor, dst: int = 0, dim: int = -1) ->... method broadcast (line 411) | def broadcast(self, input_: torch.Tensor, src: int = 0, async_op: bool... method broadcast_object (line 429) | def broadcast_object(self, obj: Optional[Any] = None, src: int = 0): method broadcast_object_list (line 453) | def broadcast_object_list( method send_object (line 473) | def send_object(self, obj: Any, dst: int) -> None: method recv_object (line 500) | def recv_object(self, src: int) -> Any: method broadcast_tensor_dict (line 536) | def broadcast_tensor_dict( method send_tensor_dict (line 616) | def send_tensor_dict( method recv_tensor_dict (line 658) | def recv_tensor_dict( method barrier (line 697) | def barrier(self): method send (line 706) | def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None: method recv (line 722) | def recv( method destroy (line 742) | def destroy(self) -> None: class PipelineGroupCoordinator (line 755) | class PipelineGroupCoordinator(GroupCoordinator): method __init__ (line 774) | def __init__( method reset_buffer (line 866) | def reset_buffer(self): method set_config (line 877) | def set_config(self, dtype: torch.dtype): method set_recv_buffer (line 880) | def set_recv_buffer( method set_extra_tensors_recv_buffer (line 902) | def set_extra_tensors_recv_buffer( method _check_shape_and_buffer (line 914) | def _check_shape_and_buffer( method _communicate_shapes (line 966) | def _communicate_shapes(self, tensor_send_to_next=None, recv_prev=False): method pipeline_send (line 1053) | def pipeline_send( method pipeline_isend (line 1062) | def pipeline_isend( method pipeline_recv (line 1071) | def pipeline_recv(self, idx: int = -1, name: str = "latent") -> torch.... method add_pipeline_recv_task (line 1077) | def add_pipeline_recv_task(self, idx: int = -1, name: str = "latent"): method recv_next (line 1081) | def recv_next(self): method get_pipeline_recv_data (line 1091) | def get_pipeline_recv_data( method _pipeline_irecv (line 1104) | def _pipeline_irecv(self, tensor: torch.tensor): method _pipeline_isend (line 1115) | def _pipeline_isend(self, tensor: torch.tensor): method set_skip_tensor_recv_buffer (line 1126) | def set_skip_tensor_recv_buffer( method pipeline_send_skip (line 1140) | def pipeline_send_skip(self, tensor: torch.Tensor) -> None: method pipeline_isend_skip (line 1144) | def pipeline_isend_skip(self, tensor: torch.Tensor) -> None: method pipeline_recv_skip (line 1148) | def pipeline_recv_skip(self, idx: int = -1) -> torch.Tensor: method add_pipeline_recv_skip_task (line 1152) | def add_pipeline_recv_skip_task(self, idx: int = -1): method get_pipeline_recv_skip_data (line 1155) | def get_pipeline_recv_skip_data(self, idx: int = -1) -> torch.Tensor: method recv_skip_next (line 1166) | def recv_skip_next(self): method _pipeline_irecv_skip (line 1180) | def _pipeline_irecv_skip(self, tensor: torch.tensor): method _pipeline_isend_skip (line 1185) | def _pipeline_isend_skip(self, tensor: torch.tensor): class SequenceParallelGroupCoordinator (line 1191) | class SequenceParallelGroupCoordinator(GroupCoordinator): method __init__ (line 1192) | def __init__( FILE: python/sglang/multimodal_gen/runtime/distributed/parallel_groups.py class Singleton (line 7) | class Singleton: method __new__ (line 10) | def __new__(cls, *args, **kwargs): class ProcessGroupSingleton (line 16) | class ProcessGroupSingleton(Singleton): method __init__ (line 17) | def __init__(self): function set_seq_parallel_pg_by_sp_groups (line 25) | def set_seq_parallel_pg_by_sp_groups( FILE: python/sglang/multimodal_gen/runtime/distributed/parallel_state.py function _split_tensor_dict (line 74) | def _split_tensor_dict( function _register_group (line 103) | def _register_group(group: "GroupCoordinator") -> None: function all_reduce (line 107) | def all_reduce(tensor: torch.Tensor, group_name: str) -> torch.Tensor: function all_reduce_fake (line 115) | def all_reduce_fake(tensor: torch.Tensor, group_name: str) -> torch.Tensor: function get_world_group (line 119) | def get_world_group() -> GroupCoordinator: function init_world_group (line 124) | def init_world_group( function init_parallel_group_coordinator (line 136) | def init_parallel_group_coordinator( function get_tp_group (line 176) | def get_tp_group() -> GroupCoordinator: function init_distributed_environment (line 181) | def init_distributed_environment( function get_sp_group (line 258) | def get_sp_group() -> SequenceParallelGroupCoordinator: function get_dp_group (line 263) | def get_dp_group() -> GroupCoordinator: function initialize_model_parallel (line 269) | def initialize_model_parallel( function get_sp_world_size (line 434) | def get_sp_world_size() -> int: function get_sp_parallel_rank (line 439) | def get_sp_parallel_rank() -> int: function get_world_size (line 444) | def get_world_size() -> int: function get_world_rank (line 449) | def get_world_rank() -> int: function get_dp_world_size (line 454) | def get_dp_world_size() -> int: function get_dp_rank (line 459) | def get_dp_rank() -> int: function maybe_init_distributed_environment_and_model_parallel (line 464) | def maybe_init_distributed_environment_and_model_parallel( function model_parallel_is_initialized (line 524) | def model_parallel_is_initialized() -> bool: function patch_tensor_parallel_group (line 539) | def patch_tensor_parallel_group(tp_group: GroupCoordinator): function get_tp_world_size (line 561) | def get_tp_world_size() -> int: function get_tp_rank (line 566) | def get_tp_rank() -> int: function destroy_distributed_environment (line 571) | def destroy_distributed_environment() -> None: function cleanup_dist_env_and_memory (line 580) | def cleanup_dist_env_and_memory(shutdown_ray: bool = False): function is_the_same_node_as (line 591) | def is_the_same_node_as( function get_tensor_model_parallel_world_size (line 681) | def get_tensor_model_parallel_world_size() -> int: function get_tensor_model_parallel_rank (line 686) | def get_tensor_model_parallel_rank() -> int: function get_sequence_parallel_world_size (line 691) | def get_sequence_parallel_world_size() -> int: function get_sequence_parallel_rank (line 696) | def get_sequence_parallel_rank() -> int: function get_ulysses_parallel_world_size (line 701) | def get_ulysses_parallel_world_size() -> int: function get_ulysses_parallel_rank (line 705) | def get_ulysses_parallel_rank() -> int: function get_ring_parallel_world_size (line 709) | def get_ring_parallel_world_size() -> int: function get_ring_parallel_rank (line 713) | def get_ring_parallel_rank() -> int: function get_pp_group (line 718) | def get_pp_group() -> PipelineGroupCoordinator: function get_pipeline_parallel_world_size (line 723) | def get_pipeline_parallel_world_size() -> int: function get_pipeline_parallel_rank (line 728) | def get_pipeline_parallel_rank() -> int: function is_pipeline_first_stage (line 733) | def is_pipeline_first_stage() -> bool: function is_pipeline_last_stage (line 738) | def is_pipeline_last_stage() -> bool: function get_cfg_group (line 744) | def get_cfg_group() -> GroupCoordinator: function get_classifier_free_guidance_world_size (line 751) | def get_classifier_free_guidance_world_size() -> int: function get_classifier_free_guidance_rank (line 756) | def get_classifier_free_guidance_rank() -> int: function get_data_parallel_world_size (line 761) | def get_data_parallel_world_size() -> int: function get_data_parallel_rank (line 766) | def get_data_parallel_rank() -> int: function is_dp_last_group (line 771) | def is_dp_last_group() -> bool: function get_dit_world_size (line 781) | def get_dit_world_size() -> int: function get_vae_parallel_group (line 792) | def get_vae_parallel_group() -> ProcessGroup: function get_vae_parallel_world_size (line 797) | def get_vae_parallel_world_size() -> int: function get_vae_parallel_rank (line 802) | def get_vae_parallel_rank() -> int: function init_dit_group (line 807) | def init_dit_group( function get_dit_group (line 818) | def get_dit_group() -> ProcessGroup: function init_vae_group (line 823) | def init_vae_group( function destroy_model_parallel (line 835) | def destroy_model_parallel() -> None: FILE: python/sglang/multimodal_gen/runtime/distributed/utils.py function ensure_divisibility (line 25) | def ensure_divisibility(numerator, denominator) -> None: function divide (line 32) | def divide(numerator: int, denominator: int) -> int: function split_tensor_along_last_dim (line 39) | def split_tensor_along_last_dim( class StatelessProcessGroup (line 68) | class StatelessProcessGroup: method __post_init__ (line 89) | def __post_init__(self): method send_obj (line 95) | def send_obj(self, obj: Any, dst: int): method expire_data (line 103) | def expire_data(self) -> None: method recv_obj (line 114) | def recv_obj(self, src: int) -> Any: method broadcast_obj (line 122) | def broadcast_obj(self, obj: Any | None, src: int) -> Any: method all_gather_obj (line 140) | def all_gather_obj(self, obj: Any) -> list[Any]: method barrier (line 152) | def barrier(self): method create (line 161) | def create( FILE: python/sglang/multimodal_gen/runtime/entrypoints/cli/cli_types.py class CLISubcommand (line 11) | class CLISubcommand: method cmd (line 16) | def cmd( method validate (line 22) | def validate(self, args: argparse.Namespace) -> None: method subparser_init (line 26) | def subparser_init( FILE: python/sglang/multimodal_gen/runtime/entrypoints/cli/generate.py function add_multimodal_gen_generate_args (line 34) | def add_multimodal_gen_generate_args(parser: argparse.ArgumentParser): function maybe_dump_performance (line 63) | def maybe_dump_performance( function generate_cmd (line 109) | def generate_cmd(args: argparse.Namespace, unknown_args: list[str] | Non... class GenerateSubcommand (line 144) | class GenerateSubcommand(CLISubcommand): method __init__ (line 147) | def __init__(self) -> None: method _get_init_arg_names (line 153) | def _get_init_arg_names(self) -> list[str]: method _get_generation_arg_names (line 157) | def _get_generation_arg_names(self) -> list[str]: method cmd (line 161) | def cmd( method validate (line 166) | def validate(self, args: argparse.Namespace) -> None: method subparser_init (line 174) | def subparser_init( FILE: python/sglang/multimodal_gen/runtime/entrypoints/cli/main.py function generate_cmd_init (line 12) | def generate_cmd_init() -> list[CLISubcommand]: function cmd_init (line 16) | def cmd_init() -> list[CLISubcommand]: function main (line 23) | def main() -> None: FILE: python/sglang/multimodal_gen/runtime/entrypoints/cli/serve.py function add_multimodal_gen_serve_args (line 19) | def add_multimodal_gen_serve_args(parser: argparse.ArgumentParser): function execute_serve_cmd (line 31) | def execute_serve_cmd(args: argparse.Namespace, unknown_args: list[str] ... class ServeSubcommand (line 40) | class ServeSubcommand(CLISubcommand): method __init__ (line 43) | def __init__(self) -> None: method cmd (line 47) | def cmd( method validate (line 52) | def validate(self, args: argparse.Namespace) -> None: method subparser_init (line 57) | def subparser_init( function cmd_init (line 71) | def cmd_init() -> list[CLISubcommand]: FILE: python/sglang/multimodal_gen/runtime/entrypoints/cli/utils.py class RaiseNotImplementedAction (line 16) | class RaiseNotImplementedAction(argparse.Action): method __call__ (line 18) | def __call__(self, parser, namespace, values, option_string=None): function launch_distributed (line 22) | def launch_distributed( FILE: python/sglang/multimodal_gen/runtime/entrypoints/diffusion_generator.py class DiffGenerator (line 58) | class DiffGenerator: method __init__ (line 66) | def __init__( method from_pretrained (line 84) | def from_pretrained( method from_server_args (line 107) | def from_server_args( method _start_local_server_if_needed (line 134) | def _start_local_server_if_needed( method _check_remote_scheduler (line 145) | def _check_remote_scheduler(self): method _resolve_image_paths_per_prompt (line 159) | def _resolve_image_paths_per_prompt( method generate (line 176) | def generate( method _resolve_prompts (line 337) | def _resolve_prompts(self, prompt: str | list[str] | None) -> list[str]: method _log_summary (line 356) | def _log_summary(self, results: list[GenerationResult]) -> None: method _send_to_scheduler_and_wait_for_response (line 373) | def _send_to_scheduler_and_wait_for_response(self, batch: list[Req]) -... method _send_lora_request (line 380) | def _send_lora_request(self, req: Any, success_msg: str, failure_msg: ... method set_lora (line 389) | def set_lora( method unmerge_lora_weights (line 427) | def unmerge_lora_weights(self, target: str = "all") -> None: method merge_lora_weights (line 441) | def merge_lora_weights(self, target: str = "all", strength: float = 1.... method list_loras (line 456) | def list_loras(self) -> dict: method _ensure_lora_state (line 466) | def _ensure_lora_state( method generate_with_lora (line 494) | def generate_with_lora( method shutdown (line 515) | def shutdown(self): method __enter__ (line 541) | def __enter__(self): method __exit__ (line 544) | def __exit__(self, exc_type, exc_val, exc_tb): method __del__ (line 547) | def __del__(self): FILE: python/sglang/multimodal_gen/runtime/entrypoints/http_server.py function lifespan (line 40) | async def lifespan(app: FastAPI): function health (line 66) | async def health(): function get_models (line 71) | async def get_models(request: Request): function server_info_endpoint (line 100) | async def server_info_endpoint(request: Request): function model_info_endpoint (line 118) | async def model_info_endpoint(request: Request): function health_generate (line 157) | async def health_generate(): function make_serializable (line 162) | def make_serializable(obj): function encode_video_to_base64 (line 173) | def encode_video_to_base64(file_path: str): function forward_to_scheduler (line 180) | async def forward_to_scheduler( function vertex_generate (line 236) | async def vertex_generate(vertex_req: VertexGenerateReqInput): function create_app (line 269) | def create_app(server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/common_api.py class ModelCard (line 25) | class ModelCard(BaseModel): class DiffusionModelCard (line 37) | class DiffusionModelCard(ModelCard): function _handle_lora_request (line 48) | async def _handle_lora_request(req: Any, success_msg: str, failure_msg: ... function set_lora (line 64) | async def set_lora( function merge_lora_weights (line 106) | async def merge_lora_weights( function unmerge_lora_weights (line 128) | async def unmerge_lora_weights( function model_info (line 147) | async def model_info(): function list_loras (line 160) | async def list_loras(): function available_models (line 177) | async def available_models(): function retrieve_model (line 210) | async def retrieve_model(model: str): FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/image_api.py function _read_b64_for_paths (line 39) | def _read_b64_for_paths(paths: list[str]) -> list[str]: function _build_image_response_kwargs (line 48) | def _build_image_response_kwargs( function generations (line 110) | async def generations( function edits (line 190) | async def edits( function download_image_content (line 324) | async def download_image_content( FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/mesh_api.py function _normalize_format (line 44) | def _normalize_format(fmt: Optional[str]) -> str: function _build_sampling_params_from_request (line 49) | def _build_sampling_params_from_request( function _mesh_job_from_sampling (line 79) | def _mesh_job_from_sampling( function _dispatch_job_async (line 94) | async def _dispatch_job_async(job_id: str, batch: Req) -> None: function create_mesh (line 129) | async def create_mesh( function list_meshes (line 227) | async def list_meshes( function retrieve_mesh (line 254) | async def retrieve_mesh(mesh_id: str = Path(...)): function delete_mesh (line 262) | async def delete_mesh(mesh_id: str = Path(...)): function download_mesh_content (line 271) | async def download_mesh_content( FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/protocol.py class ImageResponseData (line 11) | class ImageResponseData(BaseModel): class ImageResponse (line 18) | class ImageResponse(BaseModel): class ImageGenerationsRequest (line 26) | class ImageGenerationsRequest(BaseModel): class VideoResponse (line 59) | class VideoResponse(BaseModel): class VideoGenerationsRequest (line 79) | class VideoGenerationsRequest(BaseModel): class VideoListResponse (line 114) | class VideoListResponse(BaseModel): class VideoRemixRequest (line 119) | class VideoRemixRequest(BaseModel): class MeshResponse (line 124) | class MeshResponse(BaseModel): class MeshGenerationsRequest (line 142) | class MeshGenerationsRequest(BaseModel): class MeshListResponse (line 154) | class MeshListResponse(BaseModel): class BaseReq (line 160) | class BaseReq(ABC): method regenerate_rid (line 164) | def regenerate_rid(self): class VertexGenerateReqInput (line 174) | class VertexGenerateReqInput(BaseReq): FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/storage.py class CloudStorage (line 10) | class CloudStorage: method __init__ (line 11) | def __init__(self): method is_enabled (line 43) | def is_enabled(self) -> bool: method upload_file (line 46) | async def upload_file(self, local_path: str, destination_key: str) -> ... method upload_and_cleanup (line 91) | async def upload_and_cleanup(self, file_path: str) -> Optional[str]: FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/stores.py class AsyncDictStore (line 5) | class AsyncDictStore: method __init__ (line 13) | def __init__(self) -> None: method upsert (line 17) | async def upsert(self, key: str, value: Dict[str, Any]) -> None: method update_fields (line 21) | async def update_fields( method get (line 31) | async def get(self, key: str) -> Optional[Dict[str, Any]]: method pop (line 35) | async def pop(self, key: str) -> Optional[Dict[str, Any]]: method list_values (line 39) | async def list_values(self) -> List[Dict[str, Any]]: FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/utils.py function temp_dir_if_disabled (line 54) | def temp_dir_if_disabled( function _parse_size (line 70) | def _parse_size(size: str) -> tuple[int, int] | tuple[None, None]: function choose_output_image_ext (line 81) | def choose_output_image_ext( function build_sampling_params (line 92) | def build_sampling_params(request_id: str, **kwargs) -> SamplingParams: function save_image_to_path (line 140) | async def save_image_to_path(image: Union[UploadFile, str], target_path:... function _save_upload_to_path (line 148) | async def _save_upload_to_path(upload: UploadFile, target_path: str) -> ... function _maybe_url_image (line 156) | async def _maybe_url_image(img_url: str, target_path: str) -> str | None: function _save_url_image_to_path (line 172) | async def _save_url_image_to_path(image_url: str, target_path: str) -> str: function _save_base64_image_to_path (line 218) | async def _save_base64_image_to_path(base64_data: str, target_path: str)... function process_generation_batch (line 258) | async def process_generation_batch( function merge_image_input_list (line 303) | def merge_image_input_list(*inputs: Union[List, Any, None]) -> List: function add_common_data_to_response (line 331) | def add_common_data_to_response( function adjust_output_quality (line 345) | def adjust_output_quality(output_quality: str, data_type: DataType = Non... FILE: python/sglang/multimodal_gen/runtime/entrypoints/openai/video_api.py function _build_video_sampling_params (line 52) | def _build_video_sampling_params(request_id: str, request: VideoGenerati... function _video_job_from_sampling (line 88) | def _video_job_from_sampling( function _save_first_input_image (line 107) | async def _save_first_input_image( function _dispatch_job_async (line 123) | async def _dispatch_job_async( function create_video (line 166) | async def create_video( function list_videos (line 357) | async def list_videos( function retrieve_video (line 385) | async def retrieve_video(video_id: str = Path(...)): function delete_video (line 394) | async def delete_video(video_id: str = Path(...)): function download_video_content (line 404) | async def download_video_content( FILE: python/sglang/multimodal_gen/runtime/entrypoints/post_training/io_struct.py class UpdateWeightFromDiskReqInput (line 7) | class UpdateWeightFromDiskReqInput: class GetWeightsChecksumReqInput (line 16) | class GetWeightsChecksumReqInput: FILE: python/sglang/multimodal_gen/runtime/entrypoints/post_training/weights_api.py function update_weights_from_disk (line 16) | async def update_weights_from_disk(request: Request): function get_weights_checksum (line 50) | async def get_weights_checksum(request: Request): FILE: python/sglang/multimodal_gen/runtime/entrypoints/utils.py class SetLoraReq (line 44) | class SetLoraReq: class MergeLoraWeightsReq (line 52) | class MergeLoraWeightsReq: class UnmergeLoraWeightsReq (line 58) | class UnmergeLoraWeightsReq: class ListLorasReq (line 63) | class ListLorasReq: class ShutdownReq (line 68) | class ShutdownReq: function format_lora_message (line 72) | def format_lora_message( class GenerationResult (line 98) | class GenerationResult: function _normalize_audio_to_numpy (line 116) | def _normalize_audio_to_numpy(audio: Any) -> np.ndarray | None: function _pick_audio_sample_rate (line 143) | def _pick_audio_sample_rate( function _resolve_ffmpeg_exe (line 170) | def _resolve_ffmpeg_exe() -> str: function _mux_audio_np_into_mp4 (line 192) | def _mux_audio_np_into_mp4( function _maybe_mux_audio_into_mp4 (line 243) | def _maybe_mux_audio_into_mp4( function prepare_request (line 281) | def prepare_request( function attach_audio_to_video_sample (line 314) | def attach_audio_to_video_sample( function save_outputs (line 334) | def save_outputs( function post_process_sample (line 398) | def post_process_sample( FILE: python/sglang/multimodal_gen/runtime/launch_server.py function kill_process_tree (line 22) | def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid:... function launch_server (line 61) | def launch_server(server_args: ServerArgs, launch_http_server: bool = Tr... function launch_http_server_only (line 188) | def launch_http_server_only(server_args): FILE: python/sglang/multimodal_gen/runtime/layers/activation.py class SiluAndMul (line 29) | class SiluAndMul(CustomOp): method __init__ (line 39) | def __init__(self) -> None: method forward_cuda (line 42) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: method forward_native (line 49) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: method forward_npu (line 54) | def forward_npu(self, x: torch.Tensor) -> torch.Tensor: method forward_musa (line 58) | def forward_musa(self, x: torch.Tensor) -> torch.Tensor: class GeluAndMul (line 63) | class GeluAndMul(CustomOp): method __init__ (line 73) | def __init__(self, approximate: str = "none"): method forward_cuda (line 79) | def forward_cuda(self, *args, **kwargs) -> Any: method forward_native (line 82) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: method extra_repr (line 87) | def extra_repr(self) -> str: class NewGELU (line 92) | class NewGELU(CustomOp): method __init__ (line 94) | def __init__(self): method forward_cuda (line 97) | def forward_cuda(self, *args, **kwargs) -> Any: method forward_native (line 100) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: class QuickGELU (line 107) | class QuickGELU(CustomOp): method __init__ (line 109) | def __init__(self): method forward_cuda (line 112) | def forward_cuda(self, *args, **kwargs) -> Any: method forward_native (line 115) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: function get_act_fn (line 130) | def get_act_fn(act_fn_name: str) -> nn.Module: function get_act_and_mul_fn (line 145) | def get_act_and_mul_fn(act_fn_name: str) -> nn.Module: FILE: python/sglang/multimodal_gen/runtime/layers/attention/STA_configuration.py function configure_sta (line 14) | def configure_sta( function read_specific_json_files (line 256) | def read_specific_json_files(folder_path: str) -> list[dict[str, Any]]: function average_head_losses (line 275) | def average_head_losses( function select_best_mask_strategy (line 301) | def select_best_mask_strategy( function save_mask_search_results (line 360) | def save_mask_search_results( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/aiter.py class AITerBackend (line 17) | class AITerBackend(AttentionBackend): method get_enum (line 23) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 27) | def get_impl_cls() -> type["AITerImpl"]: method get_metadata_cls (line 31) | def get_metadata_cls() -> type["AttentionMetadata"]: method get_builder_cls (line 36) | def get_builder_cls() -> type["AttentionMetadataBuilder"]: class AITerImpl (line 40) | class AITerImpl(AttentionImpl): method __init__ (line 45) | def __init__( method forward (line 63) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/aiter_sage.py class AITERSageBackend (line 15) | class AITERSageBackend(AttentionBackend): method get_enum (line 18) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 22) | def get_impl_cls() -> type["AITERSageImpl"]: method get_metadata_cls (line 26) | def get_metadata_cls() -> type["AttentionMetadata"]: method get_builder_cls (line 31) | def get_builder_cls() -> type["AttentionMetadataBuilder"]: class AITERSageImpl (line 37) | class AITERSageImpl(AttentionImpl): method __init__ (line 39) | def __init__( method forward (line 60) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/attention_backend.py class AttentionBackend (line 18) | class AttentionBackend(ABC): method get_enum (line 28) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 33) | def get_impl_cls() -> type["AttentionImpl"]: method get_metadata_cls (line 38) | def get_metadata_cls() -> type["AttentionMetadata"]: method get_builder_cls (line 52) | def get_builder_cls() -> type["AttentionMetadataBuilder"]: class AttentionMetadata (line 57) | class AttentionMetadata: method asdict_zerocopy (line 63) | def asdict_zerocopy(self, skip_fields: set[str] | None = None) -> dict... class AttentionMetadataBuilder (line 79) | class AttentionMetadataBuilder(ABC, Generic[T]): method __init__ (line 83) | def __init__(self) -> None: method prepare (line 88) | def prepare(self) -> None: method build (line 93) | def build( class AttentionLayer (line 101) | class AttentionLayer(Protocol): method forward (line 108) | def forward( class AttentionImpl (line 118) | class AttentionImpl(ABC, Generic[T]): method __init__ (line 121) | def __init__( method preprocess_qkv (line 133) | def preprocess_qkv(self, qkv: torch.Tensor, attn_metadata: T) -> torch... method postprocess_output (line 145) | def postprocess_output( method forward (line 163) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/flash_attn.py function flash_attn_func (line 21) | def flash_attn_func(*args, ver: int = 3, **kwargs): function maybe_contiguous (line 30) | def maybe_contiguous(x: Optional[torch.Tensor]) -> Optional[torch.Tensor]: function flash_attn_varlen_func_fake_out (line 41) | def flash_attn_varlen_func_fake_out( function flash_attn_varlen_func_fake_out_lse (line 101) | def flash_attn_varlen_func_fake_out_lse( function flash_attn_varlen_func_op (line 176) | def flash_attn_varlen_func_op( function flash_attn_varlen_func_op_lse (line 240) | def flash_attn_varlen_func_op_lse( function set_fa_ver (line 313) | def set_fa_ver(ver: int) -> None: class FlashAttentionMetadata (line 319) | class FlashAttentionMetadata: class FlashAttentionMetadataBuilder (line 331) | class FlashAttentionMetadataBuilder(AttentionMetadataBuilder): method __init__ (line 332) | def __init__(self) -> None: method prepare (line 335) | def prepare(self) -> None: method build (line 338) | def build( # type: ignore class FlashAttentionBackend (line 347) | class FlashAttentionBackend(AttentionBackend): method get_supported_head_sizes (line 351) | def get_supported_head_sizes() -> list[int]: method get_enum (line 355) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 359) | def get_impl_cls() -> type["FlashAttentionImpl"]: method get_metadata_cls (line 363) | def get_metadata_cls() -> type["AttentionMetadata"]: method get_builder_cls (line 367) | def get_builder_cls() -> type["AttentionMetadataBuilder"]: class FlashAttentionImpl (line 371) | class FlashAttentionImpl(AttentionImpl): method __init__ (line 372) | def __init__( method forward (line 389) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/flash_attn_2.py class FlashAttention2Backend (line 22) | class FlashAttention2Backend(AttentionBackend): method get_supported_head_sizes (line 26) | def get_supported_head_sizes() -> list[int]: method get_enum (line 30) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 34) | def get_impl_cls() -> type["FlashAttention2Impl"]: method get_metadata_cls (line 38) | def get_metadata_cls() -> type["AttentionMetadata"]: method get_builder_cls (line 42) | def get_builder_cls() -> type["AttentionMetadataBuilder"]: class FlashAttention2Impl (line 46) | class FlashAttention2Impl(AttentionImpl): method __init__ (line 48) | def __init__( method forward (line 61) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sage_attn.py class SageAttentionBackend (line 20) | class SageAttentionBackend(AttentionBackend): method get_supported_head_sizes (line 24) | def get_supported_head_sizes() -> list[int]: method get_enum (line 28) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 32) | def get_impl_cls() -> type["SageAttentionImpl"]: class SageAttentionImpl (line 36) | class SageAttentionImpl(AttentionImpl): method __init__ (line 38) | def __init__( method forward (line 52) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sage_attn3.py class SageAttention3Backend (line 20) | class SageAttention3Backend(AttentionBackend): method get_supported_head_sizes (line 24) | def get_supported_head_sizes() -> list[int]: method get_enum (line 28) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 32) | def get_impl_cls() -> type["SageAttention3Impl"]: method get_metadata_cls (line 36) | def get_metadata_cls() -> type["AttentionMetadata"]: class SageAttention3Impl (line 40) | class SageAttention3Impl(AttentionImpl): method __init__ (line 43) | def __init__( method forward (line 57) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sdpa.py class SDPABackend (line 18) | class SDPABackend(AttentionBackend): method get_supported_head_sizes (line 23) | def get_supported_head_sizes() -> list[int]: method get_enum (line 27) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 31) | def get_impl_cls() -> type["SDPAImpl"]: class SDPAImpl (line 39) | class SDPAImpl(AttentionImpl): method __init__ (line 41) | def __init__( method forward (line 55) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sliding_tile_attn.py class RangeDict (line 37) | class RangeDict(dict): method __getitem__ (line 39) | def __getitem__(self, item: int) -> str: class SlidingTileAttentionBackend (line 50) | class SlidingTileAttentionBackend(AttentionBackend): method get_supported_head_sizes (line 54) | def get_supported_head_sizes() -> list[int]: method get_enum (line 59) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 63) | def get_impl_cls() -> type["SlidingTileAttentionImpl"]: method get_metadata_cls (line 67) | def get_metadata_cls() -> type["SlidingTileAttentionMetadata"]: method get_builder_cls (line 71) | def get_builder_cls() -> type["SlidingTileAttentionMetadataBuilder"]: class SlidingTileAttentionMetadata (line 76) | class SlidingTileAttentionMetadata(AttentionMetadata): class SlidingTileAttentionMetadataBuilder (line 83) | class SlidingTileAttentionMetadataBuilder(AttentionMetadataBuilder): method __init__ (line 85) | def __init__(self): method prepare (line 88) | def prepare(self): method build (line 91) | def build( # type: ignore class SlidingTileAttentionImpl (line 107) | class SlidingTileAttentionImpl(AttentionImpl): method __init__ (line 109) | def __init__( method tile (line 152) | def tile(self, x: torch.Tensor) -> torch.Tensor: method untile (line 164) | def untile(self, x: torch.Tensor) -> torch.Tensor: method preprocess_qkv (line 177) | def preprocess_qkv( method postprocess_output (line 193) | def postprocess_output( method forward (line 200) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sparse_linear_attn.py function get_block_map (line 40) | def get_block_map(q, k, topk_ratio, BLKQ=64, BLKK=64): function mean_pool (line 57) | def mean_pool(x, BLK): function compress_kernel (line 70) | def compress_kernel( function _attn_fwd (line 95) | def _attn_fwd( function _get_cuda_arch (line 162) | def _get_cuda_arch(device_index: int) -> str: class SparseLinearAttentionBackend (line 169) | class SparseLinearAttentionBackend(AttentionBackend): method get_supported_head_sizes (line 175) | def get_supported_head_sizes() -> list[int]: method get_enum (line 179) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 183) | def get_impl_cls() -> type["SparseLinearAttentionImpl"]: method get_metadata_cls (line 187) | def get_metadata_cls() -> type["SparseLinearAttentionMetadata"]: method get_builder_cls (line 191) | def get_builder_cls() -> type["SparseLinearAttentionMetadataBuilder"]: class SparseLinearAttentionMetadata (line 196) | class SparseLinearAttentionMetadata(AttentionMetadata): class SparseLinearAttentionMetadataBuilder (line 206) | class SparseLinearAttentionMetadataBuilder(AttentionMetadataBuilder): method __init__ (line 209) | def __init__(self) -> None: method prepare (line 212) | def prepare(self) -> None: method build (line 215) | def build( class SparseLinearAttentionImpl (line 227) | class SparseLinearAttentionImpl(AttentionImpl, nn.Module): method __init__ (line 230) | def __init__( method _init_weights (line 275) | def _init_weights(self) -> None: method _calc_linear_attention_with_torch (line 281) | def _calc_linear_attention_with_torch(self, q, k, v): method forward (line 286) | def forward( class _attention (line 341) | class _attention(torch.autograd.Function): method forward (line 343) | def forward(ctx, q, k, v, k_block_id, lut, topk, BLOCK_M, BLOCK_N, qk_... class SageSparseLinearAttentionBackend (line 405) | class SageSparseLinearAttentionBackend(AttentionBackend): method get_supported_head_sizes (line 411) | def get_supported_head_sizes() -> list[int]: method get_enum (line 415) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 419) | def get_impl_cls() -> type["SageSparseLinearAttentionImpl"]: method get_metadata_cls (line 423) | def get_metadata_cls() -> type["SageSparseLinearAttentionMetadata"]: method get_builder_cls (line 427) | def get_builder_cls() -> type["SageSparseLinearAttentionMetadataBuilde... class SageSparseLinearAttentionMetadata (line 432) | class SageSparseLinearAttentionMetadata(AttentionMetadata): class SageSparseLinearAttentionMetadataBuilder (line 442) | class SageSparseLinearAttentionMetadataBuilder(AttentionMetadataBuilder): method __init__ (line 445) | def __init__(self) -> None: method prepare (line 448) | def prepare(self) -> None: method build (line 451) | def build( class SageSparseLinearAttentionImpl (line 463) | class SageSparseLinearAttentionImpl(AttentionImpl, nn.Module): method __init__ (line 464) | def __init__( method _init_weights (line 513) | def _init_weights(self) -> None: method _calc_linear_attention_with_torch (line 519) | def _calc_linear_attention_with_torch( method forward (line 529) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/sparse_video_gen_2_attn.py class SparseVideoGen2AttentionBackend (line 44) | class SparseVideoGen2AttentionBackend(AttentionBackend): method get_supported_head_sizes (line 49) | def get_supported_head_sizes() -> list[int]: method get_enum (line 53) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 57) | def get_impl_cls() -> type["SparseVideoGen2AttentionImpl"]: method get_metadata_cls (line 61) | def get_metadata_cls() -> type["SparseVideoGen2AttentionMetadata"]: method get_builder_cls (line 65) | def get_builder_cls() -> type["SparseVideoGen2AttentionMetadataBuilder"]: class Svg2LayerCache (line 70) | class Svg2LayerCache: class Svg2Cache (line 78) | class Svg2Cache: method get_layer (line 81) | def get_layer(self, layer_idx: int) -> Svg2LayerCache: class SparseVideoGen2AttentionMetadata (line 90) | class SparseVideoGen2AttentionMetadata(AttentionMetadata): function _require_kwarg (line 110) | def _require_kwarg(kwargs: dict[str, Any], name: str) -> Any: class SparseVideoGen2AttentionMetadataBuilder (line 118) | class SparseVideoGen2AttentionMetadataBuilder(AttentionMetadataBuilder): method __init__ (line 120) | def __init__(self) -> None: method prepare (line 123) | def prepare(self) -> None: method build (line 126) | def build( # type: ignore[override] class SparseVideoGen2AttentionImpl (line 182) | class SparseVideoGen2AttentionImpl(AttentionImpl): method __init__ (line 184) | def __init__( method _get_layer_idx (line 207) | def _get_layer_idx(self, prefix: str) -> int: method kmeans_init (line 215) | def kmeans_init( method kmeans_step (line 248) | def kmeans_step( method kmeans_clustering (line 283) | def kmeans_clustering( method semantic_aware_permutation (line 330) | def semantic_aware_permutation( method _hunyuan_dynamic_map_post_processing (line 389) | def _hunyuan_dynamic_map_post_processing( method forward (line 440) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/video_sparse_attn.py function get_tile_partition_indices (line 32) | def get_tile_partition_indices( function get_reverse_tile_partition_indices (line 56) | def get_reverse_tile_partition_indices( function construct_variable_block_sizes (line 65) | def construct_variable_block_sizes( function get_non_pad_index (line 109) | def get_non_pad_index( class VideoSparseAttentionBackend (line 126) | class VideoSparseAttentionBackend(AttentionBackend): method get_supported_head_sizes (line 131) | def get_supported_head_sizes() -> list[int]: method get_enum (line 135) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 139) | def get_impl_cls() -> type["VideoSparseAttentionImpl"]: method get_metadata_cls (line 143) | def get_metadata_cls() -> type["VideoSparseAttentionMetadata"]: method get_builder_cls (line 147) | def get_builder_cls() -> type["VideoSparseAttentionMetadataBuilder"]: class VideoSparseAttentionMetadata (line 152) | class VideoSparseAttentionMetadata(AttentionMetadata): class VideoSparseAttentionMetadataBuilder (line 171) | class VideoSparseAttentionMetadataBuilder(AttentionMetadataBuilder): method __init__ (line 173) | def __init__(self): method prepare (line 176) | def prepare(self): method build (line 179) | def build( # type: ignore class VideoSparseAttentionImpl (line 228) | class VideoSparseAttentionImpl(AttentionImpl): method __init__ (line 230) | def __init__( method tile (line 244) | def tile( method untile (line 268) | def untile( method preprocess_qkv (line 277) | def preprocess_qkv( method postprocess_output (line 289) | def postprocess_output( method forward (line 300) | def forward( # type: ignore[override] FILE: python/sglang/multimodal_gen/runtime/layers/attention/backends/vmoba.py class VMOBAAttentionBackend (line 28) | class VMOBAAttentionBackend(AttentionBackend): method get_enum (line 33) | def get_enum() -> AttentionBackendEnum: method get_impl_cls (line 37) | def get_impl_cls() -> type["VMOBAAttentionImpl"]: method get_metadata_cls (line 41) | def get_metadata_cls() -> type["VideoMobaAttentionMetadata"]: method get_builder_cls (line 45) | def get_builder_cls() -> type["VideoMobaAttentionMetadataBuilder"]: class VideoMobaAttentionMetadata (line 50) | class VideoMobaAttentionMetadata(AttentionMetadata): function pad_input (line 73) | def pad_input(hidden_states, indices, batch, seqlen): class VideoMobaAttentionMetadataBuilder (line 91) | class VideoMobaAttentionMetadataBuilder(AttentionMetadataBuilder): method __init__ (line 93) | def __init__(self): method prepare (line 96) | def prepare(self): method build (line 99) | def build( # type: ignore class VMOBAAttentionImpl (line 152) | class VMOBAAttentionImpl(AttentionImpl): method __init__ (line 154) | def __init__( method _get_layer_idx (line 169) | def _get_layer_idx(self, prefix: str) -> int | None: method forward (line 175) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/attention/layer.py class UlyssesAttention (line 38) | class UlyssesAttention(nn.Module): method __init__ (line 41) | def __init__( method forward (line 82) | def forward( class UlyssesAttention_VSA (line 157) | class UlyssesAttention_VSA(UlyssesAttention): method forward (line 160) | def forward( class LocalAttention (line 221) | class LocalAttention(nn.Module): method __init__ (line 224) | def __init__( method forward (line 261) | def forward( class USPAttention (line 288) | class USPAttention(nn.Module): method __init__ (line 297) | def __init__( method forward (line 351) | def forward( method _forward_with_replicated_prefix (line 414) | def _forward_with_replicated_prefix( FILE: python/sglang/multimodal_gen/runtime/layers/attention/selector.py function backend_name_to_enum (line 25) | def backend_name_to_enum(backend_name: str) -> AttentionBackendEnum | None: function get_env_variable_attn_backend (line 42) | def get_env_variable_attn_backend() -> AttentionBackendEnum | None: function global_force_attn_backend (line 66) | def global_force_attn_backend(attn_backend: AttentionBackendEnum | None)... function get_global_forced_attn_backend (line 81) | def get_global_forced_attn_backend() -> AttentionBackendEnum | None: function get_attn_backend (line 89) | def get_attn_backend( function _cached_get_attn_backend (line 105) | def _cached_get_attn_backend( function global_force_attn_backend_context_manager (line 163) | def global_force_attn_backend_context_manager( FILE: python/sglang/multimodal_gen/runtime/layers/attention/turbo_layer.py function post_all2all (line 31) | def post_all2all(local_seq_2_local_head, seq_world_size): function single_all_to_all (line 44) | def single_all_to_all(input, local_seq_2_local_head, group, async_op=Fal... function async_a2a_communicate (line 77) | def async_a2a_communicate( class _SeqAllToAll (line 127) | class _SeqAllToAll(torch.autograd.Function): method forward (line 129) | def forward( method backward (line 138) | def backward(ctx: Any, *grad_output: Tensor) -> Tuple[None, Tensor, No... class _SeqAllToAllQKV (line 146) | class _SeqAllToAllQKV(torch.autograd.Function): method forward (line 148) | def forward( method backward (line 168) | def backward( class DistributedAttention (line 181) | class DistributedAttention(torch.nn.Module): method __init__ (line 189) | def __init__(self, local_attention: Union[Module, Callable]) -> None: method forward (line 195) | def forward( method set_context_parallel_group (line 224) | def set_context_parallel_group(self, group, stream): class MinimalA2AAttnOp (line 229) | class MinimalA2AAttnOp(DistributedAttention): method __init__ (line 230) | def __init__( method set_context_parallel_group (line 264) | def set_context_parallel_group(self, process_group, ranks, stream): method forward (line 268) | def forward( FILE: python/sglang/multimodal_gen/runtime/layers/custom_op.py class CustomOp (line 18) | class CustomOp(nn.Module): method __init__ (line 24) | def __init__(self) -> None: method forward (line 28) | def forward(self, *args, **kwargs) -> Any: method forward_native (line 31) | def forward_native(self, *args, **kwargs) -> Any: method forward_cuda (line 39) | def forward_cuda(self, *args, **kwargs) -> Any: method forward_hip (line 42) | def forward_hip(self, *args, **kwargs) -> Any: method forward_cpu (line 46) | def forward_cpu(self, *args, **kwargs) -> Any: method forward_tpu (line 50) | def forward_tpu(self, *args, **kwargs) -> Any: method forward_musa (line 56) | def forward_musa(self, *args, **kwargs) -> Any: method forward_oot (line 60) | def forward_oot(self, *args, **kwargs) -> Any: method forward_npu (line 65) | def forward_npu(self, *args, **kwargs) -> Any: method dispatch_forward (line 70) | def dispatch_forward(self) -> Callable: method enabled (line 85) | def enabled(cls) -> bool: method default_on (line 90) | def default_on() -> bool: method register (line 106) | def register(cls, name: str) -> Callable: FILE: python/sglang/multimodal_gen/runtime/layers/elementwise.py class MulAdd (line 7) | class MulAdd(CustomOp): method __init__ (line 14) | def __init__(self, prefix: str = ""): method forward_native (line 17) | def forward_native( method forward_cuda (line 32) | def forward_cuda( FILE: python/sglang/multimodal_gen/runtime/layers/layernorm.py class RMSNorm (line 42) | class RMSNorm(CustomOp): method __init__ (line 49) | def __init__( method forward_triton (line 66) | def forward_triton(self, x: torch.Tensor, residual: Optional[torch.Ten... method forward_cuda (line 71) | def forward_cuda( method forward_native (line 106) | def forward_native( method forward_cpu (line 145) | def forward_cpu( method forward_npu (line 152) | def forward_npu( method forward_hip (line 164) | def forward_hip( method _get_weight (line 172) | def _get_weight(self, dtype: torch.dtype) -> torch.Tensor: method forward_musa (line 183) | def forward_musa( method extra_repr (line 211) | def extra_repr(self) -> str: class LayerNorm (line 219) | class LayerNorm(CustomOp): method __init__ (line 220) | def __init__( method _get_weight_fallback (line 246) | def _get_weight_fallback(self, x: torch.Tensor) -> torch.Tensor: method forward_triton (line 258) | def forward_triton(self, x: torch.Tensor): method forward_cuda (line 268) | def forward_cuda( method forward_native (line 277) | def forward_native( method forward_cpu (line 293) | def forward_cpu( method forward_musa (line 300) | def forward_musa(self, x: torch.Tensor): method extra_repr (line 303) | def extra_repr(self) -> str: class FP32LayerNorm (line 312) | class FP32LayerNorm(nn.LayerNorm): method forward (line 313) | def forward(self, inputs: torch.Tensor) -> torch.Tensor: function _ensure_contiguous (line 328) | def _ensure_contiguous(tensor: Optional[torch.Tensor]) -> Optional[torch... class _ScaleResidualNormScaleShift (line 332) | class _ScaleResidualNormScaleShift(CustomOp): method __init__ (line 343) | def __init__( method forward_cuda (line 363) | def forward_cuda( method forward_hip (line 401) | def forward_hip(self, *args, **kwargs): method forward_musa (line 406) | def forward_musa(self, *args, **kwargs): method forward_native (line 411) | def forward_native( class ScaleResidualLayerNormScaleShift (line 442) | class ScaleResidualLayerNormScaleShift(_ScaleResidualNormScaleShift): class ScaleResidualRMSNormScaleShift (line 446) | class ScaleResidualRMSNormScaleShift(_ScaleResidualNormScaleShift): class _NormScaleShift (line 450) | class _NormScaleShift(CustomOp): method __init__ (line 460) | def __init__( method forward_cuda (line 479) | def forward_cuda( method forward_hip (line 505) | def forward_hip(self, *args, **kwargs): method forward_musa (line 510) | def forward_musa(self, *args, **kwargs): method forward_native (line 515) | def forward_native( class LayerNormScaleShift (line 523) | class LayerNormScaleShift(_NormScaleShift): class RMSNormScaleShift (line 527) | class RMSNormScaleShift(_NormScaleShift): function apply_qk_norm (line 531) | def apply_qk_norm( function tensor_parallel_rms_norm (line 571) | def tensor_parallel_rms_norm(x: torch.Tensor, norm: "RMSNorm") -> torch.... FILE: python/sglang/multimodal_gen/runtime/layers/linear.py function adjust_scalar_to_fused_array (line 63) | def adjust_scalar_to_fused_array( class LinearMethodBase (line 88) | class LinearMethodBase(QuantizeMethodBase): method create_weights (line 92) | def create_weights( method apply (line 118) | def apply( class UnquantizedLinearMethod (line 126) | class UnquantizedLinearMethod(LinearMethodBase): method create_weights (line 129) | def create_weights( method apply (line 151) | def apply( class LinearBase (line 162) | class LinearBase(torch.nn.Module): method __init__ (line 173) | def __init__( method forward (line 198) | def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, Parameter | ... class ReplicatedLinear (line 202) | class ReplicatedLinear(LinearBase): method __init__ (line 216) | def __init__( method weight_loader (line 264) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor)... method forward (line 276) | def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, Parameter | ... method extra_repr (line 283) | def extra_repr(self) -> str: class ColumnParallelLinear (line 290) | class ColumnParallelLinear(LinearBase): method __init__ (line 314) | def __init__( method weight_loader (line 380) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor)... method weight_loader_v2 (line 401) | def weight_loader_v2(self, param: Parameter, loaded_weight: torch.Tens... method forward (line 409) | def forward(self, input_: torch.Tensor) -> tuple[torch.Tensor, Paramet... method extra_repr (line 425) | def extra_repr(self) -> str: class MergedColumnParallelLinear (line 434) | class MergedColumnParallelLinear(ColumnParallelLinear): method __init__ (line 457) | def __init__( method weight_loader (line 483) | def weight_loader( method _load_fused_module_from_checkpoint (line 562) | def _load_fused_module_from_checkpoint( method weight_loader_v2 (line 598) | def weight_loader_v2( class QKVParallelLinear (line 647) | class QKVParallelLinear(ColumnParallelLinear): method __init__ (line 673) | def __init__( method _get_shard_offset_mapping (line 724) | def _get_shard_offset_mapping(self, loaded_shard_id: str) -> int | None: method _get_shard_size_mapping (line 733) | def _get_shard_size_mapping(self, loaded_shard_id: str) -> int | None: method _load_fused_module_from_checkpoint (line 741) | def _load_fused_module_from_checkpoint( method weight_loader_v2 (line 785) | def weight_loader_v2( method weight_loader (line 815) | def weight_loader( class RowParallelLinear (line 920) | class RowParallelLinear(LinearBase): method __init__ (line 946) | def __init__( method weight_loader (line 1006) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): method weight_loader_v2 (line 1028) | def weight_loader_v2(self, param: BasevLLMParameter, loaded_weight: to... method forward (line 1038) | def forward(self, input_) -> tuple[torch.Tensor, Parameter | None]: method extra_repr (line 1065) | def extra_repr(self) -> str: FILE: python/sglang/multimodal_gen/runtime/layers/lora/linear.py class BaseLayerWithLoRA (line 39) | class BaseLayerWithLoRA(nn.Module): method __init__ (line 41) | def __init__( method weight (line 70) | def weight(self): method bias (line 74) | def bias(self): method forward (line 78) | def forward(self, x: torch.Tensor) -> torch.Tensor: method slice_lora_a_weights (line 103) | def slice_lora_a_weights(self, A: torch.Tensor) -> torch.Tensor: method slice_lora_b_weights (line 106) | def slice_lora_b_weights(self, B: torch.Tensor) -> torch.Tensor: method set_lora_weights (line 109) | def set_lora_weights( method _merge_lora_into_data (line 155) | def _merge_lora_into_data( method merge_lora_weights (line 183) | def merge_lora_weights(self, strength: float | None = None) -> None: method unmerge_lora_weights (line 251) | def unmerge_lora_weights(self) -> None: class VocabParallelEmbeddingWithLoRA (line 280) | class VocabParallelEmbeddingWithLoRA(BaseLayerWithLoRA): method __init__ (line 289) | def __init__( method forward (line 295) | def forward(self, input_: torch.Tensor) -> torch.Tensor: class ColumnParallelLinearWithLoRA (line 301) | class ColumnParallelLinearWithLoRA(BaseLayerWithLoRA): method __init__ (line 303) | def __init__( method forward (line 311) | def forward(self, input_: torch.Tensor) -> torch.Tensor: method slice_lora_a_weights (line 324) | def slice_lora_a_weights(self, A: torch.Tensor) -> torch.Tensor: method slice_lora_b_weights (line 327) | def slice_lora_b_weights(self, B: torch.Tensor) -> torch.Tensor: class MergedColumnParallelLinearWithLoRA (line 336) | class MergedColumnParallelLinearWithLoRA(ColumnParallelLinearWithLoRA): method __init__ (line 338) | def __init__( method slice_lora_a_weights (line 346) | def slice_lora_a_weights(self, A: torch.Tensor) -> torch.Tensor: method slice_lora_b_weights (line 349) | def slice_lora_b_weights(self, B: torch.Tensor) -> torch.Tensor: class QKVParallelLinearWithLoRA (line 358) | class QKVParallelLinearWithLoRA(ColumnParallelLinearWithLoRA): method __init__ (line 360) | def __init__( method slice_lora_a_weights (line 368) | def slice_lora_a_weights(self, A: torch.Tensor) -> torch.Tensor: method slice_lora_b_weights (line 371) | def slice_lora_b_weights( class RowParallelLinearWithLoRA (line 391) | class RowParallelLinearWithLoRA(BaseLayerWithLoRA): method __init__ (line 393) | def __init__( method forward (line 401) | def forward(self, input_: torch.Tensor): method slice_lora_a_weights (line 432) | def slice_lora_a_weights(self, A: torch.Tensor) -> torch.Tensor: method slice_lora_b_weights (line 440) | def slice_lora_b_weights(self, B: torch.Tensor) -> torch.Tensor: class LinearWithLoRA (line 444) | class LinearWithLoRA(BaseLayerWithLoRA): method __init__ (line 451) | def __init__( method forward (line 460) | def forward(self, x: torch.Tensor) -> torch.Tensor: function wrap_with_lora_layer (line 488) | def wrap_with_lora_layer( function replace_submodule (line 520) | def replace_submodule( FILE: python/sglang/multimodal_gen/runtime/layers/mlp.py class MLP (line 26) | class MLP(nn.Module): method __init__ (line 31) | def __init__( method forward (line 64) | def forward(self, x: torch.Tensor) -> torch.Tensor: class FeedForward (line 71) | class FeedForward(nn.Module): method __init__ (line 83) | def __init__( method forward (line 118) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: FILE: python/sglang/multimodal_gen/runtime/layers/quantization/__init__.py function register_quantization_config (line 22) | def register_quantization_config(quantization: str): function get_quantization_config (line 50) | def get_quantization_config(quantization: str) -> type[QuantizationConfig]: FILE: python/sglang/multimodal_gen/runtime/layers/quantization/configs/base_config.py class QuantizeMethodBase (line 19) | class QuantizeMethodBase(ABC): method create_weights (line 23) | def create_weights( method apply (line 32) | def apply(self, layer: torch.nn.Module, *args, **kwargs) -> torch.Tensor: method embedding (line 39) | def embedding(self, layer: torch.nn.Module, *args, **kwargs) -> torch.... method process_weights_after_loading (line 45) | def process_weights_after_loading(self, layer: nn.Module) -> None: function method_has_implemented_embedding (line 53) | def method_has_implemented_embedding(method_class: type[QuantizeMethodBa... class QuantizationConfig (line 65) | class QuantizationConfig(ABC): method __init__ (line 71) | def __init__(self): method get_name (line 77) | def get_name(self) -> QuantizationMethods: method get_supported_act_dtypes (line 82) | def get_supported_act_dtypes(self) -> list[torch.dtype]: method get_min_capability (line 88) | def get_min_capability(cls) -> int: method get_config_filenames (line 99) | def get_config_filenames() -> list[str]: method from_config (line 105) | def from_config(cls, config: dict[str, Any]) -> "QuantizationConfig": method override_quantization_method (line 110) | def override_quantization_method( method get_from_keys (line 122) | def get_from_keys(config: dict[str, Any], keys: list[str]) -> Any: method get_from_keys_or (line 132) | def get_from_keys_or(config: dict[str, Any], keys: list[str], default:... method get_quant_method (line 140) | def get_quant_method( method get_cache_scale (line 154) | def get_cache_scale(self, name: str) -> str | None: FILE: python/sglang/multimodal_gen/runtime/layers/quantization/configs/nunchaku_config.py function is_nunchaku_available (line 21) | def is_nunchaku_available() -> bool: class NunchakuConfig (line 32) | class NunchakuConfig(QuantizationConfig): method get_name (line 55) | def get_name(cls) -> str: method get_supported_act_dtypes (line 59) | def get_supported_act_dtypes(cls) -> list[torch.dtype]: method get_min_capability (line 63) | def get_min_capability(cls) -> int: method get_config_filenames (line 67) | def get_config_filenames() -> list[str]: method from_config (line 71) | def from_config(cls, config: dict[str, Any]) -> "NunchakuConfig": method get_quant_method (line 81) | def get_quant_method( method _get_quant_rules (line 123) | def _get_quant_rules(self) -> dict[str, list[str]]: method __post_init__ (line 130) | def __post_init__(self): method from_dict (line 150) | def from_dict(cls, config_dict: dict) -> "NunchakuConfig": method to_dict (line 154) | def to_dict(self) -> dict: method from_pretrained (line 165) | def from_pretrained(cls, model_path: str) -> Optional["NunchakuConfig"]: function _patch_native_svdq_linear (line 176) | def _patch_native_svdq_linear( function _patch_sglang_svdq_linear (line 188) | def _patch_sglang_svdq_linear( function _patch_sglang_svdq_wcscales (line 210) | def _patch_sglang_svdq_wcscales( function _patch_nunchaku_scales (line 226) | def _patch_nunchaku_scales( FILE: python/sglang/multimodal_gen/runtime/layers/quantization/fp8.py class Fp8Config (line 77) | class Fp8Config(QuantizationConfig): method __init__ (line 80) | def __init__( method get_name (line 110) | def get_name(cls) -> str: method get_supported_act_dtypes (line 114) | def get_supported_act_dtypes(cls) -> List[torch.dtype]: method get_min_capability (line 118) | def get_min_capability(cls) -> int: method get_config_filenames (line 122) | def get_config_filenames(cls) -> List[str]: method from_config (line 126) | def from_config(cls, config: Dict[str, Any]) -> Fp8Config: method get_quant_method (line 144) | def get_quant_method( method get_scaled_act_names (line 155) | def get_scaled_act_names(self) -> List[str]: class Fp8LinearMethod (line 159) | class Fp8LinearMethod(LinearMethodBase): method __init__ (line 177) | def __init__(self, quant_config: Union[Fp8Config, W4AFp8Config]): method create_weights (line 193) | def create_weights( method process_weights_after_loading (line 302) | def process_weights_after_loading(self, layer: Module) -> None: method apply (line 442) | def apply( FILE: python/sglang/multimodal_gen/runtime/layers/quantization/modelslim.py class ModelSlimConfig (line 32) | class ModelSlimConfig(QuantizationConfig): method __init__ (line 43) | def __init__(self, quant_config: Dict[str, Any] = {}): method get_linear_method (line 53) | def get_linear_method(self) -> ModelSlimLinearMethod: method get_supported_act_dtypes (line 57) | def get_supported_act_dtypes(cls) -> List[torch.dtype]: method get_min_capability (line 61) | def get_min_capability(cls) -> int: method get_name (line 65) | def get_name(cls) -> str: method get_config_filenames (line 69) | def get_config_filenames(cls) -> List[str]: method from_config (line 74) | def from_config(cls, config: Dict[str, Any]) -> ModelSlimConfig: method get_quant_method (line 77) | def get_quant_method( method _get_scheme_from_parts (line 108) | def _get_scheme_from_parts( method get_scheme (line 124) | def get_scheme( method is_layer_skipped (line 139) | def is_layer_skipped( method get_scaled_act_names (line 170) | def get_scaled_act_names(self) -> List[str]: class ModelSlimLinearMethod (line 174) | class ModelSlimLinearMethod(LinearMethodBase): method __init__ (line 176) | def __init__(self, quantization_config: ModelSlimConfig): method process_weights_after_loading (line 179) | def process_weights_after_loading(self, layer: torch.nn.Module) -> None: method create_weights (line 182) | def create_weights( method apply (line 208) | def apply( FILE: python/sglang/multimodal_gen/runtime/layers/quantization/nunchaku_linear.py class NunchakuSVDQLinearMethod (line 24) | class NunchakuSVDQLinearMethod(LinearMethodBase): method __init__ (line 25) | def __init__( method create_weights (line 40) | def create_weights( method process_weights_after_loading (line 138) | def process_weights_after_loading(self, layer: nn.Module) -> None: method apply (line 163) | def apply( class NunchakuAWQLinearMethod (line 206) | class NunchakuAWQLinearMethod(LinearMethodBase): method __init__ (line 207) | def __init__(self, group_size: int = 64): method create_weights (line 211) | def create_weights( method process_weights_after_loading (line 259) | def process_weights_after_loading(self, layer: nn.Module) -> None: method apply (line 264) | def apply( FILE: python/sglang/multimodal_gen/runtime/layers/rotary_embedding/base.py class RotaryEmbedding (line 11) | class RotaryEmbedding(CustomOp): method __init__ (line 14) | def __init__( method _compute_inv_freq (line 36) | def _compute_inv_freq(self, base: int | float) -> torch.Tensor: method _compute_cos_sin_cache (line 50) | def _compute_cos_sin_cache(self) -> torch.Tensor: method forward_cuda (line 61) | def forward_cuda(self, *args, **kwargs): method forward_native (line 64) | def forward_native( method extra_repr (line 94) | def extra_repr(self) -> str: class LinearScalingRotaryEmbedding (line 101) | class LinearScalingRotaryEmbedding(RotaryEmbedding): method __init__ (line 102) | def __init__( method _compute_cos_sin_cache (line 122) | def _compute_cos_sin_cache(self) -> torch.Tensor: FILE: python/sglang/multimodal_gen/runtime/layers/rotary_embedding/factory.py function get_rope (line 16) | def get_rope( function get_rotary_pos_embed (line 85) | def get_rotary_pos_embed( FILE: python/sglang/multimodal_gen/runtime/layers/rotary_embedding/mrope.py function _to_tuple (line 10) | def _to_tuple(x: int | tuple[int, ...], dim: int = 2) -> tuple[int, ...]: function get_1d_rotary_pos_embed (line 19) | def get_1d_rotary_pos_embed( class OneDRotaryEmbedding (line 72) | class OneDRotaryEmbedding(torch.nn.Module): method __init__ (line 75) | def __init__( method build_freqs (line 96) | def build_freqs(self, device): method build_freqs_outer (line 108) | def build_freqs_outer(self, pos: torch.Tensor, device): method forward_from_grid (line 128) | def forward_from_grid( method forward (line 139) | def forward(self, pos: torch.Tensor) -> tuple[torch.Tensor, torch.Tens... method _forward_cached (line 151) | def _forward_cached( class NDRotaryEmbedding (line 164) | class NDRotaryEmbedding(torch.nn.Module): method __init__ (line 167) | def __init__( method forward (line 231) | def forward(self, positions: torch.Tensor) -> tuple[torch.Tensor, torc... method _forward_cached (line 248) | def _forward_cached( method forward_uncached (line 259) | def forward_uncached(self, pos: torch.Tensor) -> tuple[torch.Tensor, t... method forward_from_grid (line 296) | def forward_from_grid( method _forward_cached_from_grid (line 314) | def _forward_cached_from_grid( FILE: python/sglang/multimodal_gen/runtime/layers/rotary_embedding/utils.py function _apply_rotary_emb (line 32) | def _apply_rotary_emb( function apply_flashinfer_rope_qk_inplace (line 64) | def apply_flashinfer_rope_qk_inplace( FILE: python/sglang/multimodal_gen/runtime/layers/usp.py function _maybe_wait (line 26) | def _maybe_wait(tensor: torch.Tensor) -> torch.Tensor: function _usp_all_to_all_single (line 36) | def _usp_all_to_all_single(x: torch.Tensor) -> torch.Tensor: function _usp_input_all_to_all (line 49) | def _usp_input_all_to_all(x: torch.Tensor, head_dim: int = 1) -> torch.T... function _usp_output_all_to_all (line 105) | def _usp_output_all_to_all(x: torch.Tensor, head_dim: int = 1) -> torch.... function ring_attn (line 161) | def ring_attn( FILE: python/sglang/multimodal_gen/runtime/layers/utils.py function get_group_size (line 16) | def get_group_size(group) -> int: function get_group_rank (line 25) | def get_group_rank(group) -> int: function get_token_bin_counts_and_mask (line 34) | def get_token_bin_counts_and_mask( function direct_register_custom_op (line 54) | def direct_register_custom_op( class CustomOpWrapper (line 128) | class CustomOpWrapper: method __init__ (line 129) | def __init__( method __call__ (line 142) | def __call__(self, *args, **kwargs): method real_impl (line 146) | def real_impl(self) -> Callable: method fake_impl (line 163) | def fake_impl(self) -> Callable: function register_custom_op (line 193) | def register_custom_op( FILE: python/sglang/multimodal_gen/runtime/layers/visual_embedding.py class PatchEmbed (line 35) | class PatchEmbed(nn.Module): method __init__ (line 49) | def __init__( method forward (line 81) | def forward(self, x): class Timesteps (line 89) | class Timesteps(_Timesteps): method forward (line 90) | def forward(self, timesteps: torch.Tensor) -> torch.Tensor: class CombinedTimestepGuidanceTextProjEmbeddings (line 109) | class CombinedTimestepGuidanceTextProjEmbeddings( method __init__ (line 112) | def __init__(self, embedding_dim, pooled_projection_dim): class CombinedTimestepTextProjEmbeddings (line 131) | class CombinedTimestepTextProjEmbeddings(_CombinedTimestepTextProjEmbedd... method __init__ (line 132) | def __init__(self, embedding_dim, pooled_projection_dim): class TimestepEmbedder (line 148) | class TimestepEmbedder(nn.Module): method __init__ (line 153) | def __init__( method forward (line 176) | def forward( function timestep_embedding (line 193) | def timestep_embedding( class ModulateProjection (line 223) | class ModulateProjection(nn.Module): method __init__ (line 226) | def __init__( method forward (line 246) | def forward(self, x: torch.Tensor) -> torch.Tensor: function unpatchify (line 252) | def unpatchify(x, t, h, w, patch_size, channels) -> torch.Tensor: FILE: python/sglang/multimodal_gen/runtime/layers/vocab_parallel_embedding.py class UnquantizedEmbeddingMethod (line 31) | class UnquantizedEmbeddingMethod(QuantizeMethodBase): method create_weights (line 34) | def create_weights( method apply (line 58) | def apply( method embedding (line 63) | def embedding(self, layer: torch.nn.Module, input_: torch.Tensor) -> t... function pad_vocab_size (line 67) | def pad_vocab_size(vocab_size: int, pad_to: int = DEFAULT_VOCAB_PADDING_... function vocab_range_from_per_partition_vocab_size (line 72) | def vocab_range_from_per_partition_vocab_size( function vocab_range_from_global_vocab_size (line 80) | def vocab_range_from_global_vocab_size( class VocabParallelEmbeddingShardIndices (line 90) | class VocabParallelEmbeddingShardIndices: method num_org_elements (line 104) | def num_org_elements(self) -> int: method num_added_elements (line 108) | def num_added_elements(self) -> int: method num_org_elements_padded (line 112) | def num_org_elements_padded(self) -> int: method num_added_elements_padded (line 116) | def num_added_elements_padded(self) -> int: method num_org_vocab_padding (line 120) | def num_org_vocab_padding(self) -> int: method num_added_vocab_padding (line 124) | def num_added_vocab_padding(self) -> int: method num_elements_padded (line 128) | def num_elements_padded(self) -> int: method __post_init__ (line 131) | def __post_init__(self): function get_masked_input_and_mask (line 153) | def get_masked_input_and_mask( class VocabParallelEmbedding (line 180) | class VocabParallelEmbedding(torch.nn.Module): method __init__ (line 219) | def __init__( method _get_indices (line 310) | def _get_indices( method get_sharded_to_full_mapping (line 347) | def get_sharded_to_full_mapping(self) -> list[int] | None: method weight_loader (line 412) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): method forward (line 460) | def forward(self, input_): method extra_repr (line 484) | def extra_repr(self) -> str: FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/adapter_loader.py class AdapterLoader (line 20) | class AdapterLoader(ComponentLoader): method load_customized (line 31) | def load_customized( FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/bridge_loader.py class BridgeLoader (line 22) | class BridgeLoader(ComponentLoader): method load_customized (line 30) | def load_customized( FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/component_loader.py class ComponentLoader (line 32) | class ComponentLoader(ABC): method __init_subclass__ (line 43) | def __init_subclass__(cls, **kwargs): method __init__ (line 51) | def __init__(self, device=None) -> None: method should_offload (line 54) | def should_offload( method target_device (line 60) | def target_device(self, should_offload): method load (line 70) | def load( method load_native (line 140) | def load_native( method load_customized (line 174) | def load_customized( method _ensure_loaders_registered (line 185) | def _ensure_loaders_registered(cls): method for_component_type (line 210) | def for_component_type( class ImageProcessorLoader (line 264) | class ImageProcessorLoader(ComponentLoader): method load_customized (line 270) | def load_customized( class AutoProcessorLoader (line 276) | class AutoProcessorLoader(ComponentLoader): method load_customized (line 282) | def load_customized( class TokenizerLoader (line 288) | class TokenizerLoader(ComponentLoader): method load_customized (line 294) | def load_customized( class GenericComponentLoader (line 303) | class GenericComponentLoader(ComponentLoader): method __init__ (line 306) | def __init__(self, library="transformers") -> None: class PipelineComponentLoader (line 311) | class PipelineComponentLoader: method load_component (line 317) | def load_component( FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/image_encoder_loader.py class ImageEncoderLoader (line 14) | class ImageEncoderLoader(TextEncoderLoader): method should_offload (line 18) | def should_offload(self, server_args, model_config: ModelConfig | None... method load_customized (line 32) | def load_customized( FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/scheduler_loader.py class SchedulerLoader (line 14) | class SchedulerLoader(ComponentLoader): method load_customized (line 20) | def load_customized( FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/text_encoder_loader.py class TextEncoderLoader (line 47) | class TextEncoderLoader(ComponentLoader): class Source (line 54) | class Source: method should_offload (line 69) | def should_offload(self, server_args, model_config: ModelConfig | None... method _prepare_weights (line 83) | def _prepare_weights( method _get_weights_iterator (line 135) | def _get_weights_iterator( method _get_all_weights (line 154) | def _get_all_weights( method load_customized (line 175) | def load_customized( method load_model (line 209) | def load_model( FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/transformer_loader.py class TransformerLoader (line 41) | class TransformerLoader(ComponentLoader): method get_list_of_safetensors_to_load (line 47) | def get_list_of_safetensors_to_load( method _resolve_quant_config (line 78) | def _resolve_quant_config( method _resolve_target_param_dtype (line 97) | def _resolve_target_param_dtype( method load_customized (line 127) | def load_customized( FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/vae_loader.py function _convert_conv3d_weights_to_channels_last_3d (line 30) | def _convert_conv3d_weights_to_channels_last_3d(module: nn.Module) -> int: class VAELoader (line 49) | class VAELoader(ComponentLoader): method should_offload (line 55) | def should_offload( method load_customized (line 60) | def load_customized( FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/vl_encoder_loader.py class VisionLanguageEncoderLoader (line 11) | class VisionLanguageEncoderLoader(ComponentLoader): method load_customized (line 17) | def load_customized( FILE: python/sglang/multimodal_gen/runtime/loader/component_loaders/vocoder_loader.py class VocoderLoader (line 23) | class VocoderLoader(ComponentLoader): method should_offload (line 27) | def should_offload( method load_customized (line 32) | def load_customized( FILE: python/sglang/multimodal_gen/runtime/loader/fsdp_load.py function _make_param_like (line 44) | def _make_param_like( function maybe_load_fsdp_model (line 60) | def maybe_load_fsdp_model( function shard_model (line 166) | def shard_model( function load_model_from_full_model_state_dict (line 231) | def load_model_from_full_model_state_dict( FILE: python/sglang/multimodal_gen/runtime/loader/utils.py function set_default_torch_dtype (line 23) | def set_default_torch_dtype(dtype: torch.dtype): function get_param_names_mapping (line 33) | def get_param_names_mapping( function hf_to_custom_state_dict (line 95) | def hf_to_custom_state_dict( class skip_init_modules (line 142) | class skip_init_modules: method __enter__ (line 143) | def __enter__(self): method __exit__ (line 150) | def __exit__(self, exc_type, exc_value, traceback): function _normalize_component_type (line 156) | def _normalize_component_type(module_type: str) -> str: function _clean_hf_config_inplace (line 163) | def _clean_hf_config_inplace(model_config: dict) -> None: function _list_safetensors_files (line 175) | def _list_safetensors_files(model_path: str) -> list[str]: function get_memory_usage_of_component (line 183) | def get_memory_usage_of_component(module) -> float | None: FILE: python/sglang/multimodal_gen/runtime/loader/weight_utils.py class DisabledTqdm (line 39) | class DisabledTqdm(tqdm): method __init__ (line 41) | def __init__(self, *args, **kwargs): function get_lock (line 46) | def get_lock(model_name_or_path: str | Path, cache_dir: str | None = None): function filter_duplicate_safetensors_files (line 64) | def filter_duplicate_safetensors_files( function filter_files_not_needed_for_inference (line 85) | def filter_files_not_needed_for_inference(hf_weights_files: list[str]) -... function _validate_safetensors_file (line 111) | def _validate_safetensors_file(file_path: str) -> bool: function safetensors_weights_iterator (line 135) | def safetensors_weights_iterator( function _load_pt_file (line 203) | def _load_pt_file(bin_file: str, device: str) -> dict: function pt_weights_iterator (line 223) | def pt_weights_iterator( function default_weight_loader (line 243) | def default_weight_loader(param: torch.Tensor, loaded_weight: torch.Tens... function maybe_remap_kv_scale_name (line 264) | def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> str | None: function compute_weights_checksum (line 326) | def compute_weights_checksum( FILE: python/sglang/multimodal_gen/runtime/loader/weights_updater.py function get_updatable_modules (line 64) | def get_updatable_modules(pipeline) -> dict[str, torch.nn.Module]: function _get_weights_iter (line 81) | def _get_weights_iter(weights_dir: str): function _validate_weight_files (line 89) | def _validate_weight_files( function _load_weights_into_module (line 110) | def _load_weights_into_module(module: torch.nn.Module, weights_iter) -> ... function load_weights_into_model (line 131) | def load_weights_into_model(weights_iter, model_params: dict) -> None: class WeightsUpdater (line 152) | class WeightsUpdater: method __init__ (line 161) | def __init__(self, pipeline): method update_weights_from_disk (line 164) | def update_weights_from_disk( method _collect_modules (line 222) | def _collect_modules( method _apply_weights (line 245) | def _apply_weights( method _rollback (line 276) | def _rollback(self, updated_modules: list[str]) -> None: FILE: python/sglang/multimodal_gen/runtime/managers/forward_context.py class ForwardContext (line 32) | class ForwardContext: method set_attn_backend_cls (line 42) | def set_attn_backend_cls(self, attention_backend_cls: Type): function get_forward_context (line 55) | def get_forward_context() -> "ForwardContext": function set_forward_context (line 66) | def set_forward_context( FILE: python/sglang/multimodal_gen/runtime/managers/gpu_worker.py class GPUWorker (line 65) | class GPUWorker: method __init__ (line 70) | def __init__( method init_device_and_model (line 93) | def init_device_and_model(self) -> None: method do_mem_analysis (line 161) | def do_mem_analysis(self, output_batch: OutputBatch): method execute_forward (line 210) | def execute_forward(self, batch: List[Req]) -> OutputBatch: method get_can_stay_resident_components (line 306) | def get_can_stay_resident_components( method set_lora (line 340) | def set_lora( method merge_lora_weights (line 362) | def merge_lora_weights( method unmerge_lora_weights (line 377) | def unmerge_lora_weights(self, target: str = "all") -> OutputBatch: method list_loras (line 389) | def list_loras(self) -> OutputBatch: method update_weights_from_disk (line 402) | def update_weights_from_disk( method get_weights_checksum (line 423) | def get_weights_checksum( function _oom_exceptions (line 459) | def _oom_exceptions(): function run_scheduler_process (line 467) | def run_scheduler_process( FILE: python/sglang/multimodal_gen/runtime/managers/scheduler.py class Scheduler (line 45) | class Scheduler: method __init__ (line 52) | def __init__( method _handle_set_lora (line 116) | def _handle_set_lora(self, reqs: List[Any]) -> OutputBatch: method _handle_merge_lora (line 124) | def _handle_merge_lora(self, reqs: List[Any]): method _handle_unmerge_lora (line 128) | def _handle_unmerge_lora(self, reqs: List[Any]) -> OutputBatch: method _handle_list_loras (line 132) | def _handle_list_loras(self, _reqs: List[Any]) -> OutputBatch: method _handle_shutdown (line 135) | def _handle_shutdown(self, _reqs: List[Any]) -> OutputBatch: method _handle_update_weights_from_disk (line 139) | def _handle_update_weights_from_disk(self, reqs: List[Any]) -> OutputB... method _handle_get_weights_checksum (line 152) | def _handle_get_weights_checksum(self, reqs: List[Any]) -> OutputBatch: method _handle_generation (line 158) | def _handle_generation(self, reqs: List[Req]): method return_result (line 170) | def return_result( method get_next_batch_to_run (line 182) | def get_next_batch_to_run(self) -> list[tuple[bytes, Req]] | None: method prepare_server_warmup_reqs (line 192) | def prepare_server_warmup_reqs(self): method process_received_reqs_with_req_based_warmup (line 240) | def process_received_reqs_with_req_based_warmup( method recv_reqs (line 262) | def recv_reqs(self) -> List[tuple[bytes, Any]]: method event_loop (line 320) | def event_loop(self) -> None: method _broadcast_task (line 422) | def _broadcast_task(self, payload: dict[str, Any]) -> None: method _collect_slave_results (line 430) | def _collect_slave_results(self) -> List[dict[str, Any]]: FILE: python/sglang/multimodal_gen/runtime/models/adapter/ltx_2_connector.py function apply_interleaved_rotary_emb (line 15) | def apply_interleaved_rotary_emb( function apply_split_rotary_emb (line 25) | def apply_split_rotary_emb( class LTX2Attention (line 72) | class LTX2Attention(torch.nn.Module): method __init__ (line 78) | def __init__( method forward (line 148) | def forward( class LTX2RotaryPosEmbed1d (line 203) | class LTX2RotaryPosEmbed1d(nn.Module): method __init__ (line 208) | def __init__( method forward (line 230) | def forward( class LTX2TransformerBlock1d (line 307) | class LTX2TransformerBlock1d(nn.Module): method __init__ (line 308) | def __init__( method forward (line 331) | def forward( class LTX2ConnectorTransformer1d (line 352) | class LTX2ConnectorTransformer1d(nn.Module): method __init__ (line 360) | def __init__( method forward (line 413) | def forward( class LTX2TextConnectors (line 487) | class LTX2TextConnectors(nn.Module): method __init__ (line 493) | def __init__( method forward (line 544) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/bridges/mova_dual_tower.py function compute_rope_cos_sin (line 37) | def compute_rope_cos_sin( class PerFrameAttentionPooling (line 85) | class PerFrameAttentionPooling(nn.Module): method __init__ (line 93) | def __init__(self, dim: int, num_heads: int, eps: float = 1e-6): method forward (line 107) | def forward(self, x: torch.Tensor, grid_size: Tuple[int, int, int]) ->... class CrossModalInteractionController (line 136) | class CrossModalInteractionController: method __init__ (line 143) | def __init__(self, visual_layers: int = 30, audio_layers: int = 30): method get_interaction_layers (line 148) | def get_interaction_layers( method should_interact (line 179) | def should_interact( class ConditionalCrossAttention (line 188) | class ConditionalCrossAttention(nn.Module): method __init__ (line 196) | def __init__(self, dim: int, kv_dim: int, num_heads: int, eps: float =... method forward (line 226) | def forward( class AdaLayerNorm (line 300) | class AdaLayerNorm(nn.Module): method __init__ (line 305) | def __init__( method forward (line 328) | def forward( class ConditionalCrossAttentionBlock (line 352) | class ConditionalCrossAttentionBlock(nn.Module): method __init__ (line 355) | def __init__( method forward (line 375) | def forward( class DualTowerConditionalBridge (line 398) | class DualTowerConditionalBridge( method __init__ (line 417) | def __init__( method build_aligned_freqs (line 512) | def build_aligned_freqs( method should_interact (line 574) | def should_interact(self, layer_idx: int, direction: str) -> bool: method apply_conditional_control (line 579) | def apply_conditional_control( method forward (line 625) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/base.py class BaseDiT (line 21) | class BaseDiT(nn.Module, ABC): method __init_subclass__ (line 34) | def __init_subclass__(cls) -> None: method __init__ (line 47) | def __init__(self, config: DiTConfig, hf_config: dict[str, Any], **kwa... method forward (line 57) | def forward( method __post_init__ (line 68) | def __post_init__(self) -> None: method supported_attention_backends (line 77) | def supported_attention_backends(self) -> set[AttentionBackendEnum]: method device (line 81) | def device(self) -> torch.device: class CachableDiT (line 86) | class CachableDiT(TeaCacheMixin, BaseDiT): method __init__ (line 107) | def __init__(self, config: DiTConfig, **kwargs) -> None: method get_nunchaku_quant_rules (line 112) | def get_nunchaku_quant_rules(cls) -> dict[str, dict[str, Any]]: FILE: python/sglang/multimodal_gen/runtime/models/dits/causal_wanvideo.py class CausalWanSelfAttention (line 60) | class CausalWanSelfAttention(nn.Module): method __init__ (line 62) | def __init__( method forward (line 100) | def forward( class CausalWanTransformerBlock (line 253) | class CausalWanTransformerBlock(nn.Module): method __init__ (line 255) | def __init__( method forward (line 331) | def forward( class CausalWanTransformer3DModel (line 432) | class CausalWanTransformer3DModel(BaseDiT, OffloadableDiTMixin): method __init__ (line 440) | def __init__( method _prepare_blockwise_causal_attn_mask (line 525) | def _prepare_blockwise_causal_attn_mask( method _forward_inference (line 597) | def _forward_inference( method _forward_train (line 739) | def _forward_train( method forward (line 869) | def forward(self, *args, **kwargs): FILE: python/sglang/multimodal_gen/runtime/models/dits/flux.py function _fused_gelu_mlp (line 83) | def _fused_gelu_mlp( function _get_qkv_projections (line 182) | def _get_qkv_projections( class FluxAttention (line 208) | class FluxAttention(torch.nn.Module, AttentionModuleMixin): method __init__ (line 209) | def __init__( method forward (line 343) | def forward( class FluxSingleTransformerBlock (line 424) | class FluxSingleTransformerBlock(nn.Module): method __init__ (line 425) | def __init__( method forward (line 499) | def forward( class FluxTransformerBlock (line 562) | class FluxTransformerBlock(nn.Module): method __init__ (line 563) | def __init__( method forward (line 620) | def forward( class FluxPosEmbed (line 695) | class FluxPosEmbed(nn.Module): method __init__ (line 697) | def __init__(self, theta: int, axes_dim: List[int]): method forward (line 711) | def forward(self, ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tens... class FluxTransformer2DModel (line 719) | class FluxTransformer2DModel(CachableDiT, OffloadableDiTMixin): method get_nunchaku_quant_rules (line 729) | def get_nunchaku_quant_rules(cls) -> dict[str, list[str]]: method __init__ (line 760) | def __init__( method forward (line 838) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/flux_2.py function _get_qkv_projections (line 42) | def _get_qkv_projections( class Flux2SwiGLU (line 58) | class Flux2SwiGLU(nn.Module): method __init__ (line 64) | def __init__(self): method forward (line 68) | def forward(self, x: torch.Tensor) -> torch.Tensor: class Flux2FeedForward (line 74) | class Flux2FeedForward(nn.Module): method __init__ (line 75) | def __init__( method forward (line 98) | def forward(self, x: torch.Tensor) -> torch.Tensor: class Flux2Attention (line 105) | class Flux2Attention(torch.nn.Module, AttentionModuleMixin): method __init__ (line 106) | def __init__( method forward (line 213) | def forward( class Flux2ParallelSelfAttention (line 294) | class Flux2ParallelSelfAttention(torch.nn.Module, AttentionModuleMixin): method __init__ (line 306) | def __init__( method forward (line 367) | def forward( class Flux2SingleTransformerBlock (line 421) | class Flux2SingleTransformerBlock(nn.Module): method __init__ (line 422) | def __init__( method forward (line 452) | def forward( class Flux2TransformerBlock (line 495) | class Flux2TransformerBlock(nn.Module): method __init__ (line 496) | def __init__( method forward (line 535) | def forward( class Flux2TimestepGuidanceEmbeddings (line 607) | class Flux2TimestepGuidanceEmbeddings(nn.Module): method __init__ (line 608) | def __init__( method forward (line 633) | def forward( class Flux2Modulation (line 652) | class Flux2Modulation(nn.Module): method __init__ (line 653) | def __init__(self, dim: int, mod_param_sets: int = 2, bias: bool = Fal... method forward (line 662) | def forward( class Flux2PosEmbed (line 677) | class Flux2PosEmbed(nn.Module): method __init__ (line 678) | def __init__(self, theta: int, axes_dim: List[int]): method forward (line 692) | def forward(self, ids: torch.Tensor) -> tuple[torch.Tensor, torch.Tens... class Flux2Transformer2DModel (line 700) | class Flux2Transformer2DModel(CachableDiT, OffloadableDiTMixin): method __init__ (line 710) | def __init__( method forward (line 816) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/glm_image.py class GlmImageLayerKVCache (line 53) | class GlmImageLayerKVCache: method __init__ (line 56) | def __init__(self): method store (line 61) | def store(self, k: torch.Tensor, v: torch.Tensor): method get (line 69) | def get(self): method clear (line 72) | def clear(self): class GlmImageKVCache (line 78) | class GlmImageKVCache: method __init__ (line 81) | def __init__(self, num_layers: int): method __getitem__ (line 85) | def __getitem__(self, layer_idx: int) -> GlmImageLayerKVCache: method set_mode (line 88) | def set_mode(self, mode: Optional[str]): method clear (line 96) | def clear(self): class GlmImageTimestepEmbedding (line 101) | class GlmImageTimestepEmbedding(nn.Module): method __init__ (line 107) | def __init__( method forward (line 126) | def forward(self, sample: torch.Tensor) -> torch.Tensor: class GlmImageTextProjection (line 133) | class GlmImageTextProjection(nn.Module): method __init__ (line 139) | def __init__( method forward (line 158) | def forward(self, caption: torch.Tensor) -> torch.Tensor: class GlmImageCombinedTimestepSizeEmbeddings (line 165) | class GlmImageCombinedTimestepSizeEmbeddings(nn.Module): method __init__ (line 166) | def __init__( method forward (line 188) | def forward( class GlmImageImageProjector (line 218) | class GlmImageImageProjector(nn.Module): method __init__ (line 219) | def __init__( method forward (line 230) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class GlmImageAdaLayerNormZero (line 251) | class GlmImageAdaLayerNormZero(nn.Module): method __init__ (line 252) | def __init__(self, embedding_dim: int, dim: int) -> None: method forward (line 259) | def forward( class GlmImageAttention (line 308) | class GlmImageAttention(torch.nn.Module): method __init__ (line 309) | def __init__( method forward (line 379) | def forward( class GlmImageTransformerBlock (line 474) | class GlmImageTransformerBlock(nn.Module): method __init__ (line 475) | def __init__( method forward (line 513) | def forward( class GlmImageRotaryPosEmbed (line 581) | class GlmImageRotaryPosEmbed(nn.Module): method __init__ (line 582) | def __init__(self, dim: int, patch_size: int, theta: float = 10000.0) ... method forward (line 589) | def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, ... class GlmImageAdaLayerNormContinuous (line 632) | class GlmImageAdaLayerNormContinuous(nn.Module): method __init__ (line 638) | def __init__( method forward (line 660) | def forward( class GlmImageTransformer2DModel (line 670) | class GlmImageTransformer2DModel(CachableDiT, OffloadableDiTMixin): method __init__ (line 702) | def __init__( method forward (line 785) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/helios.py function pad_for_3d_conv (line 61) | def pad_for_3d_conv(x, kernel_size): function center_down_sample_3d (line 71) | def center_down_sample_3d(x, kernel_size): function apply_rotary_emb_transposed (line 76) | def apply_rotary_emb_transposed(hidden_states, freqs_cis): class HeliosOutputNorm (line 91) | class HeliosOutputNorm(nn.Module): method __init__ (line 92) | def __init__(self, dim: int, eps: float = 1e-6): method forward (line 97) | def forward(self, hidden_states, temb, original_context_length): class HeliosRotaryPosEmbed (line 116) | class HeliosRotaryPosEmbed(nn.Module): method __init__ (line 119) | def __init__(self, rope_dim, theta): method _get_freqs_base (line 129) | def _get_freqs_base(self, dim): method _ensure_freqs_base (line 135) | def _ensure_freqs_base(self, device): method get_frequency_batched (line 143) | def get_frequency_batched(self, freqs_base, pos): method _get_spatial_meshgrid (line 150) | def _get_spatial_meshgrid(self, height, width, device_str): method forward (line 158) | def forward(self, frame_indices, height, width, device): class HeliosTimeTextEmbedding (line 201) | class HeliosTimeTextEmbedding(nn.Module): method __init__ (line 204) | def __init__(self, dim, time_freq_dim, time_proj_dim, text_embed_dim): method forward (line 214) | def forward( class HeliosSelfAttention (line 231) | class HeliosSelfAttention(nn.Module): method __init__ (line 234) | def __init__( method forward (line 284) | def forward(self, hidden_states, rotary_emb=None, original_context_len... class HeliosCrossAttention (line 333) | class HeliosCrossAttention(nn.Module): method __init__ (line 336) | def __init__( method forward (line 373) | def forward(self, hidden_states, encoder_hidden_states): class HeliosTransformerBlock (line 400) | class HeliosTransformerBlock(nn.Module): method __init__ (line 406) | def __init__( method forward (line 455) | def forward( class HeliosTransformer3DModel (line 527) | class HeliosTransformer3DModel(CachableDiT, OffloadableDiTMixin): method __init__ (line 542) | def __init__( method forward (line 640) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/hunyuan3d.py class MixedRowParallelLinear (line 33) | class MixedRowParallelLinear(RowParallelLinear): method __init__ (line 36) | def __init__(self, input_sizes: list[int], output_size: int, **kwargs): method weight_loader (line 40) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens... function _flux_timestep_embedding (line 56) | def _flux_timestep_embedding( class _FluxGELU (line 77) | class _FluxGELU(nn.Module): method __init__ (line 78) | def __init__(self, approximate="tanh"): method forward (line 82) | def forward(self, x: torch.Tensor) -> torch.Tensor: class _FluxMLPEmbedder (line 86) | class _FluxMLPEmbedder(nn.Module): method __init__ (line 87) | def __init__(self, in_dim: int, hidden_dim: int): method forward (line 93) | def forward(self, x: torch.Tensor) -> torch.Tensor: class _FluxRMSNorm (line 97) | class _FluxRMSNorm(nn.Module): method __init__ (line 98) | def __init__(self, dim: int): method forward (line 102) | def forward(self, x: torch.Tensor): class _FluxQKNorm (line 109) | class _FluxQKNorm(nn.Module): method __init__ (line 110) | def __init__(self, dim: int): method forward (line 115) | def forward( class _FluxSelfAttention (line 123) | class _FluxSelfAttention(nn.Module): method __init__ (line 124) | def __init__( method forward (line 155) | def forward(self, x: torch.Tensor, pe: torch.Tensor) -> torch.Tensor: class _FluxModulationOut (line 173) | class _FluxModulationOut: class _FluxModulation (line 179) | class _FluxModulation(nn.Module): method __init__ (line 180) | def __init__(self, dim: int, double: bool): method forward (line 186) | def forward( class _FluxDoubleStreamBlock (line 198) | class _FluxDoubleStreamBlock(nn.Module): method __init__ (line 199) | def __init__( method forward (line 250) | def forward( class _FluxSingleStreamBlock (line 307) | class _FluxSingleStreamBlock(nn.Module): method __init__ (line 313) | def __init__( method forward (line 364) | def forward( class _FluxLastLayer (line 392) | class _FluxLastLayer(nn.Module): method __init__ (line 393) | def __init__(self, hidden_size: int, patch_size: int, out_channels: int): method forward (line 403) | def forward(self, x: torch.Tensor, vec: torch.Tensor) -> torch.Tensor: class Hunyuan3D2DiT (line 410) | class Hunyuan3D2DiT(CachableDiT, OffloadableDiTMixin): method build_config_from_params (line 418) | def build_config_from_params(cls, params: dict) -> Hunyuan3DDiTConfig: method __init__ (line 435) | def __init__( method forward (line 520) | def forward( function _chunked_feed_forward (line 576) | def _chunked_feed_forward( class SGLangAttentionWrapper (line 595) | class SGLangAttentionWrapper(torch.nn.Module): method __init__ (line 600) | def __init__( method forward (line 641) | def forward( class Basic2p5DTransformerBlock (line 671) | class Basic2p5DTransformerBlock(torch.nn.Module): method __init__ (line 674) | def __init__( method _initialize_attn_weights (line 716) | def _initialize_attn_weights(self): method __getattr__ (line 731) | def __getattr__(self, name: str): method forward (line 737) | def forward( function compute_voxel_grid_mask (line 965) | def compute_voxel_grid_mask(position: torch.Tensor, grid_resolution: int... function compute_multi_resolution_mask (line 1010) | def compute_multi_resolution_mask( function compute_discrete_voxel_indice (line 1026) | def compute_discrete_voxel_indice( function compute_multi_resolution_discrete_voxel_indice (line 1063) | def compute_multi_resolution_discrete_voxel_indice( class UNet2p5DConditionModel (line 1085) | class UNet2p5DConditionModel(torch.nn.Module): method __init__ (line 1088) | def __init__(self, unet: UNet2DConditionModel) -> None: method from_pretrained (line 1108) | def from_pretrained(pretrained_model_name_or_path: str, **kwargs): method init_condition (line 1126) | def init_condition(self): method init_camera_embedding (line 1142) | def init_camera_embedding(self): method init_attention (line 1152) | def init_attention( method _iter_2p5d_blocks (line 1240) | def _iter_2p5d_blocks(unet): method __getattr__ (line 1251) | def __getattr__(self, name: str): method forward (line 1257) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/hunyuanvideo.py class MMDoubleStreamBlock (line 49) | class MMDoubleStreamBlock(nn.Module): method __init__ (line 55) | def __init__( method forward (line 177) | def forward( class MMSingleStreamBlock (line 274) | class MMSingleStreamBlock(nn.Module): method __init__ (line 280) | def __init__( method forward (line 353) | def forward( class HunyuanVideoTransformer3DModel (line 413) | class HunyuanVideoTransformer3DModel(CachableDiT, OffloadableDiTMixin): method __init__ (line 435) | def __init__( method forward (line 562) | def forward( method maybe_cache_states (line 679) | def maybe_cache_states( method should_skip_forward_for_cached_states (line 684) | def should_skip_forward_for_cached_states(self, **kwargs) -> bool: method retrieve_cached_states (line 783) | def retrieve_cached_states(self, hidden_states: torch.Tensor) -> torch... class SingleTokenRefiner (line 787) | class SingleTokenRefiner(nn.Module): method __init__ (line 793) | def __init__( method forward (line 843) | def forward(self, x, t): class IndividualTokenRefinerBlock (line 861) | class IndividualTokenRefinerBlock(nn.Module): method __init__ (line 866) | def __init__( method forward (line 934) | def forward(self, x, c): class FinalLayer (line 960) | class FinalLayer(nn.Module): method __init__ (line 965) | def __init__( method forward (line 994) | def forward(self, x, c): FILE: python/sglang/multimodal_gen/runtime/models/dits/ltx_2.py function apply_interleaved_rotary_emb (line 41) | def apply_interleaved_rotary_emb( function apply_split_rotary_emb (line 50) | def apply_split_rotary_emb( class LTX2AudioVideoRotaryPosEmbed (line 93) | class LTX2AudioVideoRotaryPosEmbed(nn.Module): method __init__ (line 94) | def __init__( method prepare_video_coords (line 147) | def prepare_video_coords( method prepare_audio_coords (line 202) | def prepare_audio_coords( method prepare_coords (line 236) | def prepare_coords(self, *args, **kwargs): method forward (line 241) | def forward( function rms_norm (line 317) | def rms_norm(x: torch.Tensor, eps: float) -> torch.Tensor: class LTX2TextProjection (line 321) | class LTX2TextProjection(nn.Module): method __init__ (line 322) | def __init__( method forward (line 347) | def forward(self, caption: torch.Tensor) -> torch.Tensor: class LTX2TimestepEmbedder (line 354) | class LTX2TimestepEmbedder(nn.Module): method __init__ (line 355) | def __init__(self, embedding_dim: int, in_channels: int = 256) -> None: method forward (line 364) | def forward(self, t_emb: torch.Tensor) -> torch.Tensor: class LTX2PixArtAlphaCombinedTimestepSizeEmbeddings (line 371) | class LTX2PixArtAlphaCombinedTimestepSizeEmbeddings(nn.Module): method __init__ (line 372) | def __init__(self, embedding_dim: int) -> None: method forward (line 376) | def forward( class LTX2AdaLayerNormSingle (line 386) | class LTX2AdaLayerNormSingle(nn.Module): method __init__ (line 387) | def __init__(self, embedding_dim: int, embedding_coefficient: int = 6)... method forward (line 398) | def forward( class LTX2TPRMSNormAcrossHeads (line 408) | class LTX2TPRMSNormAcrossHeads(nn.Module): method __init__ (line 409) | def __init__( method forward (line 428) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LTX2Attention (line 444) | class LTX2Attention(nn.Module): method __init__ (line 445) | def __init__( method forward (line 545) | def forward( method _slice_rope_for_tp (line 617) | def _slice_rope_for_tp( class LTX2FeedForward (line 650) | class LTX2FeedForward(nn.Module): method __init__ (line 651) | def __init__( method forward (line 671) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LTX2TransformerBlock (line 678) | class LTX2TransformerBlock(nn.Module): method __init__ (line 679) | def __init__( method get_ada_values (line 786) | def get_ada_values( method forward (line 805) | def forward( class LTX2VideoTransformer3DModel (line 985) | class LTX2VideoTransformer3DModel(CachableDiT, OffloadableDiTMixin): method _validate_tp_config (line 993) | def _validate_tp_config(self, *, arch: LTX2ArchConfig, tp_size: int) -... method __init__ (line 1043) | def __init__( method forward (line 1261) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/mova_audio_dit.py function legacy_precompute_freqs_cis_1d (line 29) | def legacy_precompute_freqs_cis_1d( function precompute_freqs_cis_1d (line 45) | def precompute_freqs_cis_1d(dim: int, end: int = 16384, theta: float = 1... class Head (line 50) | class Head(nn.Module): method __init__ (line 51) | def __init__( method forward (line 61) | def forward(self, x, t_mod): class Conv1dLocalIsland (line 78) | class Conv1dLocalIsland(nn.Conv1d): method __init__ (line 88) | def __init__(self, *args, **kwargs): method forward (line 91) | def forward(self, input): class WanAudioModel (line 104) | class WanAudioModel(CachableDiT, OffloadableDiTMixin): method __init__ (line 112) | def __init__( method _init_freqs (line 201) | def _init_freqs(self): method patchify (line 210) | def patchify( method unpatchify (line 220) | def unpatchify(self, x: torch.Tensor, grid_size: tuple[int]): method forward (line 225) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/mova_video_dit.py function modulate (line 44) | def modulate(x: torch.Tensor, shift: torch.Tensor, scale: torch.Tensor): function sinusoidal_embedding_1d (line 48) | def sinusoidal_embedding_1d(dim, position): function precompute_freqs_cis_3d (line 62) | def precompute_freqs_cis_3d(dim: int, end: int = 1024, theta: float = 10... function precompute_freqs_cis (line 70) | def precompute_freqs_cis( function rope_apply (line 82) | def rope_apply(x, freqs, num_heads): function rope_apply_head_dim (line 91) | def rope_apply_head_dim(x, freqs, head_dim): class SelfAttention (line 101) | class SelfAttention(nn.Module): method __init__ (line 110) | def __init__( method forward (line 153) | def forward(self, x, freqs): class CrossAttention (line 197) | class CrossAttention(nn.Module): method __init__ (line 208) | def __init__( method forward (line 250) | def forward(self, x: torch.Tensor, y: torch.Tensor): class MulAdd (line 283) | class MulAdd(nn.Module): method __init__ (line 284) | def __init__(self): method forward (line 287) | def forward(self, x, gate, residual): class DiTBlock (line 291) | class DiTBlock(nn.Module): method __init__ (line 292) | def __init__( method forward (line 326) | def forward(self, x, context, t_mod, freqs): class Head (line 362) | class Head(nn.Module): method __init__ (line 363) | def __init__( method forward (line 376) | def forward(self, x, t_mod): class Conv3dLocalIsland (line 391) | class Conv3dLocalIsland(nn.Conv3d): method __init__ (line 403) | def __init__(self, *args, **kwargs): method forward (line 406) | def forward(self, input): class WanModel (line 421) | class WanModel(CachableDiT, OffloadableDiTMixin): method __init__ (line 429) | def __init__( method _init_freqs (line 515) | def _init_freqs(self): method patchify (line 521) | def patchify( method unpatchify (line 531) | def unpatchify(self, x: torch.Tensor, grid_size: tuple[int, int, int]): method forward (line 543) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/qwen_image.py function _get_qkv_projections (line 59) | def _get_qkv_projections( class QwenTimestepProjEmbeddings (line 87) | class QwenTimestepProjEmbeddings(nn.Module): method __init__ (line 88) | def __init__(self, embedding_dim, use_additional_t_cond=False): method forward (line 101) | def forward(self, timestep, hidden_states, addition_t_cond=None): class QwenEmbedRope (line 120) | class QwenEmbedRope(nn.Module): method __init__ (line 121) | def __init__(self, theta: int, axes_dim: List[int], scale_rope=False): method rope_params (line 147) | def rope_params(self, index, dim, theta=10000): method forward (line 167) | def forward( method _compute_video_freqs (line 234) | def _compute_video_freqs( class QwenEmbedLayer3DRope (line 279) | class QwenEmbedLayer3DRope(nn.Module): method __init__ (line 280) | def __init__(self, theta: int, axes_dim: List[int], scale_rope=False): method rope_params (line 305) | def rope_params(self, index, dim, theta=10000): method forward (line 325) | def forward(self, video_fhw, txt_seq_lens, device): method _compute_video_freqs (line 389) | def _compute_video_freqs(self, frame, height, width, idx=0): method _compute_condition_freqs (line 432) | def _compute_condition_freqs(self, frame, height, width): class QwenImageCrossAttention (line 473) | class QwenImageCrossAttention(nn.Module): method __init__ (line 474) | def __init__( method forward (line 582) | def forward( class QwenImageTransformerBlock (line 671) | class QwenImageTransformerBlock(nn.Module): method __init__ (line 672) | def __init__( method _modulate (line 777) | def _modulate( method forward (line 866) | def forward( function to_hashable (line 975) | def to_hashable(obj): class QwenImageTransformer2DModel (line 981) | class QwenImageTransformer2DModel(CachableDiT, OffloadableDiTMixin): method get_nunchaku_quant_rules (line 995) | def get_nunchaku_quant_rules(cls) -> dict[str, list[str]]: method __init__ (line 1017) | def __init__( method build_modulate_index (line 1090) | def build_modulate_index(self, img_shapes: tuple[int, int, int], device): method forward (line 1101) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/sana.py class SanaCombinedTimestepSizeEmbeddings (line 18) | class SanaCombinedTimestepSizeEmbeddings(nn.Module): method __init__ (line 19) | def __init__(self, embedding_dim): method forward (line 28) | def forward(self, timestep, hidden_dtype=None): class SanaAdaLayerNormSingle (line 36) | class SanaAdaLayerNormSingle(nn.Module): method __init__ (line 37) | def __init__(self, embedding_dim): method forward (line 43) | def forward(self, timestep, hidden_dtype=None): class SanaModulatedNorm (line 49) | class SanaModulatedNorm(nn.Module): method __init__ (line 50) | def __init__(self, dim, eps=1e-6): method forward (line 54) | def forward(self, x, temb, scale_shift_table): class GLUMBConv (line 61) | class GLUMBConv(nn.Module): method __init__ (line 64) | def __init__(self, in_channels, out_channels, expand_ratio=2.5): method forward (line 79) | def forward(self, hidden_states): class SanaLinearAttention (line 89) | class SanaLinearAttention(nn.Module): method __init__ (line 92) | def __init__(self, query_dim, num_heads, head_dim, qk_norm_dim, bias=F... method forward (line 107) | def forward(self, hidden_states): class SanaCrossAttention (line 135) | class SanaCrossAttention(nn.Module): method __init__ (line 136) | def __init__(self, query_dim, cross_attention_dim, num_heads, head_dim... method forward (line 152) | def forward( class SanaTransformerBlock (line 182) | class SanaTransformerBlock(nn.Module): method __init__ (line 183) | def __init__( method forward (line 219) | def forward( class SanaTransformer2DModel (line 254) | class SanaTransformer2DModel(CachableDiT, OffloadableDiTMixin): method __init__ (line 265) | def __init__(self, config: SanaConfig, hf_config=None, **kwargs): method forward (line 326) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/dits/wanvideo.py class WanImageEmbedding (line 67) | class WanImageEmbedding(torch.nn.Module): method __init__ (line 69) | def __init__(self, in_features: int, out_features: int): method forward (line 76) | def forward(self, encoder_hidden_states_image: torch.Tensor) -> torch.... class WanTimeTextImageEmbedding (line 84) | class WanTimeTextImageEmbedding(nn.Module): method __init__ (line 86) | def __init__( method forward (line 107) | def forward( class WanSelfAttention (line 127) | class WanSelfAttention(nn.Module): method __init__ (line 129) | def __init__( method forward (line 199) | def forward(self, x: torch.Tensor, context: torch.Tensor, context_lens... class WanT2VCrossAttention (line 207) | class WanT2VCrossAttention(WanSelfAttention): method __init__ (line 208) | def __init__(self, *args, **kwargs): method forward (line 211) | def forward(self, x, context, context_lens): class WanI2VCrossAttention (line 244) | class WanI2VCrossAttention(WanSelfAttention): method __init__ (line 246) | def __init__( method forward (line 284) | def forward(self, x, context, context_lens): class WanTransformerBlock (line 332) | class WanTransformerBlock(nn.Module): method __init__ (line 334) | def __init__( method forward (line 486) | def forward( class WanTransformerBlock_VSA (line 589) | class WanTransformerBlock_VSA(nn.Module): method __init__ (line 591) | def __init__( method forward (line 728) | def forward( class WanTransformer3DModel (line 815) | class WanTransformer3DModel(CachableDiT, OffloadableDiTMixin): method __init__ (line 823) | def __init__( method _compute_rope_for_sequence_shard (line 928) | def _compute_rope_for_sequence_shard( method forward (line 950) | def forward( method maybe_cache_states (line 1156) | def maybe_cache_states( method should_skip_forward_for_cached_states (line 1166) | def should_skip_forward_for_cached_states(self, **kwargs) -> bool: method retrieve_cached_states (line 1209) | def retrieve_cached_states(self, hidden_states: torch.Tensor) -> torch... FILE: python/sglang/multimodal_gen/runtime/models/dits/zimage.py class SelectFirstElement (line 46) | class SelectFirstElement(nn.Module): method __init__ (line 47) | def __init__(self): method forward (line 50) | def forward(self, x): class TimestepEmbedder (line 54) | class TimestepEmbedder(nn.Module): method __init__ (line 55) | def __init__(self, out_size, mid_size=None, frequency_embedding_size=2... method timestep_embedding (line 75) | def timestep_embedding(t, dim, max_period=10000): method forward (line 91) | def forward(self, t): class FeedForward (line 101) | class FeedForward(nn.Module): method __init__ (line 102) | def __init__(self, dim: int, hidden_dim: int): method forward (line 111) | def forward(self, x): class ZImageAttention (line 118) | class ZImageAttention(nn.Module): method __init__ (line 119) | def __init__( method forward (line 213) | def forward( class ZImageTransformerBlock (line 274) | class ZImageTransformerBlock(nn.Module): method __init__ (line 275) | def __init__( method forward (line 342) | def forward( class FinalLayer (line 388) | class FinalLayer(nn.Module): method __init__ (line 389) | def __init__(self, hidden_size, out_channels): method forward (line 402) | def forward(self, x, c): class RopeEmbedder (line 410) | class RopeEmbedder: method __init__ (line 411) | def __init__( method precompute_freqs (line 428) | def precompute_freqs(dim: List[int], end: List[int], theta: float = 25... method __call__ (line 445) | def __call__(self, ids: torch.Tensor) -> Tuple[torch.Tensor, torch.Ten... class ZImageTransformer2DModel (line 478) | class ZImageTransformer2DModel(CachableDiT, OffloadableDiTMixin): method get_nunchaku_quant_rules (line 490) | def get_nunchaku_quant_rules(cls) -> dict[str, list[str]]: method __init__ (line 510) | def __init__( method unpatchify (line 624) | def unpatchify( method create_coordinate_grid (line 644) | def create_coordinate_grid(size, start=None, device=None): method patchify_and_embed (line 655) | def patchify_and_embed( method forward (line 711) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/encoders/base.py class TextEncoder (line 18) | class TextEncoder(nn.Module, ABC): method __init__ (line 25) | def __init__(self, config: TextEncoderConfig) -> None: method forward (line 36) | def forward( method supported_attention_backends (line 48) | def supported_attention_backends(self) -> set[AttentionBackendEnum]: class ImageEncoder (line 52) | class ImageEncoder(nn.Module, ABC): method __init__ (line 57) | def __init__(self, config: ImageEncoderConfig) -> None: method forward (line 66) | def forward(self, pixel_values: torch.Tensor, **kwargs) -> BaseEncoder... method supported_attention_backends (line 70) | def supported_attention_backends(self) -> set[AttentionBackendEnum]: FILE: python/sglang/multimodal_gen/runtime/models/encoders/bert.py class HunyuanClip (line 12) | class HunyuanClip(nn.Module): method __init__ (line 18) | def __init__(self, model_dir, max_length=77): method forward (line 30) | def forward(self, prompts, with_mask=True): FILE: python/sglang/multimodal_gen/runtime/models/encoders/clip.py class CLIPVisionEmbeddings (line 47) | class CLIPVisionEmbeddings(nn.Module): method __init__ (line 49) | def __init__(self, config: CLIPVisionConfig): method forward (line 76) | def forward(self, pixel_values: torch.Tensor) -> torch.Tensor: class CLIPTextEmbeddings (line 91) | class CLIPTextEmbeddings(nn.Module): method __init__ (line 93) | def __init__(self, config: CLIPTextConfig): method forward (line 110) | def forward( class CLIPAttention (line 143) | class CLIPAttention(nn.Module): method __init__ (line 146) | def __init__( method _shape (line 193) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 200) | def forward( class CLIPMLP (line 286) | class CLIPMLP(nn.Module): method __init__ (line 288) | def __init__( method forward (line 312) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class CLIPEncoderLayer (line 320) | class CLIPEncoderLayer(nn.Module): method __init__ (line 322) | def __init__( method forward (line 338) | def forward( class CLIPEncoder (line 360) | class CLIPEncoder(nn.Module): method __init__ (line 369) | def __init__( method forward (line 395) | def forward( class CLIPTextTransformer (line 418) | class CLIPTextTransformer(nn.Module): method __init__ (line 420) | def __init__( method forward (line 445) | def forward( class CLIPTextModel (line 527) | class CLIPTextModel(TextEncoder): method __init__ (line 529) | def __init__( method forward (line 538) | def forward( method load_weights (line 556) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->... class CLIPVisionTransformer (line 593) | class CLIPVisionTransformer(nn.Module): method __init__ (line 595) | def __init__( method forward (line 637) | def forward( class CLIPVisionModel (line 675) | class CLIPVisionModel(ImageEncoder): method __init__ (line 680) | def __init__(self, config: CLIPVisionConfig) -> None: method forward (line 690) | def forward( method device (line 706) | def device(self): method load_weights (line 711) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->... class BertModel (line 754) | class BertModel(CLIPTextModel): FILE: python/sglang/multimodal_gen/runtime/models/encoders/gemma2.py class Gemma2RMSNorm (line 42) | class Gemma2RMSNorm(nn.Module): method __init__ (line 43) | def __init__(self, dim: int, eps: float = 1e-6): method _norm (line 48) | def _norm(self, x): method forward (line 51) | def forward(self, x): class Gemma2MLP (line 57) | class Gemma2MLP(nn.Module): method __init__ (line 58) | def __init__( method forward (line 88) | def forward(self, x): class Gemma2Attention (line 95) | class Gemma2Attention(nn.Module): method __init__ (line 96) | def __init__( method forward (line 161) | def forward( class Gemma2DecoderLayer (line 233) | class Gemma2DecoderLayer(nn.Module): method __init__ (line 234) | def __init__( method forward (line 271) | def forward( class Gemma2Model (line 292) | class Gemma2Model(nn.Module): method __init__ (line 297) | def __init__(self, config: Gemma2Config, **kwargs): method get_input_embeddings (line 326) | def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: method forward (line 329) | def forward( method load_weights (line 377) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->... method _load_with_shard_id (line 425) | def _load_with_shard_id(weight_loader, param, loaded_weight, shard_id): FILE: python/sglang/multimodal_gen/runtime/models/encoders/gemma_3.py function get_attention_sliding_window_size (line 32) | def get_attention_sliding_window_size(config): class Gemma3RMSNorm (line 36) | class Gemma3RMSNorm(nn.Module): method __init__ (line 37) | def __init__(self, dim: int, eps: float = 1e-6): method _norm (line 42) | def _norm(self, x): method forward (line 45) | def forward(self, x): method extra_repr (line 50) | def extra_repr(self): class Gemma3MLP (line 54) | class Gemma3MLP(nn.Module): method __init__ (line 55) | def __init__( method forward (line 86) | def forward(self, x): function _rotate_half (line 93) | def _rotate_half(x: torch.Tensor) -> torch.Tensor: class Gemma3Attention (line 98) | class Gemma3Attention(nn.Module): method __init__ (line 99) | def __init__( method rotary_emb (line 210) | def rotary_emb(self, positions, q, k): method forward (line 227) | def forward( class Gemma3DecoderLayer (line 302) | class Gemma3DecoderLayer(nn.Module): method __init__ (line 303) | def __init__( method forward (line 345) | def forward( class Gemma3TextScaledWordEmbedding (line 383) | class Gemma3TextScaledWordEmbedding(nn.Embedding): method __init__ (line 384) | def __init__( method forward (line 394) | def forward(self, input_ids: torch.Tensor): class QuickGELU (line 401) | class QuickGELU(nn.Module): method forward (line 402) | def forward(self, x: torch.Tensor) -> torch.Tensor: class SiglipVisionEmbeddings (line 406) | class SiglipVisionEmbeddings(nn.Module): method __init__ (line 407) | def __init__(self, config): method forward (line 432) | def forward(self, pixel_values: torch.Tensor) -> torch.Tensor: class SiglipMLP (line 443) | class SiglipMLP(nn.Module): method __init__ (line 444) | def __init__( method forward (line 466) | def forward(self, x: torch.Tensor) -> torch.Tensor: class SiglipAttention (line 473) | class SiglipAttention(nn.Module): method __init__ (line 474) | def __init__( method forward (line 515) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class SiglipEncoderLayer (line 534) | class SiglipEncoderLayer(nn.Module): method __init__ (line 535) | def __init__( method forward (line 561) | def forward( class SiglipEncoder (line 577) | class SiglipEncoder(nn.Module): method __init__ (line 578) | def __init__( method forward (line 600) | def forward( class SiglipVisionTransformer (line 610) | class SiglipVisionTransformer(nn.Module): method __init__ (line 611) | def __init__( method device (line 629) | def device(self) -> torch.device: method forward (line 632) | def forward(self, pixel_values: torch.Tensor) -> torch.Tensor: class SiglipVisionModel (line 639) | class SiglipVisionModel(nn.Module): method __init__ (line 640) | def __init__( method device (line 652) | def device(self) -> torch.device: method forward (line 655) | def forward(self, pixel_values: torch.Tensor): class Gemma3MultiModalProjector (line 659) | class Gemma3MultiModalProjector(nn.Module): method __init__ (line 662) | def __init__(self, config: Gemma3Config): method forward (line 684) | def forward(self, vision_outputs: torch.Tensor) -> torch.Tensor: class Gemma3TextModel (line 710) | class Gemma3TextModel(nn.Module): method __init__ (line 711) | def __init__(self, config: Gemma3Config): method get_input_embeddings (line 747) | def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: method forward (line 751) | def forward( method load_weights (line 803) | def load_weights(self, weights: Any) -> set[str]: class Gemma3ForConditionalGeneration (line 893) | class Gemma3ForConditionalGeneration(nn.Module): method __init__ (line 894) | def __init__( method get_placeholder_mask (line 918) | def get_placeholder_mask( method forward (line 937) | def forward( method load_weights (line 986) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]) ->... method get_attention_sliding_window_size (line 1174) | def get_attention_sliding_window_size(self): FILE: python/sglang/multimodal_gen/runtime/models/encoders/hunyuan3d.py function get_1d_sincos_pos_embed_from_grid (line 15) | def get_1d_sincos_pos_embed_from_grid(embed_dim, pos): class ImageEncoder (line 31) | class ImageEncoder(nn.Module): method __init__ (line 37) | def __init__( method forward (line 72) | def forward(self, image, mask=None, value_range=(-1, 1), **kwargs): method unconditional_embedding (line 87) | def unconditional_embedding(self, batch_size, **kwargs): class CLIPImageEncoder (line 101) | class CLIPImageEncoder(ImageEncoder): class DinoImageEncoder (line 108) | class DinoImageEncoder(ImageEncoder): class DinoImageEncoderMV (line 115) | class DinoImageEncoderMV(DinoImageEncoder): method __init__ (line 120) | def __init__( method forward (line 140) | def forward(self, image, mask=None, value_range=(-1, 1), view_idxs=Non... method unconditional_embedding (line 182) | def unconditional_embedding(self, batch_size, view_idxs, **kwargs): function build_image_encoder (line 195) | def build_image_encoder(config): class DualImageEncoder (line 206) | class DualImageEncoder(nn.Module): method __init__ (line 207) | def __init__( method forward (line 216) | def forward(self, image, mask=None, **kwargs): method unconditional_embedding (line 223) | def unconditional_embedding(self, batch_size, **kwargs): class SingleImageEncoder (line 235) | class SingleImageEncoder(nn.Module): method __init__ (line 236) | def __init__( method forward (line 243) | def forward(self, image, mask=None, **kwargs): method unconditional_embedding (line 249) | def unconditional_embedding(self, batch_size, **kwargs): FILE: python/sglang/multimodal_gen/runtime/models/encoders/llama.py class LlamaMLP (line 60) | class LlamaMLP(nn.Module): method __init__ (line 62) | def __init__( method forward (line 94) | def forward(self, x): class LlamaAttention (line 101) | class LlamaAttention(nn.Module): method __init__ (line 103) | def __init__( method forward (line 192) | def forward( class LlamaDecoderLayer (line 220) | class LlamaDecoderLayer(nn.Module): method __init__ (line 222) | def __init__( method forward (line 277) | def forward( class LlamaModel (line 298) | class LlamaModel(TextEncoder): method __init__ (line 300) | def __init__( method get_input_embeddings (line 341) | def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: method forward (line 344) | def forward( method load_weights (line 397) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->... FILE: python/sglang/multimodal_gen/runtime/models/encoders/mistral_3.py function repeat_kv (line 43) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class MistralAttention (line 58) | class MistralAttention(nn.Module): method __init__ (line 61) | def __init__(self, config: MistralConfig, layer_idx: int): method forward (line 106) | def forward( class MistralDecoderLayer (line 154) | class MistralDecoderLayer(nn.Module): method __init__ (line 155) | def __init__(self, config: MistralConfig, layer_idx: int): method forward (line 167) | def forward( class MistralModel (line 203) | class MistralModel(nn.Module): method __init__ (line 204) | def __init__(self, config: MistralConfig): method forward (line 224) | def forward( class Mistral3Model (line 298) | class Mistral3Model(nn.Module): method __init__ (line 301) | def __init__(self, config: Mistral3Config): method get_input_embeddings (line 306) | def get_input_embeddings(self): method set_decoder (line 309) | def set_decoder(self, decoder): method get_decoder (line 312) | def get_decoder(self): method forward (line 315) | def forward( class Mistral3ForConditionalGeneration (line 363) | class Mistral3ForConditionalGeneration(nn.Module): method __init__ (line 371) | def __init__(self, config: LlavaConfig): method get_input_embeddings (line 375) | def get_input_embeddings(self): method set_decoder (line 378) | def set_decoder(self, decoder): method get_decoder (line 381) | def get_decoder(self): method language_model (line 386) | def language_model(self): method forward (line 389) | def forward( method load_weights (line 434) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->... FILE: python/sglang/multimodal_gen/runtime/models/encoders/qwen2_5vl.py class Qwen2_5_VLAttention (line 80) | class Qwen2_5_VLAttention(nn.Module): method __init__ (line 86) | def __init__(self, config: Qwen2_5_VLTextConfig, layer_idx: Optional[i... method forward (line 143) | def forward( class Qwen2_5_VLDecoderLayer (line 208) | class Qwen2_5_VLDecoderLayer(nn.Module): method __init__ (line 209) | def __init__(self, config: Qwen2_5_VLTextConfig, layer_idx: int): method forward (line 230) | def forward( class Qwen2_5_VLMLP (line 295) | class Qwen2_5_VLMLP(nn.Module): method __init__ (line 296) | def __init__( method forward (line 322) | def forward(self, x: torch.Tensor) -> torch.Tensor: class Qwen2_5_VLTextModel (line 330) | class Qwen2_5_VLTextModel(nn.Module): method __init__ (line 331) | def __init__(self, config: PretrainedConfig): method forward (line 355) | def forward( class Qwen2_5_VLModel (line 502) | class Qwen2_5_VLModel(nn.Module): method __init__ (line 509) | def __init__(self, config, enable_image_understanding: bool = False): method get_input_embeddings (line 523) | def get_input_embeddings(self): method set_input_embeddings (line 526) | def set_input_embeddings(self, value): method set_decoder (line 529) | def set_decoder(self, decoder): method get_decoder (line 532) | def get_decoder(self): method get_rope_index (line 535) | def get_rope_index( method get_video_features (line 763) | def get_video_features( method get_image_features (line 785) | def get_image_features( method get_placeholder_mask (line 812) | def get_placeholder_mask( method forward (line 874) | def forward( class Qwen2_5_VLForConditionalGeneration (line 1005) | class Qwen2_5_VLForConditionalGeneration(TextEncoder): method __init__ (line 1024) | def __init__( method get_input_embeddings (line 1044) | def get_input_embeddings(self): method forward (line 1048) | def forward( method load_weights (line 1125) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_embed_and_head (line 1152) | def get_embed_and_head(self): FILE: python/sglang/multimodal_gen/runtime/models/encoders/qwen3.py class Qwen3MLP (line 30) | class Qwen3MLP(nn.Module): method __init__ (line 33) | def __init__( method forward (line 63) | def forward(self, x: torch.Tensor) -> torch.Tensor: class Qwen3Attention (line 70) | class Qwen3Attention(nn.Module): method __init__ (line 76) | def __init__( method forward (line 157) | def forward( class Qwen3DecoderLayer (line 196) | class Qwen3DecoderLayer(nn.Module): method __init__ (line 199) | def __init__( method forward (line 239) | def forward( class Qwen3ForCausalLM (line 260) | class Qwen3ForCausalLM(TextEncoder): method __init__ (line 270) | def __init__(self, config: Qwen3TextConfig) -> None: method get_input_embeddings (line 308) | def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: method forward (line 311) | def forward( method load_weights (line 360) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->... FILE: python/sglang/multimodal_gen/runtime/models/encoders/t5.py class AttentionType (line 51) | class AttentionType: class AttentionMetadata (line 68) | class AttentionMetadata: class T5DenseActDense (line 72) | class T5DenseActDense(nn.Module): method __init__ (line 74) | def __init__( method forward (line 91) | def forward(self, hidden_states) -> torch.Tensor: class T5DenseGatedActDense (line 98) | class T5DenseGatedActDense(nn.Module): method __init__ (line 100) | def __init__( method forward (line 130) | def forward(self, hidden_states) -> torch.Tensor: class T5LayerFF (line 138) | class T5LayerFF(nn.Module): method __init__ (line 140) | def __init__( method forward (line 153) | def forward(self, hidden_states) -> torch.Tensor: class T5MultiHeadAttention (line 161) | class T5MultiHeadAttention(nn.Module): method __init__ (line 163) | def __init__(self) -> None: method forward (line 166) | def forward(self, q, k, v, attn_bias=None): class T5Attention (line 178) | class T5Attention(nn.Module): method __init__ (line 180) | def __init__( method _relative_position_bucket (line 241) | def _relative_position_bucket( method compute_bias (line 298) | def compute_bias(self, query_length, key_length, device=None) -> torch... method forward (line 324) | def forward( class T5LayerSelfAttention (line 378) | class T5LayerSelfAttention(nn.Module): method __init__ (line 380) | def __init__( method forward (line 397) | def forward( class T5LayerCrossAttention (line 416) | class T5LayerCrossAttention(nn.Module): method __init__ (line 418) | def __init__( method forward (line 431) | def forward( class T5Block (line 445) | class T5Block(nn.Module): method __init__ (line 447) | def __init__( method forward (line 476) | def forward( class T5Stack (line 505) | class T5Stack(nn.Module): method __init__ (line 507) | def __init__( method forward (line 549) | def forward( class T5EncoderModel (line 568) | class T5EncoderModel(TextEncoder): method __init__ (line 570) | def __init__(self, config: T5Config, prefix: str = ""): method get_input_embeddings (line 592) | def get_input_embeddings(self): method forward (line 595) | def forward( method load_weights (line 613) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->... class UMT5EncoderModel (line 657) | class UMT5EncoderModel(TextEncoder): method __init__ (line 659) | def __init__(self, config: T5Config, prefix: str = ""): method get_input_embeddings (line 681) | def get_input_embeddings(self): method forward (line 684) | def forward( method load_weights (line 705) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->... FILE: python/sglang/multimodal_gen/runtime/models/encoders/vision.py class VisionEncoderInfo (line 19) | class VisionEncoderInfo(ABC, Generic[_C]): method __init__ (line 21) | def __init__(self, vision_config: _C) -> None: method get_num_image_tokens (line 27) | def get_num_image_tokens( method get_max_image_tokens (line 36) | def get_max_image_tokens(self) -> int: method get_image_size (line 40) | def get_image_size(self) -> int: method get_patch_size (line 44) | def get_patch_size(self) -> int: method get_patch_grid_length (line 48) | def get_patch_grid_length(self) -> int: function resolve_visual_encoder_outputs (line 52) | def resolve_visual_encoder_outputs( FILE: python/sglang/multimodal_gen/runtime/models/parameter.py class BasevLLMParameter (line 20) | class BasevLLMParameter(Parameter): method __new__ (line 27) | def __new__(cls, data: torch.Tensor, **kwargs): method __init__ (line 31) | def __init__(self, data: torch.Tensor, weight_loader: Callable): method weight_loader (line 57) | def weight_loader(self): method _is_1d_and_scalar (line 60) | def _is_1d_and_scalar(self, loaded_weight: torch.Tensor): method _assert_and_load (line 65) | def _assert_and_load(self, loaded_weight: torch.Tensor) -> None: method load_column_parallel_weight (line 71) | def load_column_parallel_weight(self, loaded_weight: torch.Tensor) -> ... method load_row_parallel_weight (line 74) | def load_row_parallel_weight(self, loaded_weight: torch.Tensor) -> None: method load_merged_column_weight (line 77) | def load_merged_column_weight(self, loaded_weight: torch.Tensor, **kwa... method load_qkv_weight (line 80) | def load_qkv_weight(self, loaded_weight: torch.Tensor, **kwargs) -> None: class _ColumnvLLMParameter (line 84) | class _ColumnvLLMParameter(BasevLLMParameter): method __init__ (line 95) | def __init__(self, output_dim: int, **kwargs): method output_dim (line 100) | def output_dim(self): method load_column_parallel_weight (line 103) | def load_column_parallel_weight(self, loaded_weight: torch.Tensor) -> ... method load_merged_column_weight (line 112) | def load_merged_column_weight(self, loaded_weight: torch.Tensor, **kwa... method load_qkv_weight (line 136) | def load_qkv_weight(self, loaded_weight: torch.Tensor, **kwargs) -> None: class RowvLLMParameter (line 168) | class RowvLLMParameter(BasevLLMParameter): method __init__ (line 176) | def __init__(self, input_dim: int, **kwargs): method input_dim (line 181) | def input_dim(self): method load_row_parallel_weight (line 184) | def load_row_parallel_weight(self, loaded_weight: torch.Tensor) -> None: class ModelWeightParameter (line 198) | class ModelWeightParameter(_ColumnvLLMParameter, RowvLLMParameter): class GroupQuantScaleParameter (line 207) | class GroupQuantScaleParameter(_ColumnvLLMParameter, RowvLLMParameter): class ChannelQuantScaleParameter (line 216) | class ChannelQuantScaleParameter(_ColumnvLLMParameter): class PerTensorScaleParameter (line 225) | class PerTensorScaleParameter(BasevLLMParameter): method __init__ (line 239) | def __init__(self, **kwargs): method _shard_id_as_int (line 243) | def _shard_id_as_int(self, shard_id: str | int) -> int: method load_row_parallel_weight (line 255) | def load_row_parallel_weight(self, *args, **kwargs) -> None: method load_merged_column_weight (line 258) | def load_merged_column_weight(self, *args, **kwargs) -> None: method load_qkv_weight (line 261) | def load_qkv_weight(self, *args, **kwargs) -> None: method load_column_parallel_weight (line 264) | def load_column_parallel_weight(self, *args, **kwargs) -> None: method _load_into_shard_id (line 267) | def _load_into_shard_id( class PackedColumnParameter (line 289) | class PackedColumnParameter(_ColumnvLLMParameter): method __init__ (line 296) | def __init__(self, packed_factor: int | Fraction, packed_dim: int, **k... method packed_dim (line 302) | def packed_dim(self): method packed_factor (line 306) | def packed_factor(self): method adjust_shard_indexes_for_packing (line 309) | def adjust_shard_indexes_for_packing( class PackedvLLMParameter (line 319) | class PackedvLLMParameter(ModelWeightParameter): method __init__ (line 330) | def __init__(self, packed_factor: int | Fraction, packed_dim: int, **k... method packed_dim (line 336) | def packed_dim(self): method packed_factor (line 340) | def packed_factor(self): method adjust_shard_indexes_for_packing (line 343) | def adjust_shard_indexes_for_packing(self, shard_size, shard_offset): class BlockQuantScaleParameter (line 351) | class BlockQuantScaleParameter(_ColumnvLLMParameter, RowvLLMParameter): function permute_param_layout_ (line 360) | def permute_param_layout_( function _adjust_shard_indexes_for_packing (line 418) | def _adjust_shard_indexes_for_packing( FILE: python/sglang/multimodal_gen/runtime/models/registry.py function _parse_aliases_from_ast (line 44) | def _parse_aliases_from_ast(value_node: ast.expr) -> list[str]: function _discover_and_register_models (line 55) | def _discover_and_register_models() -> dict[str, tuple[str, str, str]]: class _ModelInfo (line 166) | class _ModelInfo: method from_model_cls (line 170) | def from_model_cls(model: type[nn.Module]) -> "_ModelInfo": class _BaseRegisteredModel (line 176) | class _BaseRegisteredModel(ABC): method inspect_model_cls (line 179) | def inspect_model_cls(self) -> _ModelInfo: method load_model_cls (line 183) | def load_model_cls(self) -> type[nn.Module]: class _RegisteredModel (line 188) | class _RegisteredModel(_BaseRegisteredModel): method from_model_cls (line 197) | def from_model_cls(model_cls: type[nn.Module]): method inspect_model_cls (line 203) | def inspect_model_cls(self) -> _ModelInfo: method load_model_cls (line 206) | def load_model_cls(self) -> type[nn.Module]: function _run_in_subprocess (line 210) | def _run_in_subprocess(fn: Callable[[], _T]) -> _T: class _LazyRegisteredModel (line 239) | class _LazyRegisteredModel(_BaseRegisteredModel): method inspect_model_cls (line 249) | def inspect_model_cls(self) -> _ModelInfo: method load_model_cls (line 254) | def load_model_cls(self) -> type[nn.Module]: function _try_load_model_cls (line 260) | def _try_load_model_cls( function _try_inspect_model_cls (line 275) | def _try_inspect_model_cls( class _ModelRegistry (line 287) | class _ModelRegistry: method get_supported_archs (line 291) | def get_supported_archs(self) -> Set[str]: method resolve_by_alias (line 294) | def resolve_by_alias(self, alias: str) -> type[nn.Module] | None: method register_model (line 301) | def register_model( method _raise_for_unsupported (line 337) | def _raise_for_unsupported(self, architectures: list[str]) -> NoReturn: method _try_load_model_cls (line 351) | def _try_load_model_cls(self, model_arch: str) -> type[nn.Module] | None: method _try_inspect_model_cls (line 357) | def _try_inspect_model_cls(self, model_arch: str) -> _ModelInfo | None: method _normalize_archs (line 363) | def _normalize_archs( method inspect_model_cls (line 382) | def inspect_model_cls( method resolve_model_cls (line 395) | def resolve_model_cls( FILE: python/sglang/multimodal_gen/runtime/models/schedulers/base.py class BaseScheduler (line 10) | class BaseScheduler(ABC): method __init__ (line 15) | def __init__(self, *args, **kwargs) -> None: method set_shift (line 26) | def set_shift(self, shift: float) -> None: method set_timesteps (line 30) | def set_timesteps(self, *args, **kwargs) -> None: method scale_model_input (line 34) | def scale_model_input( FILE: python/sglang/multimodal_gen/runtime/models/schedulers/flow_match_pair.py class FlowMatchScheduler (line 13) | class FlowMatchScheduler(BaseScheduler): method __init__ (line 14) | def __init__( method set_shift (line 45) | def set_shift(self, shift: float) -> None: method set_timesteps (line 48) | def set_timesteps( method scale_model_input (line 105) | def scale_model_input(self, sample: torch.Tensor, timestep: int | None... method step (line 108) | def step(self, model_output, timestep, sample, to_final=False, **kwargs): method return_to_timestep (line 120) | def return_to_timestep(self, timestep, sample, sample_stablized): method add_noise (line 128) | def add_noise(self, original_samples, noise, timestep): method training_target (line 136) | def training_target(self, sample, noise, timestep): method training_weight (line 140) | def training_weight(self, timestep): method calculate_shift (line 147) | def calculate_shift( class FlowMatchPairScheduler (line 161) | class FlowMatchPairScheduler(FlowMatchScheduler): method __init__ (line 171) | def __init__( method set_pair_postprocess (line 205) | def set_pair_postprocess(self, fn): method set_pair_postprocess_by_name (line 226) | def set_pair_postprocess_by_name(self, name: str | None, **kwargs): method _make_pairs_from_vector (line 442) | def _make_pairs_from_vector(self, vec: torch.Tensor) -> torch.Tensor: method get_pairs (line 447) | def get_pairs(self, source: str = "timesteps") -> torch.Tensor: method timestep_to_sigma (line 458) | def timestep_to_sigma(self, timestep: torch.Tensor | float) -> torch.T... method step_from_to (line 472) | def step_from_to( method _refresh_pair_cache (line 505) | def _refresh_pair_cache(self) -> None: FILE: python/sglang/multimodal_gen/runtime/models/schedulers/hunyuan3d_scheduler.py class Hunyuan3DFlowMatchSchedulerOutput (line 16) | class Hunyuan3DFlowMatchSchedulerOutput(BaseOutput): class Hunyuan3DFlowMatchEulerDiscreteScheduler (line 22) | class Hunyuan3DFlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMix... method __init__ (line 35) | def __init__( method step_index (line 59) | def step_index(self) -> Optional[int]: method begin_index (line 64) | def begin_index(self) -> Optional[int]: method set_begin_index (line 68) | def set_begin_index(self, begin_index: int = 0): method scale_model_input (line 76) | def scale_model_input( method scale_noise (line 84) | def scale_noise( method _sigma_to_t (line 116) | def _sigma_to_t(self, sigma: float) -> float: method time_shift (line 120) | def time_shift(self, mu: float, sigma: float, t: torch.Tensor) -> torc... method set_timesteps (line 124) | def set_timesteps( method index_for_timestep (line 160) | def index_for_timestep( method _init_step_index (line 171) | def _init_step_index(self, timestep: Union[float, torch.Tensor]): method step (line 180) | def step( method __len__ (line 218) | def __len__(self) -> int: class Hunyuan3DConsistencyFlowMatchSchedulerOutput (line 223) | class Hunyuan3DConsistencyFlowMatchSchedulerOutput(BaseOutput): class Hunyuan3DConsistencyFlowMatchEulerDiscreteScheduler (line 230) | class Hunyuan3DConsistencyFlowMatchEulerDiscreteScheduler(SchedulerMixin... method __init__ (line 242) | def __init__( method step_index (line 264) | def step_index(self) -> Optional[int]: method begin_index (line 268) | def begin_index(self) -> Optional[int]: method set_begin_index (line 271) | def set_begin_index(self, begin_index: int = 0): method scale_model_input (line 274) | def scale_model_input( method _sigma_to_t (line 282) | def _sigma_to_t(self, sigma: float) -> float: method set_timesteps (line 285) | def set_timesteps( method index_for_timestep (line 311) | def index_for_timestep( method _init_step_index (line 320) | def _init_step_index(self, timestep: Union[float, torch.Tensor]): method step (line 328) | def step( method __len__ (line 363) | def __len__(self) -> int: FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_comfyui_passthrough.py class ComfyUIPassThroughSchedulerOutput (line 21) | class ComfyUIPassThroughSchedulerOutput(BaseOutput): class ComfyUIPassThroughScheduler (line 32) | class ComfyUIPassThroughScheduler(BaseScheduler, ConfigMixin, SchedulerM... method __init__ (line 50) | def __init__( method set_timesteps (line 63) | def set_timesteps( method step (line 92) | def step( method scale_model_input (line 126) | def scale_model_input( method set_shift (line 141) | def set_shift(self, shift: float) -> None: method set_begin_index (line 150) | def set_begin_index(self, begin_index: int = 0) -> None: method begin_index (line 160) | def begin_index(self) -> int | None: method step_index (line 167) | def step_index(self) -> int: method add_noise (line 173) | def add_noise( FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_dpm_solver_multistep.py class DPMSolverMultistepScheduler (line 22) | class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin, BaseSched... method __init__ (line 29) | def __init__( method set_shift (line 94) | def set_shift(self, shift: float) -> None: method set_begin_index (line 97) | def set_begin_index(self, begin_index: int = 0) -> None: method begin_index (line 101) | def begin_index(self) -> int | None: method set_timesteps (line 104) | def set_timesteps(self, num_inference_steps: int, device=None, **kwargs): method scale_model_input (line 108) | def scale_model_input( method step (line 113) | def step( method sigmas (line 123) | def sigmas(self): method init_noise_sigma (line 127) | def init_noise_sigma(self): method add_noise (line 130) | def add_noise( FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_flow_match_euler_discrete.py class FlowMatchEulerDiscreteSchedulerOutput (line 41) | class FlowMatchEulerDiscreteSchedulerOutput(BaseOutput): class FlowMatchEulerDiscreteScheduler (line 54) | class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin, BaseS... method __init__ (line 98) | def __init__( method shift (line 157) | def shift(self) -> float: method step_index (line 164) | def step_index(self) -> int | None: method begin_index (line 171) | def begin_index(self) -> int | None: method set_begin_index (line 178) | def set_begin_index(self, begin_index: int = 0) -> None: method set_shift (line 188) | def set_shift(self, shift: float) -> None: method scale_noise (line 191) | def scale_noise( method _sigma_to_t (line 233) | def _sigma_to_t(self, sigma: float) -> float: method time_shift (line 236) | def time_shift( method stretch_shift_to_terminal (line 246) | def stretch_shift_to_terminal(self, t: torch.Tensor) -> torch.Tensor: method set_timesteps (line 267) | def set_timesteps( method index_for_timestep (line 413) | def index_for_timestep( method _init_step_index (line 431) | def _init_step_index(self, timestep: float | torch.FloatTensor) -> None: method step (line 439) | def step( method _convert_to_karras (line 539) | def _convert_to_karras( method _convert_to_exponential (line 567) | def _convert_to_exponential( method _convert_to_beta (line 593) | def _convert_to_beta( method _time_shift_exponential (line 628) | def _time_shift_exponential( method _time_shift_linear (line 636) | def _time_shift_linear( method add_noise (line 641) | def add_noise( method scale_model_input (line 679) | def scale_model_input( method __len__ (line 684) | def __len__(self) -> int: FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_flow_unipc_multistep.py class FlowUniPCMultistepScheduler (line 24) | class FlowUniPCMultistepScheduler(SchedulerMixin, ConfigMixin, BaseSched... method __init__ (line 81) | def __init__( method step_index (line 143) | def step_index(self): method begin_index (line 150) | def begin_index(self): method set_shift (line 156) | def set_shift(self, shift: float) -> None: method set_begin_index (line 160) | def set_begin_index(self, begin_index: int = 0): method set_timesteps (line 171) | def set_timesteps( method _threshold_sample (line 244) | def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: method _sigma_to_alpha_sigma_t (line 281) | def _sigma_to_alpha_sigma_t(self, sigma) -> tuple[Any, Any]: method time_shift (line 285) | def time_shift(self, mu: float, sigma: float, t: torch.Tensor): method convert_model_output (line 288) | def convert_model_output( method multistep_uni_p_bh_update (line 355) | def multistep_uni_p_bh_update( method multistep_uni_c_bh_update (line 498) | def multistep_uni_c_bh_update( method index_for_timestep (line 667) | def index_for_timestep(self, timestep, schedule_timesteps=None) -> int: method _init_step_index (line 683) | def _init_step_index(self, timestep) -> None: method step (line 695) | def step( method scale_model_input (line 786) | def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> ... method add_noise (line 802) | def add_noise( FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_helios.py class HeliosSchedulerOutput (line 19) | class HeliosSchedulerOutput: class HeliosSchedulerConfig (line 26) | class HeliosSchedulerConfig: method __init__ (line 29) | def __init__(self, **kwargs): method get (line 33) | def get(self, key, default=None): class HeliosScheduler (line 37) | class HeliosScheduler: method __init__ (line 47) | def __init__( method init_sigmas (line 118) | def init_sigmas(self): method init_sigmas_for_each_stage (line 133) | def init_sigmas_for_each_stage(self): method step_index (line 197) | def step_index(self): method begin_index (line 201) | def begin_index(self): method set_begin_index (line 204) | def set_begin_index(self, begin_index: int = 0): method time_shift (line 207) | def time_shift(self, mu, sigma, t): method set_timesteps (line 213) | def set_timesteps( method index_for_timestep (line 280) | def index_for_timestep(self, timestep, schedule_timesteps=None): method _init_step_index (line 287) | def _init_step_index(self, timestep): method step_euler (line 295) | def step_euler( method _sigma_to_alpha_sigma_t (line 320) | def _sigma_to_alpha_sigma_t(self, sigma): method convert_model_output (line 329) | def convert_model_output(self, model_output, sample=None, sigma=None, ... method multistep_uni_p_bh_update (line 366) | def multistep_uni_p_bh_update( method multistep_uni_c_bh_update (line 450) | def multistep_uni_c_bh_update( method step_unipc (line 547) | def step_unipc( method add_noise (line 611) | def add_noise(self, original_samples, noise, timestep, sigmas, timeste... method convert_flow_pred_to_x0 (line 621) | def convert_flow_pred_to_x0(self, flow_pred, xt, timestep, sigmas, tim... method step_dmd (line 634) | def step_dmd( method step (line 680) | def step( method reset_scheduler_history (line 715) | def reset_scheduler_history(self): method set_shift (line 725) | def set_shift(self, shift: float): method __len__ (line 730) | def __len__(self): FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_self_forcing_flow_match.py class SelfForcingFlowMatchSchedulerOutput (line 15) | class SelfForcingFlowMatchSchedulerOutput(BaseOutput): class SelfForcingFlowMatchScheduler (line 28) | class SelfForcingFlowMatchScheduler(BaseScheduler, ConfigMixin, Schedule... method __init__ (line 33) | def __init__( method set_timesteps (line 55) | def set_timesteps( method step (line 80) | def step( method add_noise (line 113) | def add_noise(self, original_samples, noise, timestep): method scale_model_input (line 133) | def scale_model_input( method set_shift (line 138) | def set_shift(self, shift: float) -> None: FILE: python/sglang/multimodal_gen/runtime/models/schedulers/scheduling_unipc_multistep.py function betas_for_alpha_bar (line 44) | def betas_for_alpha_bar( function rescale_zero_terminal_snr (line 89) | def rescale_zero_terminal_snr(betas): class UniPCMultistepScheduler (line 125) | class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin, BaseScheduler): method __init__ (line 201) | def __init__( method step_index (line 321) | def step_index(self): method begin_index (line 328) | def begin_index(self): method set_shift (line 334) | def set_shift(self, shift: float) -> None: method set_begin_index (line 338) | def set_begin_index(self, begin_index: int = 0): method set_timesteps (line 348) | def set_timesteps( method _threshold_sample (line 516) | def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: method _sigma_to_t (line 554) | def _sigma_to_t(self, sigma, log_sigmas): method _sigma_to_alpha_sigma_t (line 582) | def _sigma_to_alpha_sigma_t(self, sigma): method _convert_to_karras (line 593) | def _convert_to_karras( method _convert_to_exponential (line 621) | def _convert_to_exponential( method _convert_to_beta (line 647) | def _convert_to_beta( method convert_model_output (line 682) | def convert_model_output( method multistep_uni_p_bh_update (line 755) | def multistep_uni_p_bh_update( method multistep_uni_c_bh_update (line 887) | def multistep_uni_c_bh_update( method index_for_timestep (line 1028) | def index_for_timestep(self, timestep, schedule_timesteps=None): method _init_step_index (line 1048) | def _init_step_index(self, timestep): method step (line 1060) | def step( method scale_model_input (line 1147) | def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> ... method add_noise (line 1163) | def add_noise( method __len__ (line 1203) | def __len__(self): FILE: python/sglang/multimodal_gen/runtime/models/utils.py function set_weight_attrs (line 12) | def set_weight_attrs( function _make_synced_weight_loader (line 46) | def _make_synced_weight_loader(original_weight_loader) -> Any: function extract_layer_index (line 55) | def extract_layer_index(layer_name: str) -> int: function modulate (line 77) | def modulate( function pred_noise_to_pred_video (line 95) | def pred_noise_to_pred_video( FILE: python/sglang/multimodal_gen/runtime/models/vaes/autoencoder.py class AutoencoderKL (line 27) | class AutoencoderKL(nn.Module): method __init__ (line 65) | def __init__( method enable_tiling (line 138) | def enable_tiling(self, use_tiling: bool = True): method disable_tiling (line 146) | def disable_tiling(self): method enable_slicing (line 153) | def enable_slicing(self): method disable_slicing (line 160) | def disable_slicing(self): method attn_processors (line 169) | def attn_processors(self) -> Dict[str, AttentionProcessor]: method set_attn_processor (line 197) | def set_attn_processor( method set_default_attn_processor (line 234) | def set_default_attn_processor(self): method _encode (line 255) | def _encode(self, x: torch.Tensor) -> torch.Tensor: method encode (line 269) | def encode( method _decode (line 297) | def _decode( method decode (line 316) | def decode(self, z: torch.FloatTensor) -> Union[DecoderOutput, torch.F... method blend_v (line 338) | def blend_v( method blend_h (line 348) | def blend_h( method _tiled_encode (line 358) | def _tiled_encode(self, x: torch.Tensor) -> torch.Tensor: method tiled_encode (line 411) | def tiled_encode( method tiled_decode (line 480) | def tiled_decode( method forward (line 536) | def forward( method fuse_qkv_projections (line 559) | def fuse_qkv_projections(self): FILE: python/sglang/multimodal_gen/runtime/models/vaes/autoencoder_dc.py class AutoencoderDC (line 14) | class AutoencoderDC(nn.Module): method __init__ (line 17) | def __init__(self, config: SanaVAEConfig = None, **kwargs): method _ensure_inner_model (line 23) | def _ensure_inner_model(self, state_dict: dict[str, torch.Tensor] | No... method config (line 71) | def config(self): method dtype (line 77) | def dtype(self): method device (line 83) | def device(self): method encode (line 88) | def encode(self, x: torch.Tensor, **kwargs): method decode (line 92) | def decode(self, z: torch.Tensor, **kwargs): method forward (line 97) | def forward(self, x: torch.Tensor, **kwargs): method load_state_dict (line 101) | def load_state_dict( method state_dict (line 110) | def state_dict(self, *args, **kwargs) -> dict[str, torch.Tensor]: method load_weights (line 114) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) ->... method to (line 122) | def to(self, *args, **kwargs): FILE: python/sglang/multimodal_gen/runtime/models/vaes/autoencoder_kl_flux2.py class AutoencoderKLFlux2 (line 25) | class AutoencoderKLFlux2(ParallelTiledVAE): method __init__ (line 38) | def __init__( method attn_processors (line 125) | def attn_processors(self) -> Dict[str, AttentionProcessor]: method set_attn_processor (line 153) | def set_attn_processor( method set_default_attn_processor (line 190) | def set_default_attn_processor(self): method _encode (line 211) | def _encode(self, x: torch.Tensor) -> torch.Tensor: method encode (line 225) | def encode( method _decode (line 254) | def _decode( method decode (line 273) | def decode( method blend_v (line 298) | def blend_v( method blend_h (line 308) | def blend_h( method _tiled_encode (line 318) | def _tiled_encode(self, x: torch.Tensor) -> torch.Tensor: method tiled_encode (line 371) | def tiled_encode( method tiled_decode (line 439) | def tiled_decode( method forward (line 495) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/vaes/autoencoder_kl_qwenimage.py class QwenImageCausalConv3d (line 25) | class QwenImageCausalConv3d(nn.Conv3d): method __init__ (line 40) | def __init__( method forward (line 67) | def forward(self, x, cache_x=None): class QwenImageRMS_norm (line 77) | class QwenImageRMS_norm(nn.Module): method __init__ (line 89) | def __init__( method forward (line 105) | def forward(self, x): class QwenImageUpsample (line 114) | class QwenImageUpsample(nn.Upsample): method forward (line 122) | def forward(self, x): class QwenImageResample (line 126) | class QwenImageResample(nn.Module): method __init__ (line 140) | def __init__(self, dim: int, mode: str) -> None: method forward (line 175) | def forward(self, x, feat_cache=None, feat_idx=[0]): class QwenImageResidualBlock (line 240) | class QwenImageResidualBlock(nn.Module): method __init__ (line 251) | def __init__( method forward (line 275) | def forward(self, x, feat_cache=None, feat_idx=[0]): class QwenImageAttentionBlock (line 330) | class QwenImageAttentionBlock(nn.Module): method __init__ (line 338) | def __init__(self, dim): method forward (line 347) | def forward(self, x): class QwenImageMidBlock (line 379) | class QwenImageMidBlock(nn.Module): method __init__ (line 389) | def __init__( method forward (line 410) | def forward(self, x, feat_cache=None, feat_idx=[0]): class QwenImageEncoder3d (line 424) | class QwenImageEncoder3d(nn.Module): method __init__ (line 439) | def __init__( method forward (line 504) | def forward(self, x, feat_cache=None, feat_idx=[0]): class QwenImageUpBlock (line 556) | class QwenImageUpBlock(nn.Module): method __init__ (line 569) | def __init__( method forward (line 603) | def forward(self, x, feat_cache=None, feat_idx=[0]): class QwenImageDecoder3d (line 629) | class QwenImageDecoder3d(nn.Module): method __init__ (line 644) | def __init__( method forward (line 711) | def forward(self, x, feat_cache=None, feat_idx=[0]): class AutoencoderKLQwenImage (line 761) | class AutoencoderKLQwenImage(ParallelTiledVAE): method __init__ (line 772) | def __init__( method enable_tiling (line 844) | def enable_tiling( method disable_tiling (line 874) | def disable_tiling(self) -> None: method enable_slicing (line 881) | def enable_slicing(self) -> None: method disable_slicing (line 888) | def disable_slicing(self) -> None: method clear_cache (line 895) | def clear_cache(self): method _encode (line 911) | def _encode(self, x: torch.Tensor): method encode (line 935) | def encode( method _decode (line 959) | def _decode(self, z: torch.Tensor, return_dict: bool = True): method decode (line 984) | def decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[D... method blend_v (line 1006) | def blend_v(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int)... method blend_h (line 1014) | def blend_h(self, a: torch.Tensor, b: torch.Tensor, blend_extent: int)... method tiled_encode (line 1022) | def tiled_encode(self, x: torch.Tensor) -> AutoencoderKLOutput: method tiled_decode (line 1088) | def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> U... method forward (line 1151) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/vaes/common.py class ParallelTiledVAE (line 24) | class ParallelTiledVAE(ABC, nn.Module): method __init__ (line 36) | def __init__(self, config: VAEConfig, **kwargs) -> None: method device (line 51) | def device(self): method temporal_compression_ratio (line 55) | def temporal_compression_ratio(self) -> int: method spatial_compression_ratio (line 59) | def spatial_compression_ratio(self) -> int: method scaling_factor (line 63) | def scaling_factor(self) -> float | torch.Tensor: method _encode (line 67) | def _encode(self, *args, **kwargs) -> torch.Tensor: method _decode (line 71) | def _decode(self, *args, **kwargs) -> torch.Tensor: method encode (line 74) | def encode(self, x: torch.Tensor) -> DiagonalGaussianDistribution: method decode (line 92) | def decode(self, z: torch.Tensor) -> torch.Tensor: method blend_v (line 121) | def blend_v( method blend_h (line 131) | def blend_h( method blend_t (line 141) | def blend_t( method spatial_tiled_encode (line 151) | def spatial_tiled_encode(self, x: torch.Tensor) -> torch.Tensor: method _parallel_data_generator (line 206) | def _parallel_data_generator( method parallel_tiled_decode (line 223) | def parallel_tiled_decode(self, z: torch.FloatTensor) -> torch.FloatTe... method _merge_spatial_tiles (line 369) | def _merge_spatial_tiles( method spatial_tiled_decode (line 385) | def spatial_tiled_decode(self, z: torch.Tensor) -> torch.Tensor: method tiled_encode (line 441) | def tiled_encode(self, x: torch.Tensor) -> torch.Tensor: method tiled_decode (line 472) | def tiled_decode(self, z: torch.Tensor) -> torch.Tensor: method enable_tiling (line 516) | def enable_tiling( method disable_tiling (line 579) | def disable_tiling(self) -> None: class DiagonalGaussianDistribution (line 588) | class DiagonalGaussianDistribution: method __init__ (line 590) | def __init__(self, parameters: torch.Tensor, deterministic: bool = Fal... method sample (line 602) | def sample(self, generator: torch.Generator | None = None) -> torch.Te... method kl (line 613) | def kl( method nll (line 636) | def nll( method mode (line 647) | def mode(self) -> torch.Tensor: FILE: python/sglang/multimodal_gen/runtime/models/vaes/dac.py function snake (line 22) | def snake(x, alpha): class Snake1d (line 30) | class Snake1d(nn.Module): method __init__ (line 31) | def __init__(self, channels): method forward (line 35) | def forward(self, x): class VectorQuantize (line 39) | class VectorQuantize(nn.Module): method __init__ (line 51) | def __init__(self, input_dim: int, codebook_size: int, codebook_dim: i... method forward (line 60) | def forward(self, z): method embed_code (line 91) | def embed_code(self, embed_id): method decode_code (line 94) | def decode_code(self, embed_id): method decode_latents (line 97) | def decode_latents(self, latents): class ResidualVectorQuantize (line 116) | class ResidualVectorQuantize(nn.Module): method __init__ (line 122) | def __init__( method forward (line 150) | def forward(self, z, n_quantizers: int = None): method from_codes (line 240) | def from_codes(self, codes: torch.Tensor): method from_latents (line 263) | def from_latents(self, latents: torch.Tensor): class ResidualUnit (line 292) | class ResidualUnit(nn.Module): method __init__ (line 293) | def __init__(self, dim: int = 16, dilation: int = 1): method forward (line 303) | def forward(self, x): class EncoderBlock (line 311) | class EncoderBlock(nn.Module): method __init__ (line 312) | def __init__(self, dim: int = 16, stride: int = 1): method forward (line 328) | def forward(self, x): class Encoder (line 332) | class Encoder(nn.Module): method __init__ (line 333) | def __init__( method forward (line 358) | def forward(self, x): class DecoderBlock (line 362) | class DecoderBlock(nn.Module): method __init__ (line 363) | def __init__(self, input_dim: int = 16, output_dim: int = 8, stride: i... method forward (line 380) | def forward(self, x): class Decoder (line 384) | class Decoder(nn.Module): method __init__ (line 385) | def __init__( method forward (line 412) | def forward(self, x): class DAC (line 416) | class DAC(nn.Module): method __init__ (line 417) | def __init__( method init_weights (line 466) | def init_weights(m): method dtype (line 472) | def dtype(self): method device (line 476) | def device(self): method preprocess (line 479) | def preprocess(self, audio_data, sample_rate): method encode (line 490) | def encode( method decode (line 535) | def decode(self, z: torch.Tensor): method forward (line 565) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/vaes/hunyuan3d_vae.py class CrossAttentionProcessor (line 23) | class CrossAttentionProcessor: method __call__ (line 24) | def __call__(self, attn, q, k, v): class FlashVDMCrossAttentionProcessor (line 29) | class FlashVDMCrossAttentionProcessor: method __init__ (line 30) | def __init__(self, topk=None): method __call__ (line 33) | def __call__(self, attn, q, k, v): method select_topkv (line 67) | def select_topkv(self, q_chunk, k, v, topk): class FlashVDMTopMCrossAttentionProcessor (line 78) | class FlashVDMTopMCrossAttentionProcessor(FlashVDMCrossAttentionProcessor): method select_topkv (line 79) | def select_topkv(self, q_chunk, k, v, topk): class FourierEmbedder (line 98) | class FourierEmbedder(nn.Module): method __init__ (line 99) | def __init__( method get_dims (line 127) | def get_dims(self, input_dim): method forward (line 133) | def forward(self, x: torch.Tensor) -> torch.Tensor: class DropPath (line 148) | class DropPath(nn.Module): method __init__ (line 151) | def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): method forward (line 156) | def forward(self, x): method extra_repr (line 169) | def extra_repr(self): class MLP (line 173) | class MLP(nn.Module): method __init__ (line 174) | def __init__( method forward (line 193) | def forward(self, x): class QKVMultiheadCrossAttention (line 197) | class QKVMultiheadCrossAttention(nn.Module): method __init__ (line 198) | def __init__( method forward (line 223) | def forward(self, q, kv): class MultiheadCrossAttention (line 241) | class MultiheadCrossAttention(nn.Module): method __init__ (line 242) | def __init__( method forward (line 272) | def forward(self, x, data): class ResidualCrossAttentionBlock (line 288) | class ResidualCrossAttentionBlock(nn.Module): method __init__ (line 289) | def __init__( method forward (line 320) | def forward(self, x: torch.Tensor, data: torch.Tensor): class QKVMultiheadAttention (line 326) | class QKVMultiheadAttention(nn.Module): method __init__ (line 327) | def __init__( method forward (line 350) | def forward(self, qkv): class MultiheadAttention (line 368) | class MultiheadAttention(nn.Module): method __init__ (line 369) | def __init__( method forward (line 397) | def forward(self, x): class ResidualAttentionBlock (line 404) | class ResidualAttentionBlock(nn.Module): method __init__ (line 405) | def __init__( method forward (line 430) | def forward(self, x: torch.Tensor): class Transformer (line 436) | class Transformer(nn.Module): method __init__ (line 437) | def __init__( method forward (line 468) | def forward(self, x: torch.Tensor): class CrossAttentionDecoder (line 474) | class CrossAttentionDecoder(nn.Module): method __init__ (line 476) | def __init__( method set_cross_attention_processor (line 515) | def set_cross_attention_processor(self, processor): method forward (line 518) | def forward(self, queries=None, query_embeddings=None, latents=None): function generate_dense_grid_points (line 535) | def generate_dense_grid_points( function extract_near_surface_volume_fn (line 554) | def extract_near_surface_volume_fn(input_tensor: torch.Tensor, alpha: fl... class VanillaVolumeDecoder (line 620) | class VanillaVolumeDecoder: method __call__ (line 624) | def __call__( class HierarchicalVolumeDecoding (line 672) | class HierarchicalVolumeDecoding: method __call__ (line 676) | def __call__( class FlashVDMVolumeDecoding (line 800) | class FlashVDMVolumeDecoding: method __init__ (line 803) | def __init__(self, topk_mode="mean"): method __call__ (line 813) | def __call__( class Latent2MeshOutput (line 1004) | class Latent2MeshOutput: method __init__ (line 1007) | def __init__(self, mesh_v=None, mesh_f=None): function center_vertices (line 1012) | def center_vertices(vertices): class SurfaceExtractor (line 1020) | class SurfaceExtractor: method _compute_box_stat (line 1023) | def _compute_box_stat( method run (line 1038) | def run(self, *args, **kwargs): method __call__ (line 1041) | def __call__(self, grid_logits, **kwargs): class MCSurfaceExtractor (line 1057) | class MCSurfaceExtractor(SurfaceExtractor): method run (line 1060) | def run(self, grid_logit, *, mc_level, bounds, octree_resolution, **kw... class DMCSurfaceExtractor (line 1073) | class DMCSurfaceExtractor(SurfaceExtractor): method run (line 1076) | def run(self, grid_logit, *, octree_resolution, **kwargs): class VectsetVAE (line 1102) | class VectsetVAE(nn.Module): method __init__ (line 1105) | def __init__(self, volume_decoder=None, surface_extractor=None): method latents2mesh (line 1114) | def latents2mesh(self, latents: torch.FloatTensor, **kwargs): method enable_flashvdm_decoder (line 1120) | def enable_flashvdm_decoder( class ShapeVAE (line 1143) | class ShapeVAE(VectsetVAE): method __init__ (line 1148) | def __init__( method forward (line 1211) | def forward(self, latents): method decode (line 1216) | def decode(self, latents): FILE: python/sglang/multimodal_gen/runtime/models/vaes/hunyuanvae.py function prepare_causal_attention_mask (line 30) | def prepare_causal_attention_mask( class HunyuanVAEAttention (line 47) | class HunyuanVAEAttention(nn.Module): method __init__ (line 49) | def __init__( method forward (line 73) | def forward( class HunyuanVideoCausalConv3d (line 114) | class HunyuanVideoCausalConv3d(nn.Module): method __init__ (line 116) | def __init__( method forward (line 149) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class HunyuanVideoUpsampleCausal3D (line 156) | class HunyuanVideoUpsampleCausal3D(nn.Module): method __init__ (line 158) | def __init__( method forward (line 176) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class HunyuanVideoDownsampleCausal3D (line 205) | class HunyuanVideoDownsampleCausal3D(nn.Module): method __init__ (line 207) | def __init__( method forward (line 223) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class HunyuanVideoResnetBlockCausal3D (line 228) | class HunyuanVideoResnetBlockCausal3D(nn.Module): method __init__ (line 230) | def __init__( method forward (line 257) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class HunyuanVideoMidBlock3D (line 277) | class HunyuanVideoMidBlock3D(nn.Module): method __init__ (line 279) | def __init__( method forward (line 340) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class HunyuanVideoDownBlock3D (line 392) | class HunyuanVideoDownBlock3D(nn.Module): method __init__ (line 394) | def __init__( method forward (line 441) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class HunyuanVideoUpBlock3D (line 456) | class HunyuanVideoUpBlock3D(nn.Module): method __init__ (line 458) | def __init__( method forward (line 504) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class HunyuanVideoEncoder3D (line 520) | class HunyuanVideoEncoder3D(nn.Module): method __init__ (line 525) | def __init__( method forward (line 616) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class HunyuanVideoDecoder3D (line 641) | class HunyuanVideoDecoder3D(nn.Module): method __init__ (line 646) | def __init__( method forward (line 737) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class AutoencoderKLHunyuanVideo (line 763) | class AutoencoderKLHunyuanVideo(ParallelTiledVAE): method __init__ (line 774) | def __init__( method _encode (line 820) | def _encode(self, x: torch.Tensor) -> torch.Tensor: method _decode (line 825) | def _decode(self, z: torch.Tensor) -> torch.Tensor: method forward (line 830) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/vaes/ltx_2_audio.py class LTX2AudioCausalConv2d (line 18) | class LTX2AudioCausalConv2d(nn.Module): method __init__ (line 23) | def __init__( method forward (line 66) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LTX2AudioPixelNorm (line 71) | class LTX2AudioPixelNorm(nn.Module): method __init__ (line 76) | def __init__(self, dim: int = 1, eps: float = 1e-8) -> None: method forward (line 81) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LTX2AudioAttnBlock (line 87) | class LTX2AudioAttnBlock(nn.Module): method __init__ (line 88) | def __init__( method forward (line 111) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LTX2AudioResnetBlock (line 131) | class LTX2AudioResnetBlock(nn.Module): method __init__ (line 132) | def __init__( method forward (line 228) | def forward( class LTX2AudioDownsample (line 253) | class LTX2AudioDownsample(nn.Module): method __init__ (line 254) | def __init__( method forward (line 269) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LTX2AudioUpsample (line 294) | class LTX2AudioUpsample(nn.Module): method __init__ (line 295) | def __init__( method forward (line 318) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LTX2AudioAudioPatchifier (line 336) | class LTX2AudioAudioPatchifier: method __init__ (line 341) | def __init__( method patchify (line 355) | def patchify(self, audio_latents: torch.Tensor) -> torch.Tensor: method unpatchify (line 359) | def unpatchify( method patch_size (line 366) | def patch_size(self) -> Tuple[int, int, int]: class LTX2AudioEncoder (line 370) | class LTX2AudioEncoder(nn.Module): method __init__ (line 371) | def __init__( method forward (line 513) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class LTX2AudioDecoder (line 538) | class LTX2AudioDecoder(nn.Module): method __init__ (line 546) | def __init__( method forward (line 699) | def forward( class AutoencoderKLLTX2Audio (line 764) | class AutoencoderKLLTX2Audio(ParallelTiledVAE): method __init__ (line 771) | def __init__( method _encode (line 855) | def _encode(self, x: torch.Tensor) -> torch.Tensor: method encode (line 858) | def encode(self, x: torch.Tensor, return_dict: bool = True): method _decode (line 870) | def _decode(self, z: torch.Tensor) -> torch.Tensor: method decode (line 873) | def decode( method forward (line 887) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/vaes/ltx_2_vae.py class PerChannelRMSNorm (line 17) | class PerChannelRMSNorm(nn.Module): method __init__ (line 27) | def __init__(self, channel_dim: int = 1, eps: float = 1e-8) -> None: method forward (line 37) | def forward( class LTX2VideoCausalConv3d (line 52) | class LTX2VideoCausalConv3d(nn.Module): method __init__ (line 53) | def __init__( method forward (line 90) | def forward(self, hidden_states: torch.Tensor, causal: bool = True) ->... class LTX2VideoResnetBlock3d (line 115) | class LTX2VideoResnetBlock3d(nn.Module): method __init__ (line 136) | def __init__( method forward (line 197) | def forward( class LTXVideoDownsampler3d (line 265) | class LTXVideoDownsampler3d(nn.Module): method __init__ (line 266) | def __init__( method forward (line 292) | def forward(self, hidden_states: torch.Tensor, causal: bool = True) ->... class LTXVideoUpsampler3d (line 319) | class LTXVideoUpsampler3d(nn.Module): method __init__ (line 320) | def __init__( method forward (line 346) | def forward(self, hidden_states: torch.Tensor, causal: bool = True) ->... class LTX2VideoDownBlock3D (line 398) | class LTX2VideoDownBlock3D(nn.Module): method __init__ (line 424) | def __init__( method forward (line 498) | def forward( class LTX2VideoMidBlock3d (line 524) | class LTX2VideoMidBlock3d(nn.Module): method __init__ (line 545) | def __init__( method forward (line 582) | def forward( class LTX2VideoUpBlock3d (line 613) | class LTX2VideoUpBlock3d(nn.Module): method __init__ (line 639) | def __init__( method forward (line 709) | def forward( class LTX2VideoEncoder3d (line 746) | class LTX2VideoEncoder3d(nn.Module): method __init__ (line 777) | def __init__( method forward (line 862) | def forward( class LTX2VideoDecoder3d (line 917) | class LTX2VideoDecoder3d(nn.Module): method __init__ (line 945) | def __init__( method forward (line 1044) | def forward( class AutoencoderKLLTX2Video (line 1107) | class AutoencoderKLLTX2Video(ParallelTiledVAE): method __init__ (line 1117) | def __init__(self, config: LTXVideoVAEConfig): method enable_tiling (line 1204) | def enable_tiling( method _encode (line 1248) | def _encode(self, x: torch.Tensor, causal: Optional[bool] = None) -> t... method encode (line 1263) | def encode( method _decode (line 1291) | def _decode( method decode (line 1326) | def decode( method blend_v (line 1366) | def blend_v( method blend_h (line 1376) | def blend_h( method blend_t (line 1386) | def blend_t( method tiled_encode (line 1396) | def tiled_encode( method tiled_decode (line 1466) | def tiled_decode( method _temporal_tiled_encode (line 1556) | def _temporal_tiled_encode( method _temporal_tiled_decode (line 1595) | def _temporal_tiled_decode( method forward (line 1654) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/vaes/parallel/wan_common_utils.py class AvgDown3D (line 10) | class AvgDown3D(nn.Module): method __init__ (line 11) | def __init__( method forward (line 28) | def forward(self, x: torch.Tensor) -> torch.Tensor: class DupUp3D (line 63) | class DupUp3D(nn.Module): method __init__ (line 64) | def __init__( method forward (line 82) | def forward(self, x: torch.Tensor) -> torch.Tensor: class WanCausalConv3d (line 109) | class WanCausalConv3d(nn.Conv3d): method __init__ (line 117) | def __init__( method forward (line 144) | def forward(self, x, cache_x=None): class WanRMS_norm (line 157) | class WanRMS_norm(nn.Module): method __init__ (line 162) | def __init__( method forward (line 178) | def forward(self, x): class WanUpsample (line 187) | class WanUpsample(nn.Upsample): method forward (line 192) | def forward(self, x): function bind_context (line 203) | def bind_context( function _ensure_bound (line 222) | def _ensure_bound(): function resample_forward (line 233) | def resample_forward(self, x): function residual_block_forward (line 315) | def residual_block_forward(self, x): function attention_block_forward (line 379) | def attention_block_forward(self, x): function mid_block_forward (line 411) | def mid_block_forward(self, x): function residual_down_block_forward (line 425) | def residual_down_block_forward(self, x): function residual_up_block_forward (line 435) | def residual_up_block_forward(self, x): function up_block_forward (line 451) | def up_block_forward(self, x): FILE: python/sglang/multimodal_gen/runtime/models/vaes/parallel/wan_dist_utils.py function tensor_pad (line 31) | def tensor_pad(x: torch.Tensor, len_to_pad: int, dim: int = -2): function tensor_chunk (line 48) | def tensor_chunk(x: torch.Tensor, dim: int = -2, world_size: int = 1, ra... function split_for_parallel_encode (line 61) | def split_for_parallel_encode( function ensure_local_height (line 75) | def ensure_local_height(x: torch.Tensor, expected_local_height: int | No... function split_for_parallel_decode (line 86) | def split_for_parallel_decode( function gather_and_trim_height (line 94) | def gather_and_trim_height(x: torch.Tensor, expected_height: int | None): function _ensure_recv_buf (line 103) | def _ensure_recv_buf( function halo_exchange (line 116) | def halo_exchange( class WanDistConv2d (line 168) | class WanDistConv2d(nn.Conv2d): method __init__ (line 169) | def __init__( method forward (line 208) | def forward(self, x): class WanDistCausalConv3d (line 250) | class WanDistCausalConv3d(nn.Conv3d): method __init__ (line 251) | def __init__( method forward (line 297) | def forward(self, x, cache_x=None): class WanDistZeroPad2d (line 349) | class WanDistZeroPad2d(nn.Module): method __init__ (line 352) | def __init__(self, padding: tuple[int, int, int, int]) -> None: method forward (line 358) | def forward(self, x: torch.Tensor) -> torch.Tensor: class WanDistResample (line 368) | class WanDistResample(nn.Module): method __init__ (line 382) | def __init__(self, dim: int, mode: str, upsample_out_dim: int = None) ... method forward (line 422) | def forward(self, x): class WanDistResidualBlock (line 426) | class WanDistResidualBlock(nn.Module): method __init__ (line 437) | def __init__( method forward (line 461) | def forward(self, x): class WanDistAttentionBlock (line 465) | class WanDistAttentionBlock(nn.Module): method __init__ (line 473) | def __init__(self, dim) -> None: method forward (line 485) | def forward(self, x): class WanDistMidBlock (line 496) | class WanDistMidBlock(nn.Module): method __init__ (line 506) | def __init__( method forward (line 527) | def forward(self, x): class WanDistResidualDownBlock (line 531) | class WanDistResidualDownBlock(nn.Module): method __init__ (line 532) | def __init__( method forward (line 565) | def forward(self, x): class WanDistResidualUpBlock (line 569) | class WanDistResidualUpBlock(nn.Module): method __init__ (line 582) | def __init__( method forward (line 628) | def forward(self, x): class WanDistUpBlock (line 632) | class WanDistUpBlock(nn.Module): method __init__ (line 645) | def __init__( method forward (line 679) | def forward(self, x): FILE: python/sglang/multimodal_gen/runtime/models/vaes/wanvae.py function forward_context (line 78) | def forward_context( class WanResample (line 94) | class WanResample(nn.Module): method __init__ (line 108) | def __init__(self, dim: int, mode: str, upsample_out_dim: int = None) ... method forward (line 145) | def forward(self, x): class WanResidualBlock (line 149) | class WanResidualBlock(nn.Module): method __init__ (line 160) | def __init__( method forward (line 182) | def forward(self, x): class WanAttentionBlock (line 186) | class WanAttentionBlock(nn.Module): method __init__ (line 194) | def __init__(self, dim) -> None: method forward (line 203) | def forward(self, x): class WanMidBlock (line 207) | class WanMidBlock(nn.Module): method __init__ (line 217) | def __init__( method forward (line 238) | def forward(self, x): class WanResidualDownBlock (line 242) | class WanResidualDownBlock(nn.Module): method __init__ (line 244) | def __init__( method forward (line 277) | def forward(self, x): class WanEncoder3d (line 281) | class WanEncoder3d(nn.Module): method __init__ (line 296) | def __init__( method forward (line 392) | def forward(self, x): class WanResidualUpBlock (line 467) | class WanResidualUpBlock(nn.Module): method __init__ (line 480) | def __init__( method forward (line 526) | def forward(self, x): class WanUpBlock (line 530) | class WanUpBlock(nn.Module): method __init__ (line 543) | def __init__( method forward (line 575) | def forward(self, x): class WanDecoder3d (line 579) | class WanDecoder3d(nn.Module): method __init__ (line 594) | def __init__( method forward (line 698) | def forward(self, x): function patchify (line 769) | def patchify(x, patch_size): function unpatchify (line 788) | def unpatchify(x, patch_size): class AutoencoderKLWan (line 805) | class AutoencoderKLWan(ParallelTiledVAE): method __init__ (line 813) | def __init__( method clear_cache (line 867) | def clear_cache(self) -> None: method encode (line 886) | def encode(self, x: torch.Tensor) -> torch.Tensor: method _encode (line 918) | def _encode(self, x: torch.Tensor, first_frame=False) -> torch.Tensor: method tiled_encode (line 926) | def tiled_encode(self, x: torch.Tensor) -> torch.Tensor: method spatial_tiled_encode (line 935) | def spatial_tiled_encode(self, x: torch.Tensor) -> torch.Tensor: method decode (line 944) | def decode(self, z: torch.Tensor) -> torch.Tensor: method _decode (line 973) | def _decode(self, z: torch.Tensor, first_frame=False) -> torch.Tensor: method tiled_decode (line 982) | def tiled_decode(self, z: torch.Tensor) -> torch.Tensor: method spatial_tiled_decode (line 989) | def spatial_tiled_decode(self, z: torch.Tensor) -> torch.Tensor: method parallel_tiled_decode (line 995) | def parallel_tiled_decode(self, z: torch.FloatTensor) -> torch.FloatTe... method forward (line 1002) | def forward( FILE: python/sglang/multimodal_gen/runtime/models/vision_utils.py function pil_to_numpy (line 36) | def pil_to_numpy(images: list[PIL.Image.Image] | PIL.Image.Image) -> np.... function numpy_to_pt (line 56) | def numpy_to_pt(images: np.ndarray) -> torch.Tensor: function normalize (line 75) | def normalize(images: np.ndarray | torch.Tensor) -> np.ndarray | torch.T... function load_image (line 91) | def load_image( function load_video (line 132) | def load_video( function get_default_height_width (line 210) | def get_default_height_width( function resize (line 258) | def resize( FILE: python/sglang/multimodal_gen/runtime/models/vocoder/ltx_2_vocoder.py class ResBlock (line 12) | class ResBlock(nn.Module): method __init__ (line 13) | def __init__( method forward (line 54) | def forward(self, x: torch.Tensor) -> torch.Tensor: class LTX2Vocoder (line 64) | class LTX2Vocoder(ABC, nn.Module): method __init__ (line 69) | def __init__( method forward (line 142) | def forward( FILE: python/sglang/multimodal_gen/runtime/pipelines/comfyui_flux_pipeline.py class ComfyUIFluxPipeline (line 31) | class ComfyUIFluxPipeline(LoRAPipeline, ComposedPipelineBase): method initialize_pipeline (line 62) | def initialize_pipeline(self, server_args: ServerArgs): method load_modules (line 82) | def load_modules( method _load_and_convert_weights_from_safetensors (line 106) | def _load_and_convert_weights_from_safetensors( method _convert_comfyui_weights (line 282) | def _convert_comfyui_weights( method _load_transformer_from_safetensors (line 403) | def _load_transformer_from_safetensors( method create_pipeline_stages (line 664) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/comfyui_qwen_image_pipeline.py class ComfyUIQwenImagePipelineBase (line 43) | class ComfyUIQwenImagePipelineBase(LoRAPipeline, ComposedPipelineBase): method initialize_pipeline (line 69) | def initialize_pipeline(self, server_args: ServerArgs): method load_modules (line 86) | def load_modules( method _load_transformer_from_safetensors (line 110) | def _load_transformer_from_safetensors( method _prepare_dit_config_and_mapping (line 149) | def _prepare_dit_config_and_mapping(self, server_args: ServerArgs): method _instantiate_model (line 198) | def _instantiate_model( method _load_weights_into_model (line 258) | def _load_weights_into_model( method create_pipeline_stages (line 291) | def create_pipeline_stages(self, server_args: ServerArgs): class ComfyUIQwenImagePipeline (line 314) | class ComfyUIQwenImagePipeline(ComfyUIQwenImagePipelineBase): class ComfyUIQwenImageEditPipeline (line 329) | class ComfyUIQwenImageEditPipeline(ComfyUIQwenImagePipelineBase): FILE: python/sglang/multimodal_gen/runtime/pipelines/comfyui_zimage_pipeline.py class ComfyUIZImagePipeline (line 46) | class ComfyUIZImagePipeline(LoRAPipeline, ComposedPipelineBase): method initialize_pipeline (line 77) | def initialize_pipeline(self, server_args: ServerArgs): method load_modules (line 99) | def load_modules( method _convert_comfyui_qkv_weights (line 123) | def _convert_comfyui_qkv_weights( method _load_transformer_from_safetensors (line 193) | def _load_transformer_from_safetensors( method create_pipeline_stages (line 379) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/diffusers_pipeline.py class DiffusersExecutionStage (line 47) | class DiffusersExecutionStage(PipelineStage): method __init__ (line 50) | def __init__(self, diffusers_pipe: DiffusionPipeline): method forward (line 54) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: method _filter_pipeline_kwargs (line 84) | def _filter_pipeline_kwargs( method _extract_output (line 130) | def _extract_output(self, output: Any) -> torch.Tensor | None: method _convert_to_tensor (line 150) | def _convert_to_tensor(self, data: Any) -> torch.Tensor | None: method _convert_list_to_tensor (line 174) | def _convert_list_to_tensor(self, data: list) -> torch.Tensor | None: method _postprocess_output (line 209) | def _postprocess_output(self, output: torch.Tensor) -> torch.Tensor: method _fix_output_shape (line 231) | def _fix_output_shape(self, output: torch.Tensor) -> torch.Tensor: method _build_pipeline_kwargs (line 259) | def _build_pipeline_kwargs(self, batch: Req) -> dict[str, Any]: method _get_generator_device (line 310) | def _get_generator_device(self, batch: Req) -> str: method _load_input_image (line 321) | def _load_input_image(self, batch: Req) -> Image.Image | None: class DiffusersPipeline (line 350) | class DiffusersPipeline(ComposedPipelineBase): method __init__ (line 362) | def __init__( method _load_diffusers_pipeline (line 384) | def _load_diffusers_pipeline( method _apply_vae_optimizations (line 481) | def _apply_vae_optimizations( method _apply_attention_backend (line 517) | def _apply_attention_backend( method _apply_cache_dit (line 565) | def _apply_cache_dit( method _apply_torch_compile (line 606) | def _apply_torch_compile(self, pipe: Any, server_args: ServerArgs) -> ... method _get_dtype (line 657) | def _get_dtype(self, server_args: ServerArgs) -> torch.dtype: method _detect_pipeline_type (line 674) | def _detect_pipeline_type(self) -> None: method load_modules (line 684) | def load_modules( method create_pipeline_stages (line 692) | def create_pipeline_stages(self, server_args: ServerArgs) -> None: method initialize_pipeline (line 699) | def initialize_pipeline(self, server_args: ServerArgs) -> None: method post_init (line 702) | def post_init(self) -> None: method add_stage (line 710) | def add_stage(self, stage_name: str, stage: PipelineStage) -> None: method stages (line 722) | def stages(self) -> list[PipelineStage]: method forward (line 727) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: method from_pretrained (line 734) | def from_pretrained( method get_module (line 758) | def get_module(self, module_name: str, default_value: Any = None) -> Any: FILE: python/sglang/multimodal_gen/runtime/pipelines/flux.py function calculate_shift (line 22) | def calculate_shift( function prepare_mu (line 35) | def prepare_mu(batch: Req, server_args: ServerArgs): class FluxPipeline (line 54) | class FluxPipeline(LoRAPipeline, ComposedPipelineBase): method create_pipeline_stages (line 67) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/flux_2.py function compute_empirical_mu (line 16) | def compute_empirical_mu(batch: Req, server_args: ServerArgs): class Flux2Pipeline (line 36) | class Flux2Pipeline(LoRAPipeline, ComposedPipelineBase): method create_pipeline_stages (line 47) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/flux_2_klein.py class Flux2KleinPipeline (line 4) | class Flux2KleinPipeline(Flux2Pipeline): FILE: python/sglang/multimodal_gen/runtime/pipelines/glm_image.py class GlmImagePipeline (line 15) | class GlmImagePipeline(LoRAPipeline, ComposedPipelineBase): method create_pipeline_stages (line 28) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/helios_pipeline.py class HeliosPipeline (line 28) | class HeliosPipeline(LoRAPipeline, ComposedPipelineBase): method initialize_pipeline (line 46) | def initialize_pipeline(self, server_args: ServerArgs): method create_pipeline_stages (line 62) | def create_pipeline_stages(self, server_args: ServerArgs) -> None: class HeliosPyramidPipeline (line 83) | class HeliosPyramidPipeline(HeliosPipeline): FILE: python/sglang/multimodal_gen/runtime/pipelines/hunyuan3d_pipeline.py class Hunyuan3D2Pipeline (line 45) | class Hunyuan3D2Pipeline(ComposedPipelineBase): method _load_config (line 61) | def _load_config(self) -> dict[str, Any]: method _resolve_class (line 74) | def _resolve_class(target: str) -> Any: method _resolve_shape_dir (line 103) | def _resolve_shape_dir( method _resolve_paint_dir (line 154) | def _resolve_paint_dir(model_path: str, subfolder: str) -> str: method _load_and_split_checkpoint (line 185) | def _load_and_split_checkpoint( method _load_dit_model (line 204) | def _load_dit_model( method _load_simple_component (line 247) | def _load_simple_component( method _instantiate_component (line 270) | def _instantiate_component(cls, cfg: dict[str, Any]) -> Any: method load_modules (line 279) | def load_modules( method initialize_pipeline (line 349) | def initialize_pipeline(self, server_args: ServerArgs): method create_pipeline_stages (line 357) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/hunyuan_pipeline.py class HunyuanVideoPipeline (line 26) | class HunyuanVideoPipeline(ComposedPipelineBase): method create_pipeline_stages (line 40) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/ltx_2_pipeline.py function calculate_shift (line 28) | def calculate_shift( function prepare_mu (line 41) | def prepare_mu(batch: Req, server_args: ServerArgs): function _load_component_config (line 69) | def _load_component_config(model_path: str, component_name: str): function _filter_kwargs_for_cls (line 98) | def _filter_kwargs_for_cls(cls, kwargs): class LTX2FlowMatchScheduler (line 104) | class LTX2FlowMatchScheduler(FlowMatchEulerDiscreteScheduler): method _time_shift_exponential (line 107) | def _time_shift_exponential(self, mu, sigma, t): class LTX2Pipeline (line 115) | class LTX2Pipeline(ComposedPipelineBase): method initialize_pipeline (line 130) | def initialize_pipeline(self, server_args: ServerArgs): method create_pipeline_stages (line 134) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/mova_pipeline.py class MOVAPipeline (line 29) | class MOVAPipeline(ComposedPipelineBase): method initialize_pipeline (line 48) | def initialize_pipeline(self, server_args: ServerArgs) -> None: method create_pipeline_stages (line 62) | def create_pipeline_stages(self, server_args: ServerArgs) -> None: class MOVAPipelineAlias (line 101) | class MOVAPipelineAlias(MOVAPipeline): FILE: python/sglang/multimodal_gen/runtime/pipelines/qwen_image.py function calculate_shift (line 22) | def calculate_shift( function prepare_mu (line 35) | def prepare_mu(batch: Req, server_args: ServerArgs): class QwenImagePipeline (line 53) | class QwenImagePipeline(LoRAPipeline, ComposedPipelineBase): method create_pipeline_stages (line 64) | def create_pipeline_stages(self, server_args: ServerArgs): class QwenImageEditPipeline (line 68) | class QwenImageEditPipeline(LoRAPipeline, ComposedPipelineBase): method create_pipeline_stages (line 80) | def create_pipeline_stages(self, server_args: ServerArgs): class QwenImageEditPlusPipeline (line 95) | class QwenImageEditPlusPipeline(QwenImageEditPipeline): function prepare_mu_layered (line 99) | def prepare_mu_layered(batch: Req, server_args: ServerArgs): class QwenImageLayeredPipeline (line 105) | class QwenImageLayeredPipeline(QwenImageEditPipeline): method create_pipeline_stages (line 116) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/sana.py class SanaPipeline (line 28) | class SanaPipeline(LoRAPipeline, ComposedPipelineBase): method create_pipeline_stages (line 39) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/wan_causal_dmd_pipeline.py class WanCausalDMDPipeline (line 28) | class WanCausalDMDPipeline(LoRAPipeline, ComposedPipelineBase): method create_pipeline_stages (line 39) | def create_pipeline_stages(self, server_args: ServerArgs) -> None: FILE: python/sglang/multimodal_gen/runtime/pipelines/wan_dmd_pipeline.py class WanDMDPipeline (line 32) | class WanDMDPipeline(LoRAPipeline, ComposedPipelineBase): method initialize_pipeline (line 47) | def initialize_pipeline(self, server_args: ServerArgs): method create_pipeline_stages (line 53) | def create_pipeline_stages(self, server_args: ServerArgs) -> None: FILE: python/sglang/multimodal_gen/runtime/pipelines/wan_i2v_dmd_pipeline.py class WanImageToVideoDmdPipeline (line 25) | class WanImageToVideoDmdPipeline(LoRAPipeline, ComposedPipelineBase): method initialize_pipeline (line 38) | def initialize_pipeline(self, server_args: ServerArgs): method create_pipeline_stages (line 43) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/wan_i2v_pipeline.py class WanImageToVideoPipeline (line 24) | class WanImageToVideoPipeline(LoRAPipeline, ComposedPipelineBase): method initialize_pipeline (line 37) | def initialize_pipeline(self, server_args: ServerArgs): method create_pipeline_stages (line 42) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines/wan_pipeline.py class WanPipeline (line 24) | class WanPipeline(LoRAPipeline, ComposedPipelineBase): method initialize_pipeline (line 39) | def initialize_pipeline(self, server_args: ServerArgs): method create_pipeline_stages (line 45) | def create_pipeline_stages(self, server_args: ServerArgs) -> None: FILE: python/sglang/multimodal_gen/runtime/pipelines/zimage_pipeline.py function calculate_shift (line 15) | def calculate_shift( function prepare_mu (line 28) | def prepare_mu(batch: Req, server_args: ServerArgs): class ZImagePipeline (line 46) | class ZImagePipeline(LoRAPipeline, ComposedPipelineBase): method create_pipeline_stages (line 57) | def create_pipeline_stages(self, server_args: ServerArgs): FILE: python/sglang/multimodal_gen/runtime/pipelines_core/__init__.py class PipelineWithLoRA (line 28) | class PipelineWithLoRA(LoRAPipeline, ComposedPipelineBase): function build_pipeline (line 34) | def build_pipeline( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/composed_pipeline_base.py class ComposedPipelineBase (line 47) | class ComposedPipelineBase(ABC): method is_lora_effective (line 67) | def is_lora_effective(self): method is_lora_set (line 70) | def is_lora_set(self): method __init__ (line 73) | def __init__( method build_executor (line 106) | def build_executor(self, server_args: ServerArgs): method __post_init__ (line 115) | def __post_init__(self) -> None: method get_module (line 122) | def get_module(self, module_name: str, default_value: Any = None) -> Any: method add_module (line 125) | def add_module(self, module_name: str, module: Any): method _load_config (line 128) | def _load_config(self) -> dict[str, Any]: method required_config_modules (line 136) | def required_config_modules(self) -> list[str]: method stages (line 154) | def stages(self) -> list[PipelineStage]: method create_pipeline_stages (line 161) | def create_pipeline_stages(self, server_args: ServerArgs): method initialize_pipeline (line 167) | def initialize_pipeline(self, server_args: ServerArgs): method _resolve_component_path (line 173) | def _resolve_component_path( method load_modules (line 186) | def load_modules( method _infer_stage_name (line 335) | def _infer_stage_name(stage: PipelineStage) -> str: method add_stage (line 343) | def add_stage( method add_stages (line 358) | def add_stages( method add_stage_if (line 370) | def add_stage_if( method get_stage (line 380) | def get_stage(self, stage_name: str) -> PipelineStage | None: method add_standard_text_encoding_stage (line 384) | def add_standard_text_encoding_stage( method add_standard_timestep_preparation_stage (line 396) | def add_standard_timestep_preparation_stage( method add_standard_latent_preparation_stage (line 408) | def add_standard_latent_preparation_stage( method add_standard_denoising_stage (line 420) | def add_standard_denoising_stage( method add_standard_decoding_stage (line 446) | def add_standard_decoding_stage( method add_standard_t2i_stages (line 459) | def add_standard_t2i_stages( method add_standard_ti2i_stages (line 479) | def add_standard_ti2i_stages( method add_standard_ti2v_stages (line 529) | def add_standard_ti2v_stages( method forward (line 600) | def forward( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/executors/parallel_executor.py class ParallelExecutor (line 29) | class ParallelExecutor(PipelineExecutor): method collect_from_main (line 35) | def collect_from_main(self, batches: list[Req]): method _execute (line 55) | def _execute( method execute (line 95) | def execute( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/executors/pipeline_executor.py class Timer (line 26) | class Timer(StageProfiler): method __init__ (line 32) | def __init__(self, name="Stage"): class PipelineExecutor (line 38) | class PipelineExecutor(ABC): method __init__ (line 46) | def __init__(self, server_args): method execute_with_profiling (line 49) | def execute_with_profiling( method execute (line 62) | def execute( method profile_execution (line 82) | def profile_execution(self, batch: Req, dump_rank: int = 0): FILE: python/sglang/multimodal_gen/runtime/pipelines_core/executors/sync_executor.py class SyncExecutor (line 19) | class SyncExecutor(PipelineExecutor): method run_profile_all_stages (line 24) | def run_profile_all_stages( method execute (line 40) | def execute( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/lora_format_adapter.py class LoRAFormat (line 13) | class LoRAFormat(str, Enum): function _sample_keys (line 25) | def _sample_keys(keys: Iterable[str], k: int = 20) -> list[str]: function _has_substring_key (line 34) | def _has_substring_key(keys: Iterable[str], substr: str) -> bool: function _has_prefix_key (line 38) | def _has_prefix_key(keys: Iterable[str], prefix: str) -> bool: function _looks_like_xlabs_flux_key (line 42) | def _looks_like_xlabs_flux_key(k: str) -> bool: function _looks_like_kohya_flux (line 60) | def _looks_like_kohya_flux(state_dict: Mapping[str, torch.Tensor]) -> bool: function _looks_like_non_diffusers_sd (line 72) | def _looks_like_non_diffusers_sd(state_dict: Mapping[str, torch.Tensor])... function _looks_like_wan_lora (line 82) | def _looks_like_wan_lora(state_dict: Mapping[str, torch.Tensor]) -> bool: function _looks_like_qwen_image (line 98) | def _looks_like_qwen_image(state_dict: Mapping[str, torch.Tensor]) -> bool: function _looks_like_ai_toolkit_flux_lora (line 108) | def _looks_like_ai_toolkit_flux_lora(state_dict: Mapping[str, torch.Tens... function detect_lora_format_from_state_dict (line 134) | def detect_lora_format_from_state_dict( function _convert_qwen_image_standard (line 168) | def _convert_qwen_image_standard( function _convert_non_diffusers_sd_simple (line 191) | def _convert_non_diffusers_sd_simple( function _convert_with_diffusers_utils_if_available (line 221) | def _convert_with_diffusers_utils_if_available( function _convert_via_diffusers_candidates (line 253) | def _convert_via_diffusers_candidates( function _convert_xlabs_ai_via_diffusers (line 289) | def _convert_xlabs_ai_via_diffusers( function _convert_kohya_flux_via_diffusers (line 317) | def _convert_kohya_flux_via_diffusers( function _convert_ai_toolkit_flux_lora (line 341) | def _convert_ai_toolkit_flux_lora( function convert_lora_state_dict_by_format (line 490) | def convert_lora_state_dict_by_format( function normalize_lora_state_dict (line 539) | def normalize_lora_state_dict( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/lora_pipeline.py class LoRAPipeline (line 37) | class LoRAPipeline(ComposedPipelineBase): method __init__ (line 70) | def __init__(self, *args, **kwargs) -> None: method is_target_layer (line 104) | def is_target_layer(self, module_name: str) -> bool: method _get_target_lora_layers (line 111) | def _get_target_lora_layers( method _temporarily_disable_offload (line 151) | def _temporarily_disable_offload( method convert_module_lora_layers (line 215) | def convert_module_lora_layers( method convert_to_lora_layers (line 258) | def convert_to_lora_layers(self) -> None: method _normalize_lora_params (line 303) | def _normalize_lora_params( method _check_lora_config_matches (line 357) | def _check_lora_config_matches( method _apply_lora_to_layers (line 392) | def _apply_lora_to_layers( method is_lora_effective (line 493) | def is_lora_effective(self, target: str = "all") -> bool: method is_lora_set (line 504) | def is_lora_set(self, target: str = "all") -> bool: method load_lora_adapter (line 517) | def load_lora_adapter(self, lora_path: str, lora_nickname: str, rank: ... method set_lora (line 586) | def set_lora( method merge_lora_weights (line 713) | def merge_lora_weights(self, target: str = "all", strength: float = 1.... method unmerge_lora_weights (line 767) | def unmerge_lora_weights(self, target: str = "all") -> None: method get_lora_status (line 817) | def get_lora_status(self) -> dict[str, Any]: FILE: python/sglang/multimodal_gen/runtime/pipelines_core/schedule_batch.py class Req (line 39) | class Req: method __init__ (line 160) | def __init__(self, **kwargs): method __getattr__ (line 175) | def __getattr__(self, name: str) -> Any: method __setattr__ (line 193) | def __setattr__(self, name: str, value: Any) -> None: method batch_size (line 226) | def batch_size(self): method output_file_path (line 239) | def output_file_path(self, num_outputs=1, output_idx=None): method set_as_warmup (line 249) | def set_as_warmup(self, warmup_steps: int = 1): method copy_as_warmup (line 256) | def copy_as_warmup(self, warmup_steps: int = 1) -> "Req": method validate (line 261) | def validate(self): method adjust_size (line 273) | def adjust_size(self, server_args: ServerArgs): method __str__ (line 276) | def __str__(self): method log (line 279) | def log(self, server_args: ServerArgs): class OutputBatch (line 323) | class OutputBatch: FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/base.py class StageParallelismType (line 28) | class StageParallelismType(Enum): class StageVerificationError (line 37) | class StageVerificationError(Exception): class PipelineStage (line 43) | class PipelineStage(ABC): method __init__ (line 52) | def __init__(self): method log_info (line 55) | def log_info(self, msg, *args): method log_warning (line 61) | def log_warning(self, msg, *args): method log_error (line 65) | def log_error(self, msg, *args): method log_debug (line 69) | def log_debug(self, msg, *args): method verify_input (line 73) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method maybe_free_model_hooks (line 91) | def maybe_free_model_hooks(self): method load_model (line 94) | def load_model(self): method offload_model (line 100) | def offload_model(self): method parallelism_type (line 108) | def parallelism_type(self) -> StageParallelismType: method verify_output (line 113) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... method _run_verification (line 125) | def _run_verification( method device (line 154) | def device(self) -> torch.device: method set_logging (line 160) | def set_logging(self, enable: bool): method __call__ (line 169) | def __call__( method forward (line 216) | def forward( method backward (line 234) | def backward( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/causal_denoising.py class CausalDMDDenoisingStage (line 26) | class CausalDMDDenoisingStage(DenoisingStage): method __init__ (line 31) | def __init__(self, transformer, scheduler) -> None: method forward (line 52) | def forward( method _initialize_kv_cache (line 391) | def _initialize_kv_cache(self, batch_size, dtype, device) -> None: method _initialize_crossattn_cache (line 437) | def _initialize_crossattn_cache( method verify_input (line 474) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/comfyui_latent_preparation.py class ComfyUILatentPreparationStage (line 26) | class ComfyUILatentPreparationStage(LatentPreparationStage): method _fix_tensor_device (line 35) | def _fix_tensor_device(value, target_device): method _has_tensor (line 54) | def _has_tensor(value): method forward (line 62) | def forward(self, batch, server_args): FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/decoding.py function _ensure_tensor_decode_output (line 31) | def _ensure_tensor_decode_output(decode_output): class DecodingStage (line 51) | class DecodingStage(PipelineStage): method __init__ (line 59) | def __init__(self, vae, pipeline=None, component_name: str = "vae") ->... method parallelism_type (line 66) | def parallelism_type(self) -> StageParallelismType: method verify_input (line 71) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 78) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... method scale_and_shift (line 85) | def scale_and_shift(self, latents: torch.Tensor, server_args): method decode (line 107) | def decode(self, latents: torch.Tensor, server_args: ServerArgs) -> to... method load_model (line 157) | def load_model(self): method offload_model (line 172) | def offload_model(self): method forward (line 189) | def forward( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/decoding_av.py class LTX2AVDecodingStage (line 14) | class LTX2AVDecodingStage(DecodingStage): method __init__ (line 19) | def __init__(self, vae, audio_vae, vocoder, pipeline=None): method forward (line 28) | def forward(self, batch: Req, server_args: ServerArgs) -> OutputBatch: FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/denoising.py class DenoisingStage (line 87) | class DenoisingStage(PipelineStage): method __init__ (line 95) | def __init__( method _maybe_enable_torch_compile (line 132) | def _maybe_enable_torch_compile(self, module: object) -> None: method _maybe_enable_cache_dit (line 164) | def _maybe_enable_cache_dit( method _build_guidance (line 353) | def _build_guidance(self, batch_size, target_dtype, device, guidance_v... method get_or_build_guidance (line 365) | def get_or_build_guidance(self, bsz: int, dtype, device): method parallelism_type (line 381) | def parallelism_type(self) -> StageParallelismType: method _preprocess_latents_for_ti2v (line 385) | def _preprocess_latents_for_ti2v( method _postprocess_latents_for_ti2v (line 444) | def _postprocess_latents_for_ti2v(self, z, reserved_frames_masks, batch): method _handle_boundary_ratio (line 497) | def _handle_boundary_ratio( method _prepare_denoising_loop (line 521) | def _prepare_denoising_loop(self, batch: Req, server_args: ServerArgs): method _post_denoising_loop (line 704) | def _post_denoising_loop( method _preprocess_sp_latents (line 787) | def _preprocess_sp_latents(self, batch: Req, server_args: ServerArgs): method _postprocess_sp_latents (line 808) | def _postprocess_sp_latents( method step_profile (line 832) | def step_profile(self): method _manage_device_placement (line 837) | def _manage_device_placement( method _select_and_manage_model (line 867) | def _select_and_manage_model( method expand_timestep_before_forward (line 890) | def expand_timestep_before_forward( method post_forward_for_ti2v_task (line 943) | def post_forward_for_ti2v_task( method forward (line 971) | def forward( method prepare_extra_func_kwargs (line 1135) | def prepare_extra_func_kwargs(self, func, kwargs) -> dict[str, Any]: method progress_bar (line 1159) | def progress_bar( method rescale_noise_cfg (line 1169) | def rescale_noise_cfg( method _build_attn_metadata (line 1198) | def _build_attn_metadata( method _predict_noise (line 1339) | def _predict_noise( method _predict_noise_with_cfg (line 1355) | def _predict_noise_with_cfg( method prepare_sta_param (line 1514) | def prepare_sta_param(self, batch: Req, server_args: ServerArgs): method save_sta_search_results (line 1628) | def save_sta_search_results(self, batch: Req): method verify_input (line 1666) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 1695) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/denoising_av.py class LTX2AVDenoisingStage (line 38) | class LTX2AVDenoisingStage(DenoisingStage): method __init__ (line 43) | def __init__(self, transformer, scheduler, vae=None, audio_vae=None, *... method _get_video_latent_num_frames_for_model (line 50) | def _get_video_latent_num_frames_for_model( method _truncate_sp_padded_token_latents (line 76) | def _truncate_sp_padded_token_latents( method _maybe_enable_cache_dit (line 100) | def _maybe_enable_cache_dit(self, num_inference_steps: int, batch: Req... method _resize_center_crop (line 110) | def _resize_center_crop( method _apply_video_codec_compression (line 116) | def _apply_video_codec_compression( method _resize_center_crop_tensor (line 143) | def _resize_center_crop_tensor( method _pil_to_normed_tensor (line 176) | def _pil_to_normed_tensor(img: PIL.Image.Image) -> torch.Tensor: method _should_apply_ltx2_ti2v (line 183) | def _should_apply_ltx2_ti2v(batch: Req) -> bool: method _prepare_ltx2_image_latent (line 199) | def _prepare_ltx2_image_latent(self, batch: Req, server_args: ServerAr... method forward (line 314) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: method _post_denoising_loop (line 648) | def _post_denoising_loop( method verify_input (line 712) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method do_classifier_free_guidance (line 755) | def do_classifier_free_guidance(self, batch: Req) -> bool: class LTX2RefinementStage (line 759) | class LTX2RefinementStage(LTX2AVDenoisingStage): method __init__ (line 760) | def __init__( method forward (line 766) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: method do_classifier_free_guidance (line 795) | def do_classifier_free_guidance(self, batch: Req) -> bool: FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/denoising_dmd.py class DmdDenoisingStage (line 24) | class DmdDenoisingStage(DenoisingStage): method __init__ (line 29) | def __init__(self, transformer, scheduler, transformer_2=None) -> None: method _preprocess_sp_latents (line 35) | def _preprocess_sp_latents(self, batch: Req, server_args: ServerArgs): method _postprocess_sp_latents (line 45) | def _postprocess_sp_latents( method forward (line 58) | def forward( method _select_and_manage_model (line 223) | def _select_and_manage_model( method _manage_device_placement (line 246) | def _manage_device_placement( method _handle_boundary_ratio (line 272) | def _handle_boundary_ratio( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/encoding.py class EncodingStage (line 28) | class EncodingStage(PipelineStage): method __init__ (line 36) | def __init__(self, vae: ParallelTiledVAE) -> None: method verify_input (line 41) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 48) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... method forward (line 55) | def forward( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/hunyuan3d_paint.py function guidance_scale_embedding (line 41) | def guidance_scale_embedding( function extract_into_tensor (line 59) | def extract_into_tensor( function get_predicted_original_sample (line 70) | def get_predicted_original_sample( function get_predicted_noise (line 99) | def get_predicted_noise( function to_rgb_image (line 128) | def to_rgb_image(maybe_rgba): class DDIMSolver (line 145) | class DDIMSolver: method __init__ (line 148) | def __init__( method to (line 166) | def to(self, device: torch.device) -> "DDIMSolver": method ddim_step (line 172) | def ddim_step( function _recorrect_rgb (line 187) | def _recorrect_rgb( class Hunyuan3DPaintPreprocessStage (line 229) | class Hunyuan3DPaintPreprocessStage(PipelineStage): method parallelism_type (line 237) | def parallelism_type(self) -> StageParallelismType: method __init__ (line 240) | def __init__(self, config: Hunyuan3D2PipelineConfig) -> None: method _do_uv_unwrap (line 250) | def _do_uv_unwrap(self, batch: Req, server_args: ServerArgs) -> Req: method _load_delight_model (line 272) | def _load_delight_model(self, server_args: ServerArgs): method _run_delight (line 322) | def _run_delight(self, image): method _do_delight (line 369) | def _do_delight(self, batch: Req, server_args: ServerArgs) -> Req: method _init_renderer (line 395) | def _init_renderer(self): method _render_multiview (line 408) | def _render_multiview(self, mesh) -> tuple: method forward (line 424) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: method verify_input (line 467) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 473) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... class Hunyuan3DPaintTexGenStage (line 486) | class Hunyuan3DPaintTexGenStage(PipelineStage): method __init__ (line 487) | def __init__( method parallelism_type (line 512) | def parallelism_type(self) -> StageParallelismType: method _load_paint_models (line 515) | def _load_paint_models(self, server_args: ServerArgs) -> None: method _do_load_paint (line 533) | def _do_load_paint(self, server_args: ServerArgs) -> None: method _convert_pil_list_to_tensor (line 594) | def _convert_pil_list_to_tensor( method _encode_images (line 623) | def _encode_images(self, images: torch.Tensor) -> torch.Tensor: method _compute_camera_index (line 633) | def _compute_camera_index(azim: float, elev: float) -> int: method _prepare_denoising_inputs (line 649) | def _prepare_denoising_inputs( method _denoise_loop (line 816) | def _denoise_loop( method _decode_latents (line 885) | def _decode_latents(self, latents: torch.Tensor) -> list: method forward (line 891) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: method verify_input (line 948) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 961) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... class Hunyuan3DPaintPostprocessStage (line 970) | class Hunyuan3DPaintPostprocessStage(PipelineStage): method parallelism_type (line 974) | def parallelism_type(self) -> StageParallelismType: method __init__ (line 977) | def __init__(self, config: Hunyuan3D2PipelineConfig) -> None: method forward (line 981) | def forward(self, batch: Req, server_args: ServerArgs) -> OutputBatch: method _cleanup_obj_artifacts (line 1035) | def _cleanup_obj_artifacts(obj_path: str) -> None: method verify_input (line 1048) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/hunyuan3d_shape.py function retrieve_timesteps (line 39) | def retrieve_timesteps( function _prepare_shape_image (line 84) | def _prepare_shape_image(image_processor, image, mask=None) -> dict: function _move_to_device (line 106) | def _move_to_device(payload, device, dtype): class Hunyuan3DShapeBeforeDenoisingStage (line 117) | class Hunyuan3DShapeBeforeDenoisingStage(PipelineStage): method __init__ (line 124) | def __init__( method _validate_input (line 141) | def _validate_input(self, batch: Req, server_args: ServerArgs) -> None: method _prepare_latents (line 157) | def _prepare_latents(self, batch_size, dtype, device, generator): method forward (line 164) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: method verify_input (line 229) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 237) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... class Hunyuan3DShapeDenoisingStage (line 245) | class Hunyuan3DShapeDenoisingStage(DenoisingStage): method __init__ (line 248) | def __init__(self, transformer: Any, scheduler: Any, **kwargs) -> None: method _prepare_denoising_loop (line 251) | def _prepare_denoising_loop(self, batch: Req, server_args: ServerArgs): method _predict_noise (line 321) | def _predict_noise( method _predict_noise_with_cfg (line 335) | def _predict_noise_with_cfg( method verify_input (line 385) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 396) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... class Hunyuan3DShapeExportStage (line 402) | class Hunyuan3DShapeExportStage(PipelineStage): method __init__ (line 405) | def __init__(self, vae: Any, config: Hunyuan3D2PipelineConfig) -> None: method forward (line 410) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: method verify_input (line 449) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 454) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... class Hunyuan3DShapeSaveStage (line 460) | class Hunyuan3DShapeSaveStage(PipelineStage): method __init__ (line 463) | def __init__(self, config: Hunyuan3D2PipelineConfig) -> None: method _get_output_paths (line 467) | def _get_output_paths(self, batch: Req) -> tuple[str, str]: method forward (line 478) | def forward(self, batch: Req, server_args: ServerArgs) -> Req | Output... method verify_input (line 515) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/image_encoding.py class ImageEncodingStage (line 44) | class ImageEncodingStage(PipelineStage): method __init__ (line 52) | def __init__( method load_model (line 69) | def load_model(self): method offload_model (line 74) | def offload_model(self): method move_to_device (line 78) | def move_to_device(self, device): method encoding_qwen_image_edit (line 90) | def encoding_qwen_image_edit(self, outputs, image_inputs): method forward (line 96) | def forward( method verify_input (line 209) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 219) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... class ImageVAEEncodingStage (line 226) | class ImageVAEEncodingStage(PipelineStage): method __init__ (line 234) | def __init__(self, vae: ParallelTiledVAE, **kwargs) -> None: method load_model (line 238) | def load_model(self): method offload_model (line 241) | def offload_model(self): method forward (line 245) | def forward( method retrieve_latents (line 370) | def retrieve_latents( method preprocess (line 383) | def preprocess( method verify_input (line 400) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 419) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/input_validation.py class InputValidationStage (line 37) | class InputValidationStage(PipelineStage): method __init__ (line 47) | def __init__(self, vae_image_processor=None): method _calculate_dimensions_from_area (line 52) | def _calculate_dimensions_from_area( method _generate_seeds (line 70) | def _generate_seeds(self, batch: Req, server_args: ServerArgs): method preprocess_condition_image (line 92) | def preprocess_condition_image( method forward (line 233) | def forward( method verify_input (line 333) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 365) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/latent_preparation.py class LatentPreparationStage (line 25) | class LatentPreparationStage(PipelineStage): method __init__ (line 33) | def __init__(self, scheduler, transformer) -> None: method forward (line 38) | def forward( method adjust_video_length (line 107) | def adjust_video_length(self, batch: Req, server_args: ServerArgs) -> ... method verify_input (line 124) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 144) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/latent_preparation_av.py class LTX2AVLatentPreparationStage (line 21) | class LTX2AVLatentPreparationStage(LatentPreparationStage): method __init__ (line 26) | def __init__(self, scheduler, transformer=None, audio_vae=None): method verify_input (line 30) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method forward (line 56) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/model_specific_stages/glm_image.py function calculate_shift (line 25) | def calculate_shift( function retrieve_timesteps (line 36) | def retrieve_timesteps( function retrieve_latents (line 91) | def retrieve_latents( class GlmImageBeforeDenoisingStage (line 106) | class GlmImageBeforeDenoisingStage(PipelineStage): method __init__ (line 130) | def __init__( method _parse_and_expand_shape_info (line 165) | def _parse_and_expand_shape_info( method _build_image_grid_thw (line 196) | def _build_image_grid_thw( method _calculate_ar_generation_params (line 227) | def _calculate_ar_generation_params( method _extract_large_image_tokens (line 250) | def _extract_large_image_tokens( method _upsample_d32_to_d16 (line 265) | def _upsample_d32_to_d16( method _compute_generation_params (line 296) | def _compute_generation_params( method _upsample_token_ids (line 320) | def _upsample_token_ids( method generate_prior_tokens (line 330) | def generate_prior_tokens( method get_glyph_texts (line 410) | def get_glyph_texts(self, prompt): method _get_glyph_embeds (line 420) | def _get_glyph_embeds( method encode_prompt (line 461) | def encode_prompt( method prepare_latents (line 536) | def prepare_latents( method check_inputs (line 561) | def check_inputs( method guidance_scale (line 605) | def guidance_scale(self): method do_classifier_free_guidance (line 609) | def do_classifier_free_guidance(self): method num_timesteps (line 613) | def num_timesteps(self): method attention_kwargs (line 617) | def attention_kwargs(self): method current_timestep (line 621) | def current_timestep(self): method interrupt (line 625) | def interrupt(self): method forward (line 629) | def forward( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/model_specific_stages/helios_decoding.py class HeliosDecodingStage (line 21) | class HeliosDecodingStage(DecodingStage): method forward (line 36) | def forward( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/model_specific_stages/helios_denoising.py function optimized_scale (line 31) | def optimized_scale(positive_flat, negative_flat): function calculate_shift (line 40) | def calculate_shift( function sample_block_noise (line 53) | def sample_block_noise( class HeliosChunkedDenoisingStage (line 80) | class HeliosChunkedDenoisingStage(PipelineStage): method __init__ (line 89) | def __init__(self, transformer, scheduler): method parallelism_type (line 95) | def parallelism_type(self): method _denoise_one_chunk (line 98) | def _denoise_one_chunk( method _denoise_one_chunk_stage2 (line 226) | def _denoise_one_chunk_stage2( method forward (line 442) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/model_specific_stages/mova.py class MOVALatentPreparationStage (line 75) | class MOVALatentPreparationStage(PipelineStage): method __init__ (line 78) | def __init__(self, audio_vae, require_vae_embedding: bool = True) -> N... method forward (line 83) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: class MOVATimestepPreparationStage (line 121) | class MOVATimestepPreparationStage(PipelineStage): method __init__ (line 124) | def __init__(self, scheduler) -> None: method forward (line 128) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: class MOVADenoisingStage (line 145) | class MOVADenoisingStage(PipelineStage): method __init__ (line 148) | def __init__(self, video_dit, video_dit_2, audio_dit, dual_tower_bridg... method parallelism_type (line 160) | def parallelism_type(self) -> StageParallelismType: method _predict (line 165) | def _predict( method _cfg_combine (line 196) | def _cfg_combine(self, pos, neg, guidance_scale, cfg_rank, enable_cfg_... method _maybe_enable_torch_compile (line 205) | def _maybe_enable_torch_compile(self, module: nn.Module, server_args: ... method _maybe_compile_dits (line 238) | def _maybe_compile_dits(self, server_args: ServerArgs): method verify_input (line 245) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 272) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... method progress_bar (line 279) | def progress_bar( method step_profile (line 289) | def step_profile(self): method rescale_noise_cfg (line 294) | def rescale_noise_cfg( method prepare_extra_func_kwargs (line 313) | def prepare_extra_func_kwargs(self, func, kwargs) -> dict[str, object]: method _build_attn_metadata (line 324) | def _build_attn_metadata( method _manage_device_placement (line 329) | def _manage_device_placement( method _select_visual_dit (line 350) | def _select_visual_dit( method _ensure_shared_models_on_device (line 368) | def _ensure_shared_models_on_device(self, server_args: ServerArgs): method _apply_guidance_rescale (line 373) | def _apply_guidance_rescale( method forward (line 400) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: method _shard_sequence_for_sp (line 596) | def _shard_sequence_for_sp( method _gather_sequence_from_sp (line 632) | def _gather_sequence_from_sp( method inference_single_step (line 657) | def inference_single_step( method forward_dual_tower_dit (line 787) | def forward_dual_tower_dit( class MOVADecodingStage (line 882) | class MOVADecodingStage(PipelineStage): method __init__ (line 885) | def __init__(self, video_vae, audio_vae) -> None: method parallelism_type (line 891) | def parallelism_type(self) -> StageParallelismType: method forward (line 897) | def forward(self, batch: Req, server_args: ServerArgs) -> OutputBatch: FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/model_specific_stages/qwen_image_layered.py function calculate_dimensions (line 22) | def calculate_dimensions(target_area, ratio): function retrieve_latents (line 33) | def retrieve_latents( function retrieve_timesteps (line 47) | def retrieve_timesteps( class QwenImageLayeredBeforeDenoisingStage (line 112) | class QwenImageLayeredBeforeDenoisingStage(PipelineStage): method __init__ (line 113) | def __init__( method _extract_masked_hidden (line 162) | def _extract_masked_hidden(self, hidden_states: torch.Tensor, mask: to... method get_image_caption (line 170) | def get_image_caption(self, prompt_image, use_en_prompt=True, device=N... method _get_qwen_prompt_embeds (line 196) | def _get_qwen_prompt_embeds( method _pack_latents (line 247) | def _pack_latents(latents, batch_size, num_channels_latents, height, w... method encode_prompt (line 259) | def encode_prompt( method _encode_vae_image (line 299) | def _encode_vae_image(self, image: torch.Tensor, generator: torch.Gene... method prepare_latents (line 329) | def prepare_latents( method forward (line 410) | def forward( FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/text_connector.py class LTX2TextConnectorStage (line 9) | class LTX2TextConnectorStage(PipelineStage): method __init__ (line 15) | def __init__(self, connectors): method forward (line 19) | def forward(self, batch: Req, server_args: ServerArgs) -> Req: FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/text_encoding.py class TextEncodingStage (line 31) | class TextEncodingStage(PipelineStage): method __init__ (line 39) | def __init__(self, text_encoders, tokenizers) -> None: method forward (line 49) | def forward( method verify_input (line 110) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method prepare_tokenizer_kwargs (line 130) | def prepare_tokenizer_kwargs(self, tokenizer_kwargs, **kwargs): method encode_text (line 136) | def encode_text( method verify_output (line 317) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/timestep_preparation.py class TimestepPreparationStage (line 33) | class TimestepPreparationStage(PipelineStage): method __init__ (line 41) | def __init__( method parallelism_type (line 55) | def parallelism_type(self) -> StageParallelismType: method forward (line 58) | def forward( method verify_input (line 140) | def verify_input(self, batch: Req, server_args: ServerArgs) -> Verific... method verify_output (line 151) | def verify_output(self, batch: Req, server_args: ServerArgs) -> Verifi... FILE: python/sglang/multimodal_gen/runtime/pipelines_core/stages/validators.py class StageValidators (line 17) | class StageValidators: method not_none (line 21) | def not_none(value: Any) -> bool: method positive_int (line 26) | def positive_int(value: Any) -> bool: method non_negative_int (line 31) | def non_negative_int(value: Any) -> bool: method positive_float (line 36) | def positive_float(value: Any) -> bool: method non_negative_float (line 41) | def non_negative_float(value: Any) -> bool: method divisible_by (line 46) | def divisible_by(value: Any, divisor: int) -> bool: method is_tensor (line 51) | def is_tensor(value: Any) -> bool: method tensor_with_dims (line 58) | def tensor_with_dims(value: Any, dims: int) -> bool: method tensor_min_dims (line 67) | def tensor_min_dims(value: Any, min_dims: int) -> bool: method tensor_shape_matches (line 76) | def tensor_shape_matches(value: Any, expected_shape: tuple) -> bool: method list_not_empty (line 88) | def list_not_empty(value: Any) -> bool: method list_length (line 93) | def list_length(value: Any, length: int) -> bool: method list_min_length (line 98) | def list_min_length(value: Any, min_length: int) -> bool: method string_not_empty (line 103) | def string_not_empty(value: Any) -> bool: method string_not_none (line 108) | def string_not_none(value: Any) -> bool: method string_or_list_strings (line 113) | def string_or_list_strings(value: Any) -> bool: method bool_value (line 122) | def bool_value(value: Any) -> bool: method generator_or_list_generators (line 127) | def generator_or_list_generators(value: Any) -> bool: method is_list (line 136) | def is_list(value: Any) -> bool: method is_tuple (line 141) | def is_tuple(value: Any) -> bool: method none_or_tensor (line 146) | def none_or_tensor(value: Any) -> bool: method list_of_tensors_with_dims (line 155) | def list_of_tensors_with_dims(value: Any, dims: int) -> bool: method list_of_tensors (line 169) | def list_of_tensors(value: Any) -> bool: method list_of_tensors_with_min_dims (line 181) | def list_of_tensors_with_min_dims(value: Any, min_dims: int) -> bool: method none_or_tensor_with_dims (line 195) | def none_or_tensor_with_dims(dims: int) -> Callable[[Any], bool]: method none_or_list (line 210) | def none_or_list(value: Any) -> bool: method none_or_positive_int (line 215) | def none_or_positive_int(value: Any) -> bool: method with_dims (line 221) | def with_dims(dims: int) -> Callable[[Any], bool]: method min_dims (line 230) | def min_dims(min_dims: int) -> Callable[[Any], bool]: method divisible (line 239) | def divisible(divisor: int) -> Callable[[Any], bool]: method positive_int_divisible (line 248) | def positive_int_divisible(divisor: int) -> Callable[[Any], bool]: method list_of_tensors_dims (line 261) | def list_of_tensors_dims(dims: int) -> Callable[[Any], bool]: method list_of_tensors_min_dims (line 270) | def list_of_tensors_min_dims(min_dims: int) -> Callable[[Any], bool]: class ValidationFailure (line 279) | class ValidationFailure: method __init__ (line 282) | def __init__( method __str__ (line 294) | def __str__(self) -> str: method _format_value (line 309) | def _format_value(self, value: Any) -> str: class VerificationResult (line 332) | class VerificationResult: method __init__ (line 335) | def __init__(self) -> None: method add_check (line 339) | def add_check( method _create_validation_failure (line 398) | def _create_validation_failure( method is_valid (line 490) | def is_valid(self) -> bool: method get_failed_fields (line 494) | def get_failed_fields(self) -> list[str]: method get_detailed_failures (line 498) | def get_detailed_failures(self) -> dict[str, list[ValidationFailure]]: method get_failure_summary (line 502) | def get_failure_summary(self) -> str: method to_dict (line 516) | def to_dict(self) -> dict: FILE: python/sglang/multimodal_gen/runtime/platforms/__init__.py function cuda_platform_plugin (line 21) | def cuda_platform_plugin() -> str | None: function mps_platform_plugin (line 61) | def mps_platform_plugin() -> str | None: function cpu_platform_plugin (line 77) | def cpu_platform_plugin() -> str | None: function rocm_platform_plugin (line 83) | def rocm_platform_plugin() -> str | None: function npu_platform_plugin (line 104) | def npu_platform_plugin() -> str | None: function musa_platform_plugin (line 122) | def musa_platform_plugin() -> str | None: function resolve_current_platform_cls_qualname (line 151) | def resolve_current_platform_cls_qualname() -> str: function __getattr__ (line 194) | def __getattr__(name: str): FILE: python/sglang/multimodal_gen/runtime/platforms/cpu.py class CpuPlatform (line 20) | class CpuPlatform(Platform): method get_cpu_architecture (line 27) | def get_cpu_architecture(cls) -> CpuArchEnum: method get_device_name (line 38) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_uuid (line 42) | def get_device_uuid(cls, device_id: int = 0) -> str: method get_device_total_memory (line 47) | def get_device_total_memory(cls, device_id: int = 0) -> int: method is_async_output_supported (line 52) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool: method get_current_memory_usage (line 56) | def get_current_memory_usage( method get_available_gpu_memory (line 63) | def get_available_gpu_memory( method get_device_communicator_cls (line 87) | def get_device_communicator_cls(cls) -> str: FILE: python/sglang/multimodal_gen/runtime/platforms/cuda.py function device_id_to_physical_device_id (line 40) | def device_id_to_physical_device_id(device_id: int) -> int: function with_nvml_context (line 59) | def with_nvml_context(fn: Callable[_P, _R]) -> Callable[_P, _R]: class CudaPlatformBase (line 71) | class CudaPlatformBase(Platform): method get_local_torch_device (line 79) | def get_local_torch_device(cls) -> torch.device: method get_device_capability (line 83) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability... method get_device_name (line 87) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_total_memory (line 92) | def get_device_total_memory(cls, device_id: int = 0) -> int: method is_async_output_supported (line 96) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool: method is_full_nvlink (line 107) | def is_full_nvlink(cls, device_ids: list[int]) -> bool: method log_warnings (line 111) | def log_warnings(cls) -> None: method get_current_memory_usage (line 115) | def get_current_memory_usage( method get_available_gpu_memory (line 122) | def get_available_gpu_memory( method get_attn_backend_cls_str (line 151) | def get_attn_backend_cls_str( method get_device_communicator_cls (line 382) | def get_device_communicator_cls(cls) -> str: class NvmlCudaPlatform (line 390) | class NvmlCudaPlatform(CudaPlatformBase): method get_device_capability (line 394) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability... method has_device_capability (line 406) | def has_device_capability( method get_device_name (line 419) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_uuid (line 426) | def get_device_uuid(cls, device_id: int = 0) -> str: method get_device_total_memory (line 434) | def get_device_total_memory(cls, device_id: int = 0) -> int: method is_full_nvlink (line 441) | def is_full_nvlink(cls, physical_device_ids: list[int]) -> bool: method _get_physical_device_name (line 466) | def _get_physical_device_name(cls, device_id: int = 0) -> str: method log_warnings (line 472) | def log_warnings(cls) -> None: class NonNvmlCudaPlatform (line 488) | class NonNvmlCudaPlatform(CudaPlatformBase): method get_device_capability (line 490) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability: method get_device_name (line 495) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_total_memory (line 500) | def get_device_total_memory(cls, device_id: int = 0) -> int: method is_full_nvlink (line 505) | def is_full_nvlink(cls, physical_device_ids: list[int]) -> bool: FILE: python/sglang/multimodal_gen/runtime/platforms/interface.py class AttentionBackendEnum (line 26) | class AttentionBackendEnum(enum.Enum): method __str__ (line 42) | def __str__(self): method is_sparse (line 46) | def is_sparse(self) -> bool: class PlatformEnum (line 57) | class PlatformEnum(enum.Enum): class CpuArchEnum (line 69) | class CpuArchEnum(enum.Enum): class DeviceCapability (line 75) | class DeviceCapability(NamedTuple): method as_version_str (line 79) | def as_version_str(self) -> str: method to_int (line 82) | def to_int(self) -> int: class Platform (line 92) | class Platform: method is_cuda (line 113) | def is_cuda(self) -> bool: method is_npu (line 117) | def is_npu(self) -> bool: method is_rocm (line 121) | def is_rocm(self) -> bool: method is_tpu (line 125) | def is_tpu(self) -> bool: method is_cpu (line 129) | def is_cpu(self) -> bool: method is_blackwell (line 134) | def is_blackwell(cls): method is_hopper (line 141) | def is_hopper(cls): method is_sm120 (line 148) | def is_sm120(cls): method is_cuda_static (line 154) | def is_cuda_static(cls) -> bool: method is_rocm_static (line 158) | def is_rocm_static(cls) -> bool: method is_hpu (line 162) | def is_hpu(self) -> bool: method is_xpu (line 166) | def is_xpu(self) -> bool: method is_npu (line 170) | def is_npu(self) -> bool: method is_out_of_tree (line 173) | def is_out_of_tree(self) -> bool: method is_cuda_alike (line 177) | def is_cuda_alike(self) -> bool: method is_mps (line 182) | def is_mps(self) -> bool: method is_musa (line 186) | def is_musa(self): method is_hip (line 193) | def is_hip(self) -> bool: method is_amp_supported (line 198) | def is_amp_supported(cls) -> bool: method get_local_torch_device (line 202) | def get_local_torch_device(cls) -> torch.device: method get_attn_backend_cls_str (line 206) | def get_attn_backend_cls_str( method get_device_capability (line 216) | def get_device_capability( method has_device_capability (line 224) | def has_device_capability( method get_device_name (line 247) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_uuid (line 252) | def get_device_uuid(cls, device_id: int = 0) -> str: method get_device_total_memory (line 258) | def get_device_total_memory(cls, device_id: int = 0) -> int: method get_device (line 263) | def get_device(self, local_rank: int) -> torch.device: method get_torch_distributed_backend_str (line 276) | def get_torch_distributed_backend_str(self) -> str: method is_async_output_supported (line 291) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool: method inference_mode (line 298) | def inference_mode(cls): method seed_everything (line 308) | def seed_everything(cls, seed: int | None = None) -> None: method verify_model_arch (line 322) | def verify_model_arch(cls, model_arch: str) -> None: method verify_quantization (line 334) | def verify_quantization(cls, quant: str) -> None: method get_current_memory_usage (line 345) | def get_current_memory_usage( method get_available_gpu_memory (line 354) | def get_available_gpu_memory( method get_device_communicator_cls (line 367) | def get_device_communicator_cls(cls) -> str: method get_cpu_architecture (line 374) | def get_cpu_architecture(cls) -> CpuArchEnum: method enable_dit_layerwise_offload_for_wan_by_default (line 379) | def enable_dit_layerwise_offload_for_wan_by_default(cls) -> bool: method optimize_vae (line 384) | def optimize_vae(cls, vae: torch.nn.Module) -> torch.nn.Module: method get_attn_backend (line 388) | def get_attn_backend(self, *args, **kwargs) -> AttentionImpl: class UnspecifiedPlatform (line 393) | class UnspecifiedPlatform(Platform): FILE: python/sglang/multimodal_gen/runtime/platforms/mps.py class MpsPlatform (line 22) | class MpsPlatform(Platform): method is_amp_supported (line 31) | def is_amp_supported(cls) -> bool: method get_local_torch_device (line 35) | def get_local_torch_device(cls) -> torch.device: method get_device_capability (line 39) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability... method get_device_name (line 43) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_uuid (line 47) | def get_device_uuid(cls, device_id: int = 0) -> str: method get_device_total_memory (line 52) | def get_device_total_memory(cls, device_id: int = 0) -> int: method is_async_output_supported (line 57) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool: method get_current_memory_usage (line 68) | def get_current_memory_usage( method get_available_gpu_memory (line 74) | def get_available_gpu_memory( method get_attn_backend_cls_str (line 98) | def get_attn_backend_cls_str( method get_device_communicator_cls (line 111) | def get_device_communicator_cls(cls) -> str: method seed_everything (line 116) | def seed_everything(cls, seed: int | None = None) -> None: FILE: python/sglang/multimodal_gen/runtime/platforms/musa.py function device_id_to_physical_device_id (line 36) | def device_id_to_physical_device_id(device_id: int) -> int: function with_mtml_context (line 55) | def with_mtml_context(fn: Callable[_P, _R]) -> Callable[_P, _R]: class MusaPlatformBase (line 67) | class MusaPlatformBase(Platform): method get_local_torch_device (line 75) | def get_local_torch_device(cls) -> torch.device: method get_device_capability (line 79) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability... method get_device_name (line 83) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_total_memory (line 88) | def get_device_total_memory(cls, device_id: int = 0) -> int: method is_async_output_supported (line 92) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool: method is_full_mtlink (line 103) | def is_full_mtlink(cls, device_ids: list[int]) -> bool: method log_warnings (line 107) | def log_warnings(cls) -> None: method get_current_memory_usage (line 111) | def get_current_memory_usage( method get_available_gpu_memory (line 118) | def get_available_gpu_memory( method get_attn_backend_cls_str (line 147) | def get_attn_backend_cls_str( method get_device_communicator_cls (line 159) | def get_device_communicator_cls(cls) -> str: class MtmlMusaPlatform (line 167) | class MtmlMusaPlatform(MusaPlatformBase): method get_device_capability (line 171) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability... method has_device_capability (line 183) | def has_device_capability( method get_device_name (line 196) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_uuid (line 203) | def get_device_uuid(cls, device_id: int = 0) -> str: method get_device_total_memory (line 211) | def get_device_total_memory(cls, device_id: int = 0) -> int: method is_full_mtlink (line 218) | def is_full_mtlink(cls, physical_device_ids: list[int]) -> bool: method _get_physical_device_name (line 243) | def _get_physical_device_name(cls, device_id: int = 0) -> str: method log_warnings (line 249) | def log_warnings(cls) -> None: class NonMtmlMusaPlatform (line 265) | class NonMtmlMusaPlatform(MusaPlatformBase): method get_device_capability (line 267) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability: method get_device_name (line 272) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_total_memory (line 277) | def get_device_total_memory(cls, device_id: int = 0) -> int: method is_full_mtlink (line 282) | def is_full_mtlink(cls, physical_device_ids: list[int]) -> bool: FILE: python/sglang/multimodal_gen/runtime/platforms/npu.py function device_id_to_physical_device_id (line 21) | def device_id_to_physical_device_id(device_id: int) -> int: class NPUPlatformBase (line 36) | class NPUPlatformBase(Platform): method get_local_torch_device (line 44) | def get_local_torch_device(cls) -> torch.device: method get_device_capability (line 48) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability: method get_device_name (line 52) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_total_memory (line 56) | def get_device_total_memory(cls, device_id: int = 0) -> int: method is_async_output_supported (line 61) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool: method is_full_nvlink (line 72) | def is_full_nvlink(cls, physical_device_ids: list[int]) -> bool: method get_available_gpu_memory (line 80) | def get_available_gpu_memory( method log_warnings (line 102) | def log_warnings(cls) -> None: method get_current_memory_usage (line 106) | def get_current_memory_usage( method get_attn_backend_cls_str (line 113) | def get_attn_backend_cls_str( method get_device_communicator_cls (line 125) | def get_device_communicator_cls(cls) -> str: method enable_dit_layerwise_offload_for_wan_by_default (line 129) | def enable_dit_layerwise_offload_for_wan_by_default(cls) -> bool: FILE: python/sglang/multimodal_gen/runtime/platforms/rocm.py class RocmPlatform (line 28) | class RocmPlatform(Platform): method get_local_torch_device (line 36) | def get_local_torch_device(cls) -> torch.device: method get_device_capability (line 40) | def get_device_capability(cls, device_id: int = 0) -> DeviceCapability: method get_device_name (line 45) | def get_device_name(cls, device_id: int = 0) -> str: method get_device_total_memory (line 50) | def get_device_total_memory(cls, device_id: int = 0) -> int: method is_async_output_supported (line 54) | def is_async_output_supported(cls, enforce_eager: bool | None) -> bool: method log_warnings (line 64) | def log_warnings(cls) -> None: method get_current_memory_usage (line 68) | def get_current_memory_usage(cls, device: torch.device | None = None) ... method get_available_gpu_memory (line 73) | def get_available_gpu_memory( method get_attn_backend_cls_str (line 95) | def get_attn_backend_cls_str( method get_device_communicator_cls (line 182) | def get_device_communicator_cls(cls) -> str: method optimize_vae (line 186) | def optimize_vae(cls, vae: torch.nn.Module) -> torch.nn.Module: method _replace_groupnorm (line 207) | def _replace_groupnorm(module: torch.nn.Module, aiter_gn_cls: type) ->... method enable_dit_layerwise_offload_for_wan_by_default (line 228) | def enable_dit_layerwise_offload_for_wan_by_default(cls) -> bool: FILE: python/sglang/multimodal_gen/runtime/postprocess/realesrgan_upscaler.py class SRVGGNetCompact (line 40) | class SRVGGNetCompact(nn.Module): method __init__ (line 47) | def __init__( method _make_act (line 78) | def _make_act(act_type: str, num_feat: int) -> nn.Module: method forward (line 88) | def forward(self, x: torch.Tensor) -> torch.Tensor: class ResidualDenseBlock (line 98) | class ResidualDenseBlock(nn.Module): method __init__ (line 101) | def __init__(self, num_feat: int = 64, num_grow_ch: int = 32): method forward (line 110) | def forward(self, x: torch.Tensor) -> torch.Tensor: class RRDB (line 119) | class RRDB(nn.Module): method __init__ (line 122) | def __init__(self, num_feat: int, num_grow_ch: int = 32): method forward (line 128) | def forward(self, x: torch.Tensor) -> torch.Tensor: class RRDBNet (line 135) | class RRDBNet(nn.Module): method __init__ (line 138) | def __init__( method forward (line 166) | def forward(self, x: torch.Tensor) -> torch.Tensor: function _build_net_from_state_dict (line 190) | def _build_net_from_state_dict(state_dict: dict) -> nn.Module: class UpscalerModel (line 255) | class UpscalerModel: method __init__ (line 258) | def __init__(self, net: nn.Module, scale: int): method device (line 263) | def device(self) -> torch.device: method upscale (line 266) | def upscale(self, frame: np.ndarray, outscale: float | None = None) ->... class ImageUpscaler (line 301) | class ImageUpscaler: method __init__ (line 309) | def __init__( method _ensure_model_loaded (line 319) | def _ensure_model_loaded(self) -> UpscalerModel: method upscale (line 381) | def upscale(self, frames: list[np.ndarray]) -> list[np.ndarray]: function _resolve_model_path (line 399) | def _resolve_model_path(model_path: str) -> str: function upscale_frames (line 453) | def upscale_frames( FILE: python/sglang/multimodal_gen/runtime/postprocess/rife_interpolator.py function warp (line 39) | def warp(tenInput: torch.Tensor, tenFlow: torch.Tensor) -> torch.Tensor: function _conv (line 72) | def _conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dil... class ResConv (line 88) | class ResConv(nn.Module): method __init__ (line 91) | def __init__(self, c: int, dilation: int = 1): method forward (line 97) | def forward(self, x: torch.Tensor) -> torch.Tensor: class IFBlock (line 101) | class IFBlock(nn.Module): method __init__ (line 104) | def __init__(self, in_planes: int, c: int = 64): method forward (line 125) | def forward( class Head (line 158) | class Head(nn.Module): method __init__ (line 161) | def __init__(self): method forward (line 169) | def forward(self, x: torch.Tensor) -> torch.Tensor: class IFNet (line 180) | class IFNet(nn.Module): method __init__ (line 183) | def __init__(self): method forward (line 191) | def forward( class Model (line 262) | class Model: method __init__ (line 265) | def __init__(self): method eval (line 269) | def eval(self) -> "Model": method device (line 273) | def device(self) -> torch.device: method load_model (line 276) | def load_model(self, path: str, strip_module_prefix: bool = True) -> N... method inference (line 305) | def inference( class FrameInterpolator (line 341) | class FrameInterpolator: method __init__ (line 349) | def __init__(self, model_path: Optional[str] = None): method _ensure_model_loaded (line 353) | def _ensure_model_loaded(self) -> Model: method _frame_to_tensor (line 384) | def _frame_to_tensor(frame: np.ndarray, device: torch.device) -> torch... method _tensor_to_frame (line 390) | def _tensor_to_frame(t: torch.Tensor) -> np.ndarray: method _make_inference (line 395) | def _make_inference( method interpolate (line 412) | def interpolate( function interpolate_video_frames (line 463) | def interpolate_video_frames( FILE: python/sglang/multimodal_gen/runtime/scheduler_client.py function run_zeromq_broker (line 13) | async def run_zeromq_broker(server_args: ServerArgs): class SchedulerClient (line 47) | class SchedulerClient: method __init__ (line 53) | def __init__(self): method initialize (line 58) | def initialize(self, server_args: ServerArgs): method forward (line 79) | def forward(self, batch: Any) -> Any: method ping (line 89) | def ping(self) -> bool: method close (line 113) | def close(self): class AsyncSchedulerClient (line 123) | class AsyncSchedulerClient: method __init__ (line 132) | def __init__(self): method initialize (line 136) | def initialize(self, server_args: ServerArgs): method forward (line 147) | async def forward(self, batch: Any) -> Any: method ping (line 173) | async def ping(self) -> bool: method close (line 197) | def close(self): FILE: python/sglang/multimodal_gen/runtime/server_args.py class Backend (line 53) | class Backend(str, Enum): method from_string (line 66) | def from_string(cls, value: str) -> "Backend": method choices (line 76) | def choices(cls) -> list[str]: class ServerArgs (line 82) | class ServerArgs: method broker_port (line 219) | def broker_port(self) -> int: method is_local_mode (line 223) | def is_local_mode(self) -> bool: method _adjust_path (line 229) | def _adjust_path(self): method _adjust_parameters (line 233) | def _adjust_parameters(self): method _validate_parameters (line 247) | def _validate_parameters(self): method _adjust_save_paths (line 254) | def _adjust_save_paths(self): method _adjust_quant_config (line 261) | def _adjust_quant_config(self): method adjust_pipeline_config (line 284) | def adjust_pipeline_config(self): method _adjust_offload (line 304) | def _adjust_offload(self): method _adjust_attention_backend (line 338) | def _adjust_attention_backend(self): method _adjust_warmup (line 368) | def _adjust_warmup(self): method _adjust_network_ports (line 377) | def _adjust_network_ports(self): method _adjust_parallelism (line 390) | def _adjust_parallelism(self): method _adjust_platform_specific (line 428) | def _adjust_platform_specific(self): method _adjust_autocast (line 447) | def _adjust_autocast(self): method _parse_attention_backend_config (line 451) | def _parse_attention_backend_config(self, config_str: str) -> dict[str... method __post_init__ (line 490) | def __post_init__(self): method add_cli_args (line 509) | def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentPa... method url (line 819) | def url(self): method scheduler_endpoint (line 831) | def scheduler_endpoint(self): method settle_port (line 841) | def settle_port( method _extract_component_paths (line 871) | def _extract_component_paths( method from_cli_args (line 907) | def from_cli_args( method from_dict (line 934) | def from_dict(cls, kwargs: dict[str, Any]) -> "ServerArgs": method load_config_file (line 958) | def load_config_file(config_file: str) -> dict[str, Any]: method from_kwargs (line 977) | def from_kwargs(cls, **kwargs: Any) -> "ServerArgs": method get_provided_args (line 986) | def get_provided_args( method _validate_pipeline (line 1011) | def _validate_pipeline(self): method _validate_offload (line 1017) | def _validate_offload(self): method _validate_parallelism (line 1060) | def _validate_parallelism(self): method _validate_cfg_parallel (line 1117) | def _validate_cfg_parallel(self): method _set_default_attention_backend (line 1123) | def _set_default_attention_backend(self) -> None: class PortArgs (line 1136) | class PortArgs: method from_server_args (line 1153) | def from_server_args( function prepare_server_args (line 1181) | def prepare_server_args(argv: list[str]) -> ServerArgs: function set_global_server_args (line 1192) | def set_global_server_args(server_args: ServerArgs): function get_global_server_args (line 1200) | def get_global_server_args() -> ServerArgs: FILE: python/sglang/multimodal_gen/runtime/utils/common.py function kill_process_tree (line 21) | def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid:... function add_prefix (line 60) | def add_prefix(name: str, prefix: str) -> str: function is_valid_ipv6_address (line 73) | def is_valid_ipv6_address(address: str) -> bool: function configure_ipv6 (line 81) | def configure_ipv6(dist_init_addr): function is_port_available (line 113) | def is_port_available(port): function get_zmq_socket (line 127) | def get_zmq_socket( function is_host_cpu_x86 (line 246) | def is_host_cpu_x86() -> bool: function set_cuda_arch (line 258) | def set_cuda_arch(): function set_musa_arch (line 267) | def set_musa_arch(): function get_bool_env_var (line 278) | def get_bool_env_var(name: str, default: str = "false") -> bool: function cpu_has_amx_support (line 312) | def cpu_has_amx_support(): function use_intel_amx_backend (line 316) | def use_intel_amx_backend(layer): FILE: python/sglang/multimodal_gen/runtime/utils/distributed.py function broadcast_pyobj (line 13) | def broadcast_pyobj( function generate_masked_orthogonal_rank_groups (line 61) | def generate_masked_orthogonal_rank_groups( class RankGenerator (line 165) | class RankGenerator(object): method __init__ (line 166) | def __init__( method get_mask (line 207) | def get_mask(self, order: str, token: str): method get_ranks (line 215) | def get_ranks(self, token): FILE: python/sglang/multimodal_gen/runtime/utils/hf_diffusers_utils.py function _check_index_files_for_missing_shards (line 53) | def _check_index_files_for_missing_shards( function _cleanup_model_cache (line 116) | def _cleanup_model_cache(model_path: str, reason: str) -> bool: function _ci_validate_diffusers_model (line 160) | def _ci_validate_diffusers_model(model_path: str) -> tuple[bool, bool]: function _verify_diffusers_model_complete (line 207) | def _verify_diffusers_model_complete(path: str) -> bool: function download_from_hf (line 247) | def download_from_hf(model_path: str): function get_hf_config (line 254) | def get_hf_config( function get_config (line 281) | def get_config( function load_dict (line 293) | def load_dict(file_path): function get_diffusers_component_config (line 310) | def get_diffusers_component_config( function attach_additional_stop_token_ids (line 353) | def attach_additional_stop_token_ids(tokenizer): function check_gguf_file (line 363) | def check_gguf_file(model: str | os.PathLike) -> bool: function maybe_download_lora (line 376) | def maybe_download_lora( function verify_model_config_and_directory (line 412) | def verify_model_config_and_directory(model_path: str) -> dict[str, Any]: function maybe_download_model_index (line 474) | def maybe_download_model_index(model_name_or_path: str) -> dict[str, Any]: function maybe_download_model (line 556) | def maybe_download_model( function hf_hub_download (line 757) | def hf_hub_download( function snapshot_download (line 784) | def snapshot_download( FILE: python/sglang/multimodal_gen/runtime/utils/layerwise_offload.py class LayerwiseOffloadManager (line 15) | class LayerwiseOffloadManager: method __init__ (line 29) | def __init__( method _match_layer_idx (line 75) | def _match_layer_idx(self, name: str) -> int | None: method _get_shared_empty_tensor (line 84) | def _get_shared_empty_tensor(self, dtype: torch.dtype) -> torch.Tensor: method _initialize (line 92) | def _initialize(self) -> None: method prepare_for_next_req (line 151) | def prepare_for_next_req(self, non_blocking=True): method get_target_with_name (line 160) | def get_target_with_name(self, name: str) -> torch.Tensor: method prefetch_layer (line 169) | def prefetch_layer(self, layer_idx: int, non_blocking: bool = True) ->... method release_layer (line 212) | def release_layer(self, layer_idx: int) -> None: method release_all (line 234) | def release_all(self) -> None: method load_all_layers (line 244) | def load_all_layers(self) -> None: method sync_layer_to_cpu (line 256) | def sync_layer_to_cpu(self, layer_idx: int) -> None: method sync_all_layers_to_cpu (line 278) | def sync_all_layers_to_cpu(self) -> None: method update_cpu_weights (line 289) | def update_cpu_weights( method iter_cpu_weights (line 350) | def iter_cpu_weights(self): method register_forward_hooks (line 368) | def register_forward_hooks(self) -> None: method remove_forward_hooks (line 407) | def remove_forward_hooks(self) -> None: class OffloadableDiTMixin (line 414) | class OffloadableDiTMixin: method configure_layerwise_offload (line 423) | def configure_layerwise_offload(self, server_args: ServerArgs): method prepare_for_next_req (line 453) | def prepare_for_next_req(self): method disable_offload (line 459) | def disable_offload(self) -> None: method enable_offload (line 468) | def enable_offload(self) -> None: function iter_materialized_weights (line 479) | def iter_materialized_weights(module: torch.nn.Module): FILE: python/sglang/multimodal_gen/runtime/utils/logging_utils.py class ColoredFormatter (line 75) | class ColoredFormatter(logging.Formatter): method format (line 83) | def format(self, record: logging.LogRecord) -> str: class SortedHelpFormatter (line 95) | class SortedHelpFormatter(argparse.HelpFormatter): method add_arguments (line 98) | def add_arguments(self, actions): function _print_info_once (line 104) | def _print_info_once(logger: Logger, msg: str) -> None: function _print_warning_once (line 110) | def _print_warning_once(logger: Logger, msg: str) -> None: function get_is_main_process (line 115) | def get_is_main_process(): function get_is_local_main_process (line 123) | def get_is_local_main_process(): function _log_process_aware (line 131) | def _log_process_aware( class _SGLDiffusionLogger (line 161) | class _SGLDiffusionLogger(Logger): method info_once (line 170) | def info_once(self, msg: str) -> None: method warning_once (line 177) | def warning_once(self, msg: str) -> None: method info (line 184) | def info( # type: ignore[override] method debug (line 193) | def debug( # type: ignore[override] method warning (line 202) | def warning( # type: ignore[override] method error (line 211) | def error( # type: ignore[override] function init_logger (line 221) | def init_logger(name: str) -> _SGLDiffusionLogger: function _is_torch_tensor (line 287) | def _is_torch_tensor(obj: Any) -> tuple[bool, Any]: function _sanitize_for_logging (line 297) | def _sanitize_for_logging(obj: Any, key_hint: str | None = None) -> Any: function _trace_calls (line 388) | def _trace_calls(log_path, root_dir, frame, event, arg=None): function enable_trace_function_call (line 431) | def enable_trace_function_call(log_file_path: str, root_dir: str | None ... function set_uvicorn_logging_configs (line 454) | def set_uvicorn_logging_configs(): function configure_logger (line 467) | def configure_logger(server_args, prefix: str = ""): function get_log_level (line 484) | def get_log_level() -> int: function suppress_loggers (line 489) | def suppress_loggers(loggers_to_suppress: list[str], level: int = loggin... function globally_suppress_loggers (line 500) | def globally_suppress_loggers(): function suppress_stdout (line 520) | def suppress_stdout(): class GenerationTimer (line 547) | class GenerationTimer: method __init__ (line 548) | def __init__(self): function log_generation_timer (line 555) | def log_generation_timer( function log_batch_completion (line 595) | def log_batch_completion( FILE: python/sglang/multimodal_gen/runtime/utils/mesh3d_utils.py function transform_pos (line 24) | def transform_pos( function get_mv_matrix (line 43) | def get_mv_matrix( function get_orthographic_projection_matrix (line 90) | def get_orthographic_projection_matrix( function get_perspective_projection_matrix (line 109) | def get_perspective_projection_matrix( function export_to_trimesh (line 127) | def export_to_trimesh(mesh_output: Any) -> Any: function mesh_uv_wrap (line 145) | def mesh_uv_wrap(mesh: Any) -> Any: function stride_from_shape (line 175) | def stride_from_shape(shape: Tuple[int, ...]) -> List[int]: function scatter_add_nd_with_count (line 183) | def scatter_add_nd_with_count( function linear_grid_put_2d (line 214) | def linear_grid_put_2d( class MeshRender (line 289) | class MeshRender: method __init__ (line 292) | def __init__( method set_default_render_resolution (line 345) | def set_default_render_resolution( method set_default_texture_resolution (line 353) | def set_default_texture_resolution(self, texture_size: Union[int, Tupl... method _rasterize (line 359) | def _rasterize( method _interpolate (line 376) | def _interpolate( method load_mesh (line 394) | def load_mesh( method save_mesh (line 441) | def save_mesh(self) -> trimesh.Trimesh: method get_mesh (line 454) | def get_mesh(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.nda... method set_texture (line 473) | def set_texture(self, tex: Union[np.ndarray, torch.Tensor, Image.Image]): method get_texture (line 489) | def get_texture(self) -> np.ndarray: method _get_pos_from_mvp (line 495) | def _get_pos_from_mvp( method render_normal (line 518) | def render_normal( method render_position (line 589) | def render_position( method render_normal_multiview (line 633) | def render_normal_multiview( method render_position_multiview (line 648) | def render_position_multiview( method _render_sketch_from_depth (line 660) | def _render_sketch_from_depth(self, depth_image: torch.Tensor) -> torc... method back_project (line 671) | def back_project( method bake_from_multiview (line 794) | def bake_from_multiview( method fast_bake_texture (line 826) | def fast_bake_texture( method texture_inpaint (line 849) | def texture_inpaint( function array_to_tensor (line 897) | def array_to_tensor(np_array): function recenter_image (line 906) | def recenter_image(image, border_ratio=0.2): class ImageProcessorV2 (line 943) | class ImageProcessorV2: method __init__ (line 952) | def __init__(self, size=512, border_ratio=None): method recenter (line 957) | def recenter(image, border_ratio: float = 0.2): method load_image (line 1005) | def load_image(self, image, border_ratio=0.15, to_tensor=True): method __call__ (line 1024) | def __call__(self, image, border_ratio=0.15, to_tensor=True, **kwargs): class MVImageProcessorV2 (line 1034) | class MVImageProcessorV2(ImageProcessorV2): method __init__ (line 1044) | def __init__(self, size=512, border_ratio=None): method __call__ (line 1048) | def __call__(self, image_dict, border_ratio=0.15, to_tensor=True, **kw... function resolve_hunyuan3d_tool (line 1080) | def resolve_hunyuan3d_tool(target: str): FILE: python/sglang/multimodal_gen/runtime/utils/perf_logger.py class MemorySnapshot (line 32) | class MemorySnapshot: method to_dict (line 38) | def to_dict(self) -> Dict[str, Any]: class RequestMetrics (line 48) | class RequestMetrics: method __init__ (line 51) | def __init__(self, request_id: str): method total_duration_s (line 60) | def total_duration_s(self) -> float: method record_stage (line 63) | def record_stage(self, stage_name: str, duration_s: float): method record_steps (line 67) | def record_steps(self, index: int, duration_s: float): method record_memory_snapshot (line 72) | def record_memory_snapshot(self, checkpoint_name: str, snapshot: Memor... method to_dict (line 75) | def to_dict(self) -> Dict[str, Any]: function get_diffusion_perf_log_dir (line 89) | def get_diffusion_perf_log_dir() -> str: function get_git_commit_hash (line 104) | def get_git_commit_hash() -> str: function capture_memory_snapshot (line 122) | def capture_memory_snapshot() -> MemorySnapshot: class RequestPerfRecord (line 145) | class RequestPerfRecord: method __init__ (line 157) | def __init__( class StageProfiler (line 182) | class StageProfiler: method __init__ (line 187) | def __init__( method __enter__ (line 204) | def __enter__(self): method __exit__ (line 222) | def __exit__(self, exc_type, exc_val, exc_tb): class PerformanceLogger (line 266) | class PerformanceLogger: method dump_benchmark_report (line 276) | def dump_benchmark_report( method log_request_summary (line 324) | def log_request_summary( FILE: python/sglang/multimodal_gen/runtime/utils/profiler.py class SGLDiffusionProfiler (line 21) | class SGLDiffusionProfiler: method __init__ (line 33) | def __init__( method start (line 103) | def start(self): method _step (line 107) | def _step(self): method step_stage (line 110) | def step_stage(self): method step_denoising_step (line 114) | def step_denoising_step(self): method get_instance (line 124) | def get_instance(cls) -> "SGLDiffusionProfiler": method stop (line 127) | def stop(self, export_trace: bool = True, dump_rank: int | None = None): method _export_trace (line 149) | def _export_trace(self): method _check_trace_integrity (line 169) | def _check_trace_integrity(self, trace_path: str) -> bool: FILE: python/sglang/multimodal_gen/runtime/utils/quantization_utils.py function find_quant_modelslim_config (line 18) | def find_quant_modelslim_config(model_config, component_model_path): function replace_prefix (line 31) | def replace_prefix(key: str, prefix_mapping: dict[str, str]) -> str: function get_quant_config (line 38) | def get_quant_config( function handle_fp8_metadata_format (line 114) | def handle_fp8_metadata_format(quant_config_dict): function get_quant_config_from_safetensors_metadata (line 124) | def get_quant_config_from_safetensors_metadata( function get_metadata_from_safetensors_file (line 163) | def get_metadata_from_safetensors_file(file_path: str): FILE: python/sglang/multimodal_gen/test/cli/test_generate_common.py class TestResult (line 25) | class TestResult: function run_command (line 31) | def run_command(command) -> Optional[float]: class CLIBase (line 51) | class CLIBase(unittest.TestCase): method setUp (line 61) | def setUp(self): method tearDown (line 71) | def tearDown(self): method get_base_command (line 79) | def get_base_command(self): method _run_command (line 92) | def _run_command(self, name: str, model_path: str, args=[]): method _run_test (line 105) | def _run_test(self, name: str, args, model_path: str, test_key: str): method verify (line 109) | def verify(self, status, name): method model_name (line 125) | def model_name(self): method test_single_gpu (line 128) | def test_single_gpu(self): FILE: python/sglang/multimodal_gen/test/cli/test_generate_i2i.py class TestQwenImageEditI2I (line 14) | class TestQwenImageEditI2I(CLIBase): method get_base_command (line 25) | def get_base_command(self): method verify_multi_output (line 36) | def verify_multi_output(self, name: str, num_outputs: int): method test_single_prompt_single_image (line 61) | def test_single_prompt_single_image(self): method test_single_prompt_multi_image (line 77) | def test_single_prompt_multi_image(self): method test_multi_prompt_multi_image (line 93) | def test_multi_prompt_multi_image(self): method test_multi_prompt_single_image (line 110) | def test_multi_prompt_single_image(self): FILE: python/sglang/multimodal_gen/test/cli/test_generate_t2i_perf.py class TestFlux_T2V (line 13) | class TestFlux_T2V(CLIBase): FILE: python/sglang/multimodal_gen/test/run_suite.py function parse_args (line 74) | def parse_args(): function collect_test_items (line 117) | def collect_test_items(files, filter_expr=None): function run_pytest (line 168) | def run_pytest(files, filter_expr=None): function _is_in_ci (line 251) | def _is_in_ci() -> bool: function _maybe_pin_update_weights_model_pair (line 255) | def _maybe_pin_update_weights_model_pair(suite_files_rel: list[str]) -> ... function main (line 272) | def main(): FILE: python/sglang/multimodal_gen/test/scripts/gen_diffusion_ci_outputs.py function main (line 24) | def main(): FILE: python/sglang/multimodal_gen/test/scripts/gen_perf_baselines.py function _all_cases (line 25) | def _all_cases() -> list[DiffusionTestCase]: function _baseline_path (line 42) | def _baseline_path() -> Path: function _openai_client (line 48) | def _openai_client(port: int) -> OpenAI: function _build_server_extra_args (line 52) | def _build_server_extra_args(case: DiffusionTestCase) -> str: function _build_env_vars (line 79) | def _build_env_vars(case: DiffusionTestCase) -> dict[str, str]: function _torch_cleanup (line 85) | def _torch_cleanup() -> None: function _run_case (line 102) | def _run_case(case: DiffusionTestCase) -> dict: function main (line 154) | def main() -> int: FILE: python/sglang/multimodal_gen/test/server/ascend/test_server_1_npu.py class TestDiffusionServerOneNpu (line 23) | class TestDiffusionServerOneNpu(DiffusionServerBase): method case (line 27) | def case(self, request) -> DiffusionTestCase: FILE: python/sglang/multimodal_gen/test/server/ascend/test_server_2_npu.py class TestDiffusionServerTwoNpu (line 23) | class TestDiffusionServerTwoNpu(DiffusionServerBase): method case (line 27) | def case(self, request) -> DiffusionTestCase: FILE: python/sglang/multimodal_gen/test/server/ascend/test_server_8_npu.py class TestDiffusionServerEightNpu (line 25) | class TestDiffusionServerEightNpu(DiffusionServerBase): method case (line 29) | def case(self, request) -> DiffusionTestCase: FILE: python/sglang/multimodal_gen/test/server/conftest.py function pytest_configure (line 8) | def pytest_configure(config): function add_perf_results (line 18) | def add_perf_results(config, results: list): function perf_config (line 29) | def perf_config(request): function _write_github_step_summary (line 34) | def _write_github_step_summary(content: str): function _write_results_json (line 42) | def _write_results_json(results: list, output_path: str = "diffusion-res... function _generate_diffusion_markdown_report (line 54) | def _generate_diffusion_markdown_report(results: list) -> str: function pytest_sessionfinish (line 97) | def pytest_sessionfinish(session): FILE: python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py class TestDiffusionServerTwoGpu (line 19) | class TestDiffusionServerTwoGpu(DiffusionServerBase): method case (line 23) | def case(self, request) -> DiffusionTestCase: FILE: python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py class TestDiffusionServerTwoGpu (line 19) | class TestDiffusionServerTwoGpu(DiffusionServerBase): method case (line 23) | def case(self, request) -> DiffusionTestCase: FILE: python/sglang/multimodal_gen/test/server/test_server_a.py class TestDiffusionServerOneGpu (line 25) | class TestDiffusionServerOneGpu(DiffusionServerBase): method case (line 29) | def case(self, request) -> DiffusionTestCase: FILE: python/sglang/multimodal_gen/test/server/test_server_b.py class TestDiffusionServerOneGpu (line 25) | class TestDiffusionServerOneGpu(DiffusionServerBase): method case (line 29) | def case(self, request) -> DiffusionTestCase: FILE: python/sglang/multimodal_gen/test/server/test_server_common.py function diffusion_server (line 47) | def diffusion_server(case: DiffusionTestCase) -> ServerContext: class DiffusionServerBase (line 137) | class DiffusionServerBase: method setup_class (line 149) | def setup_class(cls): method teardown_class (line 154) | def teardown_class(cls): method _capture_pytest_config (line 186) | def _capture_pytest_config(self, request): method _client (line 190) | def _client(self, ctx: ServerContext) -> OpenAI: method run_and_collect (line 197) | def run_and_collect( method _validate_and_record (line 222) | def _validate_and_record( method _check_for_improvement (line 303) | def _check_for_improvement( method _dump_baseline_for_testcase (line 381) | def _dump_baseline_for_testcase( method _save_gt_output (line 420) | def _save_gt_output( method _test_lora_api_functionality (line 474) | def _test_lora_api_functionality( method _test_lora_dynamic_switch_e2e (line 534) | def _test_lora_dynamic_switch_e2e( method _test_dynamic_lora_loading (line 593) | def _test_dynamic_lora_loading( method _test_multi_lora_e2e (line 621) | def _test_multi_lora_e2e( method _test_v1_models_endpoint (line 695) | def _test_v1_models_endpoint( method _test_t2v_rejects_input_reference (line 784) | def _test_t2v_rejects_input_reference( method test_diffusion_generation (line 815) | def test_diffusion_generation( FILE: python/sglang/multimodal_gen/test/server/test_server_utils.py function download_image_from_url (line 58) | def download_image_from_url(url: str) -> Path: function parse_dimensions (line 89) | def parse_dimensions(size_string: str | None) -> tuple[int | None, int |... class ServerContext (line 133) | class ServerContext: method cleanup (line 145) | def cleanup(self) -> None: method _cleanup_hf_cache_if_not_persistent (line 165) | def _cleanup_hf_cache_if_not_persistent(self) -> None: method _cleanup_rocm_gpu_memory (line 219) | def _cleanup_rocm_gpu_memory(self) -> None: class ServerManager (line 249) | class ServerManager: method __init__ (line 252) | def __init__( method _wait_for_rocm_gpu_memory_clear (line 266) | def _wait_for_rocm_gpu_memory_clear(self, max_wait: float = 60.0) -> N... method start (line 325) | def start(self) -> ServerContext: method _wait_for_ready (line 417) | def _wait_for_ready(self, process: subprocess.Popen, stdout_path: Path... method _get_log_tail (line 450) | def _get_log_tail(path: Path, lines: int = 200) -> str: class PerformanceValidator (line 459) | class PerformanceValidator: method __init__ (line 464) | def __init__( method _assert_le (line 477) | def _assert_le( method validate (line 522) | def validate( method collect_metrics (line 537) | def collect_metrics( method _validate_e2e (line 543) | def _validate_e2e(self, summary: PerformanceSummary) -> None: method _validate_denoise_agg (line 553) | def _validate_denoise_agg(self, summary: PerformanceSummary) -> None: method _validate_denoise_steps (line 570) | def _validate_denoise_steps(self, summary: PerformanceSummary) -> None: method _validate_stages (line 586) | def _validate_stages(self, summary: PerformanceSummary) -> None: class VideoPerformanceValidator (line 609) | class VideoPerformanceValidator(PerformanceValidator): method validate (line 614) | def validate( method _validate_frame_rate (line 632) | def _validate_frame_rate(self, summary: PerformanceSummary) -> None: class MeshValidator (line 644) | class MeshValidator(PerformanceValidator): function _download_reference_mesh (line 656) | def _download_reference_mesh(url: str) -> Path: function validate_mesh_correctness (line 673) | def validate_mesh_correctness( function get_generate_fn (line 740) | def get_generate_fn( FILE: python/sglang/multimodal_gen/test/server/test_update_weights_from_disk.py function _resolve_active_model_pairs (line 198) | def _resolve_active_model_pairs() -> list[tuple[str, str]]: function _compute_checksum_from_disk (line 222) | def _compute_checksum_from_disk(model_path: str, module_name: str) -> str: function _clone_model_with_modified_module (line 243) | def _clone_model_with_modified_module( function _truncate_safetensor (line 284) | def _truncate_safetensor(src_file: str, dst_file: str) -> None: function _perturb_safetensor (line 297) | def _perturb_safetensor(src_file: str, dst_file: str) -> None: class _UpdateWeightsApiMixin (line 307) | class _UpdateWeightsApiMixin: method _update_weights (line 308) | def _update_weights( method _get_weights_checksum (line 326) | def _get_weights_checksum( method _assert_server_matches_model (line 345) | def _assert_server_matches_model( class TestUpdateWeightsFromDisk (line 362) | class TestUpdateWeightsFromDisk(_UpdateWeightsApiMixin): method diffusion_server_no_offload (line 369) | def diffusion_server_no_offload(self, request): method test_update_weights_from_disk_default (line 437) | def test_update_weights_from_disk_default(self, diffusion_server_no_of... method test_update_weights_specific_modules (line 452) | def test_update_weights_specific_modules(self, diffusion_server_no_off... method test_update_weights_nonexistent_model (line 494) | def test_update_weights_nonexistent_model(self, diffusion_server_no_of... method test_update_weights_missing_model_path (line 512) | def test_update_weights_missing_model_path(self, diffusion_server_no_o... method test_update_weights_nonexistent_module (line 530) | def test_update_weights_nonexistent_module(self, diffusion_server_no_o... method test_corrupted_weights_rollback (line 550) | def test_corrupted_weights_rollback(self, diffusion_server_no_offload): class TestUpdateWeightsFromDiskWithOffload (line 620) | class TestUpdateWeightsFromDiskWithOffload(_UpdateWeightsApiMixin): method diffusion_server_with_offload (line 624) | def diffusion_server_with_offload(self, request): method test_update_weights_with_offload_enabled (line 659) | def test_update_weights_with_offload_enabled(self, diffusion_server_wi... FILE: python/sglang/multimodal_gen/test/server/testcase_configs.py class ToleranceConfig (line 51) | class ToleranceConfig: method load_profile (line 61) | def load_profile(cls, all_tolerances: dict, profile_name: str) -> Tole... class ScenarioConfig (line 103) | class ScenarioConfig: class BaselineConfig (line 114) | class BaselineConfig: method load (line 123) | def load(cls, path: Path) -> BaselineConfig: method update (line 153) | def update(self, path: Path): class DiffusionServerArgs (line 173) | class DiffusionServerArgs: method __post_init__ (line 204) | def __post_init__(self): class DiffusionSamplingParams (line 214) | class DiffusionSamplingParams: class DiffusionTestCase (line 242) | class DiffusionTestCase: function sample_step_indices (line 251) | def sample_step_indices( class PerformanceSummary (line 266) | class PerformanceSummary: method from_req_perf_record (line 282) | def from_req_perf_record( FILE: python/sglang/multimodal_gen/test/slack_utils.py function _get_status_message (line 51) | def _get_status_message(run_id, current_case_id, thread_messages=None): function upload_file_to_slack (line 98) | def upload_file_to_slack( FILE: python/sglang/multimodal_gen/test/test_utils.py function print_value_formatted (line 67) | def print_value_formatted(description: str, value: int | float | str): function print_divider (line 84) | def print_divider(length: int, char: str = "-"): function is_image_url (line 89) | def is_image_url(image_path: str | Path | None) -> bool: function probe_port (line 98) | def probe_port(host="127.0.0.1", port=30010, timeout=2.0) -> bool: function is_in_ci (line 108) | def is_in_ci() -> bool: function get_dynamic_server_port (line 112) | def get_dynamic_server_port() -> int: function find_free_port (line 129) | def find_free_port(host: str = "127.0.0.1") -> int: function wait_for_server_health (line 136) | def wait_for_server_health( function post_json (line 161) | def post_json( function query_gpu_mem_used_mib (line 176) | def query_gpu_mem_used_mib(gpu_index: int = 0, required: bool = False) -... function require_gpu_mem_query (line 201) | def require_gpu_mem_query(gpu_index: int = 0) -> int: function assert_gpu_mem_changed (line 212) | def assert_gpu_mem_changed( function is_mp4 (line 230) | def is_mp4(data: bytes) -> bool: function is_jpeg (line 237) | def is_jpeg(data: bytes) -> bool: function is_png (line 242) | def is_png(data): function is_webp (line 247) | def is_webp(data: bytes) -> bool: function detect_image_format (line 252) | def detect_image_format(data: bytes) -> str: function get_expected_image_format (line 265) | def get_expected_image_format( function wait_for_port (line 284) | def wait_for_port(host="127.0.0.1", port=30010, deadline=300.0, interval... function check_image_size (line 294) | def check_image_size(ut, image, width, height): function get_perf_log_dir (line 299) | def get_perf_log_dir() -> Path: function _ensure_log_path (line 310) | def _ensure_log_path(log_dir: Path) -> Path: function clear_perf_log (line 315) | def clear_perf_log(log_dir: Path) -> Path: function prepare_perf_log (line 324) | def prepare_perf_log() -> tuple[Path, Path]: function read_perf_logs (line 331) | def read_perf_logs(log_path: Path) -> list[RequestPerfRecord]: function wait_for_req_perf_record (line 348) | def wait_for_req_perf_record( function validate_image (line 373) | def validate_image(b64_json: str) -> None: function validate_video (line 379) | def validate_video(b64_json: str) -> None: function validate_openai_video (line 386) | def validate_openai_video(video_bytes: bytes) -> None: function validate_image_file (line 392) | def validate_image_file( function _get_video_dimensions_from_metadata (line 444) | def _get_video_dimensions_from_metadata( function _get_video_dimensions_from_frame (line 464) | def _get_video_dimensions_from_frame(cap: cv2.VideoCapture) -> tuple[int... function get_video_dimensions (line 483) | def get_video_dimensions(file_path: str) -> tuple[int, int]: function get_video_frame_count (line 505) | def get_video_frame_count(file_path: str) -> int: function validate_video_file (line 521) | def validate_video_file( function output_format_to_ext (line 560) | def output_format_to_ext(output_format: str | None) -> str: function _consistency_gt_filenames (line 572) | def _consistency_gt_filenames( function extract_key_frames_from_video (line 587) | def extract_key_frames_from_video( function image_bytes_to_numpy (line 639) | def image_bytes_to_numpy(image_bytes: bytes) -> np.ndarray: FILE: python/sglang/multimodal_gen/test/unit/test_lora_format_adapter.py function download_lora (line 32) | def download_lora( function is_diffusers_style_keys (line 59) | def is_diffusers_style_keys( function run_single_test (line 130) | def run_single_test( function _run_all_tests (line 172) | def _run_all_tests() -> List[Dict]: function _print_summary (line 286) | def _print_summary(results: List[Dict]) -> None: function main (line 319) | def main() -> None: class TestLoRAFormatAdapter (line 327) | class TestLoRAFormatAdapter: method test_lora_format_adapter_all_formats (line 328) | def test_lora_format_adapter_all_formats(self): FILE: python/sglang/multimodal_gen/test/unit/test_sampling_params.py class TestSamplingParamsValidate (line 13) | class TestSamplingParamsValidate(unittest.TestCase): method test_prompt_path_suffix (line 14) | def test_prompt_path_suffix(self): method test_num_outputs_per_prompt_must_be_positive (line 18) | def test_num_outputs_per_prompt_must_be_positive(self): method test_fps_must_be_positive_int (line 22) | def test_fps_must_be_positive_int(self): method test_num_inference_steps_optional_but_if_set_must_be_positive (line 28) | def test_num_inference_steps_optional_but_if_set_must_be_positive(self): method test_guidance_scale_must_be_finite_non_negative_if_set (line 33) | def test_guidance_scale_must_be_finite_non_negative_if_set(self): method test_guidance_rescale_must_be_finite_non_negative (line 40) | def test_guidance_rescale_must_be_finite_non_negative(self): method test_boundary_ratio_range (line 46) | def test_boundary_ratio_range(self): class TestSamplingParamsSubclass (line 54) | class TestSamplingParamsSubclass(unittest.TestCase): method test_flux_defaults_resolution_when_not_provided (line 55) | def test_flux_defaults_resolution_when_not_provided(self): method test_flux_preserves_user_resolution (line 61) | def test_flux_preserves_user_resolution(self): method test_diffusers_generic_calls_base_post_init (line 67) | def test_diffusers_generic_calls_base_post_init(self): class TestSamplingParamsCliArgs (line 72) | class TestSamplingParamsCliArgs(unittest.TestCase): method _parse_cli_kwargs (line 73) | def _parse_cli_kwargs(self, argv: list[str]) -> dict: method _make_qwen_image_params (line 79) | def _make_qwen_image_params(self, argv: list[str]) -> QwenImageSamplin... method test_get_cli_args_drops_unset_sampling_params (line 82) | def test_get_cli_args_drops_unset_sampling_params(self): method test_get_cli_args_keeps_explicit_sampling_params (line 85) | def test_get_cli_args_keeps_explicit_sampling_params(self): method test_qwen_image_cli_path_preserves_model_defaults (line 100) | def test_qwen_image_cli_path_preserves_model_defaults(self): method test_qwen_image_cli_path_allows_explicit_override_to_base_defaults (line 106) | def test_qwen_image_cli_path_allows_explicit_override_to_base_defaults... method test_merge_allows_explicit_field_matching_base_default (line 119) | def test_merge_allows_explicit_field_matching_base_default(self): FILE: python/sglang/multimodal_gen/test/unit/test_server_args.py class TestServerArgsPathExpansion (line 15) | class TestServerArgsPathExpansion(unittest.TestCase): method _from_dict_without_model_resolution (line 16) | def _from_dict_without_model_resolution(self, kwargs): method test_tilde_model_path_is_expanded (line 22) | def test_tilde_model_path_is_expanded(self): method test_absolute_path_is_unchanged (line 30) | def test_absolute_path_is_unchanged(self): method test_component_paths_are_expanded_before_pipeline_resolution (line 36) | def test_component_paths_are_expanded_before_pipeline_resolution(self): class TestModelIdResolution (line 49) | class TestModelIdResolution(unittest.TestCase): method setUp (line 50) | def setUp(self): method test_model_id_overrides_arbitrary_local_path (line 53) | def test_model_id_overrides_arbitrary_local_path(self): method test_model_id_works_after_tilde_expansion (line 61) | def test_model_id_works_after_tilde_expansion(self): method test_model_id_unknown_falls_back_without_crash (line 68) | def test_model_id_unknown_falls_back_without_crash(self): class TestPipelineResolutionCliOverride (line 75) | class TestPipelineResolutionCliOverride(unittest.TestCase): method setUp (line 76) | def setUp(self): method test_resolution_flag_overrides_qwen_image_layered_pipeline_config (line 79) | def test_resolution_flag_overrides_qwen_image_layered_pipeline_config(... FILE: python/sglang/multimodal_gen/test/unit/test_storage.py function _create_temp_file (line 18) | def _create_temp_file(tmp_path, name="test.png", content=b"\x89PNG\r\n\x... function test_upload_file_success (line 28) | def test_upload_file_success(tmp_path): function test_upload_and_cleanup (line 56) | def test_upload_and_cleanup(tmp_path): function test_upload_failure_preserves_file (line 75) | def test_upload_failure_preserves_file(tmp_path): function test_disabled_storage_returns_none (line 94) | def test_disabled_storage_returns_none(tmp_path): function test_aws_url_with_region (line 110) | def test_aws_url_with_region(tmp_path): function test_aws_url_default_region (line 127) | def test_aws_url_default_region(tmp_path): function test_custom_endpoint_url (line 144) | def test_custom_endpoint_url(tmp_path): function test_content_type_detection (line 162) | def test_content_type_detection(tmp_path): function test_integration_with_moto (line 199) | def test_integration_with_moto(tmp_path): FILE: python/sglang/multimodal_gen/third_party/pynvml.py class NVMLLibraryMismatchError (line 1206) | class NVMLLibraryMismatchError(Exception): class NVMLError (line 1211) | class NVMLError(Exception): method __new__ (line 1237) | def __new__(typ, value): method __str__ (line 1248) | def __str__(self): method __eq__ (line 1258) | def __eq__(self, other): function nvmlExceptionClass (line 1262) | def nvmlExceptionClass(nvmlErrorCode): function _extractNVMLErrorsAsClasses (line 1268) | def _extractNVMLErrorsAsClasses(): function _nvmlCheckReturn (line 1303) | def _nvmlCheckReturn(ret): function _nvmlGetFunctionPointer (line 1315) | def _nvmlGetFunctionPointer(name): class nvmlFriendlyObject (line 1340) | class nvmlFriendlyObject(object): method __init__ (line 1341) | def __init__(self, dictionary): method __str__ (line 1345) | def __str__(self): function nvmlStructToFriendlyObject (line 1349) | def nvmlStructToFriendlyObject(struct): function nvmlFriendlyObjectToStruct (line 1361) | def nvmlFriendlyObjectToStruct(obj, model): class struct_c_nvmlUnit_t (line 1374) | class struct_c_nvmlUnit_t(Structure): class _PrintableStructure (line 1381) | class _PrintableStructure(Structure): method __str__ (line 1403) | def __str__(self): method __getattribute__ (line 1416) | def __getattribute__(self, name): method __setattr__ (line 1428) | def __setattr__(self, name, value): class c_nvmlUnitInfo_t (line 1436) | class c_nvmlUnitInfo_t(_PrintableStructure): class c_nvmlC2cModeInfo_v1_t (line 1445) | class c_nvmlC2cModeInfo_v1_t(_PrintableStructure): class c_nvmlLedState_t (line 1452) | class c_nvmlLedState_t(_PrintableStructure): class c_nvmlPSUInfo_t (line 1459) | class c_nvmlPSUInfo_t(_PrintableStructure): class c_nvmlUnitFanInfo_t (line 1468) | class c_nvmlUnitFanInfo_t(_PrintableStructure): class c_nvmlUnitFanSpeeds_t (line 1475) | class c_nvmlUnitFanSpeeds_t(_PrintableStructure): class struct_c_nvmlDevice_t (line 1480) | class struct_c_nvmlDevice_t(Structure): class nvmlPciInfoExt_v1_t (line 1487) | class nvmlPciInfoExt_v1_t(_PrintableStructure): class nvmlPciInfo_v2_t (line 1515) | class nvmlPciInfo_v2_t(_PrintableStructure): class nvmlPciInfo_t (line 1538) | class nvmlPciInfo_t(_PrintableStructure): class c_nvmlSystemDriverBranchInfo_v1_t (line 1561) | class c_nvmlSystemDriverBranchInfo_v1_t(_PrintableStructure): class c_nvmlExcludedDeviceInfo_t (line 1571) | class c_nvmlExcludedDeviceInfo_t(_PrintableStructure): class nvmlNvLinkUtilizationControl_t (line 1575) | class nvmlNvLinkUtilizationControl_t(_PrintableStructure): class c_nvmlMemory_t (line 1582) | class c_nvmlMemory_t(_PrintableStructure): class c_nvmlMemory_v2_t (line 1591) | class c_nvmlMemory_v2_t(_PrintableStructure): class c_nvmlBAR1Memory_t (line 1605) | class c_nvmlBAR1Memory_t(_PrintableStructure): class nvmlClkMonFaultInfo_t (line 1614) | class nvmlClkMonFaultInfo_t(Structure): class nvmlClkMonStatus_t (line 1621) | class nvmlClkMonStatus_t(Structure): class c_nvmlProcessInfo_v2_t (line 1640) | class c_nvmlProcessInfo_v2_t(_PrintableStructure): class c_nvmlProcessDetail_v1_t (line 1660) | class c_nvmlProcessDetail_v1_t(Structure): class c_nvmlProcessDetailList_v1_t (line 1670) | class c_nvmlProcessDetailList_v1_t(_PrintableStructure): class c_nvmlBridgeChipInfo_t (line 1685) | class c_nvmlBridgeChipInfo_t(_PrintableStructure): class c_nvmlBridgeChipHierarchy_t (line 1692) | class c_nvmlBridgeChipHierarchy_t(_PrintableStructure): class c_nvmlEccErrorCounts_t (line 1699) | class c_nvmlEccErrorCounts_t(_PrintableStructure): class c_nvmlUtilization_t (line 1708) | class c_nvmlUtilization_t(_PrintableStructure): class c_nvmlHwbcEntry_t (line 1717) | class c_nvmlHwbcEntry_t(_PrintableStructure): class c_nvmlValue_t (line 1724) | class c_nvmlValue_t(Union): class c_nvmlSample_t (line 1736) | class c_nvmlSample_t(_PrintableStructure): class c_nvmlViolationTime_t (line 1743) | class c_nvmlViolationTime_t(_PrintableStructure): class c_nvmlFieldValue_t (line 1750) | class c_nvmlFieldValue_t(_PrintableStructure): class c_nvmlNvlinkSupportedBwModes_v1_t (line 1767) | class c_nvmlNvlinkSupportedBwModes_v1_t(_PrintableStructure): method __init__ (line 1774) | def __init__(self): class c_nvmlNvlinkGetBwMode_v1_t (line 1783) | class c_nvmlNvlinkGetBwMode_v1_t(_PrintableStructure): method __init__ (line 1786) | def __init__(self): class c_nvmlNvlinkSetBwMode_v1_t (line 1793) | class c_nvmlNvlinkSetBwMode_v1_t(_PrintableStructure): method __init__ (line 1796) | def __init__(self): class c_nvmlVgpuHeterogeneousMode_v1_t (line 1800) | class c_nvmlVgpuHeterogeneousMode_v1_t(_PrintableStructure): class c_nvmlVgpuPlacementId_v1_t (line 1810) | class c_nvmlVgpuPlacementId_v1_t(_PrintableStructure): class c_nvmlVgpuPlacementList_v1_t (line 1820) | class c_nvmlVgpuPlacementList_v1_t(_PrintableStructure): class c_nvmlVgpuPlacementList_v2_t (line 1835) | class c_nvmlVgpuPlacementList_v2_t(_PrintableStructure): class c_nvmlVgpuTypeBar1Info_v1_t (line 1848) | class c_nvmlVgpuTypeBar1Info_v1_t(_PrintableStructure): class c_nvmlVgpuInstanceUtilizationSample_t (line 1858) | class c_nvmlVgpuInstanceUtilizationSample_t(_PrintableStructure): class c_nvmlVgpuInstanceUtilizationInfo_v1_t (line 1869) | class c_nvmlVgpuInstanceUtilizationInfo_v1_t(_PrintableStructure): class c_nvmlVgpuInstancesUtilizationInfo_v1_t (line 1882) | class c_nvmlVgpuInstancesUtilizationInfo_v1_t(_PrintableStructure): class c_nvmlVgpuProcessUtilizationSample_t (line 1895) | class c_nvmlVgpuProcessUtilizationSample_t(_PrintableStructure): class c_nvmlVgpuProcessUtilizationInfo_v1_t (line 1908) | class c_nvmlVgpuProcessUtilizationInfo_v1_t(_PrintableStructure): class c_nvmlVgpuProcessesUtilizationInfo_v1_t (line 1923) | class c_nvmlVgpuProcessesUtilizationInfo_v1_t(_PrintableStructure): class nvmlVgpuRuntimeState_v1_t (line 1935) | class nvmlVgpuRuntimeState_v1_t(_PrintableStructure): class c_nvmlVgpuLicenseExpiry_t (line 1945) | class c_nvmlVgpuLicenseExpiry_t(_PrintableStructure): class c_nvmlVgpuLicenseInfo_t (line 1965) | class c_nvmlVgpuLicenseInfo_t(_PrintableStructure): class c_nvmlEncoderSession_t (line 1973) | class c_nvmlEncoderSession_t(_PrintableStructure): class c_nvmlProcessUtilizationSample_t (line 1986) | class c_nvmlProcessUtilizationSample_t(_PrintableStructure): class c_nvmlProcessUtilizationInfo_v1_t (line 1997) | class c_nvmlProcessUtilizationInfo_v1_t(_PrintableStructure): class c_nvmlProcessesUtilizationInfo_v1_t (line 2010) | class c_nvmlProcessesUtilizationInfo_v1_t(_PrintableStructure): class c_nvmlGridLicenseExpiry_t (line 2022) | class c_nvmlGridLicenseExpiry_t(_PrintableStructure): class c_nvmlGridLicensableFeature_v4_t (line 2034) | class c_nvmlGridLicensableFeature_v4_t(_PrintableStructure): class c_nvmlGridLicensableFeatures_v4_t (line 2045) | class c_nvmlGridLicensableFeatures_v4_t(_PrintableStructure): class c_nvmlGridLicensableFeature_v3_t (line 2056) | class c_nvmlGridLicensableFeature_v3_t(_PrintableStructure): class c_nvmlGridLicensableFeatures_v3_t (line 2066) | class c_nvmlGridLicensableFeatures_v3_t(_PrintableStructure): class c_nvmlGridLicensableFeature_v2_t (line 2077) | class c_nvmlGridLicensableFeature_v2_t(_PrintableStructure): class c_nvmlGridLicensableFeatures_v2_t (line 2086) | class c_nvmlGridLicensableFeatures_v2_t(_PrintableStructure): class c_nvmlGridLicensableFeature_t (line 2097) | class c_nvmlGridLicensableFeature_t(_PrintableStructure): class c_nvmlGridLicensableFeatures_t (line 2105) | class c_nvmlGridLicensableFeatures_t(_PrintableStructure): class c_nvmlMarginTemperature_v1_t (line 2116) | class c_nvmlMarginTemperature_v1_t(_PrintableStructure): class struct_c_nvmlEventSet_t (line 2127) | class struct_c_nvmlEventSet_t(Structure): class c_nvmlEventData_t (line 2217) | class c_nvmlEventData_t(_PrintableStructure): class c_nvmlAccountingStats_t (line 2228) | class c_nvmlAccountingStats_t(_PrintableStructure): class c_nvmlVgpuVersion_t (line 2240) | class c_nvmlVgpuVersion_t(Structure): class c_nvmlVgpuMetadata_t (line 2244) | class c_nvmlVgpuMetadata_t(_PrintableStructure): class c_nvmlVgpuPgpuMetadata_t (line 2259) | class c_nvmlVgpuPgpuMetadata_t(_PrintableStructure): class c_nvmlVgpuPgpuCompatibility_t (line 2272) | class c_nvmlVgpuPgpuCompatibility_t(Structure): class c_nvmlVgpuSchedDataWithARR_t (line 2295) | class c_nvmlVgpuSchedDataWithARR_t(_PrintableStructure): class c_nvmlVgpuSchedData_t (line 2302) | class c_nvmlVgpuSchedData_t(_PrintableStructure): class c_nvmlVgpuSchedulerParams_t (line 2308) | class c_nvmlVgpuSchedulerParams_t(Union): class c_nvmlVgpuSchedulerLogEntry_t (line 2315) | class c_nvmlVgpuSchedulerLogEntry_t(_PrintableStructure): class c_nvmlVgpuSchedulerLog_t (line 2326) | class c_nvmlVgpuSchedulerLog_t(_PrintableStructure): class c_nvmlVgpuSchedulerGetState_t (line 2340) | class c_nvmlVgpuSchedulerGetState_t(_PrintableStructure): class c_nvmlVgpuSchedSetDataWithARR_t (line 2348) | class c_nvmlVgpuSchedSetDataWithARR_t(_PrintableStructure): class c_nvmlVgpuSchedSetData_t (line 2355) | class c_nvmlVgpuSchedSetData_t(_PrintableStructure): class c_nvmlVgpuSchedulerSetParams_t (line 2361) | class c_nvmlVgpuSchedulerSetParams_t(Union): class c_nvmlVgpuSchedulerSetState_t (line 2368) | class c_nvmlVgpuSchedulerSetState_t(_PrintableStructure): class c_nvmlVgpuSchedulerCapabilities_t (line 2376) | class c_nvmlVgpuSchedulerCapabilities_t(_PrintableStructure): class c_nvmlFBCStats_t (line 2389) | class c_nvmlFBCStats_t(Structure): class c_nvmlFBCSession_t (line 2397) | class c_nvmlFBCSession_t(_PrintableStructure): class c_nvmlGpuInstancePlacement_t (line 2433) | class c_nvmlGpuInstancePlacement_t(Structure): class c_nvmlGpuInstanceProfileInfo_t (line 2437) | class c_nvmlGpuInstanceProfileInfo_t(Structure): class c_nvmlGpuInstanceProfileInfo_v2_t (line 2456) | class c_nvmlGpuInstanceProfileInfo_v2_t(_PrintableStructure): method __init__ (line 2473) | def __init__(self): class c_nvmlGpuInstanceInfo_t (line 2479) | class c_nvmlGpuInstanceInfo_t(Structure): class struct_c_nvmlGpuInstance_t (line 2488) | class struct_c_nvmlGpuInstance_t(Structure): class c_nvmlComputeInstancePlacement_t (line 2508) | class c_nvmlComputeInstancePlacement_t(Structure): class c_nvmlComputeInstanceProfileInfo_t (line 2512) | class c_nvmlComputeInstanceProfileInfo_t(Structure): class c_nvmlComputeInstanceProfileInfo_v2_t (line 2529) | class c_nvmlComputeInstanceProfileInfo_v2_t(_PrintableStructure): method __init__ (line 2544) | def __init__(self): class c_nvmlComputeInstanceInfo_t (line 2550) | class c_nvmlComputeInstanceInfo_t(Structure): class c_nvmlGpuDynamicPstatesUtilization_t (line 2567) | class c_nvmlGpuDynamicPstatesUtilization_t(Structure): class c_nvmlGpuDynamicPstatesInfo_t (line 2576) | class c_nvmlGpuDynamicPstatesInfo_t(Structure): class c_nvmlGpuThermalSensor_t (line 2620) | class c_nvmlGpuThermalSensor_t(Structure): class c_nvmlGpuThermalSettings_t (line 2630) | class c_nvmlGpuThermalSettings_t(Structure): class c_nvmlCoolerInfo_t (line 2655) | class c_nvmlCoolerInfo_t(_PrintableStructure): function nvmlDeviceGetCoolerInfo (line 2667) | def nvmlDeviceGetCoolerInfo(handle): class struct_c_nvmlComputeInstance_t (line 2677) | class struct_c_nvmlComputeInstance_t(Structure): class c_nvmlDeviceAttributes (line 2684) | class c_nvmlDeviceAttributes(Structure): class c_nvmlRowRemapperHistogramValues (line 2698) | class c_nvmlRowRemapperHistogramValues(Structure): class c_nvmlConfComputeSystemState_t (line 2717) | class c_nvmlConfComputeSystemState_t(Structure): class c_nvmlSystemConfComputeSettings_v1_t (line 2728) | class c_nvmlSystemConfComputeSettings_v1_t(Structure): method __init__ (line 2737) | def __init__(self): class c_nvmlConfComputeSystemCaps_t (line 2743) | class c_nvmlConfComputeSystemCaps_t(Structure): class c_nvmlConfComputeMemSizeInfo_t (line 2750) | class c_nvmlConfComputeMemSizeInfo_t(Structure): class c_nvmlConfComputeGpuCertificate_t (line 2757) | class c_nvmlConfComputeGpuCertificate_t(Structure): class c_nvmlConfComputeGpuAttestationReport_t (line 2766) | class c_nvmlConfComputeGpuAttestationReport_t(Structure): class c_nvmlConfComputeSetKeyRotationThresholdInfo_t (line 2777) | class c_nvmlConfComputeSetKeyRotationThresholdInfo_t(Structure): class c_nvmlConfComputeGetKeyRotationThresholdInfo_t (line 2787) | class c_nvmlConfComputeGetKeyRotationThresholdInfo_t(Structure): function convertStrBytes (line 2798) | def convertStrBytes(func): function throwOnVersionMismatch (line 2827) | def throwOnVersionMismatch(func): function nvmlInitWithFlags (line 2844) | def nvmlInitWithFlags(flags): function nvmlInit (line 2862) | def nvmlInit(): function _LoadNvmlLibrary (line 2867) | def _LoadNvmlLibrary(): function nvmlShutdown (line 2912) | def nvmlShutdown(): function nvmlErrorString (line 2931) | def nvmlErrorString(result): function nvmlSystemGetNVMLVersion (line 2940) | def nvmlSystemGetNVMLVersion(): function nvmlSystemGetCudaDriverVersion (line 2948) | def nvmlSystemGetCudaDriverVersion(): function nvmlSystemGetCudaDriverVersion_v2 (line 2956) | def nvmlSystemGetCudaDriverVersion_v2(): function nvmlSystemGetProcessName (line 2966) | def nvmlSystemGetProcessName(pid): function nvmlSystemGetDriverVersion (line 2975) | def nvmlSystemGetDriverVersion(): function nvmlSystemGetHicVersion (line 2984) | def nvmlSystemGetHicVersion(): function nvmlSystemGetDriverBranch (line 3007) | def nvmlSystemGetDriverBranch(): function nvmlUnitGetCount (line 3017) | def nvmlUnitGetCount(): function nvmlUnitGetHandleByIndex (line 3025) | def nvmlUnitGetHandleByIndex(index): function nvmlUnitGetUnitInfo (line 3034) | def nvmlUnitGetUnitInfo(unit): function nvmlUnitGetLedState (line 3042) | def nvmlUnitGetLedState(unit): function nvmlUnitGetPsuInfo (line 3050) | def nvmlUnitGetPsuInfo(unit): function nvmlUnitGetTemperature (line 3058) | def nvmlUnitGetTemperature(unit, type): function nvmlUnitGetFanSpeedInfo (line 3066) | def nvmlUnitGetFanSpeedInfo(unit): function nvmlUnitGetDeviceCount (line 3075) | def nvmlUnitGetDeviceCount(unit): function nvmlUnitGetDevices (line 3086) | def nvmlUnitGetDevices(unit): function nvmlDeviceGetCount (line 3097) | def nvmlDeviceGetCount(): function nvmlDeviceGetHandleByIndex (line 3105) | def nvmlDeviceGetHandleByIndex(index): function nvmlDeviceGetHandleBySerial (line 3115) | def nvmlDeviceGetHandleBySerial(serial): function nvmlDeviceGetHandleByUUID (line 3125) | def nvmlDeviceGetHandleByUUID(uuid): function nvmlDeviceGetHandleByPciBusId (line 3135) | def nvmlDeviceGetHandleByPciBusId(pciBusId): function nvmlDeviceGetName (line 3145) | def nvmlDeviceGetName(handle): class c_nvmlDevicePerfModes_v1_t (line 3153) | class c_nvmlDevicePerfModes_v1_t(_PrintableStructure): function nvmlDeviceGetPerformanceModes (line 3164) | def nvmlDeviceGetPerformanceModes(handle): class c_nvmlDeviceCurrentClockFreqs_v1_t (line 3173) | class c_nvmlDeviceCurrentClockFreqs_v1_t(_PrintableStructure): function nvmlDeviceGetCurrentClockFreqs (line 3184) | def nvmlDeviceGetCurrentClockFreqs(handle): function nvmlDeviceGetBoardId (line 3193) | def nvmlDeviceGetBoardId(handle): function nvmlDeviceGetMultiGpuBoard (line 3201) | def nvmlDeviceGetMultiGpuBoard(handle): function nvmlDeviceGetBrand (line 3209) | def nvmlDeviceGetBrand(handle): function nvmlDeviceGetC2cModeInfoV1 (line 3217) | def nvmlDeviceGetC2cModeInfoV1(handle): function nvmlDeviceGetC2cModeInfoV (line 3225) | def nvmlDeviceGetC2cModeInfoV(handle): function nvmlDeviceGetBoardPartNumber (line 3230) | def nvmlDeviceGetBoardPartNumber(handle): function nvmlDeviceGetSerial (line 3239) | def nvmlDeviceGetSerial(handle): function nvmlDeviceGetModuleId (line 3247) | def nvmlDeviceGetModuleId(handle, moduleId=c_uint()): function nvmlDeviceGetMemoryAffinity (line 3259) | def nvmlDeviceGetMemoryAffinity(handle, nodeSetSize, scope): function nvmlDeviceGetCpuAffinityWithinScope (line 3268) | def nvmlDeviceGetCpuAffinityWithinScope(handle, cpuSetSize, scope): function nvmlDeviceGetCpuAffinity (line 3277) | def nvmlDeviceGetCpuAffinity(handle, cpuSetSize): function nvmlDeviceSetCpuAffinity (line 3286) | def nvmlDeviceSetCpuAffinity(handle): function nvmlDeviceClearCpuAffinity (line 3293) | def nvmlDeviceClearCpuAffinity(handle): function nvmlDeviceGetNumaNodeId (line 3300) | def nvmlDeviceGetNumaNodeId(handle): function nvmlDeviceGetMinorNumber (line 3308) | def nvmlDeviceGetMinorNumber(handle): function nvmlDeviceGetUUID (line 3317) | def nvmlDeviceGetUUID(handle): function nvmlDeviceGetInforomVersion (line 3326) | def nvmlDeviceGetInforomVersion(handle, infoRomObject): function nvmlDeviceGetInforomImageVersion (line 3341) | def nvmlDeviceGetInforomImageVersion(handle): function nvmlDeviceGetInforomConfigurationChecksum (line 3350) | def nvmlDeviceGetInforomConfigurationChecksum(handle): function nvmlDeviceValidateInforom (line 3359) | def nvmlDeviceValidateInforom(handle): function nvmlDeviceGetLastBBXFlushTime (line 3366) | def nvmlDeviceGetLastBBXFlushTime(handle): function nvmlDeviceGetDisplayMode (line 3375) | def nvmlDeviceGetDisplayMode(handle): function nvmlDeviceGetDisplayActive (line 3383) | def nvmlDeviceGetDisplayActive(handle): function nvmlDeviceGetPersistenceMode (line 3391) | def nvmlDeviceGetPersistenceMode(handle): function nvmlDeviceGetPciInfoExt (line 3399) | def nvmlDeviceGetPciInfoExt(handle, c_info): function nvmlDeviceGetPciInfo_v3 (line 3406) | def nvmlDeviceGetPciInfo_v3(handle): function nvmlDeviceGetPciInfo (line 3414) | def nvmlDeviceGetPciInfo(handle): function nvmlDeviceGetClockInfo (line 3418) | def nvmlDeviceGetClockInfo(handle, type): function nvmlDeviceGetMaxClockInfo (line 3427) | def nvmlDeviceGetMaxClockInfo(handle, type): function nvmlDeviceGetApplicationsClock (line 3436) | def nvmlDeviceGetApplicationsClock(handle, type): function nvmlDeviceGetMaxCustomerBoostClock (line 3444) | def nvmlDeviceGetMaxCustomerBoostClock(handle, type): function nvmlDeviceGetClock (line 3452) | def nvmlDeviceGetClock(handle, type, id): function nvmlDeviceGetDefaultApplicationsClock (line 3461) | def nvmlDeviceGetDefaultApplicationsClock(handle, type): function nvmlDeviceGetSupportedMemoryClocks (line 3470) | def nvmlDeviceGetSupportedMemoryClocks(handle): function nvmlDeviceGetSupportedGraphicsClocks (line 3499) | def nvmlDeviceGetSupportedGraphicsClocks(handle, memoryClockMHz): function nvmlDeviceGetFanSpeed (line 3527) | def nvmlDeviceGetFanSpeed(handle): function nvmlDeviceGetFanSpeed_v2 (line 3535) | def nvmlDeviceGetFanSpeed_v2(handle, fan): class c_nvmlFanSpeedInfo_t (line 3543) | class c_nvmlFanSpeedInfo_t(_PrintableStructure): function nvmlDeviceGetFanSpeedRPM (line 3554) | def nvmlDeviceGetFanSpeedRPM(handle): function nvmlDeviceGetTargetFanSpeed (line 3564) | def nvmlDeviceGetTargetFanSpeed(handle, fan): function nvmlDeviceGetNumFans (line 3572) | def nvmlDeviceGetNumFans(device): function nvmlDeviceSetDefaultFanSpeed_v2 (line 3580) | def nvmlDeviceSetDefaultFanSpeed_v2(handle, index): function nvmlDeviceGetMinMaxFanSpeed (line 3587) | def nvmlDeviceGetMinMaxFanSpeed(handle, minSpeed=c_uint(), maxSpeed=c_ui... function nvmlDeviceGetFanControlPolicy_v2 (line 3597) | def nvmlDeviceGetFanControlPolicy_v2(handle, fan, fanControlPolicy=c_uin... function nvmlDeviceSetFanControlPolicy (line 3606) | def nvmlDeviceSetFanControlPolicy(handle, fan, fanControlPolicy): class c_nvmlTemperature_v1_t (line 3613) | class c_nvmlTemperature_v1_t(_PrintableStructure): function nvmlDeviceGetTemperatureV1 (line 3624) | def nvmlDeviceGetTemperatureV1(handle, sensor): function nvmlDeviceGetTemperatureV (line 3634) | def nvmlDeviceGetTemperatureV(handle, sensor, version=nvmlTemperature_v1): function nvmlDeviceGetTemperature (line 3642) | def nvmlDeviceGetTemperature(handle, sensor): function nvmlDeviceGetTemperatureThreshold (line 3650) | def nvmlDeviceGetTemperatureThreshold(handle, threshold): function nvmlDeviceSetTemperatureThreshold (line 3658) | def nvmlDeviceSetTemperatureThreshold(handle, threshold, temp): function nvmlDeviceGetMarginTemperature (line 3667) | def nvmlDeviceGetMarginTemperature(handle): function nvmlDeviceGetPowerState (line 3677) | def nvmlDeviceGetPowerState(handle): function nvmlDeviceGetPerformanceState (line 3685) | def nvmlDeviceGetPerformanceState(handle): function nvmlDeviceGetPowerManagementMode (line 3693) | def nvmlDeviceGetPowerManagementMode(handle): function nvmlDeviceGetPowerManagementLimit (line 3701) | def nvmlDeviceGetPowerManagementLimit(handle): function nvmlDeviceGetPowerManagementLimitConstraints (line 3710) | def nvmlDeviceGetPowerManagementLimitConstraints(handle): function nvmlDeviceGetPowerManagementDefaultLimit (line 3720) | def nvmlDeviceGetPowerManagementDefaultLimit(handle): function nvmlDeviceGetEnforcedPowerLimit (line 3729) | def nvmlDeviceGetEnforcedPowerLimit(handle): function nvmlDeviceGetPowerUsage (line 3737) | def nvmlDeviceGetPowerUsage(handle): function nvmlDeviceGetTotalEnergyConsumption (line 3745) | def nvmlDeviceGetTotalEnergyConsumption(handle): function nvmlDeviceGetGpuOperationMode (line 3754) | def nvmlDeviceGetGpuOperationMode(handle): function nvmlDeviceGetCurrentGpuOperationMode (line 3764) | def nvmlDeviceGetCurrentGpuOperationMode(handle): function nvmlDeviceGetPendingGpuOperationMode (line 3769) | def nvmlDeviceGetPendingGpuOperationMode(handle): function nvmlDeviceGetMemoryInfo (line 3773) | def nvmlDeviceGetMemoryInfo(handle, version=None): function nvmlDeviceGetBAR1MemoryInfo (line 3786) | def nvmlDeviceGetBAR1MemoryInfo(handle): function nvmlDeviceGetComputeMode (line 3794) | def nvmlDeviceGetComputeMode(handle): function nvmlDeviceGetCudaComputeCapability (line 3802) | def nvmlDeviceGetCudaComputeCapability(handle): function nvmlDeviceGetEccMode (line 3811) | def nvmlDeviceGetEccMode(handle): function nvmlDeviceGetCurrentEccMode (line 3821) | def nvmlDeviceGetCurrentEccMode(handle): function nvmlDeviceGetPendingEccMode (line 3826) | def nvmlDeviceGetPendingEccMode(handle): function nvmlDeviceGetDefaultEccMode (line 3830) | def nvmlDeviceGetDefaultEccMode(handle): function nvmlDeviceGetTotalEccErrors (line 3838) | def nvmlDeviceGetTotalEccErrors(handle, errorType, counterType): function nvmlDeviceGetDetailedEccErrors (line 3852) | def nvmlDeviceGetDetailedEccErrors(handle, errorType, counterType): function nvmlDeviceGetMemoryErrorCounter (line 3866) | def nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, loca... function nvmlDeviceGetUtilizationRates (line 3880) | def nvmlDeviceGetUtilizationRates(handle): function nvmlDeviceGetEncoderUtilization (line 3888) | def nvmlDeviceGetEncoderUtilization(handle): function nvmlDeviceGetDecoderUtilization (line 3897) | def nvmlDeviceGetDecoderUtilization(handle): function nvmlDeviceGetJpgUtilization (line 3906) | def nvmlDeviceGetJpgUtilization(handle): function nvmlDeviceGetOfaUtilization (line 3915) | def nvmlDeviceGetOfaUtilization(handle): function nvmlDeviceGetPcieReplayCounter (line 3924) | def nvmlDeviceGetPcieReplayCounter(handle): function nvmlDeviceGetDriverModel (line 3932) | def nvmlDeviceGetDriverModel(handle): function nvmlDeviceGetCurrentDriverModel (line 3942) | def nvmlDeviceGetCurrentDriverModel(handle): function nvmlDeviceGetPendingDriverModel (line 3947) | def nvmlDeviceGetPendingDriverModel(handle): function nvmlDeviceGetVbiosVersion (line 3953) | def nvmlDeviceGetVbiosVersion(handle): function nvmlDeviceGetComputeRunningProcesses_v2 (line 3962) | def nvmlDeviceGetComputeRunningProcesses_v2(handle): function nvmlDeviceGetComputeRunningProcesses_v3 (line 3994) | def nvmlDeviceGetComputeRunningProcesses_v3(handle): function nvmlDeviceGetComputeRunningProcesses (line 4030) | def nvmlDeviceGetComputeRunningProcesses(handle): function nvmlDeviceGetGraphicsRunningProcesses_v2 (line 4034) | def nvmlDeviceGetGraphicsRunningProcesses_v2(handle): function nvmlDeviceGetGraphicsRunningProcesses_v3 (line 4065) | def nvmlDeviceGetGraphicsRunningProcesses_v3(handle): function nvmlDeviceGetGraphicsRunningProcesses (line 4101) | def nvmlDeviceGetGraphicsRunningProcesses(handle): function nvmlDeviceGetMPSComputeRunningProcesses (line 4106) | def nvmlDeviceGetMPSComputeRunningProcesses(handle): function nvmlDeviceGetMPSComputeRunningProcesses_v2 (line 4110) | def nvmlDeviceGetMPSComputeRunningProcesses_v2(handle): function nvmlDeviceGetMPSComputeRunningProcesses_v3 (line 4145) | def nvmlDeviceGetMPSComputeRunningProcesses_v3(handle): function nvmlDeviceGetRunningProcessDetailList (line 4180) | def nvmlDeviceGetRunningProcessDetailList(handle, version, mode): function nvmlDeviceGetAutoBoostedClocksEnabled (line 4218) | def nvmlDeviceGetAutoBoostedClocksEnabled(handle): function nvmlUnitSetLedState (line 4229) | def nvmlUnitSetLedState(unit, color): function nvmlDeviceSetPersistenceMode (line 4236) | def nvmlDeviceSetPersistenceMode(handle, mode): function nvmlDeviceSetComputeMode (line 4243) | def nvmlDeviceSetComputeMode(handle, mode): function nvmlDeviceSetEccMode (line 4250) | def nvmlDeviceSetEccMode(handle, mode): function nvmlDeviceClearEccErrorCounts (line 4257) | def nvmlDeviceClearEccErrorCounts(handle, counterType): function nvmlDeviceSetDriverModel (line 4264) | def nvmlDeviceSetDriverModel(handle, model): function nvmlDeviceSetAutoBoostedClocksEnabled (line 4271) | def nvmlDeviceSetAutoBoostedClocksEnabled(handle, enabled): function nvmlDeviceSetDefaultAutoBoostedClocksEnabled (line 4279) | def nvmlDeviceSetDefaultAutoBoostedClocksEnabled(handle, enabled, flags): function nvmlDeviceSetGpuLockedClocks (line 4287) | def nvmlDeviceSetGpuLockedClocks(handle, minGpuClockMHz, maxGpuClockMHz): function nvmlDeviceResetGpuLockedClocks (line 4294) | def nvmlDeviceResetGpuLockedClocks(handle): function nvmlDeviceSetMemoryLockedClocks (line 4301) | def nvmlDeviceSetMemoryLockedClocks(handle, minMemClockMHz, maxMemClockM... function nvmlDeviceResetMemoryLockedClocks (line 4308) | def nvmlDeviceResetMemoryLockedClocks(handle): function nvmlDeviceGetClkMonStatus (line 4315) | def nvmlDeviceGetClkMonStatus(handle, c_clkMonInfo=nvmlClkMonStatus_t()): function nvmlDeviceSetApplicationsClocks (line 4325) | def nvmlDeviceSetApplicationsClocks(handle, maxMemClockMHz, maxGraphicsC... function nvmlDeviceResetApplicationsClocks (line 4333) | def nvmlDeviceResetApplicationsClocks(handle): function nvmlDeviceSetPowerManagementLimit (line 4341) | def nvmlDeviceSetPowerManagementLimit(handle, limit): function nvmlDeviceSetGpuOperationMode (line 4349) | def nvmlDeviceSetGpuOperationMode(handle, mode): function nvmlEventSetCreate (line 4357) | def nvmlEventSetCreate(): function nvmlDeviceRegisterEvents (line 4366) | def nvmlDeviceRegisterEvents(handle, eventTypes, eventSet): function nvmlDeviceGetSupportedEventTypes (line 4374) | def nvmlDeviceGetSupportedEventTypes(handle): function nvmlEventSetWait_v2 (line 4383) | def nvmlEventSetWait_v2(eventSet, timeoutms): function nvmlEventSetWait (line 4391) | def nvmlEventSetWait(eventSet, timeoutms): function nvmlEventSetFree (line 4396) | def nvmlEventSetFree(eventSet): function nvmlDeviceOnSameBoard (line 4404) | def nvmlDeviceOnSameBoard(handle1, handle2): function nvmlDeviceGetCurrPcieLinkGeneration (line 4413) | def nvmlDeviceGetCurrPcieLinkGeneration(handle): function nvmlDeviceGetMaxPcieLinkGeneration (line 4422) | def nvmlDeviceGetMaxPcieLinkGeneration(handle): function nvmlDeviceGetCurrPcieLinkWidth (line 4431) | def nvmlDeviceGetCurrPcieLinkWidth(handle): function nvmlDeviceGetMaxPcieLinkWidth (line 4440) | def nvmlDeviceGetMaxPcieLinkWidth(handle): function nvmlDeviceGetGpuMaxPcieLinkGeneration (line 4448) | def nvmlDeviceGetGpuMaxPcieLinkGeneration(handle): function nvmlDeviceGetSupportedClocksThrottleReasons (line 4457) | def nvmlDeviceGetSupportedClocksThrottleReasons(handle): function nvmlDeviceGetSupportedClocksEventReasons (line 4465) | def nvmlDeviceGetSupportedClocksEventReasons(handle): function nvmlDeviceGetCurrentClocksThrottleReasons (line 4474) | def nvmlDeviceGetCurrentClocksThrottleReasons(handle): function nvmlDeviceGetCurrentClocksEventReasons (line 4482) | def nvmlDeviceGetCurrentClocksEventReasons(handle): function nvmlDeviceGetIndex (line 4491) | def nvmlDeviceGetIndex(handle): function nvmlDeviceGetAccountingMode (line 4500) | def nvmlDeviceGetAccountingMode(handle): function nvmlDeviceSetAccountingMode (line 4508) | def nvmlDeviceSetAccountingMode(handle, mode): function nvmlDeviceClearAccountingPids (line 4515) | def nvmlDeviceClearAccountingPids(handle): function nvmlDeviceGetAccountingStats (line 4522) | def nvmlDeviceGetAccountingStats(handle, pid): function nvmlDeviceGetAccountingPids (line 4533) | def nvmlDeviceGetAccountingPids(handle): function nvmlDeviceGetAccountingBufferSize (line 4542) | def nvmlDeviceGetAccountingBufferSize(handle): function nvmlDeviceGetRetiredPages (line 4550) | def nvmlDeviceGetRetiredPages(device, sourceFilter): function nvmlDeviceGetRetiredPages_v2 (line 4573) | def nvmlDeviceGetRetiredPages_v2(device, sourceFilter): function nvmlDeviceGetRetiredPagesPendingStatus (line 4601) | def nvmlDeviceGetRetiredPagesPendingStatus(device): function nvmlDeviceGetAPIRestriction (line 4609) | def nvmlDeviceGetAPIRestriction(device, apiType): function nvmlDeviceSetAPIRestriction (line 4617) | def nvmlDeviceSetAPIRestriction(handle, apiType, isRestricted): function nvmlDeviceGetBridgeChipInfo (line 4624) | def nvmlDeviceGetBridgeChipInfo(handle): function nvmlDeviceGetSamples (line 4632) | def nvmlDeviceGetSamples(device, sampling_type, timeStamp): function nvmlDeviceGetViolationStatus (line 4667) | def nvmlDeviceGetViolationStatus(device, perfPolicyType): function nvmlDeviceGetPcieThroughput (line 4678) | def nvmlDeviceGetPcieThroughput(device, counter): function nvmlSystemGetTopologyGpuSet (line 4686) | def nvmlSystemGetTopologyGpuSet(cpuNumber): function nvmlDeviceGetTopologyNearestGpus (line 4703) | def nvmlDeviceGetTopologyNearestGpus(device, level): function nvmlDeviceGetTopologyCommonAncestor (line 4721) | def nvmlDeviceGetTopologyCommonAncestor(device1, device2): function nvmlDeviceGetNvLinkUtilizationCounter (line 4729) | def nvmlDeviceGetNvLinkUtilizationCounter(device, link, counter): function nvmlDeviceFreezeNvLinkUtilizationCounter (line 4738) | def nvmlDeviceFreezeNvLinkUtilizationCounter(device, link, counter, free... function nvmlDeviceResetNvLinkUtilizationCounter (line 4745) | def nvmlDeviceResetNvLinkUtilizationCounter(device, link, counter): function nvmlDeviceSetNvLinkUtilizationControl (line 4752) | def nvmlDeviceSetNvLinkUtilizationControl(device, link, counter, control... function nvmlDeviceGetNvLinkUtilizationControl (line 4759) | def nvmlDeviceGetNvLinkUtilizationControl(device, link, counter): function nvmlDeviceGetNvLinkCapability (line 4767) | def nvmlDeviceGetNvLinkCapability(device, link, capability): function nvmlDeviceGetNvLinkErrorCounter (line 4775) | def nvmlDeviceGetNvLinkErrorCounter(device, link, counter): function nvmlDeviceResetNvLinkErrorCounters (line 4783) | def nvmlDeviceResetNvLinkErrorCounters(device, link): function nvmlDeviceGetNvLinkRemotePciInfo (line 4790) | def nvmlDeviceGetNvLinkRemotePciInfo(device, link): function nvmlDeviceGetNvLinkRemoteDeviceType (line 4798) | def nvmlDeviceGetNvLinkRemoteDeviceType(handle, link): function nvmlDeviceGetNvLinkState (line 4806) | def nvmlDeviceGetNvLinkState(device, link): function nvmlDeviceGetNvLinkVersion (line 4814) | def nvmlDeviceGetNvLinkVersion(device, link): function nvmlDeviceModifyDrainState (line 4822) | def nvmlDeviceModifyDrainState(pciInfo, newState): function nvmlDeviceQueryDrainState (line 4829) | def nvmlDeviceQueryDrainState(pciInfo): function nvmlDeviceRemoveGpu (line 4837) | def nvmlDeviceRemoveGpu(pciInfo): function nvmlDeviceDiscoverGpus (line 4844) | def nvmlDeviceDiscoverGpus(pciInfo): function nvmlDeviceGetFieldValues (line 4851) | def nvmlDeviceGetFieldValues(handle, fieldIds): function nvmlDeviceClearFieldValues (line 4867) | def nvmlDeviceClearFieldValues(handle, fieldIds): function nvmlDeviceGetVirtualizationMode (line 4883) | def nvmlDeviceGetVirtualizationMode(handle): function nvmlDeviceSetVirtualizationMode (line 4891) | def nvmlDeviceSetVirtualizationMode(handle, virtualization_mode): function nvmlDeviceGetVgpuHeterogeneousMode (line 4896) | def nvmlDeviceGetVgpuHeterogeneousMode(handle): function nvmlDeviceSetVgpuHeterogeneousMode (line 4905) | def nvmlDeviceSetVgpuHeterogeneousMode(handle, heterogeneous_mode): function nvmlVgpuInstanceGetPlacementId (line 4915) | def nvmlVgpuInstanceGetPlacementId(vgpuInstance): function nvmlDeviceGetVgpuTypeSupportedPlacements (line 4924) | def nvmlDeviceGetVgpuTypeSupportedPlacements(handle, vgpuTypeId, mode=0,... function nvmlDeviceGetVgpuTypeCreatablePlacements (line 4949) | def nvmlDeviceGetVgpuTypeCreatablePlacements(handle, vgpuTypeId, version... function nvmlGetVgpuDriverCapabilities (line 4971) | def nvmlGetVgpuDriverCapabilities(capability): function nvmlDeviceGetVgpuCapabilities (line 4979) | def nvmlDeviceGetVgpuCapabilities(handle, capability): function nvmlDeviceSetVgpuCapabilities (line 4987) | def nvmlDeviceSetVgpuCapabilities(handle, capability, state): function nvmlDeviceGetSupportedVgpus (line 4994) | def nvmlDeviceGetSupportedVgpus(handle): function nvmlDeviceGetCreatableVgpus (line 5021) | def nvmlDeviceGetCreatableVgpus(handle): function nvmlVgpuTypeGetGpuInstanceProfileId (line 5048) | def nvmlVgpuTypeGetGpuInstanceProfileId(vgpuTypeId): function nvmlVgpuTypeGetClass (line 5057) | def nvmlVgpuTypeGetClass(vgpuTypeId): function nvmlVgpuTypeGetName (line 5067) | def nvmlVgpuTypeGetName(vgpuTypeId): function nvmlVgpuTypeGetDeviceID (line 5076) | def nvmlVgpuTypeGetDeviceID(vgpuTypeId): function nvmlVgpuTypeGetFramebufferSize (line 5085) | def nvmlVgpuTypeGetFramebufferSize(vgpuTypeId): function nvmlVgpuTypeGetNumDisplayHeads (line 5093) | def nvmlVgpuTypeGetNumDisplayHeads(vgpuTypeId): function nvmlVgpuTypeGetResolution (line 5101) | def nvmlVgpuTypeGetResolution(vgpuTypeId): function nvmlVgpuTypeGetLicense (line 5111) | def nvmlVgpuTypeGetLicense(vgpuTypeId): function nvmlVgpuTypeGetFrameRateLimit (line 5120) | def nvmlVgpuTypeGetFrameRateLimit(vgpuTypeId): function nvmlVgpuTypeGetGspHeapSize (line 5128) | def nvmlVgpuTypeGetGspHeapSize(vgpuTypeId): function nvmlVgpuTypeGetFbReservation (line 5136) | def nvmlVgpuTypeGetFbReservation(vgpuTypeId): function nvmlVgpuInstanceGetRuntimeStateSize (line 5144) | def nvmlVgpuInstanceGetRuntimeStateSize(vgpuInstance): function nvmlVgpuTypeGetMaxInstances (line 5153) | def nvmlVgpuTypeGetMaxInstances(handle, vgpuTypeId): function nvmlVgpuTypeGetMaxInstancesPerVm (line 5161) | def nvmlVgpuTypeGetMaxInstancesPerVm(vgpuTypeId): function nvmlVgpuTypeGetBAR1Info (line 5169) | def nvmlVgpuTypeGetBAR1Info(vgpuTypeId): function nvmlDeviceGetActiveVgpus (line 5178) | def nvmlDeviceGetActiveVgpus(handle): function nvmlVgpuInstanceGetVmID (line 5206) | def nvmlVgpuInstanceGetVmID(vgpuInstance): function nvmlVgpuInstanceGetUUID (line 5217) | def nvmlVgpuInstanceGetUUID(vgpuInstance): function nvmlVgpuInstanceGetMdevUUID (line 5227) | def nvmlVgpuInstanceGetMdevUUID(vgpuInstance): function nvmlVgpuInstanceGetVmDriverVersion (line 5237) | def nvmlVgpuInstanceGetVmDriverVersion(vgpuInstance): function nvmlVgpuInstanceGetLicenseStatus (line 5246) | def nvmlVgpuInstanceGetLicenseStatus(vgpuInstance): function nvmlVgpuInstanceGetLicenseInfo_v2 (line 5254) | def nvmlVgpuInstanceGetLicenseInfo_v2(vgpuInstance): function nvmlVgpuInstanceGetLicenseInfo (line 5262) | def nvmlVgpuInstanceGetLicenseInfo(vgpuInstance): function nvmlVgpuInstanceGetFrameRateLimit (line 5266) | def nvmlVgpuInstanceGetFrameRateLimit(vgpuInstance): function nvmlVgpuInstanceGetEccMode (line 5274) | def nvmlVgpuInstanceGetEccMode(vgpuInstance): function nvmlVgpuInstanceGetType (line 5282) | def nvmlVgpuInstanceGetType(vgpuInstance): function nvmlVgpuInstanceGetEncoderCapacity (line 5290) | def nvmlVgpuInstanceGetEncoderCapacity(vgpuInstance): function nvmlVgpuInstanceSetEncoderCapacity (line 5298) | def nvmlVgpuInstanceSetEncoderCapacity(vgpuInstance, encoder_capacity): function nvmlVgpuInstanceGetFbUsage (line 5303) | def nvmlVgpuInstanceGetFbUsage(vgpuInstance): function nvmlVgpuTypeGetCapabilities (line 5311) | def nvmlVgpuTypeGetCapabilities(vgpuTypeId, capability): function nvmlVgpuInstanceGetGpuInstanceId (line 5319) | def nvmlVgpuInstanceGetGpuInstanceId(vgpuInstance): function nvmlVgpuInstanceGetGpuPciId (line 5328) | def nvmlVgpuInstanceGetGpuPciId(vgpuInstance): function nvmlDeviceGetVgpuUtilization (line 5338) | def nvmlDeviceGetVgpuUtilization(handle, timeStamp): function nvmlDeviceGetVgpuInstancesUtilizationInfo (line 5373) | def nvmlDeviceGetVgpuInstancesUtilizationInfo(handle, timeStamp): function nvmlDeviceGetP2PStatus (line 5406) | def nvmlDeviceGetP2PStatus(device1, device2, p2pIndex): function nvmlDeviceGetGridLicensableFeatures_v4 (line 5414) | def nvmlDeviceGetGridLicensableFeatures_v4(handle): function nvmlDeviceGetGridLicensableFeatures (line 5423) | def nvmlDeviceGetGridLicensableFeatures(handle): function nvmlDeviceGetGspFirmwareVersion (line 5427) | def nvmlDeviceGetGspFirmwareVersion(handle, version=None): function nvmlDeviceGetGspFirmwareMode (line 5437) | def nvmlDeviceGetGspFirmwareMode(handle, isEnabled=c_uint(), defaultMode... function nvmlDeviceGetEncoderCapacity (line 5447) | def nvmlDeviceGetEncoderCapacity(handle, encoderQueryType): function nvmlDeviceGetVgpuProcessUtilization (line 5457) | def nvmlDeviceGetVgpuProcessUtilization(handle, timeStamp): function nvmlDeviceGetVgpuProcessesUtilizationInfo (line 5483) | def nvmlDeviceGetVgpuProcessesUtilizationInfo(handle, timeStamp): function nvmlDeviceGetEncoderStats (line 5515) | def nvmlDeviceGetEncoderStats(handle): function nvmlDeviceGetEncoderSessions (line 5525) | def nvmlDeviceGetEncoderSessions(handle): function nvmlDeviceGetFBCStats (line 5552) | def nvmlDeviceGetFBCStats(handle): function nvmlDeviceGetFBCSessions (line 5560) | def nvmlDeviceGetFBCSessions(handle): function nvmlVgpuInstanceGetEncoderStats (line 5587) | def nvmlVgpuInstanceGetEncoderStats(vgpuInstance): function nvmlVgpuInstanceGetEncoderSessions (line 5599) | def nvmlVgpuInstanceGetEncoderSessions(vgpuInstance): function nvmlVgpuInstanceGetFBCStats (line 5626) | def nvmlVgpuInstanceGetFBCStats(vgpuInstance): function nvmlVgpuInstanceGetFBCSessions (line 5634) | def nvmlVgpuInstanceGetFBCSessions(vgpuInstance): function nvmlDeviceGetProcessUtilization (line 5661) | def nvmlDeviceGetProcessUtilization(handle, timeStamp): function nvmlDeviceGetProcessesUtilizationInfo (line 5684) | def nvmlDeviceGetProcessesUtilizationInfo(handle, timeStamp): function nvmlVgpuInstanceGetMetadata (line 5713) | def nvmlVgpuInstanceGetMetadata(vgpuInstance): function nvmlDeviceGetVgpuMetadata (line 5728) | def nvmlDeviceGetVgpuMetadata(handle): function nvmlGetVgpuCompatibility (line 5743) | def nvmlGetVgpuCompatibility(vgpuMetadata, pgpuMetadata): function nvmlDeviceGetPgpuMetadataString (line 5752) | def nvmlDeviceGetPgpuMetadataString(handle): function nvmlDeviceGetVgpuSchedulerLog (line 5767) | def nvmlDeviceGetVgpuSchedulerLog(handle): function nvmlDeviceGetVgpuSchedulerState (line 5775) | def nvmlDeviceGetVgpuSchedulerState(handle): function nvmlDeviceGetVgpuSchedulerCapabilities (line 5783) | def nvmlDeviceGetVgpuSchedulerCapabilities(handle): function nvmlDeviceSetVgpuSchedulerState (line 5791) | def nvmlDeviceSetVgpuSchedulerState(handle, sched_state): function nvmlSetVgpuVersion (line 5798) | def nvmlSetVgpuVersion(vgpuVersion): function nvmlGetVgpuVersion (line 5805) | def nvmlGetVgpuVersion(supported=None, current=None): function nvmlVgpuInstanceGetAccountingMode (line 5823) | def nvmlVgpuInstanceGetAccountingMode(vgpuInstance): function nvmlVgpuInstanceGetAccountingPids (line 5831) | def nvmlVgpuInstanceGetAccountingPids(vgpuInstance): function nvmlVgpuInstanceGetAccountingStats (line 5845) | def nvmlVgpuInstanceGetAccountingStats(vgpuInstance, pid): function nvmlVgpuInstanceClearAccountingPids (line 5853) | def nvmlVgpuInstanceClearAccountingPids(vgpuInstance): function nvmlGetExcludedDeviceCount (line 5860) | def nvmlGetExcludedDeviceCount(): function nvmlGetExcludedDeviceInfoByIndex (line 5868) | def nvmlGetExcludedDeviceInfoByIndex(index): function nvmlDeviceGetHostVgpuMode (line 5877) | def nvmlDeviceGetHostVgpuMode(handle): function nvmlDeviceSetMigMode (line 5885) | def nvmlDeviceSetMigMode(device, mode): function nvmlDeviceGetMigMode (line 5893) | def nvmlDeviceGetMigMode(device): function nvmlDeviceGetGpuInstanceProfileInfo (line 5902) | def nvmlDeviceGetGpuInstanceProfileInfo(device, profile, version=2): function nvmlDeviceGetGpuInstanceRemainingCapacity (line 5920) | def nvmlDeviceGetGpuInstanceRemainingCapacity(device, profileId): function nvmlDeviceGetGpuInstancePossiblePlacements (line 5928) | def nvmlDeviceGetGpuInstancePossiblePlacements( function nvmlDeviceCreateGpuInstance (line 5937) | def nvmlDeviceCreateGpuInstance(device, profileId): function nvmlDeviceCreateGpuInstanceWithPlacement (line 5945) | def nvmlDeviceCreateGpuInstanceWithPlacement(device, profileId, placement): function nvmlGpuInstanceDestroy (line 5953) | def nvmlGpuInstanceDestroy(gpuInstance): function nvmlDeviceGetGpuInstances (line 5960) | def nvmlDeviceGetGpuInstances(device, profileId, gpuInstancesRef, countR... function nvmlDeviceGetGpuInstanceById (line 5967) | def nvmlDeviceGetGpuInstanceById(device, gpuInstanceId): function nvmlGpuInstanceGetInfo (line 5975) | def nvmlGpuInstanceGetInfo(gpuInstance): function nvmlGpuInstanceGetComputeInstanceProfileInfo (line 5983) | def nvmlGpuInstanceGetComputeInstanceProfileInfo( function nvmlGpuInstanceGetComputeInstanceRemainingCapacity (line 6005) | def nvmlGpuInstanceGetComputeInstanceRemainingCapacity(gpuInstance, prof... function nvmlGpuInstanceGetComputeInstancePossiblePlacements (line 6013) | def nvmlGpuInstanceGetComputeInstancePossiblePlacements( function nvmlGpuInstanceCreateComputeInstance (line 6022) | def nvmlGpuInstanceCreateComputeInstance(gpuInstance, profileId): function nvmlGpuInstanceCreateComputeInstanceWithPlacement (line 6030) | def nvmlGpuInstanceCreateComputeInstanceWithPlacement( function nvmlComputeInstanceDestroy (line 6040) | def nvmlComputeInstanceDestroy(computeInstance): function nvmlGpuInstanceGetComputeInstances (line 6047) | def nvmlGpuInstanceGetComputeInstances( function nvmlGpuInstanceGetComputeInstanceById (line 6056) | def nvmlGpuInstanceGetComputeInstanceById(gpuInstance, computeInstanceId): function nvmlComputeInstanceGetInfo_v2 (line 6064) | def nvmlComputeInstanceGetInfo_v2(computeInstance): function nvmlComputeInstanceGetInfo (line 6072) | def nvmlComputeInstanceGetInfo(computeInstance): function nvmlDeviceIsMigDeviceHandle (line 6076) | def nvmlDeviceIsMigDeviceHandle(device): function nvmlDeviceGetGpuInstanceId (line 6084) | def nvmlDeviceGetGpuInstanceId(device): function nvmlDeviceGetComputeInstanceId (line 6092) | def nvmlDeviceGetComputeInstanceId(device): function nvmlDeviceGetMaxMigDeviceCount (line 6100) | def nvmlDeviceGetMaxMigDeviceCount(device): function nvmlDeviceGetMigDeviceHandleByIndex (line 6108) | def nvmlDeviceGetMigDeviceHandleByIndex(device, index): function nvmlDeviceGetDeviceHandleFromMigDeviceHandle (line 6117) | def nvmlDeviceGetDeviceHandleFromMigDeviceHandle(migDevice): function nvmlDeviceGetAttributes_v2 (line 6125) | def nvmlDeviceGetAttributes_v2(device): function nvmlDeviceGetAttributes (line 6133) | def nvmlDeviceGetAttributes(device): function nvmlDeviceGetRemappedRows (line 6137) | def nvmlDeviceGetRemappedRows(device): function nvmlDeviceGetRowRemapperHistogram (line 6148) | def nvmlDeviceGetRowRemapperHistogram(device): function nvmlDeviceGetArchitecture (line 6156) | def nvmlDeviceGetArchitecture(device): function nvmlDeviceGetBusType (line 6164) | def nvmlDeviceGetBusType(device): function nvmlDeviceGetIrqNum (line 6172) | def nvmlDeviceGetIrqNum(device): function nvmlDeviceGetNumGpuCores (line 6180) | def nvmlDeviceGetNumGpuCores(device): function nvmlDeviceGetPowerSource (line 6188) | def nvmlDeviceGetPowerSource(device): function nvmlDeviceGetMemoryBusWidth (line 6196) | def nvmlDeviceGetMemoryBusWidth(device): function nvmlDeviceGetPcieLinkMaxSpeed (line 6204) | def nvmlDeviceGetPcieLinkMaxSpeed(device): function nvmlDeviceGetAdaptiveClockInfoStatus (line 6212) | def nvmlDeviceGetAdaptiveClockInfoStatus(device): function nvmlDeviceGetPcieSpeed (line 6220) | def nvmlDeviceGetPcieSpeed(device): function nvmlDeviceGetDynamicPstatesInfo (line 6228) | def nvmlDeviceGetDynamicPstatesInfo( function nvmlDeviceSetFanSpeed_v2 (line 6242) | def nvmlDeviceSetFanSpeed_v2(handle, index, speed): function nvmlDeviceGetThermalSettings (line 6249) | def nvmlDeviceGetThermalSettings( function nvmlDeviceGetMinMaxClockOfPState (line 6260) | def nvmlDeviceGetMinMaxClockOfPState( class c_nvmlClockOffset_t (line 6278) | class c_nvmlClockOffset_t(_PrintableStructure): function nvmlDeviceGetClockOffsets (line 6292) | def nvmlDeviceGetClockOffsets(device, info): function nvmlDeviceSetClockOffsets (line 6298) | def nvmlDeviceSetClockOffsets(device, info): function nvmlDeviceGetSupportedPerformanceStates (line 6304) | def nvmlDeviceGetSupportedPerformanceStates(device): function nvmlDeviceGetGpcClkVfOffset (line 6324) | def nvmlDeviceGetGpcClkVfOffset(device): function nvmlDeviceSetGpcClkVfOffset (line 6332) | def nvmlDeviceSetGpcClkVfOffset(device, offset): function nvmlDeviceGetGpcClkMinMaxVfOffset (line 6340) | def nvmlDeviceGetGpcClkMinMaxVfOffset(device, minOffset=c_int(), maxOffs... function nvmlDeviceGetMemClkVfOffset (line 6350) | def nvmlDeviceGetMemClkVfOffset(device): function nvmlDeviceSetMemClkVfOffset (line 6358) | def nvmlDeviceSetMemClkVfOffset(device, offset): function nvmlDeviceGetMemClkMinMaxVfOffset (line 6366) | def nvmlDeviceGetMemClkMinMaxVfOffset(device, minOffset=c_int(), maxOffs... function nvmlSystemSetConfComputeGpusReadyState (line 6377) | def nvmlSystemSetConfComputeGpusReadyState(state): function nvmlSystemGetConfComputeGpusReadyState (line 6385) | def nvmlSystemGetConfComputeGpusReadyState(): function nvmlSystemGetConfComputeCapabilities (line 6393) | def nvmlSystemGetConfComputeCapabilities(): function nvmlSystemGetConfComputeState (line 6401) | def nvmlSystemGetConfComputeState(): function nvmlSystemGetConfComputeSettings (line 6409) | def nvmlSystemGetConfComputeSettings(settings): function nvmlDeviceSetConfComputeUnprotectedMemSize (line 6414) | def nvmlDeviceSetConfComputeUnprotectedMemSize(device, c_ccMemSize): function nvmlDeviceGetConfComputeMemSizeInfo (line 6421) | def nvmlDeviceGetConfComputeMemSizeInfo(device): function nvmlDeviceGetConfComputeProtectedMemoryUsage (line 6429) | def nvmlDeviceGetConfComputeProtectedMemoryUsage(device): function nvmlDeviceGetConfComputeGpuCertificate (line 6437) | def nvmlDeviceGetConfComputeGpuCertificate(device): function nvmlDeviceGetConfComputeGpuAttestationReport (line 6445) | def nvmlDeviceGetConfComputeGpuAttestationReport(device, c_nonce): function nvmlSystemSetConfComputeKeyRotationThresholdInfo (line 6455) | def nvmlSystemSetConfComputeKeyRotationThresholdInfo(max_atk_adv): function nvmlSystemGetConfComputeKeyRotationThresholdInfo (line 6465) | def nvmlSystemGetConfComputeKeyRotationThresholdInfo(): class c_nvmlUnitInfo_t (line 6633) | class c_nvmlUnitInfo_t(_PrintableStructure): class struct_c_nvmlGpmSample_t (line 6642) | class struct_c_nvmlGpmSample_t(Structure): class c_metricInfo_t (line 6649) | class c_metricInfo_t(Structure): class c_nvmlGpmMetric_t (line 6657) | class c_nvmlGpmMetric_t(_PrintableStructure): class c_nvmlGpmMetricsGet_t (line 6666) | class c_nvmlGpmMetricsGet_t(_PrintableStructure): class c_nvmlGpmSupport_t (line 6679) | class c_nvmlGpmSupport_t(_PrintableStructure): function nvmlGpmMetricsGet (line 6691) | def nvmlGpmMetricsGet(metricsGet): function nvmlGpmSampleFree (line 6698) | def nvmlGpmSampleFree(gpmSample): function nvmlGpmSampleAlloc (line 6705) | def nvmlGpmSampleAlloc(): function nvmlGpmSampleGet (line 6713) | def nvmlGpmSampleGet(device, gpmSample): function nvmlGpmMigSampleGet (line 6720) | def nvmlGpmMigSampleGet(device, gpuInstanceId, gpmSample): function nvmlGpmQueryDeviceSupport (line 6727) | def nvmlGpmQueryDeviceSupport(device): function nvmlGpmSetStreamingEnabled (line 6736) | def nvmlGpmSetStreamingEnabled(device, state): function nvmlGpmQueryIfStreamingEnabled (line 6744) | def nvmlGpmQueryIfStreamingEnabled(device): class c_nvmlNvLinkPowerThres_t (line 6763) | class c_nvmlNvLinkPowerThres_t(Structure): function nvmlDeviceSetNvLinkDeviceLowPowerThreshold (line 6769) | def nvmlDeviceSetNvLinkDeviceLowPowerThreshold(device, l1threshold): class c_nvmlGpuFabricInfo_t (line 6787) | class c_nvmlGpuFabricInfo_t(_PrintableStructure): class c_nvmlGpuFabricInfoV_t (line 6823) | class c_nvmlGpuFabricInfoV_t(_PrintableStructure): method __init__ (line 6833) | def __init__(self): function nvmlDeviceGetGpuFabricInfo (line 6837) | def nvmlDeviceGetGpuFabricInfo(device, gpuFabricInfo): function nvmlDeviceGetGpuFabricInfoV (line 6844) | def nvmlDeviceGetGpuFabricInfoV(device, gpuFabricInfo): function nvmlSystemSetNvlinkBwMode (line 6862) | def nvmlSystemSetNvlinkBwMode(mode): function nvmlSystemGetNvlinkBwMode (line 6869) | def nvmlSystemGetNvlinkBwMode(): class c_nvmlPowerValue_v2_t (line 6883) | class c_nvmlPowerValue_v2_t(_PrintableStructure): function nvmlDeviceSetPowerManagementLimit_v2 (line 6895) | def nvmlDeviceSetPowerManagementLimit_v2( class c_nvmlEccSramErrorStatus_v1_t (line 6908) | class c_nvmlEccSramErrorStatus_v1_t(_PrintableStructure): method __init__ (line 6925) | def __init__(self): function nvmlDeviceGetSramEccErrorStatus (line 6934) | def nvmlDeviceGetSramEccErrorStatus(device, status): class c_nvmlDeviceCapabilities_v1_t (line 6945) | class c_nvmlDeviceCapabilities_v1_t(_PrintableStructure): method __init__ (line 6951) | def __init__(self): function nvmlDeviceGetCapabilities (line 6957) | def nvmlDeviceGetCapabilities(device, caps): class c_nvmlPlatformInfo_v1_t (line 6962) | class c_nvmlPlatformInfo_v1_t(_PrintableStructure): method __init__ (line 6974) | def __init__(self): function nvmlDeviceGetPlatformInfo (line 6981) | def nvmlDeviceGetPlatformInfo(device, platformInfo): class c_nvmlMask255_t (line 6988) | class c_nvmlMask255_t(_PrintableStructure): class c_nvmlWorkloadPowerProfileInfo_v1_t (line 7015) | class c_nvmlWorkloadPowerProfileInfo_v1_t(_PrintableStructure): method __init__ (line 7023) | def __init__(self): class c_nvmlWorkloadPowerProfileProfilesInfo_v1_t (line 7032) | class c_nvmlWorkloadPowerProfileProfilesInfo_v1_t(_PrintableStructure): method __init__ (line 7042) | def __init__(self): class c_nvmlWorkloadPowerProfileCurrentProfiles_v1_t (line 7051) | class c_nvmlWorkloadPowerProfileCurrentProfiles_v1_t(_PrintableStructure): method __init__ (line 7059) | def __init__(self): class c_nvmlWorkloadPowerProfileRequestedProfiles_v1_t (line 7068) | class c_nvmlWorkloadPowerProfileRequestedProfiles_v1_t(_PrintableStructu... method __init__ (line 7074) | def __init__(self): function nvmlDeviceWorkloadPowerProfileGetProfilesInfo (line 7080) | def nvmlDeviceWorkloadPowerProfileGetProfilesInfo(device, profilesInfo): function nvmlDeviceWorkloadPowerProfileGetCurrentProfiles (line 7087) | def nvmlDeviceWorkloadPowerProfileGetCurrentProfiles(device, currentProf... function nvmlDeviceWorkloadPowerProfileSetRequestedProfiles (line 7094) | def nvmlDeviceWorkloadPowerProfileSetRequestedProfiles(device, requested... function nvmlDeviceWorkloadPowerProfileClearRequestedProfiles (line 7101) | def nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(device, request... function nvmlDeviceGetNvlinkSupportedBwModes (line 7108) | def nvmlDeviceGetNvlinkSupportedBwModes(device, supportedBwModes): function nvmlDeviceGetNvlinkBwMode (line 7115) | def nvmlDeviceGetNvlinkBwMode(device, getBwMode): function nvmlDeviceSetNvlinkBwMode (line 7122) | def nvmlDeviceSetNvlinkBwMode(device, setBwMode): class c_nvmlDramEncryptionInfo_t (line 7132) | class c_nvmlDramEncryptionInfo_t(_PrintableStructure): method __init__ (line 7138) | def __init__(self): function nvmlDeviceGetDramEncryptionMode (line 7144) | def nvmlDeviceGetDramEncryptionMode(handle): function nvmlDeviceGetCurrentDramEncryptionMode (line 7154) | def nvmlDeviceGetCurrentDramEncryptionMode(handle): function nvmlDeviceGetPendingDramEncryptionMode (line 7159) | def nvmlDeviceGetPendingDramEncryptionMode(handle): function nvmlDeviceSetDramEncryptionMode (line 7163) | def nvmlDeviceSetDramEncryptionMode(handle, mode): class c_nvmlPowerSmoothingState_v1_t (line 7183) | class c_nvmlPowerSmoothingState_v1_t(_PrintableStructure): method __init__ (line 7189) | def __init__(self): class c_nvmlPowerSmoothingProfile_v1_t (line 7198) | class c_nvmlPowerSmoothingProfile_v1_t(_PrintableStructure): method __init__ (line 7206) | def __init__(self): function nvmlDevicePowerSmoothingActivatePresetProfile (line 7212) | def nvmlDevicePowerSmoothingActivatePresetProfile(device, profile): function nvmlDevicePowerSmoothingUpdatePresetProfileParam (line 7218) | def nvmlDevicePowerSmoothingUpdatePresetProfileParam(device, profile): function nvmlDevicePowerSmoothingSetState (line 7224) | def nvmlDevicePowerSmoothingSetState(device, state): FILE: python/sglang/multimodal_gen/tools/convert_hf_to_fp8.py function ceildiv (line 44) | def ceildiv(a, b): function block_fp8 (line 48) | def block_fp8(weight, block_size): function channel_fp8 (line 82) | def channel_fp8(weight): function tensor_fp8 (line 90) | def tensor_fp8(weight): function quant_fp8 (line 98) | def quant_fp8(weight, strategy, block_size=None): class ConversionResult (line 107) | class ConversionResult: method __init__ (line 108) | def __init__(self): method add_result (line 114) | def add_result(self, filename, q_weights, module_names): function process_file (line 122) | def process_file( function convert_fp8 (line 180) | def convert_fp8(input_path, output_path, strategy, block_size=None, max_... FILE: python/sglang/multimodal_gen/tools/wan_repack.py function get_transformer_config (line 53) | def get_transformer_config(model_type: str) -> Tuple[Dict[str, Any], ...]: function update_dict_ (line 59) | def update_dict_(dict: Dict[str, Any], old_key: str, new_key: str) -> Di... function load_sharded_safetensors (line 63) | def load_sharded_safetensors(path: pathlib.Path): function convert_transformer (line 70) | def convert_transformer(model_type: str, model_dir: str, output_dir: str): function get_args (line 96) | def get_args(): FILE: python/sglang/multimodal_gen/utils.py function _expand_path_value (line 38) | def _expand_path_value(field_name: str, value: Any) -> Any: function expand_path_kwargs (line 49) | def expand_path_kwargs(kwargs: dict[str, Any]) -> dict[str, Any]: function expand_path_fields (line 53) | def expand_path_fields(obj) -> None: function find_nccl_library (line 71) | def find_nccl_library() -> str: function _patched_set_stream (line 104) | def _patched_set_stream(stream: torch.cuda.Stream | None) -> None: function current_stream (line 114) | def current_stream() -> torch.cuda.Stream | None: class StoreBoolean (line 146) | class StoreBoolean(argparse.Action): method __init__ (line 148) | def __init__(self, option_strings, dest, default=False, required=False... method __call__ (line 159) | def __call__(self, parser, namespace, values, option_string=None): class FlexibleArgumentParser (line 175) | class FlexibleArgumentParser(argparse.ArgumentParser): method __init__ (line 178) | def __init__(self, *args, **kwargs) -> None: method parse_args (line 184) | def parse_args( # type: ignore[override] method _pull_args_from_config (line 238) | def _pull_args_from_config(self, args: list[str]) -> list[str]: method _load_config_file (line 326) | def _load_config_file(self, file_path: str) -> list[str]: function warn_for_unimplemented_methods (line 396) | def warn_for_unimplemented_methods(cls: type[T]) -> type[T]: function align_to (line 439) | def align_to(value: int, alignment: int) -> int: function resolve_obj_by_qualname (line 452) | def resolve_obj_by_qualname(qualname: str) -> Any: function import_pynvml (line 462) | def import_pynvml(): function update_environment_variables (line 492) | def update_environment_variables(envs: dict[str, str]): function run_method (line 504) | def run_method( function shallow_asdict (line 528) | def shallow_asdict(obj) -> dict[str, Any]: function kill_itself_when_parent_died (line 535) | def kill_itself_when_parent_died() -> None: function get_exception_traceback (line 551) | def get_exception_traceback() -> str: class TypeBasedDispatcher (line 557) | class TypeBasedDispatcher: method __init__ (line 559) | def __init__(self, mapping: list[tuple[type, Callable]]): method __call__ (line 562) | def __call__(self, obj: Any): class MixedPrecisionState (line 570) | class MixedPrecisionState: function get_mixed_precision_state (line 582) | def get_mixed_precision_state() -> MixedPrecisionState: function set_mixed_precision_policy (line 589) | def set_mixed_precision_policy( function get_compute_dtype (line 611) | def get_compute_dtype() -> torch.dtype: function dict_to_3d_list (line 620) | def dict_to_3d_list( function set_random_seed (line 681) | def set_random_seed(seed: int) -> None: function is_vsa_available (line 688) | def is_vsa_available() -> bool: function is_vmoba_available (line 693) | def is_vmoba_available() -> bool: function masks_like (line 705) | def masks_like( function best_output_size (line 771) | def best_output_size(w, h, dw, dh, expected_area): function calculate_dimensions (line 796) | def calculate_dimensions(target_area, ratio): FILE: python/sglang/profiler.py function run_profile (line 21) | def run_profile( FILE: python/sglang/srt/batch_invariant_ops/batch_invariant_ops.py function _matmul_launch_metadata (line 40) | def _matmul_launch_metadata( function _compute_pid (line 60) | def _compute_pid(tile_id, num_pid_in_group, num_pid_m, GROUP_SIZE_M, NUM... function matmul_kernel_persistent (line 70) | def matmul_kernel_persistent( function _matmul_persistent_triton (line 163) | def _matmul_persistent_triton( function _matmul_persistent_deepgemm (line 240) | def _matmul_persistent_deepgemm( function matmul_persistent (line 265) | def matmul_persistent( function _log_softmax_kernel (line 309) | def _log_softmax_kernel( function log_softmax (line 381) | def log_softmax(input: torch.Tensor, dim: int = -1) -> torch.Tensor: function mean_kernel (line 425) | def mean_kernel( function mean_dim (line 474) | def mean_dim( function mm_batch_invariant (line 567) | def mm_batch_invariant(a, b): function addmm_batch_invariant (line 571) | def addmm_batch_invariant(bias, a, b): function _log_softmax_batch_invariant (line 575) | def _log_softmax_batch_invariant(input, dim, _half_to_float): function mean_batch_invariant (line 580) | def mean_batch_invariant(input, dim, keepdim=False, dtype: torch.dtype |... function bmm_kernel_persistent (line 597) | def bmm_kernel_persistent( function bmm_batch_invariant (line 715) | def bmm_batch_invariant(a, b, *, out=None): function _rms_norm_kernel (line 813) | def _rms_norm_kernel( function rms_norm (line 863) | def rms_norm( function rms_norm_batch_invariant (line 910) | def rms_norm_batch_invariant( function is_batch_invariant_mode_enabled (line 937) | def is_batch_invariant_mode_enabled(): function enable_batch_invariant_mode (line 941) | def enable_batch_invariant_mode( function disable_batch_invariant_mode (line 965) | def disable_batch_invariant_mode(): function set_batch_invariant_mode (line 977) | def set_batch_invariant_mode(enabled: bool = True): function get_batch_invariant_attention_block_size (line 993) | def get_batch_invariant_attention_block_size() -> AttentionBlockSize: FILE: python/sglang/srt/batch_overlap/operations.py function execute_operations (line 21) | def execute_operations(inputs, operations): function execute_overlapped_operations (line 30) | def execute_overlapped_operations( class YieldOperation (line 61) | class YieldOperation: class ExecutionOperation (line 66) | class ExecutionOperation: class _StageExecutor (line 75) | class _StageExecutor: method __init__ (line 76) | def __init__(self, debug_name: str, stages: List[Stage], inputs: dict): method next (line 90) | def next(self): method output (line 118) | def output(self): method done (line 123) | def done(self): method num_stages (line 127) | def num_stages(self): function _annotate_region (line 132) | def _annotate_region(debug_name): class _StateDict (line 141) | class _StateDict: method __init__ (line 142) | def __init__(self): method __setattr__ (line 145) | def __setattr__(self, key, value): method __getattr__ (line 154) | def __getattr__(self, item): method __delattr__ (line 157) | def __delattr__(self, item): method pop (line 160) | def pop(self, item): method update (line 163) | def update(self, values: Dict[str, Any]): method get (line 167) | def get(self, item): method clear (line 170) | def clear(self, expect_keys: Sequence[str]): function _convert_operations_to_stages (line 179) | def _convert_operations_to_stages(operations: List[Operation]) -> List[S... function _chunk_by_separator (line 188) | def _chunk_by_separator( function _decorate_operations (line 202) | def _decorate_operations(operations: List[Operation], debug_name_prefix:... function _decorate_operation (line 206) | def _decorate_operation(operation: Operation, debug_name_prefix: str): FILE: python/sglang/srt/batch_overlap/operations_strategy.py class OperationsStrategy (line 16) | class OperationsStrategy: method concat (line 22) | def concat(cls, items: List["OperationsStrategy"]) -> "OperationsStrat... method init_new_tbo (line 34) | def init_new_tbo( function _assert_all_same (line 70) | def _assert_all_same(items: List): function _compute_moe_deepseek_layer_operations_strategy_tbo (line 79) | def _compute_moe_deepseek_layer_operations_strategy_tbo( function _compute_moe_deepseek_blog_prefill (line 94) | def _compute_moe_deepseek_blog_prefill(layer): function _compute_moe_deepseek_blog_decode (line 125) | def _compute_moe_deepseek_blog_decode(layer): function _compute_moe_qwen3_layer_operations_strategy_tbo (line 158) | def _compute_moe_qwen3_layer_operations_strategy_tbo( function _compute_moe_qwen3_prefill (line 173) | def _compute_moe_qwen3_prefill(layer): function _compute_moe_qwen3_decode (line 203) | def _compute_moe_qwen3_decode(layer): function _compute_moe_mimov2_layer_operations_strategy_tbo (line 235) | def _compute_moe_mimov2_layer_operations_strategy_tbo( function _compute_moe_mimov2_prefill (line 250) | def _compute_moe_mimov2_prefill(layer): function _compute_moe_mimov2_decode (line 278) | def _compute_moe_mimov2_decode(layer): FILE: python/sglang/srt/batch_overlap/single_batch_overlap.py class SboFlags (line 28) | class SboFlags: method enable_combine_down_gemm_two_stream_overlap (line 32) | def enable_combine_down_gemm_two_stream_overlap(cls): method enable_combine_shared_two_stream_overlap (line 43) | def enable_combine_shared_two_stream_overlap(cls): method enable_dispatch_shared_one_stream_overlap (line 51) | def enable_dispatch_shared_one_stream_overlap(cls): method fuse_shared_experts_inside_sbo (line 55) | def fuse_shared_experts_inside_sbo(cls): class CombineOverlapArgs (line 63) | class CombineOverlapArgs: class DownGemmOverlapArgs (line 75) | class DownGemmOverlapArgs: function compute_overlap_args (line 81) | def compute_overlap_args(dispatch_output, alt_stream): FILE: python/sglang/srt/batch_overlap/two_batch_overlap.py function get_token_num_per_seq (line 62) | def get_token_num_per_seq( function compute_split_seq_index (line 78) | def compute_split_seq_index( function _is_two_chunk_split_enabled (line 97) | def _is_two_chunk_split_enabled(extend_lens: Sequence[int]) -> bool: function _split_extend_seqs (line 111) | def _split_extend_seqs(arr: Sequence[int]) -> int: function _split_array_by_cum_less_than_half (line 118) | def _split_array_by_cum_less_than_half(arr: Sequence[int]) -> int: function _split_array_by_balanced_sum (line 133) | def _split_array_by_balanced_sum(arr: Sequence[int]) -> int: function _update_device_and_sum_field_from_cpu_field (line 152) | def _update_device_and_sum_field_from_cpu_field( function _compute_mask_offset (line 180) | def _compute_mask_offset(seq_index: int, spec_info: Optional[EagleVerify... function split_spec_info (line 193) | def split_spec_info( function compute_split_token_index (line 265) | def compute_split_token_index( function compute_split_indices_for_cuda_graph_replay (line 286) | def compute_split_indices_for_cuda_graph_replay( class TboCudaGraphRunnerPlugin (line 315) | class TboCudaGraphRunnerPlugin: method __init__ (line 316) | def __init__(self): method capture_one_batch_size (line 319) | def capture_one_batch_size(self, batch: ForwardBatch, num_tokens: int): method replay_prepare (line 344) | def replay_prepare( class TboDPAttentionPreparer (line 370) | class TboDPAttentionPreparer: method prepare_all_gather (line 371) | def prepare_all_gather( method compute_output (line 419) | def compute_output(self, partial_global_info): method _compute_local_forward_mode (line 440) | def _compute_local_forward_mode(local_batch): method _compute_global_forward_mode (line 446) | def _compute_global_forward_mode(forward_modes): method _is_all_same (line 468) | def _is_all_same(x): class TboForwardBatchPreparer (line 472) | class TboForwardBatchPreparer: method prepare (line 474) | def prepare(cls, batch: ForwardBatch, is_draft_worker: bool = False): method prepare_raw (line 486) | def prepare_raw( method derive_fields_related_to_seq_len_for_two_chunk (line 551) | def derive_fields_related_to_seq_len_for_two_chunk( method filter_batch (line 613) | def filter_batch( method compute_tbo_children_num_token_non_padded (line 774) | def compute_tbo_children_num_token_non_padded(cls, batch: ForwardBatch): method compute_tbo_children_num_token_non_padded_raw (line 781) | def compute_tbo_children_num_token_non_padded_raw( method _compute_split_token_index (line 792) | def _compute_split_token_index(cls, batch: ForwardBatch): function _compute_extend_num_tokens (line 804) | def _compute_extend_num_tokens(input_ids, forward_mode: ForwardMode): function model_forward_maybe_tbo (line 819) | def model_forward_maybe_tbo( function _model_forward_tbo (line 851) | def _model_forward_tbo( function _model_forward_non_tbo (line 883) | def _model_forward_non_tbo(inputs, operations_strategy: OperationsStrate... function _model_forward_tbo_split_inputs (line 888) | def _model_forward_tbo_split_inputs( function _model_forward_tbo_split_inputs_raw (line 938) | def _model_forward_tbo_split_inputs_raw( function _model_forward_filter_inputs (line 966) | def _model_forward_filter_inputs( function _model_forward_tbo_merge_outputs (line 1000) | def _model_forward_tbo_merge_outputs(output_a, output_b, original_len): class MaybeTboDeepEPDispatcher (line 1024) | class MaybeTboDeepEPDispatcher(BaseDispatcher): method __init__ (line 1025) | def __init__(self, **kwargs): method _execute (line 1046) | def _execute(self, name, tbo_subbatch_index: Optional[int] = None, **k... method dispatch (line 1049) | def dispatch(self, **kwargs) -> DispatchOutput: method dispatch_a (line 1052) | def dispatch_a(self, **kwargs): method dispatch_b (line 1055) | def dispatch_b(self, **kwargs): method combine (line 1058) | def combine(self, **kwargs) -> torch.Tensor: method combine_a (line 1061) | def combine_a(self, **kwargs): method combine_b (line 1064) | def combine_b(self, **kwargs): method register_deepep_dispatch_hook (line 1067) | def register_deepep_dispatch_hook(self, hook): method set_quant_config (line 1073) | def set_quant_config(self, quant_config: dict): method set_overlap_args (line 1078) | def set_overlap_args( method clear_overlap_args (line 1085) | def clear_overlap_args(self): FILE: python/sglang/srt/checkpoint_engine/checkpoint_engine_worker.py class SGLangCheckpointEngineWorkerExtension (line 36) | class SGLangCheckpointEngineWorkerExtension: method __init__ (line 42) | def __init__(self): method get_device_uuid (line 45) | def get_device_uuid(self) -> str: method get_device_id (line 53) | def get_device_id(self) -> int: method get_model_loader (line 59) | def get_model_loader(self) -> Callable: method get_post_hook (line 65) | def get_post_hook(self) -> Optional[Callable]: method update_weights_from_ipc (line 69) | def update_weights_from_ipc(self, zmq_handles: Dict[str, str]): class SGLangCheckpointEngineWorkerExtensionImpl (line 92) | class SGLangCheckpointEngineWorkerExtensionImpl(SGLangCheckpointEngineWo... method __init__ (line 98) | def __init__(self, model_runner): method get_device_uuid (line 102) | def get_device_uuid(self) -> str: method get_device_id (line 111) | def get_device_id(self) -> int: method get_model_loader (line 115) | def get_model_loader(self) -> Callable: method get_post_hook (line 119) | def get_post_hook(self) -> Optional[Callable]: FILE: python/sglang/srt/checkpoint_engine/update.py function timer (line 42) | def timer(msg: str): function check_sglang_ready (line 49) | def check_sglang_ready( function split_checkpoint_files (line 74) | def split_checkpoint_files( function split_tensors (line 87) | def split_tensors( function req_inference (line 108) | def req_inference( function update_weights (line 137) | def update_weights( function join (line 175) | def join( function run_with_torchrun (line 199) | def run_with_torchrun(): function main (line 240) | def main(): FILE: python/sglang/srt/compilation/backend.py function make_compiler (line 29) | def make_compiler(config: CompilationConfig): function make_backend (line 38) | def make_backend( class CompilerManager (line 64) | class CompilerManager: method __init__ (line 65) | def __init__( method compute_hash (line 73) | def compute_hash(self): method initialize_cache (line 76) | def initialize_cache( method save_to_file (line 91) | def save_to_file(self): method load (line 99) | def load( method compile (line 128) | def compile( class SplitItem (line 206) | class SplitItem: function split_graph (line 213) | def split_graph( class PiecewiseCompileInterpreter (line 265) | class PiecewiseCompileInterpreter(torch.fx.Interpreter): method __init__ (line 266) | def __init__( method run (line 287) | def run(self, *args): method call_module (line 295) | def call_module( function set_model_tag (line 343) | def set_model_tag(tag: str): class SGLangBackend (line 357) | class SGLangBackend: method __init__ (line 373) | def __init__( method configure_post_pass (line 391) | def configure_post_pass(self): method __call__ (line 395) | def __call__(self, graph: fx.GraphModule, example_inputs) -> Callable: FILE: python/sglang/srt/compilation/compilation_config.py function register_split_op (line 8) | def register_split_op(op_name: Optional[str] = None): class CompilationConfig (line 18) | class CompilationConfig: method __init__ (line 19) | def __init__( method add_split_op (line 32) | def add_split_op(self, op: str): method add_traced_file (line 35) | def add_traced_file(self, file_path: str): method get_traced_files (line 38) | def get_traced_files(self): method get_capture_sizes (line 41) | def get_capture_sizes(self): method get_enable_debug_mode (line 44) | def get_enable_debug_mode(self): FILE: python/sglang/srt/compilation/compilation_counter.py class CompilationCounter (line 9) | class CompilationCounter: method clone (line 32) | def clone(self) -> "CompilationCounter": method expect (line 36) | def expect(self, **kwargs): FILE: python/sglang/srt/compilation/compile.py class IntermediateTensors (line 18) | class IntermediateTensors: method __init__ (line 32) | def __init__(self, tensors): method __getitem__ (line 39) | def __getitem__(self, key: Union[str, slice]): method __setitem__ (line 45) | def __setitem__(self, key: str, value: torch.Tensor): method items (line 48) | def items(self): method __len__ (line 51) | def __len__(self): method __eq__ (line 54) | def __eq__(self, other: object): method __repr__ (line 57) | def __repr__(self) -> str: function _normalize_dims (line 61) | def _normalize_dims(dims, ndim: int): class _MaybeIntermediateTensors (line 66) | class _MaybeIntermediateTensors: method __init__ (line 69) | def __init__(self, obj): function _mark_dynamic_on_value (line 76) | def _mark_dynamic_on_value(val, dims): function _infer_dynamic_arg_dims_from_annotations (line 87) | def _infer_dynamic_arg_dims_from_annotations(forward_fn): function install_torch_compiled (line 111) | def install_torch_compiled( FILE: python/sglang/srt/compilation/compiler_interface.py class CompilerInterface (line 20) | class CompilerInterface: method initialize_cache (line 29) | def initialize_cache( method compute_hash (line 47) | def compute_hash(self) -> str: method compile (line 59) | def compile( method load (line 93) | def load( function get_inductor_factors (line 110) | def get_inductor_factors() -> list[Any]: class AlwaysHitShapeEnv (line 126) | class AlwaysHitShapeEnv: method __init__ (line 151) | def __init__(self) -> None: method evaluate_guards_expression (line 154) | def evaluate_guards_expression(self, *args, **kwargs): method get_pruned_guards (line 157) | def get_pruned_guards(self, *args, **kwargs): method produce_guards_expression (line 160) | def produce_guards_expression(self, *args, **kwargs): class InductorAdaptor (line 164) | class InductorAdaptor(CompilerInterface): method compute_hash (line 171) | def compute_hash(self) -> str: method initialize_cache (line 178) | def initialize_cache( method compile (line 197) | def compile( method load (line 374) | def load( method metrics_context (line 452) | def metrics_context(self) -> contextlib.AbstractContextManager: function set_inductor_config (line 473) | def set_inductor_config(config, runtime_shape): class EagerAdapter (line 481) | class EagerAdapter(CompilerInterface): method compile (line 484) | def compile( method load (line 495) | def load( FILE: python/sglang/srt/compilation/cuda_piecewise_backend.py class ConcreteSizeEntry (line 24) | class ConcreteSizeEntry: class CUDAPiecewiseBackend (line 40) | class CUDAPiecewiseBackend: method __init__ (line 42) | def __init__( method check_for_ending_compilation (line 101) | def check_for_ending_compilation(self): method __call__ (line 107) | def __call__(self, *args) -> Any: FILE: python/sglang/srt/compilation/fix_functionalization.py class FixFunctionalizationPass (line 17) | class FixFunctionalizationPass(SGLangInductorPass): method __call__ (line 26) | def __call__(self, graph: torch.fx.Graph): method _remove (line 50) | def _remove(self, node_or_nodes: Union[torch.fx.Node, Iterable[torch.f... method defunctionalize (line 59) | def defunctionalize( method replace_users_with_mutated_args (line 75) | def replace_users_with_mutated_args( method getitem_users (line 91) | def getitem_users(self, node: torch.fx.Node) -> dict[int, torch.fx.Node]: method insert_defunctionalized (line 103) | def insert_defunctionalized( FILE: python/sglang/srt/compilation/fx_utils.py function is_func (line 12) | def is_func(node: fx.Node, target) -> bool: function is_auto_func (line 16) | def is_auto_func(node: fx.Node, op: OpOverload) -> bool: function find_specified_fn_maybe (line 21) | def find_specified_fn_maybe( function find_specified_fn (line 31) | def find_specified_fn(nodes: Iterable[fx.Node], op: OpOverload) -> fx.Node: function find_auto_fn_maybe (line 38) | def find_auto_fn_maybe(nodes: Iterable[fx.Node], op: OpOverload) -> Opti... function find_auto_fn (line 46) | def find_auto_fn(nodes: Iterable[fx.Node], op: OpOverload) -> fx.Node: function find_getitem_maybe (line 54) | def find_getitem_maybe(node: fx.Node, idx: int) -> Optional[fx.Node]: function find_getitem (line 62) | def find_getitem(node: fx.Node, idx: int) -> fx.Node: function find_op_nodes (line 69) | def find_op_nodes(op: OpOverload, graph: fx.Graph) -> Iterator[fx.Node]: function get_only_user (line 81) | def get_only_user(node: fx.Node) -> fx.Node: FILE: python/sglang/srt/compilation/inductor_pass.py class PassContext (line 22) | class PassContext: method __init__ (line 24) | def __init__(self, runtime_shape: Optional[int]): function get_pass_context (line 28) | def get_pass_context() -> PassContext: function pass_context (line 35) | def pass_context(runtime_shape: Optional[int]): class InductorPass (line 48) | class InductorPass(CustomGraphPass): method uuid (line 54) | def uuid(self) -> Any: method hash_source (line 64) | def hash_source(*srcs: Union[str, Any]): method hash_dict (line 83) | def hash_dict(dict_: dict[Any, Any]): method is_applicable_for_shape (line 91) | def is_applicable_for_shape(self, shape: Optional[int]): class CallableInductorPass (line 95) | class CallableInductorPass(InductorPass): method __init__ (line 101) | def __init__( method __call__ (line 107) | def __call__(self, graph: torch.fx.Graph): method uuid (line 110) | def uuid(self) -> Any: class SGLangInductorPass (line 114) | class SGLangInductorPass(InductorPass): method __init__ (line 116) | def __init__( method dump_graph (line 121) | def dump_graph(self, graph: torch.fx.Graph, stage: str): method begin (line 124) | def begin(self): method end_and_log (line 127) | def end_and_log(self): class PrinterInductorPass (line 133) | class PrinterInductorPass(SGLangInductorPass): method __init__ (line 135) | def __init__(self, name: str): method __call__ (line 139) | def __call__(self, graph: torch.fx.Graph): FILE: python/sglang/srt/compilation/npu_piecewise_backend.py class NPUPiecewiseBackend (line 16) | class NPUPiecewiseBackend(CUDAPiecewiseBackend): method __init__ (line 17) | def __init__( method __call__ (line 41) | def __call__(self, *args): FILE: python/sglang/srt/compilation/pass_manager.py class PostGradPassManager (line 18) | class PostGradPassManager(CustomGraphPass): method __init__ (line 33) | def __init__(self): method __call__ (line 36) | def __call__(self, graph: fx.Graph): method configure (line 45) | def configure( method add (line 51) | def add(self, pass_: InductorPass): method uuid (line 55) | def uuid(self): FILE: python/sglang/srt/compilation/piecewise_context_manager.py function is_in_piecewise_cuda_graph (line 21) | def is_in_piecewise_cuda_graph(): function is_in_pcg_torch_compile (line 25) | def is_in_pcg_torch_compile(): function get_pcg_capture_stream (line 29) | def get_pcg_capture_stream(): function enable_piecewise_cuda_graph_compile (line 34) | def enable_piecewise_cuda_graph_compile(): function enable_piecewise_cuda_graph (line 42) | def enable_piecewise_cuda_graph(): function set_pcg_capture_stream (line 59) | def set_pcg_capture_stream(stream: torch.cuda.Stream): class ForwardContext (line 67) | class ForwardContext: method __init__ (line 68) | def __init__(self): method set_forward_batch (line 75) | def set_forward_batch(self, forward_batch: ForwardBatch): method set_attention_layers (line 78) | def set_attention_layers(self, layers: List[Any]): method set_quant_config (line 81) | def set_quant_config(self, quant_config: Any): method set_moe_layers (line 84) | def set_moe_layers(self, layers: List[Any]): method set_moe_fusions (line 87) | def set_moe_fusions(self, fusions: List[Any]): function get_forward_context (line 94) | def get_forward_context() -> Optional[ForwardContext]: function set_forward_context (line 101) | def set_forward_context( FILE: python/sglang/srt/compilation/weak_ref_tensor.py function weak_ref_tensors (line 15) | def weak_ref_tensors( FILE: python/sglang/srt/configs/afmoe.py class AfmoeConfig (line 6) | class AfmoeConfig(PretrainedConfig): method __init__ (line 9) | def __init__( FILE: python/sglang/srt/configs/bailing_hybrid.py class HybridLayerType (line 27) | class HybridLayerType(enum.Enum): class BailingHybridConfig (line 32) | class BailingHybridConfig(PretrainedConfig): method __init__ (line 37) | def __init__( method layers_block_type (line 144) | def layers_block_type(self): method linear_layer_ids (line 159) | def linear_layer_ids(self): method full_attention_layer_ids (line 167) | def full_attention_layer_ids(self): method mamba2_cache_params (line 175) | def mamba2_cache_params(self) -> Mamba2CacheParams: FILE: python/sglang/srt/configs/chatglm.py class ChatGLMConfig (line 12) | class ChatGLMConfig(PretrainedConfig): method __init__ (line 19) | def __init__( FILE: python/sglang/srt/configs/dbrx.py class DbrxAttentionConfig (line 16) | class DbrxAttentionConfig(PretrainedConfig): method __init__ (line 34) | def __init__( method from_pretrained (line 55) | def from_pretrained( class DbrxFFNConfig (line 83) | class DbrxFFNConfig(PretrainedConfig): method __init__ (line 106) | def __init__( method from_pretrained (line 137) | def from_pretrained( class DbrxConfig (line 165) | class DbrxConfig(PretrainedConfig): method __init__ (line 229) | def __init__( FILE: python/sglang/srt/configs/deepseek_ocr.py function get_default_ngram_custom_params (line 40) | def get_default_ngram_custom_params() -> Dict[str, Any]: class DictOutput (line 53) | class DictOutput(object): method items (line 54) | def items(self): method keys (line 57) | def keys(self): method __getitem__ (line 60) | def __getitem__(self, item): method __contains__ (line 63) | def __contains__(self, key): method __setitem__ (line 66) | def __setitem__(self, key, value): class VLChatProcessorOutput (line 71) | class VLChatProcessorOutput(DictOutput): method __len__ (line 81) | def __len__(self): class ImageTransform (line 85) | class ImageTransform(object): method __init__ (line 86) | def __init__( method __call__ (line 113) | def __call__(self, pil_img: Image.Image): function find_closest_aspect_ratio (line 118) | def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height... function dynamic_preprocess (line 134) | def dynamic_preprocess( class DeepseekOCRProcessor (line 180) | class DeepseekOCRProcessor(ProcessorMixin): method __init__ (line 184) | def __init__( method format_messages_v2 (line 254) | def format_messages_v2(self, messages: str, pil_images, max_req_input_... method bos_id (line 295) | def bos_id(self): method eos_id (line 299) | def eos_id(self): method pad_id (line 303) | def pad_id(self): method encode (line 306) | def encode(self, text: str, bos: bool = True, eos: bool = False): method decode (line 316) | def decode(self, t: List[int], **kwargs) -> str: method process_one (line 319) | def process_one( method __call__ (line 393) | def __call__( method find_all_indices (line 421) | def find_all_indices(self, messages, target_value): method tokenize_with_images (line 428) | def tokenize_with_images( class VisionEncoderConfig (line 599) | class VisionEncoderConfig(PretrainedConfig): method __init__ (line 618) | def __init__( class MlpProjectorConfig (line 650) | class MlpProjectorConfig(PretrainedConfig): method __init__ (line 660) | def __init__( class DeepseekV2Config (line 680) | class DeepseekV2Config(PretrainedConfig): method __init__ (line 684) | def __init__( class DeepseekVLV2Config (line 781) | class DeepseekVLV2Config(PretrainedConfig): method __init__ (line 792) | def __init__( FILE: python/sglang/srt/configs/deepseekvl2.py function select_best_resolution (line 15) | def select_best_resolution(image_size, candidate_resolutions): class DictOutput (line 43) | class DictOutput(object): method items (line 44) | def items(self): method keys (line 47) | def keys(self): method __getitem__ (line 50) | def __getitem__(self, item): method __contains__ (line 53) | def __contains__(self, key): method __setitem__ (line 56) | def __setitem__(self, key, value): class VLChatProcessorOutput (line 61) | class VLChatProcessorOutput(DictOutput): method __len__ (line 70) | def __len__(self): class ImageTransform (line 74) | class ImageTransform(object): method __init__ (line 75) | def __init__( method __call__ (line 102) | def __call__(self, pil_img: Image.Image): class DeepseekVLV2Processor (line 107) | class DeepseekVLV2Processor(ProcessorMixin): method __init__ (line 111) | def __init__( method format_messages_v2 (line 179) | def format_messages_v2(self, messages, pil_images, max_req_input_len=-1): method bos_id (line 221) | def bos_id(self): method eos_id (line 225) | def eos_id(self): method pad_id (line 229) | def pad_id(self): method encode (line 232) | def encode(self, text: str, bos: bool = True, eos: bool = False): method decode (line 242) | def decode(self, t: List[int], **kwargs) -> str: method process_one (line 245) | def process_one( method __call__ (line 333) | def __call__( method find_all_indices (line 357) | def find_all_indices(self, messages, target_value): method tokenize_with_images (line 364) | def tokenize_with_images( class DeepseekVL2VisionEncoderConfig (line 468) | class DeepseekVL2VisionEncoderConfig(PretrainedConfig): method __init__ (line 487) | def __init__( class DeepseekVL2MlpProjectorConfig (line 519) | class DeepseekVL2MlpProjectorConfig(PretrainedConfig): method __init__ (line 529) | def __init__( class DeepseekV2Config (line 549) | class DeepseekV2Config(PretrainedConfig): method __init__ (line 554) | def __init__( class DeepseekVL2Config (line 650) | class DeepseekVL2Config(PretrainedConfig): method __init__ (line 660) | def __init__( FILE: python/sglang/srt/configs/device_config.py class DeviceConfig (line 11) | class DeviceConfig: method __init__ (line 15) | def __init__(self, device: str = "cuda", gpu_id: int = -1) -> None: FILE: python/sglang/srt/configs/dots_ocr.py class DotsOCRConfig (line 10) | class DotsOCRConfig(Qwen2Config): method __init__ (line 13) | def __init__( method save_pretrained (line 26) | def save_pretrained(self, save_directory, **kwargs): class DummyVideoProcessor (line 31) | class DummyVideoProcessor(BaseImageProcessor): method __call__ (line 34) | def __call__(self, *args, **kwargs): class DotsVLProcessor (line 38) | class DotsVLProcessor(Qwen2_5_VLProcessor): method __init__ (line 39) | def __init__( FILE: python/sglang/srt/configs/dots_vlm.py class DotsVisionConfig (line 14) | class DotsVisionConfig(PretrainedConfig): method __init__ (line 17) | def __init__( class DotsVLMConfig (line 58) | class DotsVLMConfig(PretrainedConfig): method __init__ (line 61) | def __init__(self, **kwargs): class DotsVLMProcessorKwargs (line 71) | class DotsVLMProcessorKwargs(ProcessingKwargs, total=False): class DotsVLMProcessor (line 79) | class DotsVLMProcessor(Qwen2_5_VLProcessor): method __init__ (line 100) | def __init__( FILE: python/sglang/srt/configs/exaone.py class ExaoneConfig (line 30) | class ExaoneConfig(PretrainedConfig): method __init__ (line 144) | def __init__( FILE: python/sglang/srt/configs/falcon_h1.py class FalconH1Config (line 29) | class FalconH1Config(PretrainedConfig): method __init__ (line 139) | def __init__( method layers_block_type (line 287) | def layers_block_type(self): method full_attention_layer_ids (line 291) | def full_attention_layer_ids(self): method linear_layer_ids (line 296) | def linear_layer_ids(self): method mamba2_cache_params (line 301) | def mamba2_cache_params(self): FILE: python/sglang/srt/configs/granitemoehybrid.py class GraniteMoeHybridConfig (line 28) | class GraniteMoeHybridConfig(PretrainedConfig): method __init__ (line 129) | def __init__( method mamba_layer_ids (line 269) | def mamba_layer_ids(self): method attention_layer_ids (line 276) | def attention_layer_ids(self): method full_attention_layer_ids (line 283) | def full_attention_layer_ids(self): method mamba2_cache_params (line 288) | def mamba2_cache_params(self): FILE: python/sglang/srt/configs/internvl.py class InternLM2Config (line 29) | class InternLM2Config(PretrainedConfig): method __init__ (line 80) | def __init__( # pylint: disable=W0102 method _rope_scaling_validation (line 134) | def _rope_scaling_validation(self): class InternVisionConfig (line 164) | class InternVisionConfig(PretrainedConfig): method __init__ (line 212) | def __init__( method from_pretrained (line 254) | def from_pretrained( class InternVLChatConfig (line 277) | class InternVLChatConfig(PretrainedConfig): method __init__ (line 281) | def __init__( method to_dict (line 351) | def to_dict(self): class InternLM2Tokenizer (line 486) | class InternLM2Tokenizer(PreTrainedTokenizer): method __init__ (line 500) | def __init__( method no_prefix_space_tokens (line 533) | def no_prefix_space_tokens(self): method vocab_size (line 542) | def vocab_size(self): method bos_token_id (line 547) | def bos_token_id(self) -> Optional[int]: method eos_token_id (line 551) | def eos_token_id(self) -> Optional[int]: method get_vocab (line 554) | def get_vocab(self): method _tokenize (line 560) | def _tokenize(self, text): method _convert_token_to_id (line 564) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 568) | def _convert_id_to_token(self, index): method _maybe_add_prefix_space (line 573) | def _maybe_add_prefix_space(self, tokens, decoded): method convert_tokens_to_string (line 579) | def convert_tokens_to_string(self, tokens): method save_vocabulary (line 599) | def save_vocabulary( method build_inputs_with_special_tokens (line 632) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No... method get_special_tokens_mask (line 648) | def get_special_tokens_mask( method create_token_type_ids_from_sequences (line 680) | def create_token_type_ids_from_sequences( FILE: python/sglang/srt/configs/janus_pro.py class DictToObject (line 25) | class DictToObject(dict): method __init__ (line 26) | def __init__(self, dictionary): class VisionConfig (line 35) | class VisionConfig(PretrainedConfig): method __init__ (line 40) | def __init__(self, **kwargs): class GenAlignerConfig (line 50) | class GenAlignerConfig(PretrainedConfig): method __init__ (line 55) | def __init__(self, **kwargs): class GenHeadConfig (line 65) | class GenHeadConfig(PretrainedConfig): method __init__ (line 70) | def __init__(self, **kwargs): class AlignerConfig (line 80) | class AlignerConfig(PretrainedConfig): method __init__ (line 85) | def __init__(self, **kwargs): class GenVisionConfig (line 95) | class GenVisionConfig(PretrainedConfig): method __init__ (line 100) | def __init__(self, **kwargs): class SigLIPVisionCfg (line 111) | class SigLIPVisionCfg: class MultiModalityConfig (line 124) | class MultiModalityConfig(PretrainedConfig): method __init__ (line 135) | def __init__(self, **kwargs): class VLMImageProcessor (line 159) | class VLMImageProcessor(BaseImageProcessor): method __init__ (line 162) | def __init__( method resize (line 194) | def resize(self, pil_img: Image) -> np.ndarray: method preprocess (line 249) | def preprocess(self, images, return_tensors: str = "pt", **kwargs) -> ... method default_shape (line 282) | def default_shape(self): class DictOutput (line 286) | class DictOutput(object): method items (line 287) | def items(self): method keys (line 290) | def keys(self): method __getitem__ (line 293) | def __getitem__(self, item): method __contains__ (line 296) | def __contains__(self, key): method __setitem__ (line 299) | def __setitem__(self, key, value): class VLChatProcessorOutput (line 304) | class VLChatProcessorOutput(DictOutput): method __len__ (line 310) | def __len__(self): class BatchedVLChatProcessorOutput (line 315) | class BatchedVLChatProcessorOutput(DictOutput): class VLChatProcessor (line 326) | class VLChatProcessor(ProcessorMixin): method __init__ (line 332) | def __init__( method image_token (line 374) | def image_token(self): method image_id (line 378) | def image_id(self) -> int: method image_start_id (line 383) | def image_start_id(self): method image_end_id (line 388) | def image_end_id(self): method image_start_token (line 393) | def image_start_token(self): method image_end_token (line 397) | def image_end_token(self): method pad_id (line 401) | def pad_id(self): method add_image_token (line 405) | def add_image_token( method process_one (line 450) | def process_one( method __call__ (line 497) | def __call__( method batchify (line 532) | def batchify( class VLMImageProcessorConfig (line 596) | class VLMImageProcessorConfig(PretrainedConfig): method __init__ (line 605) | def __init__( FILE: python/sglang/srt/configs/jet_nemotron.py class JetBlockConfig (line 14) | class JetBlockConfig: class JetNemotronConfig (line 25) | class JetNemotronConfig(PretrainedConfig): method full_attention_layer_ids (line 42) | def full_attention_layer_ids(self) -> list[int]: method linear_layer_ids (line 50) | def linear_layer_ids(self) -> list[int]: method mamba2_cache_params (line 58) | def mamba2_cache_params(self) -> Mamba2CacheParams: FILE: python/sglang/srt/configs/jet_vlm.py class JetVLMConfig (line 10) | class JetVLMConfig(PretrainedConfig): method __init__ (line 18) | def __init__( method full_attention_layer_ids (line 44) | def full_attention_layer_ids(self) -> list[int]: method linear_layer_ids (line 48) | def linear_layer_ids(self) -> list[int]: method mamba2_cache_params (line 52) | def mamba2_cache_params(self) -> Mamba2CacheParams: FILE: python/sglang/srt/configs/kimi_k25.py class KimiK25VisionConfig (line 9) | class KimiK25VisionConfig(PretrainedConfig): method __init__ (line 36) | def __init__( class KimiK25Config (line 84) | class KimiK25Config(PretrainedConfig): method __init__ (line 128) | def __init__( method hidden_size (line 164) | def hidden_size(self) -> int: method vocab_size (line 169) | def vocab_size(self) -> int: FILE: python/sglang/srt/configs/kimi_linear.py class KimiLinearConfig (line 7) | class KimiLinearConfig(PretrainedConfig): method __init__ (line 11) | def __init__( method is_mla (line 111) | def is_mla(self): method is_moe (line 122) | def is_moe(self): method is_linear_attn (line 126) | def is_linear_attn(self) -> bool: method is_kda_layer (line 136) | def is_kda_layer(self, layer_idx: int): method linear_layer_ids (line 143) | def linear_layer_ids(self): method full_attention_layer_ids (line 147) | def full_attention_layer_ids(self): method mamba2_cache_params (line 151) | def mamba2_cache_params(self) -> KimiLinearCacheParams: FILE: python/sglang/srt/configs/kimi_vl.py class KimiVLConfig (line 11) | class KimiVLConfig(PretrainedConfig): method __init__ (line 14) | def __init__( FILE: python/sglang/srt/configs/kimi_vl_moonvit.py class MoonViTConfig (line 6) | class MoonViTConfig(PretrainedConfig): method __init__ (line 9) | def __init__( FILE: python/sglang/srt/configs/lfm2.py class Lfm2Config (line 32) | class Lfm2Config(HFLfm2Config): method full_attention_layer_ids (line 41) | def full_attention_layer_ids(self) -> List[int]: method linear_layer_ids (line 46) | def linear_layer_ids(self) -> List[int]: method mamba_chunk_size (line 53) | def mamba_chunk_size(self) -> int: method mamba2_cache_params (line 58) | def mamba2_cache_params(self) -> Optional[Mamba2CacheParams]: FILE: python/sglang/srt/configs/lfm2_moe.py class Lfm2MoeConfig (line 28) | class Lfm2MoeConfig(PretrainedConfig): method __init__ (line 45) | def __init__( method full_attention_layer_ids (line 125) | def full_attention_layer_ids(self) -> List[int]: method linear_layer_ids (line 132) | def linear_layer_ids(self) -> List[int]: method mamba_chunk_size (line 141) | def mamba_chunk_size(self) -> int: method mamba2_cache_params (line 146) | def mamba2_cache_params(self) -> Optional[Mamba2CacheParams]: FILE: python/sglang/srt/configs/load_config.py class LoadFormat (line 15) | class LoadFormat(str, enum.Enum): class LoadConfig (line 37) | class LoadConfig: method __post_init__ (line 98) | def __post_init__(self): method _verify_load_format (line 120) | def _verify_load_format(self) -> None: FILE: python/sglang/srt/configs/longcat_flash.py class LongcatFlashConfig (line 9) | class LongcatFlashConfig(PretrainedConfig): method __init__ (line 13) | def __init__( FILE: python/sglang/srt/configs/mamba_utils.py function extra_groups_for_head_shards (line 29) | def extra_groups_for_head_shards(ngroups: int, tp_size: int): class Mamba2StateDType (line 42) | class Mamba2StateDType: function mamba2_state_dtype (line 47) | def mamba2_state_dtype(config=None) -> Mamba2StateDType: class BaseLinearStateParams (line 111) | class BaseLinearStateParams(ABC): method mamba_cache_per_req (line 116) | def mamba_cache_per_req(self) -> int: class Mamba2StateShape (line 129) | class Mamba2StateShape: method create (line 142) | def create( class Mamba2CacheParams (line 181) | class Mamba2CacheParams(BaseLinearStateParams): class KimiLinearStateShape (line 186) | class KimiLinearStateShape: method create (line 198) | def create( class KimiLinearCacheParams (line 238) | class KimiLinearCacheParams(BaseLinearStateParams): FILE: python/sglang/srt/configs/model_config.py class AttentionArch (line 42) | class AttentionArch(IntEnum): class ModelImpl (line 47) | class ModelImpl(str, Enum): function is_deepseek_nsa (line 54) | def is_deepseek_nsa(config) -> bool: function get_nsa_index_head_dim (line 80) | def get_nsa_index_head_dim(config: PretrainedConfig) -> int: function get_nsa_index_topk (line 85) | def get_nsa_index_topk(config: PretrainedConfig) -> int: function get_nsa_index_n_heads (line 90) | def get_nsa_index_n_heads(config: PretrainedConfig) -> int: class ModelConfig (line 95) | class ModelConfig: method __init__ (line 96) | def __init__( method from_server_args (line 248) | def from_server_args( method _config_draft_model (line 287) | def _config_draft_model(self): method _derive_hybrid_model (line 354) | def _derive_hybrid_model(self): method _derive_context_length (line 374) | def _derive_context_length(self, context_length: int): method _derive_model_shapes (line 408) | def _derive_model_shapes(self): method get_total_num_attention_heads (line 589) | def get_total_num_attention_heads(self) -> int: method get_num_attention_heads (line 592) | def get_num_attention_heads(self, tensor_parallel_size) -> int: method get_total_num_kv_heads (line 597) | def get_total_num_kv_heads(self) -> int: method get_num_kv_heads (line 660) | def get_num_kv_heads(self, tensor_parallel_size) -> int: method get_swa_num_kv_heads (line 669) | def get_swa_num_kv_heads(self, tensor_parallel_size) -> int: method _parse_quant_hf_config (line 683) | def _parse_quant_hf_config(self): method _find_quant_modelslim_config (line 781) | def _find_quant_modelslim_config(self): method _parse_modelopt_quant_config (line 793) | def _parse_modelopt_quant_config(self, quant_config_dict: dict) -> Opt... method get_quantization_config_log_str (line 812) | def get_quantization_config_log_str(self) -> Optional[str]: method _is_already_quantized (line 834) | def _is_already_quantized(self) -> bool: method _get_modelopt_quant_type (line 847) | def _get_modelopt_quant_type(self) -> str: method _get_sliding_window_size (line 871) | def _get_sliding_window_size(self) -> Optional[int]: method _validate_quantize_and_serve_config (line 877) | def _validate_quantize_and_serve_config(self): method _verify_quantization (line 908) | def _verify_quantization(self) -> None: method _verify_dual_chunk_attention_config (line 1053) | def _verify_dual_chunk_attention_config(self) -> None: method _verify_transformers_version (line 1070) | def _verify_transformers_version(self): method _get_hf_eos_token_id (line 1105) | def _get_hf_eos_token_id(self) -> Optional[Set[int]]: method get_default_sampling_params (line 1125) | def get_default_sampling_params(self) -> dict[str, Any]: method _maybe_pull_model_tokenizer_from_remote (line 1157) | def _maybe_pull_model_tokenizer_from_remote(self) -> None: function _get_and_verify_dtype (line 1192) | def _get_and_verify_dtype( function is_generation_model (line 1256) | def is_generation_model(model_architectures: List[str], is_embedding: bo... function is_multimodal_model (line 1351) | def is_multimodal_model(model_architectures: List[str]): function is_multimodal_gen_model (line 1361) | def is_multimodal_gen_model(model_architectures: List[str]): function is_image_gen_model (line 1365) | def is_image_gen_model(model_architectures: List[str]): function is_audio_model (line 1369) | def is_audio_model(model_architectures: List[str]): function is_encoder_decoder_model (line 1376) | def is_encoder_decoder_model(model_architectures: List[str]): function is_local_attention_model (line 1384) | def is_local_attention_model(model_architectures: List[str]): function is_multimodal_chunked_prefill_supported (line 1388) | def is_multimodal_chunked_prefill_supported(model_architectures: List[st... function is_piecewise_cuda_graph_disabled_model (line 1403) | def is_piecewise_cuda_graph_disabled_model(model_architectures: List[str]): function yarn_get_mscale (line 1410) | def yarn_get_mscale(scale: float = 1, mscale: float = 1) -> float: function compute_mla_mscale_scaling (line 1416) | def compute_mla_mscale_scaling(rope_scaling: dict, base_scaling: float) ... function is_hybrid_swa_model (line 1433) | def is_hybrid_swa_model(model_architectures: List[str]): function get_hybrid_layer_ids (line 1446) | def get_hybrid_layer_ids( FILE: python/sglang/srt/configs/modelopt_config.py class ModelOptConfig (line 7) | class ModelOptConfig: method __post_init__ (line 27) | def __post_init__(self): FILE: python/sglang/srt/configs/nano_nemotron_vl.py function float_triplet (line 25) | def float_triplet(seq: Any): class NemotronH_Nano_VL_V2_Config (line 33) | class NemotronH_Nano_VL_V2_Config(PretrainedConfig): method __init__ (line 37) | def __init__( method create_radio_config (line 101) | def create_radio_config(self): FILE: python/sglang/srt/configs/nemotron_h.py class NemotronHConfig (line 38) | class NemotronHConfig(PretrainedConfig): method _validate_layers_block_type (line 161) | def _validate_layers_block_type( method _resolve_layers_block_type (line 189) | def _resolve_layers_block_type( method _resolve_mtp_layers_block_type (line 206) | def _resolve_mtp_layers_block_type(mtp_layers_block_type, kwargs) -> l... method _resolve_mamba_chunk_size (line 218) | def _resolve_mamba_chunk_size(mamba_chunk_size, kwargs) -> int: method __init__ (line 239) | def __init__( method mamba_layer_ids (line 402) | def mamba_layer_ids(self): method full_attention_layer_ids (line 410) | def full_attention_layer_ids(self): method mamba2_cache_params (line 418) | def mamba2_cache_params(self) -> Mamba2CacheParams: method num_hidden_layers (line 436) | def num_hidden_layers(self) -> int: method num_hidden_layers (line 444) | def num_hidden_layers(self, value): method hybrid_override_pattern (line 452) | def hybrid_override_pattern(self) -> str: method hybrid_override_pattern (line 460) | def hybrid_override_pattern(self, value): method mtp_hybrid_override_pattern (line 467) | def mtp_hybrid_override_pattern(self) -> str: method mtp_hybrid_override_pattern (line 475) | def mtp_hybrid_override_pattern(self, value): method _list_to_pattern (line 480) | def _list_to_pattern(layers_list: list[str]) -> str: method _pattern_to_list (line 491) | def _pattern_to_list(pattern: str) -> list[str]: FILE: python/sglang/srt/configs/olmo3.py class Olmo3LayerType (line 25) | class Olmo3LayerType(enum.Enum): class Olmo3Config (line 30) | class Olmo3Config(PretrainedConfig): method __init__ (line 35) | def __init__( FILE: python/sglang/srt/configs/points_v15_chat.py class POINTSV15ChatConfig (line 7) | class POINTSV15ChatConfig(PretrainedConfig): method __init__ (line 10) | def __init__( FILE: python/sglang/srt/configs/qwen3_5.py class Qwen3_5VisionConfig (line 7) | class Qwen3_5VisionConfig(Qwen3VLVisionConfig): class Qwen3_5TextConfig (line 12) | class Qwen3_5TextConfig(Qwen3NextConfig): method __init__ (line 16) | def __init__( class Qwen3_5Config (line 34) | class Qwen3_5Config(PretrainedConfig): method __init__ (line 81) | def __init__( class Qwen3_5MoeVisionConfig (line 109) | class Qwen3_5MoeVisionConfig(Qwen3_5VisionConfig): class Qwen3_5MoeTextConfig (line 113) | class Qwen3_5MoeTextConfig(Qwen3_5TextConfig): class Qwen3_5MoeConfig (line 117) | class Qwen3_5MoeConfig(Qwen3_5Config): FILE: python/sglang/srt/configs/qwen3_next.py class HybridLayerType (line 34) | class HybridLayerType(enum.Enum): class Qwen3NextConfig (line 39) | class Qwen3NextConfig(PretrainedConfig): method __init__ (line 180) | def __init__( method layers_block_type (line 255) | def layers_block_type(self): method linear_layer_ids (line 267) | def linear_layer_ids(self): method full_attention_layer_ids (line 275) | def full_attention_layer_ids(self): method mamba2_cache_params (line 283) | def mamba2_cache_params(self) -> Mamba2CacheParams: FILE: python/sglang/srt/configs/qwen3_omni.py class Qwen3OmniMoeAudioEncoderConfig (line 7) | class Qwen3OmniMoeAudioEncoderConfig(PretrainedConfig): method __init__ (line 10) | def __init__( class Qwen3OmniMoeVisionEncoderConfig (line 55) | class Qwen3OmniMoeVisionEncoderConfig(PretrainedConfig): method __init__ (line 59) | def __init__( class Qwen3OmniMoeTextConfig (line 93) | class Qwen3OmniMoeTextConfig(PretrainedConfig): method __init__ (line 116) | def __init__( class Qwen3OmniMoeThinkerConfig (line 182) | class Qwen3OmniMoeThinkerConfig(PretrainedConfig): method __init__ (line 195) | def __init__( class Qwen3OmniMoeTalkerCodePredictorConfig (line 237) | class Qwen3OmniMoeTalkerCodePredictorConfig(PretrainedConfig): method __init__ (line 258) | def __init__( class Qwen3OmniMoeTalkerTextConfig (line 327) | class Qwen3OmniMoeTalkerTextConfig(PretrainedConfig): method __init__ (line 351) | def __init__( class Qwen3OmniMoeTalkerConfig (line 417) | class Qwen3OmniMoeTalkerConfig(PretrainedConfig): method __init__ (line 424) | def __init__( class Qwen3OmniMoeCode2WavConfig (line 488) | class Qwen3OmniMoeCode2WavConfig(PretrainedConfig): method __init__ (line 490) | def __init__( method layer_types (line 533) | def layer_types(self): class Qwen3OmniMoeConfig (line 540) | class Qwen3OmniMoeConfig(PretrainedConfig): method __init__ (line 549) | def __init__( method get_text_config (line 597) | def get_text_config(self, decoder=False) -> "PretrainedConfig": FILE: python/sglang/srt/configs/qwen3_vl.py class Qwen3VLVisionConfig (line 4) | class Qwen3VLVisionConfig(PretrainedConfig): method __init__ (line 8) | def __init__( class Qwen3VLTextConfig (line 42) | class Qwen3VLTextConfig(PretrainedConfig): method __init__ (line 146) | def __init__( class Qwen3VLConfig (line 192) | class Qwen3VLConfig(PretrainedConfig): method __init__ (line 239) | def __init__( class Qwen3VLMoeTextConfig (line 267) | class Qwen3VLMoeTextConfig(PretrainedConfig): method __init__ (line 401) | def __init__( class Qwen3VLMoeVisionConfig (line 461) | class Qwen3VLMoeVisionConfig(PretrainedConfig): method __init__ (line 465) | def __init__( class Qwen3VLMoeConfig (line 499) | class Qwen3VLMoeConfig(PretrainedConfig): method __init__ (line 546) | def __init__( FILE: python/sglang/srt/configs/radio.py class RadioConfig (line 34) | class RadioConfig(PretrainedConfig): method __init__ (line 62) | def __init__( FILE: python/sglang/srt/configs/step3_vl.py class Step3VisionEncoderConfig (line 6) | class Step3VisionEncoderConfig(PretrainedConfig): method __init__ (line 9) | def __init__( class Step3TextConfig (line 36) | class Step3TextConfig(PretrainedConfig): method __init__ (line 40) | def __init__( class Step3VLConfig (line 143) | class Step3VLConfig(PretrainedConfig): method __init__ (line 146) | def __init__( FILE: python/sglang/srt/configs/step3p5.py class Step3p5Config (line 6) | class Step3p5Config(PretrainedConfig): method __init__ (line 10) | def __init__( FILE: python/sglang/srt/configs/update_config.py function may_get_weight_block_size (line 13) | def may_get_weight_block_size(model_config, load_config): function get_moe_padding_size (line 23) | def get_moe_padding_size(weight_block_size): function get_num_heads_padding_size (line 38) | def get_num_heads_padding_size(tp_size, weight_block_size, head_dim): function adjust_tp_num_heads_if_necessary (line 51) | def adjust_tp_num_heads_if_necessary(model_config, tp_size, is_post_upda... function update_intermediate_size (line 89) | def update_intermediate_size(model_config, attr_name, intermediate_paddi... function adjust_config_with_unaligned_cpu_tp (line 112) | def adjust_config_with_unaligned_cpu_tp( FILE: python/sglang/srt/configs/utils.py function register_image_processor (line 12) | def register_image_processor( function register_processor (line 23) | def register_processor(config: Type[PretrainedConfig], processor: Type[P... FILE: python/sglang/srt/connector/__init__.py class ConnectorType (line 19) | class ConnectorType(str, enum.Enum): function create_remote_connector (line 25) | def create_remote_connector(url, device=None, **kwargs) -> BaseConnector: function get_connector_type (line 37) | def get_connector_type(client: BaseConnector) -> ConnectorType: FILE: python/sglang/srt/connector/base_connector.py class BaseConnector (line 13) | class BaseConnector(ABC): method __init__ (line 23) | def __init__(self, url: str): method get_local_dir (line 31) | def get_local_dir(self): method weight_iterator (line 35) | def weight_iterator( method pull_files (line 41) | def pull_files( method close (line 48) | def close(self): method __enter__ (line 56) | def __enter__(self): method __exit__ (line 59) | def __exit__(self, exc_type, exc_value, traceback): method __del__ (line 62) | def __del__(self): method _close_by_signal (line 65) | def _close_by_signal(self, existing_handler=None): class BaseKVConnector (line 75) | class BaseKVConnector(BaseConnector): method get (line 78) | def get(self, key: str) -> Optional[torch.Tensor]: method getstr (line 82) | def getstr(self, key: str) -> Optional[str]: method set (line 86) | def set(self, key: str, obj: torch.Tensor) -> None: method setstr (line 90) | def setstr(self, key: str, obj: str) -> None: method list (line 94) | def list(self, prefix: str) -> List[str]: class BaseFileConnector (line 98) | class BaseFileConnector(BaseConnector): method glob (line 110) | def glob(self, allow_pattern: str) -> List[str]: FILE: python/sglang/srt/connector/redis.py class RedisConnector (line 16) | class RedisConnector(BaseKVConnector): method __init__ (line 18) | def __init__(self, url: str): method get (line 28) | def get(self, key: str) -> Optional[torch.Tensor]: method getstr (line 37) | def getstr(self, key: str) -> Optional[str]: method set (line 45) | def set(self, key: str, tensor: torch.Tensor) -> None: method setstr (line 49) | def setstr(self, key: str, obj: str) -> None: method list (line 52) | def list(self, prefix: str) -> List[str]: method weight_iterator (line 67) | def weight_iterator( method pull_files (line 76) | def pull_files( method close (line 83) | def close(self): FILE: python/sglang/srt/connector/remote_instance.py class RemoteInstanceConnector (line 16) | class RemoteInstanceConnector(BaseConnector): method __init__ (line 18) | def __init__(self, url: str, device: torch.device = "cpu"): method build_group (line 26) | def build_group( method pull_files (line 71) | def pull_files( method weight_iterator (line 79) | def weight_iterator( FILE: python/sglang/srt/connector/s3.py function _filter_allow (line 13) | def _filter_allow(paths: list[str], patterns: list[str]) -> list[str]: function _filter_ignore (line 21) | def _filter_ignore(paths: list[str], patterns: list[str]) -> list[str]: function list_files (line 29) | def list_files( class S3Connector (line 69) | class S3Connector(BaseFileConnector): method __init__ (line 71) | def __init__(self, url: str) -> None: method glob (line 77) | def glob(self, allow_pattern: Optional[list[str]] = None) -> list[str]: method pull_files (line 83) | def pull_files( method weight_iterator (line 109) | def weight_iterator( method close (line 120) | def close(self): FILE: python/sglang/srt/connector/serde/__init__.py function create_serde (line 12) | def create_serde(serde_type: str) -> Tuple[Serializer, Deserializer]: FILE: python/sglang/srt/connector/serde/safe_serde.py class SafeSerializer (line 11) | class SafeSerializer(Serializer): method __init__ (line 13) | def __init__(self): method to_bytes (line 16) | def to_bytes(self, t: torch.Tensor) -> bytes: class SafeDeserializer (line 20) | class SafeDeserializer(Deserializer): method __init__ (line 22) | def __init__(self): method from_bytes_normal (line 26) | def from_bytes_normal(self, b: Union[bytearray, bytes]) -> torch.Tensor: method from_bytes (line 29) | def from_bytes(self, b: Union[bytearray, bytes]) -> torch.Tensor: FILE: python/sglang/srt/connector/serde/serde.py class Serializer (line 9) | class Serializer(ABC): method to_bytes (line 12) | def to_bytes(self, t: torch.Tensor) -> bytes: class Deserializer (line 27) | class Deserializer(metaclass=abc.ABCMeta): method __init__ (line 29) | def __init__(self, dtype): method from_bytes (line 33) | def from_bytes(self, bs: bytes) -> torch.Tensor: FILE: python/sglang/srt/connector/utils.py function parse_model_name (line 11) | def parse_model_name(url: str) -> str: function pull_files_from_db (line 20) | def pull_files_from_db( FILE: python/sglang/srt/constrained/base_grammar_backend.py class GrammarStats (line 30) | class GrammarStats: class BaseGrammarObject (line 41) | class BaseGrammarObject: method __init__ (line 43) | def __init__(self): method maybe_init_reasoning (line 48) | def maybe_init_reasoning(self, reasoning: bool): method accept_token (line 51) | def accept_token(self, token: int) -> None: method rollback (line 57) | def rollback(self, k: int): method is_terminated (line 60) | def is_terminated(self): method allocate_vocab_mask (line 63) | def allocate_vocab_mask( method fill_vocab_mask (line 68) | def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None: method move_vocab_mask (line 72) | def move_vocab_mask(vocab_mask: torch.Tensor, device) -> torch.Tensor: method apply_vocab_mask (line 76) | def apply_vocab_mask(logits: torch.Tensor, vocab_mask: torch.Tensor) -... method copy (line 79) | def copy(self) -> "BaseGrammarObject": method finished (line 83) | def finished(self): method finished (line 87) | def finished(self, finished): method try_jump_forward (line 90) | def try_jump_forward(self, tokenizer) -> Optional[Tuple[List[int], str]]: method jump_forward_str_state (line 100) | def jump_forward_str_state(self, helper: Tuple[List[int], str]) -> Tup... method jump_and_retokenize (line 110) | def jump_and_retokenize( class InvalidGrammarObject (line 119) | class InvalidGrammarObject(BaseGrammarObject): method __init__ (line 122) | def __init__(self, error_message: str = "Unknown grammar error"): method __repr__ (line 126) | def __repr__(self): class BaseGrammarBackend (line 130) | class BaseGrammarBackend: method __init__ (line 131) | def __init__(self): method _not_supported (line 135) | def _not_supported(self, key_type: str, key_string: str) -> BaseGramma... method dispatch_fallback (line 139) | def dispatch_fallback(self, key_type: str, key_string: str) -> BaseGra... method dispatch_json (line 145) | def dispatch_json(self, key_string: str) -> BaseGrammarObject: method dispatch_regex (line 148) | def dispatch_regex(self, key_string: str) -> BaseGrammarObject: method dispatch_ebnf (line 151) | def dispatch_ebnf(self, key_string: str) -> BaseGrammarObject: method dispatch_structural_tag (line 154) | def dispatch_structural_tag(self, key_string: str) -> BaseGrammarObject: method _init_value_dispatch (line 157) | def _init_value_dispatch( method get_cached_or_future_value (line 177) | def get_cached_or_future_value( method set_cache (line 188) | def set_cache(self, key: Tuple[str, str], value: BaseGrammarObject): method reset (line 191) | def reset(self): function register_grammar_backend (line 198) | def register_grammar_backend(name, init_func): function create_grammar_backend (line 202) | def create_grammar_backend( FILE: python/sglang/srt/constrained/grammar_manager.py class GrammarManager (line 24) | class GrammarManager: method __init__ (line 25) | def __init__(self, scheduler: Scheduler): method __len__ (line 49) | def __len__(self): method clear (line 52) | def clear(self): method has_waiting_grammars (line 56) | def has_waiting_grammars(self) -> bool: method abort_requests (line 59) | def abort_requests(self, recv_req: AbortReq): method process_req_with_grammar (line 67) | def process_req_with_grammar(self, req: Req) -> bool: method get_ready_grammar_requests (line 111) | def get_ready_grammar_requests(self) -> List[Req]: FILE: python/sglang/srt/constrained/llguidance_backend.py class GuidanceGrammar (line 40) | class GuidanceGrammar(BaseGrammarObject): method __init__ (line 42) | def __init__(self, llguidance_tokenizer: LLTokenizer, serialized_gramm... method accept_token (line 57) | def accept_token(self, token: int): method rollback (line 66) | def rollback(self, num_tokens: int) -> None: method is_terminated (line 76) | def is_terminated(self): method fill_vocab_mask (line 79) | def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None: method allocate_vocab_mask (line 83) | def allocate_vocab_mask( method move_vocab_mask (line 98) | def move_vocab_mask(vocab_mask: torch.Tensor, device) -> torch.Tensor: method apply_vocab_mask (line 102) | def apply_vocab_mask(logits: torch.Tensor, vocab_mask: torch.Tensor) -... method copy (line 105) | def copy(self): method try_jump_forward (line 111) | def try_jump_forward(self, tokenizer) -> Optional[Tuple[List[int], str]]: method jump_forward_str_state (line 118) | def jump_forward_str_state(self, helper: Tuple[List[int], str]) -> Tup... method jump_and_retokenize (line 121) | def jump_and_retokenize( method _check_err (line 126) | def _check_err(self) -> None: class GuidanceBackend (line 131) | class GuidanceBackend(BaseGrammarBackend): method __init__ (line 133) | def __init__( method _from_serialized (line 147) | def _from_serialized(self, serialized_grammar) -> BaseGrammarObject: method dispatch_json (line 157) | def dispatch_json(self, key_string: str) -> BaseGrammarObject: method dispatch_regex (line 171) | def dispatch_regex(self, key_string: str) -> BaseGrammarObject: method dispatch_ebnf (line 175) | def dispatch_ebnf(self, key_string: str) -> BaseGrammarObject: method dispatch_structural_tag (line 183) | def dispatch_structural_tag(self, key_string: str) -> BaseGrammarObject: FILE: python/sglang/srt/constrained/outlines_backend.py class OutlinesGrammar (line 42) | class OutlinesGrammar(BaseGrammarObject): method __init__ (line 43) | def __init__( method accept_token (line 53) | def accept_token(self, token: int): method allocate_vocab_mask (line 56) | def allocate_vocab_mask( method move_vocab_mask (line 62) | def move_vocab_mask(vocab_mask: torch.Tensor, device) -> torch.Tensor: method fill_vocab_mask (line 65) | def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None: method apply_vocab_mask (line 74) | def apply_vocab_mask(logits: torch.Tensor, vocab_mask: torch.Tensor): method copy (line 77) | def copy(self): method try_jump_forward (line 80) | def try_jump_forward(self, tokenizer) -> Optional[Tuple]: method jump_forward_str_state (line 104) | def jump_forward_str_state(self, helper: Tuple[List[int], str]) -> Tup... method jump_and_retokenize (line 108) | def jump_and_retokenize( class OutlinesGrammarBackend (line 114) | class OutlinesGrammarBackend(BaseGrammarBackend): method __init__ (line 115) | def __init__( method _compile_regex (line 145) | def _compile_regex(self, regex: str) -> BaseGrammarObject: method dispatch_ebnf (line 160) | def dispatch_ebnf(self, key_string: str): method dispatch_structural_tag (line 163) | def dispatch_structural_tag(self, key_string: str): method dispatch_json (line 166) | def dispatch_json(self, key_string: str): method dispatch_regex (line 177) | def dispatch_regex(self, key_string: str): function build_regex_from_object (line 181) | def build_regex_from_object( FILE: python/sglang/srt/constrained/outlines_jump_forward.py class JumpEdge (line 47) | class JumpEdge: function disk_cache (line 54) | def disk_cache(expire: Optional[float] = None, typed=False, ignore=()): function init_state_to_jump_forward (line 62) | def init_state_to_jump_forward(regex_string): class OutlinesJumpForwardMap (line 142) | class OutlinesJumpForwardMap: method __init__ (line 143) | def __init__(self, regex_string): method jump_forward_symbol (line 146) | def jump_forward_symbol(self, state): method jump_forward_byte (line 159) | def jump_forward_byte(self, state): method is_jump_forward_symbol_state (line 174) | def is_jump_forward_symbol_state(self, state): function test_main (line 181) | def test_main(regex_string): FILE: python/sglang/srt/constrained/reasoner_grammar_backend.py class ReasonerGrammarObject (line 27) | class ReasonerGrammarObject(BaseGrammarObject): method __init__ (line 28) | def __init__(self, grammar: BaseGrammarObject, think_end_id: int): method maybe_init_reasoning (line 37) | def maybe_init_reasoning(self, reasoning: bool): method transfer_state (line 40) | def transfer_state(self, token: int) -> int: method rollback_state (line 46) | def rollback_state(self): method accept_token (line 52) | def accept_token(self, token: int): method is_terminated (line 57) | def is_terminated(self): method rollback (line 60) | def rollback(self, k): method allocate_vocab_mask (line 68) | def allocate_vocab_mask( method fill_vocab_mask (line 73) | def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None: method move_vocab_mask (line 77) | def move_vocab_mask(self, vocab_mask: torch.Tensor, device) -> torch.T... method apply_vocab_mask (line 81) | def apply_vocab_mask(self): method copy (line 84) | def copy(self) -> BaseGrammarObject: method finished (line 88) | def finished(self): method finished (line 92) | def finished(self, finished): method try_jump_forward (line 95) | def try_jump_forward(self, tokenizer): method jump_forward_str_state (line 98) | def jump_forward_str_state(self, helper): method jump_and_retokenize (line 101) | def jump_and_retokenize( class ReasonerGrammarBackend (line 109) | class ReasonerGrammarBackend(BaseGrammarBackend): method __init__ (line 110) | def __init__(self, grammar_backend: BaseGrammarBackend, think_end_id): method _init_value_dispatch (line 115) | def _init_value_dispatch( FILE: python/sglang/srt/constrained/triton_ops/bitmask_ops.py function apply_token_bitmask_inplace_kernel (line 14) | def apply_token_bitmask_inplace_kernel( function apply_token_bitmask_inplace_triton (line 84) | def apply_token_bitmask_inplace_triton( FILE: python/sglang/srt/constrained/utils.py function is_legacy_structural_tag (line 4) | def is_legacy_structural_tag(obj: Dict) -> bool: FILE: python/sglang/srt/constrained/xgrammar_backend.py class XGrammarGrammar (line 53) | class XGrammarGrammar(BaseGrammarObject): method __init__ (line 55) | def __init__( method accept_token (line 73) | def accept_token(self, token: int): method rollback (line 87) | def rollback(self, k: int): method is_terminated (line 91) | def is_terminated(self): method allocate_vocab_mask (line 94) | def allocate_vocab_mask( method fill_vocab_mask (line 99) | def fill_vocab_mask(self, vocab_mask: torch.Tensor, idx: int) -> None: method move_vocab_mask (line 103) | def move_vocab_mask(vocab_mask: torch.Tensor, device) -> torch.Tensor: method apply_vocab_mask (line 106) | def apply_vocab_mask(self, logits: torch.Tensor, vocab_mask: torch.Ten... method copy (line 119) | def copy(self): method try_jump_forward (line 138) | def try_jump_forward(self, tokenizer) -> Optional[Tuple[List[int], str]]: method jump_forward_str_state (line 144) | def jump_forward_str_state(self, helper: Tuple[List[int], str]) -> Tup... method jump_and_retokenize (line 148) | def jump_and_retokenize( method __repr__ (line 165) | def __repr__(self): class TokenizerNotSupportedError (line 169) | class TokenizerNotSupportedError(Exception): class XGrammarGrammarBackend (line 175) | class XGrammarGrammarBackend(BaseGrammarBackend): method __init__ (line 176) | def __init__( method _sanitize_structural_format (line 213) | def _sanitize_structural_format(structural_format): method _sanitize_structural_tag_structures (line 235) | def _sanitize_structural_tag_structures(structural_tag: Dict) -> None: method _from_context (line 240) | def _from_context( method dispatch_json (line 257) | def dispatch_json(self, key_string: str) -> BaseGrammarObject: method dispatch_ebnf (line 272) | def dispatch_ebnf(self, key_string: str) -> BaseGrammarObject: method dispatch_regex (line 280) | def dispatch_regex(self, key_string: str) -> BaseGrammarObject: method dispatch_structural_tag (line 288) | def dispatch_structural_tag(self, key_string: str) -> BaseGrammarObject: method reset (line 319) | def reset(self): function demo_test (line 323) | def demo_test(): FILE: python/sglang/srt/debug_utils/comparator/aligner/axis_aligner.py class AxisAlignerPlan (line 20) | class AxisAlignerPlan(_FrozenBase): function compute_axis_aligner_plan (line 27) | def compute_axis_aligner_plan( function _semantic_names_match (line 54) | def _semantic_names_match(specs_pair: Pair[list[DimSpec]]) -> bool: function _expand_and_skip_squeeze (line 77) | def _expand_and_skip_squeeze(specs: list[DimSpec]) -> list[str]: function _build_canonical_order (line 87) | def _build_canonical_order(specs_pair: Pair[list[DimSpec]]) -> Optional[... function _build_side_pattern (line 145) | def _build_side_pattern( function execute_axis_aligner_plan (line 179) | def execute_axis_aligner_plan( FILE: python/sglang/srt/debug_utils/comparator/aligner/entrypoint/executor.py class StepPlansResult (line 44) | class StepPlansResult(NamedTuple): class SubPlansResult (line 50) | class SubPlansResult(NamedTuple): class AlignerResult (line 57) | class AlignerResult: function execute_aligner_plan (line 64) | def execute_aligner_plan( function _execute_step_plans (line 132) | def _execute_step_plans( function execute_sub_plans (line 171) | def execute_sub_plans( function execute_sub_plan (line 202) | def execute_sub_plan( FILE: python/sglang/srt/debug_utils/comparator/aligner/entrypoint/planner.py function compute_aligner_plan (line 36) | def compute_aligner_plan( function _compute_per_step_plans (line 69) | def _compute_per_step_plans( function compute_per_step_sub_plans (line 99) | def compute_per_step_sub_plans( FILE: python/sglang/srt/debug_utils/comparator/aligner/entrypoint/traced_types.py class TracedSubPlan (line 20) | class TracedSubPlan(_StrictBase): class TracedStepPlan (line 25) | class TracedStepPlan(_StrictBase): class TracedSidePlan (line 31) | class TracedSidePlan(_StrictBase): class TracedAlignerPlan (line 35) | class TracedAlignerPlan(_StrictBase): FILE: python/sglang/srt/debug_utils/comparator/aligner/entrypoint/types.py class AlignerPerStepPlan (line 21) | class AlignerPerStepPlan(_FrozenBase): class AlignerPlan (line 27) | class AlignerPlan(_FrozenBase): FILE: python/sglang/srt/debug_utils/comparator/aligner/reorderer/executor.py function execute_reorderer_plan (line 16) | def execute_reorderer_plan( function _reorder_zigzag_to_natural_thd (line 42) | def _reorder_zigzag_to_natural_thd( function _reorder_zigzag_to_natural (line 81) | def _reorder_zigzag_to_natural( FILE: python/sglang/srt/debug_utils/comparator/aligner/reorderer/planner.py function compute_reorderer_plans (line 20) | def compute_reorderer_plans( FILE: python/sglang/srt/debug_utils/comparator/aligner/reorderer/types.py class ZigzagToNaturalParams (line 8) | class ZigzagToNaturalParams(_FrozenBase): class ZigzagToNaturalThdParams (line 14) | class ZigzagToNaturalThdParams(_FrozenBase): class ReordererPlan (line 27) | class ReordererPlan(_FrozenBase): FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/concat_steps/executor.py function execute_token_aligner_concat_steps (line 16) | def execute_token_aligner_concat_steps( function _resolve_token_dim (line 30) | def _resolve_token_dim(tensor: torch.Tensor) -> int: function _concat_steps (line 44) | def _concat_steps(tensor_of_step: dict[int, torch.Tensor], *, dim: int) ... FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/concat_steps/thd_seq_lens_loader.py function load_thd_seq_lens_only (line 17) | def load_thd_seq_lens_only( FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/entrypoint.py class TokenAlignerResult (line 38) | class TokenAlignerResult: function compute_maybe_token_aligner_result (line 46) | def compute_maybe_token_aligner_result( function _build_smart_result (line 81) | def _build_smart_result( function _load_thd_seq_lens_pair (line 123) | def _load_thd_seq_lens_pair( FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/aux_loader.py function load_and_normalize_aux (line 46) | def load_and_normalize_aux( function has_aux_tensors (line 90) | def has_aux_tensors(df: pl.DataFrame) -> bool: function _detect_plugin (line 96) | def _detect_plugin(df: pl.DataFrame, dump_path: Path) -> Optional[_AuxFr... function _load_step_data (line 113) | def _load_step_data( function _load_non_tensor_aux (line 167) | def _load_non_tensor_aux( function _load_and_align_aux_tensor (line 198) | def _load_and_align_aux_tensor( function _ensure_dims_in_metas (line 261) | def _ensure_dims_in_metas( FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/aux_plugins.py class _AuxFrameworkPlugin (line 21) | class _AuxFrameworkPlugin(ABC): method name (line 24) | def name(self) -> str: ... method tensor_names (line 28) | def tensor_names(self) -> frozenset[str]: ... method non_tensor_names (line 32) | def non_tensor_names(self) -> frozenset[str]: ... method cp_sharded_names (line 35) | def cp_sharded_names(self) -> frozenset[str]: method discriminating_names (line 39) | def discriminating_names(self) -> frozenset[str]: method detect_layout (line 44) | def detect_layout(self, raw: dict[int, dict[str, object]]) -> TokenLay... method compute_step_aux (line 47) | def compute_step_aux( method has_required_names (line 52) | def has_required_names(self, names: set[str]) -> bool: method all_names (line 57) | def all_names(self) -> frozenset[str]: method extract_global_seq_lens (line 60) | def extract_global_seq_lens( method infer_cp_sharded_dims (line 69) | def infer_cp_sharded_dims(self, name: str, ndim: int) -> str: class _SGLangPlugin (line 79) | class _SGLangPlugin(_AuxFrameworkPlugin): method name (line 81) | def name(self) -> str: method tensor_names (line 85) | def tensor_names(self) -> frozenset[str]: method non_tensor_names (line 89) | def non_tensor_names(self) -> frozenset[str]: method cp_sharded_names (line 93) | def cp_sharded_names(self) -> frozenset[str]: method discriminating_names (line 97) | def discriminating_names(self) -> frozenset[str]: method has_required_names (line 100) | def has_required_names(self, names: set[str]) -> bool: method detect_layout (line 103) | def detect_layout(self, raw: dict[int, dict[str, object]]) -> TokenLay... method extract_global_seq_lens (line 106) | def extract_global_seq_lens( method infer_cp_sharded_dims (line 118) | def infer_cp_sharded_dims(self, name: str, ndim: int) -> str: method compute_step_aux (line 130) | def compute_step_aux( class _MegatronPlugin (line 168) | class _MegatronPlugin(_AuxFrameworkPlugin): method name (line 170) | def name(self) -> str: method tensor_names (line 174) | def tensor_names(self) -> frozenset[str]: method non_tensor_names (line 178) | def non_tensor_names(self) -> frozenset[str]: method cp_sharded_names (line 182) | def cp_sharded_names(self) -> frozenset[str]: method discriminating_names (line 186) | def discriminating_names(self) -> frozenset[str]: method has_required_names (line 189) | def has_required_names(self, names: set[str]) -> bool: method extract_global_seq_lens (line 192) | def extract_global_seq_lens( method infer_cp_sharded_dims (line 204) | def infer_cp_sharded_dims(self, name: str, ndim: int) -> str: method detect_layout (line 218) | def detect_layout(self, raw: dict[int, dict[str, object]]) -> TokenLay... method compute_step_aux (line 241) | def compute_step_aux( function _infer_positions (line 290) | def _infer_positions(*, seq_lens: torch.Tensor) -> torch.Tensor: FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/executor.py function execute_token_aligner (line 23) | def execute_token_aligner( function _collapse_bs_to_t (line 55) | def _collapse_bs_to_t( function _build_bs_collapse_pattern (line 94) | def _build_bs_collapse_pattern( function _resolve_dim_or_fallback (line 123) | def _resolve_dim_or_fallback(tensor: torch.Tensor, name: str) -> int: function _make_empty (line 129) | def _make_empty(*, tensor_of_step: dict[int, torch.Tensor]) -> torch.Ten... function _extract_and_stack_tokens (line 137) | def _extract_and_stack_tokens( FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/planner.py function compute_token_aligner_plan (line 16) | def compute_token_aligner_plan( function _match_sequences (line 60) | def _match_sequences( function _find_matching_x_exact (line 92) | def _find_matching_x_exact( class _PrefixCandidate (line 107) | class _PrefixCandidate(NamedTuple): function _find_matching_x_prefix (line 112) | def _find_matching_x_prefix( function _is_prefix_pair (line 132) | def _is_prefix_pair(a: list[int], b: list[int]) -> bool: FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/seq_info_builder.py class _SeqInfoAccumulator (line 16) | class _SeqInfoAccumulator: method extend (line 24) | def extend( method build (line 37) | def build(self) -> TokenAlignerSeqInfo: function build_seqs_info (line 48) | def build_seqs_info(global_aux: TokenAlignerGlobalAux) -> TokenAlignerSe... function _build_token_aligner_seq_infos (line 56) | def _build_token_aligner_seq_infos( FILE: python/sglang/srt/debug_utils/comparator/aligner/token_aligner/smart/types.py class SGLangSeqId (line 16) | class SGLangSeqId(NamedTuple): class PositionalSeqId (line 20) | class PositionalSeqId(NamedTuple): class TokenAlignerStepAux (line 29) | class TokenAlignerStepAux: method __post_init__ (line 37) | def __post_init__(self) -> None: class TokenAlignerGlobalAux (line 49) | class TokenAlignerGlobalAux: class TokenLocator (line 58) | class TokenLocator(_FrozenBase): method __add__ (line 67) | def __add__(self, other: TokenLocator) -> TokenLocator: class TokenAlignerSeqInfo (line 74) | class TokenAlignerSeqInfo(_FrozenBase): method _validate_fields (line 83) | def _validate_fields(self) -> TokenAlignerSeqInfo: method __add__ (line 99) | def __add__(self, other: TokenAlignerSeqInfo) -> TokenAlignerSeqInfo: class TokenAlignerSeqsInfo (line 107) | class TokenAlignerSeqsInfo(_FrozenBase): class TokenAlignerPlan (line 114) | class TokenAlignerPlan(_FrozenBase): method _validate_fields (line 121) | def _validate_fields(self) -> TokenAlignerPlan: FILE: python/sglang/srt/debug_utils/comparator/aligner/unsharder/executor.py class UnsharderResult (line 25) | class UnsharderResult: function execute_unsharder_plan (line 30) | def execute_unsharder_plan( function _apply_unshard (line 51) | def _apply_unshard( function _verify_replicated_group (line 92) | def _verify_replicated_group( function _check_replicated_pair (line 112) | def _check_replicated_pair( function _thd_concat (line 144) | def _thd_concat( FILE: python/sglang/srt/debug_utils/comparator/aligner/unsharder/parallel_info.py function _is_error_sentinel (line 9) | def _is_error_sentinel(value: dict) -> bool: function normalize_parallel_info (line 14) | def normalize_parallel_info(meta: dict) -> dict[ParallelAxis, AxisInfo]: FILE: python/sglang/srt/debug_utils/comparator/aligner/unsharder/planner.py class _GroupResult (line 26) | class _GroupResult(NamedTuple): function compute_unsharder_plan (line 31) | def compute_unsharder_plan( function _validate_explicit_replicated (line 114) | def _validate_explicit_replicated( function _validate (line 145) | def _validate( function _group_and_project (line 180) | def _group_and_project( function _resolve_unshard_params (line 205) | def _resolve_unshard_params( FILE: python/sglang/srt/debug_utils/comparator/aligner/unsharder/types.py class AxisInfo (line 11) | class AxisInfo(_FrozenBase): method _validate_bounds (line 16) | def _validate_bounds(self) -> AxisInfo: class ConcatParams (line 26) | class ConcatParams(_FrozenBase): class CpThdConcatParams (line 31) | class CpThdConcatParams(_FrozenBase): class PickParams (line 37) | class PickParams(_FrozenBase): class ReduceSumParams (line 41) | class ReduceSumParams(_FrozenBase): class UnsharderPlan (line 51) | class UnsharderPlan(_FrozenBase): FILE: python/sglang/srt/debug_utils/comparator/bundle_comparator.py function _collect_bundle_side_info (line 49) | def _collect_bundle_side_info( function compare_bundle_pair (line 80) | def compare_bundle_pair( function _compare_bundle_pair_inner (line 113) | def _compare_bundle_pair_inner( function _extract_dp_alias_from_items (line 184) | def _extract_dp_alias_from_items(items: list[ValueWithMeta]) -> Optional... function _compare_bundle_pair_tensor_type (line 194) | def _compare_bundle_pair_tensor_type( function _try_generate_viz (line 284) | def _try_generate_viz( function _resolve_seq_dim (line 317) | def _resolve_seq_dim(tensor: torch.Tensor) -> Optional[int]: function _compare_bundle_pair_non_tensor_type (line 330) | def _compare_bundle_pair_non_tensor_type( function _apply_dim_names_from_meta (line 353) | def _apply_dim_names_from_meta( function _load_all_values (line 369) | def _load_all_values(filenames: list[str], base_path: Path) -> list[Valu... FILE: python/sglang/srt/debug_utils/comparator/bundle_matcher.py class TensorFileInfo (line 14) | class TensorFileInfo: function match_bundles (line 23) | def match_bundles( function _rows_to_tensor_infos (line 41) | def _rows_to_tensor_infos(rows: list[dict[str, Any]]) -> list[TensorFile... FILE: python/sglang/srt/debug_utils/comparator/dims_spec/comment_parser.py class _CommentSuffix (line 15) | class _CommentSuffix(NamedTuple): function _parse_comment_suffix (line 20) | def _parse_comment_suffix(declaration_part: str) -> _CommentSuffix: FILE: python/sglang/srt/debug_utils/comparator/dims_spec/dim_parser.py function parse_dim (line 22) | def parse_dim(token: str) -> DimSpec: function _parse_single_dim (line 33) | def _parse_single_dim(token: str) -> DimSpec: function _parse_fused_dim (line 45) | def _parse_fused_dim(*, token: str, fused_match: re.Match[str]) -> DimSpec: FILE: python/sglang/srt/debug_utils/comparator/dims_spec/dims_parser.py class _SingletonDimUtil (line 18) | class _SingletonDimUtil: method is_squeeze (line 24) | def is_squeeze(spec: DimSpec) -> bool: method filter_out (line 28) | def filter_out(dim_specs: list[DimSpec]) -> list[DimSpec]: method make_name (line 32) | def make_name(index: int) -> str: method is_singleton_name (line 36) | def is_singleton_name(name: str) -> bool: method sanitize_names (line 43) | def sanitize_names(names: list[str]) -> list[str]: function parse_dims (line 58) | def parse_dims(dims_str: str) -> DimsSpec: function resolve_dim_names (line 109) | def resolve_dim_names(dims_str: str) -> list[str]: FILE: python/sglang/srt/debug_utils/comparator/dims_spec/modifier_parser.py function _parse_modifier_token (line 15) | def _parse_modifier_token(modifier_token: str, dim_token: str) -> Parall... function _parse_modifiers (line 66) | def _parse_modifiers( FILE: python/sglang/srt/debug_utils/comparator/dims_spec/tensor_naming.py function find_dim_index (line 10) | def find_dim_index(dim_specs: list[DimSpec], name: str) -> Optional[int]: function resolve_dim_by_name (line 18) | def resolve_dim_by_name(tensor: torch.Tensor, name: str) -> int: function apply_dim_names (line 29) | def apply_dim_names(tensor: torch.Tensor, dim_names: list[str]) -> torch... function strip_dim_names (line 39) | def strip_dim_names(tensor: torch.Tensor) -> torch.Tensor: FILE: python/sglang/srt/debug_utils/comparator/dims_spec/types.py class TokenLayout (line 14) | class TokenLayout(Enum): class ParallelAxis (line 19) | class ParallelAxis(Enum): class Ordering (line 27) | class Ordering(Enum): class Reduction (line 32) | class Reduction(Enum): class ParallelModifier (line 36) | class ParallelModifier(_FrozenBase): class DimSpec (line 51) | class DimSpec(_FrozenBase): method sub_dims (line 56) | def sub_dims(self) -> list[str]: method is_fused (line 61) | def is_fused(self) -> bool: method sanitized_name (line 65) | def sanitized_name(self) -> str: class DimsSpec (line 72) | class DimsSpec(_FrozenBase): FILE: python/sglang/srt/debug_utils/comparator/display.py function emit_display_records (line 20) | def emit_display_records( function _render_polars_as_text (line 40) | def _render_polars_as_text(df: pl.DataFrame, *, title: Optional[str] = N... function _collect_rank_info (line 55) | def _collect_rank_info( function _collect_input_ids_and_positions (line 78) | def _collect_input_ids_and_positions( function _extract_parallel_info (line 122) | def _extract_parallel_info(row_data: dict[str, Any], info: dict[str, Any... FILE: python/sglang/srt/debug_utils/comparator/dp_utils.py function filter_to_non_empty_dp_rank (line 18) | def filter_to_non_empty_dp_rank( function _extract_dp_info (line 71) | def _extract_dp_info( function _group_has_data (line 97) | def _group_has_data(group: list[ValueWithMeta]) -> bool: FILE: python/sglang/srt/debug_utils/comparator/entrypoint.py function main (line 52) | def main() -> None: function run (line 57) | def run(args: argparse.Namespace) -> int: function _resolve_report_path (line 161) | def _resolve_report_path( function _maybe_load_tokenizer (line 169) | def _maybe_load_tokenizer(*, tokenizer_arg: Optional[str], dir_pair: Pai... function _read_df (line 189) | def _read_df( function _compare_bundle_pairs (line 209) | def _compare_bundle_pairs( function _consume_comparison_records (line 271) | def _consume_comparison_records( function parse_args (line 315) | def parse_args(argv: list[str]) -> argparse.Namespace: FILE: python/sglang/srt/debug_utils/comparator/log_sink.py class LogSink (line 9) | class LogSink: method __init__ (line 10) | def __init__(self) -> None: method context (line 14) | def context(self) -> Generator[list[BaseLog], None, None]: method add (line 23) | def add(self, log: BaseLog) -> None: FILE: python/sglang/srt/debug_utils/comparator/meta_overrider.py class MetaOverrideRule (line 18) | class MetaOverrideRule(_StrictBase): class MetaOverrideConfig (line 29) | class MetaOverrideConfig(_StrictBase): class MetaOverrider (line 35) | class MetaOverrider: method __init__ (line 38) | def __init__(self, rules: list[MetaOverrideRule]) -> None: method is_empty (line 42) | def is_empty(self) -> bool: method from_args_and_config (line 46) | def from_args_and_config( method apply_to_meta (line 71) | def apply_to_meta( function _parse_cli_override_arg (line 88) | def _parse_cli_override_arg(raw: str) -> tuple[str, str]: function _load_yaml_rules (line 98) | def _load_yaml_rules(path: Path) -> list[MetaOverrideRule]: FILE: python/sglang/srt/debug_utils/comparator/output_formatter.py function _render_record_rich (line 48) | def _render_record_rich( function _render_record_text (line 66) | def _render_record_text(record: _OutputRecord) -> str: function _format_log_lines_rich (line 77) | def _format_log_lines_rich( function _format_log_lines_text (line 90) | def _format_log_lines_text(*, errors: list[ErrorLog], infos: list[InfoLo... function _format_config_body (line 104) | def _format_config_body(record: ConfigRecord) -> str: function _format_config_rich_body (line 108) | def _format_config_rich_body( function _format_skip_body (line 118) | def _format_skip_body(record: ComparisonSkipRecord) -> str: function _format_skip_rich_body (line 122) | def _format_skip_rich_body( function _format_error_body (line 134) | def _format_error_body(record: ComparisonErrorRecord) -> str: function _format_error_rich_body (line 142) | def _format_error_rich_body( function _format_table_body (line 158) | def _format_table_body(record: _TableRecord) -> str: function _format_table_rich_body (line 168) | def _format_table_rich_body( function _format_tensor_comparison_body (line 185) | def _format_tensor_comparison_body(record: ComparisonTensorRecord) -> str: function _format_tensor_comparison_rich_body (line 194) | def _format_tensor_comparison_rich_body( function _format_non_tensor_body (line 209) | def _format_non_tensor_body(record: ComparisonNonTensorRecord) -> str: function _format_non_tensor_rich_body (line 220) | def _format_non_tensor_rich_body( function _format_summary_body (line 243) | def _format_summary_body(record: SummaryRecord) -> str: function _format_summary_rich_body (line 253) | def _format_summary_rich_body( function _format_log_body (line 270) | def _format_log_body(record: LogRecord) -> str: function _format_aligner_plan (line 277) | def _format_aligner_plan(traced_plan: TracedAlignerPlan) -> str: function _format_sub_plan_text (line 302) | def _format_sub_plan_text(traced_sub: TracedSubPlan) -> str: function _format_cross_side_plan_text (line 316) | def _format_cross_side_plan_text(plan: AlignerPlan) -> list[str]: FILE: python/sglang/srt/debug_utils/comparator/output_types.py class BaseLog (line 45) | class BaseLog(_StrictBase): method to_text (line 49) | def to_text(self) -> str: class ErrorLog (line 53) | class ErrorLog(BaseLog): class InfoLog (line 57) | class InfoLog(BaseLog): function _split_logs (line 64) | def _split_logs(logs: list[BaseLog]) -> tuple[list[ErrorLog], list[InfoL... class ReplicatedCheckResult (line 70) | class ReplicatedCheckResult(_StrictBase): class BundleFileInfo (line 80) | class BundleFileInfo(_StrictBase): class BundleSideInfo (line 89) | class BundleSideInfo(_StrictBase): class ShapeSnapshot (line 95) | class ShapeSnapshot(_StrictBase): class _OutputRecord (line 100) | class _OutputRecord(_StrictBase): method _format_body (line 105) | def _format_body(self) -> str: ... method _format_rich_body (line 107) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render... method to_rich (line 110) | def to_rich(self, verbosity: Verbosity = "normal") -> RenderableType: method to_text (line 113) | def to_text(self) -> str: class RecordLocation (line 117) | class RecordLocation(_StrictBase): class _BaseComparisonRecord (line 121) | class _BaseComparisonRecord(_OutputRecord): method _format_location_prefix (line 124) | def _format_location_prefix(self) -> str: method _format_location_prefix_rich (line 129) | def _format_location_prefix_rich(self) -> str: method _format_location_suffix (line 134) | def _format_location_suffix(self) -> str: class ConfigRecord (line 140) | class ConfigRecord(_OutputRecord): method _format_body (line 144) | def _format_body(self) -> str: method _format_rich_body (line 147) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render... class ComparisonSkipRecord (line 151) | class ComparisonSkipRecord(_BaseComparisonRecord): method category (line 157) | def category(self) -> str: method _format_body (line 162) | def _format_body(self) -> str: method _format_rich_body (line 165) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render... class ComparisonErrorRecord (line 169) | class ComparisonErrorRecord(_BaseComparisonRecord): method category (line 176) | def category(self) -> str: method _format_body (line 179) | def _format_body(self) -> str: method _format_rich_body (line 182) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render... class _TableRecord (line 186) | class _TableRecord(_OutputRecord): method _table_title (line 191) | def _table_title(self) -> str: ... method _format_body (line 193) | def _format_body(self) -> str: method _format_rich_body (line 196) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render... class RankInfoRecord (line 200) | class RankInfoRecord(_TableRecord): method _table_title (line 203) | def _table_title(self) -> str: class InputIdsRecord (line 207) | class InputIdsRecord(_TableRecord): method _table_title (line 210) | def _table_title(self) -> str: class ComparisonTensorRecord (line 214) | class ComparisonTensorRecord(TensorComparisonInfo, _BaseComparisonRecord): method category (line 223) | def category(self) -> str: method _format_body (line 230) | def _format_body(self) -> str: method _format_rich_body (line 233) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render... class ComparisonNonTensorRecord (line 237) | class ComparisonNonTensorRecord(_BaseComparisonRecord): method category (line 247) | def category(self) -> str: method _format_body (line 252) | def _format_body(self) -> str: method _format_rich_body (line 255) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render... class SummaryRecord (line 259) | class SummaryRecord(_OutputRecord): method _validate_totals (line 268) | def _validate_totals(self) -> "SummaryRecord": method _format_body (line 277) | def _format_body(self) -> str: method _format_rich_body (line 280) | def _format_rich_body(self, verbosity: Verbosity = "normal") -> Render... class LogRecord (line 284) | class LogRecord(_OutputRecord): method _format_body (line 287) | def _format_body(self) -> str: function _get_any_record_adapter (line 307) | def _get_any_record_adapter() -> TypeAdapter: function parse_record_json (line 311) | def parse_record_json(json_str: str | bytes) -> AnyRecord: FILE: python/sglang/srt/debug_utils/comparator/per_token_visualizer.py function generate_per_token_heatmap (line 15) | def generate_per_token_heatmap( function _collect_per_token_data (line 32) | def _collect_per_token_data( function _render_heatmap (line 44) | def _render_heatmap( FILE: python/sglang/srt/debug_utils/comparator/preset.py function expand_preset (line 23) | def expand_preset(argv: list[str], presets: dict[str, list[str]]) -> lis... function _expand_flag (line 38) | def _expand_flag( FILE: python/sglang/srt/debug_utils/comparator/report_sink.py class ReportSink (line 14) | class ReportSink: method __init__ (line 17) | def __init__(self) -> None: method verbosity (line 25) | def verbosity(self) -> Verbosity: method configure (line 28) | def configure( method add (line 49) | def add(self, record: _OutputRecord) -> None: method close (line 57) | def close(self) -> None: method report_path (line 63) | def report_path(self) -> Optional[Path]: method _reset (line 66) | def _reset(self) -> None: method _get_console (line 73) | def _get_console(self) -> Console: method _print_to_stdout (line 78) | def _print_to_stdout(self, record: _OutputRecord) -> None: FILE: python/sglang/srt/debug_utils/comparator/tensor_comparator/comparator.py function compare_tensor_pair (line 26) | def compare_tensor_pair( function _compute_tensor_stats (line 95) | def _compute_tensor_stats(x: torch.Tensor) -> TensorStats: function _compute_percentiles (line 117) | def _compute_percentiles(x: torch.Tensor, *, include: bool) -> dict[int,... function compute_diff (line 124) | def compute_diff( FILE: python/sglang/srt/debug_utils/comparator/tensor_comparator/formatter.py function _esc_shape (line 32) | def _esc_shape(shape: Optional[list[int]]) -> str: function _strip_torch_prefix (line 36) | def _strip_torch_prefix(dtype: str) -> str: function _fmt_val (line 45) | def _fmt_val(value: float) -> str: function _fmt_diff_colored (line 49) | def _fmt_diff_colored(diff: float, *, threshold: float = 1e-2) -> str: function _category_marker (line 61) | def _category_marker(category: str) -> tuple[bool, str, str]: function _format_stat_line (line 73) | def _format_stat_line(stat_name: str, val_b: float, val_t: float, diff: ... function format_comparison (line 85) | def format_comparison(info: TensorComparisonInfo) -> str: function format_replicated_checks (line 134) | def format_replicated_checks(checks: list[ReplicatedCheckResult]) -> str: function _format_stats_comparison (line 158) | def _format_stats_comparison(baseline: TensorStats, target: TensorStats)... function _format_diff (line 182) | def _format_diff(diff: DiffInfo, prefix_text: str = "") -> list[str]: function format_comparison_rich (line 209) | def format_comparison_rich( function _format_comparison_minimal (line 222) | def _format_comparison_minimal(record: ComparisonTensorRecord) -> str: function _format_comparison_normal_or_verbose (line 234) | def _format_comparison_normal_or_verbose( function _format_bundle_section (line 348) | def _format_bundle_section( function _format_plan_section_rich (line 395) | def _format_plan_section_rich( function _format_sub_plan_rich (line 421) | def _format_sub_plan_rich(traced_sub: TracedSubPlan) -> str: function _format_cross_side_plan_rich (line 445) | def _format_cross_side_plan_rich(plan: AlignerPlan) -> list[str]: function _format_stats_rich (line 466) | def _format_stats_rich( function _format_abs_diff_percentiles_rich (line 503) | def _format_abs_diff_percentiles_rich(diff: DiffInfo) -> str: FILE: python/sglang/srt/debug_utils/comparator/tensor_comparator/types.py class TensorStats (line 8) | class TensorStats(_StrictBase): class TensorInfo (line 17) | class TensorInfo(_StrictBase): class DiffInfo (line 24) | class DiffInfo(_StrictBase): class TensorComparisonInfo (line 37) | class TensorComparisonInfo(_StrictBase): FILE: python/sglang/srt/debug_utils/comparator/utils.py function _check_equal_lengths (line 15) | def _check_equal_lengths(**named_lists: list) -> None: function auto_descend_dir (line 23) | def auto_descend_dir(directory: Path, label: str) -> Path: class _StrictBase (line 63) | class _StrictBase(BaseModel): class _FrozenBase (line 67) | class _FrozenBase(BaseModel): class Pair (line 71) | class Pair(_FrozenBase, Generic[_T]): method map (line 75) | def map(self, fn: Callable[[_T], _U]) -> Pair[_U]: function argmax_coord (line 79) | def argmax_coord(x: torch.Tensor) -> Tuple[int, ...]: function compute_smaller_dtype (line 84) | def compute_smaller_dtype( function try_unify_shape (line 94) | def try_unify_shape(x: torch.Tensor, target_shape: torch.Size) -> torch.... function calc_rel_diff (line 106) | def calc_rel_diff(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: function calc_per_token_rel_diff (line 113) | def calc_per_token_rel_diff( function compute_exit_code (line 137) | def compute_exit_code( function _is_all_match_pattern (line 161) | def _is_all_match_pattern(*, pattern: Optional[str], strings: list[str])... FILE: python/sglang/srt/debug_utils/comparator/visualizer/figure.py class _PanelContext (line 18) | class _PanelContext: class _Panel (line 26) | class _Panel: function _build_panels (line 32) | def _build_panels() -> list[_Panel]: function generate_comparison_figure (line 54) | def generate_comparison_figure( FILE: python/sglang/srt/debug_utils/comparator/visualizer/panels.py function _draw_baseline_heatmap (line 21) | def _draw_baseline_heatmap( function _draw_target_heatmap (line 30) | def _draw_target_heatmap( function _draw_diff_heatmap (line 39) | def _draw_diff_heatmap( function _draw_diff_histogram (line 47) | def _draw_diff_histogram( function _draw_hist2d (line 57) | def _draw_hist2d(axes: np.ndarray, row_idx: int, ctx: _PanelContext) -> ... function _draw_sampled (line 68) | def _draw_sampled(axes: np.ndarray, row_idx: int, ctx: _PanelContext) ->... function _draw_heatmap_pair (line 82) | def _draw_heatmap_pair( function _draw_histogram_pair (line 104) | def _draw_histogram_pair( function _draw_scatter_hist2d (line 130) | def _draw_scatter_hist2d( function _draw_scatter_sampled (line 190) | def _draw_scatter_sampled( FILE: python/sglang/srt/debug_utils/comparator/visualizer/preprocessing.py function _preprocess_tensor (line 15) | def _preprocess_tensor(tensor: torch.Tensor) -> torch.Tensor: function _reshape_to_balanced_aspect (line 27) | def _reshape_to_balanced_aspect( function _to_log10 (line 54) | def _to_log10(t: torch.Tensor) -> torch.Tensor: function _format_log_ticks (line 58) | def _format_log_ticks(ax: object, axis: str = "both") -> None: function _format_stats (line 70) | def _format_stats(name: str, t: torch.Tensor) -> str: function _safe_hist (line 78) | def _safe_hist( function _maybe_downsample_numpy (line 88) | def _maybe_downsample_numpy( function _sanitize_filename (line 100) | def _sanitize_filename(name: str) -> str: FILE: python/sglang/srt/debug_utils/cuda_coredump.py function is_enabled (line 27) | def is_enabled() -> bool: function get_dump_dir (line 31) | def get_dump_dir() -> str: function _inject_env (line 35) | def _inject_env(): function cleanup_dump_dir (line 58) | def cleanup_dump_dir(): function report (line 65) | def report(): FILE: python/sglang/srt/debug_utils/dump_comparator.py function main (line 20) | def main(args): function check_tensor_pair (line 87) | def check_tensor_pair( function _compute_and_print_diff (line 188) | def _compute_and_print_diff( function _argmax_coord (line 215) | def _argmax_coord(x: torch.Tensor) -> tuple: function _compute_smaller_dtype (line 220) | def _compute_smaller_dtype(dtype_a, dtype_b): function _try_unify_shape (line 228) | def _try_unify_shape(x: torch.Tensor, target_shape): function _calc_rel_diff (line 242) | def _calc_rel_diff(x: torch.Tensor, y: torch.Tensor): function _load_object (line 249) | def _load_object(path): function _comparison_preprocessor (line 265) | def _comparison_preprocessor(x_baseline, x_target, name): class TensorDimDesc (line 271) | class TensorDimDesc: function _get_tensor_dim_descs (line 278) | def _get_tensor_dim_descs() -> List[TensorDimDesc]: FILE: python/sglang/srt/debug_utils/dump_loader.py function parse_meta_from_filename (line 13) | def parse_meta_from_filename(path: Path) -> Dict[str, Any]: class ValueWithMeta (line 27) | class ValueWithMeta: method load (line 32) | def load(path: Path) -> "ValueWithMeta": function _unwrap_dict_format (line 51) | def _unwrap_dict_format(obj: Any) -> Tuple[Any, Dict[str, Any]]: class DumpLoader (line 59) | class DumpLoader: method __init__ (line 60) | def __init__(self): method enable (line 69) | def enable(self): method load (line 72) | def load(self, name, **kwargs): function read_meta (line 95) | def read_meta(directory): function _add_duplicate_index (line 123) | def _add_duplicate_index(df: pl.DataFrame) -> pl.DataFrame: function filter_rows (line 132) | def filter_rows(df: pl.DataFrame, conditions: Dict[str, Any]) -> list[di... function find_row (line 147) | def find_row(df: pl.DataFrame, conditions: Dict[str, Any]): function _cast_to_polars_dtype (line 155) | def _cast_to_polars_dtype(value, target_dtype): function read_tokenizer_path (line 168) | def read_tokenizer_path(directory: Path) -> Optional[str]: FILE: python/sglang/srt/debug_utils/dumper.py class _BaseConfig (line 27) | class _BaseConfig(ABC): method __post_init__ (line 28) | def __post_init__(self) -> None: method _verify_types (line 31) | def _verify_types(self) -> None: method _env_prefix (line 47) | def _env_prefix(cls) -> str: ... method _env_name (line 50) | def _env_name(cls, field_name: str) -> str: method from_env (line 54) | def from_env(cls) -> "_BaseConfig": method with_defaults (line 62) | def with_defaults(self, **kwargs) -> "_BaseConfig": method _unwrap_type (line 72) | def _unwrap_type(hint) -> type: method _parse_env_field (line 79) | def _parse_env_field(cls, env_name: str, default): method _parse_env_value (line 83) | def _parse_env_value(raw, default): method from_kv_pairs (line 93) | def from_kv_pairs(cls, pairs: Optional[List[str]]) -> "_BaseConfig": method _kv_pairs_to_dict (line 97) | def _kv_pairs_to_dict(cls, pairs: Optional[List[str]]) -> dict: class DumperConfig (line 127) | class DumperConfig(_BaseConfig): method _env_prefix (line 145) | def _env_prefix(cls) -> str: method server_port_parsed (line 150) | def server_port_parsed(self) -> Optional[Union[int, Literal["reuse"]]]: class _DumperState (line 164) | class _DumperState: class _Dumper (line 172) | class _Dumper: method __init__ (line 202) | def __init__(self, *, config: DumperConfig): method may_enable (line 210) | def may_enable(self) -> bool: method step (line 213) | def step(self): method dump (line 227) | def dump( method dump_model (line 259) | def dump_model( method dump_dict (line 279) | def dump_dict(self, name_prefix, data, save: bool = True, **kwargs): method set_ctx (line 284) | def set_ctx(self, **kwargs): method ctx (line 297) | def ctx( method apply_source_patches (line 330) | def apply_source_patches(self) -> None: method register_non_intrusive_dumper (line 349) | def register_non_intrusive_dumper( method configure (line 363) | def configure(self, **kwargs) -> None: method configure_default (line 366) | def configure_default(self, **kwargs) -> None: method reset (line 369) | def reset(self) -> None: method capture_output (line 376) | def capture_output(self): method get_state (line 384) | def get_state(self) -> dict: method _http_manager (line 392) | def _http_manager(self) -> Optional["_DumperHttpManager"]: method _dump_inner (line 399) | def _dump_inner( method _register_dump_grad_hook (line 467) | def _register_dump_grad_hook( method _dump_single (line 500) | def _dump_single( method _static_meta (line 564) | def _static_meta(self) -> dict: method _ensure_exp_name (line 567) | def _ensure_exp_name(self): class _NonIntrusiveDumper (line 579) | class _NonIntrusiveDumper: method __init__ (line 583) | def __init__( method remove (line 612) | def remove(self) -> None: method _detect_module_ctx (line 618) | def _detect_module_ctx( method _register_ctx_hooks (line 630) | def _register_ctx_hooks(self, module: "torch.nn.Module", *, ctx: dict)... method _make_forward_pre_hook (line 645) | def _make_forward_pre_hook(self, *, module_name: str, is_root: bool): method _make_forward_hook (line 661) | def _make_forward_hook(self, *, module_name: str, is_root: bool): method _dump_value (line 668) | def _dump_value( method _convert_value (line 682) | def _convert_value(value, *, skip_forward_batch: bool = False) -> dict... function _register_forward_hook_or_replace_fn (line 700) | def _register_forward_hook_or_replace_fn( function _torch_save (line 748) | def _torch_save(value, path: str): function _map_tensor (line 764) | def _map_tensor(value, fn: Callable[[torch.Tensor], torch.Tensor]): function _clone_if_view (line 772) | def _clone_if_view(value): function _strip_parameter (line 781) | def _strip_parameter(value): function _collective_with_timeout (line 790) | def _collective_with_timeout(fn, operation_name: str, timeout_seconds: i... function _get_default_exp_name (line 810) | def _get_default_exp_name(timeout_seconds: int = 60): function _cleanup_old_dumps (line 837) | def _cleanup_old_dumps(base_dir: Path, exp_name: Optional[str] = None) -... function _get_rank (line 857) | def _get_rank(): function _get_world_size (line 864) | def _get_world_size(): function _obj_to_dict (line 871) | def _obj_to_dict(obj): function _materialize_value (line 888) | def _materialize_value(value): function _format_tags (line 894) | def _format_tags(kwargs: dict) -> str: class _DefaultNoneDict (line 898) | class _DefaultNoneDict(dict): method __missing__ (line 901) | def __missing__(self, key: str): function _evaluate_filter (line 908) | def _evaluate_filter(filter_expr: str, tags: dict[str, Any]) -> bool: function _deepcopy_or_clone (line 919) | def _deepcopy_or_clone(x): function _compute_static_meta (line 928) | def _compute_static_meta(): class _DumperHttpManager (line 950) | class _DumperHttpManager: method __init__ (line 951) | def __init__(self, dumper: "_Dumper"): method handle_request (line 974) | def handle_request(self, *, method: str, body: dict[str, Any]) -> list... method _handle_request_inner (line 979) | def _handle_request_inner(self, *, method: str, body: dict[str, Any]) ... function _start_http_server (line 995) | def _start_http_server(*, prefix: str, target: object, http_port: int): function _make_http_handler (line 1002) | def _make_http_handler(*, prefix: str, target): function _create_zmq_rpc_broadcast (line 1034) | def _create_zmq_rpc_broadcast( class _ZmqRpcHandle (line 1086) | class _ZmqRpcHandle: method __init__ (line 1089) | def __init__(self, socket, debug_name: str): method __getattr__ (line 1093) | def __getattr__(self, method_name: str): class _RpcBroadcastBase (line 1112) | class _RpcBroadcastBase: method __getattr__ (line 1115) | def __getattr__(self, method_name: str): method __init__ (line 1118) | def __init__(self, handles: List[_ZmqRpcHandle]): class _ZmqRpcBroadcast (line 1122) | class _ZmqRpcBroadcast(_RpcBroadcastBase): method __init__ (line 1128) | def __init__(self, handles: List[_ZmqRpcHandle]): method __getattr__ (line 1131) | def __getattr__(self, method_name: str): function _get_local_ip_by_remote (line 1144) | def _get_local_ip_by_remote() -> Optional[str]: class _RecomputeStatus (line 1176) | class _RecomputeStatus(enum.Enum): method to_pseudo_parallel_meta (line 1181) | def to_pseudo_parallel_meta(self) -> dict[str, Any]: class _FrameworkPlugin (line 1190) | class _FrameworkPlugin(ABC): method name (line 1193) | def name(self) -> str: ... method collect_parallel_info (line 1196) | def collect_parallel_info(self) -> dict: ... method convert_value (line 1199) | def convert_value( method detect_layer_id (line 1206) | def detect_layer_id(self, module: "torch.nn.Module") -> Optional[int]: method core_fields (line 1210) | def core_fields(self) -> frozenset[str]: method get_tokenizer_path (line 1213) | def get_tokenizer_path(self) -> Optional[str]: method detect_recompute_status (line 1216) | def detect_recompute_status(self) -> _RecomputeStatus: class _SGLangPlugin (line 1220) | class _SGLangPlugin(_FrameworkPlugin): method name (line 1234) | def name(self) -> str: method collect_parallel_info (line 1237) | def collect_parallel_info(self) -> dict: method convert_value (line 1272) | def convert_value( method detect_layer_id (line 1297) | def detect_layer_id(self, module: "torch.nn.Module") -> Optional[int]: method core_fields (line 1302) | def core_fields(self) -> frozenset[str]: method get_tokenizer_path (line 1307) | def get_tokenizer_path(self) -> Optional[str]: class _MegatronPlugin (line 1323) | class _MegatronPlugin(_FrameworkPlugin): method name (line 1332) | def name(self) -> str: method collect_parallel_info (line 1335) | def collect_parallel_info(self) -> dict: method convert_value (line 1385) | def convert_value( method detect_layer_id (line 1398) | def detect_layer_id(self, module: "torch.nn.Module") -> Optional[int]: method core_fields (line 1403) | def core_fields(self) -> frozenset[str]: method detect_recompute_status (line 1408) | def detect_recompute_status(self) -> _RecomputeStatus: function _detect_recompute_status (line 1426) | def _detect_recompute_status() -> _RecomputeStatus: function get_truncated_value (line 1443) | def get_truncated_value(value): function get_tensor_info (line 1460) | def get_tensor_info(x): FILE: python/sglang/srt/debug_utils/log_parser.py function parse (line 19) | def parse(lines): FILE: python/sglang/srt/debug_utils/model_truncator.py function main (line 14) | def main(args): function _maybe_snapshot_download (line 54) | def _maybe_snapshot_download(path): function _transform_json (line 60) | def _transform_json(dir_input, dir_output, filename, fn): function _transform_config (line 67) | def _transform_config(args, config_json): function _transform_safetensors_index (line 71) | def _transform_safetensors_index(args, safetensors_index): function _transform_safetensors_file (line 79) | def _transform_safetensors_file( function _filter_tensor_name (line 88) | def _filter_tensor_name(args, tensor_name: str): FILE: python/sglang/srt/debug_utils/schedule_simulator/data_source/data_loader.py function load_from_request_logger (line 8) | def load_from_request_logger(file_path: Union[str, Path]) -> List[SimReq... FILE: python/sglang/srt/debug_utils/schedule_simulator/data_source/data_synthesis.py function generate_random_requests (line 7) | def generate_random_requests( function generate_gsp_requests (line 36) | def generate_gsp_requests( function _random_len (line 77) | def _random_len(full_len: int, range_ratio: float) -> int: FILE: python/sglang/srt/debug_utils/schedule_simulator/entrypoint.py function create_arg_parser (line 31) | def create_arg_parser() -> argparse.ArgumentParser: function _load_requests (line 90) | def _load_requests(args: argparse.Namespace) -> List[SimRequest]: function _create_router (line 115) | def _create_router(name: str, total_gpus: int): function _create_scheduler (line 125) | def _create_scheduler(name: str): function main (line 131) | def main(args: argparse.Namespace) -> SimulationResult: FILE: python/sglang/srt/debug_utils/schedule_simulator/gpu_state.py class StepRecord (line 8) | class StepRecord: class GPUState (line 19) | class GPUState: method batch_size (line 25) | def batch_size(self) -> int: method total_attention_compute (line 28) | def total_attention_compute(self) -> int: method total_seq_len (line 31) | def total_seq_len(self, extra_reqs: Optional[List[SimRequest]] = None)... method is_valid (line 41) | def is_valid(self) -> bool: method start_request (line 44) | def start_request(self, req: SimRequest) -> None: method evict_request (line 49) | def evict_request(self, req: SimRequest) -> None: method execute_step (line 54) | def execute_step(self) -> None: method get_step_record (line 61) | def get_step_record(self, step: int) -> StepRecord: FILE: python/sglang/srt/debug_utils/schedule_simulator/metrics.py class MetricRecorder (line 7) | class MetricRecorder(ABC): method on_step_end (line 9) | def on_step_end(self, step: int, gpu_states: List[GPUState]) -> None: ... method get_summary (line 12) | def get_summary(self) -> Dict[str, Any]: ... class BalancednessRecorder (line 15) | class BalancednessRecorder(MetricRecorder): method __init__ (line 16) | def __init__(self, name: str, value_fn: Callable[[GPUState], float]): method on_step_end (line 21) | def on_step_end(self, step: int, gpu_states: List[GPUState]) -> None: method get_summary (line 28) | def get_summary(self) -> Dict[str, Any]: function BatchSizeBalancednessRecorder (line 38) | def BatchSizeBalancednessRecorder() -> BalancednessRecorder: function AttentionComputeBalancednessRecorder (line 42) | def AttentionComputeBalancednessRecorder() -> BalancednessRecorder: class AvgBatchSizeRecorder (line 48) | class AvgBatchSizeRecorder(MetricRecorder): method __init__ (line 49) | def __init__(self): method on_step_end (line 53) | def on_step_end(self, step: int, gpu_states: List[GPUState]) -> None: method get_summary (line 58) | def get_summary(self) -> Dict[str, Any]: FILE: python/sglang/srt/debug_utils/schedule_simulator/request.py class SimRequest (line 6) | class SimRequest: method seq_len (line 14) | def seq_len(self) -> int: method is_finished (line 17) | def is_finished(self) -> bool: FILE: python/sglang/srt/debug_utils/schedule_simulator/routers/base.py class RouterPolicy (line 6) | class RouterPolicy(ABC): method route (line 8) | def route(self, incoming_request: SimRequest) -> int: ... FILE: python/sglang/srt/debug_utils/schedule_simulator/routers/random_router.py class RandomRouter (line 7) | class RandomRouter(RouterPolicy): method __init__ (line 8) | def __init__(self, num_gpus: int): method route (line 11) | def route(self, incoming_request: SimRequest) -> int: FILE: python/sglang/srt/debug_utils/schedule_simulator/routers/round_robin_router.py class RoundRobinRouter (line 5) | class RoundRobinRouter(RouterPolicy): method __init__ (line 6) | def __init__(self, num_gpus: int): method route (line 10) | def route(self, incoming_request: SimRequest) -> int: FILE: python/sglang/srt/debug_utils/schedule_simulator/routers/sticky_router.py class StickyRouter (line 8) | class StickyRouter(RouterPolicy): method __init__ (line 9) | def __init__(self, num_gpus: int): method _assign_gpu (line 13) | def _assign_gpu(self) -> int: method route (line 16) | def route(self, incoming_request: SimRequest) -> int: FILE: python/sglang/srt/debug_utils/schedule_simulator/schedulers/base.py class SchedulerPolicy (line 8) | class SchedulerPolicy(ABC): method schedule (line 10) | def schedule(self, gpu_state: "GPUState") -> None: ... FILE: python/sglang/srt/debug_utils/schedule_simulator/schedulers/fifo_scheduler.py class FIFOScheduler (line 9) | class FIFOScheduler(SchedulerPolicy): method schedule (line 10) | def schedule(self, gpu_state: "GPUState") -> None: FILE: python/sglang/srt/debug_utils/schedule_simulator/simulator.py class SimulationResult (line 12) | class SimulationResult: class Simulator (line 17) | class Simulator: method __init__ (line 18) | def __init__( method run (line 40) | def run(self, requests: List[SimRequest]) -> SimulationResult: method _should_stop (line 65) | def _should_stop(self) -> bool: method _route_requests (line 76) | def _route_requests(self, incoming_requests: List[SimRequest]) -> None: method _schedule_all_gpus (line 82) | def _schedule_all_gpus(self) -> None: method _execute_step (line 90) | def _execute_step(self) -> None: method _log_step (line 94) | def _log_step(self) -> None: method _record_metrics (line 108) | def _record_metrics(self) -> None: method _get_summary (line 112) | def _get_summary(self) -> Dict[str, Any]: function _format_ids (line 116) | def _format_ids(requests: List[SimRequest], limit: int = 5) -> str: FILE: python/sglang/srt/debug_utils/source_patcher/code_patcher.py function apply_patches_from_config (line 19) | def apply_patches_from_config( class CodePatcher (line 41) | class CodePatcher: method __init__ (line 44) | def __init__(self, *, patches: list[PatchSpec]) -> None: method __enter__ (line 48) | def __enter__(self) -> "CodePatcher": method __exit__ (line 52) | def __exit__( function patch_function (line 63) | def patch_function( function _apply_specs (line 101) | def _apply_specs(specs: list[PatchSpec]) -> list[PatchState]: function _inject_preamble (line 113) | def _inject_preamble(*, config: PatchConfig, extra_imports: list[str]) -... function _insert_preamble (line 130) | def _insert_preamble(*, source: str, preamble: str) -> str: function _find_signature_end (line 150) | def _find_signature_end(lines: list[str]) -> int: function _resolve_target (line 158) | def _resolve_target(qualified_name: str) -> Callable[..., Any]: FILE: python/sglang/srt/debug_utils/source_patcher/source_editor.py function apply_edits (line 4) | def apply_edits(*, source: str, edits: list[EditSpec]) -> str: function _apply_single_edit (line 15) | def _apply_single_edit(*, source: str, edit: EditSpec) -> str: function _resolve_replacement (line 44) | def _resolve_replacement(*, edit: EditSpec, match_text: str) -> str: function _find_match (line 53) | def _find_match(*, source_lines: list[str], match_lines: list[str]) -> int: function _realign_replacement (line 81) | def _realign_replacement( function _leading_spaces (line 107) | def _leading_spaces(line: str) -> int: FILE: python/sglang/srt/debug_utils/source_patcher/types.py class PatchApplicationError (line 8) | class PatchApplicationError(Exception): class _StrictBase (line 12) | class _StrictBase(BaseModel): class EditSpec (line 16) | class EditSpec(_StrictBase): method _check_modes_mutually_exclusive (line 31) | def _check_modes_mutually_exclusive(self) -> "EditSpec": class PatchSpec (line 45) | class PatchSpec(_StrictBase): class PatchConfig (line 51) | class PatchConfig(_StrictBase): class PatchState (line 55) | class PatchState: method __init__ (line 56) | def __init__( method restore (line 62) | def restore(self) -> None: FILE: python/sglang/srt/debug_utils/tensor_dump_forward_hook.py class TensorDumper (line 26) | class TensorDumper: method __init__ (line 27) | def __init__( method get_dump_dir (line 46) | def get_dump_dir(self): method add_tensor (line 49) | def add_tensor(self, name, tensor_item): method dump_current_tensors (line 78) | def dump_current_tensors(self): method _add_hook_recursive (line 89) | def _add_hook_recursive( method _dump_hook (line 127) | def _dump_hook(self, tensor_name, do_dump): function register_forward_hook_for_model (line 141) | def register_forward_hook_for_model( FILE: python/sglang/srt/debug_utils/text_comparator.py function main (line 16) | def main(args): function _compute_df_input_mode_simple_evals (line 74) | def _compute_df_input_mode_simple_evals(args): function _compute_df_input_one_mode_simple_evals (line 83) | def _compute_df_input_one_mode_simple_evals(path, category, trial_index): function _compute_id_from_object (line 107) | def _compute_id_from_object(obj): function _compute_df_raw (line 114) | def _compute_df_raw(args): function _get_file_infos (line 127) | def _get_file_infos(args): function _read_df_raw (line 138) | def _read_df_raw(path: str, category: str, trial_index: int): function _transform_df_input (line 144) | def _transform_df_input(df: pl.DataFrame): function _compute_df_meta (line 173) | def _compute_df_meta(df_input: pl.DataFrame): function _handle_one_prompt (line 188) | def _handle_one_prompt(df_one_prompt: pl.DataFrame): function _compute_str_prefix_len (line 216) | def _compute_str_prefix_len(a: str, b: str) -> int: FILE: python/sglang/srt/disaggregation/ascend/conn.py class AscendKVManager (line 21) | class AscendKVManager(MooncakeKVManager): method init_engine (line 22) | def init_engine(self): method register_buffer_to_engine (line 31) | def register_buffer_to_engine(self): method send_kvcache (line 43) | def send_kvcache( class AscendKVSender (line 134) | class AscendKVSender(MooncakeKVSender): class AscendKVReceiver (line 138) | class AscendKVReceiver(MooncakeKVReceiver): class AscendKVBootstrapServer (line 142) | class AscendKVBootstrapServer(MooncakeKVBootstrapServer): FILE: python/sglang/srt/disaggregation/ascend/transfer_engine.py class AscendTransferEngine (line 24) | class AscendTransferEngine(MooncakeTransferEngine): method __init__ (line 26) | def __init__( method initialize (line 56) | def initialize(self) -> None: method batch_register (line 84) | def batch_register(self, ptrs: List[int], lengths: List[int]): method _get_transfer_protocol (line 94) | def _get_transfer_protocol(): FILE: python/sglang/srt/disaggregation/base/conn.py class KVArgs (line 15) | class KVArgs: class KVPoll (line 42) | class KVPoll: class BaseKVManager (line 50) | class BaseKVManager(ABC): method __init__ (line 54) | def __init__( method register_to_bootstrap (line 63) | def register_to_bootstrap(self): class BaseKVSender (line 68) | class BaseKVSender(ABC): method __init__ (line 71) | def __init__( method init (line 81) | def init(self, num_kv_indices: int, aux_index: Optional[int] = None): method send (line 88) | def send( method poll (line 99) | def poll(self) -> KVPoll: method failure_exception (line 106) | def failure_exception(self): class BaseKVReceiver (line 113) | class BaseKVReceiver(ABC): method __init__ (line 116) | def __init__( method init (line 124) | def init( method poll (line 136) | def poll(self) -> KVPoll: method failure_exception (line 143) | def failure_exception(self): method clear (line 149) | def clear(self): method abort (line 155) | def abort(self): class BaseKVBootstrapServer (line 162) | class BaseKVBootstrapServer(ABC): method __init__ (line 164) | def __init__(self, host: str, port: int): ... FILE: python/sglang/srt/disaggregation/common/conn.py class PrefillServerInfo (line 48) | class PrefillServerInfo: method __post_init__ (line 66) | def __post_init__(self): class PrefillRankInfo (line 79) | class PrefillRankInfo: method __post_init__ (line 83) | def __post_init__(self): class CommonKVManager (line 88) | class CommonKVManager(BaseKVManager): method __init__ (line 89) | def __init__( method check_status (line 177) | def check_status(self, bootstrap_room: int) -> KVPoll: method update_status (line 180) | def update_status(self, bootstrap_room: int, status: KVPoll): method record_failure (line 191) | def record_failure(self, bootstrap_room: int, failure_reason: str): method try_ensure_parallel_info (line 195) | def try_ensure_parallel_info(self, bootstrap_addr: str) -> bool: method _resolve_rank_mapping (line 240) | def _resolve_rank_mapping(self, info: PrefillServerInfo) -> None: method register_to_bootstrap (line 319) | def register_to_bootstrap(self): method _connect (line 363) | def _connect(self, endpoint: str, is_ipv6: bool = False): method get_mha_kv_ptrs_with_pp (line 370) | def get_mha_kv_ptrs_with_pp( method get_mla_kv_ptrs_with_pp (line 403) | def get_mla_kv_ptrs_with_pp( class CommonKVSender (line 417) | class CommonKVSender(BaseKVSender): method __init__ (line 418) | def __init__( method _register_prefill_dp_rank (line 444) | def _register_prefill_dp_rank(self): method init (line 460) | def init(self, num_kv_indices: int, aux_index: Optional[int] = None): method send (line 467) | def send( method poll (line 474) | def poll(self) -> KVPoll: method failure_exception (line 477) | def failure_exception(self): class CommonKVReceiver (line 481) | class CommonKVReceiver(BaseKVReceiver): method __init__ (line 487) | def __init__( method _setup_bootstrap_infos (line 529) | def _setup_bootstrap_infos(self): method _get_bootstrap_info_from_server (line 583) | def _get_bootstrap_info_from_server( method query_prefill_dp_ranks (line 603) | def query_prefill_dp_ranks( method _connect (line 626) | def _connect(cls, endpoint: str, is_ipv6: bool = False): method _connect_to_bootstrap_server (line 638) | def _connect_to_bootstrap_server(cls, bootstrap_info: dict): method _register_kv_args (line 645) | def _register_kv_args(self): method failure_exception (line 648) | def failure_exception(self): class CommonKVBootstrapServer (line 652) | class CommonKVBootstrapServer(BaseKVBootstrapServer): method __init__ (line 653) | def __init__(self, host: str, port: int): method run (line 680) | def run(self): method _is_ready (line 683) | def _is_ready(self) -> bool: method _setup_routes (line 697) | def _setup_routes(self): method _handle_health_check (line 703) | async def _handle_health_check(self, request): method _handle_route (line 706) | async def _handle_route(self, request: web.Request): method _handle_route_put (line 717) | async def _handle_route_put(self, request: web.Request): method _handle_route_get (line 784) | async def _handle_route_get(self, request: web.Request): method _handle_register_dp_rank (line 846) | async def _handle_register_dp_rank(self, request: web.Request): method _handle_query_dp_ranks (line 858) | async def _handle_query_dp_ranks(self, request: web.Request): method _cleanup_expired_entries (line 869) | async def _cleanup_expired_entries(self): method _run_server (line 887) | def _run_server(self): method close (line 915) | def close(self): method poll (line 925) | def poll(self) -> KVPoll: ... FILE: python/sglang/srt/disaggregation/common/utils.py class FastQueue (line 9) | class FastQueue: method __init__ (line 10) | def __init__(self): method put (line 14) | def put(self, item): method get (line 20) | def get(self): function group_concurrent_contiguous (line 28) | def group_concurrent_contiguous( FILE: python/sglang/srt/disaggregation/decode.py function _is_fake_transfer (line 81) | def _is_fake_transfer(req: Req, server_args: ServerArgs) -> bool: function _bootstrap_addr (line 88) | def _bootstrap_addr(req: Req) -> str: class DecodeReqToTokenPool (line 93) | class DecodeReqToTokenPool: method __init__ (line 105) | def __init__( method write (line 130) | def write(self, indices, values): method available_size (line 133) | def available_size(self): method alloc (line 136) | def alloc(self, reqs: List["Req"]) -> Optional[List[int]]: method free (line 159) | def free(self, req: "Req"): method clear (line 164) | def clear(self): class HybridMambaDecodeReqToTokenPool (line 168) | class HybridMambaDecodeReqToTokenPool(HybridReqToTokenPool): method __init__ (line 170) | def __init__( method clear (line 207) | def clear(self): class DecodeRequest (line 213) | class DecodeRequest: method seqlen (line 220) | def seqlen(self) -> int: class DecodePreallocQueue (line 224) | class DecodePreallocQueue: method __init__ (line 229) | def __init__( method _init_kv_manager (line 287) | def _init_kv_manager(self) -> CommonKVManager: method add (line 356) | def add(self, req: Req, is_retracted: bool = False) -> None: method _resolve_prefill_dp_rank (line 377) | def _resolve_prefill_dp_rank(self, req: Req) -> Optional[int]: method _create_receiver_and_enqueue (line 393) | def _create_receiver_and_enqueue(self, req: Req, prefill_dp_rank: int)... method _check_if_req_exceed_kv_capacity (line 412) | def _check_if_req_exceed_kv_capacity(self, req: Req) -> bool: method extend (line 421) | def extend(self, reqs: List[Req], is_retracted: bool = False) -> None: method resume_retracted_reqs (line 426) | def resume_retracted_reqs( method _update_handshake_waiters (line 468) | def _update_handshake_waiters( method _ensure_prefill_info (line 507) | def _ensure_prefill_info( method _resolve_pending_reqs (line 554) | def _resolve_pending_reqs(self) -> None: method pop_preallocated (line 596) | def pop_preallocated( method num_tokens_pre_allocated (line 737) | def num_tokens_pre_allocated(self): method _allocatable_tokens (line 742) | def _allocatable_tokens( method _pre_alloc (line 792) | def _pre_alloc(self, req: Req) -> torch.Tensor: class DecodeTransferQueue (line 830) | class DecodeTransferQueue: method __init__ (line 835) | def __init__( method add (line 853) | def add(self, decode_req: DecodeRequest) -> None: method extend (line 856) | def extend(self, decode_reqs: List[DecodeRequest]) -> None: method _commit_transfer_to_req (line 859) | def _commit_transfer_to_req(self, decode_req: DecodeRequest) -> bool: method pop_transferred (line 940) | def pop_transferred(self, rids_to_check: Optional[List[str]] = None) -... class SchedulerDisaggregationDecodeMixin (line 1010) | class SchedulerDisaggregationDecodeMixin: method event_loop_normal_disagg_decode (line 1013) | def event_loop_normal_disagg_decode(self: Scheduler): method event_loop_overlap_disagg_decode (line 1039) | def event_loop_overlap_disagg_decode(self: Scheduler): method _run_batch_prebuilt (line 1075) | def _run_batch_prebuilt( method get_next_disagg_decode_batch_to_run (line 1086) | def get_next_disagg_decode_batch_to_run( method get_new_prebuilt_batch (line 1114) | def get_new_prebuilt_batch(self: Scheduler) -> Optional[ScheduleBatch]: method process_decode_queue (line 1166) | def process_decode_queue(self: Scheduler): FILE: python/sglang/srt/disaggregation/decode_kvcache_offload_manager.py class DecodeKVCacheOffloadManager (line 34) | class DecodeKVCacheOffloadManager: method __init__ (line 37) | def __init__( method offload_kv_cache (line 109) | def offload_kv_cache(self, req) -> bool: method check_offload_progress (line 182) | def check_offload_progress(self): method _check_offload_progress (line 202) | def _check_offload_progress(self, finish_count): method _release_finished_req (line 237) | def _release_finished_req(self, req: Req, start_offset: int): method _check_backup_progress (line 261) | def _check_backup_progress(self, finish_count): method _trigger_backup (line 275) | def _trigger_backup( method _compute_prefix_hash (line 288) | def _compute_prefix_hash(self, tokens, prior_hash=""): method finalize_release_on_finish (line 297) | def finalize_release_on_finish(self, req: Req): FILE: python/sglang/srt/disaggregation/decode_schedule_batch_mixin.py class ScheduleBatchDisaggregationDecodeMixin (line 20) | class ScheduleBatchDisaggregationDecodeMixin: method prepare_for_prebuilt (line 22) | def prepare_for_prebuilt(self: ScheduleBatch): method process_prebuilt (line 103) | def process_prebuilt( FILE: python/sglang/srt/disaggregation/encode_grpc_server.py class EncoderHealthServicer (line 41) | class EncoderHealthServicer(health_pb2_grpc.HealthServicer): method __init__ (line 50) | def __init__(self): method set_serving (line 53) | def set_serving(self): method set_not_serving (line 56) | def set_not_serving(self): method Check (line 59) | async def Check(self, request, context) -> health_pb2.HealthCheckRespo... method Watch (line 68) | async def Watch(self, request, context): class SGLangEncoderServer (line 72) | class SGLangEncoderServer(SGLangEncoderServicer): method __init__ (line 77) | def __init__( method Encode (line 87) | async def Encode( method Send (line 164) | async def Send( method SchedulerReceiveUrl (line 187) | async def SchedulerReceiveUrl( function serve_grpc_encoder (line 208) | async def serve_grpc_encoder(server_args: ServerArgs): FILE: python/sglang/srt/disaggregation/encode_receiver.py function _grpc_target (line 40) | def _grpc_target(url: str) -> str: function _normalize_embedding_ports (line 48) | def _normalize_embedding_ports(embedding_port): function _grpc_scheduler_receive_url (line 56) | def _grpc_scheduler_receive_url(target, req_id, receive_url, receive_cou... function _grpc_encode_request (line 76) | def _grpc_encode_request(target, encode_request): function _grpc_send_request (line 102) | def _grpc_send_request(target, request_json): class EmbeddingData (line 124) | class EmbeddingData: method __init__ (line 125) | def __init__( method get_grid (line 156) | def get_grid(self): method get_embedding (line 160) | def get_embedding(self): method __repr__ (line 163) | def __repr__(self): method copy_without_embedding (line 166) | def copy_without_embedding(self): function _cat_grid (line 194) | def _cat_grid(dims, flatten_items=False): class MultiModalEmbeddingData (line 204) | class MultiModalEmbeddingData(EmbeddingData): method __init__ (line 205) | def __init__( method _set_part_grid (line 246) | def _set_part_grid(self, part_idx, modality, grid): method _set_video_meta_for_part (line 255) | def _set_video_meta_for_part(self, part_idx, source): method from_embedding_data (line 267) | def from_embedding_data(cls, embedding_data: EmbeddingData): method __repr__ (line 288) | def __repr__(self): method get_embedding (line 291) | def get_embedding(self, is_concat=False): method ready (line 304) | def ready(self): method get_mm_extra_meta (line 307) | def get_mm_extra_meta(self): method add (line 325) | def add(self, embedding_data: EmbeddingData): class WaitingImageRequestStatus (line 338) | class WaitingImageRequestStatus(IntEnum): function create_part_req_id (line 345) | def create_part_req_id(original_req_id: str, part_idx: int) -> str: function extract_original_req_id (line 350) | def extract_original_req_id(part_req_id: str) -> str: function calculate_modality_num_parts (line 357) | def calculate_modality_num_parts(modalities, num_items_assigned): class WaitingImageRequest (line 381) | class WaitingImageRequest: method __init__ (line 382) | def __init__( method send_encode_request (line 412) | def send_encode_request(self): method _try_recv_mm_data (line 482) | def _try_recv_mm_data(self): class WaitingImageRequestGrpc (line 534) | class WaitingImageRequestGrpc(WaitingImageRequest): method send_encode_request (line 535) | def send_encode_request(self): function _determine_tensor_transport_mode (line 581) | def _determine_tensor_transport_mode(server_args): class MMReceiverBase (line 591) | class MMReceiverBase(ABC): method __init__ (line 592) | def __init__( method process_waiting_requests (line 673) | def process_waiting_requests(self, recv_reqs): method recv_mm_data (line 676) | async def recv_mm_data( method _cleanup_mooncake_buffer (line 699) | def _cleanup_mooncake_buffer(self, req_id): method _recv_mm_data (line 714) | async def _recv_mm_data(self, req_id, recv_socket, mm_processor, prompt): method send_encode_request (line 800) | def send_encode_request(self, obj): method _send_encode_request (line 803) | def _send_encode_request(self, obj): method _process_waiting_requests (line 829) | def _process_waiting_requests(self, recv_reqs, waiting_cls): method _run_encode_in_thread (line 902) | def _run_encode_in_thread( method create_req (line 919) | def create_req(self, recv_req: TokenizedGenerateReqInput): method allocate_embedding_buffer (line 955) | async def allocate_embedding_buffer(self, req_id, total_bytes): method _assign_items_by_modality (line 964) | def _assign_items_by_modality( method _extract_url_data (line 1010) | def _extract_url_data(self, request_obj) -> List[Dict]: class MMReceiverHTTP (line 1035) | class MMReceiverHTTP(MMReceiverBase): method __init__ (line 1036) | def __init__( method process_waiting_requests (line 1057) | def process_waiting_requests(self, recv_reqs): method encode (line 1060) | async def encode( class MMReceiverGrpc (line 1194) | class MMReceiverGrpc(MMReceiverBase): method __init__ (line 1195) | def __init__( method build_and_send_encode_request (line 1215) | def build_and_send_encode_request(self, image_urls, rid): method process_waiting_requests (line 1224) | def process_waiting_requests(self, recv_reqs): method encode (line 1227) | async def encode( function _validate_transport_mode (line 1349) | def _validate_transport_mode(transport_mode: str, encoder_urls): function create_mm_receiver (line 1375) | def create_mm_receiver( FILE: python/sglang/srt/disaggregation/encode_server.py class MMError (line 73) | class MMError(Exception): method __init__ (line 74) | def __init__(self, message, code=HTTPStatus.INTERNAL_SERVER_ERROR): class BadRequestError (line 80) | class BadRequestError(MMError): method __init__ (line 81) | def __init__(self, message): class InternalError (line 85) | class InternalError(MMError): method __init__ (line 86) | def __init__(self, message): class TensorWrapper (line 90) | class TensorWrapper: method __init__ (line 93) | def __init__(self, tensor): method __buffer__ (line 105) | def __buffer__(self): function _convert (line 113) | def _convert(data): function _get_mm_grid_dim (line 139) | def _get_mm_grid_dim(mm_inputs, modality): function _get_mm_feature (line 146) | def _get_mm_feature(mm_inputs, modality): function _build_mm_aux_data (line 155) | def _build_mm_aux_data(mm_inputs): class MMEncoder (line 166) | class MMEncoder: method __init__ (line 167) | def __init__( method _infer_embedding_dims (line 289) | def _infer_embedding_dims(self) -> dict: method _build_vision_config (line 328) | def _build_vision_config(self, mm_process_config): method _load_mm_processor (line 373) | def _load_mm_processor(self, server_args: ServerArgs): method _load_single_item (line 422) | def _load_single_item( method submit_data_loading_tasks (line 450) | def submit_data_loading_tasks(self, items, modalities): method _get_feat_extract_output_lengths (line 466) | def _get_feat_extract_output_lengths(self, feature_lens): method _flatten_and_load_videos (line 490) | async def _flatten_and_load_videos(self, mm_items): method _flatten_and_load_data_by_modality (line 519) | async def _flatten_and_load_data_by_modality(self, mm_items, modality): method get_num_patches (line 566) | def get_num_patches( method get_num_tokens (line 575) | def get_num_tokens( method slice_embedding (line 586) | def slice_embedding( method _calculate_hashes_from_features (line 597) | def _calculate_hashes_from_features( method _encode_missing (line 612) | async def _encode_missing( method encode_with_global_cache (line 662) | async def encode_with_global_cache( method _flatten_and_load_audios (line 816) | async def _flatten_and_load_audios(self, mm_items): method _flatten_and_load_images (line 822) | async def _flatten_and_load_images(self, mm_items): method _calculate_timestamps (line 828) | def _calculate_timestamps(self, indices, video_fps: float, merge_size:... method _process_mm_items (line 846) | async def _process_mm_items(self, mm_items, modality): method _encode (line 939) | async def _encode(self, mm_items, modality: Modality) -> torch.Tensor: method _send (line 990) | async def _send( method encode (line 1045) | async def encode(self, mm_items, modality: Modality, req_id, num_parts... method send (line 1086) | async def send( method send_with_url (line 1100) | async def send_with_url( method get_embedding_port (line 1179) | async def get_embedding_port(self, prefill_url): class EncoderProfiler (line 1191) | class EncoderProfiler: method __init__ (line 1192) | def __init__(self, rank: int): method start (line 1200) | def start(self, obj: ProfileReq): method step (line 1234) | def step(self): method stop (line 1243) | def stop(self): function run_encoder (line 1261) | async def run_encoder( function launch_encoder (line 1294) | def launch_encoder(server_args, schedule_path, dist_init_method, rank): function launch_server (line 1303) | def launch_server(server_args: ServerArgs): function get_condition (line 1330) | async def get_condition(rid): function handle_encode_request (line 1338) | async def handle_encode_request(request: dict): function handle_send_request (line 1441) | async def handle_send_request(request: dict): function handle_scheduler_receive_url_request (line 1455) | async def handle_scheduler_receive_url_request(request: dict): function health_generate (line 1471) | async def health_generate(): function start_profile_async (line 1482) | async def start_profile_async(obj: Optional[ProfileReqInput] = None): function stop_profile_async (line 1520) | async def stop_profile_async(): FILE: python/sglang/srt/disaggregation/fake/conn.py class FakeKVManager (line 21) | class FakeKVManager(BaseKVManager): method __init__ (line 22) | def __init__( method register_to_bootstrap (line 31) | def register_to_bootstrap(self): class FakeKVSender (line 35) | class FakeKVSender(BaseKVSender): method __init__ (line 36) | def __init__( method poll (line 46) | def poll(self) -> KVPoll: method init (line 55) | def init( method send (line 65) | def send( method failure_exception (line 75) | def failure_exception(self): class FakeKVReceiver (line 79) | class FakeKVReceiver(BaseKVReceiver): method __init__ (line 80) | def __init__( method poll (line 89) | def poll(self) -> KVPoll: method init (line 98) | def init( method failure_exception (line 109) | def failure_exception(self): FILE: python/sglang/srt/disaggregation/kv_events.py class EventBatch (line 38) | class EventBatch( class KVCacheEvent (line 49) | class KVCacheEvent( class OffloadedState (line 64) | class OffloadedState: method __init__ (line 73) | def __init__( class BlockStored (line 81) | class BlockStored(KVCacheEvent): class BlockRemoved (line 90) | class BlockRemoved(KVCacheEvent): class AllBlocksCleared (line 95) | class AllBlocksCleared(KVCacheEvent): class KVEventBatch (line 99) | class KVEventBatch(EventBatch): class EventPublisher (line 103) | class EventPublisher(ABC): method __init__ (line 117) | def __init__(self, attn_dp_rank: int = 0): method publish (line 121) | def publish(self, events: EventBatch) -> None: method shutdown (line 129) | def shutdown(self) -> None: class NullEventPublisher (line 133) | class NullEventPublisher(EventPublisher): method publish (line 136) | def publish(self, events) -> None: method shutdown (line 139) | def shutdown(self) -> None: class ZmqEventPublisher (line 143) | class ZmqEventPublisher(EventPublisher): method __init__ (line 170) | def __init__( method publish (line 212) | def publish(self, events: EventBatch) -> None: method shutdown (line 219) | def shutdown(self) -> None: method _socket_setup (line 250) | def _socket_setup(self) -> None: method _publisher_thread (line 283) | def _publisher_thread(self) -> None: method _service_replay (line 320) | def _service_replay(self) -> None: method offset_endpoint_port (line 344) | def offset_endpoint_port( class KVEventsConfig (line 377) | class KVEventsConfig(BaseModel): method from_cli (line 412) | def from_cli(cls, cli_value: str) -> "KVEventsConfig": class EventPublisherFactory (line 417) | class EventPublisherFactory: method register_publisher (line 424) | def register_publisher(cls, name: str, ctor: Callable[..., EventPublis... method create (line 430) | def create(cls, config: Optional[str], attn_dp_rank: int = 0) -> Event... FILE: python/sglang/srt/disaggregation/mooncake/conn.py class KVTransferError (line 43) | class KVTransferError(Exception): method __init__ (line 44) | def __init__(self, bootstrap_room: int, failure_reason: str): method __str__ (line 49) | def __str__(self): class TransferKVChunk (line 55) | class TransferKVChunk: class TransferInfo (line 66) | class TransferInfo: method from_zmq (line 78) | def from_zmq(cls, msg: List[bytes]): class KVArgsRegisterInfo (line 107) | class KVArgsRegisterInfo: method from_zmq (line 123) | def from_zmq(cls, msg: List[bytes]): class AuxDataCodec (line 148) | class AuxDataCodec: method serialize_data_from_buffer (line 152) | def serialize_data_from_buffer(src_addr, data_length): method deserialize_data_to_buffer (line 158) | def deserialize_data_to_buffer(kv_args, buffer_index, aux_index, data): class MooncakeKVManager (line 168) | class MooncakeKVManager(CommonKVManager): method __init__ (line 171) | def __init__( method init_engine (line 217) | def init_engine(self): method register_buffer_to_engine (line 220) | def register_buffer_to_engine(self): method _transfer_data (line 239) | def _transfer_data(self, mooncake_session_id, transfer_blocks): method _send_kvcache_generic (line 248) | def _send_kvcache_generic( method send_kvcache (line 356) | def send_kvcache( method send_kvcache_slice (line 374) | def send_kvcache_slice( method send_aux (line 497) | def send_aux( method send_aux_tcp (line 521) | def send_aux_tcp( method send_aux_data_to_endpoint (line 546) | def send_aux_data_to_endpoint( method _handle_aux_data (line 569) | def _handle_aux_data(self, msg: List[bytes]): method maybe_send_extra (line 589) | def maybe_send_extra( method _send_mamba_state (line 653) | def _send_mamba_state( method _send_mamba_state_slice (line 674) | def _send_mamba_state_slice( method sync_status_to_decode_endpoint (line 760) | def sync_status_to_decode_endpoint( method transfer_worker (line 772) | def transfer_worker( method start_prefill_thread (line 924) | def start_prefill_thread(self): method start_decode_thread (line 960) | def start_decode_thread(self): method add_transfer_request (line 1048) | def add_transfer_request( method get_session_id (line 1093) | def get_session_id(self): method _handle_node_failure (line 1096) | def _handle_node_failure(self, failed_bootstrap_addr): class MooncakeKVSender (line 1128) | class MooncakeKVSender(CommonKVSender): method __init__ (line 1130) | def __init__( method send (line 1142) | def send( method poll (line 1182) | def poll(self) -> KVPoll: method clear (line 1208) | def clear(self) -> None: method failure_exception (line 1212) | def failure_exception(self): method abort (line 1225) | def abort(self): class MooncakeKVReceiver (line 1234) | class MooncakeKVReceiver(CommonKVReceiver): method __init__ (line 1235) | def __init__( method _register_kv_args (line 1250) | def _register_kv_args(self): method init (line 1298) | def init( method poll (line 1338) | def poll(self) -> KVPoll: method clear (line 1364) | def clear(self) -> None: method failure_exception (line 1374) | def failure_exception(self): method abort (line 1387) | def abort(self): class MooncakeKVBootstrapServer (line 1396) | class MooncakeKVBootstrapServer(CommonKVBootstrapServer): FILE: python/sglang/srt/disaggregation/mooncake/utils.py function init_mooncake_custom_mem_pool (line 29) | def init_mooncake_custom_mem_pool( function check_mooncake_custom_mem_pool_enabled (line 92) | def check_mooncake_custom_mem_pool_enabled() -> Tuple[bool, Optional[str]]: FILE: python/sglang/srt/disaggregation/mori/conn.py function _pack_mem_desc_list (line 47) | def _pack_mem_desc_list(mems: List[MemoryDesc]) -> bytes: function _unpack_mem_desc_list (line 54) | def _unpack_mem_desc_list(blob: bytes) -> List[MemoryDesc]: class TransferInfo (line 62) | class TransferInfo: method from_zmq (line 73) | def from_zmq(cls, payload: List[bytes]) -> TransferInfo: class KVArgsRegisterInfo (line 106) | class KVArgsRegisterInfo: method engine_key (line 119) | def engine_key(self) -> str: method from_zmq (line 123) | def from_zmq(cls, payload: List[bytes]) -> KVArgsRegisterInfo: class AuxDataCodec (line 148) | class AuxDataCodec: method serialize_data_from_buffer (line 150) | def serialize_data_from_buffer(src_addr, data_length): method deserialize_data_to_buffer (line 155) | def deserialize_data_to_buffer(kv_args, buffer_index, aux_index, data): class TPSliceConfig (line 165) | class TPSliceConfig: class MoriKVManager (line 176) | class MoriKVManager(CommonKVManager): method __init__ (line 179) | def __init__( method _init_engine (line 200) | def _init_engine(self) -> IOEngine: method _register_local_buffers (line 256) | def _register_local_buffers(self) -> None: method _handle_register_message (line 284) | def _handle_register_message(self, payload: List[bytes]) -> None: method _handle_transfer_message (line 291) | def _handle_transfer_message(self, payload: List[bytes]) -> None: method _validate_message (line 307) | def _validate_message(self, msg: List[bytes]) -> Optional[List[bytes]]: method _start_bootstrap_thread (line 316) | def _start_bootstrap_thread(self) -> None: method _cleanup_room_tracking (line 335) | def _cleanup_room_tracking(self, bootstrap_room: int) -> None: method _start_decode_thread (line 344) | def _start_decode_thread(self) -> None: method notify_decode_status (line 398) | def notify_decode_status( method _add_remote_peer (line 428) | def _add_remote_peer(self, register_info: KVArgsRegisterInfo) -> None: method _get_mha_mem_desc_slices (line 442) | def _get_mha_mem_desc_slices( method _get_mla_mem_desc_slices (line 468) | def _get_mla_mem_desc_slices( method _issue_layer_transfers (line 482) | def _issue_layer_transfers( method _build_tp_slice_config (line 508) | def _build_tp_slice_config(self, peer_info: KVArgsRegisterInfo) -> TPS... method _issue_tp_slice_transfers (line 562) | def _issue_tp_slice_transfers( method send_kvcache (line 623) | def send_kvcache( method send_aux (line 707) | def send_aux( method send_aux_tcp (line 716) | def send_aux_tcp( method send_aux_data_to_endpoint (line 742) | def send_aux_data_to_endpoint( method _handle_aux_data (line 765) | def _handle_aux_data(self, msg: List[bytes]): method add_transfer_request (line 785) | def add_transfer_request( class MoriKVSender (line 838) | class MoriKVSender(CommonKVSender): method __init__ (line 839) | def __init__( method send (line 855) | def send( method poll (line 889) | def poll(self) -> KVPoll: method _all_transfers_finished (line 924) | def _all_transfers_finished(self) -> bool: method _has_transfer_error (line 931) | def _has_transfer_error(self) -> bool: method _collect_failure_reason (line 934) | def _collect_failure_reason(self) -> str: method _notify_decode (line 940) | def _notify_decode( method _finalize_failure (line 951) | def _finalize_failure(self, failure_reason: Optional[str] = None) -> N... method clear (line 961) | def clear(self) -> None: method failure_exception (line 964) | def failure_exception(self): method abort (line 974) | def abort(self): class MoriKVReceiver (line 981) | class MoriKVReceiver(CommonKVReceiver): method __init__ (line 983) | def __init__( method _register_kv_args (line 1000) | def _register_kv_args(self): method init (line 1032) | def init( method poll (line 1066) | def poll(self) -> KVPoll: method clear (line 1086) | def clear(self) -> None: method failure_exception (line 1094) | def failure_exception(self): method abort (line 1105) | def abort(self): class MoriKVBootstrapServer (line 1115) | class MoriKVBootstrapServer(CommonKVBootstrapServer): FILE: python/sglang/srt/disaggregation/nixl/conn.py class TransferInfo (line 36) | class TransferInfo: method is_dummy (line 48) | def is_dummy(self): method from_zmq (line 52) | def from_zmq(cls, msg: List[bytes]): class KVArgsRegisterInfo (line 72) | class KVArgsRegisterInfo: method from_zmq (line 89) | def from_zmq(cls, msg: List[bytes]): class TransferStatus (line 113) | class TransferStatus: method is_done (line 133) | def is_done(self): method is_failed (line 153) | def is_failed(self): class NixlKVManager (line 157) | class NixlKVManager(CommonKVManager): method __init__ (line 158) | def __init__( method _start_heartbeat_checker_thread (line 204) | def _start_heartbeat_checker_thread(self): method _handle_node_failure (line 256) | def _handle_node_failure(self, failed_bootstrap_addr): method register_buffer_to_engine (line 290) | def register_buffer_to_engine(self): method _add_remote_peer (line 326) | def _add_remote_peer(self, decode_kv_args: KVArgsRegisterInfo): method _send_kvcache_generic (line 334) | def _send_kvcache_generic( method send_kvcache (line 444) | def send_kvcache( method send_kvcache_slice (line 464) | def send_kvcache_slice( method send_aux (line 589) | def send_aux( method _send_mamba_state (line 627) | def _send_mamba_state( method maybe_send_extra (line 674) | def maybe_send_extra( method add_transfer_request (line 727) | def add_transfer_request( method update_transfer_status (line 811) | def update_transfer_status(self): method check_transfer_done (line 845) | def check_transfer_done(self, room: int): method _start_bootstrap_thread (line 850) | def _start_bootstrap_thread(self): class NixlKVSender (line 888) | class NixlKVSender(CommonKVSender): method __init__ (line 889) | def __init__( method send (line 902) | def send( method poll (line 940) | def poll(self) -> KVPoll: method failure_exception (line 950) | def failure_exception(self): class NixlKVReceiver (line 954) | class NixlKVReceiver(CommonKVReceiver): method __init__ (line 955) | def __init__( method init (line 973) | def init( method poll (line 1021) | def poll(self) -> KVPoll: method _register_kv_args (line 1060) | def _register_kv_args(self): method failure_exception (line 1092) | def failure_exception(self): class NixlKVBootstrapServer (line 1096) | class NixlKVBootstrapServer(CommonKVBootstrapServer): FILE: python/sglang/srt/disaggregation/prefill.py function release_req_to_metadata_buffer (line 65) | def release_req_to_metadata_buffer( class PrefillBootstrapQueue (line 86) | class PrefillBootstrapQueue: method __init__ (line 91) | def __init__( method _init_kv_manager (line 133) | def _init_kv_manager(self) -> CommonKVManager: method add (line 206) | def add(self, req: Req, num_kv_heads: int) -> None: method extend (line 229) | def extend(self, reqs: List[Req], num_kv_heads: int) -> None: method _check_if_req_exceed_kv_capacity (line 233) | def _check_if_req_exceed_kv_capacity(self, req: Req) -> bool: method _process_req (line 243) | def _process_req(self, req: Req) -> None: method pop_bootstrapped (line 249) | def pop_bootstrapped( class SchedulerDisaggregationPrefillMixin (line 334) | class SchedulerDisaggregationPrefillMixin: method get_next_disagg_prefill_batch_to_run (line 339) | def get_next_disagg_prefill_batch_to_run( method event_loop_normal_disagg_prefill (line 357) | def event_loop_normal_disagg_prefill(self: Scheduler) -> None: method event_loop_overlap_disagg_prefill (line 385) | def event_loop_overlap_disagg_prefill(self: Scheduler) -> None: method process_batch_result_disagg_prefill (line 424) | def process_batch_result_disagg_prefill( method process_disagg_prefill_inflight_queue (line 545) | def process_disagg_prefill_inflight_queue( method get_transferred_rids (line 660) | def get_transferred_rids(self: Scheduler) -> List[str]: method process_prefill_chunk (line 678) | def process_prefill_chunk(self: Scheduler) -> None: method send_kv_chunk (line 706) | def send_kv_chunk( FILE: python/sglang/srt/disaggregation/utils.py class DisaggregationMode (line 33) | class DisaggregationMode(Enum): function poll_and_all_reduce (line 47) | def poll_and_all_reduce(pollers, gloo_group: dist.ProcessGroup): function poll_and_all_reduce_attn_cp_tp_group (line 63) | def poll_and_all_reduce_attn_cp_tp_group( class ReqToMetadataIdxAllocator (line 88) | class ReqToMetadataIdxAllocator: method __init__ (line 91) | def __init__( method available_size (line 98) | def available_size(self): method alloc (line 101) | def alloc(self) -> Optional[int]: method free (line 107) | def free(self, free_index: int): class MetadataBuffers (line 111) | class MetadataBuffers: method __init__ (line 112) | def __init__( method get_buf_infos (line 173) | def get_buf_infos(self): method get_buf (line 212) | def get_buf(self, idx: int): method set_buf (line 226) | def set_buf(self, req: Req): class TransferBackend (line 277) | class TransferBackend(Enum): class KVClassType (line 285) | class KVClassType(Enum): function get_kv_class (line 294) | def get_kv_class( function get_kv_class (line 298) | def get_kv_class( function get_kv_class (line 302) | def get_kv_class( function get_kv_class (line 306) | def get_kv_class( function get_kv_class (line 310) | def get_kv_class( function get_kv_class (line 315) | def get_kv_class( function kv_to_page_indices (line 412) | def kv_to_page_indices(kv_indices: np.ndarray, page_size: int): function kv_to_page_num (line 422) | def kv_to_page_num(num_kv_indices: int, page_size: int): function page_indices_to_cp_rank_page_indices (line 427) | def page_indices_to_cp_rank_page_indices( function filter_kv_indices_for_cp_rank (line 473) | def filter_kv_indices_for_cp_rank( function is_mla_backend (line 513) | def is_mla_backend(target_kv_pool) -> bool: function prepare_abort (line 519) | def prepare_abort(req: Req, error_message: str, status_code=None): FILE: python/sglang/srt/distributed/communication_op.py function tensor_model_parallel_all_reduce (line 11) | def tensor_model_parallel_all_reduce(input_: torch.Tensor) -> torch.Tensor: function tensor_model_parallel_fused_allreduce_rmsnorm (line 16) | def tensor_model_parallel_fused_allreduce_rmsnorm( function tensor_model_parallel_all_gather (line 31) | def tensor_model_parallel_all_gather( function tensor_model_parallel_gather (line 38) | def tensor_model_parallel_gather( function broadcast_tensor_dict (line 45) | def broadcast_tensor_dict( FILE: python/sglang/srt/distributed/device_communicators/cuda_wrapper.py class cudaIpcMemHandle_t (line 30) | class cudaIpcMemHandle_t(ctypes.Structure): class Function (line 35) | class Function: function find_loaded_library (line 41) | def find_loaded_library(lib_name) -> Optional[str]: class CudaRTLibrary (line 68) | class CudaRTLibrary: method __init__ (line 118) | def __init__(self, so_file: Optional[str] = None): method CUDART_CHECK (line 137) | def CUDART_CHECK(self, result: cudaError_t) -> None: method cudaGetErrorString (line 142) | def cudaGetErrorString(self, error: cudaError_t) -> str: method cudaSetDevice (line 145) | def cudaSetDevice(self, device: int) -> None: method cudaDeviceSynchronize (line 148) | def cudaDeviceSynchronize(self) -> None: method cudaDeviceReset (line 151) | def cudaDeviceReset(self) -> None: method cudaMalloc (line 154) | def cudaMalloc(self, size: int) -> ctypes.c_void_p: method cudaFree (line 159) | def cudaFree(self, devPtr: ctypes.c_void_p) -> None: method cudaMemset (line 162) | def cudaMemset(self, devPtr: ctypes.c_void_p, value: int, count: int) ... method cudaMemcpy (line 165) | def cudaMemcpy( method cudaIpcGetMemHandle (line 172) | def cudaIpcGetMemHandle(self, devPtr: ctypes.c_void_p) -> cudaIpcMemHa... method cudaIpcOpenMemHandle (line 179) | def cudaIpcOpenMemHandle(self, handle: cudaIpcMemHandle_t) -> ctypes.c... FILE: python/sglang/srt/distributed/device_communicators/custom_all_reduce.py function _can_p2p (line 39) | def _can_p2p(rank: int, world_size: int) -> bool: class CustomAllreduce (line 53) | class CustomAllreduce: method __init__ (line 64) | def __init__( method create_shared_buffer (line 216) | def create_shared_buffer( method free_shared_buffer (line 243) | def free_shared_buffer( method capture (line 251) | def capture(self): method _get_ipc_meta (line 265) | def _get_ipc_meta(self, inp: torch.Tensor): method _gather_ipc_meta (line 275) | def _gather_ipc_meta(self, shard_data): method register_buffer (line 299) | def register_buffer(self, inp: torch.Tensor): method register_graph_buffers (line 303) | def register_graph_buffers(self): method should_custom_ar (line 329) | def should_custom_ar(self, inp: torch.Tensor): method all_reduce_reg (line 354) | def all_reduce_reg(self, inp: torch.Tensor, out: torch.Tensor = None): method all_reduce_unreg (line 361) | def all_reduce_unreg(self, inp: torch.Tensor, out: torch.Tensor = None): method all_reduce (line 367) | def all_reduce( method deterministic_all_reduce (line 390) | def deterministic_all_reduce( method custom_all_reduce (line 407) | def custom_all_reduce(self, input: torch.Tensor) -> Optional[torch.Ten... method close (line 444) | def close(self): method __del__ (line 452) | def __del__(self): function dispatch_custom_allreduce (line 456) | def dispatch_custom_allreduce(): FILE: python/sglang/srt/distributed/device_communicators/custom_all_reduce_ops.py function init_custom_ar (line 37) | def init_custom_ar( function all_reduce (line 45) | def all_reduce( function dispose (line 54) | def dispose(fa: int) -> None: function meta_size (line 57) | def meta_size() -> int: function register_buffer (line 60) | def register_buffer(fa: int, ipc_tensors: List[int]) -> None: function get_graph_buffer_ipc_meta (line 63) | def get_graph_buffer_ipc_meta(fa: int) -> Tuple[List[int], List[int]]: function register_graph_buffers (line 66) | def register_graph_buffers( function init_custom_ar (line 74) | def init_custom_ar( function all_reduce_reg (line 86) | def all_reduce_reg(fa: int, inp: torch.Tensor, out: torch.Tensor) -> None: function all_reduce_unreg (line 89) | def all_reduce_unreg( function deterministic_all_reduce_reg (line 94) | def deterministic_all_reduce_reg( function deterministic_all_reduce_unreg (line 99) | def deterministic_all_reduce_unreg( function dispose (line 104) | def dispose(fa: int) -> None: function meta_size (line 107) | def meta_size() -> int: function register_buffer (line 110) | def register_buffer( function get_graph_buffer_ipc_meta (line 115) | def get_graph_buffer_ipc_meta(fa: int) -> Tuple[torch.Tensor, List[int]]: function register_graph_buffers (line 118) | def register_graph_buffers( function allocate_meta_buffer (line 123) | def allocate_meta_buffer(size: int) -> torch.Tensor: function get_meta_buffer_ipc_handle (line 126) | def get_meta_buffer_ipc_handle(inp: torch.Tensor) -> torch.Tensor: function init_custom_qr (line 140) | def init_custom_qr( function qr_get_handle (line 145) | def qr_get_handle(fa: int) -> torch.Tensor: function qr_open_handles (line 148) | def qr_open_handles(fa: int, handles: list[torch.Tensor]) -> None: function qr_all_reduce (line 151) | def qr_all_reduce( function qr_destroy (line 160) | def qr_destroy(fa: int) -> None: function qr_max_size (line 163) | def qr_max_size() -> int: function mscclpp_generate_unique_id (line 176) | def mscclpp_generate_unique_id() -> bytes: function mscclpp_init_context (line 179) | def mscclpp_init_context( function mscclpp_allreduce (line 202) | def mscclpp_allreduce( FILE: python/sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py function update_environment_variables (line 57) | def update_environment_variables(envs: Dict[str, str]): function producer (line 69) | def producer( function consumer (line 103) | def consumer( function can_actually_p2p (line 144) | def can_actually_p2p( function gpu_p2p_access_check (line 244) | def gpu_p2p_access_check(src: int, tgt: int) -> bool: function with_nvml_context (line 319) | def with_nvml_context(fn: Callable[_P, _R]) -> Callable[_P, _R]: function is_full_nvlink (line 339) | def is_full_nvlink(physical_device_ids: List[int], world_size: int) -> b... function is_weak_contiguous (line 380) | def is_weak_contiguous(inp: torch.Tensor): FILE: python/sglang/srt/distributed/device_communicators/hpu_communicator.py class HpuCommunicator (line 13) | class HpuCommunicator: method __init__ (line 15) | def __init__(self, group: ProcessGroup): method all_reduce (line 23) | def all_reduce(self, x: torch.Tensor) -> torch.Tensor: method all_gather (line 31) | def all_gather(self, x: torch.Tensor, dim: int = -1) -> torch.Tensor: FILE: python/sglang/srt/distributed/device_communicators/mooncake_transfer_engine.py function get_ib_devices_for_gpu (line 15) | def get_ib_devices_for_gpu(ib_device_str: Optional[str], gpu_id: int) ->... class MooncakeTransferEngine (line 93) | class MooncakeTransferEngine: method __init__ (line 96) | def __init__( method register (line 124) | def register(self, ptr, length): method deregister (line 134) | def deregister(self, ptr): method batch_register (line 144) | def batch_register(self, ptrs: List[int], lengths: List[int]) -> int: method batch_deregister (line 161) | def batch_deregister(self, ptrs: List[int]) -> int: method initialize (line 173) | def initialize( method transfer_sync (line 202) | def transfer_sync( method batch_transfer_sync (line 223) | def batch_transfer_sync( method get_session_id (line 254) | def get_session_id(self): method get_engine (line 257) | def get_engine(self): method get_ib_device (line 260) | def get_ib_device(self): function init_mooncake_transfer_engine (line 264) | def init_mooncake_transfer_engine( function get_mooncake_transfer_engine (line 284) | def get_mooncake_transfer_engine() -> Optional[MooncakeTransferEngine]: FILE: python/sglang/srt/distributed/device_communicators/npu_communicator.py class NpuCommunicator (line 8) | class NpuCommunicator: method __init__ (line 10) | def __init__(self, group: ProcessGroup): method all_reduce (line 18) | def all_reduce(self, x: torch.Tensor) -> torch.Tensor: method all_gather (line 22) | def all_gather(self, x: torch.Tensor, dim: int = -1) -> torch.Tensor: FILE: python/sglang/srt/distributed/device_communicators/pymscclpp.py class MscclContextSelection (line 21) | class MscclContextSelection(IntEnum): function mscclpp_is_weak_contiguous (line 26) | def mscclpp_is_weak_contiguous(inp: torch.Tensor): function mscclpp_convert_to_bytes (line 33) | def mscclpp_convert_to_bytes(size_str): function mscclpp_bench_time (line 74) | def mscclpp_bench_time(func, test_niter: int = 10, warmup_niter: int = 2): class PyMscclppCommunicator (line 91) | class PyMscclppCommunicator: method __init__ (line 98) | def __init__( method pre_tune_config (line 230) | def pre_tune_config(self, dtype=torch.bfloat16) -> bool: method should_mscclpp_allreduce (line 260) | def should_mscclpp_allreduce( method all_reduce (line 276) | def all_reduce(self, tensor: torch.Tensor, op: ReduceOp = ReduceOp.SUM): method change_state (line 289) | def change_state( FILE: python/sglang/srt/distributed/device_communicators/pynccl.py class PyNcclCommunicator (line 27) | class PyNcclCommunicator: method __init__ (line 29) | def __init__( method _resolve_stream (line 129) | def _resolve_stream(self, stream: Optional[torch.cuda.Stream]): method all_reduce (line 144) | def all_reduce( method outplace_all_reduce (line 167) | def outplace_all_reduce( method all_gather (line 196) | def all_gather( method cp_all_gather_into_tensor (line 241) | def cp_all_gather_into_tensor( method reduce_scatter (line 269) | def reduce_scatter( method send (line 317) | def send(self, tensor: torch.Tensor, dst: int, stream=None): method recv (line 334) | def recv(self, tensor: torch.Tensor, src: int, stream=None): method broadcast (line 351) | def broadcast(self, tensor: torch.Tensor, src: int, stream=None): method register_comm_window_raw (line 377) | def register_comm_window_raw(self, ptr: int, size: int): method deregister_comm_window (line 380) | def deregister_comm_window(self, window): method group_start (line 383) | def group_start(self): method group_end (line 386) | def group_end(self): method change_state (line 390) | def change_state( FILE: python/sglang/srt/distributed/device_communicators/pynccl_allocator.py function is_symmetric_memory_enabled (line 77) | def is_symmetric_memory_enabled(): function set_graph_pool_id (line 84) | def set_graph_pool_id(graph_pool_id): function disable_symmetric_memory_context (line 89) | def disable_symmetric_memory_context(): function restore_symmetric_memory_context (line 97) | def restore_symmetric_memory_context(saved_context): function get_nccl_mem_pool (line 102) | def get_nccl_mem_pool(): class SymmetricMemoryContext (line 138) | class SymmetricMemoryContext: method __init__ (line 148) | def __init__( method __enter__ (line 157) | def __enter__(self): method __exit__ (line 190) | def __exit__(self, exc_type, exc_val, exc_tb): function use_symmetric_memory (line 207) | def use_symmetric_memory(group_coordinator: GroupCoordinator, disabled: ... FILE: python/sglang/srt/distributed/device_communicators/pynccl_wrapper.py function find_nccl_library (line 37) | def find_nccl_library() -> str: class ncclUniqueId (line 75) | class ncclUniqueId(ctypes.Structure): class ncclDataTypeEnum (line 85) | class ncclDataTypeEnum: method from_torch (line 104) | def from_torch(cls, dtype: torch.dtype) -> int: class ncclRedOpTypeEnum (line 127) | class ncclRedOpTypeEnum: method from_torch (line 136) | def from_torch(cls, op: ReduceOp) -> int: class Function (line 151) | class Function: class NCCLLibrary (line 157) | class NCCLLibrary: method __init__ (line 334) | def __init__(self, so_file: Optional[str] = None): method ncclGetErrorString (line 370) | def ncclGetErrorString(self, result: ncclResult_t) -> str: method NCCL_CHECK (line 373) | def NCCL_CHECK(self, result: ncclResult_t) -> None: method ncclGetRawVersion (line 378) | def ncclGetRawVersion(self) -> int: method ncclGetVersion (line 384) | def ncclGetVersion(self) -> str: method ncclGetUniqueId (line 392) | def ncclGetUniqueId(self) -> ncclUniqueId: method ncclCommInitRank (line 397) | def ncclCommInitRank( method ncclAllReduce (line 408) | def ncclAllReduce( method ncclReduce (line 429) | def ncclReduce( method ncclReduceScatter (line 451) | def ncclReduceScatter( method ncclAllGather (line 472) | def ncclAllGather( method ncclSend (line 491) | def ncclSend( method ncclRecv (line 504) | def ncclRecv( method ncclBroadcast (line 517) | def ncclBroadcast( method ncclCommDestroy (line 533) | def ncclCommDestroy(self, comm: ncclComm_t) -> None: method ncclCommWindowRegister (line 536) | def ncclCommWindowRegister( method ncclCommWindowDeregister (line 547) | def ncclCommWindowDeregister(self, comm: ncclComm_t, window: ncclWindo... method ncclGroupStart (line 550) | def ncclGroupStart(self) -> None: method ncclGroupEnd (line 553) | def ncclGroupEnd(self) -> None: FILE: python/sglang/srt/distributed/device_communicators/quick_all_reduce.py function qr_rocm_arch_available (line 28) | def qr_rocm_arch_available(): class QuickReduceRegime (line 41) | class QuickReduceRegime(Enum): class QuickAllReduce (line 52) | class QuickAllReduce: method __init__ (line 67) | def __init__( method init_quick_all_reduce (line 169) | def init_quick_all_reduce(self): method create_shared_buffer (line 213) | def create_shared_buffer(self): method should_quick_allreduce (line 224) | def should_quick_allreduce(self, inp: torch.Tensor): method quick_all_reduce (line 248) | def quick_all_reduce(self, inp: torch.Tensor, *, out: torch.Tensor = N... method close (line 259) | def close(self): method __del__ (line 266) | def __del__(self): FILE: python/sglang/srt/distributed/device_communicators/shm_broadcast.py class ShmRingBuffer (line 29) | class ShmRingBuffer: method __init__ (line 31) | def __init__( method __reduce__ (line 127) | def __reduce__(self): method __del__ (line 138) | def __del__(self): method get_data (line 145) | def get_data(self, current_idx: int): method get_metadata (line 152) | def get_metadata(self, current_idx: int): class Handle (line 160) | class Handle: class MessageQueue (line 169) | class MessageQueue: method __init__ (line 171) | def __init__( method export_handle (line 254) | def export_handle(self) -> Handle: method create_from_handle (line 258) | def create_from_handle(handle: Handle, rank) -> "MessageQueue": method wait_until_ready (line 300) | def wait_until_ready(self): method acquire_write (line 334) | def acquire_write(self): method acquire_read (line 387) | def acquire_read(self): method enqueue (line 430) | def enqueue(self, obj): method dequeue (line 445) | def dequeue(self): method broadcast_object (line 464) | def broadcast_object(self, obj=None): method create_from_process_group (line 472) | def create_from_process_group( FILE: python/sglang/srt/distributed/device_communicators/torch_symm_mem.py class TorchSymmMemCommunicator (line 30) | class TorchSymmMemCommunicator: method __init__ (line 52) | def __init__(self, group: ProcessGroup, device: Union[int, str, torch.... method should_torch_symm_mem_allreduce (line 110) | def should_torch_symm_mem_allreduce(self, inp: torch.Tensor): method all_reduce (line 133) | def all_reduce( FILE: python/sglang/srt/distributed/device_communicators/xpu_communicator.py class XpuCommunicator (line 10) | class XpuCommunicator: method __init__ (line 12) | def __init__(self, group: ProcessGroup): method all_reduce (line 20) | def all_reduce(self, x: torch.Tensor) -> torch.Tensor: method gather (line 24) | def gather( FILE: python/sglang/srt/distributed/naive_distributed.py class NaiveDistributed (line 12) | class NaiveDistributed: method __init__ (line 13) | def __init__(self, rank: int, world_size: int, rendezvous: str): method get_rank (line 24) | def get_rank(self): method get_world_size (line 27) | def get_world_size(self): method scatter (line 30) | def scatter( method all_gather_object (line 68) | def all_gather_object(self, obj: Any) -> List[Any]: method barrier (line 96) | def barrier(self): function get_naive_distributed (line 105) | def get_naive_distributed(): function set_naive_distributed (line 110) | def set_naive_distributed(instance: NaiveDistributed): FILE: python/sglang/srt/distributed/parallel_state.py function get_torch_distributed_pg_options (line 73) | def get_torch_distributed_pg_options(group_name=None): class GraphCaptureContext (line 92) | class GraphCaptureContext: class P2PWork (line 97) | class P2PWork: function _split_tensor_dict (line 102) | def _split_tensor_dict( function _get_unique_name (line 131) | def _get_unique_name(name: str) -> str: function _register_group (line 147) | def _register_group(group: "GroupCoordinator") -> None: function inplace_all_reduce (line 153) | def inplace_all_reduce(tensor: torch.Tensor, group_name: str) -> None: function outplace_all_reduce (line 162) | def outplace_all_reduce( function reg_all_gather_into_tensor (line 173) | def reg_all_gather_into_tensor( function reg_reduce_scatter_tensor (line 184) | def reg_reduce_scatter_tensor( class GroupCoordinator (line 194) | class GroupCoordinator: method __init__ (line 235) | def __init__( method __repr__ (line 443) | def __repr__(self): method first_rank (line 451) | def first_rank(self): method last_rank (line 456) | def last_rank(self): method is_first_rank (line 461) | def is_first_rank(self): method is_last_rank (line 466) | def is_last_rank(self): method next_rank (line 471) | def next_rank(self): method prev_rank (line 478) | def prev_rank(self): method graph_capture (line 485) | def graph_capture( method all_reduce (line 549) | def all_reduce(self, input_: torch.Tensor) -> torch.Tensor: method fused_allreduce_rmsnorm (line 649) | def fused_allreduce_rmsnorm( method _all_reduce_out_place (line 701) | def _all_reduce_out_place( method _all_reduce_in_place (line 730) | def _all_reduce_in_place(self, input_: torch.Tensor) -> None: method _reduce_scatter_tensor (line 740) | def _reduce_scatter_tensor( method reduce_scatter_tensor (line 759) | def reduce_scatter_tensor(self, output: torch.Tensor, input: torch.Ten... method reduce_scatter (line 765) | def reduce_scatter( method reduce_scatterv (line 774) | def reduce_scatterv( method _all_gather_into_tensor (line 809) | def _all_gather_into_tensor(self, output: torch.Tensor, input: torch.T... method all_gather_into_tensor (line 823) | def all_gather_into_tensor(self, output: torch.Tensor, input: torch.Te... method cp_all_gather_into_tensor_async (line 829) | def cp_all_gather_into_tensor_async( method all_gather (line 847) | def all_gather( method all_gatherv (line 921) | def all_gatherv( method gather (line 977) | def gather( method broadcast (line 1012) | def broadcast(self, input_: torch.Tensor, src: int = 0): method broadcast_object (line 1027) | def broadcast_object(self, obj: Optional[Any] = None, src: int = 0): method broadcast_object_list (line 1051) | def broadcast_object_list( method all_gather_object (line 1068) | def all_gather_object(self, obj: Any) -> List[Any]: method send_object (line 1073) | def send_object( method recv_object (line 1122) | def recv_object( method broadcast_tensor_dict (line 1158) | def broadcast_tensor_dict( method send_tensor_dict (line 1240) | def send_tensor_dict( method recv_tensor_dict (line 1295) | def recv_tensor_dict( method barrier (line 1355) | def barrier(self): method send (line 1364) | def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None: method recv (line 1376) | def recv( method destroy (line 1392) | def destroy(self): function get_world_group (line 1410) | def get_world_group() -> GroupCoordinator: function init_world_group (line 1415) | def init_world_group( function init_model_parallel_group (line 1433) | def init_model_parallel_group( function set_pdmux_status (line 1482) | def set_pdmux_status(enable_prefill_multiplexing: bool): function get_tp_group (line 1487) | def get_tp_group() -> GroupCoordinator: function get_attn_tp_group (line 1497) | def get_attn_tp_group() -> GroupCoordinator: function get_attn_cp_group (line 1504) | def get_attn_cp_group() -> GroupCoordinator: function get_moe_dp_group (line 1516) | def get_moe_dp_group() -> GroupCoordinator: function get_moe_ep_group (line 1521) | def get_moe_ep_group() -> GroupCoordinator: function get_moe_tp_group (line 1526) | def get_moe_tp_group() -> GroupCoordinator: function get_pp_group (line 1537) | def get_pp_group() -> GroupCoordinator: function get_mooncake_transfer_engine (line 1546) | def get_mooncake_transfer_engine(): function graph_capture (line 1559) | def graph_capture(stream: Optional[torch.cuda.Stream] = None): function set_custom_all_reduce (line 1586) | def set_custom_all_reduce(enable: bool): function set_mscclpp_all_reduce (line 1591) | def set_mscclpp_all_reduce(enable: bool): function set_torch_symm_mem_all_reduce (line 1596) | def set_torch_symm_mem_all_reduce(enable: bool): function get_default_distributed_backend (line 1611) | def get_default_distributed_backend(device: str) -> str: function _create_global_tcp_store (line 1615) | def _create_global_tcp_store(rank: int, world_size: int) -> None: function init_distributed_environment (line 1670) | def init_distributed_environment( function initialize_model_parallel (line 1744) | def initialize_model_parallel( function create_custom_parallel_group (line 2019) | def create_custom_parallel_group( function ensure_model_parallel_initialized (line 2068) | def ensure_model_parallel_initialized( function model_parallel_is_initialized (line 2101) | def model_parallel_is_initialized(): function patch_tensor_parallel_group (line 2110) | def patch_tensor_parallel_group(tp_group: GroupCoordinator): function get_world_size (line 2134) | def get_world_size(): function get_world_rank (line 2139) | def get_world_rank(): function get_tensor_model_parallel_world_size (line 2144) | def get_tensor_model_parallel_world_size(): function get_tensor_model_parallel_rank (line 2149) | def get_tensor_model_parallel_rank(): function get_attn_tensor_model_parallel_world_size (line 2155) | def get_attn_tensor_model_parallel_world_size(): function get_attn_tensor_model_parallel_rank (line 2160) | def get_attn_tensor_model_parallel_rank(): function get_attn_context_model_parallel_world_size (line 2166) | def get_attn_context_model_parallel_world_size(): function get_attn_context_model_parallel_rank (line 2171) | def get_attn_context_model_parallel_rank(): function get_pipeline_model_parallel_world_size (line 2176) | def get_pipeline_model_parallel_world_size(): function get_pipeline_model_parallel_rank (line 2181) | def get_pipeline_model_parallel_rank(): function get_moe_data_parallel_world_size (line 2187) | def get_moe_data_parallel_world_size(): function get_moe_data_parallel_rank (line 2192) | def get_moe_data_parallel_rank(): function get_moe_expert_parallel_world_size (line 2198) | def get_moe_expert_parallel_world_size(): function get_moe_expert_parallel_rank (line 2203) | def get_moe_expert_parallel_rank(): function get_moe_tensor_parallel_world_size (line 2209) | def get_moe_tensor_parallel_world_size(): function get_moe_tensor_parallel_rank (line 2214) | def get_moe_tensor_parallel_rank(): function destroy_model_parallel (line 2219) | def destroy_model_parallel(): function destroy_distributed_environment (line 2262) | def destroy_distributed_environment(): function cleanup_dist_env_and_memory (line 2271) | def cleanup_dist_env_and_memory(shutdown_ray: bool = False): function in_the_same_node_as (line 2298) | def in_the_same_node_as(pg: ProcessGroup, source_rank: int = 0) -> List[... function monkey_patch_vllm_parallel_state (line 2369) | def monkey_patch_vllm_parallel_state(reverse: bool = False): FILE: python/sglang/srt/distributed/utils.py function set_global_tcp_store (line 25) | def set_global_tcp_store(store: TCPStore) -> None: function get_global_tcp_store (line 36) | def get_global_tcp_store() -> Optional[TCPStore]: function ensure_divisibility (line 57) | def ensure_divisibility(numerator, denominator): function divide (line 64) | def divide(numerator, denominator): function split_tensor_along_last_dim (line 71) | def split_tensor_along_last_dim( function get_pp_indices (line 99) | def get_pp_indices( class StatelessProcessGroup (line 139) | class StatelessProcessGroup: method __post_init__ (line 160) | def __post_init__(self): method send_obj (line 166) | def send_obj(self, obj: Any, dst: int): method expire_data (line 174) | def expire_data(self): method recv_obj (line 185) | def recv_obj(self, src: int) -> Any: method broadcast_obj (line 193) | def broadcast_obj(self, obj: Optional[Any], src: int) -> Any: method all_gather_obj (line 211) | def all_gather_obj(self, obj: Any) -> list[Any]: method barrier (line 223) | def barrier(self): method create (line 232) | def create( FILE: python/sglang/srt/dllm/algorithm/__init__.py function import_algorithms (line 10) | def import_algorithms(): function get_algorithm (line 31) | def get_algorithm(config: DllmConfig): FILE: python/sglang/srt/dllm/algorithm/base.py class DllmAlgorithm (line 6) | class DllmAlgorithm: method __init__ (line 8) | def __init__( method from_server_args (line 16) | def from_server_args(server_args: ServerArgs): FILE: python/sglang/srt/dllm/algorithm/joint_threshold.py class JointThreshold (line 12) | class JointThreshold(DllmAlgorithm): method __init__ (line 14) | def __init__( method run (line 26) | def run( FILE: python/sglang/srt/dllm/algorithm/low_confidence.py class LowConfidence (line 14) | class LowConfidence(DllmAlgorithm): method __init__ (line 16) | def __init__( method run (line 23) | def run( FILE: python/sglang/srt/dllm/config.py class DllmConfig (line 7) | class DllmConfig: method __init__ (line 8) | def __init__( method from_server_args (line 23) | def from_server_args( FILE: python/sglang/srt/dllm/mixin/req.py class DllmReqPhase (line 12) | class DllmReqPhase(str, enum.Enum): class ReqDllmMixin (line 19) | class ReqDllmMixin: method init_diffusion_llm (line 20) | def init_diffusion_llm(self: Req, dllm_config: DllmConfig): method is_dllm (line 31) | def is_dllm(self: Req) -> bool: method is_dllm_prefill (line 34) | def is_dllm_prefill(self: Req) -> bool: method determine_dllm_phase (line 40) | def determine_dllm_phase(self: Req): method _init_fill_ids_for_dllm (line 56) | def _init_fill_ids_for_dllm(self: Req): method _update_block_offset_for_dllm (line 68) | def _update_block_offset_for_dllm(self): FILE: python/sglang/srt/dllm/mixin/scheduler.py class SchedulerDllmMixin (line 20) | class SchedulerDllmMixin: method init_diffusion_llm (line 21) | def init_diffusion_llm(self: Scheduler): method get_new_batch_dllm (line 29) | def get_new_batch_dllm(self: Scheduler) -> Optional[ScheduleBatch]: method process_batch_result_dllm (line 63) | def process_batch_result_dllm( method _fetch_waiting_reqs (line 102) | def _fetch_waiting_reqs(self: Scheduler): method _should_skip_prefill (line 114) | def _should_skip_prefill(self: Scheduler) -> bool: method _create_dllm_prefill_adder (line 132) | def _create_dllm_prefill_adder(self: Scheduler, running_bs: int) -> Pr... method _process_dllm_batches (line 148) | def _process_dllm_batches(self: Scheduler, adder: PrefillAdder) -> For... method _process_batch_by_phase (line 173) | def _process_batch_by_phase( method _update_state_for_batch (line 191) | def _update_state_for_batch( method _create_dllm_batch (line 208) | def _create_dllm_batch( method process_dllm_incoming_reqs (line 235) | def process_dllm_incoming_reqs( method process_dllm_staging_reqs (line 269) | def process_dllm_staging_reqs( class DllmManager (line 281) | class DllmManager: method __init__ (line 290) | def __init__(self, dllm_config: Optional[DllmConfig] = None): method get_prefill_requests (line 298) | def get_prefill_requests(self) -> List[Req]: method get_decode_requests (line 302) | def get_decode_requests(self) -> List[Req]: method add_waiting_reqs (line 306) | def add_waiting_reqs(self, reqs: Union[Req, List[Req]]) -> None: method add_staging_reqs (line 318) | def add_staging_reqs(self, reqs: Union[Req, List[Req]]) -> None: method _has_duplicate_reqs (line 323) | def _has_duplicate_reqs(self, reqs: List[Req]) -> bool: method any_staging_reqs (line 328) | def any_staging_reqs(self) -> bool: method is_empty (line 332) | def is_empty(self) -> bool: method increment_chunked_count (line 338) | def increment_chunked_count(self) -> None: method filter_finished_reqs (line 343) | def filter_finished_reqs(self) -> None: method init_next_round (line 348) | def init_next_round(self) -> None: FILE: python/sglang/srt/elastic_ep/elastic_ep.py class ElasticEPState (line 13) | class ElasticEPState: method is_active_equal_last (line 18) | def is_active_equal_last(self) -> bool: method sync_active_to_cpu (line 21) | def sync_active_to_cpu(self): method snapshot_active_to_last (line 25) | def snapshot_active_to_last(self): class ElasticEPStateManager (line 30) | class ElasticEPStateManager: method instance (line 34) | def instance(cls) -> ElasticEPState: method init (line 38) | def init(cls, server_args: ServerArgs): method _select_device (line 47) | def _select_device() -> torch.device: method _build_state (line 56) | def _build_state( method healthy_rank_state (line 67) | def healthy_rank_state( FILE: python/sglang/srt/elastic_ep/expert_backup_client.py function extract_layer_and_expert_id (line 23) | def extract_layer_and_expert_id(param_name): class ExpertBackupClient (line 31) | class ExpertBackupClient: method __init__ (line 32) | def __init__(self, server_args: ServerArgs, model_runner): method _receive_loop (line 73) | def _receive_loop(self): method start_transfer_client (line 85) | def start_transfer_client(self): method update_weights (line 101) | def update_weights(self, weight_name_filter=None): FILE: python/sglang/srt/elastic_ep/expert_backup_manager.py function extract_expert_id (line 26) | def extract_expert_id(param_name): class ExpertBackupManager (line 34) | class ExpertBackupManager: method __init__ (line 35) | def __init__(self, server_args: ServerArgs, port_args: PortArgs): method backup_weights_from_disk (line 80) | def backup_weights_from_disk(self): method start_transfer_server (line 143) | def start_transfer_server(self): function run_expert_backup_manager_process (line 158) | def run_expert_backup_manager_process( function run_expert_backup_manager (line 177) | def run_expert_backup_manager( FILE: python/sglang/srt/entrypoints/EngineBase.py class EngineBase (line 7) | class EngineBase(ABC): method generate (line 14) | def generate( method flush_cache (line 41) | def flush_cache(self): method update_weights_from_tensor (line 46) | def update_weights_from_tensor( method load_lora_adapter (line 55) | def load_lora_adapter(self, lora_name: str, lora_path: str): method unload_lora_adapter (line 59) | def unload_lora_adapter(self, lora_name: str): method release_memory_occupation (line 64) | def release_memory_occupation(self): method resume_memory_occupation (line 69) | def resume_memory_occupation(self): method shutdown (line 74) | def shutdown(self): FILE: python/sglang/srt/entrypoints/anthropic/protocol.py class AnthropicError (line 9) | class AnthropicError(BaseModel): class AnthropicErrorResponse (line 16) | class AnthropicErrorResponse(BaseModel): class AnthropicUsage (line 23) | class AnthropicUsage(BaseModel): class AnthropicContentBlock (line 32) | class AnthropicContentBlock(BaseModel): class AnthropicMessage (line 53) | class AnthropicMessage(BaseModel): class AnthropicTool (line 60) | class AnthropicTool(BaseModel): method validate_input_schema (line 69) | def validate_input_schema(cls, v): class AnthropicToolChoice (line 77) | class AnthropicToolChoice(BaseModel): class AnthropicCountTokensRequest (line 84) | class AnthropicCountTokensRequest(BaseModel): class AnthropicCountTokensResponse (line 94) | class AnthropicCountTokensResponse(BaseModel): class AnthropicMessagesRequest (line 100) | class AnthropicMessagesRequest(BaseModel): method validate_model (line 118) | def validate_model(cls, v): method validate_max_tokens (line 125) | def validate_max_tokens(cls, v): class AnthropicDelta (line 131) | class AnthropicDelta(BaseModel): class AnthropicStreamEvent (line 145) | class AnthropicStreamEvent(BaseModel): class AnthropicMessagesResponse (line 166) | class AnthropicMessagesResponse(BaseModel): FILE: python/sglang/srt/entrypoints/anthropic/serving.py function _wrap_sse_event (line 53) | def _wrap_sse_event(data: str, event_type: str) -> str: class AnthropicServing (line 58) | class AnthropicServing: method __init__ (line 65) | def __init__(self, openai_serving_chat: OpenAIServingChat): method handle_messages (line 68) | async def handle_messages( method _convert_to_chat_completion_request (line 89) | def _convert_to_chat_completion_request( method _handle_non_streaming (line 309) | async def _handle_non_streaming( method _handle_streaming (line 365) | async def _handle_streaming( method _generate_anthropic_stream (line 415) | async def _generate_anthropic_stream( method _convert_response (line 640) | def _convert_response( method _error_response (line 692) | def _error_response( method handle_count_tokens (line 707) | async def handle_count_tokens( FILE: python/sglang/srt/entrypoints/context.py class ConversationContext (line 26) | class ConversationContext(ABC): method append_output (line 29) | def append_output(self, output) -> None: method call_tool (line 33) | async def call_tool(self) -> list[Message]: method need_builtin_tool_call (line 37) | def need_builtin_tool_call(self) -> bool: method render_for_completion (line 41) | def render_for_completion(self) -> list[int]: class SimpleContext (line 45) | class SimpleContext(ConversationContext): method __init__ (line 47) | def __init__(self): method append_output (line 50) | def append_output(self, output) -> None: method need_builtin_tool_call (line 53) | def need_builtin_tool_call(self) -> bool: method call_tool (line 56) | async def call_tool(self) -> list[Message]: method render_for_completion (line 59) | def render_for_completion(self) -> list[int]: class HarmonyContext (line 63) | class HarmonyContext(ConversationContext): method __init__ (line 65) | def __init__( method append_output (line 83) | def append_output(self, output) -> None: method messages (line 107) | def messages(self) -> list: method need_builtin_tool_call (line 110) | def need_builtin_tool_call(self) -> bool: method call_tool (line 119) | async def call_tool(self) -> list[Message]: method render_for_completion (line 135) | def render_for_completion(self) -> list[int]: method call_search_tool (line 138) | async def call_search_tool( method call_python_tool (line 151) | async def call_python_tool( class StreamingHarmonyContext (line 175) | class StreamingHarmonyContext(HarmonyContext): method __init__ (line 177) | def __init__(self, *args, **kwargs): method messages (line 187) | def messages(self) -> list: method append_output (line 190) | def append_output(self, output) -> None: method is_expecting_start (line 229) | def is_expecting_start(self) -> bool: method is_assistant_action_turn (line 232) | def is_assistant_action_turn(self) -> bool: method render_for_completion (line 235) | def render_for_completion(self) -> list[int]: FILE: python/sglang/srt/entrypoints/engine.py class SchedulerInitResult (line 110) | class SchedulerInitResult: function init_tokenizer_manager (line 118) | def init_tokenizer_manager( class Engine (line 139) | class Engine(EngineBase): method __init__ (line 160) | def __init__(self, **kwargs): method _resolve_routed_dp_rank (line 229) | def _resolve_routed_dp_rank( method generate (line 260) | def generate( method async_generate (line 350) | async def async_generate( method encode (line 430) | def encode( method async_encode (line 459) | async def async_encode( method rerank (line 489) | def rerank( method _launch_scheduler_processes (line 503) | def _launch_scheduler_processes( method _launch_subprocesses (line 602) | def _launch_subprocesses( method shutdown (line 697) | def shutdown(self): method __enter__ (line 701) | def __enter__(self): method __exit__ (line 704) | def __exit__(self, exc_type, exc_value, traceback): method flush_cache (line 708) | def flush_cache(self): method open_session (line 711) | def open_session( method close_session (line 741) | def close_session(self, session_id: str) -> None: method start_profile (line 750) | def start_profile(self, **kwargs): method stop_profile (line 753) | def stop_profile(self): method start_expert_distribution_record (line 756) | def start_expert_distribution_record(self): method stop_expert_distribution_record (line 761) | def stop_expert_distribution_record(self): method dump_expert_distribution_record (line 766) | def dump_expert_distribution_record(self): method get_server_info (line 771) | def get_server_info(self): method init_weights_update_group (line 782) | def init_weights_update_group( method destroy_weights_update_group (line 804) | def destroy_weights_update_group( method update_weights_from_distributed (line 816) | def update_weights_from_distributed( method update_weights_from_tensor (line 838) | def update_weights_from_tensor( method update_weights_from_disk (line 862) | def update_weights_from_disk( method update_weights_from_ipc (line 882) | def update_weights_from_ipc( method get_weights_by_name (line 896) | def get_weights_by_name(self, name: str, truncate_size: int = 100): method load_lora_adapter_from_tensors (line 903) | def load_lora_adapter_from_tensors( method load_lora_adapter (line 926) | def load_lora_adapter(self, lora_name: str, lora_path: str, pinned: bo... method unload_lora_adapter (line 939) | def unload_lora_adapter(self, lora_name: str): method async_load_lora_adapter (line 948) | async def async_load_lora_adapter( method async_unload_lora_adapter (line 965) | async def async_unload_lora_adapter(self, lora_name: str): method release_memory_occupation (line 976) | def release_memory_occupation(self, tags: Optional[List[str]] = None): method resume_memory_occupation (line 982) | def resume_memory_occupation(self, tags: Optional[List[str]] = None): method freeze_gc (line 988) | def freeze_gc(self): method collective_rpc (line 1007) | def collective_rpc(self, method: str, **kwargs): method save_remote_model (line 1014) | def save_remote_model(self, **kwargs): method save_sharded_model (line 1017) | def save_sharded_model(self, **kwargs): method score (line 1020) | def score( method async_score (line 1071) | async def async_score( function _set_envs_and_config (line 1094) | def _set_envs_and_config(server_args: ServerArgs): function _wait_for_scheduler_ready (line 1182) | def _wait_for_scheduler_ready( function _calculate_rank_ranges (line 1207) | def _calculate_rank_ranges( function _compute_parallelism_ranks (line 1242) | def _compute_parallelism_ranks( FILE: python/sglang/srt/entrypoints/grpc_server.py function serve_grpc (line 6) | async def serve_grpc(server_args, model_info=None): FILE: python/sglang/srt/entrypoints/harmony_utils.py function get_encoding (line 54) | def get_encoding(): function get_system_message (line 61) | def get_system_message( function get_developer_message (line 86) | def get_developer_message( function get_user_message (line 118) | def get_user_message(content: str) -> Message: function parse_response_input (line 122) | def parse_response_input( function parse_response_output (line 174) | def parse_response_output(output: ResponseOutputItem) -> Message: function parse_chat_input (line 190) | def parse_chat_input(chat_msg) -> Message: function render_for_completion (line 202) | def render_for_completion(messages: list[Message]) -> list[int]: function get_stop_tokens_for_assistant_actions (line 210) | def get_stop_tokens_for_assistant_actions() -> list[int]: function get_streamable_parser_for_assistant (line 214) | def get_streamable_parser_for_assistant() -> StreamableParser: function parse_output_message (line 218) | def parse_output_message(message: Message): function parse_remaining_state (line 324) | def parse_remaining_state(parser: StreamableParser): function parse_output_into_messages (line 364) | def parse_output_into_messages(token_ids: Iterable[int]): FILE: python/sglang/srt/entrypoints/http_server.py class _GlobalState (line 193) | class _GlobalState: function set_global_state (line 211) | def set_global_state(global_state: _GlobalState): function get_global_state (line 216) | def get_global_state() -> _GlobalState: function init_multi_tokenizer (line 220) | async def init_multi_tokenizer() -> ServerArgs: function lifespan (line 272) | async def lifespan(fast_api_app: FastAPI): function validation_exception_handler (line 405) | async def validation_exception_handler(request: Request, exc: HTTPExcept... function validation_exception_handler (line 434) | async def validation_exception_handler(request: Request, exc: RequestVal... function validate_json_request (line 469) | async def validate_json_request(raw_request: Request): function health_generate (line 490) | async def health_generate(request: Request) -> Response: function get_model_info (line 568) | async def get_model_info(): function model_info (line 578) | async def model_info(): function weight_version (line 599) | async def weight_version(): function get_server_info (line 608) | async def get_server_info(): function server_info (line 618) | async def server_info(): function get_load (line 638) | async def get_load(): function set_internal_state (line 651) | async def set_internal_state(obj: SetInternalStateReq, request: Request): function _dumper_control_handler (line 661) | async def _dumper_control_handler(method: str, request: Request): function generate_request (line 678) | async def generate_request(obj: GenerateReqInput, request: Request): function encode_request (line 711) | async def encode_request(obj: EmbeddingReqInput, request: Request): function classify_request (line 723) | async def classify_request(obj: EmbeddingReqInput, request: Request): function flush_cache (line 736) | async def flush_cache(): function clear_hicache_storage_backend_deprecated (line 748) | async def clear_hicache_storage_backend_deprecated(): function clear_hicache_storage_backend (line 764) | async def clear_hicache_storage_backend(): function attach_hicache_storage_backend (line 784) | async def attach_hicache_storage_backend(obj: AttachHiCacheStorageReqInp... function detach_hicache_storage_backend (line 816) | async def detach_hicache_storage_backend(): function hicache_storage_backend_status (line 843) | async def hicache_storage_backend_status(): function pin_prefix (line 858) | async def pin_prefix(obj: PinPrefixReqInput): function start_profile_async (line 877) | async def start_profile_async(obj: Optional[ProfileReqInput] = None): function stop_profile_async (line 902) | async def stop_profile_async(): function set_trace_level (line 912) | def set_trace_level(level: int = Query(..., ge=0)): function freeze_gc_async (line 923) | async def freeze_gc_async(): function start_expert_distribution_record_async (line 936) | async def start_expert_distribution_record_async(): function stop_expert_distribution_record_async (line 947) | async def stop_expert_distribution_record_async(): function dump_expert_distribution_record_async (line 958) | async def dump_expert_distribution_record_async(): function update_weights_from_disk (line 969) | async def update_weights_from_disk(obj: UpdateWeightFromDiskReqInput, re... function init_weights_send_group_for_remote_instance (line 994) | async def init_weights_send_group_for_remote_instance( function send_weights_to_remote_instance (line 1011) | async def send_weights_to_remote_instance( function get_remote_instance_transfer_engine_info (line 1028) | async def get_remote_instance_transfer_engine_info(rank: int = None): function init_weights_update_group (line 1053) | async def init_weights_update_group( function destroy_weights_update_group (line 1069) | async def destroy_weights_update_group( function update_weights_from_tensor (line 1084) | async def update_weights_from_tensor( function update_weights_from_distributed (line 1106) | async def update_weights_from_distributed( function update_weights_from_ipc (line 1125) | async def update_weights_from_ipc(obj: UpdateWeightsFromIPCReqInput, req... function update_weight_version (line 1142) | async def update_weight_version(obj: UpdateWeightVersionReqInput, reques... function get_weights_by_name (line 1173) | async def get_weights_by_name(obj: GetWeightsByNameReqInput, request: Re... function release_memory_occupation (line 1187) | async def release_memory_occupation( function resume_memory_occupation (line 1199) | async def resume_memory_occupation( function check_weights (line 1211) | async def check_weights(obj: CheckWeightsReqInput, request: Request): function slow_down (line 1221) | async def slow_down(obj: SlowDownReqInput, request: Request): function load_lora_adapter (line 1235) | async def load_lora_adapter(obj: LoadLoRAAdapterReqInput, request: Reque... function load_lora_adapter_from_tensors (line 1252) | async def load_lora_adapter_from_tensors( function unload_lora_adapter (line 1268) | async def unload_lora_adapter(obj: UnloadLoRAAdapterReqInput, request: R... function open_session (line 1285) | async def open_session(obj: OpenSessionReqInput, request: Request): function close_session (line 1299) | async def close_session(obj: CloseSessionReqInput, request: Request): function configure_logging (line 1310) | async def configure_logging(obj: ConfigureLoggingReq, request: Request): function abort_request (line 1318) | async def abort_request(obj: AbortReq, request: Request): function parse_function_call_request (line 1330) | async def parse_function_call_request(obj: ParseFunctionCallReq, request... function separate_reasoning_request (line 1352) | async def separate_reasoning_request(obj: SeparateReasoningReqInput, req... function pause_generation (line 1373) | async def pause_generation(obj: PauseGenerationReqInput, request: Request): function continue_generation (line 1384) | async def continue_generation(obj: ContinueGenerationReqInput, request: ... function openai_v1_completions (line 1397) | async def openai_v1_completions(request: CompletionRequest, raw_request:... function openai_v1_chat_completions (line 1405) | async def openai_v1_chat_completions( function openai_v1_embeddings (line 1419) | async def openai_v1_embeddings(request: EmbeddingRequest, raw_request: R... function openai_v1_classify (line 1431) | async def openai_v1_classify(request: ClassifyRequest, raw_request: Requ... function openai_v1_tokenize (line 1449) | async def openai_v1_tokenize(request: TokenizeRequest, raw_request: Requ... function openai_v1_detokenize (line 1467) | async def openai_v1_detokenize(request: DetokenizeRequest, raw_request: ... function openai_v1_audio_transcriptions (line 1475) | async def openai_v1_audio_transcriptions( function available_models (line 1507) | async def available_models(): function retrieve_model (line 1539) | async def retrieve_model(model: str): function v1_score_request (line 1564) | async def v1_score_request(request: ScoringRequest, raw_request: Request): function v1_responses_request (line 1572) | async def v1_responses_request(request: dict, raw_request: Request): function v1_retrieve_responses (line 1592) | async def v1_retrieve_responses(response_id: str, raw_request: Request): function v1_cancel_responses (line 1600) | async def v1_cancel_responses(response_id: str, raw_request: Request): function v1_rerank_request (line 1610) | async def v1_rerank_request(request: V1RerankReqInput, raw_request: Requ... function ollama_root (line 1624) | async def ollama_root(): function sglang_root (line 1632) | async def sglang_root(): function ollama_chat (line 1638) | async def ollama_chat(request: OllamaChatRequest, raw_request: Request): function ollama_generate (line 1644) | async def ollama_generate(request: OllamaGenerateRequest, raw_request: R... function ollama_tags (line 1652) | async def ollama_tags(raw_request: Request): function ollama_show (line 1658) | async def ollama_show(request: OllamaShowRequest, raw_request: Request): function anthropic_v1_messages (line 1667) | async def anthropic_v1_messages( function anthropic_v1_count_tokens (line 1677) | async def anthropic_v1_count_tokens( function sagemaker_health (line 1688) | async def sagemaker_health() -> Response: function sagemaker_chat_completions (line 1694) | async def sagemaker_chat_completions( function vertex_generate (line 1705) | async def vertex_generate(vertex_req: VertexGenerateReqInput, raw_reques... function _create_error_response (line 1731) | def _create_error_response(e): function _admin_api_key_missing_response (line 1744) | def _admin_api_key_missing_response( function _execute_server_warmup (line 1762) | def _execute_server_warmup(server_args: ServerArgs): function _wait_and_warmup (line 1921) | def _wait_and_warmup( function _wait_weights_ready (line 1949) | def _wait_weights_ready(): function _setup_and_run_http_server (line 1970) | def _setup_and_run_http_server( function launch_server (line 2144) | def launch_server( FILE: python/sglang/srt/entrypoints/http_server_engine.py function launch_server_process (line 14) | def launch_server_process(server_args: ServerArgs) -> multiprocessing.Pr... class HttpServerEngineAdapter (line 49) | class HttpServerEngineAdapter(EngineBase): method __init__ (line 56) | def __init__(self, **kwargs): method _make_request (line 63) | def _make_request(self, endpoint: str, payload: Optional[dict] = None): method update_weights_from_tensor (line 78) | def update_weights_from_tensor( method shutdown (line 102) | def shutdown(self): method generate (line 105) | def generate( method release_memory_occupation (line 137) | def release_memory_occupation(self): method resume_memory_occupation (line 140) | def resume_memory_occupation(self): method flush_cache (line 143) | def flush_cache(self): FILE: python/sglang/srt/entrypoints/ollama/protocol.py class OllamaMessage (line 13) | class OllamaMessage(BaseModel): class OllamaChatRequest (line 21) | class OllamaChatRequest(BaseModel): class OllamaChatResponse (line 33) | class OllamaChatResponse(BaseModel): class OllamaChatStreamResponse (line 49) | class OllamaChatStreamResponse(BaseModel): class OllamaGenerateRequest (line 59) | class OllamaGenerateRequest(BaseModel): class OllamaGenerateResponse (line 77) | class OllamaGenerateResponse(BaseModel): class OllamaGenerateStreamResponse (line 94) | class OllamaGenerateStreamResponse(BaseModel): class OllamaModelInfo (line 104) | class OllamaModelInfo(BaseModel): class OllamaTagsResponse (line 115) | class OllamaTagsResponse(BaseModel): class OllamaShowRequest (line 121) | class OllamaShowRequest(BaseModel): class OllamaShowResponse (line 127) | class OllamaShowResponse(BaseModel): FILE: python/sglang/srt/entrypoints/ollama/serving.py class OllamaServing (line 31) | class OllamaServing: method __init__ (line 34) | def __init__(self, tokenizer_manager): method _get_timestamp (line 37) | def _get_timestamp(self) -> str: method _convert_options_to_sampling_params (line 41) | def _convert_options_to_sampling_params(self, options: dict = None) ->... method handle_chat (line 68) | async def handle_chat( method _generate_chat_response (line 105) | async def _generate_chat_response( method _stream_chat_response (line 132) | async def _stream_chat_response( method handle_generate (line 173) | async def handle_generate( method _generate_generate_response (line 223) | async def _generate_generate_response( method _stream_generate_response (line 249) | async def _stream_generate_response( method get_tags (line 289) | def get_tags(self) -> OllamaTagsResponse: method get_show (line 310) | def get_show(self, model: str) -> OllamaShowResponse: FILE: python/sglang/srt/entrypoints/ollama/smart_router.py class SmartRouter (line 23) | class SmartRouter: method __init__ (line 39) | def __init__( method _classify_with_llm (line 69) | def _classify_with_llm( method should_use_remote (line 104) | def should_use_remote(self, prompt: str, verbose: bool = False) -> tup... method chat (line 117) | def chat( method chat_stream (line 197) | def chat_stream( function main (line 244) | def main(): FILE: python/sglang/srt/entrypoints/openai/encoding_dsv32.py class DS32EncodingError (line 8) | class DS32EncodingError(Exception): function to_json (line 62) | def to_json(value: Any) -> str: function tools_from_openai_format (line 69) | def tools_from_openai_format(tools): function tool_calls_from_openai_format (line 73) | def tool_calls_from_openai_format(tool_calls): function tool_calls_to_openai_format (line 83) | def tool_calls_to_openai_format(tool_calls): function encode_arguments_to_dsml (line 96) | def encode_arguments_to_dsml(tool_call: Dict[str, str]) -> str: function decode_dsml_to_arguments (line 115) | def decode_dsml_to_arguments( function render_tools (line 133) | def render_tools(tools: List[Dict[str, Union[str, Dict[str, Any]]]]) -> ... function find_last_user_index (line 144) | def find_last_user_index(messages: List[Dict[str, Any]]) -> int: function render_message (line 153) | def render_message( function drop_thinking_messages (line 289) | def drop_thinking_messages( function encode_messages (line 310) | def encode_messages( function _read_until_stop (line 333) | def _read_until_stop( function parse_tool_calls (line 353) | def parse_tool_calls(index: int, text: str): function parse_message_from_completion_text (line 414) | def parse_message_from_completion_text(text: str, thinking_mode: str): FILE: python/sglang/srt/entrypoints/openai/protocol.py class ModelCard (line 53) | class ModelCard(BaseModel): class ModelList (line 65) | class ModelList(BaseModel): class ErrorResponse (line 72) | class ErrorResponse(BaseModel): class LogProbs (line 80) | class LogProbs(BaseModel): class TopLogprob (line 87) | class TopLogprob(BaseModel): class ChatCompletionTokenLogprob (line 93) | class ChatCompletionTokenLogprob(BaseModel): class ChoiceLogprobs (line 100) | class ChoiceLogprobs(BaseModel): class CachedTokensDetails (line 105) | class CachedTokensDetails(BaseModel): method _serialize (line 115) | def _serialize(self, handler): class PromptTokensDetails (line 125) | class PromptTokensDetails(BaseModel): class UsageInfo (line 131) | class UsageInfo(BaseModel): class StreamOptions (line 140) | class StreamOptions(BaseModel): class JsonSchemaResponseFormat (line 145) | class JsonSchemaResponseFormat(BaseModel): class ResponseFormat (line 153) | class ResponseFormat(BaseModel): class StructuresResponseFormat (line 158) | class StructuresResponseFormat(BaseModel): class LegacyStructuralTagResponseFormat (line 165) | class LegacyStructuralTagResponseFormat(BaseModel): class FileRequest (line 181) | class FileRequest(BaseModel): class FileResponse (line 189) | class FileResponse(BaseModel): class FileDeleteResponse (line 198) | class FileDeleteResponse(BaseModel): class BatchRequest (line 204) | class BatchRequest(BaseModel): class BatchResponse (line 213) | class BatchResponse(BaseModel): function _migrate_deprecated_dp_rank (line 236) | def _migrate_deprecated_dp_rank(values: dict) -> dict: class CompletionRequest (line 250) | class CompletionRequest(BaseModel): method _handle_deprecated_dp_rank (line 323) | def _handle_deprecated_dp_rank(cls, values): method validate_max_tokens_positive (line 328) | def validate_max_tokens_positive(cls, v): class SglExt (line 334) | class SglExt(BaseModel): method _serialize (line 345) | def _serialize(self, handler): class CompletionResponseChoice (line 351) | class CompletionResponseChoice(BaseModel): method _serialize (line 360) | def _serialize(self, handler): class CompletionResponse (line 367) | class CompletionResponse(BaseModel): method _serialize (line 378) | def _serialize(self, handler): class CompletionResponseStreamChoice (line 385) | class CompletionResponseStreamChoice(BaseModel): method _serialize (line 394) | def _serialize(self, handler): class CompletionStreamResponse (line 401) | class CompletionStreamResponse(BaseModel): method _serialize (line 411) | def _serialize(self, handler): class ChatCompletionMessageContentTextPart (line 418) | class ChatCompletionMessageContentTextPart(BaseModel): class ChatCompletionMessageContentImageURL (line 423) | class ChatCompletionMessageContentImageURL(BaseModel): class ChatCompletionMessageContentVideoURL (line 430) | class ChatCompletionMessageContentVideoURL(BaseModel): class ChatCompletionMessageContentAudioURL (line 436) | class ChatCompletionMessageContentAudioURL(BaseModel): class ChatCompletionMessageContentImagePart (line 440) | class ChatCompletionMessageContentImagePart(BaseModel): class ChatCompletionMessageContentVideoPart (line 446) | class ChatCompletionMessageContentVideoPart(BaseModel): class ChatCompletionMessageContentAudioPart (line 451) | class ChatCompletionMessageContentAudioPart(BaseModel): class FunctionResponse (line 473) | class FunctionResponse(BaseModel): class ToolCall (line 480) | class ToolCall(BaseModel): class ChatCompletionMessageGenericParam (line 489) | class ChatCompletionMessageGenericParam(BaseModel): method _normalize_role (line 502) | def _normalize_role(cls, v): class ChatCompletionMessageUserParam (line 513) | class ChatCompletionMessageUserParam(BaseModel): class Function (line 523) | class Function(BaseModel): class Tool (line 532) | class Tool(BaseModel): class ToolChoiceFuncName (line 539) | class ToolChoiceFuncName(BaseModel): class ToolChoice (line 545) | class ToolChoice(BaseModel): class ChatCompletionRequest (line 552) | class ChatCompletionRequest(BaseModel): method _handle_deprecated_dp_rank (line 659) | def _handle_deprecated_dp_rank(cls, values): method set_tool_choice_default (line 664) | def set_tool_choice_default(cls, values): method normalize_reasoning_inputs (line 674) | def normalize_reasoning_inputs(cls, values: Dict): method set_json_schema (line 711) | def set_json_schema(cls, values): method to_sampling_params (line 741) | def to_sampling_params( class ChatMessage (line 829) | class ChatMessage(BaseModel): class ChatCompletionResponseChoice (line 836) | class ChatCompletionResponseChoice(BaseModel): method _serialize (line 849) | def _serialize(self, handler): class ChatCompletionResponse (line 856) | class ChatCompletionResponse(BaseModel): method _serialize (line 867) | def _serialize(self, handler): class DeltaMessage (line 874) | class DeltaMessage(BaseModel): method _serialize (line 882) | def _serialize(self, handler): class ChatCompletionResponseStreamChoice (line 889) | class ChatCompletionResponseStreamChoice(BaseModel): class ChatCompletionStreamResponse (line 901) | class ChatCompletionStreamResponse(BaseModel): method _serialize (line 911) | def _serialize(self, handler): class MultimodalEmbeddingInput (line 918) | class MultimodalEmbeddingInput(BaseModel): class EmbeddingRequest (line 929) | class EmbeddingRequest(BaseModel): class EmbeddingObject (line 946) | class EmbeddingObject(BaseModel): class ClassifyRequest (line 955) | class ClassifyRequest(BaseModel): class ClassifyData (line 967) | class ClassifyData(BaseModel): class ClassifyResponse (line 974) | class ClassifyResponse(BaseModel): class EmbeddingResponse (line 983) | class EmbeddingResponse(BaseModel): class ScoringRequest (line 990) | class ScoringRequest(BaseModel): class ScoringResponse (line 1005) | class ScoringResponse(BaseModel): class V1RerankReqInput (line 1014) | class V1RerankReqInput(BaseModel): method validate_top_n (line 1041) | def validate_top_n(cls, v): method is_multimodal (line 1046) | def is_multimodal(self) -> bool: class RerankResponse (line 1056) | class RerankResponse(BaseModel): method _serialize (line 1063) | def _serialize(self, handler): class TokenizeRequest (line 1071) | class TokenizeRequest(BaseModel): class TokenizeResponse (line 1082) | class TokenizeResponse(BaseModel): class DetokenizeRequest (line 1090) | class DetokenizeRequest(BaseModel): class DetokenizeResponse (line 1101) | class DetokenizeResponse(BaseModel): class ResponseReasoningParam (line 1120) | class ResponseReasoningParam(BaseModel): class ResponseTool (line 1129) | class ResponseTool(BaseModel): class ResponsesRequest (line 1144) | class ResponsesRequest(BaseModel): method to_sampling_params (line 1212) | def to_sampling_params( class PromptTokenUsageInfo (line 1259) | class PromptTokenUsageInfo(BaseModel): class ResponsesResponse (line 1265) | class ResponsesResponse(BaseModel): method from_request (line 1303) | def from_request( class RequestResponseMetadata (line 1381) | class RequestResponseMetadata(BaseModel): class MessageProcessingResult (line 1389) | class MessageProcessingResult: class ToolCallProcessingResult (line 1416) | class ToolCallProcessingResult(NamedTuple): class ResponseReasoningTextContent (line 1426) | class ResponseReasoningTextContent(BaseModel): class TranscriptionRequest (line 1439) | class TranscriptionRequest(BaseModel): class TranscriptionUsage (line 1452) | class TranscriptionUsage(BaseModel): class TranscriptionResponse (line 1459) | class TranscriptionResponse(BaseModel): class TranscriptionStreamChoice (line 1466) | class TranscriptionStreamChoice(BaseModel): class TranscriptionStreamResponse (line 1473) | class TranscriptionStreamResponse(BaseModel): FILE: python/sglang/srt/entrypoints/openai/serving_base.py class OpenAIServingBase (line 26) | class OpenAIServingBase(ABC): method __init__ (line 29) | def __init__(self, tokenizer_manager: TokenizerManager): method _parse_model_parameter (line 40) | def _parse_model_parameter(self, model: str) -> Tuple[str, Optional[st... method _resolve_lora_path (line 55) | def _resolve_lora_path( method handle_request (line 73) | async def handle_request( method _request_id_prefix (line 136) | def _request_id_prefix(self) -> str: method _generate_request_id_base (line 140) | def _generate_request_id_base(self, request: OpenAIServingRequest) -> ... method _compute_extra_key (line 151) | def _compute_extra_key(self, request: OpenAIServingRequest) -> Optiona... method _convert_to_internal_request (line 165) | def _convert_to_internal_request( method _handle_streaming_request (line 173) | async def _handle_streaming_request( method _handle_non_streaming_request (line 189) | async def _handle_non_streaming_request( method _validate_request (line 205) | def _validate_request(self, _: OpenAIServingRequest) -> Optional[str]: method create_error_response (line 209) | def create_error_response( method create_streaming_error_response (line 227) | def create_streaming_error_response( method extract_custom_labels (line 243) | def extract_custom_labels(self, raw_request): method extract_routing_key (line 272) | def extract_routing_key(self, raw_request): method extract_routed_dp_rank_from_header (line 277) | def extract_routed_dp_rank_from_header( FILE: python/sglang/srt/entrypoints/openai/serving_chat.py function _extract_max_dynamic_patch (line 62) | def _extract_max_dynamic_patch(request: ChatCompletionRequest): class OpenAIServingChat (line 88) | class OpenAIServingChat(OpenAIServingBase): method __init__ (line 93) | def __init__( method _handle_last_assistant_message (line 125) | def _handle_last_assistant_message( method _append_assistant_prefix_to_prompt_ids (line 163) | def _append_assistant_prefix_to_prompt_ids( method _use_dpsk_v32_encoding (line 181) | def _use_dpsk_v32_encoding(self) -> bool: method _request_id_prefix (line 190) | def _request_id_prefix(self) -> str: method _validate_request (line 193) | def _validate_request(self, request: ChatCompletionRequest) -> Optiona... method _convert_to_internal_request (line 241) | def _convert_to_internal_request( method _process_messages (line 328) | def _process_messages( method _apply_jinja_template (line 375) | def _apply_jinja_template( method _apply_conversation_template (line 535) | def _apply_conversation_template( method _handle_streaming_request (line 600) | async def _handle_streaming_request( method _generate_chat_stream (line 613) | async def _generate_chat_stream( method _handle_non_streaming_request (line 890) | async def _handle_non_streaming_request( method _build_chat_response (line 915) | def _build_chat_response( method _process_logprobs_tokens (line 1025) | def _process_logprobs_tokens( method _process_response_logprobs (line 1067) | def _process_response_logprobs(self, ret_item: Dict[str, Any]) -> Choi... method _process_tool_call_id (line 1077) | def _process_tool_call_id( method _process_tool_calls (line 1097) | def _process_tool_calls( method _process_streaming_logprobs (line 1176) | def _process_streaming_logprobs( method _process_reasoning_stream (line 1192) | def _process_reasoning_stream( method _get_history_tool_calls_cnt (line 1215) | def _get_history_tool_calls_cnt(self, request: ChatCompletionRequest) ... method _patch_mistral_skip_special_tokens (line 1235) | def _patch_mistral_skip_special_tokens( method _get_reasoning_from_request (line 1247) | def _get_reasoning_from_request(self, request: ChatCompletionRequest) ... method _process_tool_call_stream (line 1280) | async def _process_tool_call_stream( method _check_for_unstreamed_tool_args (line 1386) | def _check_for_unstreamed_tool_args( FILE: python/sglang/srt/entrypoints/openai/serving_classify.py class OpenAIServingClassify (line 28) | class OpenAIServingClassify(OpenAIServingBase): method __init__ (line 31) | def __init__( method _request_id_prefix (line 47) | def _request_id_prefix(self) -> str: method _convert_to_internal_request (line 50) | def _convert_to_internal_request( method _validate_request (line 79) | def _validate_request(self, request: ClassifyRequest) -> Optional[str]: method _get_id2label_mapping (line 111) | def _get_id2label_mapping(self) -> Optional[Dict[int, str]]: method _handle_non_streaming_request (line 129) | async def _handle_non_streaming_request( method _build_classify_response (line 151) | def _build_classify_response(self, ret: List[Dict[str, Any]]) -> Class... FILE: python/sglang/srt/entrypoints/openai/serving_completions.py class OpenAIServingCompletion (line 41) | class OpenAIServingCompletion(OpenAIServingBase): method __init__ (line 44) | def __init__( method _request_id_prefix (line 52) | def _request_id_prefix(self) -> str: method _validate_request (line 55) | def _validate_request(self, request: CompletionRequest) -> Optional[str]: method _convert_to_internal_request (line 63) | def _convert_to_internal_request( method _build_sampling_params (line 134) | def _build_sampling_params(self, request: CompletionRequest) -> Dict[s... method _handle_streaming_request (line 178) | async def _handle_streaming_request( method _generate_completion_stream (line 191) | async def _generate_completion_stream( method _handle_non_streaming_request (line 391) | async def _handle_non_streaming_request( method _build_completion_response (line 417) | def _build_completion_response( method _get_echo_text (line 510) | def _get_echo_text(self, request: CompletionRequest, index: int) -> str: method _prepare_echo_prompts (line 534) | def _prepare_echo_prompts(self, request: CompletionRequest) -> List[str]: FILE: python/sglang/srt/entrypoints/openai/serving_embedding.py class OpenAIServingEmbedding (line 25) | class OpenAIServingEmbedding(OpenAIServingBase): method __init__ (line 28) | def __init__( method _request_id_prefix (line 36) | def _request_id_prefix(self) -> str: method _validate_request (line 39) | def _validate_request(self, request: EmbeddingRequest) -> Optional[str]: method _convert_to_internal_request (line 74) | def _convert_to_internal_request( method _handle_non_streaming_request (line 143) | async def _handle_non_streaming_request( method _build_embedding_response (line 163) | def _build_embedding_response(self, ret: List[Dict[str, Any]]) -> Embe... FILE: python/sglang/srt/entrypoints/openai/serving_rerank.py function _get_yes_no_token_ids (line 22) | def _get_yes_no_token_ids(tokenizer) -> tuple[int, int]: function _is_qwen3_reranker_template (line 50) | def _is_qwen3_reranker_template(chat_template: str) -> bool: function _is_qwen3_vl_reranker_template (line 60) | def _is_qwen3_vl_reranker_template(chat_template: str) -> bool: function _is_qwen3_vl_model (line 78) | def _is_qwen3_vl_model(model_path: str) -> bool: function _detect_rerank_backend (line 86) | def _detect_rerank_backend( function _qwen3_rerank_score (line 112) | def _qwen3_rerank_score(p_yes: float, p_no: float) -> float: function _get_jinja_env (line 119) | def _get_jinja_env(): function _render_jinja_chat_template (line 135) | def _render_jinja_chat_template( function _render_vl_jinja_template (line 165) | def _render_vl_jinja_template( function _extract_text_from_content (line 189) | def _extract_text_from_content(content: RerankContent) -> str: class OpenAIServingRerank (line 202) | class OpenAIServingRerank(OpenAIServingBase): method __init__ (line 205) | def __init__(self, tokenizer_manager, template_manager=None): method _request_id_prefix (line 219) | def _request_id_prefix(self) -> str: method _validate_request (line 222) | def _validate_request(self, request: V1RerankReqInput) -> Optional[str]: method _convert_to_internal_request (line 242) | def _convert_to_internal_request( method _handle_non_streaming_request (line 278) | async def _handle_non_streaming_request( method _handle_rerank_paths (line 316) | async def _handle_rerank_paths( method _handle_text_reranker_request (line 351) | async def _handle_text_reranker_request( method _handle_vl_reranker_request (line 395) | async def _handle_vl_reranker_request( method _build_vl_reranker_content (line 461) | def _build_vl_reranker_content( method _content_to_template_list (line 483) | def _content_to_template_list( method _extract_score_from_logprobs (line 532) | def _extract_score_from_logprobs(self, ret: Dict[str, Any]) -> float: method _build_rerank_response (line 557) | def _build_rerank_response( FILE: python/sglang/srt/entrypoints/openai/serving_responses.py class OpenAIServingResponses (line 70) | class OpenAIServingResponses(OpenAIServingChat): method __init__ (line 73) | def __init__( method create_error_response (line 129) | def create_error_response( method create_streaming_error_response (line 144) | def create_streaming_error_response( method _request_id_prefix (line 161) | def _request_id_prefix(self) -> str: method create_responses (line 164) | async def create_responses( method _make_request (line 373) | async def _make_request( method _make_request_with_harmony (line 417) | def _make_request_with_harmony( method responses_full_generator (line 431) | async def responses_full_generator( method _make_response_output_items (line 527) | def _make_response_output_items( method _make_response_output_items_with_harmony (line 577) | def _make_response_output_items_with_harmony( method _construct_input_messages (line 591) | def _construct_input_messages( method _construct_input_messages_with_harmony (line 632) | def _construct_input_messages_with_harmony( method _run_background_request (line 707) | async def _run_background_request( method retrieve_responses (line 752) | async def retrieve_responses( method cancel_responses (line 766) | async def cancel_responses( method _make_invalid_id_error (line 799) | def _make_invalid_id_error(self, response_id: str): method _make_not_found_error (line 809) | def _make_not_found_error(self, response_id: str): method responses_stream_generator (line 817) | async def responses_stream_generator( method _generate_with_builtin_tools (line 1260) | async def _generate_with_builtin_tools( FILE: python/sglang/srt/entrypoints/openai/serving_score.py class OpenAIServingScore (line 17) | class OpenAIServingScore(OpenAIServingBase): method _request_id_prefix (line 23) | def _request_id_prefix(self) -> str: method _convert_to_internal_request (line 26) | def _convert_to_internal_request( method _handle_non_streaming_request (line 37) | async def _handle_non_streaming_request( FILE: python/sglang/srt/entrypoints/openai/serving_tokenize.py class OpenAIServingTokenize (line 19) | class OpenAIServingTokenize(OpenAIServingBase): method _request_id_prefix (line 22) | def _request_id_prefix(self) -> str: method _convert_to_internal_request (line 25) | def _convert_to_internal_request( method _handle_non_streaming_request (line 30) | async def _handle_non_streaming_request( class OpenAIServingDetokenize (line 73) | class OpenAIServingDetokenize(OpenAIServingBase): method _request_id_prefix (line 76) | def _request_id_prefix(self) -> str: method _convert_to_internal_request (line 79) | def _convert_to_internal_request( method _handle_non_streaming_request (line 84) | async def _handle_non_streaming_request( FILE: python/sglang/srt/entrypoints/openai/serving_transcription.py class OpenAIServingTranscription (line 48) | class OpenAIServingTranscription(OpenAIServingBase): method __init__ (line 51) | def __init__(self, tokenizer_manager: TokenizerManager): method _request_id_prefix (line 54) | def _request_id_prefix(self) -> str: method _validate_request (line 57) | def _validate_request(self, request: TranscriptionRequest) -> Optional... method _convert_to_internal_request (line 62) | def _convert_to_internal_request( method _get_audio_duration (line 87) | def _get_audio_duration(self, audio_data: bytes) -> float: method create_transcription (line 99) | async def create_transcription( method _handle_non_streaming_request (line 127) | async def _handle_non_streaming_request( method _handle_streaming_request (line 152) | async def _handle_streaming_request( method _generate_transcription_stream (line 165) | async def _generate_transcription_stream( FILE: python/sglang/srt/entrypoints/openai/tool_server.py function list_server_and_tools (line 20) | async def list_server_and_tools(server_url: str): function trim_schema (line 30) | def trim_schema(schema: dict) -> dict: function post_process_tools_description (line 55) | def post_process_tools_description( class ToolServer (line 73) | class ToolServer(ABC): method has_tool (line 76) | def has_tool(self, tool_name: str): method get_tool_description (line 80) | def get_tool_description(self, tool_name: str): method get_tool_session (line 84) | def get_tool_session(self, tool_name: str) -> AbstractAsyncContextMana... class MCPToolServer (line 87) | class MCPToolServer(ToolServer): method __init__ (line 89) | def __init__(self): method add_tool_server (line 92) | async def add_tool_server(self, server_url: str): method has_tool (line 124) | def has_tool(self, tool_name: str): method get_tool_description (line 127) | def get_tool_description(self, tool_name: str): method get_tool_session (line 131) | async def get_tool_session(self, tool_name: str): class DemoToolServer (line 143) | class DemoToolServer(ToolServer): method __init__ (line 145) | def __init__(self): method has_tool (line 160) | def has_tool(self, tool_name: str): method get_tool_description (line 163) | def get_tool_description(self, tool_name: str): method get_tool_session (line 174) | async def get_tool_session(self, tool_name: str): FILE: python/sglang/srt/entrypoints/openai/usage_processor.py class UsageProcessor (line 9) | class UsageProcessor: method _details_if_cached (line 13) | def _details_if_cached(count: int) -> Optional[PromptTokensDetails]: method calculate_response_usage (line 18) | def calculate_response_usage( method calculate_streaming_usage (line 47) | def calculate_streaming_usage( method calculate_token_usage (line 75) | def calculate_token_usage( FILE: python/sglang/srt/entrypoints/openai/utils.py function to_openai_style_logprobs (line 14) | def to_openai_style_logprobs( function process_hidden_states_from_ret (line 51) | def process_hidden_states_from_ret( function process_routed_experts_from_ret (line 76) | def process_routed_experts_from_ret( function process_cached_tokens_details_from_ret (line 89) | def process_cached_tokens_details_from_ret( FILE: python/sglang/srt/entrypoints/ssl_utils.py class SSLCertRefresher (line 13) | class SSLCertRefresher: method __init__ (line 22) | def __init__( method _watch_cert_key (line 44) | async def _watch_cert_key(self) -> None: method _watch_ca (line 64) | async def _watch_ca(self) -> None: method stop (line 84) | def stop(self) -> None: FILE: python/sglang/srt/entrypoints/tool.py class Tool (line 16) | class Tool(ABC): method get_result (line 19) | async def get_result(self, context: "ConversationContext") -> Any: class HarmonyBrowserTool (line 23) | class HarmonyBrowserTool(Tool): method __init__ (line 25) | def __init__(self): method get_result (line 45) | async def get_result(self, context: "ConversationContext") -> Any: method tool_config (line 56) | def tool_config(self) -> Any: class HarmonyPythonTool (line 60) | class HarmonyPythonTool(Tool): method __init__ (line 62) | def __init__(self): method get_result (line 75) | async def get_result(self, context: "ConversationContext") -> Any: method tool_config (line 86) | def tool_config(self) -> Any: FILE: python/sglang/srt/entrypoints/v1_loads.py function _get_tokenizer_manager (line 49) | def _get_tokenizer_manager(): function _loads_dict_factory (line 56) | def _loads_dict_factory(items): function _compute_aggregate (line 61) | def _compute_aggregate(load_dicts: list) -> dict: function _format_loads_prometheus (line 86) | def _format_loads_prometheus(load_results) -> Response: function get_loads (line 131) | async def get_loads( FILE: python/sglang/srt/entrypoints/warmup.py function warmup (line 20) | def warmup(name: str): function execute_warmups (line 28) | async def execute_warmups( function voice_chat (line 42) | async def voice_chat(disaggregation_mode: str, tokenizer_manager: Tokeni... FILE: python/sglang/srt/environ.py function temp_set_env (line 10) | def temp_set_env(*, allow_sglang: bool = False, **env_vars: Any): class EnvField (line 38) | class EnvField: method __init__ (line 41) | def __init__(self, default: Any): method __set_name__ (line 47) | def __set_name__(self, owner, name): method parse (line 51) | def parse(self, value: str) -> Any: method get (line 54) | def get(self) -> Any: method is_set (line 74) | def is_set(self): method set (line 77) | def set(self, value: Any): method override (line 82) | def override(self, value: Any): method clear (line 94) | def clear(self): method __bool__ (line 98) | def __bool__(self): method __len__ (line 103) | def __len__(self): class EnvTuple (line 109) | class EnvTuple(EnvField): method parse (line 110) | def parse(self, value: str) -> tuple[str, ...]: class EnvStr (line 114) | class EnvStr(EnvField): method parse (line 115) | def parse(self, value: str) -> str: class EnvBool (line 119) | class EnvBool(EnvField): method parse (line 120) | def parse(self, value: str) -> bool: class EnvInt (line 129) | class EnvInt(EnvField): method parse (line 130) | def parse(self, value: str) -> int: class EnvFloat (line 137) | class EnvFloat(EnvField): method parse (line 138) | def parse(self, value: str) -> float: class ToolStrictLevel (line 145) | class ToolStrictLevel(IntEnum): class Envs (line 159) | class Envs: function _print_deprecated_env (line 530) | def _print_deprecated_env(new_name: str, old_name: str): function _warn_deprecated_env_to_cli_flag (line 538) | def _warn_deprecated_env_to_cli_flag(env_name: str, suggestion: str): function _convert_SGL_to_SGLANG (line 547) | def _convert_SGL_to_SGLANG(): function example_with_exit_stack (line 618) | def example_with_exit_stack(): function example_with_subprocess (line 627) | def example_with_subprocess(): function example_with_implicit_bool_avoidance (line 642) | def example_with_implicit_bool_avoidance(): function examples (line 666) | def examples(): FILE: python/sglang/srt/eplb/eplb_algorithms/__init__.py class EplbAlgorithm (line 10) | class EplbAlgorithm(Enum): function rebalance_experts (line 20) | def rebalance_experts( function compute_algorithm (line 74) | def compute_algorithm( FILE: python/sglang/srt/eplb/eplb_algorithms/deepseek.py function balanced_packing (line 7) | def balanced_packing( function replicate_experts (line 52) | def replicate_experts( function rebalance_experts_hierarchical (line 83) | def rebalance_experts_hierarchical( function rebalance_experts (line 168) | def rebalance_experts( FILE: python/sglang/srt/eplb/eplb_algorithms/deepseek_vec.py function pack_groups (line 7) | def pack_groups(tokens_per_group: torch.Tensor, num_nodes: int) -> torch... function make_redundant_experts_chunkwise (line 35) | def make_redundant_experts_chunkwise( function decode_rebalance_experts (line 184) | def decode_rebalance_experts( function prefill_rebalance_experts (line 197) | def prefill_rebalance_experts( function rebalance_experts (line 255) | def rebalance_experts( FILE: python/sglang/srt/eplb/eplb_algorithms/elasticity_aware.py function rebalance_experts (line 8) | def rebalance_experts( FILE: python/sglang/srt/eplb/eplb_manager.py class EPLBManager (line 16) | class EPLBManager: method __init__ (line 17) | def __init__(self, model_runner: "ModelRunner"): method on_forward_pass_end (line 41) | def on_forward_pass_end(self): method _entrypoint (line 45) | def _entrypoint(self): method rebalance (line 52) | def rebalance(self): method _check_rebalance_needed (line 93) | def _check_rebalance_needed(self, average_utilization_rate_over_window): method _compute_update_layer_ids_chunks (line 108) | def _compute_update_layer_ids_chunks(self) -> List[List[int]]: function _chunk_list (line 116) | def _chunk_list(items: List, chunk_size): FILE: python/sglang/srt/eplb/eplb_simulator/reader.py function read_mode_per_pass (line 16) | def read_mode_per_pass(dir_data: Path): FILE: python/sglang/srt/eplb/expert_distribution.py class ExpertDistributionMetrics (line 48) | class ExpertDistributionMetrics: method copy_to_cpu (line 51) | def copy_to_cpu(self): class ExpertDistributionRecorder (line 55) | class ExpertDistributionRecorder(ABC): method init_new (line 59) | def init_new( method with_current_layer (line 77) | def with_current_layer(self, layer_idx): method with_debug_name (line 81) | def with_debug_name(self, debug_name): method disable_this_region (line 85) | def disable_this_region(self): method with_forward_pass (line 89) | def with_forward_pass(self, forward_pass_id: int, forward_batch: Forwa... method on_select_experts (line 92) | def on_select_experts(self, topk_ids: torch.Tensor): method on_deepep_dispatch_normal (line 95) | def on_deepep_dispatch_normal( method on_deepep_dispatch_low_latency (line 104) | def on_deepep_dispatch_low_latency( method start_record (line 109) | def start_record(self): method stop_record (line 112) | def stop_record(self): method dump_record (line 115) | def dump_record(self, output_mode: _OutputMode = "file"): method recording (line 119) | def recording(self): method _on_not_implemented (line 122) | def _on_not_implemented(self): class _ExpertDistributionRecorderNoop (line 128) | class _ExpertDistributionRecorderNoop(ExpertDistributionRecorder): class _ExpertDistributionRecorderReal (line 132) | class _ExpertDistributionRecorderReal(ExpertDistributionRecorder): method __init__ (line 133) | def __init__( method with_current_layer (line 161) | def with_current_layer(self, layer_idx): method with_debug_name (line 164) | def with_debug_name(self, debug_name): method with_forward_pass (line 168) | def with_forward_pass(self, forward_pass_id: int, forward_batch: Forwa... method disable_this_region (line 178) | def disable_this_region(self): method _on_forward_pass_start (line 187) | def _on_forward_pass_start(self, forward_batch: ForwardBatch): method _on_forward_pass_end (line 194) | def _on_forward_pass_end(self, forward_pass_id: int, outputs: Dict[str... method on_select_experts (line 203) | def on_select_experts(self, topk_ids: torch.Tensor): method on_deepep_dispatch_normal (line 206) | def on_deepep_dispatch_normal( method on_deepep_dispatch_low_latency (line 221) | def on_deepep_dispatch_low_latency( method _on_hook (line 229) | def _on_hook(self, hook_name: str, **kwargs): method _reset (line 243) | def _reset(self): method start_record (line 253) | def start_record(self): method stop_record (line 262) | def stop_record(self): method dump_record (line 270) | def dump_record(self, output_mode: _OutputMode = "file"): method recording (line 277) | def recording(self): function get_global_expert_distribution_recorder (line 286) | def get_global_expert_distribution_recorder(): function set_global_expert_distribution_recorder (line 290) | def set_global_expert_distribution_recorder(value): class _SinglePassGatherer (line 298) | class _SinglePassGatherer(ABC): method init_new (line 300) | def init_new( method __init__ (line 330) | def __init__(self, expert_location_metadata: ExpertLocationMetadata, r... method on_forward_pass_start (line 334) | def on_forward_pass_start(self, forward_batch: ForwardBatch): method on_select_experts (line 337) | def on_select_experts(self, layer_idx: int, topk_ids: torch.Tensor): method on_deepep_dispatch_normal (line 340) | def on_deepep_dispatch_normal( method on_deepep_dispatch_low_latency (line 350) | def on_deepep_dispatch_low_latency( method reset (line 355) | def reset(self): method collect (line 358) | def collect(self) -> Dict: class _DetailSinglePassGatherer (line 362) | class _DetailSinglePassGatherer(_SinglePassGatherer): method __init__ (line 366) | def __init__( method on_forward_pass_start (line 390) | def on_forward_pass_start(self, forward_batch: ForwardBatch): method on_select_experts (line 401) | def on_select_experts(self, layer_idx: int, topk_ids: torch.Tensor): method on_deepep_dispatch_normal (line 406) | def on_deepep_dispatch_normal( method reset (line 423) | def reset(self): method collect (line 428) | def collect(self) -> Dict: class _LayerBasedCpuSinglePassGatherer (line 446) | class _LayerBasedCpuSinglePassGatherer(_SinglePassGatherer): method __init__ (line 447) | def __init__(self, *args, **kwargs): method _on_layer_data (line 451) | def _on_layer_data(self, layer_idx: int, objects: List[int]): method reset (line 460) | def reset(self): method _collect_objects (line 463) | def _collect_objects(self, pad_len: int) -> torch.Tensor: function _list_sum (line 471) | def _list_sum(a: List, b: List) -> List: class _LayerBasedGpuSinglePassGatherer (line 475) | class _LayerBasedGpuSinglePassGatherer(_SinglePassGatherer): method __init__ (line 476) | def __init__(self, *args, enable_global_physical_experts: bool, **kwar... method reset (line 492) | def reset(self): method collect (line 495) | def collect(self) -> Dict: class _SelectExpertsSinglePassGatherer (line 510) | class _SelectExpertsSinglePassGatherer(_LayerBasedGpuSinglePassGatherer): method __init__ (line 511) | def __init__(self, *args, **kwargs): method on_select_experts (line 515) | def on_select_experts(self, layer_idx: int, topk_ids: torch.Tensor): class _DeepepNormalSinglePassGatherer (line 523) | class _DeepepNormalSinglePassGatherer(_LayerBasedCpuSinglePassGatherer): method __init__ (line 524) | def __init__(self, *args, **kwargs): method on_deepep_dispatch_normal (line 532) | def on_deepep_dispatch_normal( method collect (line 543) | def collect(self) -> Dict: class _DeepepLowLatencySinglePassGatherer (line 556) | class _DeepepLowLatencySinglePassGatherer(_LayerBasedGpuSinglePassGather... method __init__ (line 557) | def __init__(self, *args, **kwargs): method on_deepep_dispatch_low_latency (line 560) | def on_deepep_dispatch_low_latency( function _convert_per_token_to_global_physical_count (line 567) | def _convert_per_token_to_global_physical_count( function _convert_local_to_global_physical_count (line 588) | def _convert_local_to_global_physical_count( class _Accumulator (line 610) | class _Accumulator(ABC): method init_new (line 612) | def init_new( method get_class (line 622) | def get_class(server_args: ServerArgs) -> Type["_Accumulator"]: method __init__ (line 630) | def __init__( method get_single_pass_gatherer_keys (line 640) | def get_single_pass_gatherer_keys(self): method get_single_pass_gatherer_key (line 643) | def get_single_pass_gatherer_key(self, debug_name: Optional[str]): method append (line 646) | def append( method reset (line 655) | def reset(self): method dump (line 658) | def dump(self, output_mode: _OutputMode): class _UtilizationRateAccumulatorMixin (line 662) | class _UtilizationRateAccumulatorMixin(_Accumulator): method __init__ (line 663) | def __init__(self, *args, **kwargs): method append (line 677) | def append( method reset (line 690) | def reset(self): method _append_utilization_rate (line 695) | def _append_utilization_rate( method _handle_metric_eplb_heatmap (line 738) | def _handle_metric_eplb_heatmap(self, gpu_physical_count: torch.Tensor): class _DequeCollection (line 761) | class _DequeCollection: method __init__ (line 762) | def __init__(self, maxlens: List[int]): method append (line 765) | def append(self, value): method clear (line 769) | def clear(self): method mean (line 773) | def mean(self) -> Dict[int, float]: class _DetailAccumulator (line 777) | class _DetailAccumulator(_UtilizationRateAccumulatorMixin): method __init__ (line 778) | def __init__(self, *args, **kwargs): method get_single_pass_gatherer_keys (line 782) | def get_single_pass_gatherer_keys(self): method get_single_pass_gatherer_key (line 787) | def get_single_pass_gatherer_key(self, debug_name: Optional[str]): method append (line 792) | def append( method reset (line 819) | def reset(self): method dump (line 823) | def dump(self, output_mode: _OutputMode): class _StatAccumulator (line 835) | class _StatAccumulator(_UtilizationRateAccumulatorMixin): method __init__ (line 836) | def __init__(self, *args, **kwargs): method append (line 850) | def append( method reset (line 863) | def reset(self): method dump (line 867) | def dump(self, output_mode: _OutputMode): method _get_global_average_utilization_rate (line 897) | def _get_global_average_utilization_rate(self): function _dump_to_file (line 923) | def _dump_to_file(name, data): class _Buffer (line 932) | class _Buffer: method init_new (line 934) | def init_new(item_shape: Tuple, buffer_size: int, dtype, device): method append (line 940) | def append(self, value: torch.Tensor): method get_all (line 943) | def get_all(self) -> torch.Tensor: method reset (line 946) | def reset(self): class _CircularBuffer (line 950) | class _CircularBuffer(_Buffer): method __init__ (line 951) | def __init__(self, item_shape: Tuple, buffer_size: int, dtype, device): method append (line 957) | def append(self, value: torch.Tensor): method get_all (line 961) | def get_all(self) -> torch.Tensor: method reset (line 964) | def reset(self): class _InfiniteBuffer (line 968) | class _InfiniteBuffer(_Buffer): method __init__ (line 969) | def __init__(self, item_shape: Tuple, dtype, device): method append (line 974) | def append(self, value: torch.Tensor): method get_all (line 989) | def get_all(self) -> torch.Tensor: method reset (line 992) | def reset(self): function _convert_global_physical_count_to_logical_count (line 997) | def _convert_global_physical_count_to_logical_count( function compute_gpu_physical_count (line 1020) | def compute_gpu_physical_count( function compute_utilization_rate (line 1033) | def compute_utilization_rate( FILE: python/sglang/srt/eplb/expert_location.py class ExpertLocationMetadata (line 39) | class ExpertLocationMetadata: method num_layers (line 51) | def num_layers(self) -> int: method num_physical_experts (line 55) | def num_physical_experts(self) -> int: method num_local_physical_experts (line 59) | def num_local_physical_experts(self) -> int: method num_logical_experts (line 65) | def num_logical_experts(self) -> int: method ep_size (line 69) | def ep_size(self): method __post_init__ (line 73) | def __post_init__(self): method init_trivial (line 88) | def init_trivial( method init_by_mapping (line 115) | def init_by_mapping( method init_by_eplb (line 147) | def init_by_eplb( method _init_common (line 191) | def _init_common(server_args: ServerArgs, model_config: ModelConfig): method _init_raw (line 215) | def _init_raw( method update (line 255) | def update( method logical_to_all_physical (line 286) | def logical_to_all_physical( function get_global_expert_location_metadata (line 312) | def get_global_expert_location_metadata(): function set_global_expert_location_metadata (line 316) | def set_global_expert_location_metadata(value): function _compute_logical_to_all_physical_map (line 322) | def _compute_logical_to_all_physical_map( function _pad_nested_array (line 382) | def _pad_nested_array(arr, pad_value): function compute_logical_to_rank_dispatch_physical_map (line 392) | def compute_logical_to_rank_dispatch_physical_map( function _logical_to_all_physical_raw (line 446) | def _logical_to_all_physical_raw( function _compute_gpu_id_of_physical_expert (line 458) | def _compute_gpu_id_of_physical_expert( function _compute_node_id_of_physical_expert (line 464) | def _compute_node_id_of_physical_expert( function _find_nearest_expert (line 470) | def _find_nearest_expert( function _fair_choices (line 510) | def _fair_choices(arr: List, k: int, r: random.Random) -> List: class ModelConfigForExpertLocation (line 518) | class ModelConfigForExpertLocation: method from_model_config (line 524) | def from_model_config(model_config: ModelConfig): function compute_initial_expert_location_metadata (line 534) | def compute_initial_expert_location_metadata( FILE: python/sglang/srt/eplb/expert_location_dispatch.py class ExpertLocationDispatchInfo (line 25) | class ExpertLocationDispatchInfo: method init_new (line 36) | def init_new(cls, layer_id: int): function transform_select_experts_inputs (line 64) | def transform_select_experts_inputs( function topk_ids_logical_to_physical (line 76) | def topk_ids_logical_to_physical( function _topk_ids_logical_to_physical_static (line 89) | def _topk_ids_logical_to_physical_static( function _topk_ids_logical_to_physical_dynamic (line 95) | def _topk_ids_logical_to_physical_dynamic( FILE: python/sglang/srt/eplb/expert_location_updater.py class ExpertLocationUpdater (line 37) | class ExpertLocationUpdater: method __init__ (line 38) | def __init__(self): method update (line 41) | def update( function _update_expert_weights (line 78) | def _update_expert_weights(**kwargs): function _update_expert_weights_with_canary (line 86) | def _update_expert_weights_with_canary( function _update_expert_weights_raw (line 135) | def _update_expert_weights_raw( function create_temp_buffers (line 178) | def create_temp_buffers(sample_tensors): function update_expert_weights_single_layer (line 182) | def update_expert_weights_single_layer( class _ChunkUtils (line 516) | class _ChunkUtils: method __init__ (line 517) | def __init__(self, *, chunk_values: List, element_values: List): method chunk_value_from_element_value (line 521) | def chunk_value_from_element_value(self, element_value): method element_values_from_chunk_value (line 529) | def element_values_from_chunk_value(self, chunk_value) -> List: method _chunk_index_from_element_index (line 540) | def _chunk_index_from_element_index( method _element_slice_from_chunk_index (line 554) | def _element_slice_from_chunk_index( function _deduplicate_ordered (line 563) | def _deduplicate_ordered(arr: List[int]): function _log_p2p_op_metrics (line 571) | def _log_p2p_op_metrics( function _get_direction_from_op (line 606) | def _get_direction_from_op(op: P2POp): function _group_by (line 614) | def _group_by(items, keyfunc): FILE: python/sglang/srt/function_call/base_format_detector.py class BaseFormatDetector (line 26) | class BaseFormatDetector(ABC): method __init__ (line 29) | def __init__(self): method _get_tool_indices (line 53) | def _get_tool_indices(self, tools: List[Tool]) -> Dict[str, int]: method parse_base_json (line 71) | def parse_base_json(self, action: Any, tools: List[Tool]) -> List[Tool... method detect_and_parse (line 98) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method _ends_with_partial_token (line 106) | def _ends_with_partial_token(self, buffer: str, bot_token: str) -> int: method parse_streaming_increment (line 119) | def parse_streaming_increment( method has_tool_call (line 324) | def has_tool_call(self, text: str) -> bool: method supports_structural_tag (line 330) | def supports_structural_tag(self) -> bool: method structure_info (line 335) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/core_types.py class ToolCallItem (line 7) | class ToolCallItem(BaseModel): class StreamingParseResult (line 15) | class StreamingParseResult(BaseModel): class StructureInfo (line 23) | class StructureInfo: FILE: python/sglang/srt/function_call/deepseekv31_detector.py class DeepSeekV31Detector (line 19) | class DeepSeekV31Detector(BaseFormatDetector): method __init__ (line 45) | def __init__(self): method has_tool_call (line 56) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 60) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 90) | def parse_streaming_increment( method structure_info (line 201) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/deepseekv32_detector.py class DeepSeekV32Detector (line 20) | class DeepSeekV32Detector(BaseFormatDetector): method __init__ (line 72) | def __init__(self): method has_tool_call (line 91) | def has_tool_call(self, text: str) -> bool: method _parse_parameters_from_xml (line 95) | def _parse_parameters_from_xml( method detect_and_parse (line 168) | def detect_and_parse(self, text: str, tools: list[Tool]) -> StreamingP... method parse_streaming_increment (line 212) | def parse_streaming_increment( method structure_info (line 348) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/deepseekv3_detector.py class DeepSeekV3Detector (line 19) | class DeepSeekV3Detector(BaseFormatDetector): method __init__ (line 45) | def __init__(self): method has_tool_call (line 54) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 58) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 88) | def parse_streaming_increment( method structure_info (line 204) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/function_call_parser.py class FunctionCallParser (line 39) | class FunctionCallParser: method __init__ (line 74) | def __init__(self, tools: List[Tool], tool_call_parser: str): method has_tool_call (line 85) | def has_tool_call(self, text: str) -> bool: method parse_non_stream (line 100) | def parse_non_stream(self, full_text: str) -> Tuple[str, list[ToolCall... method parse_stream_chunk (line 121) | def parse_stream_chunk(self, chunk_text: str) -> Tuple[str, list[ToolC... method get_structure_tag (line 147) | def get_structure_tag(self) -> LegacyStructuralTagResponseFormat: method get_structure_constraint (line 186) | def get_structure_constraint( FILE: python/sglang/srt/function_call/gigachat3_detector.py class GigaChat3Detector (line 37) | class GigaChat3Detector(BaseFormatDetector): method __init__ (line 38) | def __init__(self) -> None: method has_tool_call (line 46) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 50) | def detect_and_parse( method parse_streaming_increment (line 96) | def parse_streaming_increment( method supports_structural_tag (line 194) | def supports_structural_tag(self) -> bool: method structure_info (line 198) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/glm47_moe_detector.py class StreamState (line 20) | class StreamState(str, Enum): function get_argument_type (line 30) | def get_argument_type( function _convert_to_number (line 76) | def _convert_to_number(value: str) -> Any: function parse_arguments (line 94) | def parse_arguments( class Glm47MoeDetector (line 145) | class Glm47MoeDetector(BaseFormatDetector): method __init__ (line 152) | def __init__(self): method _reset_streaming_state (line 174) | def _reset_streaming_state(self) -> None: method has_tool_call (line 188) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 192) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method _get_value_type (line 247) | def _get_value_type(self, func_name: str, key: str, tools: List[Tool])... method _format_value_complete (line 300) | def _format_value_complete(self, value: str, value_type: str) -> str: method _process_xml_to_json_streaming (line 327) | def _process_xml_to_json_streaming( method _extract_match_groups (line 447) | def _extract_match_groups(self, match: re.Match) -> tuple[str, str, str]: method _send_tool_name_if_needed (line 461) | def _send_tool_name_if_needed( method _process_arguments_streaming (line 504) | def _process_arguments_streaming( method _finalize_tool_call (line 547) | def _finalize_tool_call( method parse_streaming_increment (line 620) | def parse_streaming_increment( method _parse_argument_pairs (line 746) | def _parse_argument_pairs( method supports_structural_tag (line 784) | def supports_structural_tag(self) -> bool: method structure_info (line 787) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/glm4_moe_detector.py class StreamState (line 20) | class StreamState(str, Enum): function get_argument_type (line 30) | def get_argument_type( function _convert_to_number (line 65) | def _convert_to_number(value: str) -> Any: function parse_arguments (line 83) | def parse_arguments( class Glm4MoeDetector (line 134) | class Glm4MoeDetector(BaseFormatDetector): method __init__ (line 151) | def __init__(self): method _reset_streaming_state (line 169) | def _reset_streaming_state(self) -> None: method has_tool_call (line 181) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 185) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method _get_value_type (line 221) | def _get_value_type(self, func_name: str, key: str, tools: List[Tool])... method _format_value_complete (line 274) | def _format_value_complete(self, value: str, value_type: str) -> str: method _process_xml_to_json_streaming (line 301) | def _process_xml_to_json_streaming( method parse_streaming_increment (line 421) | def parse_streaming_increment( method _parse_argument_pairs (line 600) | def _parse_argument_pairs( method supports_structural_tag (line 638) | def supports_structural_tag(self) -> bool: method structure_info (line 641) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/gpt_oss_detector.py class GptOssDetector (line 19) | class GptOssDetector(BaseFormatDetector): method __init__ (line 27) | def __init__(self): method has_tool_call (line 39) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 43) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 76) | def parse_streaming_increment( method _extract_tool_call_from_event (line 197) | def _extract_tool_call_from_event( method structure_info (line 240) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/hermes_detector.py class HermesDetector (line 17) | class HermesDetector(BaseFormatDetector): method __init__ (line 25) | def __init__(self): method has_tool_call (line 34) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 37) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method _clean_normal_text (line 62) | def _clean_normal_text(self, text: str) -> str: method parse_streaming_increment (line 85) | def parse_streaming_increment( method structure_info (line 115) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/internlm_detector.py class InternlmDetector (line 21) | class InternlmDetector(BaseFormatDetector): method __init__ (line 48) | def __init__(self): method has_tool_call (line 54) | def has_tool_call(self, text: str) -> bool: method get_arguments (line 59) | def get_arguments(self, obj): method detect_and_parse (line 67) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 155) | def parse_streaming_increment( method structure_info (line 233) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/json_array_parser.py class JsonArrayParser (line 8) | class JsonArrayParser(BaseFormatDetector): method __init__ (line 16) | def __init__(self): method has_tool_call (line 23) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 29) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 37) | def parse_streaming_increment( method structure_info (line 45) | def structure_info(self) -> callable: FILE: python/sglang/srt/function_call/kimik2_detector.py function _strip_special_tokens (line 27) | def _strip_special_tokens(text: str) -> str: class KimiK2Detector (line 34) | class KimiK2Detector(BaseFormatDetector): method __init__ (line 48) | def __init__(self): method has_tool_call (line 76) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 80) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 127) | def parse_streaming_increment( method structure_info (line 245) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/lfm2_detector.py class Lfm2Detector (line 39) | class Lfm2Detector(BaseFormatDetector): method __init__ (line 56) | def __init__(self): method has_tool_call (line 65) | def has_tool_call(self, text: str) -> bool: method _get_parameter_value (line 69) | def _get_parameter_value(self, val: ast.AST) -> Any: method _parse_pythonic_call (line 109) | def _parse_pythonic_call( method _parse_pythonic_content (line 158) | def _parse_pythonic_content( method _parse_json_content (line 210) | def _parse_json_content( method _parse_tool_calls_content (line 238) | def _parse_tool_calls_content( method detect_and_parse (line 266) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method _strip_special_tokens (line 287) | def _strip_special_tokens(self, text: str) -> str: method parse_streaming_increment (line 291) | def parse_streaming_increment( method supports_structural_tag (line 367) | def supports_structural_tag(self) -> bool: method structure_info (line 376) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/llama32_detector.py class Llama32Detector (line 18) | class Llama32Detector(BaseFormatDetector): method __init__ (line 28) | def __init__(self): method _convert_python_dict_to_json (line 37) | def _convert_python_dict_to_json(self, text: str) -> str: method has_tool_call (line 47) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 53) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 115) | def parse_streaming_increment( method structure_info (line 139) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/mimo_detector.py function _get_param_type (line 30) | def _get_param_type(func_name: str, param_name: str, tools: List[Tool]) ... function _convert_param_value (line 40) | def _convert_param_value( class MiMoDetector (line 137) | class MiMoDetector(BaseFormatDetector): method __init__ (line 149) | def __init__(self): method has_tool_call (line 159) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 162) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 195) | def parse_streaming_increment( method _parse_tool_call (line 250) | def _parse_tool_call( method supports_structural_tag (line 277) | def supports_structural_tag(self) -> bool: method structure_info (line 280) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/minimax_m2.py class MinimaxM2Detector (line 17) | class MinimaxM2Detector(BaseFormatDetector): method __init__ (line 29) | def __init__(self): method has_tool_call (line 56) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 59) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method _convert_param_value (line 63) | def _convert_param_value(self, value: str, param_type: str) -> Any: method _extract_types_from_schema (line 67) | def _extract_types_from_schema(self, schema: Any) -> list[str]: method _convert_param_value_with_types (line 127) | def _convert_param_value_with_types( method _get_param_types_from_config (line 203) | def _get_param_types_from_config( method parse_streaming_increment (line 226) | def parse_streaming_increment( method _parse_and_stream_parameters (line 357) | def _parse_and_stream_parameters( method _reset_streaming_state (line 449) | def _reset_streaming_state(self, still_in_tool_call: bool = False): method _extract (line 458) | def _extract(self, text: str, tools: List[Tool]) -> Tuple[str, List[To... method _parse_block (line 477) | def _parse_block(self, block: str, tools: List[Tool]) -> List[ToolCall... method _parse_parameter (line 504) | def _parse_parameter( method supports_structural_tag (line 518) | def supports_structural_tag(self) -> bool: method structure_info (line 521) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/mistral_detector.py class MistralDetector (line 18) | class MistralDetector(BaseFormatDetector): method __init__ (line 34) | def __init__(self): method has_tool_call (line 44) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 48) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 115) | def parse_streaming_increment( method _try_parse_compact_args_format (line 198) | def _try_parse_compact_args_format( method _extract_json_value (line 253) | def _extract_json_value( method _extract_json_array (line 294) | def _extract_json_array(self, text: str) -> str: method structure_info (line 338) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/pythonic_detector.py class PythonicDetector (line 19) | class PythonicDetector(BaseFormatDetector): method __init__ (line 34) | def __init__(self): method _text_strip (line 42) | def _text_strip(text: str) -> str: method has_tool_call (line 49) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 52) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method _find_matching_bracket (line 113) | def _find_matching_bracket(self, buffer: str, start: int) -> int: method _strip_and_split_buffer (line 135) | def _strip_and_split_buffer(self, buffer: str) -> tuple[str, str]: method parse_streaming_increment (line 159) | def parse_streaming_increment( method _get_parameter_value (line 207) | def _get_parameter_value(self, val): method supports_structural_tag (line 220) | def supports_structural_tag(self) -> bool: method structure_info (line 223) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/qwen25_detector.py class Qwen25Detector (line 17) | class Qwen25Detector(BaseFormatDetector): method __init__ (line 33) | def __init__(self): method has_tool_call (line 43) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 47) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 75) | def parse_streaming_increment( method structure_info (line 115) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/qwen3_coder_detector.py class Qwen3CoderDetector (line 18) | class Qwen3CoderDetector(BaseFormatDetector): method __init__ (line 19) | def __init__(self): method has_tool_call (line 57) | def has_tool_call(self, text: str) -> bool: method _get_arguments_config (line 60) | def _get_arguments_config( method _convert_param_value (line 89) | def _convert_param_value( method detect_and_parse (line 172) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 240) | def parse_streaming_increment( method supports_structural_tag (line 470) | def supports_structural_tag(self) -> bool: method structure_info (line 473) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/step3_detector.py function get_argument_type (line 18) | def get_argument_type(func_name: str, arg_key: str, defined_tools: List[... function parse_arguments (line 31) | def parse_arguments(value: str) -> tuple[Any, bool]: class Step3Detector (line 43) | class Step3Detector(BaseFormatDetector): method __init__ (line 61) | def __init__(self): method has_tool_call (line 85) | def has_tool_call(self, text: str) -> bool: method _parse_steptml_invoke (line 89) | def _parse_steptml_invoke( method detect_and_parse (line 120) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 169) | def parse_streaming_increment( method _parse_partial_tool_call (line 256) | def _parse_partial_tool_call(self, tools: List[Tool]) -> StreamingPars... method _reset_streaming_state (line 395) | def _reset_streaming_state(self): method supports_structural_tag (line 402) | def supports_structural_tag(self) -> bool: method structure_info (line 406) | def structure_info(self) -> _GetInfoFunc: FILE: python/sglang/srt/function_call/trinity_detector.py class TrinityDetector (line 11) | class TrinityDetector(Qwen25Detector): method _strip_think_tags (line 21) | def _strip_think_tags(self, text: str) -> str: method has_tool_call (line 25) | def has_tool_call(self, text: str) -> bool: method detect_and_parse (line 29) | def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingP... method parse_streaming_increment (line 35) | def parse_streaming_increment( FILE: python/sglang/srt/function_call/utils.py function _find_common_prefix (line 12) | def _find_common_prefix(s1: str, s2: str) -> str: function _partial_json_loads (line 23) | def _partial_json_loads(input_str: str, flags: Allow) -> Tuple[Any, int]: function _is_complete_json (line 52) | def _is_complete_json(input_str: str) -> bool: function _get_tool_schema_defs (line 60) | def _get_tool_schema_defs(tools: List[Tool]) -> dict: function _get_tool_schema (line 90) | def _get_tool_schema(tool: Tool) -> dict: function infer_type_from_json_schema (line 104) | def infer_type_from_json_schema(schema: Dict[str, Any]) -> Optional[str]: function get_json_schema_constraint (line 207) | def get_json_schema_constraint( FILE: python/sglang/srt/hardware_backend/npu/allocator_npu.py class NPUPagedTokenToKVPoolAllocator (line 15) | class NPUPagedTokenToKVPoolAllocator(PagedTokenToKVPoolAllocator): method __init__ (line 16) | def __init__( method alloc_extend (line 28) | def alloc_extend( method alloc_decode (line 96) | def alloc_decode( method free (line 135) | def free(self, free_index: torch.Tensor): FILE: python/sglang/srt/hardware_backend/npu/attention/ascend_backend.py function _reshape_kv_for_fia_nz (line 38) | def _reshape_kv_for_fia_nz( class ForwardMetadata (line 49) | class ForwardMetadata: class AscendAttnMaskBuilder (line 68) | class AscendAttnMaskBuilder: method __init__ (line 69) | def __init__(self, model_runner: ModelRunner, device, use_fia, use_mla): method generate_mask_flag (line 107) | def generate_mask_flag(max_seq_len): method generate_attn_mask (line 121) | def generate_attn_mask(max_seq_len, mode, dtype=torch.float16): method get_attention_mask_id (line 145) | def get_attention_mask_id(seq_lens, extend_lens): method update_attn_cache (line 163) | def update_attn_cache( method get_splitfuse_attn_mask (line 188) | def get_splitfuse_attn_mask( class AscendAttnBackend (line 206) | class AscendAttnBackend(AttentionBackend): method __init__ (line 208) | def __init__(self, model_runner: ModelRunner): method get_verify_buffers_to_fill_after_draft (line 263) | def get_verify_buffers_to_fill_after_draft(self): method update_verify_buffers_to_fill_after_draft (line 271) | def update_verify_buffers_to_fill_after_draft( method init_forward_metadata (line 276) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 342) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 351) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 414) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 447) | def get_cuda_graph_seq_len_fill_value(self): method _generate_alibi_bias (line 450) | def _generate_alibi_bias( method generate_alibi_bias (line 465) | def generate_alibi_bias( method attn_alibi (line 495) | def attn_alibi( method do_cp_balance_attn (line 565) | def do_cp_balance_attn( method forward_sparse (line 635) | def forward_sparse( method forward_extend (line 747) | def forward_extend( method forward_dllm (line 1130) | def forward_dllm( method forward_mtp (line 1189) | def forward_mtp( method forward_decode_graph (line 1370) | def forward_decode_graph( method forward_decode (line 1528) | def forward_decode( method forward_mixed (line 1780) | def forward_mixed( class AscendAttnMultiStepDraftBackend (line 1836) | class AscendAttnMultiStepDraftBackend: method __init__ (line 1842) | def __init__( method common_template (line 1855) | def common_template(self, forward_batch: ForwardBatch, call_fn: int): method init_forward_metadata (line 1861) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 1868) | def init_cuda_graph_state(self, max_bs, max_num_tokens): method init_forward_metadata_capture_cuda_graph (line 1872) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw... method init_forward_metadata_replay_cuda_graph (line 1886) | def init_forward_metadata_replay_cuda_graph( FILE: python/sglang/srt/hardware_backend/npu/attention/ascend_torch_native_backend.py class AscendTorchNativeAttnBackend (line 9) | class AscendTorchNativeAttnBackend: method __init__ (line 10) | def __init__(self): method scaled_dot_product_attention_with_softcapping (line 13) | def scaled_dot_product_attention_with_softcapping( method run_sdpa_forward_extend (line 56) | def run_sdpa_forward_extend( method run_sdpa_forward_decode (line 176) | def run_sdpa_forward_decode( method support_triton (line 281) | def support_triton(self): FILE: python/sglang/srt/hardware_backend/npu/attention/mla_preprocess.py function is_mla_preprocess_enabled (line 16) | def is_mla_preprocess_enabled() -> bool: function is_fia_nz (line 21) | def is_fia_nz() -> bool: function round_up (line 30) | def round_up(val: int, align: int) -> int: function transdata (line 36) | def transdata(nd_mat, block_size: tuple = (16, 16)): function trans_rope_weight (line 55) | def trans_rope_weight(weight, rope_dim): class NPUFusedMLAPreprocess (line 63) | class NPUFusedMLAPreprocess(torch.nn.Module): method __init__ (line 64) | def __init__( method preprocess_weights (line 102) | def preprocess_weights(self, hidden_states): method mlaprolog_preprocess_weight (line 239) | def mlaprolog_preprocess_weight(self): method get_sin_cos (line 248) | def get_sin_cos(self, positions): method get_kv_cache_and_cache_idx (line 255) | def get_kv_cache_and_cache_idx(self, forward_batch): method forward_absorb_prepare_npu_rms_norm_cache (line 260) | def forward_absorb_prepare_npu_rms_norm_cache( method forward_mlapo (line 341) | def forward_mlapo(self, positions, hidden_states, forward_batch, zero_... method forward_mlaprolog (line 427) | def forward_mlaprolog(self, positions, hidden_states, forward_batch): method forward (line 468) | def forward(self, positions, hidden_states, forward_batch, zero_alloca... FILE: python/sglang/srt/hardware_backend/npu/cmo.py function get_cmo_stream (line 6) | def get_cmo_stream(): function set_cmo_stream (line 16) | def set_cmo_stream(stream): function prepare_weight_cache (line 21) | def prepare_weight_cache(handle, cache, PREFETCH_MAX_SIZE=1000000000): function wait_cmo_stream (line 50) | def wait_cmo_stream(): FILE: python/sglang/srt/hardware_backend/npu/graph_runner/eagle_draft_extend_npu_graph_runner.py class EAGLEDraftExtendNpuGraphRunner (line 33) | class EAGLEDraftExtendNpuGraphRunner(EAGLEDraftExtendCudaGraphRunner): method __init__ (line 34) | def __init__(self, eagle_worker: EAGLEWorker): method _create_graph (line 37) | def _create_graph(self): method _cache_loc_dtype (line 40) | def _cache_loc_dtype(self): method _capture_init (line 43) | def _capture_init(self, run_once_fn): method _capture_graph (line 49) | def _capture_graph(self, graph, pool, stream, run_once_fn): method _replay_update (line 56) | def _replay_update(self, seq_lens): method _replay (line 61) | def _replay(self, forward_batch: ForwardBatch): FILE: python/sglang/srt/hardware_backend/npu/graph_runner/eagle_draft_npu_graph_runner.py class EAGLEDraftNpuGraphRunner (line 47) | class EAGLEDraftNpuGraphRunner(EAGLEDraftCudaGraphRunner): method __init__ (line 48) | def __init__(self, eagle_worker: EAGLEWorker): method _init_arch_map (line 54) | def _init_arch_map(self): method _create_graph (line 64) | def _create_graph(self): method _capture_init (line 67) | def _capture_init(self, run_once_fn): method _capture_graph (line 73) | def _capture_graph(self, graph, pool, stream, run_once_fn): method _get_update_attr_name (line 80) | def _get_update_attr_name(self): method _get_update_attr_type (line 83) | def _get_update_attr_type(self): method _replay_update (line 86) | def _replay_update(self, seq_lens): method _replay (line 94) | def _replay(self, forward_batch: ForwardBatch): method _cache_loc_dtype (line 108) | def _cache_loc_dtype(self): FILE: python/sglang/srt/hardware_backend/npu/graph_runner/npu_graph_runner.py function patch_model_npu (line 55) | def patch_model_npu( class NPUGraphRunner (line 73) | class NPUGraphRunner(CudaGraphRunner): method __init__ (line 76) | def __init__(self, model_runner: ModelRunner): method _init_arch_map (line 85) | def _init_arch_map(self): method _create_device_graph (line 101) | def _create_device_graph(self): method _capture_graph (line 104) | def _capture_graph(self, graph, pool, stream, run_once_fn): method _get_update_attr_name (line 119) | def _get_update_attr_name(self): method _get_update_attr_type (line 122) | def _get_update_attr_type(self): method _update_inputs (line 125) | def _update_inputs(self, seq_lens): method _cache_loc_dtype (line 133) | def _cache_loc_dtype(self): method _init_profile_context_and_memory_record (line 136) | def _init_profile_context_and_memory_record(self): method _post_process_after_profile (line 160) | def _post_process_after_profile(self, prof_context): method replay (line 164) | def replay( FILE: python/sglang/srt/hardware_backend/npu/graph_runner/vit_npu_graph_runner.py class ViTNpuGraphRunner (line 33) | class ViTNpuGraphRunner(ViTCudaGraphRunner): method __init__ (line 46) | def __init__( method device (line 58) | def device(self) -> torch.device: method dtype (line 62) | def dtype(self) -> torch.dtype: method _create_graph (line 65) | def _create_graph( method create_graph (line 128) | def create_graph( method replay (line 177) | def replay( method run (line 204) | def run( FILE: python/sglang/srt/hardware_backend/npu/memory_pool_npu.py class NPUMHATokenToKVPool (line 18) | class NPUMHATokenToKVPool(MHATokenToKVPool): method __init__ (line 20) | def __init__( method _create_buffers (line 51) | def _create_buffers(self): method get_contiguous_buf_infos (line 86) | def get_contiguous_buf_infos(self): method set_kv_buffer (line 112) | def set_kv_buffer( class NPUMLATokenToKVPool (line 167) | class NPUMLATokenToKVPool(MLATokenToKVPool): method __init__ (line 169) | def __init__( method get_kv_size_bytes (line 240) | def get_kv_size_bytes(self): method get_kv_buffer (line 254) | def get_kv_buffer(self, layer_id: int): method get_key_buffer (line 262) | def get_key_buffer(self, layer_id: int): method get_value_buffer (line 270) | def get_value_buffer(self, layer_id: int): method get_index_k_buffer (line 278) | def get_index_k_buffer(self, layer_id: int): method get_contiguous_buf_infos (line 287) | def get_contiguous_buf_infos(self): method set_kv_buffer (line 310) | def set_kv_buffer( method set_index_k_buffer (line 344) | def set_index_k_buffer( FILE: python/sglang/srt/hardware_backend/npu/modules/deepseek_v2_attention_mla_npu.py function forward_mha_prepare_npu (line 27) | def forward_mha_prepare_npu( function forward_mha_core_npu (line 132) | def forward_mha_core_npu( function forward_mla_prepare_npu (line 149) | def forward_mla_prepare_npu( function forward_mla_core_npu (line 255) | def forward_mla_core_npu( function forward_dsa_prepare_npu (line 297) | def forward_dsa_prepare_npu( function forward_dsa_core_npu (line 396) | def forward_dsa_core_npu( function npu_mla_preprocess (line 448) | def npu_mla_preprocess( FILE: python/sglang/srt/hardware_backend/npu/modules/qwen_vl_processor.py function npu_wrapper_preprocess (line 19) | def npu_wrapper_preprocess(func): function npu_apply_qwen_image_preprocess_patch (line 144) | def npu_apply_qwen_image_preprocess_patch(): FILE: python/sglang/srt/hardware_backend/npu/moe/topk.py function fused_topk_npu (line 16) | def fused_topk_npu( FILE: python/sglang/srt/hardware_backend/npu/quantization/fused_moe_method_npu.py function npu_fused_experts_w4a4 (line 17) | def npu_fused_experts_w4a4( function npu_fused_experts (line 103) | def npu_fused_experts( function npu_fused_experts_w8a8_decode (line 205) | def npu_fused_experts_w8a8_decode( function npu_fused_moe_without_routing_weights_bf16 (line 277) | def npu_fused_moe_without_routing_weights_bf16( function fused_moe_npu (line 308) | def fused_moe_npu( class _NPUFusedMoEMethodBase (line 387) | class _NPUFusedMoEMethodBase(FusedMoEMethodBase): method __init__ (line 389) | def __init__( class NPUW4A4Int4DynamicMoEMethod (line 396) | class NPUW4A4Int4DynamicMoEMethod(_NPUFusedMoEMethodBase): method process_weights_after_loading (line 398) | def process_weights_after_loading(self, layer: torch.nn.Module) -> None: method _pack_to_int32 (line 429) | def _pack_to_int32(self, weight: torch.Tensor): method apply (line 438) | def apply( class NPUW8A8Int8DynamicMoEMethod (line 464) | class NPUW8A8Int8DynamicMoEMethod(_NPUFusedMoEMethodBase): method process_weights_after_loading (line 466) | def process_weights_after_loading(self, layer: torch.nn.Module) -> None: method apply (line 493) | def apply( method apply_without_routing_weights (line 538) | def apply_without_routing_weights( class NPUW4A8Int8DynamicMoEMethod (line 586) | class NPUW4A8Int8DynamicMoEMethod(_NPUFusedMoEMethodBase): method _process_scale (line 588) | def _process_scale( method _update_bias (line 623) | def _update_bias(self, layer, w13_bias, w2_bias): method _pack_to_int32 (line 631) | def _pack_to_int32(self, weight: torch.Tensor): method process_weights_after_loading (line 638) | def process_weights_after_loading( method _process_weights_without_clip (line 659) | def _process_weights_without_clip( method _process_weights_with_clip (line 693) | def _process_weights_with_clip(self, layer: torch.nn.Module) -> None: method apply (line 707) | def apply( method apply_without_routing_weights (line 791) | def apply_without_routing_weights( class NPUW4A16Int4DynamicMoEMethod (line 835) | class NPUW4A16Int4DynamicMoEMethod(_NPUFusedMoEMethodBase): method _pack_to_int32 (line 837) | def _pack_to_int32(self, weight: torch.Tensor): method _unpack_from_int32 (line 858) | def _unpack_from_int32( method process_weights_after_loading (line 922) | def process_weights_after_loading(self, layer: torch.nn.Module) -> None: method apply (line 963) | def apply( method apply_without_routing_weights (line 991) | def apply_without_routing_weights( FILE: python/sglang/srt/hardware_backend/npu/quantization/linear_method_npu.py class _NPULinearMethodBase (line 12) | class _NPULinearMethodBase(LinearMethodBase): method __init__ (line 14) | def __init__( class NPUW8A8Int8LinearMethod (line 21) | class NPUW8A8Int8LinearMethod(_NPULinearMethodBase): method process_weights_after_loading (line 23) | def process_weights_after_loading(self, layer: torch.nn.Module): method apply (line 46) | def apply( class NPUW8A8Int8DynamicLinearMethod (line 79) | class NPUW8A8Int8DynamicLinearMethod(_NPULinearMethodBase): method process_weights_after_loading (line 81) | def process_weights_after_loading(self, layer: torch.nn.Module): method apply (line 90) | def apply( class NPU_W4A4DynamicLinearMethod (line 114) | class NPU_W4A4DynamicLinearMethod(_NPULinearMethodBase): method process_weights_after_loading (line 116) | def process_weights_after_loading(self, layer): method apply (line 125) | def apply( FILE: python/sglang/srt/hardware_backend/npu/utils.py class NPUACLFormat (line 19) | class NPUACLFormat(IntEnum): class FusedMoEMode (line 25) | class FusedMoEMode(IntEnum): function _call_once (line 30) | def _call_once(fn: Callable): function set_default_server_args (line 44) | def set_default_server_args(args: "ServerArgs"): function init_npu_backend (line 92) | def init_npu_backend(): function npu_format_cast (line 110) | def npu_format_cast( function get_indexer_weight_stream (line 142) | def get_indexer_weight_stream(): FILE: python/sglang/srt/layers/activation.py class SiluAndMul (line 63) | class SiluAndMul(MultiPlatformOp): method __init__ (line 64) | def __init__(self, *args, **kwargs): method forward_native (line 69) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: method forward_cuda (line 73) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: method forward_cpu (line 80) | def forward_cpu(self, x: torch.Tensor) -> torch.Tensor: method forward_npu (line 87) | def forward_npu(self, x: torch.Tensor) -> torch.Tensor: method forward_xpu (line 91) | def forward_xpu(self, x: torch.Tensor) -> torch.Tensor: class GeluAndMul (line 99) | class GeluAndMul(MultiPlatformOp): method __init__ (line 100) | def __init__(self, approximate="tanh"): method _forward_impl (line 104) | def _forward_impl(self, x: torch.Tensor) -> torch.Tensor: method forward_native (line 116) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: method forward_cpu (line 120) | def forward_cpu(self, x: torch.Tensor) -> torch.Tensor: method forward_cuda (line 128) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: method forward_xpu (line 131) | def forward_xpu(self, x: torch.Tensor) -> torch.Tensor: method forward_npu (line 134) | def forward_npu(self, x: torch.Tensor) -> torch.Tensor: class NewGELU (line 146) | class NewGELU(MultiPlatformOp): method forward_native (line 147) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: method forward_cuda (line 151) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: class ReLU2 (line 156) | class ReLU2(nn.Module): method forward (line 162) | def forward(self, x: torch.Tensor) -> torch.Tensor: class QuickGELU (line 167) | class QuickGELU(MultiPlatformOp): method forward_native (line 168) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: method forward_cuda (line 171) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: method forward_hip (line 174) | def forward_hip(self, x: torch.Tensor) -> torch.Tensor: method forward_npu (line 179) | def forward_npu(self, x: torch.Tensor) -> torch.Tensor: class XIELU (line 183) | class XIELU(MultiPlatformOp): method __init__ (line 190) | def __init__( method _xielu_python (line 244) | def _xielu_python(self, x: torch.Tensor) -> torch.Tensor: method _xielu_cuda (line 253) | def _xielu_cuda(self, x: torch.Tensor) -> torch.Tensor: method forward (line 282) | def forward(self, input: torch.Tensor) -> torch.Tensor: class ScaledActivation (line 293) | class ScaledActivation(nn.Module): method __init__ (line 299) | def __init__( method forward (line 321) | def forward(self, x: torch.Tensor) -> torch.Tensor: method weight_loader (line 324) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens... function get_act_fn (line 344) | def get_act_fn( function get_cross_encoder_activation_function (line 369) | def get_cross_encoder_activation_function(config: PretrainedConfig): FILE: python/sglang/srt/layers/amx_utils.py class CPUQuantMethod (line 12) | class CPUQuantMethod(IntEnum): function amx_process_weight_after_loading (line 19) | def amx_process_weight_after_loading(weight, is_conv=False): function dim_is_supported (line 35) | def dim_is_supported(weight): function dtype_is_supported (line 46) | def dtype_is_supported(weight): function is_dim_conv_weight (line 55) | def is_dim_conv_weight(weight): function _init_amx_conv_state (line 59) | def _init_amx_conv_state(conv_state): function _amx_process_weight_after_loading (line 76) | def _amx_process_weight_after_loading( class PackWeightMethod (line 131) | class PackWeightMethod: method __init__ (line 132) | def __init__(self, weight_names, transpose_dims=None): method process_weights_after_loading (line 136) | def process_weights_after_loading(self, module) -> None: FILE: python/sglang/srt/layers/attention/aiter_backend.py class WrapperDispatch (line 79) | class WrapperDispatch(Enum): class ForwardMetadata (line 85) | class ForwardMetadata: class AiterAttnBackend (line 112) | class AiterAttnBackend(AttentionBackend): method __init__ (line 113) | def __init__( method make_mla_decode_meta_data_buffer (line 268) | def make_mla_decode_meta_data_buffer(self, max_seqlen_qo, batch_size): method make_mla_meta_data (line 331) | def make_mla_meta_data( method make_mla_prefill_ps_meta_data_buffer (line 375) | def make_mla_prefill_ps_meta_data_buffer( method make_mla_prefill_ps_meta_data (line 419) | def make_mla_prefill_ps_meta_data( method _transform_table_1_to_real (line 464) | def _transform_table_1_to_real(self, page_table: torch.Tensor) -> torc... method _resolve_v2_num_draft_tokens (line 474) | def _resolve_v2_num_draft_tokens( method _get_kv_indices_scratch (line 509) | def _get_kv_indices_scratch( method _set_uniform_qo_indptr (line 522) | def _set_uniform_qo_indptr( method _ensure_spec_v2_topk_supported (line 535) | def _ensure_spec_v2_topk_supported(self): method mla_fp8_prefill_attn (line 542) | def mla_fp8_prefill_attn( method init_forward_metadata (line 617) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 1146) | def init_cuda_graph_state( method init_forward_metadata_capture_cuda_graph (line 1207) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 1585) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 1954) | def get_cuda_graph_seq_len_fill_value(self): method update_verify_buffers_to_fill_after_draft (line 1957) | def update_verify_buffers_to_fill_after_draft( method forward_extend (line 1965) | def forward_extend( method forward_decode (line 2366) | def forward_decode( class AiterIndicesUpdaterPrefill (line 2527) | class AiterIndicesUpdaterPrefill: method __init__ (line 2528) | def __init__(self, model_runner: ModelRunner, attn_backend: AttentionB... method update (line 2553) | def update( method update_single_wrapper (line 2565) | def update_single_wrapper( class AiterMlaIndicesUpdaterPrefill (line 2632) | class AiterMlaIndicesUpdaterPrefill: method __init__ (line 2633) | def __init__(self, model_runner: ModelRunner, attn_backend: AttentionB... method update (line 2648) | def update( method update_single_wrapper (line 2661) | def update_single_wrapper( class AiterMultiStepDraftBackend (line 2714) | class AiterMultiStepDraftBackend: method __init__ (line 2720) | def __init__( method common_template (line 2759) | def common_template( method init_forward_metadata (line 2791) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 2812) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 2823) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw... method init_forward_metadata_replay_cuda_graph (line 2837) | def init_forward_metadata_replay_cuda_graph( FILE: python/sglang/srt/layers/attention/attention_registry.py function register_attention_backend (line 15) | def register_attention_backend(name): function create_flashinfer_backend (line 24) | def create_flashinfer_backend(runner): function create_trtllm_mla_backend (line 49) | def create_trtllm_mla_backend(runner): function create_aiter_backend (line 58) | def create_aiter_backend(runner): function create_wave_backend (line 65) | def create_wave_backend(runner): function create_ascend_backend (line 72) | def create_ascend_backend(runner): function create_nsa_backend (line 81) | def create_nsa_backend(runner): function create_triton_backend (line 88) | def create_triton_backend(runner): function create_torch_native_backend (line 106) | def create_torch_native_backend(runner): function create_flex_attention_backend (line 113) | def create_flex_attention_backend(runner): function create_flashmla_backend (line 120) | def create_flashmla_backend(runner): function create_flashattention_v3_backend (line 127) | def create_flashattention_v3_backend(runner): function create_flashattention_v4_backend (line 142) | def create_flashattention_v4_backend(runner): function create_cutlass_mla_backend (line 149) | def create_cutlass_mla_backend(runner): function create_trtllm_mha_backend (line 156) | def create_trtllm_mha_backend(runner): function create_intel_amx_backend (line 165) | def create_intel_amx_backend(runner): function create_dual_chunk_flash_attn_backend (line 172) | def create_dual_chunk_flash_attn_backend(runner): function attn_backend_wrapper (line 180) | def attn_backend_wrapper(runner: "ModelRunner", full_attn_backend: "Atte... function create_intel_xpu_backend (line 240) | def create_intel_xpu_backend(runner): FILE: python/sglang/srt/layers/attention/base_attn_backend.py class AttentionBackend (line 17) | class AttentionBackend(ABC): method init_forward_metadata (line 21) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 25) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 29) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 42) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 56) | def get_cuda_graph_seq_len_fill_value(self): method get_verify_buffers_to_fill_after_draft (line 60) | def get_verify_buffers_to_fill_after_draft(self): method update_verify_buffers_to_fill_after_draft (line 68) | def update_verify_buffers_to_fill_after_draft( method forward (line 79) | def forward( method forward_decode (line 123) | def forward_decode( method forward_extend (line 135) | def forward_extend( method forward_mixed (line 147) | def forward_mixed( method support_triton (line 159) | def support_triton(self): method get_indexer_metadata (line 163) | def get_indexer_metadata( FILE: python/sglang/srt/layers/attention/cutlass_mla_backend.py class CutlassMLADecodeMetadata (line 35) | class CutlassMLADecodeMetadata: method __init__ (line 39) | def __init__( class CutlassMLABackend (line 48) | class CutlassMLABackend(FlashInferMLAAttnBackend): method __init__ (line 51) | def __init__( method init_forward_metadata (line 82) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 122) | def init_cuda_graph_state( method init_forward_metadata_capture_cuda_graph (line 146) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 185) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 223) | def get_cuda_graph_seq_len_fill_value(self): method forward_decode (line 226) | def forward_decode( FILE: python/sglang/srt/layers/attention/double_sparsity_backend.py class DoubleSparseAttnBackend (line 16) | class DoubleSparseAttnBackend(AttentionBackend): method __init__ (line 17) | def __init__(self, model_runner: ModelRunner): method init_forward_metadata (line 52) | def init_forward_metadata(self, forward_batch: ForwardBatch): method forward_extend (line 113) | def forward_extend( method forward_decode (line 167) | def forward_decode( FILE: python/sglang/srt/layers/attention/dual_chunk_flashattention_backend.py class DualChunkFlashAttentionMetadata (line 33) | class DualChunkFlashAttentionMetadata: class DualChunkFlashAttentionBackend (line 101) | class DualChunkFlashAttentionBackend(AttentionBackend): method __init__ (line 102) | def __init__( method get_sparse_attention_config (line 160) | def get_sparse_attention_config(self, layer_idx) -> List[Dict[str, Any]]: method init_forward_metadata (line 168) | def init_forward_metadata(self, forward_batch: ForwardBatch): method forward_extend (line 296) | def forward_extend( method forward_decode (line 409) | def forward_decode( method init_cuda_graph_state (line 486) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 532) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 580) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 670) | def get_cuda_graph_seq_len_fill_value(self): method _dual_chunk_flash_attn_prefill (line 674) | def _dual_chunk_flash_attn_prefill( method _dual_chunk_flash_attn_prefill_func (line 831) | def _dual_chunk_flash_attn_prefill_func( method _do_flash_attn (line 1329) | def _do_flash_attn( method _merge_attn_outputs (line 1424) | def _merge_attn_outputs( method _dual_chunk_flash_attn_decoding (line 1466) | def _dual_chunk_flash_attn_decoding( method _dual_chunk_flash_attn_decoding_with_exp_sums (line 1560) | def _dual_chunk_flash_attn_decoding_with_exp_sums( function _vertical_slash_sparse_attention (line 1586) | def _vertical_slash_sparse_attention( function _sum_all_diagonal_matrix (line 1682) | def _sum_all_diagonal_matrix(mat: torch.tensor): function _get_block (line 1697) | def _get_block(block_table: torch.Tensor, block_size: int, begin: int, e... FILE: python/sglang/srt/layers/attention/fla/chunk.py function chunk_gated_delta_rule_fwd (line 26) | def chunk_gated_delta_rule_fwd( class ChunkGatedDeltaRuleFunction (line 75) | class ChunkGatedDeltaRuleFunction(torch.autograd.Function): method forward (line 80) | def forward( function chunk_gated_delta_rule (line 115) | def chunk_gated_delta_rule( FILE: python/sglang/srt/layers/attention/fla/chunk_delta_h.py function chunk_gated_delta_rule_fwd_kernel_h_blockdim64 (line 33) | def chunk_gated_delta_rule_fwd_kernel_h_blockdim64( function chunk_gated_delta_rule_fwd_h (line 274) | def chunk_gated_delta_rule_fwd_h( FILE: python/sglang/srt/layers/attention/fla/chunk_o.py function chunk_fwd_kernel_o (line 30) | def chunk_fwd_kernel_o( function chunk_fwd_o (line 126) | def chunk_fwd_o( FILE: python/sglang/srt/layers/attention/fla/chunk_scaled_dot_kkt.py function chunk_scaled_dot_kkt_fwd_kernel (line 25) | def chunk_scaled_dot_kkt_fwd_kernel( function chunk_scaled_dot_kkt_fwd (line 89) | def chunk_scaled_dot_kkt_fwd( FILE: python/sglang/srt/layers/attention/fla/cumsum.py function chunk_local_cumsum_scalar_kernel (line 22) | def chunk_local_cumsum_scalar_kernel( function chunk_local_cumsum_vector_kernel (line 80) | def chunk_local_cumsum_vector_kernel( function chunk_local_cumsum_scalar (line 158) | def chunk_local_cumsum_scalar( function chunk_local_cumsum_vector (line 201) | def chunk_local_cumsum_vector( function chunk_local_cumsum (line 253) | def chunk_local_cumsum( FILE: python/sglang/srt/layers/attention/fla/fused_gdn_gating.py function fused_gdn_gating_kernel (line 11) | def fused_gdn_gating_kernel( function fused_gdn_gating (line 42) | def fused_gdn_gating( FILE: python/sglang/srt/layers/attention/fla/fused_norm_gate.py function layer_norm_gated_fwd_kernel (line 26) | def layer_norm_gated_fwd_kernel( function layer_norm_gated_fwd_kernel1 (line 106) | def layer_norm_gated_fwd_kernel1( function layer_norm_gated_fwd (line 173) | def layer_norm_gated_fwd( class LayerNormGatedFunction (line 266) | class LayerNormGatedFunction(torch.autograd.Function): method forward (line 268) | def forward( function rms_norm_gated (line 318) | def rms_norm_gated( class FusedRMSNormGated (line 343) | class FusedRMSNormGated(nn.Module): method __init__ (line 344) | def __init__( method forward (line 370) | def forward( FILE: python/sglang/srt/layers/attention/fla/fused_recurrent.py function fused_recurrent_gated_delta_rule_fwd_kernel (line 16) | def fused_recurrent_gated_delta_rule_fwd_kernel( function fused_recurrent_gated_delta_rule_fwd (line 124) | def fused_recurrent_gated_delta_rule_fwd( function fused_recurrent_gated_delta_rule_packed_decode_kernel (line 186) | def fused_recurrent_gated_delta_rule_packed_decode_kernel( function fused_recurrent_gated_delta_rule_packed_decode (line 268) | def fused_recurrent_gated_delta_rule_packed_decode( class FusedRecurrentFunction (line 405) | class FusedRecurrentFunction(torch.autograd.Function): method forward (line 409) | def forward( method backward (line 439) | def backward(ctx, do, dht): function fused_recurrent_gated_delta_rule (line 447) | def fused_recurrent_gated_delta_rule( function fused_recurrent_gated_delta_rule_update_fwd_kernel (line 565) | def fused_recurrent_gated_delta_rule_update_fwd_kernel( function fused_recurrent_gated_delta_rule_update_fwd (line 743) | def fused_recurrent_gated_delta_rule_update_fwd( class FusedRecurrentUpdateFunction (line 829) | class FusedRecurrentUpdateFunction(torch.autograd.Function): method forward (line 833) | def forward( method backward (line 875) | def backward(ctx, do, dht): function fused_recurrent_gated_delta_rule_update (line 883) | def fused_recurrent_gated_delta_rule_update( FILE: python/sglang/srt/layers/attention/fla/fused_sigmoid_gating_recurrent.py function fused_sigmoid_gating_delta_rule_update_kernel (line 9) | def fused_sigmoid_gating_delta_rule_update_kernel( function fused_sigmoid_gating_delta_rule_update (line 243) | def fused_sigmoid_gating_delta_rule_update( FILE: python/sglang/srt/layers/attention/fla/index.py function prepare_lens (line 12) | def prepare_lens(cu_seqlens: torch.LongTensor) -> torch.LongTensor: function prepare_chunk_indices (line 17) | def prepare_chunk_indices( function prepare_chunk_offsets (line 30) | def prepare_chunk_offsets( FILE: python/sglang/srt/layers/attention/fla/kda.py function cdiv (line 27) | def cdiv(a: int, b: int) -> int: function next_power_of_2 (line 32) | def next_power_of_2(n: int) -> int: function fused_recurrent_kda_fwd (line 39) | def fused_recurrent_kda_fwd( function fused_recurrent_kda (line 120) | def fused_recurrent_kda( function rms_norm_gated (line 159) | def rms_norm_gated( function chunk_kda_scaled_dot_kkt_fwd_kernel_intra_sub_inter (line 207) | def chunk_kda_scaled_dot_kkt_fwd_kernel_intra_sub_inter( function chunk_kda_scaled_dot_kkt_fwd_kernel_intra_sub_intra (line 312) | def chunk_kda_scaled_dot_kkt_fwd_kernel_intra_sub_intra( function chunk_kda_scaled_dot_kkt_fwd (line 402) | def chunk_kda_scaled_dot_kkt_fwd( function recompute_w_u_fwd_kernel (line 497) | def recompute_w_u_fwd_kernel( function recompute_w_u_fwd (line 640) | def recompute_w_u_fwd( function chunk_gla_fwd_kernel_o (line 701) | def chunk_gla_fwd_kernel_o( function chunk_gla_fwd_o_gk (line 809) | def chunk_gla_fwd_o_gk( function chunk_kda_fwd (line 853) | def chunk_kda_fwd( function chunk_kda (line 912) | def chunk_kda( function kda_gate_fwd_kernel (line 956) | def kda_gate_fwd_kernel( function fused_kda_gate (line 1018) | def fused_kda_gate( FILE: python/sglang/srt/layers/attention/fla/l2norm.py function l2norm_fwd_kernel1 (line 24) | def l2norm_fwd_kernel1( function l2norm_fwd_kernel (line 55) | def l2norm_fwd_kernel( function l2norm_fwd (line 74) | def l2norm_fwd( class L2NormFunction (line 125) | class L2NormFunction(torch.autograd.Function): method forward (line 129) | def forward(ctx, x, eps=1e-6, output_dtype=None): function l2norm (line 133) | def l2norm( class L2Norm (line 142) | class L2Norm(nn.Module): method __init__ (line 144) | def __init__(self, eps: float = 1e-6, output_dtype: Optional[torch.dty... method forward (line 149) | def forward(self, x: torch.Tensor) -> torch.Tensor: FILE: python/sglang/srt/layers/attention/fla/layernorm_gated.py function rms_norm_ref (line 34) | def rms_norm_ref( function _layer_norm_fwd_1pass_kernel (line 68) | def _layer_norm_fwd_1pass_kernel( function _get_sm_count (line 173) | def _get_sm_count(device: torch.device) -> int: function calc_rows_per_block (line 179) | def calc_rows_per_block(M: int, device: torch.device) -> int: function _layer_norm_fwd (line 194) | def _layer_norm_fwd( function rms_norm_gated (line 274) | def rms_norm_gated( class LayerNormFn (line 317) | class LayerNormFn(torch.autograd.Function): method forward (line 320) | def forward( function layernorm_fn (line 345) | def layernorm_fn( class LayerNorm (line 361) | class LayerNorm(torch.nn.Module): method __init__ (line 363) | def __init__( method reset_parameters (line 385) | def reset_parameters(self): method forward (line 389) | def forward(self, x, z=None): class RMSNorm (line 403) | class RMSNorm(torch.nn.Module): method __init__ (line 405) | def __init__( method reset_parameters (line 428) | def reset_parameters(self): method forward (line 431) | def forward(self, x, z=None): FILE: python/sglang/srt/layers/attention/fla/op.py function safe_exp (line 26) | def safe_exp(x): function gather (line 33) | def gather(src, index, axis, _builder=None): function make_tensor_descriptor (line 59) | def make_tensor_descriptor( FILE: python/sglang/srt/layers/attention/fla/solve_tril.py function solve_tril_16x16_kernel (line 24) | def solve_tril_16x16_kernel( function merge_16x16_to_32x32_inverse_kernel (line 81) | def merge_16x16_to_32x32_inverse_kernel( function merge_16x16_to_64x64_inverse_kernel (line 160) | def merge_16x16_to_64x64_inverse_kernel( function solve_tril (line 394) | def solve_tril( FILE: python/sglang/srt/layers/attention/fla/utils.py function check_environments (line 26) | def check_environments(): function get_abs_err (line 63) | def get_abs_err(x, y): function get_err_ratio (line 67) | def get_err_ratio(x, y): function assert_close (line 73) | def assert_close(prefix, ref, tri, ratio, warning=False, err_atol=1e-6): function tensor_cache (line 92) | def tensor_cache(fn: Callable[..., torch.Tensor]) -> Callable[..., torch... function input_guard (line 134) | def input_guard(fn: Callable[..., torch.Tensor]) -> Callable[..., torch.... function require_version (line 174) | def require_version(version, hint): function checkpoint (line 202) | def checkpoint(fn): function _cpu_device_warning (line 209) | def _cpu_device_warning(): function get_multiprocessor_count (line 218) | def get_multiprocessor_count(tensor_idx: int = 0) -> int: function get_available_device (line 229) | def get_available_device() -> str: function _check_platform (line 238) | def _check_platform() -> Literal["nvidia", "amd", "intel", "musa"]: function get_all_max_shared_mem (line 272) | def get_all_max_shared_mem(): class Backend (line 285) | class Backend(Enum): method get_shared_memory (line 292) | def get_shared_memory(cls, arch: str) -> int: function check_shared_mem (line 300) | def check_shared_mem(arch: str = "none", tensor_idx: int = 0) -> bool: function custom_device_ctx (line 314) | def custom_device_ctx(index: int): function custom_device_ctx (line 324) | def custom_device_ctx(index: int): FILE: python/sglang/srt/layers/attention/fla/wy_fast.py function recompute_w_u_fwd_kernel (line 23) | def recompute_w_u_fwd_kernel( function recompute_w_u_fwd (line 111) | def recompute_w_u_fwd( FILE: python/sglang/srt/layers/attention/flashattention_backend.py class FlashAttentionMetadata (line 40) | class FlashAttentionMetadata: class LocalAttentionMetadata (line 75) | class LocalAttentionMetadata: function make_local_attention_virtual_batches (line 142) | def make_local_attention_virtual_batches( function cdiv (line 294) | def cdiv(a: int, b: int) -> int: function merge_state_v2_wrapper (line 301) | def merge_state_v2_wrapper(o, s_a, o_exp, s_b): class FlashAttentionBackend (line 305) | class FlashAttentionBackend(AttentionBackend): method __init__ (line 323) | def __init__( method init_forward_metadata (line 400) | def init_forward_metadata(self, forward_batch: ForwardBatch): method forward_extend (line 735) | def forward_extend( method forward_decode (line 1075) | def forward_decode( method init_cuda_graph_state (line 1358) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 1645) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 1897) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 2258) | def get_cuda_graph_seq_len_fill_value(self): method _maybe_init_local_attn_metadata (line 2262) | def _maybe_init_local_attn_metadata( method _maybe_update_local_attn_metadata_for_capture (line 2306) | def _maybe_update_local_attn_metadata_for_capture( method _maybe_update_local_attn_metadata_for_replay (line 2365) | def _maybe_update_local_attn_metadata_for_replay( method _init_sliding_window_attn_spec_metadata (line 2445) | def _init_sliding_window_attn_spec_metadata( function _prepare_swa_spec_page_table_kernel (line 2498) | def _prepare_swa_spec_page_table_kernel( function prepare_swa_spec_page_table_triton (line 2559) | def prepare_swa_spec_page_table_triton( class FlashAttentionMultiStepBackend (line 2599) | class FlashAttentionMultiStepBackend: method __init__ (line 2601) | def __init__( method init_forward_metadata (line 2618) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 2622) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 2626) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 2644) | def init_forward_metadata_replay_cuda_graph( function normal_decode_set_metadata (line 2669) | def normal_decode_set_metadata( function draft_decode_set_expand_metadata (line 2698) | def draft_decode_set_expand_metadata( FILE: python/sglang/srt/layers/attention/flashinfer_backend.py class WrapperDispatch (line 57) | class WrapperDispatch(Enum): class MultiItemScoringParams (line 63) | class MultiItemScoringParams: method is_enabled (line 87) | def is_enabled(self) -> bool: class DecodeMetadata (line 93) | class DecodeMetadata: class PrefillMetadata (line 98) | class PrefillMetadata: class FlashInferAttnBackend (line 113) | class FlashInferAttnBackend(AttentionBackend): method __init__ (line 116) | def __init__( method _process_multi_item_scoring (line 303) | def _process_multi_item_scoring( method init_forward_metadata (line 425) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 514) | def init_cuda_graph_state( method init_forward_metadata_capture_cuda_graph (line 548) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 686) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 748) | def get_cuda_graph_seq_len_fill_value(self): method forward_extend (line 751) | def forward_extend( method forward_decode (line 865) | def forward_decode( method _get_wrapper_idx (line 903) | def _get_wrapper_idx(self, layer: RadixAttention): class FlashInferIndicesUpdaterDecode (line 915) | class FlashInferIndicesUpdaterDecode: method __init__ (line 916) | def __init__(self, model_runner: ModelRunner, attn_backend: FlashInfer... method update (line 945) | def update( method update_single_wrapper (line 960) | def update_single_wrapper( method update_sliding_window (line 986) | def update_sliding_window( method update_cross_attention (line 1036) | def update_cross_attention( method call_begin_forward (line 1070) | def call_begin_forward( class FlashInferIndicesUpdaterPrefill (line 1175) | class FlashInferIndicesUpdaterPrefill: method __init__ (line 1176) | def __init__(self, model_runner: ModelRunner, attn_backend: FlashInfer... method update (line 1207) | def update( method update_single_wrapper (line 1223) | def update_single_wrapper( method update_sliding_window (line 1263) | def update_sliding_window( method update_cross_attention (line 1312) | def update_cross_attention( method call_begin_forward (line 1354) | def call_begin_forward( class FlashInferMultiStepDraftBackend (line 1463) | class FlashInferMultiStepDraftBackend: method __init__ (line 1469) | def __init__( method common_template (line 1510) | def common_template( method init_forward_metadata (line 1555) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 1576) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 1588) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw... method init_forward_metadata_replay_cuda_graph (line 1602) | def init_forward_metadata_replay_cuda_graph( function should_use_tensor_core (line 1620) | def should_use_tensor_core( FILE: python/sglang/srt/layers/attention/flashinfer_mla_backend.py class DecodeMetadata (line 56) | class DecodeMetadata: class PrefillMetadata (line 61) | class PrefillMetadata: class FlashInferMhaChunkKVRunner (line 70) | class FlashInferMhaChunkKVRunner: method __init__ (line 71) | def __init__( method update_prefix_chunks (line 93) | def update_prefix_chunks(self, num_prefix_chunks: int): method update_wrapper (line 100) | def update_wrapper( method forward (line 153) | def forward( class FlashInferMLAAttnBackend (line 191) | class FlashInferMLAAttnBackend(AttentionBackend): method __init__ (line 194) | def __init__( method init_forward_metadata (line 288) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 342) | def init_cuda_graph_state( method init_forward_metadata_capture_cuda_graph (line 373) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 453) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 510) | def get_cuda_graph_seq_len_fill_value(self): method init_mha_chunk_metadata (line 513) | def init_mha_chunk_metadata( method forward_extend (line 519) | def forward_extend( method forward_decode (line 595) | def forward_decode( class FlashInferMLAIndicesUpdaterDecode (line 656) | class FlashInferMLAIndicesUpdaterDecode: method __init__ (line 657) | def __init__(self, model_runner: ModelRunner, attn_backend: AttentionB... method update (line 674) | def update( method call_begin_forward (line 697) | def call_begin_forward( class FlashInferMLAIndicesUpdaterPrefill (line 765) | class FlashInferMLAIndicesUpdaterPrefill: method __init__ (line 766) | def __init__(self, model_runner: ModelRunner, attn_backend: AttentionB... method update (line 786) | def update( method call_begin_forward (line 817) | def call_begin_forward( class FlashInferMLAMultiStepDraftBackend (line 898) | class FlashInferMLAMultiStepDraftBackend: method __init__ (line 904) | def __init__( method common_template (line 950) | def common_template( method init_forward_metadata (line 988) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 1009) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 1021) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw... method init_forward_metadata_replay_cuda_graph (line 1035) | def init_forward_metadata_replay_cuda_graph( function fast_mla_decode_plan (line 1053) | def fast_mla_decode_plan( FILE: python/sglang/srt/layers/attention/flashmla_backend.py class FlashMLADecodeMetadata (line 30) | class FlashMLADecodeMetadata: method __init__ (line 35) | def __init__( class FlashMLABackend (line 46) | class FlashMLABackend(FlashInferMLAAttnBackend): method __init__ (line 47) | def __init__( method init_forward_metadata (line 87) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 153) | def init_cuda_graph_state( method init_forward_metadata_capture_cuda_graph (line 186) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 285) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 396) | def get_cuda_graph_seq_len_fill_value(self): method forward_decode (line 399) | def forward_decode( method forward_extend (line 471) | def forward_extend( class FlashMLAMultiStepDraftBackend (line 547) | class FlashMLAMultiStepDraftBackend: method __init__ (line 548) | def __init__( method common_template (line 581) | def common_template( method init_forward_metadata (line 591) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 598) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 604) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw... method init_forward_metadata_replay_cuda_graph (line 622) | def init_forward_metadata_replay_cuda_graph( FILE: python/sglang/srt/layers/attention/hybrid_attn_backend.py class HybridAttnBackend (line 13) | class HybridAttnBackend(AttentionBackend): method __init__ (line 16) | def __init__( method _select_backend (line 27) | def _select_backend(self, forward_mode: ForwardMode) -> AttentionBackend: method init_forward_metadata (line 53) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 57) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 67) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 88) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 111) | def get_cuda_graph_seq_len_fill_value(self): method forward (line 114) | def forward( method forward_decode (line 142) | def forward_decode( method forward_extend (line 156) | def forward_extend( method get_indexer_metadata (line 171) | def get_indexer_metadata( method forward (line 177) | def forward( FILE: python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py function track_mamba_state_if_needed_kernel (line 37) | def track_mamba_state_if_needed_kernel( function track_mamba_states_if_needed (line 96) | def track_mamba_states_if_needed( class MambaAttnBackendBase (line 137) | class MambaAttnBackendBase(AttentionBackend): method __init__ (line 138) | def __init__(self, model_runner: ModelRunner): method _forward_metadata (line 153) | def _forward_metadata(self, forward_batch: ForwardBatch): method init_forward_metadata (line 233) | def init_forward_metadata(self, forward_batch: ForwardBatch): method _init_track_conv_indices (line 236) | def _init_track_conv_indices( method _init_track_ssm_indices (line 278) | def _init_track_ssm_indices( method init_forward_metadata_capture_cuda_graph (line 358) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 372) | def init_forward_metadata_replay_cuda_graph( method init_forward_metadata_capture_cpu_graph (line 387) | def init_forward_metadata_capture_cpu_graph( method init_cuda_graph_state (line 401) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_cpu_graph_state (line 441) | def init_cpu_graph_state(self, max_bs: int, max_num_tokens: int): method _capture_metadata (line 458) | def _capture_metadata( method _replay_metadata (line 496) | def _replay_metadata( method get_cuda_graph_seq_len_fill_value (line 561) | def get_cuda_graph_seq_len_fill_value(self): method get_cpu_graph_seq_len_fill_value (line 564) | def get_cpu_graph_seq_len_fill_value(self): method _track_mamba_state_decode (line 567) | def _track_mamba_state_decode( method _track_mamba_state_extend (line 597) | def _track_mamba_state_extend( class Mamba2AttnBackend (line 632) | class Mamba2AttnBackend(MambaAttnBackendBase): method __init__ (line 635) | def __init__(self, model_runner: ModelRunner): method init_forward_metadata (line 641) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_forward_metadata_capture_cuda_graph (line 649) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 668) | def init_forward_metadata_replay_cuda_graph( method forward (line 690) | def forward( method forward_decode (line 710) | def forward_decode(self, *args, **kwargs): method forward_extend (line 715) | def forward_extend(self, *args, **kwargs): class HybridLinearAttnBackend (line 721) | class HybridLinearAttnBackend(AttentionBackend): method __init__ (line 724) | def __init__( method init_forward_metadata (line 735) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 739) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_cpu_graph_state (line 743) | def init_cpu_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 747) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_capture_cpu_graph (line 768) | def init_forward_metadata_capture_cpu_graph( method init_forward_metadata_replay_cuda_graph (line 789) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 812) | def get_cuda_graph_seq_len_fill_value(self): method get_cpu_graph_seq_len_fill_value (line 815) | def get_cpu_graph_seq_len_fill_value(self): method forward_decode (line 818) | def forward_decode( method forward_extend (line 850) | def forward_extend( method forward (line 882) | def forward( method update_mamba_state_after_mtp_verify (line 931) | def update_mamba_state_after_mtp_verify( FILE: python/sglang/srt/layers/attention/intel_amx_backend.py class IntelAMXAttnBackend (line 15) | class IntelAMXAttnBackend(AttentionBackend): method __init__ (line 16) | def __init__(self, model_runner: ModelRunner): method init_forward_metadata (line 40) | def init_forward_metadata(self, forward_batch: ForwardBatch): method get_cpu_graph_seq_len_fill_value (line 60) | def get_cpu_graph_seq_len_fill_value(self): method init_forward_metadata_capture_cpu_graph (line 63) | def init_forward_metadata_capture_cpu_graph( method init_cpu_graph_state (line 86) | def init_cpu_graph_state(self, max_bs: int, max_num_tokens: int): method forward_extend (line 89) | def forward_extend( method forward_decode (line 128) | def forward_decode( method support_triton (line 164) | def support_triton(self): FILE: python/sglang/srt/layers/attention/linear/gdn_backend.py class GDNKernelDispatcher (line 53) | class GDNKernelDispatcher: method __init__ (line 56) | def __init__( method packed_decode (line 125) | def packed_decode( method decode (line 158) | def decode( method extend (line 187) | def extend( method target_verify (line 212) | def target_verify( class GDNAttnBackend (line 242) | class GDNAttnBackend(MambaAttnBackendBase): method __init__ (line 245) | def __init__(self, model_runner: ModelRunner): method forward_decode (line 259) | def forward_decode( method forward_extend (line 334) | def forward_extend( FILE: python/sglang/srt/layers/attention/linear/kda_backend.py class KDAKernelDispatcher (line 35) | class KDAKernelDispatcher: method __init__ (line 38) | def __init__( method decode (line 66) | def decode( method extend (line 95) | def extend( class KDAAttnBackend (line 121) | class KDAAttnBackend(MambaAttnBackendBase): method __init__ (line 124) | def __init__(self, model_runner: ModelRunner): method forward_decode (line 130) | def forward_decode( method forward_extend (line 170) | def forward_extend( FILE: python/sglang/srt/layers/attention/linear/kernels/gdn_cutedsl.py class CuteDSLGDNKernel (line 9) | class CuteDSLGDNKernel(LinearAttnKernelBase): method decode (line 12) | def decode( method extend (line 43) | def extend(self, *args, **kwargs): method target_verify (line 46) | def target_verify(self, *args, **kwargs): FILE: python/sglang/srt/layers/attention/linear/kernels/gdn_flashinfer.py function _get_flashinfer_gdn_kernels (line 32) | def _get_flashinfer_gdn_kernels(): class FlashInferGDNKernel (line 73) | class FlashInferGDNKernel(LinearAttnKernelBase): method __init__ (line 83) | def __init__(self): method decode (line 112) | def decode( method extend (line 176) | def extend( method target_verify (line 251) | def target_verify( FILE: python/sglang/srt/layers/attention/linear/kernels/gdn_triton.py class TritonGDNKernel (line 34) | class TritonGDNKernel(LinearAttnKernelBase): method packed_decode (line 39) | def packed_decode( method decode (line 94) | def decode( method extend (line 125) | def extend( method target_verify (line 156) | def target_verify( FILE: python/sglang/srt/layers/attention/linear/kernels/kda_triton.py class TritonKDAKernel (line 15) | class TritonKDAKernel(LinearAttnKernelBase): method decode (line 18) | def decode( method extend (line 50) | def extend( FILE: python/sglang/srt/layers/attention/linear/kernels/kernel_backend.py class LinearAttnKernelBase (line 6) | class LinearAttnKernelBase(ABC): method decode (line 14) | def decode( method extend (line 31) | def extend( method target_verify (line 45) | def target_verify( FILE: python/sglang/srt/layers/attention/linear/lightning_attn.py function _fwd_diag_kernel (line 12) | def _fwd_diag_kernel( function _fwd_kv_parallel (line 141) | def _fwd_kv_parallel( function _fwd_kv_reduce (line 247) | def _fwd_kv_reduce( function _fwd_none_diag_kernel (line 312) | def _fwd_none_diag_kernel( class _attention (line 394) | class _attention(torch.autograd.Function): method forward (line 397) | def forward(ctx, q, k, v, s, kv_history): function lightning_attention (line 530) | def lightning_attention(q, k, v, ed, block_size=256, kv_history=None): function _linear_attn_decode_kernel (line 582) | def _linear_attn_decode_kernel( function linear_decode_forward_triton (line 663) | def linear_decode_forward_triton( class BailingLinearKernel (line 730) | class BailingLinearKernel: method jit_linear_forward_prefix (line 742) | def jit_linear_forward_prefix( FILE: python/sglang/srt/layers/attention/linear/lightning_backend.py class LightningAttentionBackend (line 22) | class LightningAttentionBackend(MambaAttnBackendBase): method __init__ (line 39) | def __init__(self, model_runner: ModelRunner): method init_forward_metadata (line 70) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_forward_metadata_capture_cuda_graph (line 78) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 93) | def init_forward_metadata_replay_cuda_graph( method _build_slope_tensor (line 112) | def _build_slope_tensor( method _prefill_and_mix_infer (line 157) | def _prefill_and_mix_infer( method _decode_infer (line 218) | def _decode_infer(self, q, k, v, kv_cache, state_indices_tensor, metad... method _linear_attention_entry (line 235) | def _linear_attention_entry( method forward_extend (line 272) | def forward_extend( method forward_decode (line 337) | def forward_decode( FILE: python/sglang/srt/layers/attention/linear/linear_metadata.py class BailingLinearMetadata (line 10) | class BailingLinearMetadata(ForwardMetadata): method prepare_decode (line 19) | def prepare_decode( method prepare_mixed (line 38) | def prepare_mixed( FILE: python/sglang/srt/layers/attention/linear/seg_la.py class SegLaMeta (line 18) | class SegLaMeta: function seg_la_kernel (line 34) | def seg_la_kernel( function seg_la_p_kernel (line 207) | def seg_la_p_kernel( function seg_la_s_kernel (line 348) | def seg_la_s_kernel( function seg_la_d_kernel (line 483) | def seg_la_d_kernel( function seg_la_mtp_kernel (line 554) | def seg_la_mtp_kernel( function seg_la_sum_kernel (line 646) | def seg_la_sum_kernel(T, O, DIM: tl.constexpr, NUM_BLOCK: tl.constexpr): function seg_la_fwd (line 657) | def seg_la_fwd( FILE: python/sglang/srt/layers/attention/linear/utils.py class LinearAttnKernelBackend (line 15) | class LinearAttnKernelBackend(Enum): method is_triton (line 20) | def is_triton(self): method is_cutedsl (line 23) | def is_cutedsl(self): method is_flashinfer (line 26) | def is_flashinfer(self): function initialize_linear_attn_config (line 34) | def initialize_linear_attn_config(server_args: ServerArgs): function get_linear_attn_decode_backend (line 51) | def get_linear_attn_decode_backend() -> LinearAttnKernelBackend: function get_linear_attn_prefill_backend (line 61) | def get_linear_attn_prefill_backend() -> LinearAttnKernelBackend: FILE: python/sglang/srt/layers/attention/mamba/causal_conv1d.py function _get_seq_lens_cpu (line 25) | def _get_seq_lens_cpu(query_start_loc, x): function causal_conv1d_fn (line 31) | def causal_conv1d_fn( function causal_conv1d_update (line 112) | def causal_conv1d_update( FILE: python/sglang/srt/layers/attention/mamba/causal_conv1d_triton.py function _causal_conv1d_fwd_kernel (line 15) | def _causal_conv1d_fwd_kernel( # continuous batching function causal_conv1d_fn (line 378) | def causal_conv1d_fn( function _causal_conv1d_update_kernel (line 571) | def _causal_conv1d_update_kernel( function causal_conv1d_update (line 980) | def causal_conv1d_update( FILE: python/sglang/srt/layers/attention/mamba/mamba.py function mamba_v2_sharded_weight_loader (line 56) | def mamba_v2_sharded_weight_loader( class MambaMixer2 (line 155) | class MambaMixer2(torch.nn.Module): method __init__ (line 166) | def __init__( method forward (line 390) | def forward( method mamba_type (line 696) | def mamba_type(self) -> str: FILE: python/sglang/srt/layers/attention/mamba/mamba2_metadata.py class ForwardMetadata (line 27) | class ForwardMetadata: class Mamba2Metadata (line 46) | class Mamba2Metadata(ForwardMetadata): class MixedMetadata (line 54) | class MixedMetadata: method _query_start_loc_to_chunk_indices_offsets (line 69) | def _query_start_loc_to_chunk_indices_offsets( method prepare_decode (line 153) | def prepare_decode( method prepare_mixed (line 175) | def prepare_mixed( FILE: python/sglang/srt/layers/attention/mamba/mamba_state_scatter_triton.py function _fused_mamba_state_scatter_with_mask_kernel (line 15) | def _fused_mamba_state_scatter_with_mask_kernel( function fused_mamba_state_scatter_with_mask (line 90) | def fused_mamba_state_scatter_with_mask( FILE: python/sglang/srt/layers/attention/mamba/mixer2_rms_norm_gated.py class Mixer2RMSNormGated (line 19) | class Mixer2RMSNormGated(MultiPlatformOp): method __init__ (line 20) | def __init__( method forward_native (line 48) | def forward_native( method forward_cuda (line 99) | def forward_cuda( FILE: python/sglang/srt/layers/attention/mamba/ops/layernorm_gated.py function _layer_norm_fwd_1pass_kernel (line 14) | def _layer_norm_fwd_1pass_kernel( function _layer_norm_fwd (line 77) | def _layer_norm_fwd( function rms_norm_gated (line 145) | def rms_norm_gated( FILE: python/sglang/srt/layers/attention/mamba/ops/mamba_ssm.py function softplus (line 21) | def softplus(dt): function softplus (line 28) | def softplus(dt): function _selective_scan_update_kernel (line 68) | def _selective_scan_update_kernel( function selective_state_update (line 300) | def selective_state_update( FILE: python/sglang/srt/layers/attention/mamba/ops/ssd_bmm.py function _bmm_chunk_fwd_kernel (line 19) | def _bmm_chunk_fwd_kernel( function _bmm_chunk_fwd (line 131) | def _bmm_chunk_fwd(a, b, chunk_size, seq_idx=None, causal=False, output_... FILE: python/sglang/srt/layers/attention/mamba/ops/ssd_chunk_scan.py function _chunk_scan_fwd_kernel (line 20) | def _chunk_scan_fwd_kernel( function _chunk_scan_fwd (line 422) | def _chunk_scan_fwd( FILE: python/sglang/srt/layers/attention/mamba/ops/ssd_chunk_state.py function _chunk_cumsum_fwd_kernel (line 21) | def _chunk_cumsum_fwd_kernel( function _chunk_state_fwd_kernel (line 113) | def _chunk_state_fwd_kernel( function _chunk_state_varlen_kernel (line 263) | def _chunk_state_varlen_kernel( function _chunk_cumsum_fwd (line 444) | def _chunk_cumsum_fwd( function _chunk_state_fwd (line 496) | def _chunk_state_fwd( function chunk_state_varlen (line 568) | def chunk_state_varlen( FILE: python/sglang/srt/layers/attention/mamba/ops/ssd_combined.py function is_int_pow_2 (line 24) | def is_int_pow_2(n): function _mamba_chunk_scan_combined_fwd (line 28) | def _mamba_chunk_scan_combined_fwd( function mamba_chunk_scan_combined (line 181) | def mamba_chunk_scan_combined( FILE: python/sglang/srt/layers/attention/mamba/ops/ssd_state_passing.py function _state_passing_fwd_kernel (line 17) | def _state_passing_fwd_kernel( function _state_passing_fwd (line 171) | def _state_passing_fwd( FILE: python/sglang/srt/layers/attention/mamba/ops/ssu_dispatch.py class MambaSSUBackend (line 15) | class MambaSSUBackend(ABC): method name (line 18) | def name(self) -> str: method __call__ (line 22) | def __call__( class TritonSSUBackend (line 45) | class TritonSSUBackend(MambaSSUBackend): method __init__ (line 48) | def __init__(self) -> None: method name (line 56) | def name(self) -> str: method __call__ (line 59) | def __call__( class FlashInferSSUBackend (line 102) | class FlashInferSSUBackend(MambaSSUBackend): method __init__ (line 105) | def __init__(self) -> None: method name (line 111) | def name(self) -> str: method __call__ (line 114) | def __call__( function initialize_mamba_selective_state_update_backend (line 170) | def initialize_mamba_selective_state_update_backend(server_args: ServerA... function selective_state_update (line 207) | def selective_state_update( FILE: python/sglang/srt/layers/attention/merge_state.py function _supported_dtypes (line 15) | def _supported_dtypes(o: torch.Tensor) -> bool: function _supported_headdim (line 19) | def _supported_headdim(o: torch.Tensor) -> bool: function merge_state (line 26) | def merge_state( FILE: python/sglang/srt/layers/attention/nsa/dequant_k_cache.py function dequantize_k_cache (line 6) | def dequantize_k_cache(quant_k_cache): function _dequantize_k_cache_ref (line 10) | def _dequantize_k_cache_ref( function _dequantize_k_cache_fast_wrapped (line 53) | def _dequantize_k_cache_fast_wrapped( function _dequantize_k_cache_fast (line 76) | def _dequantize_k_cache_fast(quant_k_cache, group_size: int = 128): function _dequantize_k_cache_fast_kernel (line 121) | def _dequantize_k_cache_fast_kernel( function dequantize_k_cache_paged (line 168) | def dequantize_k_cache_paged( function _dequantize_k_cache_paged_kernel (line 235) | def _dequantize_k_cache_paged_kernel( FILE: python/sglang/srt/layers/attention/nsa/index_buf_accessor.py class GetK (line 22) | class GetK: method execute (line 24) | def execute(cls, *args, **kwargs): method slow (line 28) | def slow( method torch_fast (line 47) | def torch_fast( method triton (line 77) | def triton( class GetS (line 94) | class GetS: method execute (line 96) | def execute(cls, *args, **kwargs): method slow (line 100) | def slow( method torch_fast (line 119) | def torch_fast( method triton (line 146) | def triton( class GetKAndS (line 163) | class GetKAndS: method execute (line 165) | def execute(cls, *args, **kwargs): method triton (line 169) | def triton( class SetK (line 199) | class SetK: method execute (line 201) | def execute(cls, *args, buf, **kwargs): method slow (line 205) | def slow( method torch_fast (line 221) | def torch_fast( class SetS (line 249) | class SetS: method execute (line 251) | def execute(cls, *args, buf, **kwargs): method slow (line 255) | def slow( method torch_fast (line 271) | def torch_fast( class SetKAndS (line 301) | class SetKAndS: method execute (line 303) | def execute(cls, *args, buf, **kwargs): method vanilla (line 324) | def vanilla(cls, pool, buf, loc, index_k, index_k_scale): method triton (line 329) | def triton(cls, pool, buf, loc, index_k, index_k_scale): function _set_k_and_s_triton (line 339) | def _set_k_and_s_triton( function _set_k_and_s_triton_kernel (line 413) | def _set_k_and_s_triton_kernel( function _get_k_triton (line 455) | def _get_k_triton( function _get_k_triton_kernel (line 494) | def _get_k_triton_kernel( function _get_s_triton (line 533) | def _get_s_triton( function _get_s_triton_kernel (line 572) | def _get_s_triton_kernel( function _get_k_and_s_triton (line 610) | def _get_k_and_s_triton( function _get_k_and_s_triton_kernel (line 677) | def _get_k_and_s_triton_kernel( FILE: python/sglang/srt/layers/attention/nsa/nsa_backend_mtp_precompute.py class PrecomputedMetadata (line 22) | class PrecomputedMetadata: function compute_cu_seqlens (line 53) | def compute_cu_seqlens(seqlens: torch.Tensor) -> torch.Tensor: class NativeSparseAttnBackendMTPPrecomputeMixin (line 61) | class NativeSparseAttnBackendMTPPrecomputeMixin: method _precompute_replay_metadata (line 68) | def _precompute_replay_metadata( method _precompute_decode_mode (line 115) | def _precompute_decode_mode( method _precompute_target_verify_mode (line 170) | def _precompute_target_verify_mode( method _precompute_draft_extend_mode (line 249) | def _precompute_draft_extend_mode( FILE: python/sglang/srt/layers/attention/nsa/nsa_indexer.py class BaseIndexerMetadata (line 63) | class BaseIndexerMetadata(ABC): method get_seqlens_int32 (line 65) | def get_seqlens_int32(self) -> torch.Tensor: method get_page_table_64 (line 71) | def get_page_table_64(self) -> torch.Tensor: method get_page_table_1 (line 78) | def get_page_table_1(self) -> torch.Tensor: method get_seqlens_expanded (line 85) | def get_seqlens_expanded(self) -> torch.Tensor: method get_indexer_kvcache_range (line 90) | def get_indexer_kvcache_range(self) -> Tuple[torch.Tensor, torch.Tensor]: method get_indexer_seq_len_cpu (line 95) | def get_indexer_seq_len_cpu(self) -> torch.Tensor: method get_indexer_seq_len (line 100) | def get_indexer_seq_len(self) -> torch.Tensor: method get_nsa_extend_len_cpu (line 105) | def get_nsa_extend_len_cpu(self) -> List[int]: method get_token_to_batch_idx (line 110) | def get_token_to_batch_idx(self) -> torch.Tensor: method topk_transform (line 116) | def topk_transform( function rotate_activation (line 134) | def rotate_activation(x: torch.Tensor) -> torch.Tensor: class Indexer (line 149) | class Indexer(MultiPlatformOp): method __init__ (line 150) | def __init__( method _with_real_sm_count (line 230) | def _with_real_sm_count(self): method _weights_proj_bf16_in_fp32_out (line 244) | def _weights_proj_bf16_in_fp32_out(self, x: torch.Tensor) -> torch.Ten... method _project_and_scale_head_gates (line 261) | def _project_and_scale_head_gates(self, x: torch.Tensor): method _get_logits_head_gate (line 267) | def _get_logits_head_gate(self, x: torch.Tensor, q_scale: torch.Tensor): method _get_q_k_bf16 (line 273) | def _get_q_k_bf16( method _get_k_bf16 (line 346) | def _get_k_bf16( method _get_topk_paged (line 365) | def _get_topk_paged( method _should_chunk_mqa_logits (line 478) | def _should_chunk_mqa_logits( method _get_topk_ragged (line 497) | def _get_topk_ragged( method _forward_cuda_k_only (line 672) | def _forward_cuda_k_only( method _get_topk_ragged_with_cp (line 714) | def _get_topk_ragged_with_cp( method forward_indexer (line 863) | def forward_indexer( method _store_index_k_cache (line 946) | def _store_index_k_cache( method forward_cuda (line 998) | def forward_cuda( method forward_npu (line 1215) | def forward_npu( method do_npu_cp_balance_indexer (line 1427) | def do_npu_cp_balance_indexer( function scattered_to_tp_attn_full (line 1483) | def scattered_to_tp_attn_full( FILE: python/sglang/srt/layers/attention/nsa/nsa_mtp_verification.py function verify_single_backend_fused_metadata_copy (line 11) | def verify_single_backend_fused_metadata_copy( function verify_multi_backend_fused_metadata_copy (line 207) | def verify_multi_backend_fused_metadata_copy( FILE: python/sglang/srt/layers/attention/nsa/quant_k_cache.py function quantize_k_cache (line 6) | def quantize_k_cache(cache_k): function quantize_k_cache_separate (line 10) | def quantize_k_cache_separate( function _quantize_k_cache_ref (line 59) | def _quantize_k_cache_ref( function _quantize_k_cache_fast_wrapped (line 112) | def _quantize_k_cache_fast_wrapped( function _quantize_k_cache_fast (line 133) | def _quantize_k_cache_fast(k_nope, k_rope, group_size: int = 128): function _quantize_k_cache_fast_separate (line 190) | def _quantize_k_cache_fast_separate(k_nope, k_rope, group_size: int = 128): function _quantize_k_cache_fast_kernel (line 268) | def _quantize_k_cache_fast_kernel( function run_ans (line 445) | def run_ans(): FILE: python/sglang/srt/layers/attention/nsa/tilelang_kernel.py function fast_log2_ceil (line 31) | def fast_log2_ceil(x): function fast_pow2 (line 38) | def fast_pow2(x): function fast_round_scale (line 43) | def fast_round_scale(amax, fp8_max_inv): function act_quant_kernel (line 48) | def act_quant_kernel( function act_quant (line 98) | def act_quant( function fp8_index_kernel (line 129) | def fp8_index_kernel(h: int, d: int, clear_accum=True): function fp8_index (line 185) | def fp8_index( function sparse_attention_fwd_kernel_v1 (line 218) | def sparse_attention_fwd_kernel_v1( function sparse_attention_fwd_kernel_v2 (line 395) | def sparse_attention_fwd_kernel_v2( function sparse_mla_fwd_decode_partial (line 783) | def sparse_mla_fwd_decode_partial( function sparse_mla_fwd_decode_combine (line 933) | def sparse_mla_fwd_decode_combine( function tilelang_sparse_fwd (line 1012) | def tilelang_sparse_fwd( FILE: python/sglang/srt/layers/attention/nsa/transform_index.py function transform_index_page_table_prefill (line 8) | def transform_index_page_table_prefill(**kwargs): function transform_index_page_table_decode (line 12) | def transform_index_page_table_decode(**kwargs): function transform_index_page_table_decode_kernel (line 17) | def transform_index_page_table_decode_kernel( function transform_index_page_table_decode_fast (line 38) | def transform_index_page_table_decode_fast( function transform_index_page_table_prefill_fast (line 72) | def transform_index_page_table_prefill_fast( function transform_index_page_table_decode_ref (line 94) | def transform_index_page_table_decode_ref( function transform_index_page_table_prefill_ref (line 115) | def transform_index_page_table_prefill_ref( FILE: python/sglang/srt/layers/attention/nsa/triton_kernel.py function _act_quant_kernel (line 10) | def _act_quant_kernel( function act_quant (line 86) | def act_quant( function _get_valid_kv_indices_kernel (line 140) | def _get_valid_kv_indices_kernel( function get_valid_kv_indices (line 173) | def get_valid_kv_indices( FILE: python/sglang/srt/layers/attention/nsa/utils.py function compute_nsa_seqlens (line 30) | def compute_nsa_seqlens(original_seq_lens, nsa_index_topk: int): function is_nsa_enable_prefill_cp (line 34) | def is_nsa_enable_prefill_cp(): function is_nsa_prefill_cp_in_seq_split (line 38) | def is_nsa_prefill_cp_in_seq_split(): function is_nsa_prefill_cp_round_robin_split (line 45) | def is_nsa_prefill_cp_round_robin_split(): function can_nsa_prefill_cp_round_robin_split (line 52) | def can_nsa_prefill_cp_round_robin_split(forward_batch: "ForwardBatch"): function nsa_cp_round_robin_split_data (line 65) | def nsa_cp_round_robin_split_data(input_: Union[torch.Tensor, List]): function cal_padded_tokens (line 96) | def cal_padded_tokens(forward_batch: "ForwardBatch"): function pad_nsa_cache_seqlens (line 118) | def pad_nsa_cache_seqlens(forward_batch: "ForwardBatch", nsa_cache_seqle... class NSAContextParallelMetadata (line 139) | class NSAContextParallelMetadata: function can_cp_split (line 158) | def can_cp_split(seq_len: int, cp_size: int, use_nsa: bool, forward_batch): function cp_split_and_rebuild_data (line 182) | def cp_split_and_rebuild_data(forward_batch, input_: torch.Tensor): function cp_split_and_rebuild_position (line 199) | def cp_split_and_rebuild_position(forward_batch, positions: torch.Tensor): function nsa_cp_round_robin_split_q_seqs_kernel (line 219) | def nsa_cp_round_robin_split_q_seqs_kernel( function nsa_cp_round_robin_split_q_seqs_cpu (line 240) | def nsa_cp_round_robin_split_q_seqs_cpu(extend_seqs): function nsa_cp_round_robin_split_q_seqs (line 255) | def nsa_cp_round_robin_split_q_seqs( function nsa_use_prefill_cp (line 284) | def nsa_use_prefill_cp(forward_batch, nsa_enable_prefill_cp=None): function cp_attn_tp_all_gather_reorganazied_into_tensor (line 297) | def cp_attn_tp_all_gather_reorganazied_into_tensor( function cp_all_gather_rerange_output (line 341) | def cp_all_gather_rerange_output(input_tensor, cp_size, forward_batch, s... function calculate_cp_seq_idx (line 407) | def calculate_cp_seq_idx(cp_chunks_len, seqs_len): function prepare_input_dp_with_cp_dsa (line 455) | def prepare_input_dp_with_cp_dsa( FILE: python/sglang/srt/layers/attention/nsa_backend.py class NSAFlashMLAMetadata (line 76) | class NSAFlashMLAMetadata: method slice (line 82) | def slice(self, sli): method copy_ (line 88) | def copy_(self, other: "NSAFlashMLAMetadata"): class NSAMetadata (line 94) | class NSAMetadata: class TopkTransformMethod (line 147) | class TopkTransformMethod(IntEnum): function _compiled_cat (line 155) | def _compiled_cat(tensors: list[torch.Tensor], dim: int = -1) -> torch.T... function _cat (line 159) | def _cat(tensors: list[torch.Tensor], dim: int = -1) -> torch.Tensor: class NSAIndexerMetadata (line 176) | class NSAIndexerMetadata(BaseIndexerMetadata): method get_seqlens_int32 (line 181) | def get_seqlens_int32(self) -> torch.Tensor: method get_page_table_64 (line 184) | def get_page_table_64(self) -> torch.Tensor: method get_page_table_1 (line 187) | def get_page_table_1(self) -> torch.Tensor: method get_seqlens_expanded (line 190) | def get_seqlens_expanded(self) -> torch.Tensor: method get_cu_seqlens_k (line 193) | def get_cu_seqlens_k(self) -> torch.Tensor: method get_indexer_kvcache_range (line 196) | def get_indexer_kvcache_range(self) -> Tuple[torch.Tensor, torch.Tensor]: method get_indexer_seq_len (line 199) | def get_indexer_seq_len(self) -> torch.Tensor: method get_indexer_seq_len_cpu (line 202) | def get_indexer_seq_len_cpu(self) -> torch.Tensor: method get_nsa_extend_len_cpu (line 205) | def get_nsa_extend_len_cpu(self) -> List[int]: method get_token_to_batch_idx (line 208) | def get_token_to_batch_idx(self) -> torch.Tensor: method topk_transform (line 211) | def topk_transform( class NativeSparseAttnBackend (line 278) | class NativeSparseAttnBackend( method __init__ (line 281) | def __init__( method get_device_int32_arange (line 362) | def get_device_int32_arange(self, l: int) -> torch.Tensor: method _transform_table_1_to_real (line 370) | def _transform_table_1_to_real(self, page_table: torch.Tensor) -> torc... method init_forward_metadata (line 380) | def init_forward_metadata(self, forward_batch: ForwardBatch): method _cal_indexer_k_start_end (line 656) | def _cal_indexer_k_start_end( method init_cuda_graph_state (line 732) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 772) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 927) | def init_forward_metadata_replay_cuda_graph( method init_forward_metadata_replay_cuda_graph_from_precomputed (line 1093) | def init_forward_metadata_replay_cuda_graph_from_precomputed( method forward_extend (line 1247) | def forward_extend( method forward_decode (line 1446) | def forward_decode( method _forward_fa3 (line 1587) | def _forward_fa3( method _forward_flashmla_sparse (line 1625) | def _forward_flashmla_sparse( method _forward_flashmla_kv (line 1674) | def _forward_flashmla_kv( method _forward_standard_mha (line 1719) | def _forward_standard_mha( method _forward_tilelang (line 1783) | def _forward_tilelang( method _forward_aiter (line 1801) | def _forward_aiter( method _forward_aiter_extend (line 1841) | def _forward_aiter_extend( method _forward_trtllm (line 1890) | def _forward_trtllm( method _pad_topk_indices (line 2007) | def _pad_topk_indices( method get_cuda_graph_seq_len_fill_value (line 2028) | def get_cuda_graph_seq_len_fill_value(self): method set_nsa_prefill_impl (line 2032) | def set_nsa_prefill_impl(self, forward_batch: Optional[ForwardBatch] =... method get_topk_transform_method (line 2081) | def get_topk_transform_method(self) -> TopkTransformMethod: method get_indexer_metadata (line 2096) | def get_indexer_metadata( method _compute_flashmla_metadata (line 2105) | def _compute_flashmla_metadata(self, cache_seqlens: torch.Tensor, seq_... class NativeSparseAttnMultiStepBackend (line 2125) | class NativeSparseAttnMultiStepBackend: method __init__ (line 2127) | def __init__( method init_forward_metadata (line 2144) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 2148) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 2152) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw... method init_forward_metadata_replay_cuda_graph (line 2164) | def init_forward_metadata_replay_cuda_graph( FILE: python/sglang/srt/layers/attention/tbo_backend.py class TboAttnBackend (line 13) | class TboAttnBackend(AttentionBackend): method __init__ (line 14) | def __init__(self, primary: AttentionBackend, children: List[Attention... method init_new (line 20) | def init_new(cls, creator: Callable[[], AttentionBackend]): method init_forward_metadata (line 26) | def init_forward_metadata(self, forward_batch: "ForwardBatch"): method init_cuda_graph_state (line 35) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 41) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 72) | def init_forward_metadata_replay_cuda_graph( method _init_forward_metadata_cuda_graph_children (line 106) | def _init_forward_metadata_cuda_graph_children( method get_cuda_graph_seq_len_fill_value (line 176) | def get_cuda_graph_seq_len_fill_value(self): method forward_extend (line 182) | def forward_extend(self, *args, **kwargs): method forward_decode (line 185) | def forward_decode(self, *args, **kwargs): method get_indexer_metadata (line 188) | def get_indexer_metadata(self, layer_id: int, forward_batch: "ForwardB... function _init_forward_metadata_cuda_graph_split (line 192) | def _init_forward_metadata_cuda_graph_split( FILE: python/sglang/srt/layers/attention/torch_flex_backend.py class TorchFlexAttnBackend (line 17) | class TorchFlexAttnBackend(AttentionBackend): method __init__ (line 18) | def __init__(self, model_runner: ModelRunner): method init_forward_metadata (line 26) | def init_forward_metadata(self, forward_batch: ForwardBatch): method _causal_mask (line 69) | def _causal_mask(self, b, h, q_idx, kv_idx): method _decode_mask (line 72) | def _decode_mask(self, b, h, q_idx, kv_idx): method _run_flex_forward_extend (line 75) | def _run_flex_forward_extend( method _run_flex_forward_decode (line 165) | def _run_flex_forward_decode( method forward_extend (line 236) | def forward_extend( method forward_decode (line 282) | def forward_decode( method support_triton (line 324) | def support_triton(self): FILE: python/sglang/srt/layers/attention/torch_native_backend.py class TorchNativeAttnBackend (line 17) | class TorchNativeAttnBackend(AttentionBackend): method __init__ (line 18) | def __init__(self, model_runner: ModelRunner): method init_forward_metadata (line 23) | def init_forward_metadata(self, forward_batch: ForwardBatch): method _run_sdpa_forward_extend (line 27) | def _run_sdpa_forward_extend( method _run_sdpa_forward_decode (line 117) | def _run_sdpa_forward_decode( method forward_extend (line 192) | def forward_extend( method forward_decode (line 239) | def forward_decode( method support_triton (line 285) | def support_triton(self): FILE: python/sglang/srt/layers/attention/triton_backend.py function logit_capping_mod (line 30) | def logit_capping_mod(logit_capping_method, logit_cap): class ForwardMetadata (line 39) | class ForwardMetadata: class TritonAttnBackend (line 56) | class TritonAttnBackend(AttentionBackend): method __init__ (line 57) | def __init__( method get_num_kv_splits (line 184) | def get_num_kv_splits( method init_forward_metadata (line 236) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 441) | def init_cuda_graph_state( method init_forward_metadata_capture_cuda_graph (line 508) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 664) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 790) | def get_cuda_graph_seq_len_fill_value(self): method get_verify_buffers_to_fill_after_draft (line 793) | def get_verify_buffers_to_fill_after_draft(self): method update_verify_buffers_to_fill_after_draft (line 801) | def update_verify_buffers_to_fill_after_draft( method forward_extend (line 806) | def forward_extend( method _forward_extend_unified (line 908) | def _forward_extend_unified( method forward_decode (line 1038) | def forward_decode( class TritonMultiStepDraftBackend (line 1113) | class TritonMultiStepDraftBackend: method __init__ (line 1119) | def __init__( method common_template (line 1154) | def common_template( method init_forward_metadata (line 1195) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 1216) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 1237) | def init_forward_metadata_capture_cuda_graph(self, forward_batch: Forw... method init_forward_metadata_replay_cuda_graph (line 1251) | def init_forward_metadata_replay_cuda_graph( function get_num_kv_splits_triton (line 1270) | def get_num_kv_splits_triton( function update_sliding_window_buffer (line 1321) | def update_sliding_window_buffer( function update_sliding_window_buffer_cuda_graph (line 1361) | def update_sliding_window_buffer_cuda_graph( FILE: python/sglang/srt/layers/attention/triton_ops/decode_attention.py function tanh (line 39) | def tanh(x): function _fwd_kernel_stage1 (line 45) | def _fwd_kernel_stage1( function _decode_att_m_fwd (line 182) | def _decode_att_m_fwd( function _fwd_grouped_kernel_stage1 (line 253) | def _fwd_grouped_kernel_stage1( function _decode_grouped_att_m_fwd (line 426) | def _decode_grouped_att_m_fwd( function _fwd_kernel_stage2 (line 516) | def _fwd_kernel_stage2( function _decode_softmax_reducev_fwd (line 586) | def _decode_softmax_reducev_fwd( function decode_attention_fwd_normal (line 636) | def decode_attention_fwd_normal( function decode_attention_fwd_grouped (line 681) | def decode_attention_fwd_grouped( function decode_attention_fwd (line 726) | def decode_attention_fwd( FILE: python/sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py function tanh (line 23) | def tanh(x): function _fwd_kernel_flash_decode_stage1 (line 29) | def _fwd_kernel_flash_decode_stage1( function _fwd_kernel_flash_decode_stage2 (line 140) | def _fwd_kernel_flash_decode_stage2( function flash_decode_stage1 (line 192) | def flash_decode_stage1( function flash_decode_stage2 (line 255) | def flash_decode_stage2(mid_out, mid_out_logexpsum, B_Seqlen, O, block_s... function flash_decode_attention_fwd (line 284) | def flash_decode_attention_fwd( function _sparse_fwd_kernel_flash_decode_stage1 (line 329) | def _sparse_fwd_kernel_flash_decode_stage1( # Double Sparsity's approxi... function _sparse_fwd_kernel_flash_decode_stage2 (line 401) | def _sparse_fwd_kernel_flash_decode_stage2( function _sparse_fwd_kernel_flash_decode_stage3 (line 517) | def _sparse_fwd_kernel_flash_decode_stage3( function sparse_flash_decode_stage1 (line 561) | def sparse_flash_decode_stage1( function sparse_flash_decode_stage2 (line 613) | def sparse_flash_decode_stage2( function sparse_flash_decode_stage3 (line 674) | def sparse_flash_decode_stage3(Seqlen, mid_out, mid_out_logexpsum, O, bl... function flash_decode_sparse_attention_fwd (line 700) | def flash_decode_sparse_attention_fwd( function _fwd_kernel (line 782) | def _fwd_kernel( function extend_attention_fwd (line 994) | def extend_attention_fwd( FILE: python/sglang/srt/layers/attention/triton_ops/extend_attention.py function _get_block_sizes_for_extend_attention (line 35) | def _get_block_sizes_for_extend_attention(Lq: int, Lv: int): function tanh (line 108) | def tanh(x): function _copy_unified_indices_kernel (line 114) | def _copy_unified_indices_kernel( function build_unified_kv_indices (line 171) | def build_unified_kv_indices( function _fwd_kernel (line 220) | def _fwd_kernel( function extend_attention_fwd (line 552) | def extend_attention_fwd( function redundant_attention (line 660) | def redundant_attention( function _fwd_kernel_unified (line 698) | def _fwd_kernel_unified( function extend_attention_fwd_unified (line 950) | def extend_attention_fwd_unified( FILE: python/sglang/srt/layers/attention/triton_ops/merge_state.py function merge_state_kernel (line 9) | def merge_state_kernel( function merge_state_triton (line 66) | def merge_state_triton( FILE: python/sglang/srt/layers/attention/triton_ops/prefill_attention.py function _fwd_kernel (line 35) | def _fwd_kernel( function context_attention_fwd (line 170) | def context_attention_fwd( FILE: python/sglang/srt/layers/attention/triton_ops/rocm_mla_decode_rope.py function is_hip (line 31) | def is_hip(): function tanh (line 39) | def tanh(x): function _fwd_grouped_kernel_stage1_rope (line 45) | def _fwd_grouped_kernel_stage1_rope( function _decode_grouped_att_m_fwd_rope (line 310) | def _decode_grouped_att_m_fwd_rope( function decode_attention_fwd_grouped_rope (line 402) | def decode_attention_fwd_grouped_rope( FILE: python/sglang/srt/layers/attention/triton_ops/trtllm_fp8_kv_kernel.py function _process_kv_tensor (line 26) | def _process_kv_tensor( function _fused_fp8_set_kv_buffer_kernel (line 88) | def _fused_fp8_set_kv_buffer_kernel( function fused_fp8_set_kv_buffer (line 204) | def fused_fp8_set_kv_buffer( function _naive_fp8_set_kv_buffer (line 420) | def _naive_fp8_set_kv_buffer( FILE: python/sglang/srt/layers/attention/trtllm_mha_backend.py class TRTLLMMHAMetadata (line 48) | class TRTLLMMHAMetadata: class TRTLLMHAAttnBackend (line 65) | class TRTLLMHAAttnBackend(FlashInferAttnBackend): method __init__ (line 68) | def __init__( method _maybe_translate_swa (line 152) | def _maybe_translate_swa( method _alloc_swa_page_table (line 163) | def _alloc_swa_page_table( method _copy_swa_page_table (line 171) | def _copy_swa_page_table( method _get_layer_cache_loc (line 183) | def _get_layer_cache_loc( method _bind_swa_page_table (line 195) | def _bind_swa_page_table( method _get_layer_page_table (line 203) | def _get_layer_page_table( method init_cuda_graph_state (line 214) | def init_cuda_graph_state( method init_forward_metadata_capture_cuda_graph (line 306) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 443) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 548) | def get_cuda_graph_seq_len_fill_value(self) -> int: method _should_use_fused_fp8_path (line 552) | def _should_use_fused_fp8_path(self, save_kv_cache: bool, k: torch.Ten... method _fused_fp8_set_kv_buffer (line 556) | def _fused_fp8_set_kv_buffer( method init_forward_metadata (line 582) | def init_forward_metadata(self, forward_batch: ForwardBatch): method forward_decode (line 695) | def forward_decode( method forward_extend (line 781) | def forward_extend( class TRTLLMHAAttnMultiStepDraftBackend (line 883) | class TRTLLMHAAttnMultiStepDraftBackend(FlashInferMultiStepDraftBackend): method __init__ (line 886) | def __init__( method init_forward_metadata (line 899) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 903) | def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int): method init_forward_metadata_capture_cuda_graph (line 907) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 925) | def init_forward_metadata_replay_cuda_graph( FILE: python/sglang/srt/layers/attention/trtllm_mla_backend.py function pad_draft_extend_query_kernel (line 56) | def pad_draft_extend_query_kernel( function unpad_draft_extend_output_kernel (line 129) | def unpad_draft_extend_output_kernel( function _quantize_fp8_qkv (line 199) | def _quantize_fp8_qkv(q, k, v, layer): class TRTLLMMLAPrefillMetadata (line 233) | class TRTLLMMLAPrefillMetadata: class TRTLLMMLADecodeMetadata (line 243) | class TRTLLMMLADecodeMetadata: class TRTLLMMLABackend (line 255) | class TRTLLMMLABackend(FlashInferMLAAttnBackend): method __init__ (line 258) | def __init__( method _calc_padded_blocks (line 318) | def _calc_padded_blocks(self, max_seq_len: int) -> int: method _create_block_kv_indices (line 341) | def _create_block_kv_indices( method init_cuda_graph_state (line 379) | def init_cuda_graph_state( method init_forward_metadata_capture_cuda_graph (line 426) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 509) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 574) | def get_cuda_graph_seq_len_fill_value(self) -> int: method init_forward_metadata (line 578) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_mha_chunk_metadata (line 673) | def init_mha_chunk_metadata(self, forward_batch: ForwardBatch): method pad_draft_extend_query (line 676) | def pad_draft_extend_query( method unpad_draft_extend_output (line 708) | def unpad_draft_extend_output( method forward_decode (line 756) | def forward_decode( method forward_extend (line 884) | def forward_extend( class TRTLLMMLAMultiStepDraftBackend (line 1150) | class TRTLLMMLAMultiStepDraftBackend(FlashInferMLAMultiStepDraftBackend): method __init__ (line 1153) | def __init__( FILE: python/sglang/srt/layers/attention/utils.py function create_flashinfer_kv_indices_triton (line 17) | def create_flashinfer_kv_indices_triton( function get_num_page_per_block_flashmla (line 55) | def get_num_page_per_block_flashmla(page_size: int = 64) -> int: function create_flashmla_kv_indices_triton (line 61) | def create_flashmla_kv_indices_triton( function concat_and_cast_mha_k_kernel (line 115) | def concat_and_cast_mha_k_kernel( function concat_and_cast_mha_k_triton (line 153) | def concat_and_cast_mha_k_triton( function pad_sequence_with_mask_kernel (line 192) | def pad_sequence_with_mask_kernel( function pad_sequence_with_mask (line 245) | def pad_sequence_with_mask( function seqlens_expand_kernel (line 290) | def seqlens_expand_kernel( function seqlens_expand_triton (line 316) | def seqlens_expand_triton( function canonicalize_stride (line 357) | def canonicalize_stride(tensor: torch.Tensor) -> torch.Tensor: function mla_quantize_and_rope_for_fp8 (line 385) | def mla_quantize_and_rope_for_fp8( function concat_mla_absorb_q_general (line 470) | def concat_mla_absorb_q_general(q_nope, q_rope): function reshape_and_cache_flash (line 478) | def reshape_and_cache_flash( function launch_reshape_and_cache_flash (line 601) | def launch_reshape_and_cache_flash( FILE: python/sglang/srt/layers/attention/vision.py function flash_attn_func (line 48) | def flash_attn_func(*args, ver: int = 3, **kwargs): class SingletonCache (line 105) | class SingletonCache: method set_data (line 108) | def set_data(self, value: Any) -> None: method get_data (line 111) | def get_data(self) -> Optional[Any]: method empty (line 114) | def empty(self) -> bool: function _get_cu_seqlens_for_shape (line 120) | def _get_cu_seqlens_for_shape(batch_size: int, seqlen: int, device) -> t... function resolve_seqlens (line 135) | def resolve_seqlens( class VisionSdpaAttention (line 156) | class VisionSdpaAttention(nn.Module): method __init__ (line 162) | def __init__( method _generate_mask_cache (line 183) | def _generate_mask_cache( method generate_patch_attention_mask (line 215) | def generate_patch_attention_mask( method forward (line 237) | def forward( class VisionTritonAttention (line 309) | class VisionTritonAttention(nn.Module): method __init__ (line 314) | def __init__( method forward (line 324) | def forward( class VisionFlash3Attention (line 380) | class VisionFlash3Attention(nn.Module): method __init__ (line 381) | def __init__( method forward (line 393) | def forward( class VisionFlash4Attention (line 439) | class VisionFlash4Attention(nn.Module): method __init__ (line 440) | def __init__( method forward (line 448) | def forward( class VisionFlashInferAttention (line 491) | class VisionFlashInferAttention(nn.Module): method __init__ (line 492) | def __init__( method forward (line 503) | def forward( class VisionAiterAttention (line 613) | class VisionAiterAttention(nn.Module): method __init__ (line 614) | def __init__( method forward (line 630) | def forward( class VisionAscendAttention (line 657) | class VisionAscendAttention(nn.Module): method __init__ (line 659) | def __init__( method forward (line 667) | def forward( class VisionAttention (line 724) | class VisionAttention(nn.Module): method __init__ (line 737) | def __init__( method _init_qk_norm (line 860) | def _init_qk_norm( method _determine_attention_backend (line 885) | def _determine_attention_backend(self, passed_backend: Optional[str]) ... method _apply_qk_norm_head_size (line 919) | def _apply_qk_norm_head_size(self, q: torch.Tensor, k: torch.Tensor): method _apply_qk_norm (line 929) | def _apply_qk_norm(self, q: torch.Tensor, k: torch.Tensor): method forward (line 967) | def forward( FILE: python/sglang/srt/layers/attention/vision_utils.py function update_vit_attn_dummy_heads_config (line 8) | def update_vit_attn_dummy_heads_config(config): function pad_vit_attn_dummy_heads (line 26) | def pad_vit_attn_dummy_heads(config, name: str, loaded_weight: torch.Ten... FILE: python/sglang/srt/layers/attention/wave_backend.py function get_num_kv_splits_triton (line 26) | def get_num_kv_splits_triton( class ForwardMetadata (line 78) | class ForwardMetadata: class WaveAttnBackend (line 90) | class WaveAttnBackend(AttentionBackend): method __init__ (line 91) | def __init__( method get_num_kv_splits (line 162) | def get_num_kv_splits( method init_forward_metadata (line 195) | def init_forward_metadata(self, forward_batch: ForwardBatch): method init_cuda_graph_state (line 344) | def init_cuda_graph_state( method init_forward_metadata_capture_cuda_graph (line 388) | def init_forward_metadata_capture_cuda_graph( method init_forward_metadata_replay_cuda_graph (line 472) | def init_forward_metadata_replay_cuda_graph( method get_cuda_graph_seq_len_fill_value (line 540) | def get_cuda_graph_seq_len_fill_value(self): method forward_extend (line 543) | def forward_extend( method forward_decode (line 589) | def forward_decode( FILE: python/sglang/srt/layers/attention/wave_ops/decode_attention.py function get_wave_kernel (line 27) | def get_wave_kernel( function decode_attention_intermediate_arrays_shapes (line 90) | def decode_attention_intermediate_arrays_shapes( function decode_attention_wave (line 105) | def decode_attention_wave( function decode_attention_fwd (line 157) | def decode_attention_fwd( FILE: python/sglang/srt/layers/attention/wave_ops/extend_attention.py function get_wave_kernel (line 23) | def get_wave_kernel( function extend_attention_wave (line 81) | def extend_attention_wave( FILE: python/sglang/srt/layers/attention/wave_ops/prefill_attention.py function prefill_attention_wave (line 22) | def prefill_attention_wave( FILE: python/sglang/srt/layers/attention/xpu_backend.py class XPUAttentionBackend (line 26) | class XPUAttentionBackend(AttentionBackend): method __init__ (line 36) | def __init__( method init_forward_metadata (line 93) | def init_forward_metadata(self, forward_batch: ForwardBatch): method forward_extend (line 380) | def forward_extend( method forward_decode (line 672) | def forward_decode( method get_cuda_graph_seq_len_fill_value (line 931) | def get_cuda_graph_seq_len_fill_value(self): method _init_local_attn_metadata (line 935) | def _init_local_attn_metadata( method _init_sliding_window_attn_spec_metadata (line 979) | def _init_sliding_window_attn_spec_metadata( FILE: python/sglang/srt/layers/communicator.py function apply_flashinfer_allreduce_fusion (line 94) | def apply_flashinfer_allreduce_fusion(batch_size: int): function apply_aiter_all_reduce_fusion (line 108) | def apply_aiter_all_reduce_fusion(input_tensor: torch.Tensor): class ScatterMode (line 122) | class ScatterMode(Enum): method model_input_output (line 136) | def model_input_output(): class AttentionInputs (line 143) | class AttentionInputs: method __init__ (line 145) | def __init__( method tp_all_gather_hidden_states (line 157) | def tp_all_gather_hidden_states(self, hidden_states, forward_batch): method fetch_qkv_latent (line 163) | def fetch_qkv_latent(self): method fetch_hidden_states (line 176) | def fetch_hidden_states(self): class AttnTpContext (line 187) | class AttnTpContext: method __init__ (line 188) | def __init__(self): method init_context (line 193) | def init_context(self, q_lora_rank, is_nsa): method use_input_scattered (line 214) | def use_input_scattered(self, forward_batch: ForwardBatch): method input_scattered (line 225) | def input_scattered(self): method set_attn_inputs (line 228) | def set_attn_inputs(self, attn_inputs: AttentionInputs): method fetch_qkv_latent (line 231) | def fetch_qkv_latent(self): method fetch_hidden_states (line 235) | def fetch_hidden_states(self): method maybe_input_scattered (line 240) | def maybe_input_scattered(self, forward_batch: ForwardBatch): function get_attn_tp_context (line 252) | def get_attn_tp_context(): class _LayerModeComputationContext (line 257) | class _LayerModeComputationContext: method previous_layer (line 264) | def previous_layer(self): class LayerScatterModes (line 276) | class LayerScatterModes: method init_new (line 285) | def init_new(cls, **kwargs): method _compute_layer_input_mode (line 296) | def _compute_layer_input_mode(cls, context: _LayerModeComputationConte... method _compute_mlp_mode (line 302) | def _compute_mlp_mode(cls, context: _LayerModeComputationContext): method _should_gather_for_tbo (line 321) | def _should_gather_for_tbo(cls, context: _LayerModeComputationContext): method _compute_middle_residual_mode (line 330) | def _compute_middle_residual_mode(cls, context: _LayerModeComputationC... method _compute_layer_output_mode (line 339) | def _compute_layer_output_mode(cls, context: _LayerModeComputationCont... function enable_moe_dense_fully_dp (line 352) | def enable_moe_dense_fully_dp(): class LayerCommunicator (line 356) | class LayerCommunicator: method __init__ (line 357) | def __init__( method _post_init_communicate (line 380) | def _post_init_communicate(self): method prepare_attn_and_capture_last_layer_outputs (line 404) | def prepare_attn_and_capture_last_layer_outputs( method prepare_attn (line 430) | def prepare_attn( method _tp_reduce_scatter (line 544) | def _tp_reduce_scatter( method prepare_mlp (line 563) | def prepare_mlp( method postprocess_layer (line 581) | def postprocess_layer( method should_use_reduce_scatter (line 595) | def should_use_reduce_scatter(self, forward_batch: ForwardBatch): method should_fuse_mlp_allreduce_with_next_layer (line 611) | def should_fuse_mlp_allreduce_with_next_layer( class CommunicateContext (line 646) | class CommunicateContext: method is_same_group_size (line 657) | def is_same_group_size(self, a: ScatterMode, b: ScatterMode): method init_new (line 661) | def init_new(cls): class CommunicateSimpleFn (line 687) | class CommunicateSimpleFn: method get_fn (line 689) | def get_fn( method _trivial (line 707) | def _trivial( method _scattered_to_tp_attn_full (line 715) | def _scattered_to_tp_attn_full( class CommunicateWithAllReduceAndLayerNormFn (line 753) | class CommunicateWithAllReduceAndLayerNormFn: method get_fn (line 760) | def get_fn( method _simple (line 808) | def _simple( method _gather_hidden_states_and_residual (line 821) | def _gather_hidden_states_and_residual( method _scatter_hidden_states_and_residual (line 885) | def _scatter_hidden_states_and_residual( method _tp_all_reduce_with_scattered_residual (line 906) | def _tp_all_reduce_with_scattered_residual( class CommunicateSummableTensorPairFn (line 922) | class CommunicateSummableTensorPairFn: method execute (line 926) | def execute( method get_fn (line 942) | def get_fn( method _trivial (line 979) | def _trivial( method _scatter_hidden_states (line 989) | def _scatter_hidden_states( method _gather (line 1008) | def _gather( method _scatter (line 1028) | def _scatter( FILE: python/sglang/srt/layers/communicator_nsa_cp.py function nsa_enable_prefill_cp (line 42) | def nsa_enable_prefill_cp(): class NSACPLayerCommunicator (line 49) | class NSACPLayerCommunicator(LayerCommunicator): method __init__ (line 50) | def __init__( method _post_init_communicate (line 69) | def _post_init_communicate(self): class NSACPCommunicateSimpleFn (line 95) | class NSACPCommunicateSimpleFn(CommunicateSimpleFn): method get_fn (line 97) | def get_fn( class NSACPCommunicateWithAllReduceAndLayerNormFn (line 108) | class NSACPCommunicateWithAllReduceAndLayerNormFn( method get_fn (line 117) | def get_fn( method _gather_hidden_states_and_residual (line 141) | def _gather_hidden_states_and_residual( class NSACPCommunicateSummableTensorPairFn (line 167) | class NSACPCommunicateSummableTensorPairFn(CommunicateSummableTensorPair... method get_fn (line 171) | def get_fn( method _scatter_hidden_states (line 194) | def _scatter_hidden_states( FILE: python/sglang/srt/layers/conv.py function _tuplify (line 23) | def _tuplify(val, n: int) -> tuple: function _check_enable_linear (line 30) | def _check_enable_linear( function _reverse_repeat_tuple (line 46) | def _reverse_repeat_tuple(t: tuple) -> tuple: function _compute_same_padding_for_pad (line 51) | def _compute_same_padding_for_pad(kernel_size: tuple, dilation: tuple) -... function _validate_conv_args (line 65) | def _validate_conv_args( class Conv2dLayer (line 94) | class Conv2dLayer(MultiPlatformOp): method __init__ (line 97) | def __init__( method _reset_parameters (line 153) | def _reset_parameters(self): method _forward_mulmat (line 160) | def _forward_mulmat(self, x: torch.Tensor) -> torch.Tensor: method _forward_conv (line 168) | def _forward_conv(self, x: torch.Tensor) -> torch.Tensor: method forward_native (line 189) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: method forward_cuda (line 194) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: class Conv3dLayer (line 200) | class Conv3dLayer(MultiPlatformOp): method __init__ (line 203) | def __init__( method _reset_parameters (line 256) | def _reset_parameters(self): method _forward_mulmat (line 263) | def _forward_mulmat(self, x: torch.Tensor) -> torch.Tensor: method _forward_conv (line 271) | def _forward_conv(self, x: torch.Tensor) -> torch.Tensor: method forward_native (line 292) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: method forward_cuda (line 297) | def forward_cuda(self, x: torch.Tensor) -> torch.Tensor: FILE: python/sglang/srt/layers/deep_gemm_wrapper/compile_utils.py function update_deep_gemm_config (line 43) | def update_deep_gemm_config(gpu_id: int, server_args: ServerArgs): class DeepGemmKernelType (line 95) | class DeepGemmKernelType(IntEnum): function _maybe_compile_deep_gemm_one_type_all (line 106) | def _maybe_compile_deep_gemm_one_type_all( function _compile_deep_gemm_one_type_all (line 151) | def _compile_deep_gemm_one_type_all( class _BaseWarmupExecutor (line 213) | class _BaseWarmupExecutor: method create (line 215) | def create(kernel_type: DeepGemmKernelType, **kwargs): method get_memory_requirement (line 224) | def get_memory_requirement( method execute (line 246) | def execute(self, m): function _empty_token_fp8 (line 250) | def _empty_token_fp8(size): function _empty_block_fp8 (line 260) | def _empty_block_fp8(size): class _NormalWarmupExecutor (line 275) | class _NormalWarmupExecutor(_BaseWarmupExecutor): method __init__ (line 276) | def __init__(self, max_m: int, n: int, k: int, num_groups: int): method execute (line 281) | def execute(self, m): class _GroupedContWarmupExecutor (line 289) | class _GroupedContWarmupExecutor(_BaseWarmupExecutor): method __init__ (line 290) | def __init__(self, max_m: int, n: int, k: int, num_groups: int): method execute (line 296) | def execute(self, m): class _GroupedMaskedWarmupExecutor (line 305) | class _GroupedMaskedWarmupExecutor(_BaseWarmupExecutor): method __init__ (line 306) | def __init__(self, max_m: int, n: int, k: int, num_groups: int): method execute (line 314) | def execute(self, m): class _BF16F32WarmupExecutor (line 325) | class _BF16F32WarmupExecutor(_BaseWarmupExecutor): method __init__ (line 326) | def __init__(self, max_m: int, n: int, k: int, num_groups: int): method execute (line 331) | def execute(self, m): function deep_gemm_execution_hook (line 336) | def deep_gemm_execution_hook( FILE: python/sglang/srt/layers/deep_gemm_wrapper/configurer.py function _compute_enable_deep_gemm (line 9) | def _compute_enable_deep_gemm(): FILE: python/sglang/srt/layers/deep_gemm_wrapper/entrypoint.py function grouped_gemm_nt_f8f8bf16_masked (line 26) | def grouped_gemm_nt_f8f8bf16_masked( function grouped_gemm_nt_f8f8bf16_contig (line 67) | def grouped_gemm_nt_f8f8bf16_contig( function gemm_nt_f8f8bf16 (line 84) | def gemm_nt_f8f8bf16( function gemm_nt_bf16bf16f32 (line 105) | def gemm_nt_bf16bf16f32( function update_deep_gemm_config (line 119) | def update_deep_gemm_config(gpu_id: int, server_args: ServerArgs): function configure_deep_gemm_num_sms (line 124) | def configure_deep_gemm_num_sms(num_sms): function _sanity_check_input (line 136) | def _sanity_check_input(x_fp8: Tuple[torch.Tensor, torch.Tensor]): FILE: python/sglang/srt/layers/dp_attention.py class DpPaddingMode (line 50) | class DpPaddingMode(IntEnum): method is_max_len (line 57) | def is_max_len(self): method is_sum_len (line 60) | def is_sum_len(self): method get_dp_padding_mode (line 64) | def get_dp_padding_mode( method get_default_mode_in_cuda_graph (line 86) | def get_default_mode_in_cuda_graph(cls) -> DpPaddingMode: class _DpGatheredBufferWrapper (line 95) | class _DpGatheredBufferWrapper: method set_metadata (line 107) | def set_metadata(cls, hidden_size: int, dtype: torch.dtype, device: to... method set_dp_buffer_len (line 113) | def set_dp_buffer_len( method get_global_dp_buffer (line 126) | def get_global_dp_buffer(cls) -> torch.Tensor: method get_local_dp_buffer (line 136) | def get_local_dp_buffer(cls) -> torch.Tensor: method get_global_dp_buffer_len (line 146) | def get_global_dp_buffer_len(cls) -> int: method get_local_dp_buffer_len (line 150) | def get_local_dp_buffer_len(cls) -> int: method get_dp_global_num_tokens (line 154) | def get_dp_global_num_tokens(cls) -> List[int]: method get_dp_hidden_size (line 158) | def get_dp_hidden_size(cls) -> int: method get_dp_dtype (line 162) | def get_dp_dtype(cls) -> torch.dtype: method get_dp_device (line 166) | def get_dp_device(cls) -> torch.device: method set_is_extend_in_batch (line 170) | def set_is_extend_in_batch(cls, is_extend_in_batch: bool): method get_is_extend_in_batch (line 174) | def get_is_extend_in_batch(cls) -> bool: method is_dp_max_padding (line 178) | def is_dp_max_padding(cls) -> bool: function set_dp_buffer_len (line 182) | def set_dp_buffer_len( function get_global_dp_buffer (line 193) | def get_global_dp_buffer() -> torch.Tensor: function get_local_dp_buffer (line 197) | def get_local_dp_buffer() -> torch.Tensor: function get_global_dp_buffer_len (line 201) | def get_global_dp_buffer_len() -> int: function get_local_dp_buffer_len (line 205) | def get_local_dp_buffer_len() -> int: function get_dp_global_num_tokens (line 209) | def get_dp_global_num_tokens() -> List[int]: function get_dp_hidden_size (line 213) | def get_dp_hidden_size() -> int: function get_dp_dtype (line 217) | def get_dp_dtype() -> torch.dtype: function get_dp_device (line 221) | def get_dp_device() -> torch.device: function set_is_extend_in_batch (line 225) | def set_is_extend_in_batch(is_extend_in_batch: bool): function get_is_extend_in_batch (line 229) | def get_is_extend_in_batch() -> bool: function is_dp_max_padding (line 233) | def is_dp_max_padding() -> bool: function compute_dp_attention_world_info (line 237) | def compute_dp_attention_world_info( function compute_dp_attention_local_info (line 254) | def compute_dp_attention_local_info( function initialize_dp_attention (line 271) | def initialize_dp_attention( function is_dp_attention_enabled (line 311) | def is_dp_attention_enabled() -> bool: function is_allocation_symmetric (line 315) | def is_allocation_symmetric() -> bool: function get_attention_tp_group (line 319) | def get_attention_tp_group() -> GroupCoordinator: function get_attention_tp_rank (line 323) | def get_attention_tp_rank() -> int: function get_attention_tp_size (line 327) | def get_attention_tp_size() -> int: function get_attention_cp_group (line 331) | def get_attention_cp_group() -> GroupCoordinator: function get_attention_cp_rank (line 335) | def get_attention_cp_rank() -> int: function get_attention_cp_size (line 339) | def get_attention_cp_size() -> int: function get_attention_dp_rank (line 343) | def get_attention_dp_rank() -> int: function get_attention_dp_size (line 348) | def get_attention_dp_size() -> int: function get_local_attention_dp_rank (line 353) | def get_local_attention_dp_rank() -> int: function get_local_attention_dp_size (line 358) | def get_local_attention_dp_size() -> int: function disable_dp_size (line 364) | def disable_dp_size(): function get_dp_local_info (line 384) | def get_dp_local_info(forward_batch: ForwardBatch) -> Tuple[torch.Tensor... function memcpy_triton_kernel (line 403) | def memcpy_triton_kernel( function prod (line 428) | def prod(x): function memcpy_triton (line 432) | def memcpy_triton(dst, src, dim, offset, sz, offset_src): function _dp_gather_via_all_reduce (line 443) | def _dp_gather_via_all_reduce( function _dp_gather_via_all_gather (line 478) | def _dp_gather_via_all_gather( function _dp_gather (line 498) | def _dp_gather( function dp_gather_partial (line 514) | def dp_gather_partial( function dp_gather_replicate (line 522) | def dp_gather_replicate( function dp_scatter (line 530) | def dp_scatter( function dp_reduce_scatter_tensor (line 552) | def dp_reduce_scatter_tensor(output: torch.Tensor, input: torch.Tensor): function attn_tp_reduce_scatter_tensor (line 563) | def attn_tp_reduce_scatter_tensor(output: torch.Tensor, input: torch.Ten... function attn_cp_reduce_scatter_tensor (line 567) | def attn_cp_reduce_scatter_tensor(output: torch.Tensor, input: torch.Ten... function attn_tp_all_reduce (line 571) | def attn_tp_all_reduce(input: torch.Tensor): function attn_tp_all_gather_into_tensor (line 575) | def attn_tp_all_gather_into_tensor(output: torch.Tensor, input: torch.Te... function attn_cp_all_gather_into_tensor (line 579) | def attn_cp_all_gather_into_tensor(output: torch.Tensor, input: torch.Te... function attn_tp_all_gather (line 583) | def attn_tp_all_gather(output_list: List[torch.Tensor], input: torch.Ten... FILE: python/sglang/srt/layers/elementwise.py function fused_softcap_kernel (line 38) | def fused_softcap_kernel( function fused_softcap (line 62) | def fused_softcap(x, softcap_const, autotune=False): class Softcap (line 76) | class Softcap: method __init__ (line 77) | def __init__(self, softcap_const: float): method __call__ (line 80) | def __call__(self, *args, **kwargs): method forward (line 83) | def forward(self, x: torch.Tensor) -> torch.Tensor: method forward_native (line 89) | def forward_native(self, x: torch.Tensor) -> torch.Tensor: method forward_cuda (line 92) | def forward_cuda(self, x: torch.Tensor, autotune=False) -> torch.Tensor: function fused_dual_residual_rmsnorm_kernel (line 139) | def fused_dual_residual_rmsnorm_kernel( function fused_dual_residual_rmsnorm (line 189) | def fused_dual_residual_rmsnorm(x, residual, weight1, weight2, eps, auto... function fused_rmsnorm_kernel (line 225) | def fused_rmsnorm_kernel( function fused_rmsnorm (line 255) | def fused_rmsnorm(x, weight, eps, autotune=False, inplace=False): class FusedDualResidualRMSNorm (line 276) | class FusedDualResidualRMSNorm: method __init__ (line 282) | def __init__(self, rmsnorm1, rmsnorm2) -> None: # the one after rmsnorm1 method __call__ (line 289) | def __call__(self, *args, **kwargs): method forward (line 292) | def forward( method forward_cuda (line 300) | def forward_cuda( method forward_flashinfer (line 312) | def forward_flashinfer( method forward_native (line 321) | def forward_native( function experts_combine_kernel (line 332) | def experts_combine_kernel( function experts_combine_triton (line 363) | def experts_combine_triton( function gelu_and_mul_kernel (line 408) | def gelu_and_mul_kernel( function gelu_and_mul_triton (line 445) | def gelu_and_mul_triton( function silu_and_mul_kernel (line 502) | def silu_and_mul_kernel( function silu_and_mul_triton (line 539) | def silu_and_mul_triton( FILE: python/sglang/srt/layers/flashinfer_comm_fusion.py function is_flashinfer_allreduce_unavailable (line 41) | def is_flashinfer_allreduce_unavailable() -> bool: class FlashInferWorkspaceManager (line 45) | class FlashInferWorkspaceManager: method __init__ (line 46) | def __init__(self): method initialize (line 55) | def initialize( method is_buffer_size_sufficient (line 106) | def is_buffer_size_sufficient( method cleanup (line 127) | def cleanup(self): function ensure_workspace_initialized (line 147) | def ensure_workspace_initialized( function fake_flashinfer_allreduce_residual_rmsnorm (line 191) | def fake_flashinfer_allreduce_residual_rmsnorm( function flashinfer_allreduce_residual_rmsnorm (line 210) | def flashinfer_allreduce_residual_rmsnorm( function cleanup_flashinfer_workspace (line 286) | def cleanup_flashinfer_workspace(): FILE: python/sglang/srt/layers/int4fp8_utils.py function quantize_fp8_scale_tensorwise (line 13) | def quantize_fp8_scale_tensorwise(w: torch.Tensor) -> Tuple[torch.Tensor... function quantize_int4_scale_columnwise (line 20) | def quantize_int4_scale_columnwise( function pack_int4_to_int32 (line 30) | def pack_int4_to_int32(to_pack: torch.Tensor, reorder: bool = True) -> t... FILE: python/sglang/srt/layers/layernorm.py function _forward_with_allreduce_fusion (line 89) | def _forward_with_allreduce_fusion( class RMSNorm (line 136) | class RMSNorm(MultiPlatformOp): method __init__ (line 137) | def __init__( method forward_cuda (line 165) | def forward_cuda( method forward_npu (line 198) | def forward_npu( method forward_aiter (line 213) | def forward_aiter( method forward_hip (line 235) | def forward_hip( method forward_native (line 261) | def forward_native( method forward_cpu (line 311) | def forward_cpu( method forward_xpu (line 331) | def forward_xpu( method forward_with_allreduce_fusion (line 347) | def forward_with_allreduce_fusion( class LayerNorm (line 359) | class LayerNorm(MultiPlatformOp): method __init__ (line 360) | def __init__( method forward_cuda (line 378) | def forward_cuda( method forward_native (line 391) | def forward_native( method forward_hip (line 407) | def forward_hip( method forward_npu (line 413) | def forward_npu( method forward_cpu (line 419) | def forward_cpu( class GemmaRMSNorm (line 432) | class GemmaRMSNorm(MultiPlatformOp): method __init__ (line 433) | def __init__( method _forward_impl (line 445) | def _forward_impl( method forward_native (line 461) | def forward_native( method forward_cuda (line 481) | def forward_cuda( method forward_cpu (line 489) | def forward_cpu( method forward_npu (line 508) | def forward_npu( method forward_xpu (line 525) | def forward_xpu( method forward_with_allreduce_fusion (line 533) | def forward_with_allreduce_fusion( class Gemma3RMSNorm (line 546) | class Gemma3RMSNorm(MultiPlatformOp): method __init__ (line 547) | def __init__(self, dim: int, eps: float = 1e-6): method _norm (line 553) | def _norm(self, x): method forward_native (line 556) | def forward_native(self, x): method forward_cpu (line 563) | def forward_cpu(self, x): method forward_cuda (line 568) | def forward_cuda(self, x): method forward_npu (line 571) | def forward_npu(self, x): method extra_repr (line 575) | def extra_repr(self): FILE: python/sglang/srt/layers/linear.py function adjust_marlin_shard (line 81) | def adjust_marlin_shard(param, shard_size, shard_offset): function adjust_bitsandbytes_4bit_shard (line 89) | def adjust_bitsandbytes_4bit_shard( function adjust_scalar_to_fused_array (line 104) | def adjust_scalar_to_fused_array(param, loaded_weight, shard_id): function adjust_shard_offsets (line 127) | def adjust_shard_offsets(shard_offsets, loaded_weight, dim): class LinearBase (line 141) | class LinearBase(torch.nn.Module): method __init__ (line 153) | def __init__( method forward (line 179) | def forward(self, x: torch.Tensor) -> torch.Tensor: class ReplicatedLinear (line 183) | class ReplicatedLinear(LinearBase): method __init__ (line 197) | def __init__( method weight_loader (line 242) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): method forward (line 264) | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Optional[tor... method extra_repr (line 271) | def extra_repr(self) -> str: class ColumnParallelLinear (line 278) | class ColumnParallelLinear(LinearBase): method __init__ (line 302) | def __init__( method weight_loader (line 371) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): method weight_loader_v2 (line 420) | def weight_loader_v2(self, param: Parameter, loaded_weight: torch.Tens... method forward (line 447) | def forward(self, input_): method extra_repr (line 461) | def extra_repr(self) -> str: class MergedColumnParallelLinear (line 470) | class MergedColumnParallelLinear(ColumnParallelLinear): method __init__ (line 493) | def __init__( method weight_loader (line 530) | def weight_loader( method _load_fused_module_from_checkpoint (line 708) | def _load_fused_module_from_checkpoint( method _load_merged_block_scale (line 748) | def _load_merged_block_scale( method weight_loader_v2 (line 793) | def weight_loader_v2( class QKVParallelLinear (line 858) | class QKVParallelLinear(ColumnParallelLinear): method __init__ (line 884) | def __init__( method _get_shard_offset_mapping (line 953) | def _get_shard_offset_mapping(self, loaded_shard_id: str): method _get_shard_size_mapping (line 963) | def _get_shard_size_mapping(self, loaded_shard_id: str): method _load_fused_module_from_checkpoint (line 971) | def _load_fused_module_from_checkpoint( method _load_qkv_block_scale (line 1016) | def _load_qkv_block_scale( method weight_loader_v2 (line 1045) | def weight_loader_v2( method weight_loader (line 1087) | def weight_loader( class RowParallelLinear (line 1304) | class RowParallelLinear(LinearBase): method __init__ (line 1330) | def __init__( method weight_loader (line 1391) | def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): method weight_loader_v2 (line 1454) | def weight_loader_v2(self, param: BasevLLMParameter, loaded_weight: to... method forward (line 1484) | def forward(self, input_, skip_all_reduce=False): method extra_repr (line 1515) | def extra_repr(self) -> str: class MergedColumnParallelRepeatedLinear (line 1524) | class MergedColumnParallelRepeatedLinear(LinearBase): method __init__ (line 1537) | def __init__( method forward (line 1576) | def forward(self, input_: torch.Tensor) -> torch.Tensor: method weight_loader (line 1579) | def weight_loader( class ColumnParallelBatchedLinear (line 1594) | class ColumnParallelBatchedLinear(nn.Module): method __init__ (line 1605) | def __init__( method forward (line 1617) | def forward(self, input: torch.Tensor) -> torch.Tensor: method weight_loader (line 1620) | def weight_loader( FILE: python/sglang/srt/layers/model_parallel.py function _shard_tensor (line 24) | def _shard_tensor( class ColwiseParallelSharded (line 68) | class ColwiseParallelSharded(ColwiseParallel): method _partition_linear_fn (line 76) | def _partition_linear_fn(self, name, module, device_mesh): class RowwiseParallelMaybeWait (line 86) | class RowwiseParallelMaybeWait(RowwiseParallel): method _partition_linear_fn (line 94) | def _partition_linear_fn(self, name, module, device_mesh): method _prepare_output_fn (line 112) | def _prepare_output_fn(output_layouts, use_local_output, mod, outputs,... function tensor_parallel (line 121) | def tensor_parallel( FILE: python/sglang/srt/layers/moe/cutlass_moe.py function cutlass_fused_experts_fp8 (line 29) | def cutlass_fused_experts_fp8( function cutlass_moe_fp4 (line 348) | def cutlass_moe_fp4( FILE: python/sglang/srt/layers/moe/cutlass_moe_params.py class CutlassMoEType (line 8) | class CutlassMoEType(Enum): class CutlassMoEParams (line 19) | class CutlassMoEParams: method __init__ (line 95) | def __init__( method to_gemm1_args (line 155) | def to_gemm1_args(self) -> dict: method to_gemm2_args (line 172) | def to_gemm2_args(self) -> dict: FILE: python/sglang/srt/layers/moe/cutlass_w4a8_moe.py function cutlass_w4a8_moe (line 35) | def cutlass_w4a8_moe( function cutlass_w4a8_moe_deepep_normal (line 223) | def cutlass_w4a8_moe_deepep_normal( function cutlass_w4a8_moe_deepep_ll (line 413) | def cutlass_w4a8_moe_deepep_ll( FILE: python/sglang/srt/layers/moe/ep_moe/kernels.py function _get_launch_config_1d (line 19) | def _get_launch_config_1d(device, numel): function _get_launch_config_2d (line 46) | def _get_launch_config_2d(device, m, n): function deepep_permute_triton_kernel (line 74) | def deepep_permute_triton_kernel( function deepep_post_reorder_triton_kernel (line 105) | def deepep_post_reorder_triton_kernel( function compute_src2dst_triton_kernel (line 138) | def compute_src2dst_triton_kernel( function deepep_compute_src2dst_triton_kernel (line 149) | def deepep_compute_src2dst_triton_kernel( function deepep_run_moe_deep_preprocess (line 160) | def deepep_run_moe_deep_preprocess(topk_ids: torch.Tensor, num_experts: ... function compute_seg_indptr_triton_kernel (line 183) | def compute_seg_indptr_triton_kernel(reorder_topk_ids, seg_indptr, num_t... function cutlass_w4_run_moe_ep_preproess (line 199) | def cutlass_w4_run_moe_ep_preproess(topk_ids: torch.Tensor): function pre_reorder_triton_kernel_for_cutlass_moe (line 213) | def pre_reorder_triton_kernel_for_cutlass_moe( function pre_reorder_for_cutlass_moe (line 257) | def pre_reorder_for_cutlass_moe( function _silu_and_mul_post_quant_kernel (line 287) | def _silu_and_mul_post_quant_kernel( function silu_and_mul_masked_post_quant_fwd (line 364) | def silu_and_mul_masked_post_quant_fwd( function silu_mul_static_tensorwise_quant_triton_kernel_for_cutlass_moe (line 433) | def silu_mul_static_tensorwise_quant_triton_kernel_for_cutlass_moe( function silu_mul_static_tensorwise_quant_for_cutlass_moe (line 465) | def silu_mul_static_tensorwise_quant_for_cutlass_moe( function post_reorder_triton_kernel_for_cutlass_moe (line 489) | def post_reorder_triton_kernel_for_cutlass_moe( function post_reorder_for_cutlass_moe (line 538) | def post_reorder_for_cutlass_moe( function post_reorder_triton_kernel (line 569) | def post_reorder_triton_kernel( function _fwd_kernel_ep_scatter_1 (line 609) | def _fwd_kernel_ep_scatter_1( function _fwd_kernel_ep_scatter_2 (line 642) | def _fwd_kernel_ep_scatter_2( function ep_scatter (line 715) | def ep_scatter( function _fwd_kernel_ep_gather (line 791) | def _fwd_kernel_ep_gather( function ep_gather (line 857) | def ep_gather( function get_tma_aligned_size (line 896) | def get_tma_aligned_size(x: int, element_size: int) -> int: function _tma_align_input_scale_kernel (line 916) | def _tma_align_input_scale_kernel( function tma_align_input_scale (line 946) | def tma_align_input_scale(input_scale: torch.Tensor): function compute_masked_m_triton_kernel (line 972) | def compute_masked_m_triton_kernel(seg_indptr, masked_m): function deepgemm_compute_src2dst_triton_kernel (line 980) | def deepgemm_compute_src2dst_triton_kernel( function fill_gateup_input_triton_kernel (line 1001) | def fill_gateup_input_triton_kernel( function moe_ep_deepgemm_preprocess (line 1041) | def moe_ep_deepgemm_preprocess( function compute_identity_kernel (line 1119) | def compute_identity_kernel( function zero_experts_compute_triton (line 1157) | def zero_experts_compute_triton( function compute_problem_sizes_w4a8_kernel (line 1193) | def compute_problem_sizes_w4a8_kernel( function compute_problem_sizes_w4a8 (line 1230) | def compute_problem_sizes_w4a8( function deepep_ll_get_cutlass_w4a8_moe_mm_data (line 1247) | def deepep_ll_get_cutlass_w4a8_moe_mm_data( function _silu_and_mul_post_per_tensor_quant_kernel (line 1265) | def _silu_and_mul_post_per_tensor_quant_kernel( function silu_and_mul_masked_post_per_tensor_quant_fwd (line 1330) | def silu_and_mul_masked_post_per_tensor_quant_fwd( FILE: python/sglang/srt/layers/moe/flashinfer_cutedsl_moe.py function get_cute_dtype (line 11) | def get_cute_dtype(input: torch.Tensor) -> str: function flashinfer_cutedsl_moe_masked (line 22) | def flashinfer_cutedsl_moe_masked( FILE: python/sglang/srt/layers/moe/flashinfer_trtllm_moe.py function _fake_fp8_block_scale_moe (line 8) | def _fake_fp8_block_scale_moe( function trtllm_fp8_block_scale_moe_wrapper (line 38) | def trtllm_fp8_block_scale_moe_wrapper( function _fake_fp8_block_scale_routed_moe (line 100) | def _fake_fp8_block_scale_routed_moe( function trtllm_fp8_block_scale_routed_moe_wrapper (line 130) | def trtllm_fp8_block_scale_routed_moe_wrapper( function _fake_fp8_per_tensor_scale_moe (line 192) | def _fake_fp8_per_tensor_scale_moe( function trtllm_fp8_per_tensor_scale_moe_wrapper (line 220) | def trtllm_fp8_per_tensor_scale_moe_wrapper( FILE: python/sglang/srt/layers/moe/fused_moe_native.py function fused_moe_forward_native (line 18) | def fused_moe_forward_native( function moe_forward_native (line 49) | def moe_forward_native( FILE: python/sglang/srt/layers/moe/fused_moe_triton/__init__.py function override_config (line 21) | def override_config(config): function get_config (line 29) | def get_config() -> Optional[Dict[str, Any]]: