SYMBOL INDEX (7894 symbols across 838 files)

FILE: .github/scripts/action_tools.py
  function run_cmd (line 17) | def run_cmd(cmd_lines: List[str], log_path: str, cwd: str = None):
  function _append_summary (line 52) | def _append_summary(content):
  function add_summary (line 58) | def add_summary(csv_path: str):
  function evaluate (line 78) | def evaluate(models: List[str],
  function create_model_links (line 187) | def create_model_links(src_dir: str, dst_dir: str):
  function generate_benchmark_report (line 201) | def generate_benchmark_report(report_path: str):
  function generate_csv_from_profile_result (line 255) | def generate_csv_from_profile_result(file_path: str, out_path: str):
  function generate_output_for_evaluation (line 277) | def generate_output_for_evaluation(result_dir: str):
  function find_csv_files (line 291) | def find_csv_files(directory):

FILE: .github/scripts/check_lmdeploy.py
  function check_module_init (line 8) | def check_module_init(root: str):

FILE: .github/scripts/doc_link_checker.py
  function make_parser (line 9) | def make_parser():
  function analyze_doc (line 19) | def analyze_doc(home, path):
  function traverse (line 66) | def traverse(target):

FILE: autotest/benchmark/test_apiserver_performance.py
  function get_models (line 6) | def get_models(backend, parallel_config):
  function test_turbomind_apiserver_tp1 (line 14) | def test_turbomind_apiserver_tp1(config, run_config, worker_id):
  function test_turbomind_apiserver_tp2 (line 23) | def test_turbomind_apiserver_tp2(config, run_config, worker_id):
  function test_turbomind_apiserver_tp4 (line 32) | def test_turbomind_apiserver_tp4(config, run_config, worker_id):
  function test_turbomind_apiserver_tp8 (line 41) | def test_turbomind_apiserver_tp8(config, run_config, worker_id):
  function test_pytorch_apiserver_tp1 (line 50) | def test_pytorch_apiserver_tp1(config, run_config, worker_id):
  function test_pytorch_apiserver_tp2 (line 59) | def test_pytorch_apiserver_tp2(config, run_config, worker_id):
  function test_pytorch_apiserver_tp4 (line 68) | def test_pytorch_apiserver_tp4(config, run_config, worker_id):
  function test_pytorch_apiserver_tp8 (line 77) | def test_pytorch_apiserver_tp8(config, run_config, worker_id):
  function test_pytorch_apiserver_tp16 (line 86) | def test_pytorch_apiserver_tp16(config, run_config, worker_id):
  function test_restful_func_tp2 (line 131) | def test_restful_func_tp2(config, run_config, worker_id):

FILE: autotest/benchmark/test_longtext_performance.py
  function get_models (line 6) | def get_models(backend, parallel_config):
  function test_turbomind_longtext_throughput_tp1 (line 14) | def test_turbomind_longtext_throughput_tp1(config, run_config, worker_id):
  function test_turbomind_longtext_throughput_tp2 (line 23) | def test_turbomind_longtext_throughput_tp2(config, run_config, worker_id):
  function test_turbomind_longtext_throughput_tp4 (line 32) | def test_turbomind_longtext_throughput_tp4(config, run_config, worker_id):
  function test_turbomind_longtext_throughput_tp8 (line 41) | def test_turbomind_longtext_throughput_tp8(config, run_config, worker_id):
  function test_pytorch_longtext_throughput_tp1 (line 50) | def test_pytorch_longtext_throughput_tp1(config, run_config, worker_id):
  function test_pytorch_longtext_throughput_tp2 (line 59) | def test_pytorch_longtext_throughput_tp2(config, run_config, worker_id):
  function test_pytorch_longtext_throughput_tp4 (line 68) | def test_pytorch_longtext_throughput_tp4(config, run_config, worker_id):
  function test_pytorch_longtext_throughput_tp8 (line 77) | def test_pytorch_longtext_throughput_tp8(config, run_config, worker_id):
  function test_pytorch_longtext_throughput_tp16 (line 86) | def test_pytorch_longtext_throughput_tp16(config, run_config, worker_id):

FILE: autotest/benchmark/test_mllm_apiserver_performance.py
  function get_models (line 6) | def get_models(backend, parallel_config):
  function test_turbomind_mllm_apiserver_tp1 (line 14) | def test_turbomind_mllm_apiserver_tp1(config, run_config, worker_id):
  function test_turbomind_mllm_apiserver_tp2 (line 23) | def test_turbomind_mllm_apiserver_tp2(config, run_config, worker_id):
  function test_turbomind_mllm_apiserver_tp4 (line 32) | def test_turbomind_mllm_apiserver_tp4(config, run_config, worker_id):
  function test_turbomind_mllm_apiserver_tp8 (line 41) | def test_turbomind_mllm_apiserver_tp8(config, run_config, worker_id):
  function test_pytorch_mllm_apiserver_tp1 (line 50) | def test_pytorch_mllm_apiserver_tp1(config, run_config, worker_id):
  function test_pytorch_mllm_apiserver_tp2 (line 59) | def test_pytorch_mllm_apiserver_tp2(config, run_config, worker_id):
  function test_pytorch_mllm_apiserver_tp4 (line 68) | def test_pytorch_mllm_apiserver_tp4(config, run_config, worker_id):
  function test_pytorch_mllm_apiserver_tp8 (line 77) | def test_pytorch_mllm_apiserver_tp8(config, run_config, worker_id):
  function test_pytorch_mllm_apiserver_tp16 (line 86) | def test_pytorch_mllm_apiserver_tp16(config, run_config, worker_id):

FILE: autotest/benchmark/test_prefixcache_performance.py
  function get_models (line 6) | def get_models(backend, parallel_config):
  function test_turbomind_prefix_tp1 (line 14) | def test_turbomind_prefix_tp1(config, run_config, worker_id):
  function test_turbomind_prefix_tp2 (line 23) | def test_turbomind_prefix_tp2(config, run_config, worker_id):
  function test_turbomind_prefix_tp4 (line 32) | def test_turbomind_prefix_tp4(config, run_config, worker_id):
  function test_turbomind_prefix_tp8 (line 41) | def test_turbomind_prefix_tp8(config, run_config, worker_id):
  function test_pytorch_prefix_tp1 (line 50) | def test_pytorch_prefix_tp1(config, run_config, worker_id):
  function test_pytorch_prefix_tp2 (line 59) | def test_pytorch_prefix_tp2(config, run_config, worker_id):
  function test_pytorch_prefix_tp4 (line 68) | def test_pytorch_prefix_tp4(config, run_config, worker_id):
  function test_pytorch_prefix_tp8 (line 77) | def test_pytorch_prefix_tp8(config, run_config, worker_id):
  function test_pytorch_prefix_tp16 (line 86) | def test_pytorch_prefix_tp16(config, run_config, worker_id):
  function test_pytorch_prefix_pr_test_tp1 (line 113) | def test_pytorch_prefix_pr_test_tp1(config, run_config, worker_id):

FILE: autotest/benchmark/test_throughput_performance.py
  function get_models (line 6) | def get_models(backend, parallel_config):
  function test_turbomind_throughput_tp1 (line 16) | def test_turbomind_throughput_tp1(config, run_config, worker_id):
  function test_turbomind_throughput_tp2 (line 25) | def test_turbomind_throughput_tp2(config, run_config, worker_id):
  function test_turbomind_throughput_tp4 (line 34) | def test_turbomind_throughput_tp4(config, run_config, worker_id):
  function test_turbomind_throughput_tp8 (line 43) | def test_turbomind_throughput_tp8(config, run_config, worker_id):
  function test_pytorch_throughput_tp1 (line 52) | def test_pytorch_throughput_tp1(config, run_config, worker_id):
  function test_pytorch_throughput_tp2 (line 61) | def test_pytorch_throughput_tp2(config, run_config, worker_id):
  function test_pytorch_throughput_tp4 (line 70) | def test_pytorch_throughput_tp4(config, run_config, worker_id):
  function test_pytorch_throughput_tp8 (line 79) | def test_pytorch_throughput_tp8(config, run_config, worker_id):
  function test_pytorch_throughput_tp16 (line 88) | def test_pytorch_throughput_tp16(config, run_config, worker_id):
  function test_throughput_func_tp2 (line 114) | def test_throughput_func_tp2(config, run_config, worker_id):
  function test_throughput_prtest_tp1 (line 141) | def test_throughput_prtest_tp1(config, run_config, worker_id):

FILE: autotest/conftest.py
  function config (line 18) | def config():
  function cli_case_config (line 24) | def cli_case_config():
  function common_case_config (line 32) | def common_case_config():
  function shared_ray_manager (line 40) | def shared_ray_manager():
  function shared_proxy_manager (line 71) | def shared_proxy_manager():

FILE: autotest/evaluate/test_api_evaluate.py
  function _run_ray_distributed_test (line 13) | def _run_ray_distributed_test(
  function _run_proxy_distributed_test (line 59) | def _run_proxy_distributed_test(config,
  function run_eval_test (line 111) | def run_eval_test(config, run_config, worker_id, test_type='infer', eval...
  function get_models (line 192) | def get_models(backend, parallel_config):
  function test_turbomind_infer_tp1 (line 201) | def test_turbomind_infer_tp1(config, run_config, worker_id):
  function test_turbomind_infer_tp2 (line 210) | def test_turbomind_infer_tp2(config, run_config, worker_id):
  function test_turbomind_infer_tp4 (line 219) | def test_turbomind_infer_tp4(config, run_config, worker_id):
  function test_turbomind_infer_tp8 (line 228) | def test_turbomind_infer_tp8(config, run_config, worker_id):
  function test_turbomind_infer_cp2tp8 (line 237) | def test_turbomind_infer_cp2tp8(config, run_config, worker_id):
  function test_pytorch_restful_tp1 (line 247) | def test_pytorch_restful_tp1(config, run_config, worker_id):
  function test_pytorch_restful_tp2 (line 257) | def test_pytorch_restful_tp2(config, run_config, worker_id):
  function test_pytorch_restful_tp4 (line 267) | def test_pytorch_restful_tp4(config, run_config, worker_id):
  function test_pytorch_restful_tp8 (line 277) | def test_pytorch_restful_tp8(config, run_config, worker_id):
  function test_pytorch_restful_tp16 (line 287) | def test_pytorch_restful_tp16(config, run_config, worker_id):
  function test_pytorch_restful_distributed_tp16 (line 296) | def test_pytorch_restful_distributed_tp16(shared_ray_manager, config, ru...
  function test_pytorch_restful_distributed_dpep8 (line 309) | def test_pytorch_restful_distributed_dpep8(shared_proxy_manager, config,...
  function test_pytorch_restful_distributed_dpep16 (line 322) | def test_pytorch_restful_distributed_dpep16(shared_proxy_manager, config...
  function test_turbomind_eval_tp1 (line 335) | def test_turbomind_eval_tp1(config, run_config, worker_id):
  function test_turbomind_eval_tp2 (line 344) | def test_turbomind_eval_tp2(config, run_config, worker_id):
  function test_turbomind_eval_tp4 (line 353) | def test_turbomind_eval_tp4(config, run_config, worker_id):
  function test_turbomind_eval_tp8 (line 362) | def test_turbomind_eval_tp8(config, run_config, worker_id):
  function test_pytorch_eval_tp1 (line 372) | def test_pytorch_eval_tp1(config, run_config, worker_id):
  function test_pytorch_eval_tp2 (line 382) | def test_pytorch_eval_tp2(config, run_config, worker_id):
  function test_pytorch_eval_tp4 (line 392) | def test_pytorch_eval_tp4(config, run_config, worker_id):
  function test_pytorch_eval_tp8 (line 402) | def test_pytorch_eval_tp8(config, run_config, worker_id):
  function test_pytorch_eval_tp16 (line 412) | def test_pytorch_eval_tp16(config, run_config, worker_id):
  function test_pytorch_eval_distributed_tp16 (line 421) | def test_pytorch_eval_distributed_tp16(config, run_config, worker_id):
  function test_pytorch_eval_distributed_dpep8 (line 430) | def test_pytorch_eval_distributed_dpep8(config, run_config, worker_id):
  function test_pytorch_eval_distributed_dpep16 (line 439) | def test_pytorch_eval_distributed_dpep16(config, run_config, worker_id):
  function test_turbomind_eval_cp2tp8 (line 448) | def test_turbomind_eval_cp2tp8(config, run_config, worker_id):

FILE: autotest/evaluate/test_mllm_api_evaluate.py
  function run_eval_test (line 10) | def run_eval_test(config, run_config, worker_id, test_type='infer', eval...
  function get_models (line 69) | def get_models(backend, parallel_config):
  function test_turbomind_vl_eval_tp1 (line 85) | def test_turbomind_vl_eval_tp1(config, run_config, worker_id):
  function test_turbomind_vl_eval_tp2 (line 94) | def test_turbomind_vl_eval_tp2(config, run_config, worker_id):
  function test_turbomind_vl_eval_tp4 (line 103) | def test_turbomind_vl_eval_tp4(config, run_config, worker_id):
  function test_turbomind_vl_eval_tp8 (line 112) | def test_turbomind_vl_eval_tp8(config, run_config, worker_id):
  function test_pytorch_vl_eval_tp1 (line 122) | def test_pytorch_vl_eval_tp1(config, run_config, worker_id):
  function test_pytorch_vl_eval_tp2 (line 132) | def test_pytorch_vl_eval_tp2(config, run_config, worker_id):
  function test_pytorch_vl_eval_tp4 (line 142) | def test_pytorch_vl_eval_tp4(config, run_config, worker_id):
  function test_pytorch_vl_eval_tp8 (line 152) | def test_pytorch_vl_eval_tp8(config, run_config, worker_id):
  function test_pytorch_vl_eval_tp16 (line 162) | def test_pytorch_vl_eval_tp16(config, run_config, worker_id):
  function test_turbomind_eval_tp1 (line 171) | def test_turbomind_eval_tp1(config, run_config, worker_id):
  function test_turbomind_eval_tp2 (line 180) | def test_turbomind_eval_tp2(config, run_config, worker_id):
  function test_turbomind_eval_tp4 (line 189) | def test_turbomind_eval_tp4(config, run_config, worker_id):
  function test_turbomind_eval_tp8 (line 198) | def test_turbomind_eval_tp8(config, run_config, worker_id):
  function test_pytorch_eval_tp1 (line 208) | def test_pytorch_eval_tp1(config, run_config, worker_id):
  function test_pytorch_eval_tp2 (line 218) | def test_pytorch_eval_tp2(config, run_config, worker_id):
  function test_pytorch_eval_tp4 (line 228) | def test_pytorch_eval_tp4(config, run_config, worker_id):
  function test_pytorch_eval_tp8 (line 238) | def test_pytorch_eval_tp8(config, run_config, worker_id):
  function test_pytorch_eval_tp16 (line 248) | def test_pytorch_eval_tp16(config, run_config, worker_id):

FILE: autotest/interface/pipeline/test_pipeline_func.py
  function init_pipeline (line 15) | def init_pipeline(model_path, backend_config):
  function run_case_in_spawn (line 21) | def run_case_in_spawn(worker_id, target, args):
  function run_pipeline_testcase_prompt (line 33) | def run_pipeline_testcase_prompt(config, model, backend, file_name):
  function run_pipeline_testcase_prompt_stream (line 43) | def run_pipeline_testcase_prompt_stream(config, model, backend, file_name):
  function run_pipeline_testcase_multi_prompt (line 55) | def run_pipeline_testcase_multi_prompt(config, model, backend, file_name):
  function run_pipeline_testcase_multi_prompt_stream (line 65) | def run_pipeline_testcase_multi_prompt_stream(config, model, backend, fi...
  function run_pipeline_testcase_message (line 77) | def run_pipeline_testcase_message(config, model, backend, file_name):
  function run_pipeline_testcase_message_stream (line 88) | def run_pipeline_testcase_message_stream(config, model, backend, file_na...
  function run_pipeline_testcase_message_batch (line 101) | def run_pipeline_testcase_message_batch(config, model, backend, file_name):
  function run_pipeline_testcase_message_batch_stream (line 112) | def run_pipeline_testcase_message_batch_stream(config, model, backend, f...
  function run_pipeline_testcase_logprobs (line 125) | def run_pipeline_testcase_logprobs(config, model, backend, file_name):
  function run_pipeline_testcase_logprobs_stream (line 136) | def run_pipeline_testcase_logprobs_stream(config, model, backend, file_n...
  function run_pipeline_testcase_session_len (line 149) | def run_pipeline_testcase_session_len(config, model, backend, file_name):
  function run_pipeline_testcase_min_new_tokens (line 163) | def run_pipeline_testcase_min_new_tokens(config, model, backend, file_na...
  function run_pipeline_testcase_stop_words (line 177) | def run_pipeline_testcase_stop_words(config, model, backend, file_name):
  function run_pipeline_testcase_bad_words (line 192) | def run_pipeline_testcase_bad_words(config, model, backend, file_name):
  function run_pipeline_testcase_special_words_false (line 205) | def run_pipeline_testcase_special_words_false(config, model, backend, fi...
  function run_pipeline_testcase_special_words_true (line 225) | def run_pipeline_testcase_special_words_true(config, model, backend, fil...
  function run_pipeline_testcase_repetition_penalty (line 245) | def run_pipeline_testcase_repetition_penalty(config, model, backend, fil...
  function run_pipeline_testcase_repetition_penalty_bigger (line 256) | def run_pipeline_testcase_repetition_penalty_bigger(config, model, backe...
  function run_pipeline_testcase_min_top_p (line 267) | def run_pipeline_testcase_min_top_p(config, model, backend, file_name):
  function run_pipeline_testcase_min_top_k (line 278) | def run_pipeline_testcase_min_top_k(config, model, backend, file_name):
  function run_pipeline_testcase_diff_random_seed (line 291) | def run_pipeline_testcase_diff_random_seed(config, model, backend, file_...
  function run_pipeline_testcase_same_random_seed (line 304) | def run_pipeline_testcase_same_random_seed(config, model, backend, file_...
  function run_pipeline_testcase_do_sample_batch (line 317) | def run_pipeline_testcase_do_sample_batch(config, model, backend, file_n...
  function run_pipeline_testcase_max_new_tokens (line 328) | def run_pipeline_testcase_max_new_tokens(config, model, backend, file_na...
  function run_pipeline_testcase_ignore_eos (line 342) | def run_pipeline_testcase_ignore_eos(config, model, backend, file_name):
  function test_return_with_prompt (line 358) | def test_return_with_prompt(config, model, backend, worker_id):
  function test_return_with_prompt_stream (line 367) | def test_return_with_prompt_stream(config, model, backend, worker_id):
  function test_return_with_multi_prompt (line 376) | def test_return_with_multi_prompt(config, model, backend, worker_id):
  function test_return_with_multi_prompt_stream (line 385) | def test_return_with_multi_prompt_stream(config, model, backend, worker_...
  function test_return_with_message (line 394) | def test_return_with_message(config, model, backend, worker_id):
  function test_return_with_message_stream (line 402) | def test_return_with_message_stream(config, model, backend, worker_id):
  function test_return_with_message_batch (line 410) | def test_return_with_message_batch(config, model, backend, worker_id):
  function test_return_with_message_batch_stream (line 418) | def test_return_with_message_batch_stream(config, model, backend, worker...
  function test_return_check_logprobs (line 426) | def test_return_check_logprobs(config, model, backend, worker_id):
  function test_return_check_logprobs_stream (line 434) | def test_return_check_logprobs_stream(config, model, backend, worker_id):
  function test_backend_config_session_len (line 442) | def test_backend_config_session_len(config, model, backend, worker_id):
  function test_gen_config_min_new_tokens (line 450) | def test_gen_config_min_new_tokens(config, model, backend, worker_id):
  function test_gen_config_stop_words (line 458) | def test_gen_config_stop_words(config, model, backend, worker_id):
  function test_gen_config_bad_words (line 466) | def test_gen_config_bad_words(config, model, backend, worker_id):
  function test_gen_config_special_words_false (line 474) | def test_gen_config_special_words_false(config, model, backend, worker_id):
  function test_gen_config_special_words_true (line 482) | def test_gen_config_special_words_true(config, model, backend, worker_id):
  function test_gen_config_minimum_repetition_penalty (line 490) | def test_gen_config_minimum_repetition_penalty(config, model, backend, w...
  function test_gen_config_repetition_penalty_bigger_than_1 (line 498) | def test_gen_config_repetition_penalty_bigger_than_1(config, model, back...
  function test_gen_config_minimun_topp (line 506) | def test_gen_config_minimun_topp(config, model, backend, worker_id):
  function test_gen_config_minimun_topk (line 514) | def test_gen_config_minimun_topk(config, model, backend, worker_id):
  function test_gen_config_diff_random_seed (line 522) | def test_gen_config_diff_random_seed(config, model, backend, worker_id):
  function test_gen_config_same_random_seed (line 530) | def test_gen_config_same_random_seed(config, model, backend, worker_id):
  function test_gen_config_do_sample_batch (line 538) | def test_gen_config_do_sample_batch(config, model, backend, worker_id):
  function test_gen_config_max_new_tokens (line 546) | def test_gen_config_max_new_tokens(config, model, backend, worker_id):
  function test_gen_config_ignore_eos (line 554) | def test_gen_config_ignore_eos(config, model, backend, worker_id):
  function test_backend_config_input_validation (line 562) | def test_backend_config_input_validation(config, model, backend, worker_...
  function test_backend_config_validate_turbomind (line 599) | def test_backend_config_validate_turbomind(config, model, backend, worke...
  function test_backend_config_validate_pytorch (line 637) | def test_backend_config_validate_pytorch(config, model, backend, worker_...
  function test_backend_config_tp (line 667) | def test_backend_config_tp(config, model, backend, worker_id):

FILE: autotest/interface/pipeline/test_pipeline_longtext_func.py
  function run_case_in_spawn (line 24) | def run_case_in_spawn(target, args):
  function test_history_issue_tp1 (line 33) | def test_history_issue_tp1(config, model, worker_id):
  function test_history_issue_tp2 (line 43) | def test_history_issue_tp2(config, model, worker_id):
  function stream_infer_worker (line 52) | def stream_infer_worker(config, model, tp_num):
  function test_long_test_passkey_tp1 (line 77) | def test_long_test_passkey_tp1(config, model, backend, worker_id):
  function test_long_test_passkey_tp2 (line 90) | def test_long_test_passkey_tp2(config, model, backend, worker_id):
  function test_long_test_passkey_tp8 (line 104) | def test_long_test_passkey_tp8(config, model, backend, worker_id):
  function passkey_retrival_worker (line 125) | def passkey_retrival_worker(config, model, backend, log_name, tp_num, se...
  function get_passkey_prompt (line 177) | def get_passkey_prompt(pipe, session_len):

FILE: autotest/interface/restful/test_restful_chat_completions_v1.py
  class TestRestfulInterfaceBase (line 22) | class TestRestfulInterfaceBase:
    method test_get_model (line 25) | def test_get_model(self, config, backend, model_case):
    method test_encode_s1 (line 34) | def test_encode_s1(self, backend, model_case):
    method test_encode (line 54) | def test_encode(self, backend, model_case):
  class TestRestfulInterfaceChatCompletions (line 78) | class TestRestfulInterfaceChatCompletions:
    method test_return_info_with_prompt (line 80) | def test_return_info_with_prompt(self, backend, model_case):
    method test_return_info_with_messegae (line 94) | def test_return_info_with_messegae(self, backend, model_case):
    method test_return_info_with_prompt_streaming (line 106) | def test_return_info_with_prompt_streaming(self, backend, model_case):
    method test_return_info_with_messegae_streaming (line 125) | def test_return_info_with_messegae_streaming(self, backend, model_case):
    method test_single_stopword (line 142) | def test_single_stopword(self, backend, model_case):
    method test_single_stopword_streaming (line 159) | def test_single_stopword_streaming(self, backend, model_case):
    method test_array_stopwords (line 181) | def test_array_stopwords(self, backend, model_case):
    method test_array_stopwords_streaming (line 200) | def test_array_stopwords_streaming(self, backend, model_case):
    method test_special_words (line 225) | def test_special_words(self, backend, model_case):
    method test_minimum_repetition_penalty (line 253) | def test_minimum_repetition_penalty(self, backend, model_case):
    method test_minimum_repetition_penalty_streaming (line 272) | def test_minimum_repetition_penalty_streaming(self, backend, model_case):
    method test_repetition_penalty_bigger_than_1 (line 297) | def test_repetition_penalty_bigger_than_1(self, backend, model_case):
    method test_repetition_penalty_bigger_than_1_streaming (line 313) | def test_repetition_penalty_bigger_than_1_streaming(self, backend, mod...
    method test_minimum_topp (line 334) | def test_minimum_topp(self, backend, model_case):
    method test_minimum_topp_streaming (line 355) | def test_minimum_topp_streaming(self, backend, model_case):
    method test_mistake_modelname_return (line 381) | def test_mistake_modelname_return(self, backend, model_case):
    method test_mistake_modelname_return_streaming (line 396) | def test_mistake_modelname_return_streaming(self, backend, model_case):
    method test_mutilple_times_response_should_not_same (line 415) | def test_mutilple_times_response_should_not_same(self, backend, model_...
    method test_mutilple_times_response_should_not_same_streaming (line 434) | def test_mutilple_times_response_should_not_same_streaming(self, backe...
    method test_longtext_input (line 458) | def test_longtext_input(self, backend, model_case):
    method test_longtext_input_streaming (line 473) | def test_longtext_input_streaming(self, backend, model_case):
    method test_ignore_eos (line 492) | def test_ignore_eos(self, backend, model_case):
    method test_ignore_eos_streaming (line 511) | def test_ignore_eos_streaming(self, backend, model_case):
    method __test_max_tokens_or_max_completion_tokens (line 536) | def __test_max_tokens_or_max_completion_tokens(
    method test_max_tokens (line 572) | def test_max_tokens(self, backend, model_case):
    method test_max_completion_tokens (line 575) | def test_max_completion_tokens(self, backend, model_case):
    method __test_max_tokens_streaming_or_max_completion_tokens_streaming (line 578) | def __test_max_tokens_streaming_or_max_completion_tokens_streaming(
    method test_max_tokens_streaming (line 622) | def test_max_tokens_streaming(self, backend, model_case):
    method test_max_completion_tokens_streaming (line 625) | def test_max_completion_tokens_streaming(self, backend, model_case):
    method test_logprobs (line 629) | def test_logprobs(self, backend, model_case):
    method test_logprobs_streaming (line 649) | def test_logprobs_streaming(self, backend, model_case):
  class TestRestfulOpenAI (line 680) | class TestRestfulOpenAI:
    method test_return_info (line 683) | def test_return_info(self, backend, model_case):
    method test_return_info_streaming (line 699) | def test_return_info_streaming(self, backend, model_case):
    method test_single_stopword (line 720) | def test_single_stopword(self, backend, model_case):
    method test_single_stopword_streaming (line 739) | def test_single_stopword_streaming(self, backend, model_case):
    method test_array_stopwords (line 763) | def test_array_stopwords(self, backend, model_case):
    method test_array_stopwords_streaming (line 785) | def test_array_stopwords_streaming(self, backend, model_case):
    method test_minimum_topp (line 812) | def test_minimum_topp(self, backend, model_case):
    method test_minimum_topp_streaming (line 835) | def test_minimum_topp_streaming(self, backend, model_case):
    method test_mistake_modelname_return (line 863) | def test_mistake_modelname_return(self, backend, model_case):
    method test_mistake_modelname_return_streaming (line 878) | def test_mistake_modelname_return_streaming(self, backend, model_case):
    method test_mutilple_times_response_should_not_same (line 894) | def test_mutilple_times_response_should_not_same(self, backend, model_...
    method test_mutilple_times_response_should_not_same_streaming (line 914) | def test_mutilple_times_response_should_not_same_streaming(self, backe...
    method test_longtext_input (line 940) | def test_longtext_input(self, backend, model_case):
    method test_longtext_input_streaming (line 958) | def test_longtext_input_streaming(self, backend, model_case):
    method test_max_tokens (line 983) | def test_max_tokens(self, backend, model_case):
    method test_max_tokens_streaming (line 1000) | def test_max_tokens_streaming(self, backend, model_case):
    method test_logprobs (line 1031) | def test_logprobs(self, backend, model_case):
    method test_logprobs_streaming (line 1052) | def test_logprobs_streaming(self, backend, model_case):
    method test_input_validation (line 1083) | def test_input_validation(self, backend, model_case):
    method test_input_validation_streaming (line 1116) | def test_input_validation_streaming(self, backend, model_case):
    method test_disable_think (line 1150) | def test_disable_think(self, backend, model_case):
    method test_disable_think_with_image (line 1183) | def test_disable_think_with_image(self, backend, model_case):

FILE: autotest/interface/restful/test_restful_completions_v1.py
  class TestRestfulInterfaceBase (line 15) | class TestRestfulInterfaceBase:
    method test_get_model (line 18) | def test_get_model(self, config, backend, model_case):
    method test_encode (line 24) | def test_encode(self, backend, model_case):
    method test_return (line 42) | def test_return(self, backend, model_case):
    method test_return_streaming (line 58) | def test_return_streaming(self, backend, model_case):
    method test_max_tokens (line 72) | def test_max_tokens(self, backend, model_case):
    method test_single_stopword (line 85) | def test_single_stopword(self, backend, model_case):
    method test_array_stopwords (line 96) | def test_array_stopwords(self, backend, model_case):
    method test_completions_stream (line 109) | def test_completions_stream(self, backend, model_case):
    method test_completions_stream_stopword (line 127) | def test_completions_stream_stopword(self, backend, model_case):
    method test_completions_stream_stopwords (line 151) | def test_completions_stream_stopwords(self, backend, model_case):
    method test_batch_prompt_order (line 177) | def test_batch_prompt_order(self, backend, model_case):

FILE: autotest/interface/restful/test_restful_generate.py
  class TestGenerateComprehensive (line 22) | class TestGenerateComprehensive:
    method setup_api (line 25) | def setup_api(self, request, config, model_name, backend):
    method _log_request_response (line 38) | def _log_request_response(self, payload, response_data, stream_raw=None):
    method _post (line 55) | def _post(self, payload, stream=False):
    method _validate_generation_response (line 117) | def _validate_generation_response(self,
    method test_basic_generation (line 235) | def test_basic_generation(self):
    method test_input_ids_mode (line 294) | def test_input_ids_mode(self, config):
    method test_conflict_prompt_and_input_ids (line 349) | def test_conflict_prompt_and_input_ids(self):
    method test_input_ids_with_logprob (line 437) | def test_input_ids_with_logprob(self, config):
    method test_stop_str_with_include_flag (line 497) | def test_stop_str_with_include_flag(self):
    method test_streaming_mode (line 542) | def test_streaming_mode(self):
    method test_streaming_incremental_correctness (line 572) | def test_streaming_incremental_correctness(self):
    method test_return_logprob (line 625) | def test_return_logprob(self):
    method test_same_session_id_allowed (line 635) | def test_same_session_id_allowed(self):
    method test_empty_prompt_rejected (line 658) | def test_empty_prompt_rejected(self):
    method test_input_ids_rejected (line 673) | def test_input_ids_rejected(self):
    method test_stress_concurrent_requests (line 706) | def test_stress_concurrent_requests(self):
    method test_stress_long_prompt_and_generation (line 761) | def test_stress_long_prompt_and_generation(self):
    method test_stress_streaming_under_load (line 771) | def test_stress_streaming_under_load(self):
    method test_temperature_parameter (line 824) | def test_temperature_parameter(self):
    method test_top_p_parameter (line 844) | def test_top_p_parameter(self):
    method test_top_k_parameter (line 857) | def test_top_k_parameter(self):
    method test_min_p_parameter (line 870) | def test_min_p_parameter(self):
    method test_repetition_penalty (line 878) | def test_repetition_penalty(self):
    method test_ignore_eos_parameter (line 900) | def test_ignore_eos_parameter(self):
    method test_skip_special_tokens (line 917) | def test_skip_special_tokens(self, config):
    method test_stop_token_ids (line 941) | def test_stop_token_ids(self):
    method test_combined_parameters (line 968) | def test_combined_parameters(self):
    method test_streaming_with_all_parameters (line 984) | def test_streaming_with_all_parameters(self):
    method test_invalid_temperature_values (line 1008) | def test_invalid_temperature_values(self):
    method test_invalid_top_p_values (line 1019) | def test_invalid_top_p_values(self):
    method test_invalid_top_k_values (line 1027) | def test_invalid_top_k_values(self):
    method test_boundary_max_tokens (line 1035) | def test_boundary_max_tokens(self):
    method test_parameter_interactions (line 1057) | def test_parameter_interactions(self):
    method test_session_id_with_all_parameters (line 1074) | def test_session_id_with_all_parameters(self):
    method test_edge_cases_stop_conditions (line 1105) | def test_edge_cases_stop_conditions(self):
    method test_spaces_between_special_tokens (line 1134) | def test_spaces_between_special_tokens(self, config):
    method test_request_returns_experts (line 1160) | def test_request_returns_experts(self):

FILE: autotest/toolchain/test_lagent.py
  function test_repeat (line 8) | def test_repeat(config, model):

FILE: autotest/tools/chat/test_command_chat_hf_pytorch.py
  function test_hf_pytorch_chat_tp1 (line 15) | def test_hf_pytorch_chat_tp1(config, run_config, cli_case_config, worker...
  function test_hf_pytorch_chat_tp2 (line 23) | def test_hf_pytorch_chat_tp2(config, run_config, cli_case_config, worker...
  function test_hf_pytorch_chat_tp4 (line 31) | def test_hf_pytorch_chat_tp4(config, run_config, cli_case_config, worker...
  function test_hf_pytorch_chat_tp8 (line 39) | def test_hf_pytorch_chat_tp8(config, run_config, cli_case_config, worker...
  function test_hf_pytorch_chat_tp16 (line 47) | def test_hf_pytorch_chat_tp16(config, run_config, cli_case_config, worke...
  function test_hf_pytorch_base_tp1 (line 55) | def test_hf_pytorch_base_tp1(config, run_config, cli_case_config, worker...
  function test_hf_pytorch_base_tp2 (line 63) | def test_hf_pytorch_base_tp2(config, run_config, cli_case_config, worker...
  function test_hf_pytorch_chat_pr_tp2 (line 71) | def test_hf_pytorch_chat_pr_tp2(config, run_config, cli_case_config, wor...
  function test_hf_pytorch_chat_pr_tp1 (line 80) | def test_hf_pytorch_chat_pr_tp1(config, run_config, cli_case_config, wor...
  function test_modelscope_pytorch_chat_tp1 (line 88) | def test_modelscope_pytorch_chat_tp1(config, run_config, cli_case_config...
  function test_pytorch_chat_with_lora_tp1 (line 99) | def test_pytorch_chat_with_lora_tp1(config, run_config, cli_case_config,...
  function test_pytorch_chat_with_lora_tp2 (line 109) | def test_pytorch_chat_with_lora_tp2(config, run_config, cli_case_config,...

FILE: autotest/tools/chat/test_command_chat_hf_turbomind.py
  function test_hf_turbomind_chat_tp1 (line 15) | def test_hf_turbomind_chat_tp1(config, run_config, cli_case_config, work...
  function test_hf_turbomind_chat_tp2 (line 22) | def test_hf_turbomind_chat_tp2(config, run_config, cli_case_config, work...
  function test_hf_turbomind_chat_tp4 (line 29) | def test_hf_turbomind_chat_tp4(config, run_config, cli_case_config, work...
  function test_hf_turbomind_chat_tp8 (line 36) | def test_hf_turbomind_chat_tp8(config, run_config, cli_case_config, work...
  function test_hf_turbomind_chat_fallback_backend_tp1 (line 43) | def test_hf_turbomind_chat_fallback_backend_tp1(config, run_config, cli_...
  function test_hf_turbomind_chat_fallback_backend_tp2 (line 50) | def test_hf_turbomind_chat_fallback_backend_tp2(config, run_config, cli_...
  function test_hf_turbomind_base_tp1 (line 57) | def test_hf_turbomind_base_tp1(config, run_config, cli_case_config, work...
  function test_hf_turbomind_base_tp2 (line 64) | def test_hf_turbomind_base_tp2(config, run_config, cli_case_config, work...
  function test_hf_turbomind_chat_pr_tp2 (line 72) | def test_hf_turbomind_chat_pr_tp2(config, run_config, cli_case_config, w...
  function test_hf_turbomind_chat_pr_tp1 (line 81) | def test_hf_turbomind_chat_pr_tp1(config, run_config, cli_case_config, w...
  function test_modelscope_turbomind_chat_tp1 (line 89) | def test_modelscope_turbomind_chat_tp1(config, run_config, cli_case_conf...

FILE: autotest/tools/pipeline/llm_case.py
  function run_pipeline_chat_test (line 13) | def run_pipeline_chat_test(model_path, run_config, cases_path, is_pr_tes...

FILE: autotest/tools/pipeline/mllm_case.py
  function run_pipeline_mllm_test (line 23) | def run_pipeline_mllm_test(model_path, run_config, resource_path, is_pr_...
  function internvl_vl_testcase (line 125) | def internvl_vl_testcase(pipe, resource_path, lang='en'):
  function MiniCPM_vl_testcase (line 245) | def MiniCPM_vl_testcase(pipe, resource_path):
  function Qwen_vl_testcase (line 343) | def Qwen_vl_testcase(pipe, resource_path):

FILE: autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
  function test_pipeline_chat_tp1 (line 16) | def test_pipeline_chat_tp1(config, run_config, common_case_config, worke...
  function test_pipeline_chat_tp2 (line 24) | def test_pipeline_chat_tp2(config, run_config, common_case_config, worke...
  function test_pipeline_chat_tp4 (line 32) | def test_pipeline_chat_tp4(config, run_config, common_case_config, worke...
  function test_pipeline_chat_tp8 (line 40) | def test_pipeline_chat_tp8(config, run_config, common_case_config, worke...
  function test_pipeline_chat_tp16 (line 48) | def test_pipeline_chat_tp16(config, run_config, common_case_config, work...
  function test_pipeline_chat_pytorch_prefix_cache_tp2 (line 56) | def test_pipeline_chat_pytorch_prefix_cache_tp2(config, run_config, comm...
  function test_hf_pytorch_chat_pr_tp2 (line 64) | def test_hf_pytorch_chat_pr_tp2(config, run_config, common_case_config, ...
  function test_hf_pytorch_chat_pr_tp1 (line 73) | def test_hf_pytorch_chat_pr_tp1(config, run_config, common_case_config, ...
  function test_modelscope_pipeline_chat_tp1 (line 81) | def test_modelscope_pipeline_chat_tp1(config, run_config, common_case_co...
  function test_pytorch_chat_with_lora_tp1 (line 89) | def test_pytorch_chat_with_lora_tp1(config, run_config, common_case_conf...
  function test_pytorch_chat_with_lora_tp2 (line 96) | def test_pytorch_chat_with_lora_tp2(config, run_config, common_case_conf...
  function test_pipeline_chat_speculative_decoding_tp1 (line 105) | def test_pipeline_chat_speculative_decoding_tp1(config, run_config, comm...

FILE: autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py
  function get_models (line 8) | def get_models(parallel_config):
  function test_restful_chat_tp1 (line 15) | def test_restful_chat_tp1(config, run_config, worker_id):
  function test_restful_chat_tp2 (line 21) | def test_restful_chat_tp2(config, run_config, worker_id):
  function test_restful_chat_tp4 (line 27) | def test_restful_chat_tp4(config, run_config, worker_id):
  function test_restful_chat_tp8 (line 33) | def test_restful_chat_tp8(config, run_config, worker_id):
  function test_restful_chat_tp16 (line 39) | def test_restful_chat_tp16(config, run_config, worker_id):

FILE: autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py
  function test_pipeline_chat_tp1 (line 15) | def test_pipeline_chat_tp1(config, run_config, common_case_config, worke...
  function test_pipeline_chat_tp2 (line 22) | def test_pipeline_chat_tp2(config, run_config, common_case_config, worke...
  function test_pipeline_chat_tp4 (line 29) | def test_pipeline_chat_tp4(config, run_config, common_case_config, worke...
  function test_pipeline_chat_tp8 (line 36) | def test_pipeline_chat_tp8(config, run_config, common_case_config, worke...
  function test_pipeline_chat_prefix_cache_tp2 (line 43) | def test_pipeline_chat_prefix_cache_tp2(config, run_config, common_case_...
  function test_pipeline_chat_fallback_backend_tp1 (line 50) | def test_pipeline_chat_fallback_backend_tp1(config, run_config, common_c...
  function test_pipeline_chat_fallback_backend_tp2 (line 58) | def test_pipeline_chat_fallback_backend_tp2(config, run_config, common_c...
  function test_pipeline_chat_pr_tp2 (line 68) | def test_pipeline_chat_pr_tp2(config, run_config, common_case_config, wo...
  function test_pipeline_chat_pr_tp1 (line 79) | def test_pipeline_chat_pr_tp1(config, run_config, common_case_config, wo...
  function test_modelscope_restful_chat_tp1 (line 88) | def test_modelscope_restful_chat_tp1(config, run_config, common_case_con...

FILE: autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py
  function get_models (line 10) | def get_models(parallel_config):
  function test_restful_chat_tp1 (line 17) | def test_restful_chat_tp1(config, run_config, worker_id):
  function test_restful_chat_tp2 (line 23) | def test_restful_chat_tp2(config, run_config, worker_id):
  function test_restful_chat_tp4 (line 29) | def test_restful_chat_tp4(config, run_config, worker_id):
  function test_restful_chat_tp8 (line 35) | def test_restful_chat_tp8(config, run_config, worker_id):
  function test_restful_chat_tp16 (line 41) | def test_restful_chat_tp16(config, run_config, worker_id):
  function test_restful_chat_fallback_backend_tp1 (line 48) | def test_restful_chat_fallback_backend_tp1(config, run_config, worker_id):
  function test_pipeline_pr_test (line 56) | def test_pipeline_pr_test(config, run_config, worker_id):
  function test_pipeline_pr_tp2_test (line 65) | def test_pipeline_pr_tp2_test(config, run_config, worker_id):

FILE: autotest/tools/quantization/test_quantization_awq.py
  function test_quantization_awq (line 13) | def test_quantization_awq(config, model, worker_id):
  function test_quantization_gptq (line 22) | def test_quantization_gptq(config, model, worker_id):
  function test_quantization_awq_pr (line 34) | def test_quantization_awq_pr(config, model):
  function quantization_all (line 39) | def quantization_all(config, quantization_model_name, origin_model_name,...

FILE: autotest/tools/quantization/test_quantization_w8a8.py
  function test_quantization_w8a8 (line 13) | def test_quantization_w8a8(config, model, worker_id):
  function quantization_w8a8 (line 17) | def quantization_w8a8(config, quantization_model_name, origin_model_name...

FILE: autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py
  function _run_ray_distributed_test (line 16) | def _run_ray_distributed_test(
  function _run_proxy_distributed_test (line 41) | def _run_proxy_distributed_test(
  function test_restful_chat_tp1 (line 74) | def test_restful_chat_tp1(config, run_config, common_case_config, worker...
  function test_restful_chat_tp2 (line 82) | def test_restful_chat_tp2(config, run_config, common_case_config, worker...
  function test_restful_chat_tp4 (line 90) | def test_restful_chat_tp4(config, run_config, common_case_config, worker...
  function test_restful_chat_tp8 (line 98) | def test_restful_chat_tp8(config, run_config, common_case_config, worker...
  function test_restful_chat_tp16 (line 106) | def test_restful_chat_tp16(config, run_config, common_case_config, worke...
  function test_restful_chat_distributed_tp16 (line 115) | def test_restful_chat_distributed_tp16(shared_ray_manager, config, run_c...
  function test_restful_chat_distributed_dpep16 (line 127) | def test_restful_chat_distributed_dpep16(shared_proxy_manager, config, r...
  function test_restful_chat_pytorch_prefix_cache_tp2 (line 138) | def test_restful_chat_pytorch_prefix_cache_tp2(config, run_config, commo...
  function test_hf_pytorch_chat_pr_tp2 (line 146) | def test_hf_pytorch_chat_pr_tp2(config, run_config, common_case_config, ...
  function test_hf_pytorch_chat_pr_tp1 (line 155) | def test_hf_pytorch_chat_pr_tp1(config, run_config, common_case_config, ...
  function test_modelscope_restful_chat_tp1 (line 163) | def test_modelscope_restful_chat_tp1(config, run_config, common_case_con...
  function test_pytorch_chat_with_lora_tp1 (line 171) | def test_pytorch_chat_with_lora_tp1(config, run_config, common_case_conf...
  function test_pytorch_chat_with_lora_tp2 (line 178) | def test_pytorch_chat_with_lora_tp2(config, run_config, common_case_conf...
  function test_restful_chat_reasoning_tp1 (line 188) | def test_restful_chat_reasoning_tp1(config, run_config, worker_id):
  function test_restful_chat_reasoning_tp2 (line 198) | def test_restful_chat_reasoning_tp2(config, run_config, worker_id):
  function test_restful_chat_tools_tp1 (line 208) | def test_restful_chat_tools_tp1(config, run_config, worker_id):
  function test_restful_chat_tools_tp2 (line 218) | def test_restful_chat_tools_tp2(config, run_config, worker_id):
  function test_restful_chat_tools_tp4 (line 228) | def test_restful_chat_tools_tp4(config, run_config, worker_id):
  function test_restful_chat_speculative_decoding_tp1 (line 237) | def test_restful_chat_speculative_decoding_tp1(config, run_config, commo...
  function test_restful_chat_speculative_decoding_tp16 (line 247) | def test_restful_chat_speculative_decoding_tp16(shared_ray_manager, conf...

FILE: autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py
  function test_restful_chat_tp1 (line 11) | def test_restful_chat_tp1(config, run_config, worker_id):
  function test_restful_chat_tp2 (line 17) | def test_restful_chat_tp2(config, run_config, worker_id):
  function test_restful_chat_tp4 (line 23) | def test_restful_chat_tp4(config, run_config, worker_id):
  function test_restful_chat_tp8 (line 29) | def test_restful_chat_tp8(config, run_config, worker_id):
  function test_restful_chat_tp16 (line 35) | def test_restful_chat_tp16(config, run_config, worker_id):

FILE: autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py
  function test_restful_chat_tp1 (line 16) | def test_restful_chat_tp1(config, run_config, common_case_config, worker...
  function test_restful_chat_tp2 (line 23) | def test_restful_chat_tp2(config, run_config, common_case_config, worker...
  function test_restful_chat_tp4 (line 30) | def test_restful_chat_tp4(config, run_config, common_case_config, worker...
  function test_restful_chat_tp8 (line 37) | def test_restful_chat_tp8(config, run_config, common_case_config, worker...
  function test_restful_chat_prefix_cache_tp2 (line 44) | def test_restful_chat_prefix_cache_tp2(config, run_config, common_case_c...
  function test_restful_chat_fallback_backend_tp1 (line 51) | def test_restful_chat_fallback_backend_tp1(config, run_config, common_ca...
  function test_restful_chat_fallback_backend_tp2 (line 59) | def test_restful_chat_fallback_backend_tp2(config, run_config, common_ca...
  function test_restful_chat_pr_tp2 (line 69) | def test_restful_chat_pr_tp2(config, run_config, common_case_config, wor...
  function test_restful_chat_pr_tp1 (line 80) | def test_restful_chat_pr_tp1(config, run_config, common_case_config, wor...
  function test_restful_logprobs (line 90) | def test_restful_logprobs(config, run_config, worker_id):
  function test_modelscope_restful_chat_tp1 (line 98) | def test_modelscope_restful_chat_tp1(config, run_config, common_case_con...
  function test_restful_chat_reasoning_tp1 (line 109) | def test_restful_chat_reasoning_tp1(config, run_config, worker_id):
  function test_restful_chat_reasoning_tp2 (line 119) | def test_restful_chat_reasoning_tp2(config, run_config, worker_id):
  function test_restful_chat_tools_tp1 (line 129) | def test_restful_chat_tools_tp1(config, run_config, worker_id):
  function test_restful_chat_tools_tp2 (line 139) | def test_restful_chat_tools_tp2(config, run_config, worker_id):
  function test_restful_chat_tools_tp4 (line 149) | def test_restful_chat_tools_tp4(config, run_config, worker_id):

FILE: autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py
  function test_restful_chat_tp1 (line 12) | def test_restful_chat_tp1(config, run_config, worker_id):
  function test_restful_chat_tp2 (line 18) | def test_restful_chat_tp2(config, run_config, worker_id):
  function test_restful_chat_tp4 (line 24) | def test_restful_chat_tp4(config, run_config, worker_id):
  function test_restful_chat_tp8 (line 30) | def test_restful_chat_tp8(config, run_config, worker_id):
  function test_restful_chat_tp16 (line 36) | def test_restful_chat_tp16(config, run_config, worker_id):
  function test_restful_chat_fallback_backend_tp1 (line 43) | def test_restful_chat_fallback_backend_tp1(config, run_config, worker_id):

FILE: autotest/utils/benchmark_utils.py
  function throughput_test (line 11) | def throughput_test(config, run_config, worker_id: str = '', is_smoke: b...
  function longtext_throughput_test (line 56) | def longtext_throughput_test(config, run_config, worker_id: str = ''):
  function restful_test (line 103) | def restful_test(config, run_config, worker_id: str = '', is_smoke: bool...
  function restful_profile (line 133) | def restful_profile(config, run_config, port, is_smoke: bool = False):
  function mllm_restful_profile (line 165) | def mllm_restful_profile(config, run_config, port, is_smoke: bool = False):
  function prefixcache_throughput_test (line 196) | def prefixcache_throughput_test(config, run_config, worker_id: str = '',...
  function get_max_cache_entry (line 257) | def get_max_cache_entry(model, backend):

FILE: autotest/utils/common_utils.py
  function execute_command_with_logging (line 6) | def execute_command_with_logging(cmd,

FILE: autotest/utils/config_utils.py
  function resolve_extra_params (line 15) | def resolve_extra_params(extra_params: dict[str, Any], model_base_path: ...
  function get_func_config_list (line 39) | def get_func_config_list(backend: str,
  function get_cli_common_param (line 134) | def get_cli_common_param(run_config: dict[str, Any]) -> str:
  function get_cli_str (line 169) | def get_cli_str(config: dict[str, Any]) -> str:
  function get_parallel_config (line 188) | def get_parallel_config(config: dict[str, Any], model_name: str) -> list...
  function _extract_models_from_config (line 208) | def _extract_models_from_config(config_value: Any) -> list[str]:
  function get_model_list (line 220) | def get_model_list(config: dict[str, Any],
  function _filter_by_test_func_type (line 259) | def _filter_by_test_func_type(config: dict[str, Any], model_list: list[s...
  function _extend_turbomind_quant_models (line 273) | def _extend_turbomind_quant_models(quant_config: dict[str, Any], base_mo...
  function _extend_pytorch_quant_models (line 288) | def _extend_pytorch_quant_models(quant_config: dict[str, Any], base_mode...
  function _is_kvint_model (line 300) | def _is_kvint_model(config: dict[str, Any], backend: str, model: str, qu...
  function _base_model_name (line 310) | def _base_model_name(model: str) -> str:
  function get_quantization_model_list (line 316) | def get_quantization_model_list(type: str) -> list[str]:
  function get_config (line 348) | def get_config() -> dict[str, Any]:
  function get_cuda_prefix_by_workerid (line 378) | def get_cuda_prefix_by_workerid(worker_id: str | None, parallel_config: ...
  function get_cuda_id_by_workerid (line 395) | def get_cuda_id_by_workerid(worker_id: str | None, tp_num: int = 1) -> s...
  function get_workerid (line 406) | def get_workerid(worker_id: str | None) -> int:
  function is_quantization_model (line 415) | def is_quantization_model(model: str) -> bool:
  function _get_communicator_list (line 421) | def _get_communicator_list(config: dict[str, Any],
  function set_device_env_variable (line 439) | def set_device_env_variable(worker_id: str | None, parallel_config: dict...
  function unset_device_env_variable (line 460) | def unset_device_env_variable():
  function is_model_in_list (line 470) | def is_model_in_list(config: dict[str, Any], parallel_config: dict[str, ...
  function get_case_str_by_config (line 476) | def get_case_str_by_config(run_config: dict[str, Any], is_simple: bool =...
  function parse_config_by_case (line 501) | def parse_config_by_case(case_str: str) -> dict[str, Any]:
  function test_config (line 531) | def test_config():
  function test_get_case_str_by_config (line 574) | def test_get_case_str_by_config():
  function test_cli_common_param (line 596) | def test_cli_common_param():
  function test_return_info_turbomind (line 637) | def test_return_info_turbomind():
  function test_return_info_pytorch (line 741) | def test_return_info_pytorch():
  function test_run_config (line 845) | def test_run_config():
  function test_get_parallel_config (line 880) | def test_get_parallel_config():

FILE: autotest/utils/evaluate_utils.py
  function write_to_summary (line 16) | def write_to_summary(case_name, result, msg, metrics, result_dir):
  function llm_summary (line 67) | def llm_summary(case_name, result, msg, work_dir, result_dir=None):
  function mllm_summary (line 107) | def mllm_summary(case_name,
  function eval_test (line 146) | def eval_test(model_path, eval_path, case_name, port=DEFAULT_PORT, test_...
  function mllm_eval_test (line 268) | def mllm_eval_test(model_path, eval_path, case_name, port=DEFAULT_PORT, ...

FILE: autotest/utils/get_run_config.py
  function get_model_name (line 5) | def get_model_name(model):
  function _simple_model_name (line 51) | def _simple_model_name(model):

FILE: autotest/utils/mp_log_utils.py
  function write_log (line 7) | def write_log(config, result, msg, is_new: bool = True, case_path_tag: s...
  function assert_log (line 22) | def assert_log(config, case_path_tag: str = 'default'):

FILE: autotest/utils/pipeline_chat.py
  function run_pipeline_llm_test (line 13) | def run_pipeline_llm_test(config, run_config, common_case_config, worker...
  function run_pipeline_mllm_test (line 73) | def run_pipeline_mllm_test(config, run_config, worker_id: str = '', is_s...
  function get_response_from_output (line 165) | def get_response_from_output(output_text, case):
  function get_response_from_output_by_prompt (line 169) | def get_response_from_output_by_prompt(output_text, case, prompt):
  function assert_pipeline_single_return (line 178) | def assert_pipeline_single_return(output, logprobs_num: int = 0):
  function assert_pipeline_batch_return (line 186) | def assert_pipeline_batch_return(output, size: int = 1):
  function assert_pipeline_single_stream_return (line 196) | def assert_pipeline_single_stream_return(output, logprobs_num: int = 0):
  function assert_pipeline_batch_stream_return (line 205) | def assert_pipeline_batch_stream_return(output, size: int = 1):
  function assert_pipeline_single_element (line 214) | def assert_pipeline_single_element(output, is_stream: bool = False, is_l...
  function internvl_vl_testcase (line 246) | def internvl_vl_testcase(output_text, file, lang: str = 'en'):
  function MiniCPM_vl_testcase (line 288) | def MiniCPM_vl_testcase(output_text, file):
  function Qwen_vl_testcase (line 315) | def Qwen_vl_testcase(output_text, file):
  function save_pipeline_common_log (line 342) | def save_pipeline_common_log(config, log_name, result, content, msg: str...
  function assert_pipeline_common_log (line 351) | def assert_pipeline_common_log(config, log_name):

FILE: autotest/utils/proxy_distributed_utils.py
  function is_port_open (line 18) | def is_port_open(host: str, port: int, timeout: float = 1.0) -> bool:
  function check_nodes_status (line 29) | def check_nodes_status(host: str, proxy_port: int, model_name: str, expe...
  function wait_for_model_service_ready (line 79) | def wait_for_model_service_ready(host: str,
  function proxy_worker_node_wait (line 147) | def proxy_worker_node_wait(manager, timeout_minutes: int = 120):
  class ProxyDistributedManager (line 183) | class ProxyDistributedManager:
    method __init__ (line 185) | def __init__(self):
    method start (line 193) | def start(self):
    method cleanup (line 206) | def cleanup(self):
  class ApiServerPerTest (line 216) | class ApiServerPerTest:
    method __init__ (line 218) | def __init__(self, proxy_manager: ProxyDistributedManager, config: dic...
    method start (line 236) | def start(self):
    method wait_until_ready (line 269) | def wait_until_ready(self):
    method cleanup (line 280) | def cleanup(self):

FILE: autotest/utils/quantization_utils.py
  function quantization (line 6) | def quantization(config,

FILE: autotest/utils/ray_distributed_utils.py
  function wait_for_model_service_ready (line 20) | def wait_for_model_service_ready(
  function verify_service_functionality (line 72) | def verify_service_functionality(host: str, api_port: int, model_name: s...
  class RayLMDeployManager (line 102) | class RayLMDeployManager:
    method __init__ (line 104) | def __init__(
    method start_ray_cluster (line 137) | def start_ray_cluster(self):
    method start_lmdeploy_api_server (line 153) | def start_lmdeploy_api_server(self, config: dict[str, Any], run_config...
    method cleanup (line 219) | def cleanup(self, force: bool = True):
    method get_cluster_info (line 255) | def get_cluster_info(self) -> dict[str, Any]:
    method __enter__ (line 266) | def __enter__(self):
    method __exit__ (line 269) | def __exit__(self, exc_type, exc_val, exc_tb):
  function ray_worker_node_wait (line 273) | def ray_worker_node_wait(manager: RayLMDeployManager, timeout_minutes: i...

FILE: autotest/utils/restful_return_check.py
  function assert_chat_completions_batch_return (line 4) | def assert_chat_completions_batch_return(output, model_name, check_logpr...
  function assert_completions_batch_return (line 22) | def assert_completions_batch_return(output, model_name, check_logprobs: ...
  function assert_usage (line 39) | def assert_usage(usage):
  function assert_logprobs (line 46) | def assert_logprobs(logprobs, logprobs_num):
  function assert_logprob_element (line 55) | def assert_logprob_element(logprob):
  function assert_chat_completions_stream_return (line 61) | def assert_chat_completions_stream_return(output,
  function assert_completions_stream_return (line 89) | def assert_completions_stream_return(output,
  function has_repeated_fragment (line 117) | def has_repeated_fragment(text, repeat_count=5):

FILE: autotest/utils/rule_condition_assert.py
  function assert_result (line 1) | def assert_result(input, rule_condition, model_name: str = None):

FILE: autotest/utils/run_client_chat.py
  function run_tests (line 12) | def run_tests(config, usercase, cli_case_config, run_config, worker_id):
  function hf_command_line_test (line 23) | def hf_command_line_test(config, case, case_info, run_config, cuda_prefi...
  function command_test (line 46) | def command_test(config, cmd, run_config, case_info, need_extract_output):
  function parse_dialogue (line 117) | def parse_dialogue(inputs: str):
  function extract_output (line 126) | def extract_output(output: str, model: str):

FILE: autotest/utils/run_restful_chat.py
  function start_openai_service (line 22) | def start_openai_service(config, run_config, worker_id, timeout: int = 1...
  function stop_restful_api (line 96) | def stop_restful_api(pid, startRes):
  function terminate_restful_api (line 104) | def terminate_restful_api(worker_id):
  function run_all_step (line 119) | def run_all_step(log_path, case_name, cases_info, port: int = DEFAULT_PO...
  function open_chat_test (line 137) | def open_chat_test(log_path, case_name, case_info, url):
  function health_check (line 194) | def health_check(url, model_name):
  function get_model (line 210) | def get_model(url):
  function _run_logprobs_test (line 220) | def _run_logprobs_test(port: int = DEFAULT_PORT):
  function run_vl_testcase (line 244) | def run_vl_testcase(log_path, resource_path, port: int = DEFAULT_PORT):
  function _run_reasoning_case (line 297) | def _run_reasoning_case(log_path, port: int = DEFAULT_PORT):
  function test_internlm_multiple_round_prompt (line 342) | def test_internlm_multiple_round_prompt(client, model):
  function test_qwen_multiple_round_prompt (line 443) | def test_qwen_multiple_round_prompt(client, model):
  function _run_tools_case (line 588) | def _run_tools_case(log_path, port: int = DEFAULT_PORT):
  function proxy_health_check (line 691) | def proxy_health_check(url):
  function start_proxy_server (line 704) | def start_proxy_server(log_path, port, case_name: str = 'default'):
  function run_llm_test (line 770) | def run_llm_test(config, run_config, common_case_config, worker_id):
  function run_mllm_test (line 786) | def run_mllm_test(config, run_config, worker_id):
  function run_reasoning_case (line 800) | def run_reasoning_case(config, run_config, worker_id):
  function run_tools_case (line 812) | def run_tools_case(config, run_config, worker_id):
  function run_logprob_test (line 824) | def run_logprob_test(config, run_config, worker_id):

FILE: autotest/utils/toolkit.py
  function parse_sse_stream (line 6) | def parse_sse_stream(content: str) -> list[str]:
  function _load_tokenizer_cached (line 25) | def _load_tokenizer_cached(model_path: str):
  function encode_text (line 33) | def encode_text(model_path: str, text: str) -> list[int]:

FILE: benchmark/benchmark_decode.py
  function benchmark (line 13) | def benchmark(model_path, share_gpt_path, downsample=100, accel=None, sa...

FILE: benchmark/benchmark_pipeline.py
  function get_cmd (line 9) | def get_cmd(model_path, backend, engine_config, data_config):
  function benchmark (line 36) | def benchmark(model_path, backend, engine_config, data_config):
  function main (line 63) | def main(model_path=None, backend=None, config_path=None):

FILE: benchmark/benchmark_serving.py
  function get_launching_server_cmd (line 10) | def get_launching_server_cmd(model_path, backend, server_config):
  function get_output_file (line 31) | def get_output_file(model_path, backend, server_config):
  function get_server_ip_port (line 58) | def get_server_ip_port(backend: str, server_config: Dict) -> Tuple[str, ...
  function wait_server_ready (line 78) | def wait_server_ready(server_ip: str, server_port: int) -> bool:
  function get_client_cmd (line 93) | def get_client_cmd(backend: str, server_ip: str, server_port: int, clien...
  function benchmark (line 115) | def benchmark(model_path: str, backend: str, server_config: Dict, data_c...
  function validate_config (line 169) | def validate_config(config: Dict) -> None:
  function main (line 190) | def main(backend: str, config_path: str, model_path: Optional[str] = None):

FILE: benchmark/benchmark_throughput.py
  function get_cmd (line 9) | def get_cmd(model_path, backend, engine_config, data_config):
  function benchmark (line 36) | def benchmark(model_path, backend, engine_config, data_config):
  function main (line 63) | def main(model_path=None, backend=None, config_path=None):

FILE: benchmark/profile_pipeline_api.py
  function sample_sharegpt_requests (line 20) | def sample_sharegpt_requests(
  function sample_random_requests (line 66) | def sample_random_requests(
  class Engine (line 132) | class Engine:
    method __init__ (line 134) | def __init__(self, model_path: str, engine_config, csv: str):
    method process_request (line 140) | def process_request(self, requests, profiler: Profiler, temperature, t...
  function parse_args (line 199) | def parse_args():
  function main (line 284) | def main():

FILE: benchmark/profile_restful_api.py
  class RequestFuncInput (line 55) | class RequestFuncInput:
  class RequestFuncOutput (line 66) | class RequestFuncOutput:
  function remove_prefix (line 77) | def remove_prefix(text: str, prefix: str) -> str:
  function async_request_trt_llm (line 83) | async def async_request_trt_llm(
  function async_request_openai_completions (line 153) | async def async_request_openai_completions(
  function async_request_openai_chat_completions (line 231) | async def async_request_openai_chat_completions(
  function async_request_sglang_generate (line 339) | async def async_request_sglang_generate(
  function async_request_gserver (line 416) | async def async_request_gserver(
  function get_model (line 423) | def get_model(pretrained_model_name_or_path: str) -> str:
  function get_tokenizer (line 438) | def get_tokenizer(pretrained_model_name_or_path: str, ) -> Union[PreTrai...
  function get_processor (line 449) | def get_processor(pretrained_model_name_or_path: str, ) -> Union[PreTrai...
  class BenchmarkMetrics (line 476) | class BenchmarkMetrics:
  function download_and_cache_file (line 506) | def download_and_cache_file(url: str, filename: Optional[str] = None):
  class DatasetRow (line 541) | class DatasetRow:
    method __post_init__ (line 549) | def __post_init__(self):
  function sample_sharegpt_requests (line 556) | def sample_sharegpt_requests(dataset_path: str,
  function compute_random_lens (line 609) | def compute_random_lens(full_len: int, range_ratio: float, num: int):
  function sample_random_requests (line 617) | def sample_random_requests(
  function parse_image_resolution (line 686) | def parse_image_resolution(image_resolution: str) -> Tuple[int, int]:
  function gen_mm_prompt (line 714) | def gen_mm_prompt(tokenizer, image_pad_id, token_num):
  function create_mm_data_row (line 724) | def create_mm_data_row(text_prompt, images: list, images_base64, output_...
  function sample_image_requests (line 794) | def sample_image_requests(
  function get_request (line 887) | async def get_request(
  function calculate_metrics (line 905) | def calculate_metrics(
  function benchmark (line 980) | async def benchmark(
  function parse_request_rate_range (line 1161) | def parse_request_rate_range(request_rate_range):
  function check_chat_template (line 1169) | def check_chat_template(model_path):
  function run_benchmark (line 1178) | def run_benchmark(args_: argparse.Namespace):
  function set_ulimit (line 1330) | def set_ulimit(target_soft_limit=65535):

FILE: benchmark/profile_throughput.py
  function sample_sharegpt_requests (line 24) | def sample_sharegpt_requests(
  function sample_random_requests (line 69) | def sample_random_requests(
  class Engine (line 135) | class Engine:
    method __init__ (line 137) | def __init__(self, model_path: str, engine_config: Union[PytorchEngine...
    method _inference (line 151) | async def _inference(self, req_queue: Queue, session_id: int, temperat...
    method process_request (line 199) | def process_request(self, requests, profiler: Profiler, concurrency, t...
  function parse_args (line 237) | def parse_args():
  function main (line 337) | def main():

FILE: docs/en/conf.py
  function metrics (line 62) | def metrics():

FILE: docs/zh_cn/conf.py
  function metrics (line 62) | def metrics():

FILE: eval/eval.py
  class ProcessManager (line 9) | class ProcessManager:
    method __init__ (line 12) | def __init__(self):
    method __enter__ (line 16) | def __enter__(self):
    method __exit__ (line 27) | def __exit__(self, exc_type, exc_val, exc_tb):
    method _signal_handler (line 33) | def _signal_handler(self, sig, frame):
    method start_process (line 40) | def start_process(self, cmd):
    method cleanup (line 44) | def cleanup(self):
  function read_config (line 58) | def read_config():
  function update_datasets (line 80) | def update_datasets(config, datasets):
  function get_model_name_from_server (line 118) | def get_model_name_from_server(server: str, tag: str) -> str:
  function save_config (line 128) | def save_config(work_dir: str, config: str):
  function perform_evaluation (line 144) | def perform_evaluation(config, api_server, judger_server, mode, work_dir...
  function main (line 195) | def main():

FILE: examples/lite/qwen3_30b_a3b_awq.py
  function parse_args (line 9) | def parse_args():
  function main (line 25) | def main():

FILE: examples/lite/qwen3_30b_a3b_gptq.py
  function parse_args (line 9) | def parse_args():
  function main (line 25) | def main():

FILE: lmdeploy/api.py
  function pipeline (line 15) | def pipeline(model_path: str,
  function serve (line 78) | def serve(model_path: str,
  function client (line 101) | def client(api_server_url: str = 'http://0.0.0.0:23333', api_key: str | ...

FILE: lmdeploy/archs.py
  function autoget_backend (line 13) | def autoget_backend(model_path: str) -> Literal['turbomind', 'pytorch']:
  function autoget_backend_config (line 58) | def autoget_backend_config(
  function check_vl_llm (line 96) | def check_vl_llm(backend: str, config: dict) -> bool:
  function get_task (line 131) | def get_task(backend: str, model_path: str):
  function get_model_arch (line 147) | def get_model_arch(model_path: str):
  function search_nested_config (line 176) | def search_nested_config(config, key):

FILE: lmdeploy/cli/chat.py
  function input_prompt (line 10) | def input_prompt():
  function build_pipe (line 17) | def build_pipe(model_path, backend, **kwargs):
  function build_gen_config (line 55) | def build_gen_config(**kwargs):
  function get_adapter_name (line 63) | def get_adapter_name(adapters=None, **kwargs):
  function main (line 71) | def main(model_path, backend, **kwargs):

FILE: lmdeploy/cli/cli.py
  class CLI (line 10) | class CLI(object):
    method add_parser_chat (line 18) | def add_parser_chat():
    method add_parser_checkenv (line 78) | def add_parser_checkenv():
    method check_env (line 93) | def check_env(args):
    method chat (line 157) | def chat(args):
    method add_parsers (line 169) | def add_parsers():

FILE: lmdeploy/cli/entrypoint.py
  function run (line 10) | def run():

FILE: lmdeploy/cli/lite.py
  class SubCliLite (line 6) | class SubCliLite(object):
    method add_parser_auto_awq (line 18) | def add_parser_auto_awq():
    method add_parser_auto_gptq (line 44) | def add_parser_auto_gptq():
    method add_parser_calibrate (line 66) | def add_parser_calibrate():
    method add_parser_smooth_quant (line 83) | def add_parser_smooth_quant():
    method auto_awq (line 107) | def auto_awq(args):
    method auto_gptq (line 114) | def auto_gptq(args):
    method calibrate (line 121) | def calibrate(args):
    method smooth_quant (line 128) | def smooth_quant(args):
    method add_parsers (line 135) | def add_parsers():

FILE: lmdeploy/cli/serve.py
  class SubCliServe (line 10) | class SubCliServe:
    method add_parser_api_server (line 22) | def add_parser_api_server():
    method add_parser_proxy (line 161) | def add_parser_proxy():
    method api_server (line 201) | def api_server(args):
    method proxy (line 337) | def proxy(args):
    method add_parsers (line 344) | def add_parsers():

FILE: lmdeploy/cli/utils.py
  class DefaultsAndTypesHelpFormatter (line 15) | class DefaultsAndTypesHelpFormatter(argparse.HelpFormatter):
    method _get_help_string (line 18) | def _get_help_string(self, action):
  function convert_args (line 35) | def convert_args(args):
  function get_lora_adapters (line 42) | def get_lora_adapters(adapters: List[str]):
  function get_chat_template (line 71) | def get_chat_template(chat_template: str, model_path: str = None):
  function get_speculative_config (line 102) | def get_speculative_config(args):
  class ArgumentHelper (line 115) | class ArgumentHelper:
    method model_name (line 119) | def model_name(parser):
    method dtype (line 130) | def dtype(parser, default: str = 'auto'):
    method quant_dtype (line 142) | def quant_dtype(parser, default: str = 'int8'):
    method model_format (line 151) | def model_format(parser, default: str = None):
    method revision (line 161) | def revision(parser, default: str = None):
    method download_dir (line 170) | def download_dir(parser, default: str = None):
    method tp (line 178) | def tp(parser):
    method dp (line 187) | def dp(parser):
    method ep (line 196) | def ep(parser):
    method cp (line 205) | def cp(parser):
    method dp_rank (line 215) | def dp_rank(parser):
    method node_rank (line 224) | def node_rank(parser):
    method num_nodes (line 230) | def num_nodes(parser):
    method dist_init_addr (line 236) | def dist_init_addr(parser):
    method session_id (line 242) | def session_id(parser):
    method session_len (line 248) | def session_len(parser, default: int = None):
    method max_batch_size (line 255) | def max_batch_size(parser):
    method quant_policy (line 265) | def quant_policy(parser, default: int = 0):
    method rope_scaling_factor (line 275) | def rope_scaling_factor(parser):
    method hf_overrides (line 281) | def hf_overrides(parser):
    method use_logn_attn (line 289) | def use_logn_attn(parser):
    method block_size (line 298) | def block_size(parser):
    method top_p (line 304) | def top_p(parser):
    method top_k (line 316) | def top_k(parser):
    method temperature (line 327) | def temperature(parser, default: float = 0.8):
    method repetition_penalty (line 331) | def repetition_penalty(parser):
    method log_level (line 340) | def log_level(parser):
    method api_keys (line 351) | def api_keys(parser):
    method ssl (line 361) | def ssl(parser):
    method backend (line 372) | def backend(parser):
    method stream_output (line 382) | def stream_output(parser):
    method calib_dataset (line 388) | def calib_dataset(parser):
    method calib_samples (line 399) | def calib_samples(parser):
    method calib_seqlen (line 408) | def calib_seqlen(parser):
    method calib_batchsize (line 414) | def calib_batchsize(parser):
    method calib_search_scale (line 426) | def calib_search_scale(parser):
    method device (line 438) | def device(parser, default: str = 'cuda', choices: List[str] = ['cuda'...
    method chat_template (line 448) | def chat_template(parser):
    method reasoning_parser (line 461) | def reasoning_parser(parser):
    method tool_call_parser (line 472) | def tool_call_parser(parser):
    method allow_terminate_by_client (line 483) | def allow_terminate_by_client(parser):
    method enable_abort_handling (line 492) | def enable_abort_handling(parser):
    method cache_max_entry_count (line 502) | def cache_max_entry_count(parser):
    method adapters (line 512) | def adapters(parser):
    method work_dir (line 525) | def work_dir(parser):
    method cache_block_seq_len (line 534) | def cache_block_seq_len(parser):
    method enable_prefix_caching (line 548) | def enable_prefix_caching(parser):
    method num_tokens_per_iter (line 557) | def num_tokens_per_iter(parser):
    method max_prefill_iters (line 564) | def max_prefill_iters(parser):
    method async_ (line 571) | def async_(parser):
    method max_prefill_token_num (line 581) | def max_prefill_token_num(parser):
    method vision_max_batch_size (line 588) | def vision_max_batch_size(parser):
    method max_log_len (line 592) | def max_log_len(parser):
    method disable_fastapi_docs (line 600) | def disable_fastapi_docs(parser):
    method eager_mode (line 608) | def eager_mode(parser):
    method communicator (line 618) | def communicator(parser):
    method enable_microbatch (line 627) | def enable_microbatch(parser):
    method enable_eplb (line 635) | def enable_eplb(parser):
    method disable_metrics (line 641) | def disable_metrics(parser):
    method role (line 650) | def role(parser):
    method migration_backend (line 660) | def migration_backend(parser):
    method disable_vision_encoder (line 668) | def disable_vision_encoder(parser):
    method logprobs_mode (line 676) | def logprobs_mode(parser):
    method dllm_block_length (line 685) | def dllm_block_length(parser):
    method dllm_unmasking_strategy (line 690) | def dllm_unmasking_strategy(parser):
    method dllm_denoising_steps (line 699) | def dllm_denoising_steps(parser):
    method dllm_confidence_threshold (line 707) | def dllm_confidence_threshold(parser):
    method enable_return_routed_experts (line 715) | def enable_return_routed_experts(parser):
    method add_spec_group (line 724) | def add_spec_group(parser):
    method distributed_executor_backend (line 745) | def distributed_executor_backend(parser):
  class FlexibleArgumentParser (line 755) | class FlexibleArgumentParser(argparse.ArgumentParser):
    method parse_args (line 758) | def parse_args(self, args=None, namespace=None):

FILE: lmdeploy/lite/apis/auto_awq.py
  function save_vl_model (line 18) | def save_vl_model(vl_model, model_path, dst_path):
  function auto_awq (line 41) | def auto_awq(model: str,

FILE: lmdeploy/lite/apis/calibrate.py
  function _prepare_for_calibrate (line 78) | def _prepare_for_calibrate(model: nn.Module,
  function make_compatible_internvl_config (line 149) | def make_compatible_internvl_config(model_path):
  function update_moe_mapping (line 166) | def update_moe_mapping(model, model_type):
  function calibrate (line 198) | def calibrate(model: str,

FILE: lmdeploy/lite/apis/get_small_sharded_hf.py
  function parse_args (line 12) | def parse_args():
  function main (line 20) | def main():

FILE: lmdeploy/lite/apis/gptq.py
  function auto_gptq (line 11) | def auto_gptq(model: str,

FILE: lmdeploy/lite/apis/smooth_quant.py
  function smooth_quant (line 17) | def smooth_quant(model: str,

FILE: lmdeploy/lite/modeling/internlm2_gptq.py
  class InternLM2GPTQForCausalLM (line 5) | class InternLM2GPTQForCausalLM(BaseGPTQForCausalLM):

FILE: lmdeploy/lite/modeling/internlm3_gptq.py
  class InternLM3GPTQForCausalLM (line 5) | class InternLM3GPTQForCausalLM(BaseGPTQForCausalLM):

FILE: lmdeploy/lite/quantization/activation/observer.py
  class KVCacheObserver (line 8) | class KVCacheObserver(GlobalAvailMixin):
    method __init__ (line 12) | def __init__(self, num_head: int, head_dim: int) -> None:
    method observe (line 26) | def observe(self, x: torch.Tensor) -> None:
  class ActivationObserver (line 53) | class ActivationObserver(GlobalAvailMixin):
    method __init__ (line 61) | def __init__(self, dim: int) -> None:
    method disable (line 79) | def disable(cls):
    method enable (line 84) | def enable(cls):
    method observe (line 89) | def observe(self, x: torch.Tensor, save_input: bool = False) -> None:
    method save_ratio (line 127) | def save_ratio(self, ratio: float) -> None:

FILE: lmdeploy/lite/quantization/awq.py
  function skipped_module (line 128) | def skipped_module(name: str):
  function get_weight_scale (line 137) | def get_weight_scale(weight, q_group_size=-1):
  function smooth_ln_fcs (line 153) | def smooth_ln_fcs(ln: torch.nn.Module,
  function smooth_fc_fcs (line 206) | def smooth_fc_fcs(pre_fc: torch.nn.Module,
  function check_awq_supported (line 269) | def check_awq_supported(layer_type):
  function quant_weights (line 296) | def quant_weights(model, fcs, bits, symmetry, group_size=-1, device='cud...
  function smooth_layers (line 323) | def smooth_layers(layers, fc2fcs, norm2fcs, a_scales, group_size=-1, dev...
  function pseudo_quantize_tensor (line 351) | def pseudo_quantize_tensor(w, w_bit=8, w_group_size=-1, return_scale_zer...
  function awq_layers (line 380) | def awq_layers(layers, fc2fcs, norm2fcs, a_scales, a_ratios=None, group_...

FILE: lmdeploy/lite/quantization/calibration.py
  class CalibrationContext (line 16) | class CalibrationContext():
    method __init__ (line 30) | def __init__(self,
    method _guess_num_heads (line 81) | def _guess_num_heads(self, model):
    method _init_input_observers (line 92) | def _init_input_observers(self, name2mod):
    method _init_output_observers (line 98) | def _init_output_observers(self, name2mod):
    method _insert_input_observers (line 104) | def _insert_input_observers(self):
    method _insert_output_observers (line 121) | def _insert_output_observers(self):
    method _wrap_decoder_layers (line 138) | def _wrap_decoder_layers(self):
    method collect_inputs_stats (line 168) | def collect_inputs_stats(self):
    method collect_outputs_stats (line 183) | def collect_outputs_stats(self):
    method export (line 199) | def export(self, out_dir):
    method calibrate (line 216) | def calibrate(self, data):
    method __enter__ (line 227) | def __enter__(self):
    method __exit__ (line 241) | def __exit__(self, exc_type, exc_value, traceback):
  function auto_scale_block (line 253) | def auto_scale_block(module, module_kwargs, w_bit, w_group_size, input_f...
  class CalibrationContextV2 (line 337) | class CalibrationContextV2(CalibrationContext):
    method __init__ (line 339) | def __init__(self,
    method _insert_input_observers (line 355) | def _insert_input_observers(self):
    method export (line 372) | def export(self, out_dir):
    method _wrap_decoder_layers_for_search (line 399) | def _wrap_decoder_layers_for_search(self):
    method __enter__ (line 441) | def __enter__(self):

FILE: lmdeploy/lite/quantization/modules/linear.py
  class WeightOnlyQLinear (line 15) | class WeightOnlyQLinear(nn.Module):
    method __init__ (line 28) | def __init__(
    method from_linear (line 74) | def from_linear(cls: Type['WeightOnlyQLinear'],
    method forward (line 141) | def forward(self, x):

FILE: lmdeploy/lite/quantization/weight/quant_utils.py
  function _aligned_size (line 7) | def _aligned_size(a, b):
  function fast_log2_ceil_torch (line 11) | def fast_log2_ceil_torch(x: torch.Tensor) -> torch.Tensor:
  function fast_pow2_torch (line 21) | def fast_pow2_torch(x: torch.Tensor) -> torch.Tensor:
  function fast_round_scale_torch (line 26) | def fast_round_scale_torch(amax: torch.Tensor, fp8_max: torch.Tensor) ->...
  function _get_quant_scaling (line 30) | def _get_quant_scaling(weight: torch.Tensor,
  function quant_blocked_fp8 (line 47) | def quant_blocked_fp8(weight: torch.Tensor,

FILE: lmdeploy/lite/quantization/weight/quantizer.py
  class WeightQuantizer (line 13) | class WeightQuantizer(GlobalAvailMixin):
    method __init__ (line 59) | def __init__(self, bits: int, symmetry: bool, granularity: str, group_...
    method calculate_qparams (line 81) | def calculate_qparams(self, weight: torch.Tensor) -> QParams:
    method quant (line 98) | def quant(self, weight: torch.Tensor, qparams: Optional[QParams] = Non...

FILE: lmdeploy/lite/utils/batch_split.py
  function split_decoder_layer_inputs (line 7) | def split_decoder_layer_inputs(batch_size, *args: Union[torch.Tensor, Any],
  function concat_decoder_layer_outputs (line 61) | def concat_decoder_layer_outputs(batch_outputs: List[Any]) -> Any:

FILE: lmdeploy/lite/utils/cal_qparams.py
  class QParams (line 7) | class QParams(NamedTuple):
  function precise_round (line 15) | def precise_round(x):
  function cal_qparams_per_channel_absmax (line 20) | def cal_qparams_per_channel_absmax(w: torch.Tensor, n_bits: int, return_...
  function cal_qparams_per_channel_minmax (line 36) | def cal_qparams_per_channel_minmax(w: torch.Tensor, n_bits: int, return_...
  function cal_qparams_per_group_absmax (line 58) | def cal_qparams_per_group_absmax(w: torch.Tensor, n_bits: int, group_siz...
  function cal_qparams_per_group_minmax (line 79) | def cal_qparams_per_group_minmax(w: torch.Tensor, n_bits: int, group_siz...
  function cal_qparams_per_tensor_minmax (line 105) | def cal_qparams_per_tensor_minmax(w: torch.Tensor, n_bits: int, return_s...
  function cal_qparams_per_tensor_absmax (line 125) | def cal_qparams_per_tensor_absmax(w: torch.Tensor, n_bits: int, return_s...

FILE: lmdeploy/lite/utils/calib_dataloader.py
  function set_seed (line 8) | def set_seed(seed):
  function process_dataset (line 14) | def process_dataset(ds, tokenizer, max_seq_length):
  function get_wikitext2 (line 102) | def get_wikitext2(dataset, tokenizer, nsamples, seed, seqlen):
  function get_c4 (line 128) | def get_c4(dataset, tokenizer, nsamples, seed, seqlen):
  function get_pileval (line 158) | def get_pileval(dataset, tokenizer, nsamples, seed, seqlen=512):
  function get_gsm8k (line 211) | def get_gsm8k(dataset, tokenizer, nsamples, seed, seqlen):
  function get_neuralmagic_calibration (line 250) | def get_neuralmagic_calibration(dataset, tokenizer, nsamples, seed, seql...
  function get_open_platypus (line 289) | def get_open_platypus(dataset, tokenizer, nsamples, seed, seqlen):
  function get_openwebtext (line 328) | def get_openwebtext(dataset, tokenizer, nsamples, seed, seqlen):
  function get_calib_loaders (line 362) | def get_calib_loaders(name, tokenizer, nsamples=128, seed=0, seqlen=2048):

FILE: lmdeploy/lite/utils/collect.py
  function collect_target_modules (line 7) | def collect_target_modules(model: nn.Module,
  function collect_target_weights (line 41) | def collect_target_weights(model: nn.Module, target: Union[str, type], s...
  function bimap_name_mod (line 64) | def bimap_name_mod(name2mod_mappings: List[Dict[str, nn.Module]]) -> Tup...

FILE: lmdeploy/lite/utils/global_avail.py
  class GlobalAvailMixin (line 7) | class GlobalAvailMixin:
    method global_available (line 12) | def global_available(self, key: Union[str, nn.Module] = 'default', gro...
    method _save_instance (line 24) | def _save_instance(cls,
    method find (line 44) | def find(cls, key: Union[str, nn.Module] = 'default', group: str = 'de...
    method find_group (line 60) | def find_group(cls, group: str) -> Dict[Union[str, nn.Module], 'Global...
    method instances (line 73) | def instances(cls) -> Dict[str, Dict[Union[str, nn.Module], 'GlobalAva...

FILE: lmdeploy/lite/utils/load.py
  class LoadNoInit (line 9) | class LoadNoInit:
    method __init__ (line 12) | def __init__(self):
    method __enter__ (line 22) | def __enter__(self, *args, **kwargs):
    method __exit__ (line 34) | def __exit__(self, *args, **kwargs):
  function load_hf_from_pretrained (line 47) | def load_hf_from_pretrained(pretrained_model_name_or_path, dtype: Litera...

FILE: lmdeploy/lite/utils/memory_efficient.py
  function extract_return_values (line 15) | def extract_return_values(module: nn.Module) -> List[str]:
  function find_kv_cache_idx (line 36) | def find_kv_cache_idx(module: nn.Module) -> int:
  function find_modules_by_return_value (line 46) | def find_modules_by_return_value(model: nn.Module, value: str) -> List[n...
  function offload_kv_cache (line 79) | def offload_kv_cache(model: nn.Module, device: str = 'cuda') -> None:
  function offload_weights (line 141) | def offload_weights(model: nn.Module, device: str = 'cuda') -> None:
  function memory_efficient_inference (line 198) | def memory_efficient_inference(model: nn.Module, offload: bool = True, d...

FILE: lmdeploy/logger.py
  class RequestLogger (line 11) | class RequestLogger:
    method __init__ (line 20) | def __init__(self, max_log_len: Optional[int]) -> None:
    method log_prompt (line 23) | def log_prompt(self, session_id: int, prompt: str) -> None:
    method log_inputs (line 34) | def log_inputs(self, session_id: int, prompt: Optional[str], prompt_to...

FILE: lmdeploy/messages.py
  class GenerationConfig (line 25) | class GenerationConfig:
    method convert_stop_bad_words_to_ids (line 138) | def convert_stop_bad_words_to_ids(self, tokenizer: Tokenizer):
    method update_from_hf_gen_cfg (line 160) | def update_from_hf_gen_cfg(self, generation_config, tokenizer_eos_toke...
    method __post_init__ (line 179) | def __post_init__(self):
  class TurbomindEngineConfig (line 190) | class TurbomindEngineConfig:
    method __post_init__ (line 290) | def __post_init__(self):
  class PytorchEngineConfig (line 304) | class PytorchEngineConfig:
    method __post_init__ (line 425) | def __post_init__(self):
  class ResponseType (line 450) | class ResponseType(enum.Enum):
  class Response (line 467) | class Response:
    method __str__ (line 499) | def __str__(self):
    method __repr__ (line 502) | def __repr__(self):
    method _format_none_text_fields (line 505) | def _format_none_text_fields(self):
    method extend (line 529) | def extend(self, other: 'Response') -> 'Response':
  class EventType (line 557) | class EventType(enum.IntEnum):
  class EngineEvent (line 572) | class EngineEvent:
    method new_event (line 583) | def new_event(cls, event_type: EventType, timestamp: Optional[float] =...
  class ScheduleMetrics (line 591) | class ScheduleMetrics:
  class RequestMetrics (line 602) | class RequestMetrics:
  class EngineOutput (line 615) | class EngineOutput:
  class VisionConfig (line 638) | class VisionConfig:
  class SpeculativeConfig (line 654) | class SpeculativeConfig:

FILE: lmdeploy/metrics/loggers.py
  class StatLoggerBase (line 17) | class StatLoggerBase(ABC):
    method record_schedule (line 20) | def record_schedule(self, stats: SchedulerStats) -> None:
    method record_iteration (line 24) | def record_iteration(self, stats: IterationStats) -> None:
    method record_specdecode (line 28) | def record_specdecode(self, stats: SpeculativeDecodingStats) -> None:
    method log (line 31) | def log(self):  # noqa
  class LoggingStatLogger (line 35) | class LoggingStatLogger(StatLoggerBase):
    method __init__ (line 37) | def __init__(self, dp_rank: int = 0):
    method _reset (line 42) | def _reset(self, now):
    method record_schedule (line 52) | def record_schedule(self, stats: SchedulerStats):
    method record_iteration (line 55) | def record_iteration(self, stats: IterationStats):
    method record_specdecode (line 62) | def record_specdecode(self, stats: SpeculativeDecodingStats):
    method record_finish (line 73) | def record_finish(self, stats: RequestStats):
    method get_spec_msg (line 76) | def get_spec_msg(self):
    method log (line 98) | def log(self):
  class PrometheusStatLogger (line 133) | class PrometheusStatLogger(StatLoggerBase):
    method __init__ (line 135) | def __init__(self, model_name: str, max_model_len: int, dp_rank: int =...
    method record_schedule (line 309) | def record_schedule(self, stats: SchedulerStats) -> None:
    method record_iteration (line 319) | def record_iteration(self, stats: IterationStats) -> None:
    method record_finish (line 335) | def record_finish(self, stats: RequestStats) -> None:
    method record_specdecode (line 345) | def record_specdecode(self, stats: SpeculativeDecodingStats) -> None:
  function build_buckets (line 349) | def build_buckets(mantissa_lst: List[int], max_value: int) -> List[int]:
  function build_1_2_5_buckets (line 364) | def build_1_2_5_buckets(max_value: int) -> List[int]:

FILE: lmdeploy/metrics/metrics_processor.py
  class MetricsProcessor (line 14) | class MetricsProcessor():
    method __init__ (line 17) | def __init__(self):
    method start_metrics_handler (line 25) | def start_metrics_handler(self, enable_metrics: bool):
    method stop_metrics_handler (line 33) | async def stop_metrics_handler(self):
    method _run_metrics_handler (line 45) | async def _run_metrics_handler(self):
    method update_schedule_stats (line 83) | async def update_schedule_stats(self, schedule_metrics: ScheduleMetrics):
    method queue_update (line 90) | def queue_update(self, update_data: tuple):
    method increase_total_requests (line 96) | def increase_total_requests(self):
    method increase_completed_requests (line 100) | def increase_completed_requests(self):
    method increase_api_routed_requests (line 104) | def increase_api_routed_requests(self):
    method decrease_api_routed_requests (line 108) | def decrease_api_routed_requests(self):

FILE: lmdeploy/metrics/stats.py
  class SchedulerStats (line 14) | class SchedulerStats:
    method __repr__ (line 44) | def __repr__(self):
    method update_from_schedule_metrics (line 56) | def update_from_schedule_metrics(self, scheduled_metrics: ScheduleMetr...
  class RequestStats (line 63) | class RequestStats:
    method __init__ (line 66) | def __init__(self, arrival_time: float = None, prompt_tokens: int = 0):
    method __repr__ (line 100) | def __repr__(self):
    method update_from_events (line 111) | def update_from_events(self, engine_events: List[EngineEvent]):
    method e2e_latency (line 126) | def e2e_latency(self) -> float:
    method queued_time_interval (line 131) | def queued_time_interval(self) -> float:
    method prefill_time_interval (line 136) | def prefill_time_interval(self) -> float:
    method decode_time_interval (line 144) | def decode_time_interval(self) -> float:
    method inference_time_interval (line 152) | def inference_time_interval(self) -> float:
  class IterationStats (line 160) | class IterationStats:
    method __init__ (line 163) | def __init__(self):
    method __repr__ (line 181) | def __repr__(self):
    method _time_since (line 191) | def _time_since(self, start: float) -> float:
    method update_from_output (line 195) | def update_from_output(self, outputs: EngineOutput, req_stats: Request...
  class SpeculativeDecodingStats (line 231) | class SpeculativeDecodingStats:
    method __post_init__ (line 240) | def __post_init__(self):
    method update_from_output (line 244) | def update_from_output(self, outputs: EngineOutput):
    method update_per_draft (line 253) | def update_per_draft(self, num_draft_tokens: int, num_accepted_tokens:...
    method __repr__ (line 261) | def __repr__(self):

FILE: lmdeploy/model.py
  function random_uuid (line 16) | def random_uuid() -> str:
  function get_text (line 21) | def get_text(content: Union[str, List[dict]]):
  class ChatTemplateConfig (line 35) | class ChatTemplateConfig:
    method chat_template (line 69) | def chat_template(self):
    method to_json (line 80) | def to_json(self, file_path=None):
    method from_json (line 90) | def from_json(cls, file_or_string):
  class BaseChatTemplate (line 111) | class BaseChatTemplate:
    method __init__ (line 114) | def __init__(self,
    method get_prompt (line 141) | def get_prompt(self, prompt, sequence_start=True):
    method messages2prompt (line 167) | def messages2prompt(self, messages, sequence_start=True, **kwargs):
    method match (line 194) | def match(cls, model_path: str) -> Optional[str]:
  class CogVLM (line 204) | class CogVLM(BaseChatTemplate):
    method __init__ (line 207) | def __init__(self,
    method match (line 228) | def match(cls, model_path: str) -> Optional[str]:
  class Vicuna (line 240) | class Vicuna(BaseChatTemplate):
    method __init__ (line 243) | def __init__(
    method get_prompt (line 262) | def get_prompt(self, prompt, sequence_start=True):
    method messages2prompt (line 267) | def messages2prompt(self, messages, sequence_start=True, **kwargs):
    method match (line 273) | def match(cls, model_path: str) -> Optional[str]:
  class Llavav1 (line 287) | class Llavav1(Vicuna):
    method __init__ (line 290) | def __init__(
    method match (line 297) | def match(cls, model_path: str) -> Optional[str]:
  class InternLMChat7B (line 312) | class InternLMChat7B(BaseChatTemplate):
    method __init__ (line 315) | def __init__(
    method match (line 342) | def match(cls, model_path: str) -> Optional[str]:
  class Baichuan2 (line 355) | class Baichuan2(BaseChatTemplate):
    method __init__ (line 359) | def __init__(self, user='<reserved_106>', assistant='<reserved_107>', ...
    method match (line 363) | def match(cls, model_path: str) -> Optional[str]:
  class Llama2 (line 375) | class Llama2(BaseChatTemplate):
    method __init__ (line 378) | def __init__(
    method match (line 401) | def match(cls, model_path: str) -> Optional[str]:
  class CodeLlama (line 412) | class CodeLlama(Llama2):
    method __init__ (line 414) | def __init__(self, meta_instruction='', suffix_first=False, stop_words...
    method get_prompt (line 427) | def get_prompt(self, prompt, sequence_start=True):
    method _infill_prompt (line 435) | def _infill_prompt(self, prompt):
    method match (line 446) | def match(cls, model_path: str) -> Optional[str]:
  class ChatGLM2 (line 457) | class ChatGLM2(BaseChatTemplate):
    method __init__ (line 459) | def __init__(self, user='问：', eoh='\n\n', assistant='答：', eoa='\n\n', ...
    method get_prompt (line 467) | def get_prompt(self, prompt, sequence_start=True):
    method messages2prompt (line 478) | def messages2prompt(self, messages, sequence_start=True, **kwargs):
    method match (line 497) | def match(cls, model_path: str) -> Optional[str]:
  class MistralChat (line 509) | class MistralChat(BaseChatTemplate):
    method __init__ (line 516) | def __init__(self, user='[INST] ', eoh=' [/INST]', eoa='</s>', **kwargs):
    method match (line 520) | def match(cls, model_path: str) -> Optional[str]:
  class InternVLZH (line 535) | class InternVLZH(BaseChatTemplate):
    method __init__ (line 537) | def __init__(self, user='<human>: ', eoh=' ', assistant='<bot>: ', eoa...
    method get_prompt (line 540) | def get_prompt(self, prompt, sequence_start=True):
    method messages2prompt (line 545) | def messages2prompt(self, messages, sequence_start=True, **kwargs):
    method match (line 551) | def match(cls, model_path: str) -> Optional[str]:
  class DeepseekVL (line 563) | class DeepseekVL(BaseChatTemplate):
    method __init__ (line 565) | def __init__(
    method get_prompt (line 582) | def get_prompt(self, prompt, sequence_start=True):
    method messages2prompt (line 587) | def messages2prompt(self, messages, sequence_start=True, **kwargs):
    method match (line 593) | def match(cls, model_path: str) -> Optional[str]:
  class DeepseekVL2 (line 605) | class DeepseekVL2(BaseChatTemplate):
    method __init__ (line 607) | def __init__(self,
    method get_prompt (line 623) | def get_prompt(self, prompt, sequence_start=True):
    method messages2prompt (line 626) | def messages2prompt(self, messages, sequence_start=True, **kwargs):
    method match (line 632) | def match(cls, model_path: str) -> Optional[str]:
  class ChatmlDirect (line 644) | class ChatmlDirect(BaseChatTemplate):
    method __init__ (line 646) | def __init__(self,
    method match (line 667) | def match(cls, model_path: str) -> Optional[str]:
  class HFChatTemplate (line 679) | class HFChatTemplate(BaseChatTemplate):
    method __init__ (line 685) | def __init__(self, model_path: str = '', **kwargs):
    method get_prompt (line 706) | def get_prompt(self, prompt, sequence_start=True, **kwargs):
    method messages2prompt (line 710) | def messages2prompt(self, messages, sequence_start=True, **kwargs):
    method _user_instruction (line 745) | def _user_instruction(self):
    method _assistant_instruction (line 756) | def _assistant_instruction(self):
    method _system_instruction (line 773) | def _system_instruction(self):
    method match (line 790) | def match(cls, model_path: str) -> Optional[str]:
  function get_chat_template (line 798) | def get_chat_template(model_path: str, config: Optional[ChatTemplateConf...

FILE: lmdeploy/pipeline.py
  class Pipeline (line 30) | class Pipeline:
    method __init__ (line 33) | def __init__(self,
    method infer (line 83) | def infer(self,
    method batch_infer (line 125) | def batch_infer(self, *args, **kwargs):
    method stream_infer (line 128) | def stream_infer(self,
    method close (line 164) | def close(self):
    method chat (line 169) | def chat(self,
    method session (line 230) | def session(self) -> 'Session':
    method get_reward_score (line 234) | def get_reward_score(self, input_ids: List) -> List[float]:
    method get_ppl (line 256) | def get_ppl(self, input_ids: List[int] | List[List[int]]) -> List[float]:
    method __call__ (line 306) | def __call__(self,
    method __enter__ (line 312) | def __enter__(self):
    method __exit__ (line 315) | def __exit__(self, exc_type, exc_value, traceback):
    method generate (line 319) | async def generate(self, *args, **kwargs):
    method _is_single (line 328) | def _is_single(prompts):
    method _request_generator (line 333) | def _request_generator(self,
    method _get_limiter (line 370) | def _get_limiter(self):
    method _infer (line 375) | def _infer(self, requests: Iterator[Dict], multiplex: bool, pbar=None,...
    method _run (line 413) | def _run(self, fn=None, coro=None):
    method _batch_iterator (line 424) | def _batch_iterator(self, sizes, max_value):
    method _get_long_text_ppl (line 446) | def _get_long_text_ppl(self, session, input_ids, max_input_len):
    method _get_ppl (line 472) | def _get_ppl(self,
  class _EventLoopThread (line 523) | class _EventLoopThread:
    method __init__ (line 525) | def __init__(self, daemon=False):
    method _thread_entry (line 534) | def _thread_entry(self, fut):
    method _cancel_all_tasks (line 550) | def _cancel_all_tasks(self):
    method close (line 574) | def close(self):

FILE: lmdeploy/profiler.py
  class Session (line 10) | class Session:
    method __init__ (line 16) | def __init__(self, input_len, req_output_len):
    method tick (line 23) | def tick(self, n_token):
    method finish (line 27) | def finish(self, status):
  class Profiler (line 31) | class Profiler:
    method __init__ (line 33) | def __init__(self, stream_output: bool, percentages: List[int]):
    method new_session (line 38) | def new_session(self, *args, **kwargs):
    method start (line 43) | def start(self):
    method finish (line 46) | def finish(self):
    method compute_metrics (line 49) | def compute_metrics(self):
    method summarize (line 106) | def summarize(self, title: str, hyperparams: List = None, header=40, d...
    method save_csv (line 140) | def save_csv(self, csv_file: str, hyperparams):

FILE: lmdeploy/pytorch/adapter/adapter.py
  function get_ranks_and_scalings (line 10) | def get_ranks_and_scalings(target_name: str, cfgs: Iterable, device: tor...
  function find_all_target (line 26) | def find_all_target(model: torch.nn.Module, target_name: str):
  function get_layer_index (line 48) | def get_layer_index(key: str, layers_pattern: str = None):
  function _get_reverse_pack_map (line 63) | def _get_reverse_pack_map(model: nn.Module):
  function _get_key_map (line 73) | def _get_key_map(reverse_map: Dict[str, str]):
  function load_lora_weights (line 84) | def load_lora_weights(model: nn.Module, weights: Iterable[Tuple[str, tor...
  class AdapterManager (line 111) | class AdapterManager:
    method __init__ (line 114) | def __init__(self, adapters: Dict[str, str]):
    method get_adapter_ids (line 125) | def get_adapter_ids(self, names: List[str]):
    method num_adapters (line 128) | def num_adapters(self):

FILE: lmdeploy/pytorch/backends/activation.py
  class SiluAndMulImpl (line 5) | class SiluAndMulImpl(ABC):
    method forward (line 9) | def forward(self, x):
  class SiluAndMulBuilder (line 14) | class SiluAndMulBuilder(ABC):
    method build (line 19) | def build(inplace: bool = False):
  class GeluAndMulImpl (line 24) | class GeluAndMulImpl(ABC):
    method forward (line 28) | def forward(self, x):
  class GeluAndMulBuilder (line 33) | class GeluAndMulBuilder(ABC):
    method build (line 38) | def build(approximate: str = 'none'):

FILE: lmdeploy/pytorch/backends/apply_rotary_emb.py
  class ApplyRotaryEmbImpl (line 7) | class ApplyRotaryEmbImpl(ABC):
    method forward (line 11) | def forward(self, query: Tensor, key: Tensor, cos: Tensor, sin: Tensor...
  class ApplyRotaryEmbBuilder (line 16) | class ApplyRotaryEmbBuilder(ABC):
    method build (line 21) | def build():

FILE: lmdeploy/pytorch/backends/attention.py
  class AttentionMetadata (line 11) | class AttentionMetadata:
  class AttentionImpl (line 27) | class AttentionImpl(ABC, Generic[T]):
    method __init__ (line 30) | def __init__(
    method make_alibi_slopes (line 67) | def make_alibi_slopes(head_start: int, head_end: int, num_heads: int, ...
    method set_alibi_slopes (line 85) | def set_alibi_slopes(self, slopes: torch.Tensor):
    method forward (line 89) | def forward(
  class AttentionBuilder (line 107) | class AttentionBuilder(ABC, Generic[T]):
    method build (line 112) | def build(

FILE: lmdeploy/pytorch/backends/awq_modules.py
  class LinearW4A16Impl (line 8) | class LinearW4A16Impl(ABC):
    method update_weights (line 11) | def update_weights(self,
    method forward (line 20) | def forward(self,
  class LinearW4A16Builder (line 30) | class LinearW4A16Builder(ABC):
    method build (line 35) | def build(in_features: int,

FILE: lmdeploy/pytorch/backends/base.py
  class OpType (line 13) | class OpType(Enum):
  class OpsBackend (line 45) | class OpsBackend(ABC):
    method get_name (line 50) | def get_name() -> str:
    method get_layer_impl_builder (line 56) | def get_layer_impl_builder(cls, layer_type: OpType):
    method get_attention_metadata_cls (line 62) | def get_attention_metadata_cls():
    method get_k_block_shape (line 68) | def get_k_block_shape(
    method get_v_block_shape (line 79) | def get_v_block_shape(
    method update_step_context (line 89) | def update_step_context(cls, step_context):
    method build_graph_runner (line 97) | def build_graph_runner(model: torch.nn.Module, model_config: ModelConf...
    method device_count (line 104) | def device_count():
    method support_ray (line 109) | def support_ray():

FILE: lmdeploy/pytorch/backends/blockedf8_modules.py
  class LinearBlockedF8Impl (line 9) | class LinearBlockedF8Impl(ABC):
    method __init__ (line 12) | def __init__(self):
    method update_weights (line 15) | def update_weights(self, weight: torch.Tensor, scale: torch.Tensor, bi...
    method set_scale_fmt (line 19) | def set_scale_fmt(self, scale_fmt: Optional[str]):
    method forward (line 24) | def forward(self,
  class LinearBlockedF8Builder (line 37) | class LinearBlockedF8Builder(ABC):
    method build (line 42) | def build(in_features: int, out_features: int, bias: bool = True, dtyp...

FILE: lmdeploy/pytorch/backends/causal_conv1d.py
  class CausalConv1dImpl (line 7) | class CausalConv1dImpl(ABC):
    method conv1d_fn (line 11) | def conv1d_fn(self,
    method update_fn (line 22) | def update_fn(self,
  class CausalConv1dBuilder (line 33) | class CausalConv1dBuilder(ABC):
    method build (line 38) | def build():

FILE: lmdeploy/pytorch/backends/cuda/activation.py
  class TritonSiluAndMulImpl (line 7) | class TritonSiluAndMulImpl(SiluAndMulImpl):
    method __init__ (line 10) | def __init__(self, inplace: bool):
    method forward (line 13) | def forward(self, x):
  class TritonSiluAndMulBuilder (line 30) | class TritonSiluAndMulBuilder(SiluAndMulBuilder):
    method build (line 34) | def build(inplace: bool = False):

FILE: lmdeploy/pytorch/backends/cuda/apply_rotary_emb.py
  class TritonApplyRotaryEmbImpl (line 10) | class TritonApplyRotaryEmbImpl(ApplyRotaryEmbImpl):
    method forward (line 13) | def forward(self, query: Tensor, key: Tensor, cos: Tensor, sin: Tensor...
  class TritonApplyRotaryEmbBuilder (line 24) | class TritonApplyRotaryEmbBuilder(ApplyRotaryEmbBuilder):
    method build (line 28) | def build():

FILE: lmdeploy/pytorch/backends/cuda/attention/__init__.py
  function use_fa3_warning (line 26) | def use_fa3_warning():
  function _enable_fa3 (line 35) | def _enable_fa3(alibi: bool, learnable_sink: bool, block_sparse_size: in...
  function _normalize_sliding_window (line 53) | def _normalize_sliding_window(sliding_window):
  class TritonAttentionBuilder (line 69) | class TritonAttentionBuilder(AttentionBuilder[TritonAttentionMetadata]):
    method build (line 79) | def build(

FILE: lmdeploy/pytorch/backends/cuda/attention/default.py
  class TritonAttentionMetadata (line 14) | class TritonAttentionMetadata(AttentionMetadata):
  function _cdiv (line 56) | def _cdiv(a, b):
  class TritonAttentionImpl (line 69) | class TritonAttentionImpl(AttentionImpl[TritonAttentionMetadata]):
    method __init__ (line 72) | def __init__(
    method _get_max_q_seqlen (line 111) | def _get_max_q_seqlen(
    method _get_fill_meta (line 126) | def _get_fill_meta(
    method _fill_kv_cache_impl (line 138) | def _fill_kv_cache_impl(
    method _forward_decoding (line 177) | def _forward_decoding(
    method _forward_prefill (line 226) | def _forward_prefill(
    method forward (line 298) | def forward(

FILE: lmdeploy/pytorch/backends/cuda/attention/fa3.py
  class FA3Impl (line 11) | class FA3Impl(TritonAttentionImpl):
    method __init__ (line 24) | def __init__(
    method _get_max_q_seqlen (line 54) | def _get_max_q_seqlen(
    method _normalize_sliding_window (line 66) | def _normalize_sliding_window(self, sliding_window):
    method _decoding_speculative (line 81) | def _decoding_speculative(
    method _decoding_standard (line 126) | def _decoding_standard(
    method _forward_decoding (line 176) | def _forward_decoding(
    method _forward_prefill (line 210) | def _forward_prefill(
    method forward (line 275) | def forward(

FILE: lmdeploy/pytorch/backends/cuda/attention/mla.py
  function _cdiv (line 14) | def _cdiv(a, b):
  function _try_dynamic_compile (line 19) | def _try_dynamic_compile(func, *args, **kwargs):
  class NSAIndicesUpdater (line 29) | class NSAIndicesUpdater:
    method __init__ (line 36) | def __init__(self):
    method _update_decode_impl (line 40) | def _update_decode_impl(self, nsa_indices: torch.Tensor, block_offsets...
    method update_decode (line 51) | def update_decode(self, nsa_indices: torch.Tensor, block_offsets: torc...
    method _update_prefill_impl (line 59) | def _update_prefill_impl(self, nsa_indices: torch.Tensor, q_seqlens: t...
    method update_prefill (line 68) | def update_prefill(self, nsa_indices: torch.Tensor, q_seqlens: torch.T...
    method build (line 78) | def build():
  class FlashMLAImpl (line 82) | class FlashMLAImpl(TritonAttentionImpl):
    method __init__ (line 97) | def __init__(
    method _get_flash_mla_sparse_fwd (line 143) | def _get_flash_mla_sparse_fwd(self):
    method flash_mla_decoding (line 154) | def flash_mla_decoding(
    method _prefill_sparse (line 196) | def _prefill_sparse(self, query: torch.Tensor, flatten_k: torch.Tensor...
    method _prefill_triton (line 232) | def _prefill_triton(
    method _prefill_fa3 (line 271) | def _prefill_fa3(
    method run_flatten_kv_cache (line 315) | def run_flatten_kv_cache(self,
    method _get_max_q_seqlen (line 369) | def _get_max_q_seqlen(
    method _fill_kv_cache_impl (line 382) | def _fill_kv_cache_impl(self,
    method _forward_decoding (line 449) | def _forward_decoding(
    method _forward_prefill (line 472) | def _forward_prefill(
    method forward (line 520) | def forward(

FILE: lmdeploy/pytorch/backends/cuda/awq_modules.py
  function wq_gemm_forward (line 11) | def wq_gemm_forward(
  class AwqLinearW4A16Impl (line 43) | class AwqLinearW4A16Impl(LinearW4A16Impl):
    method __init__ (line 46) | def __init__(self, in_features: int, out_features: int, w_bit: int, gr...
    method forward (line 52) | def forward(self,
  class AwqLinearW4A16Builder (line 68) | class AwqLinearW4A16Builder(LinearW4A16Builder):
    method build (line 72) | def build(in_features: int,

FILE: lmdeploy/pytorch/backends/cuda/blockedf8_modules.py
  class TritonLinearBlockedF8Impl (line 16) | class TritonLinearBlockedF8Impl(LinearBlockedF8Impl):
    method __init__ (line 19) | def __init__(self, in_features: int, out_features: int, block_size: in...
    method forward (line 26) | def forward(self,
  class TritonLinearBlockedF8Builder (line 58) | class TritonLinearBlockedF8Builder(LinearBlockedF8Builder):
    method build (line 62) | def build(in_features: int, out_features: int, block_size: int = 128, ...
  class DeepGemmLinearBlockedF8Impl (line 73) | class DeepGemmLinearBlockedF8Impl(LinearBlockedF8Impl):
    method __init__ (line 76) | def __init__(self, in_features: int, out_features: int, block_size: in...
    method warmup (line 89) | def warmup(self, warmup_meta: WarmupMeta):
    method forward (line 112) | def forward(self,

FILE: lmdeploy/pytorch/backends/cuda/causal_conv1d.py
  class CausalConv1dTilelangImpl (line 10) | class CausalConv1dTilelangImpl(CausalConv1dImpl):
    method __init__ (line 13) | def __init__(self):
    method conv1d_fn (line 18) | def conv1d_fn(self,
    method update_fn (line 32) | def update_fn(self,
  class CausalConv1dDaoImpl (line 48) | class CausalConv1dDaoImpl(CausalConv1dTilelangImpl):
    method __init__ (line 50) | def __init__(self):
  function has_dao (line 61) | def has_dao():
  class CausalConv1dCudaBuilder (line 71) | class CausalConv1dCudaBuilder(CausalConv1dBuilder):
    method build (line 75) | def build() -> CausalConv1dImpl:

FILE: lmdeploy/pytorch/backends/cuda/flash_attention.py
  class TritonFlashAttentionImpl (line 7) | class TritonFlashAttentionImpl(FlashAttentionImpl):
    method __init__ (line 10) | def __init__(
    method forward (line 42) | def forward(self,
  class TritonFlashAttentionBuilder (line 71) | class TritonFlashAttentionBuilder(FlashAttentionBuilder):
    method build (line 75) | def build(

FILE: lmdeploy/pytorch/backends/cuda/gated_delta_rule.py
  function has_fla (line 11) | def has_fla():
  class CudaGatedDeltaRuleImpl (line 19) | class CudaGatedDeltaRuleImpl(GatedDeltaRuleImpl):
    method __init__ (line 21) | def __init__(self):
    method chunk_gated_delta_rule (line 30) | def chunk_gated_delta_rule(self,
    method fused_recurrent_gated_delta_rule (line 68) | def fused_recurrent_gated_delta_rule(self,
  class CudaGatedDeltaRuleBuilder (line 93) | class CudaGatedDeltaRuleBuilder(GatedDeltaRuleBuilder):
    method build (line 96) | def build() -> GatedDeltaRuleImpl:

FILE: lmdeploy/pytorch/backends/cuda/graph_runner.py
  function next_power_of_2 (line 22) | def next_power_of_2(n: int):
  function _get_capture_batch_size_impl (line 36) | def _get_capture_batch_size_impl(max_batches: int):
  function _false (line 54) | def _false(*args, **kwargs):
  class CUDASingleGraphRunner (line 59) | class CUDASingleGraphRunner:
    method __init__ (line 62) | def __init__(
    method capture (line 102) | def capture(self, **kwargs):
    method forward (line 127) | def forward(self, **kwargs):
    method __del__ (line 138) | def __del__(self):
  class CUDAGraphRunner (line 143) | class CUDAGraphRunner(GraphRunner):
    method __init__ (line 146) | def __init__(self, model: torch.nn.Module, model_config: ModelConfig, ...
    method check_enable_graph (line 164) | def check_enable_graph(self):
    method _try_compile_model_once (line 171) | def _try_compile_model_once(self):
    method _get_capture_tokens (line 182) | def _get_capture_tokens(self, batch_size: int):
    method get_graph_key (line 190) | def get_graph_key(self, input_ids: torch.Tensor, position_ids: torch.T...
    method _prepare_inputs (line 206) | def _prepare_inputs(self, **kwargs):
    method _get_max_tokens (line 214) | def _get_max_tokens(self, graph_key: tuple, input_ids: torch.Tensor, q...
    method __call__ (line 222) | def __call__(self, **kwargs):
    method prepare_inputs_for_generation (line 262) | def prepare_inputs_for_generation(
    method reset (line 281) | def reset(self):
    method update_inputs (line 293) | def update_inputs(self, inputs):
    method get_capture_batch_sizes (line 306) | def get_capture_batch_sizes(self) -> List[int]:

FILE: lmdeploy/pytorch/backends/cuda/lora.py
  class PackedLoRAInput (line 13) | class PackedLoRAInput:
  class TritonLoRAImpl (line 23) | class TritonLoRAImpl(LoRAImpl):
    method _make_packed_lora_input (line 27) | def _make_packed_lora_input(x, ctx_mgr):
    method forward (line 41) | def forward(self,
  class TritonLoRABuilder (line 84) | class TritonLoRABuilder(LoRABuilder):
    method build (line 88) | def build():

FILE: lmdeploy/pytorch/backends/cuda/moe/blocked_fp8.py
  class TritonFusedMoEBlockedF8Impl (line 22) | class TritonFusedMoEBlockedF8Impl(FusedMoEBlockedF8Impl):
    method __init__ (line 25) | def __init__(self,
    method ep_expert_list (line 38) | def ep_expert_list(self, world_size: int, rank: int):
    method forward (line 46) | def forward(self,
  class FusedDeepEpMoEBlockedF8Impl (line 90) | class FusedDeepEpMoEBlockedF8Impl(TritonFusedMoEBlockedF8Impl):
    method __init__ (line 92) | def __init__(self,
    method ep_expert_list (line 128) | def ep_expert_list(self, world_size: int, rank: int):
    method forward (line 141) | def forward(self,
    method do_renormalize (line 168) | def do_renormalize(self, topk_weights):
    method fusedmoe_build (line 171) | def fusedmoe_build(self, low_latency_mode: bool = False):
  class TritonFusedMoEBlockedF8Builder (line 186) | class TritonFusedMoEBlockedF8Builder(FusedMoEBlockedF8Builder):
    method build (line 190) | def build(top_k: int,

FILE: lmdeploy/pytorch/backends/cuda/moe/default.py
  class TritonFusedMoEImpl (line 21) | class TritonFusedMoEImpl(FusedMoEImpl):
    method __init__ (line 24) | def __init__(self, top_k: int, num_experts: int, renormalize: bool = F...
    method update_weights (line 29) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ...
    method ep_expert_list (line 34) | def ep_expert_list(self, world_size: int, rank: int):
    method forward (line 42) | def forward(self,
  class FusedMoENormal (line 73) | class FusedMoENormal:
    method __init__ (line 75) | def __init__(
    method forward (line 99) | def forward(
    method capture (line 121) | def capture(self):
    method wait (line 124) | def wait(self, event):
    method dispatch_async (line 128) | def dispatch_async(self,
    method combine_async (line 138) | def combine_async(self, x: torch.Tensor, handle: tuple, previous_event...
    method release (line 141) | def release(self):
    method fusedmoe_forward (line 144) | def fusedmoe_forward(self, state, up_weight, down_weight):
  function _disposible_tensor (line 150) | def _disposible_tensor(tensor):
  function dispatch_ll (line 159) | def dispatch_ll(
  function dispatch_async_ll (line 200) | def dispatch_async_ll(
  class FusedMoELowLatency (line 230) | class FusedMoELowLatency:
    method __init__ (line 232) | def __init__(
    method experts (line 253) | def experts(
    method forward (line 279) | def forward(self,
    method wait (line 300) | def wait(self, event):
    method dispatch_async (line 303) | def dispatch_async(
    method combine_async (line 313) | def combine_async(
    method fusedmoe_forward (line 323) | def fusedmoe_forward(self, state, up_weight, down_weight):
  function build_deepep_moe (line 333) | def build_deepep_moe(
  class FusedMoEEPImpl (line 360) | class FusedMoEEPImpl(TritonFusedMoEImpl):
    method __init__ (line 363) | def __init__(
    method update_weights (line 398) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ...
    method forward (line 401) | def forward(self,
    method ep_expert_list (line 425) | def ep_expert_list(self, world_size: int, rank: int):
    method do_renormalize (line 432) | def do_renormalize(self, topk_weights):
    method fusedmoe_build (line 435) | def fusedmoe_build(self, low_latency_mode: bool = False):
  class TritonFusedMoEBuilder (line 447) | class TritonFusedMoEBuilder(FusedMoEBuilder):
    method build (line 451) | def build(

FILE: lmdeploy/pytorch/backends/cuda/moe/ep_utils.py
  function split_inputs_by_attn_tp (line 10) | def split_inputs_by_attn_tp(
  function gather_outputs_by_attn_tp (line 37) | def gather_outputs_by_attn_tp(out_states: torch.Tensor, split_size: List...

FILE: lmdeploy/pytorch/backends/cuda/moe/w8a8.py
  class TritonFusedMoEW8A8Impl (line 16) | class TritonFusedMoEW8A8Impl(FusedMoEW8A8Impl):
    method __init__ (line 19) | def __init__(
    method update_weights (line 33) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ...
    method forward (line 38) | def forward(self,
  class TritonFusedMoEW8A8Builder (line 77) | class TritonFusedMoEW8A8Builder(FusedMoEW8A8Builder):
    method build (line 81) | def build(

FILE: lmdeploy/pytorch/backends/cuda/moe_router.py
  function is_power_of_two (line 12) | def is_power_of_two(n):
  class TritonRouterNoauxTCImpl (line 16) | class TritonRouterNoauxTCImpl(DefaultRouterNoauxTCImpl):
    method __init__ (line 18) | def __init__(
    method should_enable_custom_kernel (line 42) | def should_enable_custom_kernel(self) -> bool:
    method forward (line 60) | def forward(self, logits: torch.Tensor, bias: torch.Tensor) -> Tuple[t...
  class TritonRouterNoauxTCBuilder (line 77) | class TritonRouterNoauxTCBuilder(RouterNoauxTCBuilder):
    method build (line 80) | def build(

FILE: lmdeploy/pytorch/backends/cuda/multinomial_sampling.py
  class TritonMultinomialSamplingImpl (line 10) | class TritonMultinomialSamplingImpl(MultinomialSamplingImpl):
    method forward (line 12) | def forward(self,
  class TritonMultinomialSamplingBuilder (line 21) | class TritonMultinomialSamplingBuilder(MultinomialSamplingBuilder):
    method build (line 24) | def build():

FILE: lmdeploy/pytorch/backends/cuda/norm.py
  class TritonRMSNormImpl (line 9) | class TritonRMSNormImpl(RMSNormImpl):
    method __init__ (line 12) | def __init__(self, hidden_size: int, eps: float = 1e-6):
    method forward (line 16) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor...
  class TritonRMSNormBuilder (line 26) | class TritonRMSNormBuilder(RMSNormBuilder):
    method build (line 30) | def build(weight: torch.Tensor, eps: float = 1e-6):

FILE: lmdeploy/pytorch/backends/cuda/nsa.py
  class TritonNSAIndexFP8 (line 12) | class TritonNSAIndexFP8(BaseNSAIndexFP8):
    method __init__ (line 14) | def __init__(self, topk: int, softmax_scale: float, block_size: int, f...
    method forward (line 23) | def forward(self, q: Tensor, k: Tensor, weights: Tensor, k_cache: Tens...
  class TritonNSAIndexFP8Builder (line 68) | class TritonNSAIndexFP8Builder(BaseNSAIndexFP8Builder):
    method build (line 71) | def build(topk: int, softmax_scale: float, block_size: int = 128, fill...

FILE: lmdeploy/pytorch/backends/cuda/op_backend.py
  class CudaOpsBackend (line 15) | class CudaOpsBackend(DefaultOpsBackend):
    method get_name (line 19) | def get_name() -> str:
    method get_layer_impl_builder (line 24) | def get_layer_impl_builder(cls, layer_type: OpType):
    method get_attention_metadata_cls (line 85) | def get_attention_metadata_cls():
    method get_k_block_shape (line 91) | def get_k_block_shape(
    method get_v_block_shape (line 105) | def get_v_block_shape(
    method update_meta_flashmla (line 119) | def update_meta_flashmla(cls, attn_metadata, model_config: ModelConfig...
    method update_meta_flashattn (line 139) | def update_meta_flashattn(cls, attn_metadata, step_context):
    method update_step_context (line 162) | def update_step_context(cls, step_context):
    method build_graph_runner (line 207) | def build_graph_runner(model: torch.nn.Module, model_config: ModelConf...
    method device_count (line 225) | def device_count():
    method support_ray (line 230) | def support_ray():

FILE: lmdeploy/pytorch/backends/cuda/qmodules.py
  class TritonRMSNormW8A8Impl (line 14) | class TritonRMSNormW8A8Impl(RMSNormW8A8Impl):
    method __init__ (line 17) | def __init__(self, hidden_size: int, eps: float = 1e-6, quant_dtype: t...
    method forward (line 23) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor...
  class TritonRMSNormBuilder (line 39) | class TritonRMSNormBuilder(RMSNormW8A8Builder):
    method build (line 43) | def build(hidden_size: int, eps: float = 1e-6, quant_dtype: torch.dtyp...
  class TritonLinearW8A8Impl (line 48) | class TritonLinearW8A8Impl(LinearW8A8Impl):
    method __init__ (line 51) | def __init__(self,
    method forward (line 61) | def forward(self,
  class TritonLinearW8A8Builder (line 87) | class TritonLinearW8A8Builder(LinearW8A8Builder):
    method build (line 91) | def build(in_features: int,

FILE: lmdeploy/pytorch/backends/cuda/token_dispatcher.py
  function get_buffer_common (line 25) | def get_buffer_common(
  function get_buffer_normal (line 57) | def get_buffer_normal(group: dist.ProcessGroup, hidden_bytes: int):
  function get_buffer_low_latency (line 77) | def get_buffer_low_latency(
  class DeepEPTokenDispatcher (line 105) | class DeepEPTokenDispatcher(TokenDispatcherImpl):
    method __init__ (line 110) | def __init__(
    method dispatch (line 135) | def dispatch(
    method dispatch_normal (line 166) | def dispatch_normal(
    method dispatch_normal_async (line 217) | def dispatch_normal_async(self,
    method combine (line 267) | def combine(self, hidden_states: torch.Tensor) -> torch.Tensor:
    method combine_normal (line 274) | def combine_normal(self, x: torch.Tensor, handle: Tuple, previous_even...
    method combine_normal_async (line 284) | def combine_normal_async(self, x: torch.Tensor, handle: Tuple, previou...
    method release (line 294) | def release(self):
    method get_number_of_tokens_per_expert (line 304) | def get_number_of_tokens_per_expert(self) -> torch.Tensor:
    method get_permuted_hidden_states_by_experts (line 308) | def get_permuted_hidden_states_by_experts(self,
    method get_restored_hidden_states_by_experts (line 328) | def get_restored_hidden_states_by_experts(
  class DeepEPTokenDispatcherLowLatency (line 350) | class DeepEPTokenDispatcherLowLatency(TokenDispatcherImpl):
    method __init__ (line 352) | def __init__(
    method dispatch (line 378) | def dispatch(
    method dispatch_async (line 407) | def dispatch_async(
    method combine (line 427) | def combine(
    method combine_async (line 444) | def combine_async(
  class TokenDispatcherBuilder (line 465) | class TokenDispatcherBuilder:
    method build (line 469) | def build(

FILE: lmdeploy/pytorch/backends/cuda/utils.py
  function has_tilelang (line 6) | def has_tilelang():

FILE: lmdeploy/pytorch/backends/cuda/warmup_manager.py
  class WarmupMeta (line 13) | class WarmupMeta:
  class WarmupManager (line 21) | class WarmupManager:
    method __init__ (line 23) | def __init__(self):
    method __contains__ (line 26) | def __contains__(self, key: str):
    method __getitem__ (line 30) | def __getitem__(self, key: str):
    method __setitem__ (line 34) | def __setitem__(self, key: str, val):
    method warmup (line 38) | def warmup(self, warmup_meta: WarmupMeta):
  function get_warmup_manager (line 50) | def get_warmup_manager():

FILE: lmdeploy/pytorch/backends/deepep_moe_checker.py
  class MoEBackend (line 6) | class MoEBackend:
    method __init__ (line 8) | def __init__(self):
    method set_deepep_moe_backend (line 12) | def set_deepep_moe_backend(self):
    method use_deepep_moe_backend (line 16) | def use_deepep_moe_backend(self):
  function get_moe_backend (line 21) | def get_moe_backend():

FILE: lmdeploy/pytorch/backends/default/activation.py
  class DefaultSiluAndMulImpl (line 8) | class DefaultSiluAndMulImpl(SiluAndMulImpl):
    method __init__ (line 11) | def __init__(self, inplace: bool):
    method forward (line 15) | def forward(self, x):
  class DefaultSiluAndMulBuilder (line 21) | class DefaultSiluAndMulBuilder(SiluAndMulBuilder):
    method build (line 25) | def build(inplace: bool = False):
  class DefaultGeluAndMulImpl (line 30) | class DefaultGeluAndMulImpl(GeluAndMulImpl):
    method __init__ (line 33) | def __init__(self, approximate: str = 'none'):
    method forward (line 36) | def forward(self, x):
  class DefaultGeluAndMulBuilder (line 42) | class DefaultGeluAndMulBuilder(GeluAndMulBuilder):
    method build (line 46) | def build(approximate: str = 'none'):

FILE: lmdeploy/pytorch/backends/default/apply_rotary_emb.py
  function rotate_half (line 8) | def rotate_half(x):
  class DefaultApplyRotaryEmbImpl (line 19) | class DefaultApplyRotaryEmbImpl(ApplyRotaryEmbImpl):
    method forward (line 22) | def forward(self, query: Tensor, key: Tensor, cos: Tensor, sin: Tensor...
  class DefaultApplyRotaryEmbBuilder (line 42) | class DefaultApplyRotaryEmbBuilder(ApplyRotaryEmbBuilder):
    method build (line 46) | def build():

FILE: lmdeploy/pytorch/backends/default/awq_modules.py
  function get_shifts (line 13) | def get_shifts(bits: int, device: torch.device):
  function unpack_awq (line 20) | def unpack_awq(qweight: torch.Tensor, qzeros: torch.Tensor, bits: int):
  function dequantize_gemm (line 38) | def dequantize_gemm(qweight, qzeros, scales, bits, group_size):
  class DefaultLinearW4A16Impl (line 50) | class DefaultLinearW4A16Impl(LinearW4A16Impl):
    method __init__ (line 53) | def __init__(self, in_features: int, out_features: int, w_bit: int, gr...
    method forward (line 59) | def forward(self,
  class DefaultLinearW4A16Builder (line 85) | class DefaultLinearW4A16Builder(LinearW4A16Builder):
    method build (line 89) | def build(in_features: int,

FILE: lmdeploy/pytorch/backends/default/embedding.py
  function get_masked_input_and_mask (line 9) | def get_masked_input_and_mask(input: torch.Tensor, start_index: int, end...
  class DefaultEmbeddingImpl (line 16) | class DefaultEmbeddingImpl(EmbeddingImpl):
    method __init__ (line 19) | def __init__(self, start_index: int, end_index: int):
    method forward (line 23) | def forward(self, x, weight: torch.Tensor, all_reduce: bool = False, g...
  class DefaultEmbeddingBuilder (line 36) | class DefaultEmbeddingBuilder(EmbeddingBuilder):
    method build (line 40) | def build(start_index: int, end_index: int):

FILE: lmdeploy/pytorch/backends/default/linear.py
  class DefaultLinearImpl (line 11) | class DefaultLinearImpl(LinearImpl):
    method forward (line 14) | def forward(self,
  class DefaultLinearBuilder (line 33) | class DefaultLinearBuilder(LinearBuilder):
    method build (line 37) | def build(in_features: int, out_features: int, bias: bool = True, dtyp...

FILE: lmdeploy/pytorch/backends/default/moe.py
  class DefaultSoftmaxTopKImpl (line 7) | class DefaultSoftmaxTopKImpl(SoftmaxTopKImpl):
    method __init__ (line 10) | def __init__(self, top_k: int, dim: int = -1, n_groups: int = -1):
    method forward (line 16) | def forward(self, x: torch.Tensor):
  class DefaultSoftmaxTopKBuilder (line 35) | class DefaultSoftmaxTopKBuilder(SoftmaxTopKBuilder):
    method build (line 39) | def build(top_k: int, dim: int = -1, n_groups: int = -1):

FILE: lmdeploy/pytorch/backends/default/moe_router.py
  function _compute_scores (line 10) | def _compute_scores(scoring_func: str, logits: torch.Tensor):
  function get_group_offsets (line 23) | def get_group_offsets(n_groups: int, group_size: int, device: str | torc...
  class DefaultRouterNoauxTCImpl (line 28) | class DefaultRouterNoauxTCImpl(RouterNoauxTCImpl):
    method __init__ (line 30) | def __init__(
    method _forward_router_n_groups (line 55) | def _forward_router_n_groups(self, scores_for_choice: torch.Tensor) ->...
    method _forward_default (line 67) | def _forward_default(self, scores: torch.Tensor, scores_for_choice: to...
    method renorm (line 83) | def renorm(self, topk_weight: torch.Tensor) -> torch.Tensor:
    method forward (line 93) | def forward(self, logits: torch.Tensor, bias: torch.Tensor) -> Tuple[t...
  class DefaultRouterNoauxTCBuilder (line 108) | class DefaultRouterNoauxTCBuilder(RouterNoauxTCBuilder):
    method build (line 111) | def build(

FILE: lmdeploy/pytorch/backends/default/multinomial_sampling.py
  class DefaultMultinomialSamplingImpl (line 8) | class DefaultMultinomialSamplingImpl(MultinomialSamplingImpl):
    method forward (line 11) | def forward(self,
  class DefaultMultinomialSamplingBuilder (line 22) | class DefaultMultinomialSamplingBuilder(MultinomialSamplingBuilder):
    method build (line 26) | def build():

FILE: lmdeploy/pytorch/backends/default/norm.py
  class DefaultRMSNormImpl (line 7) | class DefaultRMSNormImpl(RMSNormImpl):
    method __init__ (line 10) | def __init__(self, hidden_size: int, eps: float = 1e-6):
    method forward (line 14) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor...
  class DefaultRMSNormBuilder (line 29) | class DefaultRMSNormBuilder(RMSNormBuilder):
    method build (line 33) | def build(hidden_size: int, eps: float = 1e-6):
  class DefaultLayerNormImpl (line 38) | class DefaultLayerNormImpl(LayerNormImpl):
    method __init__ (line 41) | def __init__(self, normalized_shape: int, eps: float = 1e-6):
    method forward (line 47) | def forward(self,
  class DefaultLayerNormBuilder (line 62) | class DefaultLayerNormBuilder(LayerNormBuilder):
    method build (line 66) | def build(normalized_shape: int, eps: float = 1e-6):

FILE: lmdeploy/pytorch/backends/default/op_backend.py
  class DefaultOpsBackend (line 9) | class DefaultOpsBackend(OpsBackend):
    method get_name (line 12) | def get_name() -> str:
    method get_layer_impl_builder (line 16) | def get_layer_impl_builder(cls, layer_type: OpType):
    method get_k_block_shape (line 58) | def get_k_block_shape(
    method get_v_block_shape (line 72) | def get_v_block_shape(
    method init (line 86) | def init():
    method ccl_backend (line 90) | def ccl_backend() -> str:

FILE: lmdeploy/pytorch/backends/default/rotary_embedding.py
  function safe_torch_compile (line 14) | def safe_torch_compile(**compile_kwargs):
  function _rotary_embedding_fwd (line 44) | def _rotary_embedding_fwd(position_ids: torch.Tensor,
  class RotaryEmbeddingImpl (line 74) | class RotaryEmbeddingImpl(RotaryEmbeddingImpl, nn.Module):
    method __init__ (line 77) | def __init__(self, dim: int, base: int = 10000, scaling_factor: float ...
    method forward (line 85) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor):
  class LlamaDynamicNTKScalingRotaryEmbedding (line 98) | class LlamaDynamicNTKScalingRotaryEmbedding(RotaryEmbeddingImpl):
    method __init__ (line 104) | def __init__(self, dim: int, base: int = 10000, scaling_factor: float ...
    method _ntk_inv_freq (line 108) | def _ntk_inv_freq(self, seq_len: torch.Tensor):
    method forward (line 116) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor):
  class Llama3RotaryEmbeddingImpl (line 134) | class Llama3RotaryEmbeddingImpl(RotaryEmbeddingImpl):
    method __init__ (line 137) | def __init__(
  function yarn_find_correction_dim (line 167) | def yarn_find_correction_dim(num_rotations, dim, base=10000, max_positio...
  function yarn_find_correction_range (line 173) | def yarn_find_correction_range(low_rot, high_rot, dim, base=10000, max_p...
  function yarn_get_mscale (line 183) | def yarn_get_mscale(scale=1, mscale=1):
  function yarn_linear_ramp_mask (line 190) | def yarn_linear_ramp_mask(min, max, dim):
  class YarnRotaryEmbeddingImpl (line 200) | class YarnRotaryEmbeddingImpl(RotaryEmbeddingImpl):
    method __init__ (line 203) | def __init__(self,
    method forward (line 244) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor):
  class LongRoPEScalingRotaryEmbeddingImpl (line 258) | class LongRoPEScalingRotaryEmbeddingImpl(RotaryEmbeddingImpl):
    method __init__ (line 261) | def __init__(
    method forward (line 285) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor):
  class FopeRotaryEmbeddingImpl (line 310) | class FopeRotaryEmbeddingImpl(RotaryEmbeddingImpl):
    method __init__ (line 312) | def __init__(self,
    method forward (line 335) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor, sin_coe...
  class DefaultRotaryEmbeddingBuilder (line 372) | class DefaultRotaryEmbeddingBuilder(RotaryEmbeddingBuilder):
    method build (line 376) | def build(

FILE: lmdeploy/pytorch/backends/default/token_dispatcher.py
  class AlltoAllTokenDispatcher (line 9) | class AlltoAllTokenDispatcher(TokenDispatcherImpl):
    method __init__ (line 11) | def __init__(
    method sort_chunks_by_idxs (line 30) | def sort_chunks_by_idxs(self, input: torch.Tensor, split_sizes: torch....
    method all_to_all (line 37) | def all_to_all(self, group: torch.distributed.group, input_: torch.Ten...
    method preprocess (line 55) | def preprocess(self, routing_map: torch.Tensor, local_expert_indices) ...
    method dispatch (line 82) | def dispatch(self, hidden_states: torch.Tensor, topk_ids: torch.Tensor...
    method combine (line 108) | def combine(self, hidden_states: torch.Tensor) -> torch.Tensor:

FILE: lmdeploy/pytorch/backends/dlinfer/activation.py
  class DlinferSiluAndMulImpl (line 7) | class DlinferSiluAndMulImpl(SiluAndMulImpl):
    method forward (line 10) | def forward(self, x):
  class DlinferSiluAndMulBuilder (line 15) | class DlinferSiluAndMulBuilder(SiluAndMulBuilder):
    method build (line 19) | def build(inplace: bool = False):

FILE: lmdeploy/pytorch/backends/dlinfer/apply_rotary_emb.py
  class DlinferApplyRotaryEmbImpl (line 9) | class DlinferApplyRotaryEmbImpl(ApplyRotaryEmbImpl):
    method forward (line 12) | def forward(self, query: Tensor, key: Tensor, cos: Tensor, sin: Tensor...
  class DlinferApplyRotaryEmbBuilder (line 23) | class DlinferApplyRotaryEmbBuilder(ApplyRotaryEmbBuilder):
    method build (line 27) | def build():

FILE: lmdeploy/pytorch/backends/dlinfer/ascend/op_backend.py
  class SocVersion (line 25) | class SocVersion:
    method device_name (line 31) | def device_name(cls) -> str:
    method is_Ascend310P (line 41) | def is_Ascend310P(cls) -> bool:
    method is_Ascend910 (line 45) | def is_Ascend910(cls) -> bool:
    method soc_version (line 50) | def soc_version(cls) -> int:
    method is_A2 (line 54) | def is_A2(cls) -> bool:
    method is_A3 (line 58) | def is_A3(cls) -> bool:
  class DistMeta (line 63) | class DistMeta:
  class AscendKVQuantMeta (line 73) | class AscendKVQuantMeta:
    method set_value (line 78) | def set_value(cls, device: str, dtype: torch.dtype, record_file: str, ...
  class AscendOpsBackend (line 118) | class AscendOpsBackend(DlinferOpsBackend):
    method get_name (line 126) | def get_name() -> str:
    method get_k_block_shape (line 131) | def get_k_block_shape(
    method get_v_block_shape (line 143) | def get_v_block_shape(
    method update_step_context (line 155) | def update_step_context(cls, step_context):
    method build_graph_runner (line 432) | def build_graph_runner(model: torch.nn.Module, model_config: ModelConf...
    method init (line 441) | def init():
    method ccl_backend (line 453) | def ccl_backend():
    method device_count (line 457) | def device_count():
    method support_ray (line 462) | def support_ray():

FILE: lmdeploy/pytorch/backends/dlinfer/ascend/utils.py
  function nd_to_nz_spec (line 8) | def nd_to_nz_spec(tensor: torch.Tensor) -> torch.Tensor:

FILE: lmdeploy/pytorch/backends/dlinfer/attention.py
  class DlinferAttentionMetadata (line 12) | class DlinferAttentionMetadata(AttentionMetadata):
  class DlinferAttentionImpl (line 23) | class DlinferAttentionImpl(AttentionImpl[DlinferAttentionMetadata]):
    method __init__ (line 26) | def __init__(
    method forward (line 58) | def forward(
  class DlinferAttentionBuilder (line 150) | class DlinferAttentionBuilder(AttentionBuilder[DlinferAttentionMetadata]):
    method build (line 154) | def build(

FILE: lmdeploy/pytorch/backends/dlinfer/awq_modules.py
  class AwqLinearW4A16Impl (line 11) | class AwqLinearW4A16Impl(LinearW4A16Impl):
    method __init__ (line 14) | def __init__(self, in_features: int, out_features: int, w_bit: int, gr...
    method forward (line 20) | def forward(self,
  class AwqLinearW4A16Builder (line 33) | class AwqLinearW4A16Builder(LinearW4A16Builder):
    method build (line 37) | def build(in_features: int,

FILE: lmdeploy/pytorch/backends/dlinfer/camb/op_backend.py
  class CambOpsBackend (line 14) | class CambOpsBackend(DlinferOpsBackend):
    method get_name (line 19) | def get_name() -> str:
    method get_k_block_shape (line 24) | def get_k_block_shape(
    method get_v_block_shape (line 37) | def get_v_block_shape(
    method update_step_context (line 50) | def update_step_context(cls, step_context):
    method build_graph_runner (line 121) | def build_graph_runner(model: torch.nn.Module, model_config: ModelConf...
    method support_ray (line 128) | def support_ray():

FILE: lmdeploy/pytorch/backends/dlinfer/flash_attention.py
  class DlinferFlashAttentionImpl (line 7) | class DlinferFlashAttentionImpl(FlashAttentionImpl):
    method __init__ (line 10) | def __init__(
    method forward (line 38) | def forward(self,
  class DlinferFlashAttentionBuilder (line 71) | class DlinferFlashAttentionBuilder(FlashAttentionBuilder):
    method build (line 75) | def build(

FILE: lmdeploy/pytorch/backends/dlinfer/linear.py
  class DlinferLinearImpl (line 13) | class DlinferLinearImpl(LinearImpl):
    method update_weights (line 16) | def update_weights(self, weight: torch.Tensor, bias: Optional[torch.Te...
    method forward (line 22) | def forward(self,
  class DlinferLinearBuilder (line 37) | class DlinferLinearBuilder(LinearBuilder):
    method build (line 41) | def build(in_features: int, out_features: int, bias: bool = True, dtyp...

FILE: lmdeploy/pytorch/backends/dlinfer/maca/op_backend.py
  class MacaOpsBackend (line 14) | class MacaOpsBackend(DlinferOpsBackend):
    method get_name (line 19) | def get_name() -> str:
    method get_k_block_shape (line 24) | def get_k_block_shape(
    method get_v_block_shape (line 33) | def get_v_block_shape(
    method update_step_context (line 42) | def update_step_context(cls, step_context):
    method build_graph_runner (line 112) | def build_graph_runner(model: torch.nn.Module, model_config: ModelConf...
    method support_ray (line 119) | def support_ray():

FILE: lmdeploy/pytorch/backends/dlinfer/moe.py
  class DlinferSoftmaxTopKImpl (line 15) | class DlinferSoftmaxTopKImpl(SoftmaxTopKImpl):
    method __init__ (line 18) | def __init__(self, top_k: int, dim: int = -1, n_groups: int = -1):
    method forward (line 23) | def forward(self, x: torch.Tensor):
  class DlinferSoftmaxTopKBuilder (line 32) | class DlinferSoftmaxTopKBuilder(SoftmaxTopKBuilder):
    method build (line 36) | def build(top_k: int, dim: int = -1, n_groups: int = -1):
  class DlinferFusedMoEImpl (line 41) | class DlinferFusedMoEImpl(FusedMoEImpl):
    method __init__ (line 44) | def __init__(self,
    method update_weights (line 63) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ...
    method ep_expert_list (line 72) | def ep_expert_list(self, world_size: int, rank: int):
    method forward (line 80) | def forward(self,
  class DlinferFusedMoEBuilder (line 102) | class DlinferFusedMoEBuilder(FusedMoEBuilder):
    method build (line 106) | def build(top_k: int,

FILE: lmdeploy/pytorch/backends/dlinfer/norm.py
  class DlinferRMSNormImpl (line 9) | class DlinferRMSNormImpl(RMSNormImpl):
    method __init__ (line 12) | def __init__(self, hidden_size: int, eps: float = 1e-6):
    method forward (line 16) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor...
  class DlinferRMSNormBuilder (line 26) | class DlinferRMSNormBuilder(RMSNormBuilder):
    method build (line 30) | def build(weight: torch.Tensor, eps: float = 1e-6):

FILE: lmdeploy/pytorch/backends/dlinfer/op_backend.py
  class DlinferOpsBackend (line 14) | class DlinferOpsBackend(DefaultOpsBackend):
    method get_name (line 18) | def get_name() -> str:
    method get_layer_impl_builder (line 23) | def get_layer_impl_builder(cls, layer_type: OpType):
    method get_attention_metadata_cls (line 66) | def get_attention_metadata_cls():
    method get_k_block_shape (line 71) | def get_k_block_shape(
    method get_v_block_shape (line 84) | def get_v_block_shape(
    method update_step_context (line 97) | def update_step_context(cls, step_context):

FILE: lmdeploy/pytorch/backends/dlinfer/qmodules.py
  class DlinferLinearW8A8Impl (line 14) | class DlinferLinearW8A8Impl(LinearW8A8Impl):
    method __init__ (line 17) | def __init__(self,
    method update_weights (line 27) | def update_weights(self, weight: torch.Tensor, scale: torch.Tensor, bi...
    method forward (line 34) | def forward(self,
  class DlinferLinearW8A8Builder (line 54) | class DlinferLinearW8A8Builder(LinearW8A8Builder):
    method build (line 58) | def build(in_features: int,
  class DlinferRMSNormW8A8Impl (line 67) | class DlinferRMSNormW8A8Impl(RMSNormW8A8Impl):
    method __init__ (line 70) | def __init__(self, hidden_size: int, eps: float = 1e-6, quant_dtype: t...
    method forward (line 76) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor...
  class DlinferRMSNormW8A8Builder (line 88) | class DlinferRMSNormW8A8Builder(RMSNormW8A8Builder):
    method build (line 92) | def build(hidden_size: int, eps: float = 1e-6, quant_dtype: torch.dtyp...

FILE: lmdeploy/pytorch/backends/dlinfer/rotary_embedding.py
  function _rotary_embedding_fwd (line 14) | def _rotary_embedding_fwd(position_ids: torch.Tensor,
  class DlinferRotaryEmbeddingImpl (line 41) | class DlinferRotaryEmbeddingImpl(RotaryEmbeddingImpl, nn.Module):
    method __init__ (line 44) | def __init__(self, dim: int, base: int = 10000, scaling_factor: float ...
    method forward (line 54) | def forward(self, x, position_ids):
  class DlinferLlamaDynamicNTKScalingRotaryEmbedding (line 63) | class DlinferLlamaDynamicNTKScalingRotaryEmbedding(LlamaDynamicNTKScalin...
    method __init__ (line 69) | def __init__(self, dim: int, base: int = 10000, scaling_factor: float ...
    method _ntk_inv_freq (line 77) | def _ntk_inv_freq(self, seq_len: torch.Tensor):
    method forward (line 83) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor):
  class DlinferLlama3RotaryEmbeddingImpl (line 96) | class DlinferLlama3RotaryEmbeddingImpl(DlinferRotaryEmbeddingImpl):
    method __init__ (line 99) | def __init__(
  class DlinferYarnRotaryEmbeddingImpl (line 129) | class DlinferYarnRotaryEmbeddingImpl(YarnRotaryEmbeddingImpl):
    method __init__ (line 132) | def __init__(self,
    method forward (line 140) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor):
  class DlinferRotaryEmbeddingBuilder (line 148) | class DlinferRotaryEmbeddingBuilder(RotaryEmbeddingBuilder):
    method build (line 152) | def build(

FILE: lmdeploy/pytorch/backends/embedding.py
  class EmbeddingImpl (line 8) | class EmbeddingImpl(ABC):
    method forward (line 12) | def forward(self, x, weight: torch.Tensor, all_reduce: bool = False, g...
  class EmbeddingBuilder (line 17) | class EmbeddingBuilder(ABC):
    method build (line 22) | def build(start_index: int, end_index: int):

FILE: lmdeploy/pytorch/backends/flash_attention.py
  class FlashAttentionImpl (line 7) | class FlashAttentionImpl(ABC):
    method forward (line 10) | def forward(self,
  class FlashAttentionBuilder (line 23) | class FlashAttentionBuilder(ABC):
    method build (line 28) | def build(

FILE: lmdeploy/pytorch/backends/gated_delta_rule.py
  class GatedDeltaRuleImpl (line 7) | class GatedDeltaRuleImpl(ABC):
    method chunk_gated_delta_rule (line 11) | def chunk_gated_delta_rule(self,
    method fused_recurrent_gated_delta_rule (line 27) | def fused_recurrent_gated_delta_rule(self,
  class GatedDeltaRuleBuilder (line 42) | class GatedDeltaRuleBuilder(ABC):
    method build (line 47) | def build() -> GatedDeltaRuleImpl:

FILE: lmdeploy/pytorch/backends/graph_runner.py
  class GraphRunnerMeta (line 13) | class GraphRunnerMeta:
  function _get_capture_batch_size_impl (line 18) | def _get_capture_batch_size_impl(max_batches: int):
  class GraphRunner (line 29) | class GraphRunner:
    method __init__ (line 32) | def __init__(self, model: torch.nn.Module, model_config: ModelConfig, ...
    method __call__ (line 42) | def __call__(self, **kwargs):
    method get_model (line 46) | def get_model(self):
    method get_logits (line 50) | def get_logits(self, hidden_states: torch.Tensor):
    method prepare_inputs_for_generation (line 56) | def prepare_inputs_for_generation(
    method update_model_metas (line 69) | def update_model_metas(
    method get_input_processor (line 85) | def get_input_processor(self):
    method reset (line 92) | def reset(self):
    method get_meta (line 96) | def get_meta(self):
    method update_inputs (line 100) | def update_inputs(self, inputs):
    method get_capture_batch_sizes (line 103) | def get_capture_batch_sizes(self) -> List[int]:

FILE: lmdeploy/pytorch/backends/linear.py
  class LinearImpl (line 9) | class LinearImpl(ABC):
    method update_weights (line 12) | def update_weights(self, weight: torch.Tensor, bias: Optional[torch.Te...
    method forward (line 17) | def forward(self,
  class LinearBuilder (line 29) | class LinearBuilder(ABC):
    method build (line 34) | def build(in_features: int, out_features: int, bias: bool = True, dtyp...

FILE: lmdeploy/pytorch/backends/lora.py
  class AdapterInfo (line 11) | class AdapterInfo:
    method __post_init__ (line 21) | def __post_init__(self):
  class LoRAImpl (line 30) | class LoRAImpl(ABC):
    method forward (line 34) | def forward(self,
  class LoRABuilder (line 47) | class LoRABuilder(ABC):
    method build (line 52) | def build():

FILE: lmdeploy/pytorch/backends/moe.py
  class SoftmaxTopKImpl (line 10) | class SoftmaxTopKImpl(ABC):
    method get_group_offsets (line 15) | def get_group_offsets(n_groups: int, group_size: int, device: str):
    method forward (line 20) | def forward(self, x: torch.Tensor):
  class SoftmaxTopKBuilder (line 25) | class SoftmaxTopKBuilder(ABC):
    method build (line 30) | def build(top_k: int, dim: int = -1, n_groups: int = -1):
  class FusedMoEImpl (line 35) | class FusedMoEImpl(ABC):
    method update_weights (line 38) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ...
    method ep_expert_list (line 42) | def ep_expert_list(self, world_size: int, rank: int):
    method forward (line 47) | def forward(self,
  class FusedMoEBuilder (line 61) | class FusedMoEBuilder(ABC):
    method build (line 66) | def build(top_k: int,
  class FusedMoEW8A8Impl (line 78) | class FusedMoEW8A8Impl(ABC):
    method update_weights (line 81) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ...
    method ep_expert_list (line 86) | def ep_expert_list(self, world_size: int, rank: int):
    method forward (line 91) | def forward(self,
  class FusedMoEW8A8Builder (line 105) | class FusedMoEW8A8Builder(ABC):
    method build (line 110) | def build(top_k: int,
  class FusedMoEBlockedF8Impl (line 119) | class FusedMoEBlockedF8Impl(ABC):
    method __init__ (line 122) | def __init__(self):
    method update_weights (line 125) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ...
    method ep_expert_list (line 130) | def ep_expert_list(self, world_size: int, rank: int):
    method set_scale_fmt (line 134) | def set_scale_fmt(self, scale_fmt: Optional[str]):
    method forward (line 139) | def forward(self,
  class FusedMoEBlockedF8Builder (line 156) | class FusedMoEBlockedF8Builder(ABC):
    method build (line 161) | def build(top_k: int,

FILE: lmdeploy/pytorch/backends/moe_router.py
  class RouterNoauxTCImpl (line 8) | class RouterNoauxTCImpl(ABC):
    method forward (line 12) | def forward(self, logits: torch.Tensor, bias: torch.Tensor) -> Tuple[t...
  class RouterNoauxTCBuilder (line 17) | class RouterNoauxTCBuilder(ABC):
    method build (line 22) | def build(

FILE: lmdeploy/pytorch/backends/multinomial_sampling.py
  class MultinomialSamplingImpl (line 7) | class MultinomialSamplingImpl(ABC):
    method forward (line 11) | def forward(scores: torch.Tensor, seeds: torch.LongTensor, offsets: to...
  class MultinomialSamplingBuilder (line 16) | class MultinomialSamplingBuilder(ABC):
    method build (line 21) | def build():

FILE: lmdeploy/pytorch/backends/norm.py
  class RMSNormImpl (line 7) | class RMSNormImpl(ABC):
    method forward (line 11) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor...
  class RMSNormBuilder (line 16) | class RMSNormBuilder(ABC):
    method build (line 21) | def build(hidden_size: int, eps: float = 1e-6):
  class LayerNormImpl (line 26) | class LayerNormImpl(ABC):
    method forward (line 30) | def forward(self, x: torch.Tensor, weight: torch.Tensor, bias: torch.T...
  class LayerNormBuilder (line 35) | class LayerNormBuilder(ABC):
    method build (line 40) | def build(normalized_shape: int, eps: float = 1e-6):

FILE: lmdeploy/pytorch/backends/nsa.py
  class NSAIndexMeta (line 9) | class NSAIndexMeta:
  class BaseNSAIndexFP8 (line 19) | class BaseNSAIndexFP8(ABC):
    method forward (line 22) | def forward(self, q: Tensor, k: Tensor, weights: Tensor, k_cache: Tens...
  class BaseNSAIndexFP8Builder (line 28) | class BaseNSAIndexFP8Builder:
    method build (line 32) | def build(topk: int, softmax_scale: float, block_size: int = 128, fill...

FILE: lmdeploy/pytorch/backends/qmodules.py
  class RMSNormW8A8Impl (line 8) | class RMSNormW8A8Impl(ABC):
    method create_weight (line 12) | def create_weight(hidden_size: int, dtype: torch.dtype = None, device:...
    method forward (line 22) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor...
  class RMSNormW8A8Builder (line 27) | class RMSNormW8A8Builder(ABC):
    method build (line 32) | def build(hidden_size: int, eps: float = 1e-6, quant_dtype: torch.dtyp...
  class LinearW8A8Impl (line 37) | class LinearW8A8Impl(ABC):
    method update_weights (line 40) | def update_weights(self, weight: torch.Tensor, scale: torch.Tensor, bi...
    method forward (line 45) | def forward(self,
  class LinearW8A8Builder (line 56) | class LinearW8A8Builder(ABC):
    method build (line 61) | def build(in_features: int,

FILE: lmdeploy/pytorch/backends/rotary_embedding.py
  class RopeType (line 10) | class RopeType(Enum):
  class YarnParameters (line 22) | class YarnParameters:
  class LongRoPEScalingParameters (line 33) | class LongRoPEScalingParameters:
  class Llama3Parameters (line 43) | class Llama3Parameters:
  class FopeParameters (line 51) | class FopeParameters:
  class RotaryEmbeddingImpl (line 59) | class RotaryEmbeddingImpl(ABC):
    method forward (line 63) | def forward(self, x, position_ids, **kwargs):
  class RotaryEmbeddingBuilder (line 68) | class RotaryEmbeddingBuilder(ABC):
    method build (line 73) | def build(

FILE: lmdeploy/pytorch/backends/selector.py
  function _get_backend (line 5) | def _get_backend():
  function get_backend (line 28) | def get_backend(backend_type: str = None):
  function init_backend (line 39) | def init_backend(backend_type: str):

FILE: lmdeploy/pytorch/backends/token_dispatcher.py
  class TokenDispatcherImpl (line 8) | class TokenDispatcherImpl(ABC):
    method permute (line 11) | def permute(
    method unpermute (line 25) | def unpermute(
    method indices_to_multihot (line 43) | def indices_to_multihot(self, topk_ids, topk_weight, num_experts):
    method dispatch (line 65) | def dispatch(self, hidden_states: torch.Tensor, probs: torch.Tensor, t...
    method combine (line 71) | def combine(self, hidden_states: torch.Tensor) -> torch.Tensor:

FILE: lmdeploy/pytorch/block.py
  function _div_up (line 5) | def _div_up(x, n):
  function _round_up (line 10) | def _round_up(x, n):
  class LogicalTokenBlocks (line 15) | class LogicalTokenBlocks:
    method __init__ (line 19) | def __init__(self, blocks: np.ndarray = None):
    method reserve (line 29) | def reserve(self, size: int):
    method __setitem__ (line 37) | def __setitem__(self, *args, **kwargs):
    method __getitem__ (line 41) | def __getitem__(self, *args, **kwargs):
    method get_real_blocks (line 45) | def get_real_blocks(self):
    method append (line 49) | def append(self, blocks: np.ndarray):
    method __len__ (line 58) | def __len__(self):
    method resize (line 62) | def resize(self, num_blocks: int):
    method reset (line 67) | def reset(self):
    method clone (line 72) | def clone(self):

FILE: lmdeploy/pytorch/check_env/adapter.py
  class AdapterChecker (line 5) | class AdapterChecker(BaseChecker):
    method __init__ (line 8) | def __init__(self, adapter_path: str, logger=None):
    method check (line 12) | def check(self):

FILE: lmdeploy/pytorch/check_env/base.py
  function _red_text (line 11) | def _red_text(text: str):
  class BaseChecker (line 18) | class BaseChecker:
    method __init__ (line 21) | def __init__(self, logger: Logger = None):
    method get_logger (line 28) | def get_logger(self):
    method register_required_checker (line 32) | def register_required_checker(self, checker: 'BaseChecker'):
    method handle (line 36) | def handle(self):
    method log_and_exit (line 47) | def log_and_exit(self, e: Exception = None, mod_name: str = None, mess...
    method check (line 59) | def check(self):

FILE: lmdeploy/pytorch/check_env/cuda.py
  class CudaChecker (line 5) | class CudaChecker(BaseChecker):
    method __init__ (line 8) | def __init__(self, model_format: str = None, logger=None) -> None:
    method check (line 12) | def check(self):

FILE: lmdeploy/pytorch/check_env/deeplink.py
  class DeeplinkChecker (line 7) | class DeeplinkChecker(BaseChecker):
    method __init__ (line 10) | def __init__(self, device_type: str, logger=None) -> None:
    method check (line 14) | def check(self):

FILE: lmdeploy/pytorch/check_env/dist.py
  class DistChecker (line 9) | class DistChecker(BaseChecker):
    method __init__ (line 12) | def __init__(self, tp: int, dp: int, ep: int, distributed_executor_bac...
    method check (line 22) | def check(self):

FILE: lmdeploy/pytorch/check_env/model.py
  class ModelChecker (line 7) | class ModelChecker(BaseChecker):
    method __init__ (line 10) | def __init__(self, model_path: str, trust_remote_code: bool, dtype: st...
    method check_config (line 17) | def check_config(self, trans_version):
    method check_trans_version (line 31) | def check_trans_version(self, config, trans_version):
    method check_dtype (line 44) | def check_dtype(self, config):
    method check (line 72) | def check(self):

FILE: lmdeploy/pytorch/check_env/torch.py
  class TorchChecker (line 5) | class TorchChecker(BaseChecker):
    method __init__ (line 8) | def __init__(self, device: str = 'cuda', logger=None) -> None:
    method check (line 12) | def check(self):

FILE: lmdeploy/pytorch/check_env/transformers.py
  class TransformersChecker (line 10) | class TransformersChecker(BaseChecker):
    method check (line 13) | def check(self):

FILE: lmdeploy/pytorch/check_env/triton.py
  class TritonChecker (line 10) | class TritonChecker(BaseChecker):
    method check_version (line 13) | def check_version(self):
    method check (line 31) | def check(self):

FILE: lmdeploy/pytorch/check_env/triton_custom_add.py
  function _add_kernel (line 8) | def _add_kernel(A, B, C, size, BLOCK: tl.constexpr):
  function custom_add (line 17) | def custom_add(a, b):

FILE: lmdeploy/pytorch/config.py
  function _update_torch_dtype (line 16) | def _update_torch_dtype(config: 'ModelConfig', dtype: str, device_type: ...
  class BackendConfig (line 64) | class BackendConfig:
  class SchedulerConfig (line 71) | class SchedulerConfig:
  class CacheConfig (line 83) | class CacheConfig:
    method __post_init__ (line 106) | def __post_init__(self):
  class TPMode (line 113) | class TPMode(enum.Enum):
  class DistConfig (line 120) | class DistConfig:
    method __post_init__ (line 138) | def __post_init__(self):
    method get_tp_by_layer (line 183) | def get_tp_by_layer(self, layer_type: str):
    method from_engine_config (line 198) | def from_engine_config(cls, engine_config: PytorchEngineConfig):
  function _override_hf_config_dict (line 214) | def _override_hf_config_dict(hf_config: dict, key: str, hf_overrides):
  function _overide_hf_config_cfg (line 234) | def _overide_hf_config_cfg(hf_config: list, key: str, hf_overrides):
  function _override_hf_config (line 252) | def _override_hf_config(hf_config: Any, key: str, hf_overrides):
  function override_hf_config (line 260) | def override_hf_config(hf_config: Any, hf_overrides: Dict[str, Any]):
  function _default_check_env (line 266) | def _default_check_env(device: str):
  function _patch_quantization_config (line 270) | def _patch_quantization_config(hf_config: Any, model_format: str = None):
  class ModelConfig (line 300) | class ModelConfig:
    method get_head_size (line 347) | def get_head_size(self):
    method from_pretrained (line 352) | def from_pretrained(
    method from_hf_config (line 413) | def from_hf_config(
  class UnmaskingStrategy (line 459) | class UnmaskingStrategy(enum.Enum):
    method from_str (line 470) | def from_str(cls, strategy: str):
  class DLLMConfig (line 484) | class DLLMConfig:
  class MiscConfig (line 492) | class MiscConfig:
    method from_engine_config (line 505) | def from_engine_config(cls, engine_config: PytorchEngineConfig):
  class SpecDecodeConfig (line 528) | class SpecDecodeConfig:
    method from_config (line 536) | def from_config(
  class QuantizationConfig (line 574) | class QuantizationConfig:
    method from_config (line 586) | def from_config(cls, hf_config: Any):
    method get_quant_method (line 644) | def get_quant_method(self, prefix: str = ''):
    method get (line 653) | def get(self, key, default=None):

FILE: lmdeploy/pytorch/configurations/builder.py
  class AutoModelConfigBuilder (line 9) | class AutoModelConfigBuilder(ABC):
    method __init_subclass__ (line 13) | def __init_subclass__(cls) -> None:
    method register_builder (line 18) | def register_builder(cls, sub_cls):
    method condition (line 24) | def condition(cls, hf_config):
    method build (line 29) | def build(cls, hf_config, model_path: str = None, **kwargs):
    method update_num_kv_heads (line 56) | def update_num_kv_heads(cls, hf_config, tp, num_key_value_heads):

FILE: lmdeploy/pytorch/configurations/chatglm.py
  class ChatGLMModelConfigBuilder (line 7) | class ChatGLMModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 10) | def condition(cls, hf_config):
    method build (line 15) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/cogvlm.py
  class CogVLMModelConfigBuilder (line 6) | class CogVLMModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 15) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/deepseek_v2.py
  class DeepseekV2ModelConfigBuilder (line 8) | class DeepseekV2ModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 11) | def condition(cls, hf_config):
    method build (line 16) | def build(cls, hf_config, model_path: str = None, is_draft_model: bool...

FILE: lmdeploy/pytorch/configurations/deepseek_v32.py
  function _check_env_v32 (line 7) | def _check_env_v32(device: str = 'cuda'):
  class DeepseekV32ModelConfigBuilder (line 27) | class DeepseekV32ModelConfigBuilder(DeepseekV2ModelConfigBuilder):
    method condition (line 30) | def condition(cls, hf_config):
    method build (line 35) | def build(cls, hf_config, model_path: str | None = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/deepseek_vl2.py
  class DeepseekVLV2ModelConfigBuilder (line 6) | class DeepseekVLV2ModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/default.py
  class DefaultModelConfigBuilder (line 7) | class DefaultModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 10) | def condition(cls, hf_config):
    method build (line 15) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/gemma.py
  class GemmaModelConfigBuilder (line 6) | class GemmaModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs):
  class GemmaVLModelConfigBuilder (line 21) | class GemmaVLModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 24) | def condition(cls, hf_config):
    method build (line 30) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/glm4.py
  class Glm4MoeLiteModelConfigBuilder (line 6) | class Glm4MoeLiteModelConfigBuilder(DeepseekV2ModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, is_draft_model: bool...
  class Glm4MoeModelConfigBuilder (line 28) | class Glm4MoeModelConfigBuilder(DefaultModelConfigBuilder):
    method condition (line 31) | def condition(cls, hf_config):
    method build (line 36) | def build(cls, hf_config, model_path: str = None, is_draft_model: bool...

FILE: lmdeploy/pytorch/configurations/gpt_oss.py
  class GptOSSModelConfigBuilder (line 6) | class GptOSSModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/interns1_pro.py
  class InterS1ProModelConfigBuilder (line 6) | class InterS1ProModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/internvl.py
  class InternVLModelConfigBuilder (line 6) | class InternVLModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/internvl3_hf.py
  class InternVL3ModelConfigBuilder (line 6) | class InternVL3ModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/llama.py
  class LlamaModelConfigBuilder (line 6) | class LlamaModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, is_draft_model: bool...

FILE: lmdeploy/pytorch/configurations/llama4.py
  class Llama4ModelConfigBuilder (line 6) | class Llama4ModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/llava_hf.py
  class LlavaHfModelConfigBuilder (line 7) | class LlavaHfModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 10) | def condition(cls, hf_config):
    method build (line 15) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/minicpm3.py
  class MiniCPM3ModelConfigBuilder (line 7) | class MiniCPM3ModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 10) | def condition(cls, hf_config):
    method build (line 15) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/qwen.py
  class QwenModelConfigBuilder (line 6) | class QwenModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/qwen3_5.py
  class Qwen3_5ModelConfigBuilder (line 11) | class Qwen3_5ModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 14) | def condition(cls, hf_config):
    method build (line 19) | def build(cls, hf_config, model_path: str = None, tp: int = 1, **kwargs):

FILE: lmdeploy/pytorch/configurations/qwen3_next.py
  function _check_env_qwen3_next (line 8) | def _check_env_qwen3_next(device: str):
  class Qwen3NextModelConfigBuilder (line 19) | class Qwen3NextModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 22) | def condition(cls, hf_config):
    method build (line 27) | def build(cls, hf_config, model_path: str = None, tp: int = 1, **kwargs):

FILE: lmdeploy/pytorch/configurations/qwen3_vl.py
  class Qwen3VLModelConfigBuilder (line 6) | class Qwen3VLModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 9) | def condition(cls, hf_config):
    method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/sdar.py
  class SDARModelConfigBuilder (line 5) | class SDARModelConfigBuilder(AutoModelConfigBuilder):
    method condition (line 8) | def condition(cls, hf_config):
    method build (line 13) | def build(cls, hf_config, model_path: str = None, **kwargs):

FILE: lmdeploy/pytorch/configurations/utils.py
  function flash_mla_available (line 9) | def flash_mla_available():
  function flash_attn_v3_available (line 26) | def flash_attn_v3_available():

FILE: lmdeploy/pytorch/devices/device_manager.py
  class DeviceContext (line 9) | class DeviceContext:
  class DeviceManager (line 17) | class DeviceManager(CtxMgrBase[DeviceContext]):
    method __init__ (line 19) | def __init__(self):
    method register_context_callback (line 24) | def register_context_callback(self, callback: Callable):
    method unregister_context_callback (line 31) | def unregister_context_callback(self, handle: int):
  function get_device_manager (line 36) | def get_device_manager():

FILE: lmdeploy/pytorch/disagg/backend/base.py
  class MigrationBackendImpl (line 9) | class MigrationBackendImpl:
    method p2p_initialize (line 12) | def p2p_initialize(self, init_request: DistServeInitRequest):
    method register_memory_region (line 16) | def register_memory_region(self, register_mr_request: DistServeRegiste...
    method endpoint_info (line 20) | def endpoint_info(self, remote_engine_id: str, protocol: MigrationProt...
    method p2p_connect (line 24) | def p2p_connect(self, remote_engine_id: str, conn_req: DistServeKVTran...
    method p2p_migrate (line 28) | def p2p_migrate(self, assignment: MigrationAssignment, async_op: bool ...
    method store (line 32) | def store(self, assignment: MigrationAssignment, async_op: bool = False):
    method load (line 36) | def load(self, assignment: MigrationAssignment, async_op: bool = False):

FILE: lmdeploy/pytorch/disagg/backend/dlslime.py
  class DLSlimeMigrationManagement (line 22) | class DLSlimeMigrationManagement:
    method __init__ (line 24) | def __init__(self, init_request: DistServeInitRequest):
    method register_memory_region (line 46) | def register_memory_region(self, register_mr_request: DistServeRegiste...
    method connect (line 54) | def connect(self, kvtransfer_endpoint_info: DistServeKVTransferEndpoin...
    method p2p_migrate (line 57) | async def p2p_migrate(self, assignment: MigrationAssignment):
  class DLSlimeBackend (line 75) | class DLSlimeBackend(MigrationBackendImpl):
    method __init__ (line 78) | def __init__(self):
    method p2p_initialize (line 81) | def p2p_initialize(self, init_request: DistServeInitRequest):
    method register_memory_region (line 84) | def register_memory_region(self, register_mr_request: DistServeRegiste...
    method endpoint_info (line 87) | def endpoint_info(self, remote_engine_id: str, protocol: MigrationProt...
    method p2p_connect (line 90) | def p2p_connect(self, remote_engine_id: str, conn_req: DistServeKVTran...
    method p2p_migrate (line 93) | async def p2p_migrate(self, assignment: MigrationAssignment, async_op:...
    method store (line 96) | def store(self, assignment: MigrationAssignment, async_op: bool = False):
    method load (line 99) | def load(self, assignment: MigrationAssignment, async_op: bool = False):

FILE: lmdeploy/pytorch/disagg/backend/mooncake.py
  function get_rdma_nics (line 22) | def get_rdma_nics():
  function get_local_ip_by_remote (line 48) | def get_local_ip_by_remote() -> str:
  class MooncakeMigrationManagement (line 68) | class MooncakeMigrationManagement:
    method __init__ (line 71) | def __init__(self, init_request: DistServeInitRequest):
    method _initialize_p2p (line 100) | def _initialize_p2p(self, init_request: DistServeInitRequest):
    method register_memory_region (line 123) | def register_memory_region(self, register_mr_request: DistServeRegiste...
    method endpoint_info (line 145) | def endpoint_info(self) -> Dict:
    method connect (line 164) | def connect(self, connect_request: DistServeKVTransferEndpointInfo):
    method p2p_migrate (line 178) | async def p2p_migrate(self, assignment: MigrationAssignment, async_op:...
    method _migrate (line 195) | def _migrate(self, assignment: MigrationAssignment):
  class MooncakeBackend (line 236) | class MooncakeBackend(MigrationBackendImpl):
    method __init__ (line 239) | def __init__(self):
    method p2p_initialize (line 242) | def p2p_initialize(self, init_request: DistServeInitRequest):
    method register_memory_region (line 245) | def register_memory_region(self, register_mr_request: DistServeRegiste...
    method endpoint_info (line 248) | def endpoint_info(self, remote_engine_id: int, protocol: MigrationProt...
    method p2p_connect (line 251) | def p2p_connect(self, remote_engine_id: str, connect_request: DistServ...
    method p2p_migrate (line 254) | async def p2p_migrate(self, assignment: MigrationAssignment, async_op:...
    method store (line 257) | def store(self, assignment: MigrationAssignment, async_op: bool = False):
    method load (line 260) | def load(self, assignment: MigrationAssignment, async_op: bool = False):

FILE: lmdeploy/pytorch/disagg/config.py
  class ServingStrategy (line 8) | class ServingStrategy(enum.Enum):
  class EngineRole (line 22) | class EngineRole(enum.Enum):
  class MigrationBackend (line 40) | class MigrationBackend(enum.Enum):
  class RDMALinkType (line 47) | class RDMALinkType(enum.Enum):
  class DistServeRDMAConfig (line 54) | class DistServeRDMAConfig(BaseModel):
  class DistServeTCPConfig (line 72) | class DistServeTCPConfig(BaseModel):
  class DistServeNVLinkConfig (line 76) | class DistServeNVLinkConfig(BaseModel):
  class DistServeEngineConfig (line 80) | class DistServeEngineConfig(BaseModel):
  class MooncakeEngineConfig (line 112) | class MooncakeEngineConfig(DistServeEngineConfig):

FILE: lmdeploy/pytorch/disagg/conn/engine_conn.py
  class EngineP2PConnection (line 24) | class EngineP2PConnection:
    method __init__ (line 26) | def __init__(self, engine: 'Engine'):
    method p2p_initialize (line 34) | def p2p_initialize(self, init_request: DistServeInitRequest):
    method p2p_connect (line 54) | def p2p_connect(self, conn_request: DistServeConnectionRequest):
    method p2p_drop_connect (line 62) | def p2p_drop_connect(self, drop_conn_request: DistServeDropConnectionR...
    method zmq_send (line 67) | async def zmq_send(self, remote_engine_id: str, remote_session_id: int):
    method handle_zmq_recv (line 71) | async def handle_zmq_recv(self, remote_engine_id: str):
    method zmq_disconnect (line 83) | async def zmq_disconnect(self, remote_engine_id: str):

FILE: lmdeploy/pytorch/disagg/conn/protocol.py
  class MigrationProtocol (line 11) | class MigrationProtocol(enum.Enum):
  class DistServeConnectionStatus (line 27) | class DistServeConnectionStatus(enum.Enum):
  class DistServeInitRequest (line 33) | class DistServeInitRequest(BaseModel):
  class DistServeEngineEndpointInfo (line 49) | class DistServeEngineEndpointInfo(BaseModel):
  class DistServeKVTransferEndpointInfo (line 53) | class DistServeKVTransferEndpointInfo(BaseModel):
  class DistServeInitResponse (line 58) | class DistServeInitResponse(BaseModel):
  class DistServeConnectionRequest (line 69) | class DistServeConnectionRequest(BaseModel):
  class DistServeConnectionResponse (line 76) | class DistServeConnectionResponse(BaseModel):
  class MigrationRequest (line 80) | class MigrationRequest(BaseModel):
  class DistServeCacheFreeRequest (line 91) | class DistServeCacheFreeRequest(BaseModel):
  class DistServeDropConnectionRequest (line 96) | class DistServeDropConnectionRequest(BaseModel):

FILE: lmdeploy/pytorch/disagg/conn/proxy_conn.py
  class PDConnectionStatus (line 23) | class PDConnectionStatus(enum.Enum):
  class PDConnectionState (line 29) | class PDConnectionState:
    method __init__ (line 32) | def __init__(self, status: PDConnectionStatus, event: asyncio.Event):
    method wait (line 36) | async def wait(self):
    method set_status (line 39) | def set_status(self, status: PDConnectionStatus):
  function get_server_api (line 43) | def get_server_api(url: str, api: str):
  class PDConnectionPool (line 47) | class PDConnectionPool:
    method __init__ (line 65) | def __init__(self):
    method reg_instance (line 94) | def reg_instance(self, role: EngineRole, endpoint: str):
    method dereg_instance (line 102) | def dereg_instance(self, endpoint: str):
    method shelf_prefill_session (line 115) | def shelf_prefill_session(self, conn_key: Tuple[str, str], session_id:...
    method unshelf_prefill_session (line 118) | def unshelf_prefill_session(self, conn_key: Tuple[str, str], session_i...
    method connect (line 121) | async def connect(self, conn_req: PDConnectionMessage):
    method is_connected (line 261) | def is_connected(self, p_url: str, d_url: str):
    method drop (line 267) | def drop(self, pd_key: Tuple[str, str]):

FILE: lmdeploy/pytorch/disagg/messages.py
  class MigrationExecutionBatch (line 10) | class MigrationExecutionBatch(BaseModel):
  class AssignmentInstruct (line 17) | class AssignmentInstruct(BaseModel):
  class MigrationAssignment (line 25) | class MigrationAssignment(BaseModel):
  class PDConnectionMessage (line 32) | class PDConnectionMessage(BaseModel):
  class DistServeRegisterMRMessage (line 41) | class DistServeRegisterMRMessage(BaseModel):

FILE: lmdeploy/pytorch/distributed.py
  class DistGroup (line 16) | class DistGroup:
    method close (line 25) | def close(self):
  function _build_tp_group_impl (line 39) | def _build_tp_group_impl(tp: int,
  function _build_attn_tp_group (line 89) | def _build_attn_tp_group(context: 'DistContext',
  function _build_mlp_tp_group (line 114) | def _build_mlp_tp_group(context: 'DistContext',
  function _build_moe_tp_group (line 144) | def _build_moe_tp_group(context: 'DistContext',
  function _build_tp_group (line 179) | def _build_tp_group(context: 'DistContext', timeout: timedelta, cpu_back...
  class DistContext (line 188) | class DistContext:
    method _build_ep_group (line 204) | def _build_ep_group(cls, context: 'DistContext', timeout: timedelta, c...
    method build (line 228) | def build(cls, rank: int = 0, dist_config: DistConfig = None, ccl_back...
    method close (line 261) | def close(self):
  class DistManager (line 281) | class DistManager(CtxMgrBase[DistContext]):
    method __init__ (line 284) | def __init__(self):
    method current_config (line 287) | def current_config(self) -> DistConfig:
  function get_dist_manager (line 292) | def get_dist_manager():
  function get_world_rank (line 297) | def get_world_rank():
  function get_tp_world_rank (line 306) | def get_tp_world_rank(layer_type: Optional[str] = None):
  function get_dp_world_rank (line 320) | def get_dp_world_rank():
  function get_ep_world_rank (line 325) | def get_ep_world_rank():
  function _check_group_device (line 330) | def _check_group_device(device: str):
  function get_process_group (line 336) | def get_process_group(device: str = None):
  function get_dist_group (line 341) | def get_dist_group(layer_type: str = 'attn'):
  function get_tp_group (line 355) | def get_tp_group(device: str = 'gpu', layer_type: str = 'attn'):
  function get_group (line 369) | def get_group(group_type: str, device: str):
  function all_reduce (line 379) | def all_reduce(tensor, op=ReduceOp.SUM, group='tp', async_op=False):
  function broadcast (line 386) | def broadcast(tensor, src, group='tp', async_op=False):
  function all_gather_object (line 393) | def all_gather_object(object_list, obj, group='tp'):
  function all_gather (line 399) | def all_gather(tensor_list, tensor, group='tp', async_op=False):
  function all_gather_into_tensor (line 405) | def all_gather_into_tensor(output_tensor, input_tensor, group='tp', asyn...
  function reduce_scatter (line 411) | def reduce_scatter(output, input_list, op=ReduceOp.SUM, group='tp', asyn...
  function gather_by_tp_sizes (line 418) | def gather_by_tp_sizes(x: torch.Tensor,
  function reduce_scatter_by_tp_sizes (line 433) | def reduce_scatter_by_tp_sizes(out: torch.Tensor, rank: int, tp_sizes: L...

FILE: lmdeploy/pytorch/engine/base.py
  class EngineBase (line 6) | class EngineBase:
    method close (line 8) | def close(self) -> None:
    method start_loop (line 12) | def start_loop(self) -> None:
    method end_session (line 15) | def end_session(self, session_id: int):
    method p2p_initialize (line 19) | def p2p_initialize(self, conn_request: DistServeInitRequest):
    method p2p_connect (line 23) | def p2p_connect(self, conn_request: DistServeConnectionRequest):
    method p2p_drop_connect (line 27) | def p2p_drop_connect(self, drop_conn_request: DistServeDropConnectionR...
    method create_instance (line 35) | def create_instance(self, cuda_stream_id=0):
  class EngineInstanceBase (line 40) | class EngineInstanceBase:
    method async_end (line 42) | async def async_end(self, session_id: int):
    method async_cancel (line 46) | async def async_cancel(self, session_id: int):
    method async_stream_infer (line 50) | async def async_stream_infer(self, *args, **kwargs):

FILE: lmdeploy/pytorch/engine/cache_engine.py
  function round_up (line 25) | def round_up(x: int, alignment: int) -> int:
  class CacheDesc (line 31) | class CacheDesc:
    method __post_init__ (line 37) | def __post_init__(self):
  function _get_kv_cache_dtype (line 43) | def _get_kv_cache_dtype(model_config: ModelConfig):
  class CacheEngine (line 54) | class CacheEngine:
    method __init__ (line 67) | def __init__(
    method cpu_cache (line 113) | def cpu_cache(self):
    method gpu_cache (line 118) | def gpu_cache(self):
    method num_gpu_blocks (line 123) | def num_gpu_blocks(self):
    method num_cpu_blocks (line 128) | def num_cpu_blocks(self):
    method _get_key_block_shape_impl (line 133) | def _get_key_block_shape_impl(cls,
    method _get_value_block_shape_impl (line 160) | def _get_value_block_shape_impl(cls,
    method get_k_cache_desc (line 189) | def get_k_cache_desc(cls, model_config: ModelConfig, cache_config: Cac...
    method get_v_cache_desc (line 208) | def get_v_cache_desc(cls, model_config: ModelConfig, cache_config: Cac...
    method get_quant_cache_descs (line 227) | def get_quant_cache_descs(cls, k_cache_desc: CacheDesc, v_cache_desc: ...
    method get_custom_cache_descs (line 241) | def get_custom_cache_descs(cls, model_config: ModelConfig, cache_confi...
    method allocate_caches (line 256) | def allocate_caches(cls, num_blocks: int, model_config: ModelConfig, c...
    method allocate_gpu_cache (line 286) | def allocate_gpu_cache(self):
    method allocate_cpu_cache (line 299) | def allocate_cpu_cache(self):
    method get_custom_cache_shape_impl (line 313) | def get_custom_cache_shape_impl(num_layers: int, num_blocks: int, bloc...
    method _allocate_single_custom_cache (line 318) | def _allocate_single_custom_cache(shape: Sequence[int], dtype: torch.d...
    method allocate_custom_cache (line 322) | def allocate_custom_cache(self, device: str):
    method _swap (line 338) | def _swap(self, src: List[torch.Tensor], dst: List[torch.Tensor], src_...
    method swap_in (line 360) | def swap_in(self, src_to_dst: Dict[int, int]) -> None:
    method swap_out (line 368) | def swap_out(self, src_to_dst: Dict[int, int]) -> None:
    method get_cache_block_size (line 377) | def get_cache_block_size(cls, cache_config: CacheConfig, model_config:...
    method p2p_initialize (line 399) | def p2p_initialize(self, migration_init_request: DistServeInitRequest)...
    method p2p_connect (line 420) | def p2p_connect(self, remote_engine_id: str, migration_conn_request: L...
    method migrate (line 423) | async def migrate(self, migration_execution_inputs: MigrationExecution...
  class StateCacheEngine (line 459) | class StateCacheEngine:
    method __init__ (line 462) | def __init__(self, cache_config: CacheConfig):
    method allocate_caches (line 469) | def allocate_caches(num_caches: int, state_shapes: List[Tuple[Tuple[in...
    method get_cache_state_size (line 495) | def get_cache_state_size(state_shapes: List[Tuple[Tuple[int], torch.dt...
    method state_caches (line 508) | def state_caches(self):
    method init_caches (line 512) | def init_caches(self, idx: torch.Tensor, mask: torch.Tensor):

FILE: lmdeploy/pytorch/engine/config_builder.py
  class ConfigBuilder (line 11) | class ConfigBuilder:
    method update_engine_config (line 14) | def update_engine_config(engine_config: PytorchEngineConfig):
    method build_scheduler_config (line 46) | def build_scheduler_config(engine_config: PytorchEngineConfig):
    method build_cache_config (line 54) | def build_cache_config(engine_config: PytorchEngineConfig):
    method build_backend_config (line 73) | def build_backend_config(engine_config: PytorchEngineConfig):
    method build_dist_config (line 82) | def build_dist_config(engine_config: PytorchEngineConfig):
    method build_misc_config (line 88) | def build_misc_config(engine_config: PytorchEngineConfig):
    method build_specdecode_config (line 94) | def build_specdecode_config(target_model, speculative_config: Speculat...

FILE: lmdeploy/pytorch/engine/engine.py
  class InferOutput (line 35) | class InferOutput:
  function _build_seq_meta (line 57) | def _build_seq_meta(cache_config: CacheConfig, seq_strategy: Any, sampli...
  function response_reqs (line 64) | def response_reqs(req_manager: RequestManager,
  class Engine (line 78) | class Engine(EngineBase):
    method __init__ (line 87) | def __init__(
    method from_pretrained (line 191) | def from_pretrained(cls,
    method _download_adapters (line 232) | def _download_adapters(self, adapters: Dict[str, str], engine_config: ...
    method _build_adapter_manager (line 246) | def _build_adapter_manager(self, adapters):
    method _bind_request_manager (line 249) | def _bind_request_manager(self):
    method _response (line 258) | def _response(self, resp: Response, resp_type: ResponseType, data: Any...
    method _get_max_session_len (line 262) | def _get_max_session_len(self):
    method _on_add_session (line 277) | def _on_add_session(self, reqs: List[Request], **kwargs):
    method _on_stop_session (line 289) | def _on_stop_session(self, reqs: List[Request], **kwargs):
    method _on_end_session (line 308) | def _on_end_session(self, reqs: List[Request], **kwargs):
    method _on_add_message (line 324) | def _on_add_message(self, reqs: List[Request], **kwargs):
    method _add_message (line 362) | def _add_message(self, reqs: List[Request]):
    method model_config (line 416) | def model_config(self) -> ModelConfig:
    method p2p_initialize (line 420) | def p2p_initialize(self, init_request: DistServeInitRequest):
    method p2p_connect (line 423) | def p2p_connect(self, conn_request: DistServeConnectionRequest):
    method p2p_drop_connect (line 426) | def p2p_drop_connect(self, drop_conn_request: DistServeDropConnectionR...
    method _loop_finally (line 429) | def _loop_finally(self):
    method update_params (line 435) | def update_params(self, request: Any):
    method sleep (line 439) | def sleep(self, level: int = 1):
    method wakeup (line 443) | def wakeup(self, tags: Optional[List[str]] = None):
    method async_loop (line 447) | async def async_loop(self):
    method close (line 475) | def close(self):
    method start (line 486) | def start(self):
    method stop (line 493) | def stop(self):
    method wait_tasks (line 498) | async def wait_tasks(self):
    method create_instance (line 511) | def create_instance(self, cuda_stream_id=0):
    method start_loop (line 522) | def start_loop(self):
    method end_session (line 526) | def end_session(self, session_id: int):
    method get_engine_config (line 533) | def get_engine_config(self):
    method get_schedule_metrics (line 536) | def get_schedule_metrics(self):

FILE: lmdeploy/pytorch/engine/engine_checker.py
  class EngineChecker (line 12) | class EngineChecker(BaseChecker):
    method __init__ (line 15) | def __init__(self,
    method check (line 77) | def check(self):
    method _handle_impl (line 100) | def _handle_impl(self):
    method handle (line 103) | def handle(self):

FILE: lmdeploy/pytorch/engine/engine_instance.py
  function _check_resp (line 17) | def _check_resp(resp: Response, state: ResponseType, warning_msg: str = ...
  function _check_resp_success (line 27) | def _check_resp_success(resp: Response, warning_msg: str = None):
  function async_try_add_session (line 32) | async def async_try_add_session(req_sender: RequestSender, session_id: i...
  function async_cancel (line 43) | async def async_cancel(req_sender: RequestSender, session_id: int):
  function try_add_session (line 50) | def try_add_session(req_sender: RequestSender, session_id: int):
  function end (line 61) | def end(req_sender: RequestSender, session_id: int):
  function cancel (line 67) | def cancel(req_sender: RequestSender, session_id: int):
  class EngineInstance (line 75) | class EngineInstance(EngineInstanceBase):
    method __init__ (line 82) | def __init__(self, engine: Engine):
    method __del__ (line 90) | def __del__(self):
    method _get_extra_outputs (line 94) | def _get_extra_outputs(self, resp: Response):
    method _async_try_add_session (line 110) | async def _async_try_add_session(self, session_id: int):
    method _try_add_session (line 118) | def _try_add_session(self, session_id: int):
    method async_stream_infer (line 126) | async def async_stream_infer(self,
    method async_infer (line 211) | async def async_infer(self,
    method stream_infer (line 240) | def stream_infer(self,
    method infer (line 277) | def infer(self,
    method async_end (line 298) | async def async_end(self, session_id: int):
    method end (line 302) | def end(self, session_id: int):
    method async_cancel (line 306) | async def async_cancel(self, session_id: int):
    method cancel (line 310) | def cancel(self, session_id: int):

FILE: lmdeploy/pytorch/engine/engine_loop.py
  class CounterEvent (line 37) | class CounterEvent(asyncio.Event):
    method __init__ (line 39) | def __init__(self):
    method set (line 43) | def set(self):
    method clear (line 49) | def clear(self):
  class RunableEventAsync (line 55) | class RunableEventAsync:
    method __init__ (line 58) | def __init__(self, scheduler: 'Scheduler'):
    method wait (line 62) | async def wait(self):
    method set (line 66) | def set(self):
  function build_runable_event (line 74) | def build_runable_event(scheduler: 'Scheduler'):
  class EngineLoopConfig (line 80) | class EngineLoopConfig:
    method from_engine (line 91) | def from_engine(engine: 'Engine'):
  class EngineLoop (line 106) | class EngineLoop:
    method __init__ (line 109) | def __init__(self,
    method preprocess_loop (line 137) | async def preprocess_loop(self):
    method _log_resps (line 144) | def _log_resps(outputs: List[InferOutput]):
    method _send_resp (line 151) | def _send_resp(self, out: InferOutput):
    method _update_logprobs (line 169) | def _update_logprobs(step_outputs: List[InferOutput]):
    method _send_resps (line 186) | def _send_resps(self, step_outputs: List[InferOutput]):
    method send_response_loop (line 198) | async def send_response_loop(self):
    method _make_infer_outputs (line 212) | def _make_infer_outputs(
    method _main_loop_try_send_next_inputs (line 301) | async def _main_loop_try_send_next_inputs(self):
    method _main_loop_get_outputs (line 310) | async def _main_loop_get_outputs(
    method main_loop (line 332) | async def main_loop(self):
    method update_running_migration (line 365) | def update_running_migration(self, running: 'SeqList', next_token_ids:...
    method _migration_loop_migrate (line 382) | async def _migration_loop_migrate(self, migration_ready: 'SeqList'):
    method _migration_loop_get_outputs (line 410) | async def _migration_loop_get_outputs(self, migration_ready: 'SeqList'):
    method _migration_loop_process_ready (line 431) | async def _migration_loop_process_ready(self, migration_ready: 'SeqLis...
    method migration_loop (line 440) | async def migration_loop(self):
    method start (line 453) | def start(self, event_loop: asyncio.AbstractEventLoop):
    method wait_tasks (line 473) | async def wait_tasks(self):
    method stop (line 494) | def stop(self):
    method cancel (line 503) | def cancel(self):
  function build_engine_loop (line 511) | def build_engine_loop(engine: 'Engine'):

FILE: lmdeploy/pytorch/engine/executor/__init__.py
  function get_distributed_executor_backend (line 12) | def get_distributed_executor_backend(world_size: int, dp: int, device_ty...
  function build_executor (line 56) | def build_executor(

FILE: lmdeploy/pytorch/engine/executor/base.py
  class ExecutorBase (line 16) | class ExecutorBase:
    method __init__ (line 19) | def __init__(self,
    method download_models (line 45) | def download_models(self):
    method build_model (line 49) | def build_model(self):
    method gather_free_mem (line 53) | def gather_free_mem(self):
    method set_cache_config (line 57) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi...
    method set_model_config (line 61) | def set_model_config(self, model_config: ModelConfig, spec_model_confi...
    method build_graph_runner (line 65) | def build_graph_runner(self):
    method build_cache_engine (line 69) | def build_cache_engine(self):
    method warmup (line 73) | def warmup(self):
    method sleep (line 77) | async def sleep(self, level: int = 1):
    method wakeup (line 81) | def wakeup(self, tags: Optional[List[str]] = None):
    method update_params (line 85) | def update_params(self, request: Any):
    method get_input_processor (line 89) | def get_input_processor(self):
    method start (line 93) | def start(self, forward_event: asyncio.Event):
    method wait_tasks (line 97) | async def wait_tasks(self):
    method stop (line 101) | def stop(self):
    method release (line 105) | def release(self):
    method forward_async (line 109) | async def forward_async(self, inputs):
    method get_output_async (line 113) | async def get_output_async(self):
    method p2p_initialize (line 119) | def p2p_initialize(self, remote_engine_config: DistServeInitRequest):
    method p2p_connect (line 123) | def p2p_connect(self, conn_request: List[DistServeKVTransferEndpointIn...
    method migrate (line 127) | async def migrate(self, batch: MigrationExecutionBatch):
    method _get_runtime_size (line 133) | def _get_runtime_size(self, num_free_gpu_mem: int, cache_block_size: i...
    method _adjust_block_size (line 148) | def _adjust_block_size(self):
    method _get_state_cache_mem (line 161) | def _get_state_cache_mem(self):
    method update_configs (line 185) | def update_configs(self):
    method init (line 241) | def init(self):
    method remote_log (line 258) | def remote_log(self, msg: str):

FILE: lmdeploy/pytorch/engine/executor/base_worker.py
  class WorkerWrapperBase (line 20) | class WorkerWrapperBase:
    method __init__ (line 23) | def __init__(
    method init_process_group (line 57) | def init_process_group(self, rank: int, master_addr: str = None, maste...
    method pack_output (line 69) | def pack_output(self, output: Dict):
    method get_outputs (line 73) | async def get_outputs(self):
    method build_model (line 77) | def build_model(self):
    method get_free_mem (line 94) | def get_free_mem(self):
    method set_cache_config (line 98) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi...
    method set_model_config (line 102) | def set_model_config(self, model_config: ModelConfig, spec_model_confi...
    method build_graph_runner (line 106) | def build_graph_runner(self):
    method build_cache_engine (line 110) | def build_cache_engine(self):
    method update_params (line 114) | def update_params(self, request: Any):
    method warmup (line 118) | def warmup(self):
    method sleep (line 122) | async def sleep(self, level: int = 1):
    method wakeup (line 126) | def wakeup(self, tags: Optional[List[str]] = None):
    method get_input_processor (line 130) | def get_input_processor(self):
    method start (line 134) | def start(self):
    method wait_tasks (line 139) | async def wait_tasks(self):
    method stop (line 152) | def stop(self):
    method stop_async (line 156) | async def stop_async(self):
    method forward_async (line 159) | async def forward_async(self, inputs):
    method get_output_async (line 163) | async def get_output_async(self):
    method release (line 169) | def release(self):
    method p2p_initialize (line 175) | def p2p_initialize(self, init_request: DistServeInitRequest):
    method p2p_connect (line 178) | def p2p_connect(self, remote_engine_id: str, conn_request: List[DistSe...
    method migrate (line 181) | async def migrate(self, inputs: MigrationExecutionBatch):

FILE: lmdeploy/pytorch/engine/executor/dist_utils.py
  function find_available_port (line 11) | def find_available_port() -> bool:
  function setup_master_addr (line 20) | def setup_master_addr(addr: str, port: str):
  function init_dist_environ (line 32) | def init_dist_environ(rank: int, world_size: int):
  function init_process_group (line 38) | def init_process_group(rank: int, world_size: int):

FILE: lmdeploy/pytorch/engine/executor/mp_executor.py
  function get_num_packages (line 37) | def get_num_packages(data_size):
  class Notifier (line 42) | class Notifier:
    method __init__ (line 44) | def __init__(self, num_receiver: int, mp_ctx: SpawnContext):
    method _update_event_id (line 49) | def _update_event_id(self):
    method set (line 52) | def set(self):
    method set_async (line 60) | async def set_async(self):
    method wait (line 71) | def wait(self):
    method wait_async (line 80) | async def wait_async(self):
    method close (line 89) | def close(self):
  class SharedBuffer (line 95) | class SharedBuffer:
    method __init__ (line 98) | def __init__(self, proc_id: int, notifier: Notifier, name: str = None):
    method acquire_buf (line 117) | def acquire_buf(self):
    method name (line 125) | def name(self):
    method pack_data (line 128) | def pack_data(self, data, receiver_mask):
    method send (line 144) | def send(self, data, receiver_mask: int = 0xff):
    method send_async (line 149) | async def send_async(self, data, receiver_mask: int = 0xff):
    method _receive_step0 (line 154) | def _receive_step0(self):
    method _receive_step1 (line 170) | def _receive_step1(self, dumped_data, is_receiver, remain_size):
    method receive (line 185) | def receive(self):
    method receive_async (line 191) | async def receive_async(self):
    method close (line 197) | def close(self):
  class MPExecutor (line 207) | class MPExecutor(ExecutorBase):
    method setup_master_addr (line 211) | def setup_master_addr(cls):
    method __init__ (line 220) | def __init__(self,
    method collective_rpc (line 286) | def collective_rpc(self,
    method collective_rpc_async (line 315) | async def collective_rpc_async(self,
    method download_models (line 343) | def download_models(self):
    method build_model (line 347) | def build_model(self):
    method gather_free_mem (line 351) | def gather_free_mem(self):
    method set_cache_config (line 356) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi...
    method set_model_config (line 360) | def set_model_config(self, model_config: ModelConfig, spec_model_confi...
    method build_graph_runner (line 364) | def build_graph_runner(self):
    method build_cache_engine (line 368) | def build_cache_engine(self):
    method warmup (line 372) | def warmup(self):
    method _prefetch_outputs (line 376) | async def _prefetch_outputs(self):
    method start (line 381) | def start(self, forward_event: asyncio.Event):
    method wait_tasks (line 389) | async def wait_tasks(self):
    method forward_async (line 394) | async def forward_async(self, inputs):
    method get_output_async (line 398) | async def get_output_async(self):
    method get_input_processor (line 402) | def get_input_processor(self):
    method stop (line 406) | def stop(self):
    method release (line 411) | def release(self):
  class MPWorkerWrapper (line 424) | class MPWorkerWrapper(WorkerWrapperBase):
    method __init__ (line 427) | def __init__(
  class ExecutorProc (line 454) | class ExecutorProc:
    method __init__ (line 456) | def __init__(self, proc_id: int, mp_ctx: SpawnContext):
    method start (line 462) | def start(self, **kwargs):
    method close (line 472) | def close(self):
    method join (line 480) | def join(self):
    method _main_loop (line 485) | def _main_loop(
    method _task_wrapper (line 557) | async def _task_wrapper(func, args: List, kwargs: Dict, need_return: b...
    method _main_loop_impl (line 562) | async def _main_loop_impl(self, proc_id: int, comm_buf: SharedBuffer, ...

FILE: lmdeploy/pytorch/engine/executor/ray_executor.py
  function _get_master_addr (line 31) | def _get_master_addr():
  function _get_master_port (line 41) | def _get_master_port():
  function get_ascend_device_rank_mapping (line 49) | def get_ascend_device_rank_mapping(master_addr):
  function _update_env_cuda_alloc_conf (line 85) | def _update_env_cuda_alloc_conf(env_vars: Dict):
  function _update_runtime_envs (line 108) | def _update_runtime_envs(runtime_env: Dict):
  function _update_runtime_env_nsys (line 118) | def _update_runtime_env_nsys(runtime_env: Dict):
  class RemoteLogger (line 132) | class RemoteLogger:
    method __init__ (line 135) | def __init__(self):
    method start (line 139) | def start(self, msg: str):
    method end (line 148) | def end(self, handle: int):
  class RayWorkerWrapper (line 155) | class RayWorkerWrapper(WorkerWrapperBase):
    method __init__ (line 158) | def __init__(
    method set_device (line 190) | def set_device(self, local_rank):
    method set_env (line 194) | def set_env(self, envs: Dict[str, str]):
    method get_node_ip (line 198) | def get_node_ip(self):
    method warmup_dist (line 202) | def warmup_dist(self):
    method pack_output (line 214) | def pack_output(self, output: Dict):
    method remote_log_start (line 218) | def remote_log_start(self, msg: str):
    method remote_log_end (line 222) | def remote_log_end(self, handle: int):
    method exit (line 226) | def exit(self):
  class RayExecutor (line 231) | class RayExecutor(ExecutorBase):
    method __init__ (line 234) | def __init__(
    method collective_rpc (line 312) | def collective_rpc(self,
    method build_model (line 324) | def build_model(self):
    method gather_free_mem (line 328) | def gather_free_mem(self):
    method set_cache_config (line 332) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi...
    method set_model_config (line 336) | def set_model_config(self, model_config: ModelConfig, spec_model_confi...
    method build_graph_runner (line 340) | def build_graph_runner(self):
    method build_cache_engine (line 344) | def build_cache_engine(self):
    method update_params (line 348) | def update_params(self, request: Any):
    method warmup (line 352) | def warmup(self):
    method sleep (line 356) | def sleep(self, level: int = 1):
    method wakeup (line 360) | def wakeup(self, tags: Optional[List[str]] = None):
    method get_input_processor (line 366) | def get_input_processor(self):
    method _prefetch_task_callback (line 370) | def _prefetch_task_callback(self, task: asyncio.Task):
    method start (line 380) | def start(self, forward_event: asyncio.Event):
    method wait_tasks (line 388) | async def wait_tasks(self):
    method stop (line 427) | def stop(self):
    method release (line 445) | def release(self):
    method _compile_dag (line 465) | def _compile_dag(self):
    method forward_async (line 475) | async def forward_async(self, inputs):
    method get_output_async (line 500) | async def get_output_async(self):
    method remote_log (line 507) | def remote_log(self, msg: str):
    method _sort_workers (line 517) | def _sort_workers(self, driver_ip: str, workers: List[RayWorkerWrapper]):
    method _sort_workers_by_ip (line 547) | def _sort_workers_by_ip(self, ips, workers: List[RayWorkerWrapper]):
    method _valid_bundle_id (line 569) | def _valid_bundle_id(self, bundle_id: int):
    method _init_workers_ray (line 576) | def _init_workers_ray(self, placement_group: PlacementGroup, worker_kw...
    method _init_distributed_environment_by_device (line 615) | def _init_distributed_environment_by_device(self, device_str: str):
    method _init_ascend_distributed_environment (line 629) | def _init_ascend_distributed_environment(self, driver_ip):
    method p2p_initialize (line 661) | def p2p_initialize(self, init_request: DistServeInitRequest):
    method p2p_connect (line 664) | def p2p_connect(self, remote_engine_id: str, conn_request: List[DistSe...
    method migrate (line 671) | async def migrate(self, batch: MigrationExecutionBatch):

FILE: lmdeploy/pytorch/engine/executor/uni_executor.py
  class UniExecutor (line 17) | class UniExecutor(ExecutorBase):
    method __init__ (line 20) | def __init__(
    method download_models (line 54) | def download_models(self):
    method build_model (line 58) | def build_model(self):
    method gather_free_mem (line 62) | def gather_free_mem(self):
    method set_cache_config (line 66) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi...
    method set_model_config (line 70) | def set_model_config(self, model_config: ModelConfig, spec_model_confi...
    method build_graph_runner (line 74) | def build_graph_runner(self):
    method build_cache_engine (line 78) | def build_cache_engine(self):
    method warmup (line 82) | def warmup(self):
    method start (line 85) | def start(self, forward_event: asyncio.Event):
    method wait_tasks (line 89) | async def wait_tasks(self):
    method stop (line 93) | def stop(self):
    method release (line 97) | def release(self):
    method forward_async (line 101) | async def forward_async(self, inputs):
    method get_output_async (line 107) | async def get_output_async(self, dp_rank: int = 0):
    method get_input_processor (line 112) | def get_input_processor(self):
    method p2p_initialize (line 118) | def p2p_initialize(self, init_request: DistServeInitRequest):
    method p2p_connect (line 125) | def p2p_connect(self, remote_engine_id: str, conn_request: List[DistSe...
    method migrate (line 129) | async def migrate(self, batch: MigrationExecutionBatch):

FILE: lmdeploy/pytorch/engine/guided_process.py
  class GuidedDecodingManager (line 13) | class GuidedDecodingManager:
    method __init__ (line 16) | def __init__(self, tokenizer: PreTrainedTokenizerBase, vocab_size: Opt...
    method get_processors (line 24) | def get_processors(self, session_ctx: List[Dict[str, Any]],
    method get_processor (line 55) | def get_processor(self, session_id: int, seq_id: int, schema: str, typ...
    method remove_processor (line 81) | def remove_processor(self, session_id: int):
    method allocate_batched_bitmap (line 87) | def allocate_batched_bitmap(self, batch_size: int) -> torch.Tensor:
    method fill_bitmap (line 90) | def fill_bitmap(self, processor: xgr.GrammarMatcher, guided_bitmask: t...
    method accept_token (line 93) | def accept_token(self, processor: xgr.GrammarMatcher, token: int) -> N...
    method apply_batched_bitmap (line 96) | def apply_batched_bitmap(self, logits: torch.Tensor, guided_bitmask: t...
    method clear (line 108) | def clear(self) -> None:

FILE: lmdeploy/pytorch/engine/input_process.py
  class PreprocessInputResult (line 14) | class PreprocessInputResult:
  class BaseModelInputProcessor (line 21) | class BaseModelInputProcessor(ABC):
    method preprocess_input (line 25) | def preprocess_input(self,
  class DefaultModelInputProcessor (line 33) | class DefaultModelInputProcessor(BaseModelInputProcessor):
    method preprocess_input (line 36) | def preprocess_input(self,

FILE: lmdeploy/pytorch/engine/inputs_maker.py
  function _tensorlize_block_offsets (line 31) | def _tensorlize_block_offsets(block_offsets, dtype=torch.int32):
  class InputsMakerConfig (line 45) | class InputsMakerConfig:
    method from_engine (line 59) | def from_engine(engine: 'Engine'):
  class LongContextChunker (line 72) | class LongContextChunker:
    method __init__ (line 75) | def __init__(self, max_prefill_token_num: int):
    method enabled (line 81) | def enabled(self):
    method is_long_context (line 85) | def is_long_context(self, seq: 'SchedulerSequence'):
    method set_seq (line 89) | def set_seq(self, seq: 'SchedulerSequence'):
    method multimodal_iter (line 108) | def multimodal_iter(self):
    method next_chunk_size (line 120) | def next_chunk_size(self):
    method is_last_chunk (line 153) | def is_last_chunk(self):
    method clear (line 159) | def clear(self):
    method update_step (line 166) | def update_step(self, inputs: ModelInputs):
    method check_enable (line 184) | def check_enable(self):
  class InputsMakerAsync (line 191) | class InputsMakerAsync:
    method __init__ (line 193) | def __init__(
    method _init_do_prefill (line 228) | def _init_do_prefill(self, config: InputsMakerConfig):
    method _create_vision_model_inputs (line 236) | def _create_vision_model_inputs(self, messages: 'SeqList', model_input...
    method torch_int_dtype (line 301) | def torch_int_dtype(self):
    method _set_adapter_ids (line 307) | def _set_adapter_ids(self, model_inputs: ModelInputs, messages: 'SeqLi...
    method create_model_inputs (line 318) | def create_model_inputs(self, messages: 'SeqList', is_prefill: bool):
    method create_model_inputs_long_context (line 386) | def create_model_inputs_long_context(self,
    method create_model_inputs_delta (line 443) | def create_model_inputs_delta(self):
    method create_model_inputs_delta_valid_only (line 487) | def create_model_inputs_delta_valid_only(self):
    method update_running_seqs (line 526) | def update_running_seqs(self, running: 'SeqList', inputs: Optional[Mod...
    method deactivate_evict_seqs (line 543) | def deactivate_evict_seqs(self):
    method _make_forward_inputs (line 557) | def _make_forward_inputs(self, prefill: bool, enable_empty: bool = Fal...
    method do_prefill_pnode (line 674) | def do_prefill_pnode(self):
    method do_prefill_default (line 677) | def do_prefill_default(self):
    method do_prefill_chunked (line 703) | def do_prefill_chunked(self):
    method _send_next_inputs_impl (line 711) | async def _send_next_inputs_impl(self, prefill: bool = None, enable_em...
    method send_next_inputs (line 725) | async def send_next_inputs(self):
    method prefetch_next_inputs (line 729) | async def prefetch_next_inputs(self):
  function build_inputs_maker (line 736) | def build_inputs_maker(engine: 'Engine'):

FILE: lmdeploy/pytorch/engine/logits_process.py
  function _process_temperature_ (line 17) | def _process_temperature_(scores: torch.Tensor, temperature: torch.Tensor):
  function _process_bad_words_ (line 24) | def _process_bad_words_(scores: torch.Tensor,
  function _process_repetition_penalty_ (line 59) | def _process_repetition_penalty_(scores: torch.Tensor, input_ids: torch....
  function _filter_topk_sorted_ (line 68) | def _filter_topk_sorted_(scores: torch.Tensor, topk: torch.LongTensor, f...
  function _filter_topp_sorted_ (line 78) | def _filter_topp_sorted_(scores: torch.Tensor, topp: torch.Tensor, filte...
  function _filter_minp_sorted_ (line 88) | def _filter_minp_sorted_(scores: torch.Tensor, minp: torch.Tensor, filte...
  function _ngram_one (line 99) | def _ngram_one(dtype: torch.dtype, device: torch.device, fill: int = 1):
  function ngram (line 103) | def ngram(
  function _filter_repetition_ngram_ (line 196) | def _filter_repetition_ngram_(
  function _multinomial_sampling (line 223) | def _multinomial_sampling(scores: torch.Tensor,
  class SamplingInputsDelta (line 236) | class SamplingInputsDelta:
  class SamplingInputs (line 243) | class SamplingInputs:
    method to_device (line 274) | def to_device(self, device: str, non_blocking: bool = False):
    method get_delta (line 288) | def get_delta(self) -> SamplingInputsDelta:
    method update_delta (line 298) | def update_delta(self, delta: SamplingInputsDelta):
  function _apply_custom_logits_processors (line 307) | def _apply_custom_logits_processors(batched_logits_processors, all_ids, ...
  function _torch_topk (line 316) | def _torch_topk(x: torch.Tensor, k: int, dim: int = -1, largest: bool = ...
  class FusedLogitsProcessor (line 327) | class FusedLogitsProcessor:
    method __init__ (line 330) | def __init__(
    method _wait_stream_once (line 348) | async def _wait_stream_once(self):
    method __call__ (line 354) | async def __call__(self, scores: torch.Tensor) -> torch.Tensor:
    method sampling (line 440) | def sampling(self, logits: torch.Tensor):
    method compute_logprobs (line 488) | def compute_logprobs(self, raw_logprobs: torch.Tensor, token_ids: torc...
    method cleanup_sessions (line 503) | def cleanup_sessions(self, session_ids: list[int]):

FILE: lmdeploy/pytorch/engine/model_agent/__init__.py
  function build_model_agent (line 11) | def build_model_agent(

FILE: lmdeploy/pytorch/engine/model_agent/agent.py
  class SleepWakeupState (line 42) | class SleepWakeupState:
  class BatchedLogProbs (line 49) | class BatchedLogProbs:
    method to_cpu (line 53) | def to_cpu(self):
    method to_numpy (line 57) | def to_numpy(self):
    method to_tensor (line 65) | def to_tensor(self):
  class BatchedOutputs (line 75) | class BatchedOutputs:
    method to_cpu (line 86) | def to_cpu(self):
    method to_numpy (line 99) | def to_numpy(self):
    method to_tensor (line 112) | def to_tensor(self):
  function msg_with_rank (line 126) | def msg_with_rank(rank: int, msg: str):
  function cache_swapping (line 131) | def cache_swapping(cache_engine: CacheEngine, swap_in_map: dict, swap_ou...
  function model_forward (line 148) | def model_forward(
  function _try_to_cuda (line 191) | def _try_to_cuda(val, non_blocking: bool = False):
  class DistGatherScalar (line 202) | class DistGatherScalar:
    method __init__ (line 205) | def __init__(self, val, size: int, device: str = 'cpu', group: dist.Pr...
    method async_wait (line 216) | async def async_wait(self, timeout: float = 0.001):
  class StepInputs (line 227) | class StepInputs:
    method merge (line 235) | def merge(
    method update_delta (line 270) | def update_delta(
    method step (line 282) | def step(
  class BaseModelAgent (line 312) | class BaseModelAgent:
    method __init__ (line 324) | def __init__(
    method all_context (line 419) | def all_context(self):
    method set_cache_config (line 425) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi...
    method set_model_config (line 430) | def set_model_config(self, model_config: ModelConfig, spec_model_confi...
    method get_free_mem (line 435) | def get_free_mem(self):
    method warmup (line 442) | def warmup(self):
    method _slice_outs (line 495) | def _slice_outs(self, inputs: torch.Tensor, seq_length: torch.LongTens...
    method _postprocess_forward_output (line 499) | def _postprocess_forward_output(self, output: dict, inputs: ModelInputs):
    method _async_model_forward (line 507) | async def _async_model_forward(
    method async_sampling_logits (line 525) | async def async_sampling_logits(self, logits: torch.Tensor, sampling_i...
    method _push_output (line 548) | def _push_output(self, output: BatchedOutputs):
    method _broadcast_next_token (line 555) | def _broadcast_next_token(self, next_token_ids: torch.Tensor, extra_in...
    method _prepare_dp_v1 (line 565) | async def _prepare_dp_v1(self, inputs: ModelInputs):
    method _get_inputs_from_delta (line 624) | def _get_inputs_from_delta(
    method _prepare_inputs_prefill (line 637) | def _prepare_inputs_prefill(
    method _step_postprocess_with_output (line 664) | async def _step_postprocess_with_output(self,
    method _step_postprocess_without_output (line 721) | async def _step_postprocess_without_output(
    method _async_step (line 741) | async def _async_step(
    method _async_loop_background (line 916) | async def _async_loop_background(self, forward_event: asyncio.Event = ...
    method _async_loop_inputs_preprocess (line 932) | async def _async_loop_inputs_preprocess(self, forward_event: asyncio.E...
    method start (line 952) | def start(self, forward_event: asyncio.Event = None):
    method wait_tasks (line 977) | async def wait_tasks(self):
    method stop (line 991) | def stop(self):
    method stop_async (line 1006) | async def stop_async(self):
    method set_forward_inputs (line 1030) | def set_forward_inputs(self, inputs):
    method get_output_async (line 1035) | async def get_output_async(self):
    method _build_model (line 1051) | def _build_model(self):
    method build_model (line 1083) | def build_model(self):
    method build_graph_runner (line 1091) | def build_graph_runner(self):
    method build_cache_engine (line 1102) | def build_cache_engine(self):
    method _forward_impl (line 1119) | def _forward_impl(self, inputs: ModelInputs):
    method async_forward (line 1129) | async def async_forward(self, inputs: ModelInputs):
    method get_logits (line 1142) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_processor (line 1146) | def get_input_processor(self):
    method reset_graph_runner (line 1150) | def reset_graph_runner(self):
    method update_params (line 1158) | def update_params(self, request: UpdateParamsRequest):
    method sleep (line 1199) | async def sleep(self, level: int = 1):
    method wakeup (line 1213) | def wakeup(self, tags: Optional[List[str]] = None):
    method release (line 1237) | def release(self):

FILE: lmdeploy/pytorch/engine/model_agent/inputs_maker.py
  class DefaultForwardInputsMaker (line 14) | class DefaultForwardInputsMaker:
    method __init__ (line 17) | def __init__(self, model_agent: 'BaseModelAgent'):
    method get (line 20) | async def get(self):
    method step (line 24) | def step(self):
  class DPForwardInputsMaker (line 30) | class DPForwardInputsMaker:
    method __init__ (line 33) | def __init__(self, model_agent: 'BaseModelAgent'):
    method _make_dummy_forward_inputs (line 46) | def _make_dummy_forward_inputs(self):
    method _gather_has_inputs (line 59) | async def _gather_has_inputs(self, has_inputs: bool = False):
    method _get_inputs (line 75) | async def _get_inputs(self):
    method get (line 89) | async def get(self):
    method step (line 104) | def step(self):
  function build_inputs_maker (line 111) | def build_inputs_maker(model_agent: 'BaseModelAgent'):

FILE: lmdeploy/pytorch/engine/model_agent/profiler.py
  class AgentProfiler (line 13) | class AgentProfiler:
    method __init__ (line 15) | def __init__(self, dist_ctx: DistContext, stream: torch.Stream):
    method _build_profiler (line 35) | def _build_profiler(self):
    method dump (line 50) | def dump(self):
    method profile_task (line 70) | async def profile_task(self):
    method create_task (line 87) | def create_task(self):

FILE: lmdeploy/pytorch/engine/mp_engine/__init__.py
  function build_mp_engine (line 5) | def build_mp_engine(backend: str, model_path: str, engine_config: Pytorc...

FILE: lmdeploy/pytorch/engine/mp_engine/base.py
  class SessionState (line 18) | class SessionState:
  class MPEngine (line 22) | class MPEngine(EngineBase):
    method __init__ (line 24) | def __init__(self) -> None:
    method _collective_rpc (line 29) | def _collective_rpc(self, func, *args, **kwargs):
    method _collective_rpc_async (line 33) | async def _collective_rpc_async(self, func, *args, **kwargs):
    method _collective_rpc_streaming_async (line 37) | async def _collective_rpc_streaming_async(self, func, *args, **kwargs):
    method close (line 41) | def close(self) -> None:
    method start_loop (line 45) | def start_loop(self) -> None:
    method end_session (line 49) | def end_session(self, session_id: int):
    method sleep (line 53) | def sleep(self, level: int):
    method wakeup (line 57) | def wakeup(self, tags: Optional[List[str]] = None):
    method update_params (line 61) | def update_params(self, request: Any):
    method get_schedule_metrics (line 65) | def get_schedule_metrics(self):
    method p2p_initialize (line 69) | def p2p_initialize(self, conn_request: DistServeInitRequest):
    method p2p_connect (line 73) | def p2p_connect(self, conn_request: DistServeConnectionRequest):
    method p2p_drop_connect (line 77) | def p2p_drop_connect(self, drop_conn_request: DistServeDropConnectionR...
    method create_instance (line 85) | def create_instance(self, cuda_stream_id=0):
  class MPEngineInstance (line 90) | class MPEngineInstance(EngineInstanceBase):
    method __init__ (line 93) | def __init__(self, engine: MPEngine):
    method async_end (line 97) | async def async_end(self, session_id: int):
    method async_cancel (line 107) | async def async_cancel(self, session_id: int):
    method async_stream_infer (line 115) | async def async_stream_infer(self, session_id: int, *args, **kwargs):

FILE: lmdeploy/pytorch/engine/mp_engine/base_worker.py
  class EngineInstancePool (line 17) | class EngineInstancePool:
    method __init__ (line 20) | def __init__(self, engine):
    method create_instance_pool (line 27) | def create_instance_pool(self, num_instance: int):
    method instance (line 36) | async def instance(self):
    method async_end (line 47) | async def async_end(self, session_id: int):
    method async_cancel (line 52) | async def async_cancel(self, session_id: int):
    method async_stream_infer (line 57) | async def async_stream_infer(self, *args, **kwargs):
  class EngineWorkerBase (line 64) | class EngineWorkerBase:
    method __init__ (line 67) | def __init__(self, engine: 'Engine'):
    method end_session (line 72) | def end_session(self, session_id: int):
    method get_engine_config (line 76) | def get_engine_config(self):
    method get_schedule_metrics (line 80) | def get_schedule_metrics(self):
    method p2p_initialize (line 84) | def p2p_initialize(self, conn_request: DistServeInitRequest):
    method p2p_connect (line 88) | def p2p_connect(self, conn_request: DistServeConnectionRequest):
    method p2p_drop_connect (line 92) | def p2p_drop_connect(self, drop_conn_request: DistServeDropConnectionR...
    method sleep (line 100) | def sleep(self, level: int = 1):
    method wakeup (line 104) | def wakeup(self, tags: Optional[List[str]] = None):
    method update_params (line 108) | def update_params(self, request: Any):
    method close (line 112) | def close(self) -> None:
    method instance_async_end (line 116) | async def instance_async_end(self, session_id: int):
    method instance_async_cancel (line 120) | async def instance_async_cancel(self, session_id: int):
    method instance_async_stream_infer (line 124) | async def instance_async_stream_infer(self, *args, **kwargs):
  class EngineOutputGather (line 130) | class EngineOutputGather:
    method __init__ (line 133) | def __init__(self):
    method get (line 136) | def get(self, stream_id):
    method add (line 141) | def add(self, stream_id, result):
    method pop (line 148) | def pop(self, stream_id, result):

FILE: lmdeploy/pytorch/engine/mp_engine/ray_engine.py
  class RayEngineWorker (line 19) | class RayEngineWorker(EngineWorkerBase):
    method __init__ (line 21) | def __init__(self,
    method _stream_task_wrapper (line 40) | async def _stream_task_wrapper(self, stream_id: int, init_event: async...
    method create_stream_task (line 56) | async def create_stream_task(self, func, *args, **kwargs):
    method get_stream_task_result (line 69) | async def get_stream_task_result(self, stream_id: int):
  function _update_runtime_envs (line 87) | def _update_runtime_envs(runtime_env: Dict):
  class RayMPEngine (line 96) | class RayMPEngine(MPEngine):
    method __init__ (line 98) | def __init__(self, model_path: str, engine_config: PytorchEngineConfig...
    method _init_ray (line 107) | def _init_ray(self, engine_config: PytorchEngineConfig = None):
    method _create_worker (line 119) | def _create_worker(self, model_path: str, engine_config: PytorchEngine...
    method _collective_rpc (line 141) | def _collective_rpc(self, func, *args, **kwargs):
    method _collective_rpc_async (line 146) | async def _collective_rpc_async(self, func, *args, **kwargs):
    method _collective_rpc_streaming_async (line 151) | async def _collective_rpc_streaming_async(self, func, *args, **kwargs):
    method close (line 161) | def close(self) -> None:
    method start_loop (line 167) | def start_loop(self) -> None:

FILE: lmdeploy/pytorch/engine/mp_engine/zmq_engine.py
  function cancel_async_tasks (line 20) | def cancel_async_tasks(loop: asyncio.AbstractEventLoop):
  class ZMQMPEngine (line 30) | class ZMQMPEngine(MPEngine):
    method __init__ (line 32) | def __init__(self,
    method _start_mp_proc (line 49) | def _start_mp_proc(
    method _mp_proc (line 84) | def _mp_proc(
    method _mp_proc_async (line 125) | async def _mp_proc_async(server, engine: 'Engine'):
    method _collective_rpc (line 167) | def _collective_rpc(self, func, *args, **kwargs):
    method _collective_rpc_async (line 171) | async def _collective_rpc_async(self, func, *args, **kwargs):
    method _collective_rpc_streaming_async (line 175) | async def _collective_rpc_streaming_async(self, func, *args, **kwargs):
    method close (line 180) | def close(self) -> None:
    method start_loop (line 195) | def start_loop(self) -> None:

FILE: lmdeploy/pytorch/engine/mp_engine/zmq_rpc.py
  function _task_callback (line 19) | def _task_callback(task: asyncio.Task) -> None:
  class AsyncRPCServer (line 33) | class AsyncRPCServer:
    method __init__ (line 35) | def __init__(self):
    method get_port (line 52) | def get_port(self):
    method _get_next_stream_id (line 55) | def _get_next_stream_id(self):
    method register_method (line 60) | def register_method(self, name: str, func: Callable):
    method send_multipart (line 70) | def send_multipart(self, client_id: bytes, data: bytes):
    method call_method_default (line 77) | def call_method_default(self, client_id, method: Callable, request: Di...
    method _method_async_task (line 88) | async def _method_async_task(self, client_id, request_id, method: Call...
    method _method_async_streaming_task (line 97) | async def _method_async_streaming_task(self, stream_id: int, request_i...
    method get_stream_output (line 127) | async def get_stream_output(self, stream_id: int):
    method call_method_async (line 144) | async def call_method_async(self, client_id, method: Callable, request...
    method call_and_response (line 166) | async def call_and_response(self):
    method run (line 185) | async def run(self):
    method stop (line 210) | def stop(self):
  class AsyncRPCClient (line 216) | class AsyncRPCClient:
    method __init__ (line 218) | def __init__(self, port: int = 5555):
    method _set_reply_default (line 240) | def _set_reply_default(self, request_id: int, reply: Dict):
    method _set_reply (line 252) | def _set_reply(self, reply: Dict):
    method _poll_recv (line 256) | def _poll_recv(self, timeout: float = 3):
    method _try_start_listen (line 264) | def _try_start_listen(self):
    method call (line 271) | def call(self, method, *args, **kwargs):
    method _async_call_impl (line 290) | async def _async_call_impl(self, method, streaming, *args, **kwargs):
    method async_call (line 302) | async def async_call(self, method, *args, **kwargs):
    method async_stream_call (line 306) | async def async_stream_call(self, method, *args, **kwargs):
    method listen (line 315) | async def listen(self):
    method stop (line 329) | def stop(self):
    method close_sockets (line 336) | def close_sockets(self):

FILE: lmdeploy/pytorch/engine/request.py
  class RequestType (line 14) | class RequestType(enum.Enum):
  class Response (line 26) | class Response:
  class Request (line 39) | class Request:
  function _run_until_complete (line 51) | def _run_until_complete(future: Awaitable):
  class RequestSender (line 64) | class RequestSender:
    method new (line 75) | def new(cls, sender_id: int, manager: 'RequestManager'):
    method req_que (line 81) | def req_que(self):
    method event_loop (line 86) | def event_loop(self):
    method is_loop_alive (line 90) | def is_loop_alive(self):
    method run_until_complete (line 94) | def run_until_complete(self, future: Awaitable):
    method _req_put (line 98) | def _req_put(self, reqs: Any):
    method _gather_request (line 102) | def _gather_request(self, req_types: List[RequestType], data: List[Any]):
    method batched_send_async (line 122) | def batched_send_async(self, req_types: List[RequestType], data: List[...
    method send_async (line 128) | def send_async(self, req_type: RequestType, data: Any):
    method async_recv (line 132) | async def async_recv(self, resp: Response, wait_main: bool = False) ->...
    method recv (line 149) | def recv(self, resp: Response) -> Response:
    method async_send (line 154) | async def async_send(self, req_type: RequestType, data: Any):
    method send (line 159) | def send(self, req_type: RequestType, data: Any) -> Response:
  class RequestManager (line 165) | class RequestManager:
    method __init__ (line 168) | def __init__(self):
    method prepare_send (line 186) | async def prepare_send(self):
    method sender_wait_loop (line 198) | async def sender_wait_loop(self):
    method create_loop_task (line 218) | def create_loop_task(self):
    method wait_tasks (line 232) | async def wait_tasks(self):
    method event_loop (line 251) | def event_loop(self):
    method set_main_loop_func (line 258) | def set_main_loop_func(self, loop: Callable[[Coroutine], asyncio.Task]):
    method stop_loop (line 262) | def stop_loop(self):
    method is_loop_alive (line 270) | def is_loop_alive(self):
    method build_sender (line 282) | def build_sender(self):
    method has_requests (line 290) | def has_requests(self):
    method get_all_requests (line 296) | async def get_all_requests(self) -> Dict[RequestType, List[Request]]:
    method bind_func (line 323) | def bind_func(self, req_type: RequestType, callback: Callable):
    method set_request_priority (line 327) | def set_request_priority(self, priority: List[RequestType]):
    method response (line 331) | def response(self, resp: Response):
    method process_request (line 335) | def process_request(self, req_type: RequestType, reqs: ReqList, **kwar...
    method step (line 350) | async def step(self, **kwargs):
    method run_until_complete (line 378) | def run_until_complete(self, future: Awaitable):

FILE: lmdeploy/pytorch/envs.py
  function env_to_bool (line 7) | def env_to_bool(
  function env_to_int (line 28) | def env_to_int(
  function env_to_list_int (line 43) | def env_to_list_int(
  function env_to_float (line 59) | def env_to_float(
  function set_envs (line 78) | def set_envs():
  function get_all_envs (line 164) | def get_all_envs():

FILE: lmdeploy/pytorch/kernels/cuda/activation.py
  function _silu_and_mul_kernel (line 18) | def _silu_and_mul_kernel(
  function silu_and_mul (line 63) | def silu_and_mul(gate_up: torch.Tensor, out: torch.Tensor = None):
  function _silu_and_mul_moe_ep_kernel (line 101) | def _silu_and_mul_moe_ep_kernel(
  function silu_and_mul_moe_ep (line 153) | def silu_and_mul_moe_ep(gate_up: torch.Tensor, mask_m: torch.Tensor, out...

FILE: lmdeploy/pytorch/kernels/cuda/apply_rotary_pos_emb.py
  function _apply_rotary_impl (line 9) | def _apply_rotary_impl(x_l, x_h, cos_l, cos_h, sin_l, sin_h):
  function apply_rotary_pos_emb_qk_kernel (line 28) | def apply_rotary_pos_emb_qk_kernel(
  function apply_rotary_pos_emb (line 115) | def apply_rotary_pos_emb(q: Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/awq_kernels.py
  function get_cuda_autotune_config (line 7) | def get_cuda_autotune_config():
  function _dequant_s4_to_f16x2 (line 21) | def _dequant_s4_to_f16x2(weight, shift: tl.constexpr, is_top: tl.constex...
  function _unpack_weight (line 61) | def _unpack_weight(weight):
  function awq_linear_kernel (line 91) | def awq_linear_kernel(
  function awq_linear (line 212) | def awq_linear(x, qweight, scales, qzeros):

FILE: lmdeploy/pytorch/kernels/cuda/bitonic_topk.py
  function _indicator (line 20) | def _indicator(n_dims: core.constexpr, j: core.constexpr):
  function _flip_along_middle (line 27) | def _flip_along_middle(x, n_dims, i):
  function _compare_and_swap (line 36) | def _compare_and_swap(x, ids, flip, i: core.constexpr):
  function _bitonic_merge_hypercube (line 55) | def _bitonic_merge_hypercube(x, ids, stage: core.constexpr, order: core....
  function _bitonic_merge (line 74) | def _bitonic_merge(x, ids, stage: tl.constexpr, order: tl.constexpr, n_d...
  function argsort (line 86) | def argsort(x, ids, dim: tl.constexpr = None, descending: tl.constexpr =...
  function _bitonic_topk_kernel0 (line 99) | def _bitonic_topk_kernel0(score_ptr,
  function _concate (line 135) | def _concate(a, b):
  function _split (line 145) | def _split(a, k):
  function _bitonic_topk_kernel1 (line 153) | def _bitonic_topk_kernel1(score_ptr,
  function bitonic_topk (line 202) | def bitonic_topk(scores: torch.Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/blocked_fp8_fused_moe.py
  function get_cuda_autotune_config (line 14) | def get_cuda_autotune_config():
  function fused_moe_blocked_f8_kernel (line 28) | def fused_moe_blocked_f8_kernel(
  function fused_moe_blocked_fp8_kernel_launcher (line 173) | def fused_moe_blocked_fp8_kernel_launcher(
  function fused_moe_blocked_fp8 (line 260) | def fused_moe_blocked_fp8(input: torch.Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/blocked_gemm_fp8.py
  function fast_log2_ceil (line 17) | def fast_log2_ceil(x):
  function fast_pow2 (line 26) | def fast_pow2(x):
  function fast_round_scale (line 32) | def fast_round_scale(amax, fp8_max_inv):
  function _quant_fp8_kernel (line 37) | def _quant_fp8_kernel(
  function _quant_fp8_launcher (line 105) | def _quant_fp8_launcher(A: Tensor, group_size: int, out: Tensor, scales:...
  function quant_fp8 (line 159) | def quant_fp8(A: Tensor,
  function quant_fp8_tma (line 177) | def quant_fp8_tma(A: Tensor,
  function _gemm_fp8_tma_pre_hook (line 194) | def _gemm_fp8_tma_pre_hook(nargs):
  function _gemm_fp8_tma_kernel (line 214) | def _gemm_fp8_tma_kernel(
  function _gemm_fp8_kernel (line 302) | def _gemm_fp8_kernel(
  function blocked_gemm_fp8 (line 384) | def blocked_gemm_fp8(A: Tensor,
  function deep_gemm_fp8 (line 480) | def deep_gemm_fp8(A: Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/causal_conv1d.py
  function causal_conv1d_fwd (line 12) | def causal_conv1d_fwd(hidden_size, width, has_bias, activation, dtype, s...
  function causal_conv1d_fn (line 115) | def causal_conv1d_fn(
  function causal_conv1d_update_fwd (line 185) | def causal_conv1d_update_fwd(hidden_size: int, seqlen: int, state_len: i...
  function causal_conv1d_update (line 268) | def causal_conv1d_update(x,

FILE: lmdeploy/pytorch/kernels/cuda/ds_index.py
  function _fp8_index_kernel (line 10) | def _fp8_index_kernel(
  function fp8_index (line 96) | def fp8_index(q: torch.Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/fill_kv_cache.py
  function _quant_int8 (line 11) | def _quant_int8(val):
  function _quant_int4 (line 21) | def _quant_int4(val1, val2):
  function _fill_kv_cache_kernel (line 35) | def _fill_kv_cache_kernel(
  function _fill_page_quant_int8 (line 126) | def _fill_page_quant_int8(
  function _fill_page_quant_int4 (line 170) | def _fill_page_quant_int4(
  function _fill_page_quant (line 215) | def _fill_page_quant(state_ptr, cache_ptr, scales_zeros_ptr, block_off, ...
  function _fill_kv_cache_quant_kernel (line 270) | def _fill_kv_cache_quant_kernel(
  function fill_kv_cache (line 401) | def fill_kv_cache(k_states: Tensor,
  function fast_log2_ceil (line 528) | def fast_log2_ceil(x):
  function fast_pow2 (line 537) | def fast_pow2(x):
  function fast_round_scale (line 543) | def fast_round_scale(amax, fp8_max_inv):
  function _quant_blocked_fp8 (line 548) | def _quant_blocked_fp8(x,
  function _fill_kv_cache_blocked_fp8_kernel (line 574) | def _fill_kv_cache_blocked_fp8_kernel(
  function fill_kv_cache_blocked_fp8 (line 692) | def fill_kv_cache_blocked_fp8(k_states: Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/flashattention.py
  function _get_block_d (line 26) | def _get_block_d(head_dim_k, head_dim_v):
  function softcapping (line 38) | def softcapping(qk, logit_softcapping: tl.constexpr):
  function _load_kv (line 48) | def _load_kv(ptrs, boundary_check: tl.constexpr):
  function _prefill_fwd_inner (line 57) | def _prefill_fwd_inner(acc, l_i, m_i, q, k_ptrs, v_ptrs, q1, k1_ptrs, lo...
  function _flash_prefill_fwd_kernel (line 164) | def _flash_prefill_fwd_kernel(
  function _kernel_meta_sm7x (line 385) | def _kernel_meta_sm7x(BLOCK_DK):
  function _kernel_meta_sm8x (line 393) | def _kernel_meta_sm8x(BLOCK_DK: int, shared_kv: bool):
  function _kernel_meta_sm86 (line 404) | def _kernel_meta_sm86(BLOCK_DK: int, shared_kv: bool):
  function _kernel_meta_sm9x (line 423) | def _kernel_meta_sm9x(BLOCK_DK: int, shared_kv: bool):
  function _kernel_meta_sm12x (line 441) | def _kernel_meta_sm12x(BLOCK_DK: int, shared_kv: bool):
  function _kernel_meta_rocm (line 467) | def _kernel_meta_rocm(BLOCK_DK: int, shared_kv: bool):
  function flash_attn_varlen_func (line 475) | def flash_attn_varlen_func(

FILE: lmdeploy/pytorch/kernels/cuda/flatten_kv_cache.py
  function _flatten_kv_cache (line 11) | def _flatten_kv_cache(
  function _dequant_int4 (line 84) | def _dequant_int4(val, HEAD_DIM: tl.constexpr, BLOCK: tl.constexpr):
  function _flatten_kv_cache_quant (line 92) | def _flatten_kv_cache_quant(
  function flatten_kv_cache (line 195) | def flatten_kv_cache(k_caches: Tensor,
  function dequant_fp8 (line 339) | def dequant_fp8(x, scale, GROUP_SIZE: tl.constexpr):
  function flatten_kv_cache_mla_fp8_kernel (line 352) | def flatten_kv_cache_mla_fp8_kernel(
  function flatten_kv_cache_mla_fp8 (line 431) | def flatten_kv_cache_mla_fp8(k_caches: Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/fused_lora.py
  function get_autotune_config (line 7) | def get_autotune_config():
  function _atomic_store (line 24) | def _atomic_store(ptrs, val, mask):
  function _fused_lora_kernel (line 43) | def _fused_lora_kernel(
  function fused_lora (line 142) | def fused_lora(input: torch.Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/fused_moe.py
  function get_cuda_autotune_config (line 12) | def get_cuda_autotune_config():
  function _config_prune_func (line 83) | def _config_prune_func(config: list, *args, **kwargs):
  function fused_moe_kernel (line 103) | def fused_moe_kernel(
  function fused_moe_kernel_launcher (line 201) | def fused_moe_kernel_launcher(
  function _get_exp_mask_kernel (line 262) | def _get_exp_mask_kernel(
  function _get_exp_mask (line 297) | def _get_exp_mask(topk_ids: torch.Tensor, num_experts: int):
  function _get_start_end_kernel (line 327) | def _get_start_end_kernel(
  function get_start_end (line 377) | def get_start_end(exp_cum: torch.Tensor, exp_topk: torch.Tensor, topk: i...
  function _get_sorted_idx (line 410) | def _get_sorted_idx(topk_ids: torch.Tensor, num_experts: int):
  function _renormalize (line 426) | def _renormalize(topk_weights: torch.Tensor, renormalize: bool):
  function _make_intermediate (line 434) | def _make_intermediate(shape: tuple, dtype: torch.dtype, device: torch.d...
  function _moe_reduce_kernel (line 443) | def _moe_reduce_kernel(
  function moe_reduce (line 489) | def moe_reduce(hidden_states: torch.Tensor, topk_weights: torch.Tensor, ...
  function fused_moe (line 526) | def fused_moe(hidden_states: torch.Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/fused_moe_ep.py
  function _fwd_kernel_ep_scatter_step1 (line 13) | def _fwd_kernel_ep_scatter_step1(
  function _fwd_kernel_ep_scatter_step2 (line 42) | def _fwd_kernel_ep_scatter_step2(
  function ep_scatter (line 78) | def ep_scatter(
  function _fwd_kernel_ep_gather (line 127) | def _fwd_kernel_ep_gather(
  function ep_gather (line 171) | def ep_gather(
  function _deepgemm_grouped_bf16_nt_contiguous (line 208) | def _deepgemm_grouped_bf16_nt_contiguous(
  function fused_moe_v3 (line 218) | def fused_moe_v3(

FILE: lmdeploy/pytorch/kernels/cuda/fused_noaux_tc.py
  function _noaux_routing_kernel (line 29) | def _noaux_routing_kernel(
  function fused_noaux_tc_routing (line 102) | def fused_noaux_tc_routing(

FILE: lmdeploy/pytorch/kernels/cuda/gated_delta_rule.py
  function normalize_qk (line 13) | def normalize_qk(k_local: T.Buffer, q_local: T.Buffer, k_per_thr: int) -...
  function fused_recurrent_gated_delta_rule_fwd (line 33) | def fused_recurrent_gated_delta_rule_fwd(SEQLEN,
  function fused_recurrent_gated_delta_rule (line 240) | def fused_recurrent_gated_delta_rule(

FILE: lmdeploy/pytorch/kernels/cuda/multinomial_sampling.py
  function _multinomial_sampling_kernel (line 8) | def _multinomial_sampling_kernel(Scores, Seeds, Offsets, Indices, Output...
  function multinomial_sampling (line 50) | def multinomial_sampling(scores: torch.Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/pagedattention.py
  function _fwd_grouped_split_kernel (line 37) | def _fwd_grouped_split_kernel(
  function _fwd_grouped_split_quant_kernel (line 222) | def _fwd_grouped_split_quant_kernel(
  function _reduce_split_kernel (line 453) | def _reduce_split_kernel(
  function _convert_pv (line 503) | def _convert_pv(p, v):
  function _kernel_meta_default (line 512) | def _kernel_meta_default(BLOCK_DMODEL: int, BLOCK_H: int):
  function _kernel_meta_sm8x (line 517) | def _kernel_meta_sm8x(BLOCK_DMODEL: int, BLOCK_H: int):
  function _kernel_meta_sm9x (line 527) | def _kernel_meta_sm9x(BLOCK_DMODEL: int, BLOCK_H: int):
  function _get_split_k (line 537) | def _get_split_k(device_idx: int, head_grid: int, batch_size: int, num_w...
  function flash_attn_with_kvcache (line 553) | def flash_attn_with_kvcache(

FILE: lmdeploy/pytorch/kernels/cuda/rms_norm.py
  function _compute_rms_norm (line 11) | def _compute_rms_norm(x, w, eps: tl.constexpr, N_COLS: tl.constexpr):
  function add_rms_norm_kernel (line 22) | def add_rms_norm_kernel(input, weight, residual, output, out_residual, n...
  function _unsqueeze_to_3d (line 55) | def _unsqueeze_to_3d(tensor: Tensor) -> Tensor:
  function _squeeze_to_origin_dim (line 67) | def _squeeze_to_origin_dim(tensor: Tensor, origin_dim: int) -> Tensor:
  function rms_norm (line 79) | def rms_norm(hidden_states: Tensor,
  function torch_forward (line 165) | def torch_forward(hidden_states, weight, variance_epsilon=1e-6):
  function test_rms_norm (line 173) | def test_rms_norm(bsz, ctx_len, feat_len, dtype):

FILE: lmdeploy/pytorch/kernels/cuda/utils.py
  function get_device_props (line 36) | def get_device_props(device=None):
  function is_cuda (line 52) | def is_cuda():
  function supports_tma (line 57) | def supports_tma():

FILE: lmdeploy/pytorch/kernels/cuda/w8a8_fused_moe.py
  function get_cuda_autotune_config (line 12) | def get_cuda_autotune_config():
  function fused_moe_w8a8_kernel (line 54) | def fused_moe_w8a8_kernel(
  function fused_moe_w8a8_kernel_launcher (line 155) | def fused_moe_w8a8_kernel_launcher(
  function fused_moe_w8a8 (line 218) | def fused_moe_w8a8(input: torch.Tensor,

FILE: lmdeploy/pytorch/kernels/cuda/w8a8_triton_kernels.py
  function _linear (line 33) | def _linear(
  function _linear_add (line 112) | def _linear_add(A, B, C, residual_ptr, M, N, K, stride_am, stride_ak, st...
  function matmul_kernel_dynamic_quant (line 162) | def matmul_kernel_dynamic_quant(a, b, rms_scale, linear_scale, residual=...
  function _per_token_quant_int8 (line 225) | def _per_token_quant_int8(
  function per_token_quant_int8 (line 263) | def per_token_quant_int8(x, eps, quant_dtype=torch.int8):
  function _compute_rms_norm (line 299) | def _compute_rms_norm(x, w, eps: tl.constexpr, N_COLS: tl.constexpr):
  function rms_norm_quant_kernel (line 310) | def rms_norm_quant_kernel(
  function add_rms_norm_quant_kernel (line 345) | def add_rms_norm_quant_kernel(
  function rms_norm_dynamic_quant (line 390) | def rms_norm_dynamic_quant(x, w, eps, residual=None, quant_dtype=torch.i...
  function test_rms_and_linear (line 443) | def test_rms_and_linear(x, rms_weight, linear_weight, output_dtype=torch...
  function test_per_token_quant (line 473) | def test_per_token_quant(x, eps, quant_dtype=torch.int8):
  function bench_rms_and_linear (line 495) | def bench_rms_and_linear(M: int, provider: str, dtype: torch.dtype = tor...

FILE: lmdeploy/pytorch/kernels/default/multinomial_sampling.py
  function multinomial_sampling (line 6) | def multinomial_sampling(scores: Tensor, seeds: LongTensor, offsets: Lon...

FILE: lmdeploy/pytorch/kernels/default/w8a8_kernels.py
  function per_channel_quant (line 5) | def per_channel_quant(x: torch.Tensor, dtype: torch.dtype):

FILE: lmdeploy/pytorch/kernels/dispatcher.py
  function _default_api (line 13) | def _default_api(*args, **kwargs):
  class ParamParser (line 18) | class ParamParser:
    method __init__ (line 20) | def __init__(self, param: inspect.Parameter) -> None:
    method name (line 23) | def name(self):
    method func_arg (line 27) | def func_arg(self):
    method func_input (line 44) | def func_input(self):
  class FunctionDispatcher (line 59) | class FunctionDispatcher:
    method __init__ (line 61) | def __init__(self, func_name: str):
    method device_callback (line 69) | def device_callback(self, context: DeviceContext):
    method load_func (line 73) | def load_func(self, device: str):
    method load_and_call (line 90) | def load_and_call(self, *args, **kwargs):
    method make_caller (line 98) | def make_caller(self, api: Callable = _default_api, globals=None):

FILE: lmdeploy/pytorch/kernels/dlinfer/activation.py
  function silu_and_mul (line 6) | def silu_and_mul(input_tensor: Tensor, ) -> Tensor:

FILE: lmdeploy/pytorch/kernels/dlinfer/apply_rotary_pos_emb.py
  function apply_rotary_pos_emb (line 8) | def apply_rotary_pos_emb(

FILE: lmdeploy/pytorch/kernels/dlinfer/awq_kernels.py
  function awq_linear (line 8) | def awq_linear(x: Tensor,

FILE: lmdeploy/pytorch/kernels/dlinfer/fill_kv_cache.py
  function fill_kv_cache (line 8) | def fill_kv_cache(

FILE: lmdeploy/pytorch/kernels/dlinfer/flash_attention.py
  function flash_attention_fwd (line 6) | def flash_attention_fwd(

FILE: lmdeploy/pytorch/kernels/dlinfer/fused_moe.py
  function fused_moe (line 8) | def fused_moe(

FILE: lmdeploy/pytorch/kernels/dlinfer/fused_rotary_emb.py
  function fused_rotary_emb (line 7) | def fused_rotary_emb(

FILE: lmdeploy/pytorch/kernels/dlinfer/linear.py
  function linear (line 8) | def linear(x: Tensor, weight: Tensor, bias: Optional[Tensor] = None, all...

FILE: lmdeploy/pytorch/kernels/dlinfer/moe_gating_topk_softmax.py
  function moe_gating_topk_softmax (line 9) | def moe_gating_topk_softmax(router_logits: Tensor, topk: int,

FILE: lmdeploy/pytorch/kernels/dlinfer/pagedattention.py
  function prefill_attention (line 8) | def prefill_attention(
  function paged_token_attention (line 77) | def paged_token_attention(
  function paged_attention_fwd (line 113) | def paged_attention_fwd(

FILE: lmdeploy/pytorch/kernels/dlinfer/rms_norm.py
  function rms_norm (line 6) | def rms_norm(hidden_states: Tensor, weight: Tensor, epsilon: float = 1e-...

FILE: lmdeploy/pytorch/kernels/dlinfer/w8a8_kernels.py
  function dynamic_quant (line 7) | def dynamic_quant(x: Tensor, quant_dtype: torch.dtype, quant_granularity...
  function linear_w8a8 (line 12) | def linear_w8a8(
  function rms_norm_w8a8 (line 29) | def rms_norm_w8a8(

FILE: lmdeploy/pytorch/messages.py
  class InputEmbeddings (line 32) | class InputEmbeddings:
    method move_position (line 38) | def move_position(self, offset: int = 0):
  class SamplingParam (line 46) | class SamplingParam:
    method from_gen_config (line 71) | def from_gen_config(cls, gen_config: GenerationConfig):
  class MessageStatus (line 156) | class MessageStatus(enum.Enum):
  class SequenceMeta (line 180) | class SequenceMeta:
  class SequenceManager (line 187) | class SequenceManager:
    method __init__ (line 190) | def __init__(self, seq_meta: SequenceMeta) -> None:
    method _new_seq_id (line 197) | def _new_seq_id(self):
    method get_all_sequences (line 202) | def get_all_sequences(self):
    method get_sequences (line 206) | def get_sequences(self, states: MessageStatus):
    method num_sequences (line 210) | def num_sequences(self, status: MessageStatus):
    method add_sequence (line 214) | def add_sequence(self, seq: 'SchedulerSequence'):
    method remove_sequence (line 222) | def remove_sequence(self, seq: 'SchedulerSequence'):
    method update_sequence_status (line 230) | def update_sequence_status(self, seq: 'SchedulerSequence', new_status:...
  function _to_ndarray (line 244) | def _to_ndarray(token_ids) -> np.ndarray:
  class SchedulerSession (line 255) | class SchedulerSession:
    method __init__ (line 258) | def __init__(self, session_id: int, seq_manager: SequenceManager, sche...
    method add_sequence (line 265) | def add_sequence(self,
    method remove_sequence (line 307) | def remove_sequence(self, seq: 'SchedulerSequence'):
  function _div_up (line 315) | def _div_up(x, n):
  function _round_up (line 320) | def _round_up(x, n):
  class HistoryEmbeddings (line 325) | class HistoryEmbeddings:
    method __init__ (line 328) | def __init__(self, embeddings: List[InputEmbeddings] = None):
    method append (line 333) | def append(self, embeddings: List[InputEmbeddings]):
    method clone (line 336) | def clone(self):
    method copy (line 340) | def copy(self):
    method get_step (line 343) | def get_step(self, step: int) -> int:
    method embeddings (line 359) | def embeddings(self):
    method __len__ (line 363) | def __len__(self):
    method __getitem__ (line 367) | def __getitem__(self, *args, **kwargs):
  class _HistoryDataBase (line 372) | class _HistoryDataBase:
    method __init__ (line 377) | def __init__(self, data: np.ndarray = None, dtype: np.dtype = np.int64):
    method _create_empty_array (line 388) | def _create_empty_array(self, dtype):
    method _get_pad_width (line 395) | def _get_pad_width(self, reserve_size: int):
    method reserve (line 402) | def reserve(self, size: int):
    method get_real (line 413) | def get_real(self):
    method resize (line 419) | def resize(self, size: int):
    method append (line 426) | def append(self, new_data: np.ndarray):
    method __setitem__ (line 439) | def __setitem__(self, *args, **kwargs):
    method __getitem__ (line 443) | def __getitem__(self, *args, **kwargs):
    method __len__ (line 447) | def __len__(self):
    method clone (line 451) | def clone(self):
    method copy (line 457) | def copy(self):
  class HistoryTokenIds (line 462) | class HistoryTokenIds(_HistoryDataBase):
    method __init__ (line 466) | def __init__(self, token_ids: np.ndarray = None, dtype: np.dtype = np....
    method _token_ids (line 470) | def _token_ids(self):
    method _token_ids (line 475) | def _token_ids(self, value):
  class HistoryRouterExperts (line 480) | class HistoryRouterExperts(_HistoryDataBase):
    method __init__ (line 485) | def __init__(self, expert_ids: np.ndarray = None, dtype: np.dtype = np...
    method _create_empty_array (line 488) | def _create_empty_array(self, dtype):
    method _get_pad_width (line 495) | def _get_pad_width(self, reserve_size: int):
  class HistoryLogits (line 500) | class HistoryLogits(_HistoryDataBase):
    method __init__ (line 505) | def __init__(self, logits: np.ndarray = None, dtype: np.dtype = np.int...
    method _create_empty_array (line 509) | def _create_empty_array(self, dtype):
    method _get_pad_width (line 516) | def _get_pad_width(self, reserve_size: int):
    method set_torch_dtype (line 520) | def set_torch_dtype(self, torch_dtype):
    method get_logits (line 524) | def get_logits(self):
    method clone (line 534) | def clone(self):
  class HistoryMultiModals (line 541) | class HistoryMultiModals:
    method __init__ (line 543) | def __init__(self, multimodals: MultiModalInputs = None):
    method get_datas (line 548) | def get_datas(self, start=0, end=-1):
    method add_inputs (line 562) | def add_inputs(self, input_mms: MultiModalInputs):
    method empty (line 570) | def empty(self):
    method update_multimodals (line 577) | def update_multimodals(input_mms: MultiModalInputs, prev_len: int):
  class UpdateTokenMode (line 586) | class UpdateTokenMode(enum.Enum):
  class SchedulerSequence (line 594) | class SchedulerSequence:
    method __post_init__ (line 626) | def __post_init__(self):
    method block_size (line 638) | def block_size(self) -> int:
    method history_image_num (line 643) | def history_image_num(self) -> int:
    method history_image_token_len (line 648) | def history_image_token_len(self) -> int:
    method session_id (line 653) | def session_id(self) -> int:
    method token_ids (line 658) | def token_ids(self) -> np.ndarray:
    method input_embeddings (line 665) | def input_embeddings(self) -> List[InputEmbeddings]:
    method history_ids (line 672) | def history_ids(self) -> np.ndarray:
    method all_ids (line 677) | def all_ids(self) -> np.ndarray:
    method valid_ids (line 682) | def valid_ids(self) -> np.ndarray:
    method generated_ids (line 687) | def generated_ids(self) -> np.ndarray:
    method return_routed_experts (line 693) | def return_routed_experts(self) -> bool:
    method routed_experts (line 697) | def routed_experts(self) -> np.ndarray:
    method append_routed_experts (line 707) | def append_routed_experts(self, routed_experts: Tensor | np.ndarray):
    method num_history_ids (line 718) | def num_history_ids(self):
    method num_token_ids (line 723) | def num_token_ids(self):
    method num_valid_ids (line 727) | def num_valid_ids(self):
    method num_images (line 731) | def num_images(self):
    method num_all_ids (line 735) | def num_all_ids(self):
    method num_blocks (line 740) | def num_blocks(self):
    method state (line 745) | def state(self) -> 'StateBase':
    method set_state (line 748) | def set_state(self, state: 'StateBase'):
    method status (line 753) | def status(self):
    method return_logits (line 757) | def return_logits(self):
    method logits (line 761) | def logits(self):
    method append_logits (line 765) | def append_logits(self, logits: Tensor | np.ndarray):
    method get_input_multimodals (line 776) | def get_input_multimodals(self):
    method record_event (line 782) | def record_event(
    method _update_embeddings (line 789) | def _update_embeddings(self, embeddings: List[InputEmbeddings]):
    method _update_multimodals (line 799) | def _update_multimodals(self, multimodals: MultiModalInputs):
    method update_token_ids (line 806) | def update_token_ids(self,
    method set_step (line 816) | def set_step(self, step: int):

FILE: lmdeploy/pytorch/model_inputs.py
  class DPMeta (line 22) | class DPMeta:
    method _gather_tp_sizes (line 27) | def _gather_tp_sizes(tp: int, seqlen: int, num_tokens: List[int], dist...
    method build (line 41) | def build(cls, seqlen: int, num_tokens: List[int]):
    method sync_tp_size (line 57) | def sync_tp_size(self, tp_size: int):
  class VisionModelInputs (line 63) | class VisionModelInputs:
    method to_device (line 71) | def to_device(self, device: str, non_blocking: bool = False):
    method get_inputs (line 98) | def get_inputs(self, history_lengths: torch.Tensor, seq_lengths: torch...
  class ModelInputsDelta (line 125) | class ModelInputsDelta:
    method seq_length (line 141) | def seq_length(self):
    method fill_tensors (line 146) | def fill_tensors(self):
    method to_device (line 153) | def to_device(self, device: str, non_blocking: bool = False):
    method log_info (line 166) | def log_info(self):
  class ModelInputs (line 174) | class ModelInputs:
    method step (line 197) | def step(self, input_ids: torch.Tensor, step_seqlens: torch.Tensor = N...
    method to_device (line 211) | def to_device(self, device: str, non_blocking: bool = False):
    method build_dp_meta (line 225) | def build_dp_meta(self, num_tokens: List[int]):
    method log_info (line 229) | def log_info(self):
  class StepContext (line 237) | class StepContext:
    method new (line 275) | def new(
    method get_mask_and_position_ids (line 343) | def get_mask_and_position_ids(cls, inputs: ModelInputs):
  class BuildModelContext (line 387) | class BuildModelContext:
  class StepContextManager (line 398) | class StepContextManager(CtxMgrBase[StepContext]):
    method __init__ (line 400) | def __init__(self, build_ctx: BuildModelContext = None):
    method build_context (line 406) | def build_context(
  class StepCtxMgrApi (line 427) | class StepCtxMgrApi(CtxMgrBase[StepContextManager]):
    method __init__ (line 430) | def __init__(self):

FILE: lmdeploy/pytorch/models/baichuan.py
  function _is_baichuan_13b (line 17) | def _is_baichuan_13b(config: Any):
  class BaichuanAttention (line 22) | class BaichuanAttention(nn.Module):
    method __init__ (line 25) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 67) | def forward(
  class MLP (line 111) | class MLP(nn.Module):
    method __init__ (line 113) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 139) | def forward(self, x):
  class DecoderLayer (line 146) | class DecoderLayer(nn.Module):
    method __init__ (line 149) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N...
    method forward (line 174) | def forward(
  class BaichuanModel (line 205) | class BaichuanModel(nn.Module):
    method __init__ (line 208) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 243) | def forward(
    method get_input_embeddings (line 283) | def get_input_embeddings(self):
  class BaichuanForCausalLM (line 288) | class BaichuanForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 298) | def __init__(self,
    method forward (line 315) | def forward(
    method get_logits (line 334) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 338) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 342) | def prepare_inputs_for_generation(
    method load_weights (line 371) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/chatglm2.py
  class SelfAttention (line 26) | class SelfAttention(torch.nn.Module):
    method __init__ (line 32) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method _extract_rope (line 71) | def _extract_rope(states: torch.Tensor):
    method _fill_rope (line 79) | def _fill_rope(states: torch.Tensor, rope: torch.Tensor):
    method forward (line 87) | def forward(
  class MLP (line 134) | class MLP(nn.Module):
    method __init__ (line 137) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 165) | def forward(self, x):
  class GLMBlock (line 172) | class GLMBlock(torch.nn.Module):
    method __init__ (line 178) | def __init__(self,
    method forward (line 211) | def forward(
  class GLMTransformer (line 242) | class GLMTransformer(nn.Module):
    method __init__ (line 245) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method _get_layer (line 260) | def _get_layer(self, layer_number: int):
    method forward (line 264) | def forward(
  class Embedding (line 288) | class Embedding(nn.Module):
    method __init__ (line 291) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 298) | def forward(self, input_ids):
  class PatchEmbedding (line 307) | class PatchEmbedding(nn.Module):
    method __init__ (line 310) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 321) | def forward(self, images):
  class EVA2CLIPAttention (line 331) | class EVA2CLIPAttention(nn.Module):
    method __init__ (line 334) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 363) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class EVA2CLIPMLP (line 382) | class EVA2CLIPMLP(nn.Module):
    method __init__ (line 385) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 416) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class EVA2CLIPTransformerLayer (line 424) | class EVA2CLIPTransformerLayer(nn.Module):
    method __init__ (line 427) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 437) | def forward(self, hidden_states):
  class EVA2CLIPTransformer (line 448) | class EVA2CLIPTransformer(nn.Module):
    method __init__ (line 451) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 456) | def forward(self, hidden_states):
  class GLU (line 463) | class GLU(nn.Module):
    method __init__ (line 466) | def __init__(self,
    method forward (line 488) | def forward(self, x):
  class EVA2CLIPModel (line 497) | class EVA2CLIPModel(nn.Module):
    method __init__ (line 500) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 518) | def forward(self, images):
  class ChatGLMModel (line 539) | class ChatGLMModel(nn.Module):
    method __init__ (line 541) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 573) | def forward(
    method get_input_embeddings (line 611) | def get_input_embeddings(self):
  class ChatGLMForConditionalGeneration (line 616) | class ChatGLMForConditionalGeneration(nn.Module, DeployModelMixin, CudaG...
    method __init__ (line 619) | def __init__(self,
    method forward (line 632) | def forward(
    method get_logits (line 655) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 659) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 663) | def prepare_inputs_for_generation(
    method _get_model_metas (line 708) | def _get_model_metas(self, context: StepContext):
    method update_model_metas (line 716) | def update_model_metas(self,
    method load_weights (line 793) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_input_processor (line 832) | def get_input_processor(self) -> BaseModelInputProcessor:
  class ChatGLMInputProcessor (line 837) | class ChatGLMInputProcessor(BaseModelInputProcessor):
    method __init__ (line 840) | def __init__(self, config: PretrainedConfig, dtype) -> None:
    method preprocess_input (line 852) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/cogvlm.py
  class VisionExpertAttention (line 25) | class VisionExpertAttention(nn.Module):
    method __init__ (line 28) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 92) | def forward(
  class MLP (line 160) | class MLP(nn.Module):
    method __init__ (line 163) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 193) | def forward(self, x):
  class VisionExpertMLP (line 200) | class VisionExpertMLP(nn.Module):
    method __init__ (line 203) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 210) | def forward(
  class CogVLMDecoderLayer (line 230) | class CogVLMDecoderLayer(nn.Module):
    method __init__ (line 233) | def __init__(self,
    method forward (line 262) | def forward(
  class PatchEmbedding (line 301) | class PatchEmbedding(nn.Module):
    method __init__ (line 304) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 315) | def forward(self, images):
  class EVA2CLIPAttention (line 325) | class EVA2CLIPAttention(nn.Module):
    method __init__ (line 328) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 357) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class EVA2CLIPMLP (line 376) | class EVA2CLIPMLP(nn.Module):
    method __init__ (line 379) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 410) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class EVA2CLIPTransformerLayer (line 418) | class EVA2CLIPTransformerLayer(nn.Module):
    method __init__ (line 421) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 431) | def forward(self, hidden_states):
  class EVA2CLIPTransformer (line 442) | class EVA2CLIPTransformer(nn.Module):
    method __init__ (line 445) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 450) | def forward(self, hidden_states):
  class GLU (line 457) | class GLU(nn.Module):
    method __init__ (line 460) | def __init__(self,
    method forward (line 482) | def forward(self, x):
  class EVA2CLIPModel (line 491) | class EVA2CLIPModel(nn.Module):
    method __init__ (line 494) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 515) | def forward(self, images):
  class CogVLMModel (line 536) | class CogVLMModel(nn.Module):
    method __init__ (line 539) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 574) | def forward(
    method get_input_embeddings (line 622) | def get_input_embeddings(self):
  class CogVLMForCausalLM (line 631) | class CogVLMForCausalLM(nn.Module, CudaGraphMixin, DeployModelMixin):
    method __init__ (line 641) | def __init__(self,
    method forward (line 660) | def forward(
    method get_logits (line 685) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 689) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 693) | def prepare_inputs_for_generation(
    method load_weights (line 749) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method _get_model_metas (line 792) | def _get_model_metas(self, context: StepContext):
    method update_model_metas (line 800) | def update_model_metas(self,
    method get_input_processor (line 870) | def get_input_processor(self) -> BaseModelInputProcessor:
  class CogVLMInputProcessor (line 875) | class CogVLMInputProcessor(BaseModelInputProcessor):
    method __init__ (line 878) | def __init__(self, config: PretrainedConfig, dtype) -> None:
    method preprocess_input (line 890) | def preprocess_input(self, input_ids: List[int], input_multimodals=Non...

FILE: lmdeploy/pytorch/models/deepseek.py
  class DeepseekAttention (line 20) | class DeepseekAttention(nn.Module):
    method __init__ (line 23) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 62) | def forward(
  class DeepseekMoE (line 105) | class DeepseekMoE(nn.Module):
    method __init__ (line 108) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 159) | def forward(self, hidden_states: torch.Tensor):
  class DeepseekMLP (line 183) | class DeepseekMLP(nn.Module):
    method __init__ (line 186) | def __init__(self,
    method forward (line 223) | def forward(self, x):
  class DeepseekDecoderLayer (line 230) | class DeepseekDecoderLayer(nn.Module):
    method __init__ (line 233) | def __init__(self,
    method forward (line 260) | def forward(
  class DeepseekModel (line 291) | class DeepseekModel(nn.Module):
    method __init__ (line 294) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 317) | def forward(
    method get_input_embeddings (line 355) | def get_input_embeddings(self):
  class DeepseekForCausalLM (line 360) | class DeepseekForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 375) | def __init__(self,
    method forward (line 392) | def forward(
    method get_logits (line 411) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 415) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 419) | def prepare_inputs_for_generation(
    method _load_weight_experts (line 448) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method load_weights (line 462) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/deepseek_mtp.py
  class DeepseekV2BMM (line 25) | class DeepseekV2BMM(nn.Module):
    method __init__ (line 28) | def __init__(self, batch: int, in_features: int, out_features: int, dt...
    method create_weight (line 42) | def create_weight(self, batch: int, in_features: int, out_features: in...
    method weight_loader (line 46) | def weight_loader(self, param: nn.Parameter, weight: torch.Tensor):
    method forward (line 50) | def forward(self, x: torch.Tensor, output: torch.Tensor):
  class DeepseekV2Attention (line 55) | class DeepseekV2Attention(DeepseekV2Attention):
    method __init__ (line 58) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 161) | def forward(
  class DeepseekV2MoE (line 207) | class DeepseekV2MoE(nn.Module):
    method __init__ (line 210) | def __init__(self, config: Any, layer_idx, dtype: torch.dtype = None, ...
    method forward (line 247) | def forward(self, hidden_states: torch.Tensor):
  class DeepseekV2MLP (line 267) | class DeepseekV2MLP(nn.Module):
    method __init__ (line 270) | def __init__(self,
    method forward (line 306) | def forward(self, x):
  class DeepseekV2DecoderLayer (line 313) | class DeepseekV2DecoderLayer(DeepseekV2DecoderLayer):
    method __init__ (line 316) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N...
  class SharedHead (line 343) | class SharedHead(nn.Module):
    method __init__ (line 346) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 352) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  function build_deepseek_rotary_embedding (line 356) | def build_deepseek_rotary_embedding(config: PretrainedConfig):
  class DeepSeekMultiTokenPredictorLayer (line 370) | class DeepSeekMultiTokenPredictorLayer(nn.Module):
    method __init__ (line 372) | def __init__(
    method forward (line 411) | def forward(
  class DeepSeekMultiTokenPredictor (line 447) | class DeepSeekMultiTokenPredictor(nn.Module):
    method __init__ (line 449) | def __init__(
    method forward (line 475) | def forward(
    method get_logits (line 498) | def get_logits(
  class DeepseekMTPModel (line 511) | class DeepseekMTPModel(nn.Module, CudaGraphMixin):
    method __init__ (line 513) | def __init__(
    method get_logits (line 535) | def get_logits(self, hidden_states: torch.Tensor, spec_step_idx: int =...
    method forward (line 539) | def forward(
    method make_buffers_cudagraph (line 558) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method fill_buffers_cudagraph (line 570) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, input_ids:...
    method prepare_inputs_for_generation (line 583) | def prepare_inputs_for_generation(
    method _load_weight_experts (line 603) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method _load_weight_attention (line 617) | def _load_weight_attention(self, name: str, loaded_weight: torch.Tenso...
    method load_weights (line 710) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method _rewrite_spec_layer_name (line 774) | def _rewrite_spec_layer_name(self, spec_layer: int, name: str) -> str:

FILE: lmdeploy/pytorch/models/deepseek_v2.py
  class ExecType (line 29) | class ExecType(Enum):
  class BatchWorker (line 39) | class BatchWorker:
    method __init__ (line 41) | def __init__(self, tag: str, generator):
    method next (line 47) | def next(self):
    method done (line 59) | def done(self):
  function execute_batch (line 63) | def execute_batch(inputs: list, fn, delta_stages: int = 0, exec_type: Ex...
  function get_new_meta (line 177) | def get_new_meta(attn_metadata, start_idx: int, end_idx: int):
  function get_new_rotary_pos_emb (line 195) | def get_new_rotary_pos_emb(rotary_pos_emb, start_loc, end_loc):
  function get_new_input (line 201) | def get_new_input(hidden_states, rotary_pos_emb, past_key_values, residu...
  function get_split_flags (line 211) | def get_split_flags(attn_metadata, num=2):
  function split_input (line 254) | def split_input(hidden_states,
  function merge_output (line 309) | def merge_output(output_list):
  function yarn_get_mscale (line 321) | def yarn_get_mscale(scale=1, mscale=1):
  class DeepseekV2BMM (line 327) | class DeepseekV2BMM(nn.Module):
    method __init__ (line 330) | def __init__(self, batch: int, in_features: int, out_features: int, dt...
    method _update_batch (line 345) | def _update_batch(self, batch: int):
    method create_weight (line 351) | def create_weight(self, batch: int, in_features: int, out_features: in...
    method weight_loader (line 355) | def weight_loader(self, param: nn.Parameter, weight: torch.Tensor):
    method forward (line 361) | def forward(self, x: torch.Tensor, output: torch.Tensor):
  class DeepseekV2Attention (line 366) | class DeepseekV2Attention(nn.Module):
    method __init__ (line 369) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method _q_proj (line 472) | def _q_proj(self, hidden_states, num_heads: int, nope_size: int, pe_si...
    method _kv_proj (line 490) | def _kv_proj(self, hidden_states, nope_size: int):
    method _qkv_proj (line 502) | def _qkv_proj(self, hidden_states: torch.Tensor, num_heads: int):
    method forward (line 511) | def forward(
  class MoEGate (line 562) | class MoEGate(nn.Module):
    method __init__ (line 565) | def __init__(self,
    method _compute_scores (line 604) | def _compute_scores(self, logits: torch.Tensor):
    method _postprocess_topk_weight (line 615) | def _postprocess_topk_weight(self, topk_weight: torch.Tensor):
    method forward (line 625) | def forward(self, hidden_states: torch.Tensor):
  class DeepseekV2MoE (line 661) | class DeepseekV2MoE(nn.Module):
    method __init__ (line 664) | def __init__(self, config: Any, layer_idx, dtype: torch.dtype = None, ...
    method forward (line 720) | def forward(self, hidden_states: torch.Tensor):
  class DeepseekV2MLP (line 743) | class DeepseekV2MLP(nn.Module):
    method __init__ (line 746) | def __init__(self,
    method forward (line 798) | def forward(self, x):
  class DeepseekV2DecoderLayer (line 805) | class DeepseekV2DecoderLayer(nn.Module):
    method __init__ (line 808) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N...
    method forward (line 836) | def forward(
    method forward_yield (line 866) | def forward_yield(
  class DeepseekV2Model (line 950) | class DeepseekV2Model(nn.Module):
    method __init__ (line 953) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 987) | def forward(
    method forward_microbatch (line 1018) | def forward_microbatch(
    method forward_yieldlayers (line 1072) | def forward_yieldlayers(self,
    method get_input_embeddings (line 1092) | def get_input_embeddings(self):
  class DeepseekV2ForCausalLM (line 1097) | class DeepseekV2ForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 1100) | def __init__(self,
    method forward (line 1119) | def forward(
    method get_logits (line 1146) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 1150) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 1154) | def prepare_inputs_for_generation(
    method _load_weight_experts (line 1173) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method _load_weight_attention (line 1187) | def _load_weight_attention(self, name: str, loaded_weight: torch.Tenso...
    method load_weights (line 1280) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/deepseek_v32.py
  function rotate_activation (line 21) | def rotate_activation(x: torch.Tensor) -> torch.Tensor:
  class LayerNorm (line 28) | class LayerNorm(nn.Module):
    method __init__ (line 31) | def __init__(self, dim: int, eps: float = 1e-6, device: torch.device =...
    method forward (line 40) | def forward(self, x: torch.Tensor):
  class Indexer (line 44) | class Indexer(nn.Module):
    method __init__ (line 46) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N...
    method forward (line 87) | def forward(self,
  class DeepseekV32Attention (line 121) | class DeepseekV32Attention(DeepseekV2Attention):
    method __init__ (line 123) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N...
    method _q_proj (line 229) | def _q_proj(self, hidden_states, num_heads: int, nope_size: int, pe_si...
    method _kv_proj (line 249) | def _kv_proj(self, hidden_states, nope_size: int):
    method _qkv_proj (line 261) | def _qkv_proj(self, hidden_states: torch.Tensor, num_heads: int):
    method forward (line 270) | def forward(
  class DeepseekV32DecoderLayer (line 320) | class DeepseekV32DecoderLayer(DeepseekV2DecoderLayer):
    method __init__ (line 322) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N...
  class DeepseekV32Model (line 354) | class DeepseekV32Model(DeepseekV2Model):
    method __init__ (line 356) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
  class DeepseekV32ForCausalLM (line 393) | class DeepseekV32ForCausalLM(DeepseekV2ForCausalLM):
    method __init__ (line 395) | def __init__(self,

FILE: lmdeploy/pytorch/models/deepseek_vl2.py
  class MlpProjector (line 23) | class MlpProjector(nn.Module):
    method __init__ (line 25) | def __init__(self, cfg, dtype):
    method forward (line 68) | def forward(self, x):
  class DeepseekVLV2ForCausalLM (line 105) | class DeepseekVLV2ForCausalLM(nn.Module, CudaGraphMixin, DeployModelMixin):
    method __init__ (line 107) | def __init__(self,
    method _init_vision_module (line 154) | def _init_vision_module(
    method prepare_inputs_embeds (line 173) | def prepare_inputs_embeds(self,
    method forward (line 305) | def forward(
    method get_logits (line 333) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 337) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 341) | def prepare_inputs_for_generation(
    method load_weights (line 385) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_input_processor (line 411) | def get_input_processor(self) -> BaseModelInputProcessor:
  class DeepSeekVLV2InputProcessor (line 416) | class DeepSeekVLV2InputProcessor(BaseModelInputProcessor):
    method __init__ (line 419) | def __init__(self, config: PretrainedConfig, dtype) -> None:
    method preprocess_input (line 425) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/gemma.py
  class GemmaAttention (line 21) | class GemmaAttention(nn.Module):
    method __init__ (line 24) | def __init__(self,
    method forward (line 96) | def forward(
    method naive_attn_with_masks (line 162) | def naive_attn_with_masks(
  class GemmaMLP (line 205) | class GemmaMLP(nn.Module):
    method __init__ (line 208) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 237) | def forward(self, x):
  class GemmaDecoderLayer (line 245) | class GemmaDecoderLayer(nn.Module):
    method __init__ (line 248) | def __init__(self,
    method forward (line 290) | def forward(
  class Gemma3TextScaledWordEmbedding (line 334) | class Gemma3TextScaledWordEmbedding(nn.Embedding):
    method __init__ (line 338) | def __init__(self,
    method forward (line 347) | def forward(self, input_ids: torch.Tensor):
  class GemmaModel (line 351) | class GemmaModel(nn.Module):
    method __init__ (line 354) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method build_rope_emb (line 386) | def build_rope_emb(self, config: PretrainedConfig):
    method forward (line 428) | def forward(
    method get_input_embeddings (line 478) | def get_input_embeddings(self):
  class GemmaForCausalLM (line 483) | class GemmaForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 498) | def __init__(self,
    method forward (line 516) | def forward(
    method get_logits (line 539) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 548) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 552) | def prepare_inputs_for_generation(
    method update_weights (line 581) | def update_weights(self):
    method load_weights (line 585) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/gemma3_vl.py
  class Gemma3RMSNorm (line 20) | class Gemma3RMSNorm(nn.Module):
    method __init__ (line 22) | def __init__(self, dim: int, eps: float = 1e-6):
    method _norm (line 27) | def _norm(self, x):
    method forward (line 30) | def forward(self, x):
    method extra_repr (line 37) | def extra_repr(self):
  class Gemma3MultiModalProjector (line 41) | class Gemma3MultiModalProjector(nn.Module):
    method __init__ (line 43) | def __init__(self,
    method forward (line 62) | def forward(self, vision_outputs: torch.Tensor):
  class Gemma3VLInputProcessor (line 80) | class Gemma3VLInputProcessor(BaseModelInputProcessor):
    method __init__ (line 83) | def __init__(self, config: PretrainedConfig, dtype) -> None:
    method preprocess_input (line 94) | def preprocess_input(self,
  class Gemma3ForConditionalGeneration (line 124) | class Gemma3ForConditionalGeneration(nn.Module, CudaGraphMixin, DeployMo...
    method __init__ (line 126) | def __init__(self,
    method get_input_embeddings (line 141) | def get_input_embeddings(self):
    method get_logits (line 144) | def get_logits(self, hidden_states: torch.Tensor):
    method get_image_features (line 148) | def get_image_features(self, pixel_values: torch.Tensor):
    method forward (line 162) | def forward(
    method prepare_attn_masks (line 212) | def prepare_attn_masks(
    method prepare_inputs_for_generation (line 269) | def prepare_inputs_for_generation(
    method tie_weights (line 302) | def tie_weights(self):
    method load_weights (line 305) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_input_processor (line 337) | def get_input_processor(self) -> BaseModelInputProcessor:

FILE: lmdeploy/pytorch/models/glm4.py
  class Glm4Attention (line 17) | class Glm4Attention(nn.Module):
    method __init__ (line 19) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method _extract_rope (line 55) | def _extract_rope(states: torch.Tensor):
    method _fill_rope (line 63) | def _fill_rope(states: torch.Tensor, rope: torch.Tensor):
    method forward (line 71) | def forward(
  class Glm4MLP (line 119) | class Glm4MLP(nn.Module):
    method __init__ (line 121) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 147) | def forward(self, x):
  class Glm4DecoderLayer (line 154) | class Glm4DecoderLayer(nn.Module):
    method __init__ (line 156) | def __init__(self,
    method forward (line 199) | def forward(
  class Glm4Model (line 236) | class Glm4Model(nn.Module):
    method __init__ (line 238) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 261) | def forward(
  class Glm4ForCausalLM (line 300) | class Glm4ForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 303) | def __init__(self,
    method forward (line 320) | def forward(
    method get_logits (line 339) | def get_logits(self, hidden_states: torch.Tensor):
    method update_weights (line 343) | def update_weights(self):
    method get_input_embeddings (line 348) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 352) | def prepare_inputs_for_generation(
    method load_weights (line 381) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/glm4_1v.py
  function _apply_mrope_selection (line 24) | def _apply_mrope_selection(hidden_states: torch.Tensor, mrope_position_i...
  class Glm4vTextModel (line 45) | class Glm4vTextModel(nn.Module):
    method __init__ (line 47) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 71) | def forward(
  class Glm4VisionMLP (line 115) | class Glm4VisionMLP(nn.Module):
    method __init__ (line 118) | def __init__(self,
    method forward (line 148) | def forward(self, x):
  class Glm4vVisionPatchEmbed (line 153) | class Glm4vVisionPatchEmbed(nn.Module):
    method __init__ (line 155) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 170) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Glm4vVisionRotaryEmbedding (line 178) | class Glm4vVisionRotaryEmbedding(nn.Module):
    method __init__ (line 181) | def __init__(self, dim: int, theta: float = 10000.0, device: torch.dev...
    method forward (line 186) | def forward(self, seqlen: int) -> torch.Tensor:
  class Glm4vVisionPatchMerger (line 192) | class Glm4vVisionPatchMerger(nn.Module):
    method __init__ (line 194) | def __init__(self,
    method forward (line 230) | def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
  class Glm4vVisionEmbeddings (line 236) | class Glm4vVisionEmbeddings(nn.Module):
    method __init__ (line 238) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 250) | def forward(self, embeddings, lengths, image_shapes, h_coords, w_coord...
  class Glm4vVisionAttention (line 321) | class Glm4vVisionAttention(nn.Module):
    method __init__ (line 324) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 363) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor,
  class Glm4vVisionBlock (line 390) | class Glm4vVisionBlock(nn.Module):
    method __init__ (line 392) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 399) | def forward(self,
  class Glm4vVisionModel (line 417) | class Glm4vVisionModel(nn.Module):
    method __init__ (line 420) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method rot_pos_emb (line 450) | def rot_pos_emb(self, grid_thw):
    method forward (line 480) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tenso...
  class Glm4vForConditionalGeneration (line 511) | class Glm4vForConditionalGeneration(nn.Module, DeployModelMixin, CudaGra...
    method __init__ (line 525) | def __init__(self,
    method forward (line 550) | def forward(
    method get_logits (line 590) | def get_logits(self, hidden_states: torch.Tensor):
    method update_weights (line 594) | def update_weights(self):
    method get_input_embeddings (line 599) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 603) | def prepare_inputs_for_generation(
    method rename_weight (line 665) | def rename_weight(cls, name: str) -> str:
    method load_weights (line 675) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method make_buffers_cudagraph (line 720) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method fill_buffers_cudagraph (line 731) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method _get_model_metas (line 752) | def _get_model_metas(self, context: StepContext):
    method _update_model_meta_decoding (line 760) | def _update_model_meta_decoding(self, context: StepContext):
    method _get_multimodal_pos_ids (line 773) | def _get_multimodal_pos_ids(self, grid_thw: list, device: torch.device):
    method _update_model_meta_prefilling (line 784) | def _update_model_meta_prefilling(self, context: StepContext):
    method update_model_metas (line 828) | def update_model_metas(self,
    method get_input_processor (line 838) | def get_input_processor(self) -> BaseModelInputProcessor:
  class Glm4vInputProcessor (line 843) | class Glm4vInputProcessor(BaseModelInputProcessor):
    method __init__ (line 846) | def __init__(self, config: PretrainedConfig) -> None:
    method preprocess_input (line 849) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/glm4_moe.py
  class Glm4MoeAttention (line 19) | class Glm4MoeAttention(nn.Module):
    method __init__ (line 22) | def __init__(self,
    method forward (line 78) | def forward(
  class Glm4MoeMLP (line 126) | class Glm4MoeMLP(nn.Module):
    method __init__ (line 129) | def __init__(self,
    method forward (line 165) | def forward(self, x):
  class Glm4MoE (line 172) | class Glm4MoE(nn.Module):
    method __init__ (line 175) | def __init__(self,
    method forward (line 241) | def forward(self, hidden_states: torch.Tensor):
  class Glm4MoeDecoderLayer (line 268) | class Glm4MoeDecoderLayer(nn.Module):
    method __init__ (line 271) | def __init__(self,
    method forward (line 298) | def forward(
  class Glm4MoeModel (line 328) | class Glm4MoeModel(nn.Module):
    method __init__ (line 331) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method _build_rotary_embedding (line 354) | def _build_rotary_embedding(self, config: PretrainedConfig):
    method forward (line 358) | def forward(
  class Glm4MoeForCausalLM (line 397) | class Glm4MoeForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 412) | def __init__(self,
    method forward (line 437) | def forward(
    method get_logits (line 456) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 460) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 464) | def prepare_inputs_for_generation(
    method _load_weight_experts (line 493) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method _load_weight_fused_experts (line 511) | def _load_weight_fused_experts(self, name: str, loaded_weight: torch.T...
    method load_weights (line 536) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/glm4moe_mtp.py
  class Glm4MoeMTPDecoderLayer (line 17) | class Glm4MoeMTPDecoderLayer(Glm4MoeDecoderLayer):
    method __init__ (line 20) | def __init__(self,
  class Glm4MoeMTPModel (line 49) | class Glm4MoeMTPModel(DeepseekMTPModel):
    method __init__ (line 64) | def __init__(self,
    method _load_weight_experts (line 78) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method load_weights (line 92) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/gpt_oss.py
  class GptOssAttention (line 22) | class GptOssAttention(nn.Module):
    method __init__ (line 25) | def __init__(self,
    method build_sinks (line 84) | def build_sinks(cls, config: PretrainedConfig, device):
    method weight_loader_sinks (line 97) | def weight_loader_sinks(cls, param: nn.Parameter, loaded_weight: torch...
    method forward (line 104) | def forward(
  class GateupAct (line 147) | class GateupAct:
    method __init__ (line 149) | def __init__(self, limit: float = 7.0, alpha: float = 1.702):
    method _impl (line 154) | def _impl(self, gateup: torch.Tensor) -> torch.Tensor:
    method build (line 164) | def build(limit: float, alpha: float):
    method _try_compile (line 167) | def _try_compile(self, gateup: torch.Tensor) -> Callable:
    method __call__ (line 175) | def __call__(self, gateup: torch.Tensor) -> torch.Tensor:
  class GptOssExperts (line 183) | class GptOssExperts(nn.Module):
    method __init__ (line 186) | def __init__(self,
    method forward (line 218) | def forward(self, hidden_states: torch.Tensor, router_indices, routing...
  class GptOssTopKRouter (line 231) | class GptOssTopKRouter(nn.Module):
    method __init__ (line 234) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 242) | def forward(self, hidden_states):
  class GptOssMLP (line 251) | class GptOssMLP(nn.Module):
    method __init__ (line 254) | def __init__(self,
    method forward (line 264) | def forward(self, hidden_states, all_routed_experts: torch.Tensor = No...
  class GptOssDecoderLayer (line 272) | class GptOssDecoderLayer(nn.Module):
    method __init__ (line 275) | def __init__(self,
    method forward (line 306) | def forward(
  class GptOssModel (line 338) | class GptOssModel(nn.Module):
    method __init__ (line 340) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 363) | def forward(
    method get_input_embeddings (line 403) | def get_input_embeddings(self):
  class GptOssForCausalLM (line 408) | class GptOssForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    method __init__ (line 419) | def __init__(self,
    method forward (line 436) | def forward(
    method get_input_embeddings (line 469) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 473) | def prepare_inputs_for_generation(
    method _load_weight_experts_gate_up (line 502) | def _load_weight_experts_gate_up(self, name: str, loaded_weight: torch...
    method _load_weight_experts_down (line 520) | def _load_weight_experts_down(self, name: str, loaded_weight: torch.Te...
    method _load_weight_experts (line 535) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method load_weights (line 543) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/internlm.py
  class InternLMAttention (line 18) | class InternLMAttention(nn.Module):
    method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 60) | def forward(
  class InternLMMLP (line 103) | class InternLMMLP(nn.Module):
    method __init__ (line 106) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 132) | def forward(self, x):
  class InternLMDecoderLayer (line 139) | class InternLMDecoderLayer(nn.Module):
    method __init__ (line 142) | def __init__(self,
    method forward (line 171) | def forward(
  class InternLMModel (line 202) | class InternLMModel(nn.Module):
    method __init__ (line 205) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 248) | def forward(
    method get_input_embeddings (line 286) | def get_input_embeddings(self):
  class InternLMForCausalLM (line 291) | class InternLMForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 306) | def __init__(self,
    method forward (line 323) | def forward(
    method get_logits (line 342) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 346) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 350) | def prepare_inputs_for_generation(
    method load_weights (line 379) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/internlm2.py
  class InternLM2Attention (line 18) | class InternLM2Attention(nn.Module):
    method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 61) | def forward(
  class InternLM2MLP (line 104) | class InternLM2MLP(nn.Module):
    method __init__ (line 107) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 133) | def forward(self, x):
  class InternLM2DecoderLayer (line 140) | class InternLM2DecoderLayer(nn.Module):
    method __init__ (line 143) | def __init__(self,
    method forward (line 172) | def forward(
  class InternLM2Model (line 203) | class InternLM2Model(nn.Module):
    method __init__ (line 206) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 230) | def forward(
    method get_input_embeddings (line 268) | def get_input_embeddings(self):
  class InternLM2ForCausalLM (line 273) | class InternLM2ForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    method __init__ (line 283) | def __init__(self,
    method forward (line 297) | def forward(
    method get_input_embeddings (line 316) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 320) | def prepare_inputs_for_generation(
    method load_lora_weights (line 349) | def load_lora_weights(self, weights: Iterable[Tuple[str, torch.Tensor]...
    method load_weights (line 373) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/internlm2_reward.py
  class InternLM2ForRewardModel (line 17) | class InternLM2ForRewardModel(nn.Module, CudaGraphMixin):
    method __init__ (line 27) | def __init__(self,
    method forward (line 40) | def forward(
    method get_logits (line 59) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 63) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 67) | def prepare_inputs_for_generation(
    method load_lora_weights (line 92) | def load_lora_weights(self, weights: Iterable[Tuple[str, torch.Tensor]...
    method load_weights (line 116) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/internlm2_ve.py
  class InternLM2VEDecoderLayer (line 18) | class InternLM2VEDecoderLayer(nn.Module):
    method __init__ (line 21) | def __init__(self,
    method forward (line 54) | def forward(
  class InternLM2VEModel (line 94) | class InternLM2VEModel(nn.Module):
    method __init__ (line 97) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 141) | def forward(
    method get_input_embeddings (line 183) | def get_input_embeddings(self):
  class InternLM2VEForCausalLM (line 188) | class InternLM2VEForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 198) | def __init__(self,
    method forward (line 215) | def forward(
    method get_logits (line 238) | def get_logits(self, hidden_states: torch.Tensor):
    method support_cuda_graph (line 242) | def support_cuda_graph(
    method get_input_embeddings (line 256) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 260) | def prepare_inputs_for_generation(
    method load_weights (line 289) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/internlm3.py
  class InternLM3Attention (line 18) | class InternLM3Attention(nn.Module):
    method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 62) | def forward(
  class InternLM3MLP (line 105) | class InternLM3MLP(nn.Module):
    method __init__ (line 108) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 135) | def forward(self, x):
  class InternLM3DecoderLayer (line 142) | class InternLM3DecoderLayer(nn.Module):
    method __init__ (line 145) | def __init__(self,
    method forward (line 174) | def forward(
  class InternLM3Model (line 205) | class InternLM3Model(nn.Module):
    method __init__ (line 208) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 233) | def forward(
    method get_input_embeddings (line 271) | def get_input_embeddings(self):
  class InternLM3ForCausalLM (line 276) | class InternLM3ForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    method __init__ (line 291) | def __init__(self,
    method forward (line 304) | def forward(
    method get_input_embeddings (line 323) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 327) | def prepare_inputs_for_generation(
    method load_weights (line 356) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/interns1_pro.py
  class InternS1ProForConditionalGeneration (line 22) | class InternS1ProForConditionalGeneration(nn.Module, DeployModelMixinV1,...
    method __init__ (line 37) | def __init__(self,
    method forward (line 80) | def forward(
    method get_input_embeddings (line 148) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 152) | def prepare_inputs_for_generation(
    method rename_weight (line 236) | def rename_weight(cls, name: str) -> str:
    method _load_weight_experts (line 246) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method _load_weight_fused_experts (line 262) | def _load_weight_fused_experts(self, name: str, loaded_weight: torch.T...
    method load_weights (line 286) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_input_processor (line 360) | def get_input_processor(self) -> BaseModelInputProcessor:
  class InternS1ProInputProcessor (line 365) | class InternS1ProInputProcessor(BaseModelInputProcessor):
    method __init__ (line 368) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype) -> None:
    method _make_image_mm_data (line 372) | def _make_image_mm_data(self, input_mm: Dict[str, Any]) -> MultiModalD...
    method _make_video_mm_data (line 390) | def _make_video_mm_data(self, input_mm: Dict[str, Any]) -> MultiModalD...
    method _make_time_series_mm_data (line 411) | def _make_time_series_mm_data(self, input_mm: Dict[str, Any]) -> Multi...
    method preprocess_input (line 429) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/interns1_pro_ts.py
  class InternS1ProTimeSeriesEncoder (line 17) | class InternS1ProTimeSeriesEncoder(nn.Module):
    method __init__ (line 19) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method _make_causal_mask (line 58) | def _make_causal_mask(self,
    method _prepare_decoder_attention_mask (line 74) | def _prepare_decoder_attention_mask(self, input_shape, inputs_embeds, ...
    method forward (line 89) | def forward(self, input_features):
  class InternS1ProTimeSeriesConcatSubsampling (line 131) | class InternS1ProTimeSeriesConcatSubsampling(nn.Module):
    method __init__ (line 133) | def __init__(self, in_channels: int, concat_size: int):
    method forward (line 138) | def forward(self, ts_signals: torch.Tensor, ts_lens: torch.Tensor):
  class InternS1ProTimeSeriesFixPositionalEncoding (line 148) | class InternS1ProTimeSeriesFixPositionalEncoding(nn.Module):
    method __init__ (line 150) | def __init__(self, d_model, max_len=20000, dtype: torch.dtype = None, ...
    method forward (line 161) | def forward(self, x):
  class InternS1ProTimeSeriesMultiChannelAdaptiveSubsampling (line 167) | class InternS1ProTimeSeriesMultiChannelAdaptiveSubsampling(nn.Module):
    method __init__ (line 169) | def __init__(self,
    method forward (line 188) | def forward(self, inputs, input_lens, sr):
    method forward_encoder (line 222) | def forward_encoder(self, x):
  class InternS1ProTimeSeriesProjector (line 239) | class InternS1ProTimeSeriesProjector(nn.Module):
    method __init__ (line 241) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 256) | def forward(self, ts_features):
  class InternS1ProTimeSeriesModel (line 264) | class InternS1ProTimeSeriesModel(nn.Module):
    method __init__ (line 266) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 273) | def forward(

FILE: lmdeploy/pytorch/models/internvl.py
  class Gating (line 25) | class Gating(nn.Module):
    method __init__ (line 27) | def __init__(self, hidden_size=2048, expansion_factor=4, dtype=None, d...
    method forward (line 52) | def forward(self, x):
  class CrossAttentionPooling (line 63) | class CrossAttentionPooling(nn.Module):
    method __init__ (line 65) | def __init__(self, dim, num_heads=16, dtype=None, device=None):
    method forward (line 97) | def forward(self, batched_tokens: list[torch.Tensor]):
  class InternVisionEmbeddings (line 136) | class InternVisionEmbeddings(nn.Module):
    method __init__ (line 139) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method _get_pos_embed (line 161) | def _get_pos_embed(self, pos_embed, H, W):
    method forward (line 169) | def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
  function pre_rms_norm (line 191) | def pre_rms_norm(q: torch.Tensor, k: torch.Tensor) -> torch.Tensor:
  function post_rms_norm (line 202) | def post_rms_norm(q: torch.Tensor, k: torch.Tensor, weight_q: torch.Tens...
  class InternAttention (line 216) | class InternAttention(nn.Module):
    method __init__ (line 219) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method pre_rms_norm (line 270) | def pre_rms_norm(self, q: torch.Tensor, k: torch.Tensor) -> torch.Tensor:
    method post_rms_norm (line 274) | def post_rms_norm(self, q: torch.Tensor, k: torch.Tensor, variance: to...
    method qkv_norm (line 280) | def qkv_norm(self, q: torch.Tensor, k: torch.Tensor) -> Tuple[torch.Te...
    method forward (line 302) | def forward(self, hidden_states):
  class InternMLP (line 325) | class InternMLP(nn.Module):
    method __init__ (line 328) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 357) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class InternVisionEncoderLayer (line 364) | class InternVisionEncoderLayer(nn.Module):
    method __init__ (line 367) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method _attn (line 383) | def _attn(self, hidden_states):
    method _mlp (line 388) | def _mlp(self, hidden_states):
    method forward (line 392) | def forward(
  class InternVisionEncoder (line 401) | class InternVisionEncoder(nn.Module):
    method __init__ (line 404) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 410) | def forward(
  class InternVisionModel (line 423) | class InternVisionModel(nn.Module):
    method __init__ (line 426) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 433) | def forward(
  class InternVLChatModel (line 447) | class InternVLChatModel(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    method __init__ (line 449) | def __init__(self,
    method compile_model (line 515) | def compile_model(self):
    method _mark_dynamic_once (line 532) | def _mark_dynamic_once(self, pixel_values, dims):
    method pixel_shuffle (line 540) | def pixel_shuffle(self, x, scale_factor=0.5):
    method extract_feature (line 552) | def extract_feature(self, pixel_values):
    method compress_visual_tokens_in_sentence (line 569) | def compress_visual_tokens_in_sentence(
    method get_image_num_per_sample (line 627) | def get_image_num_per_sample(self, input_ids: torch.Tensor, img_contex...
    method split_and_merge (line 642) | def split_and_merge(self, features: torch.Tensor, split_sizes: torch.T...
    method extract_feature_flash (line 657) | def extract_feature_flash(self, pixel_values, lengths):
    method extract_and_compress (line 685) | def extract_and_compress(self, pixel_values: torch.Tensor, input_ids: ...
    method update_forward_inputs (line 716) | def update_forward_inputs(self, input_ids: torch.Tensor, new_seqlens: ...
    method forward (line 757) | def forward(
    method get_input_embeddings (line 805) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 809) | def prepare_inputs_for_generation(
    method load_lora_weights (line 924) | def load_lora_weights(self, weights: Iterable[Tuple[str, torch.Tensor]...
    method load_weights (line 934) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_input_processor (line 959) | def get_input_processor(self) -> BaseModelInputProcessor:
  class InternVLInputProcessor (line 964) | class InternVLInputProcessor(BaseModelInputProcessor):
    method __init__ (line 967) | def __init__(self, config: PretrainedConfig, dtype) -> None:
    method preprocess_input (line 978) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/internvl3_hf.py
  function pre_rms_norm (line 27) | def pre_rms_norm(q: torch.Tensor, k: torch.Tensor) -> torch.Tensor:
  function post_rms_norm (line 38) | def post_rms_norm(q: torch.Tensor, k: torch.Tensor, weight_q: torch.Tens...
  class InternVLVisionPatchEmbeddings (line 52) | class InternVLVisionPatchEmbeddings(nn.Module):
    method __init__ (line 58) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 78) | def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
  class InternVLVisionEmbeddings (line 90) | class InternVLVisionEmbeddings(nn.Module):
    method __init__ (line 93) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method interpolate_pos_encoding (line 115) | def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: i...
    method forward (line 140) | def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
  class InternVLVisionAttention (line 158) | class InternVLVisionAttention(nn.Module):
    method __init__ (line 161) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method pre_rms_norm (line 212) | def pre_rms_norm(self, q: torch.Tensor, k: torch.Tensor) -> torch.Tensor:
    method post_rms_norm (line 216) | def post_rms_norm(self, q: torch.Tensor, k: torch.Tensor, variance: to...
    method qkv_norm (line 221) | def qkv_norm(self, q: torch.Tensor, k: torch.Tensor) -> Tuple[torch.Te...
    method forward (line 243) | def forward(self, hidden_states):
  class InternVLVisionMLP (line 266) | class InternVLVisionMLP(nn.Module):
    method __init__ (line 269) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 298) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class InternVLVisionLayer (line 305) | class InternVLVisionLayer(nn.Module):
    method __init__ (line 308) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method _attn (line 330) | def _attn(self, hidden_states):
    method _mlp (line 336) | def _mlp(self, hidden_states):
    method forward (line 341) | def forward(
  class InternVLVisionEncoder (line 350) | class InternVLVisionEncoder(nn.Module):
    method __init__ (line 353) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 359) | def forward(
  class InternVLVisionModel (line 372) | class InternVLVisionModel(nn.Module):
    method __init__ (line 375) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method get_input_embeddings (line 385) | def get_input_embeddings(self):
    method forward (line 388) | def forward(
  class InternVLMultiModalProjector (line 403) | class InternVLMultiModalProjector(nn.Module):
    method __init__ (line 405) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 434) | def forward(self, image_features):
  class InternVLForConditionalGeneration (line 442) | class InternVLForConditionalGeneration(nn.Module, DeployModelMixinV1, Cu...
    method __init__ (line 444) | def __init__(self,
    method compile_model (line 464) | def compile_model(self):
    method _mark_dynamic_once (line 481) | def _mark_dynamic_once(self, pixel_values, dims):
    method get_input_embeddings (line 489) | def get_input_embeddings(self):
    method get_image_features (line 493) | def get_image_features(
    method pixel_shuffle (line 539) | def pixel_shuffle(self, vision_features: torch.Tensor, scale_factor: f...
    method forward (line 573) | def forward(
    method prepare_inputs_for_generation (line 611) | def prepare_inputs_for_generation(
    method load_lora_weights (line 656) | def load_lora_weights(self, weights: Iterable[Tuple[str, torch.Tensor]...
    method rename_weight (line 667) | def rename_weight(cls, name: str) -> str:
    method load_weights (line 677) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_input_processor (line 710) | def get_input_processor(self) -> BaseModelInputProcessor:
  class InternVLProcessor (line 715) | class InternVLProcessor(BaseModelInputProcessor):
    method __init__ (line 718) | def __init__(self, config: PretrainedConfig, dtype) -> None:
    method preprocess_input (line 722) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/internvl_patch.py
  class InternVisionEmbeddings (line 10) | class InternVisionEmbeddings(nn.Module):
    method __init__ (line 13) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method _get_pos_embed (line 35) | def _get_pos_embed(self, pos_embed, H, W):
    method forward (line 43) | def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
  class InternVisionPatchModel (line 58) | class InternVisionPatchModel(nn.Module):
    method __init__ (line 61) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 66) | def forward(

FILE: lmdeploy/pytorch/models/llama.py
  class LlamaAttention (line 18) | class LlamaAttention(nn.Module):
    method __init__ (line 21) | def __init__(self, config: LlamaConfig, dtype: torch.dtype = None, dev...
    method forward (line 63) | def forward(
  class LlamaMLP (line 106) | class LlamaMLP(nn.Module):
    method __init__ (line 109) | def __init__(self, config: LlamaConfig, dtype: torch.dtype = None, dev...
    method forward (line 136) | def forward(self, x):
  class LlamaDecoderLayer (line 143) | class LlamaDecoderLayer(nn.Module):
    method __init__ (line 146) | def __init__(self,
    method forward (line 176) | def forward(
  class LlamaModel (line 207) | class LlamaModel(nn.Module):
    method __init__ (line 210) | def __init__(self, config: LlamaConfig, dtype: torch.dtype = None, dev...
    method forward (line 233) | def forward(
    method get_input_embeddings (line 278) | def get_input_embeddings(self):
  class LlamaForCausalLM (line 283) | class LlamaForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 298) | def __init__(self,
    method forward (line 316) | def forward(
    method update_weights (line 335) | def update_weights(self):
    method get_logits (line 340) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 345) | def get_input_embeddings(self):
    method get_outputs_cudagraph (line 349) | def get_outputs_cudagraph(self, output_buffers: Dict[str, torch.Tensor...
    method prepare_inputs_for_generation (line 358) | def prepare_inputs_for_generation(
    method load_weights (line 387) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/llama4.py
  class Llama4TextAttention (line 22) | class Llama4TextAttention(nn.Module):
    method __init__ (line 25) | def __init__(self,
    method forward (line 77) | def forward(
  class Llama4TextMLP (line 126) | class Llama4TextMLP(nn.Module):
    method __init__ (line 129) | def __init__(self,
    method forward (line 168) | def forward(self, x):
  class Llama4TextMoe (line 175) | class Llama4TextMoe(nn.Module):
    method __init__ (line 178) | def __init__(self, config: Llama4TextConfig, dtype: torch.dtype = None...
    method forward (line 212) | def forward(self, hidden_states: torch.Tensor):
  class Llama4TextDecoderLayer (line 245) | class Llama4TextDecoderLayer(nn.Module):
    method __init__ (line 248) | def __init__(self,
    method forward (line 271) | def forward(
  class Llama4TextModel (line 303) | class Llama4TextModel(nn.Module):
    method __init__ (line 306) | def __init__(self, config: Llama4TextConfig, dtype: torch.dtype = None...
    method build_llama4_rotary_embedding (line 326) | def build_llama4_rotary_embedding(config: Llama4TextConfig):
    method forward (line 330) | def forward(
  class Llama4ForCausalLM (line 363) | class Llama4ForCausalLM(nn.Module):
    method __init__ (line 365) | def __init__(self,
    method forward (line 381) | def forward(
    method get_input_embeddings (line 400) | def get_input_embeddings(self):
    method get_logits (line 404) | def get_logits(self, hidden_states: torch.Tensor):
  class Llama4MultiModalProjector (line 409) | class Llama4MultiModalProjector(nn.Module):
    method __init__ (line 411) | def __init__(self, config: Llama4Config, dtype: torch.dtype = None, de...
    method forward (line 421) | def forward(self, image_features):
  class Llama4UnfoldConvolution (line 427) | class Llama4UnfoldConvolution(nn.Module):
    method __init__ (line 430) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No...
    method forward (line 444) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Llama4VisionRotaryEmbedding (line 452) | class Llama4VisionRotaryEmbedding(nn.Module):
    method __init__ (line 454) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No...
    method forward (line 471) | def forward(self, hidden_states):
  function reshape_for_broadcast (line 475) | def reshape_for_broadcast(freqs_ci: torch.Tensor, query: torch.Tensor):
  function vision_apply_rotary_emb (line 481) | def vision_apply_rotary_emb(
  class Llama4VisionAttention (line 495) | class Llama4VisionAttention(nn.Module):
    method __init__ (line 498) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No...
    method forward (line 525) | def forward(
  class Llama4VisionMLP (line 565) | class Llama4VisionMLP(nn.Module):
    method __init__ (line 568) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No...
    method forward (line 585) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Llama4VisionEncoderLayer (line 593) | class Llama4VisionEncoderLayer(nn.Module):
    method __init__ (line 596) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No...
    method forward (line 606) | def forward(
  class Llama4VisionEncoder (line 632) | class Llama4VisionEncoder(nn.Module):
    method __init__ (line 635) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No...
    method forward (line 641) | def forward(
  function pixel_shuffle (line 655) | def pixel_shuffle(input_tensor: torch.Tensor, shuffle_ratio: int):
  class Llama4VisionMLP2 (line 675) | class Llama4VisionMLP2(torch.nn.Module):
    method __init__ (line 677) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No...
    method forward (line 695) | def forward(self, hidden_states):
  class Llama4VisionPixelShuffleMLP (line 702) | class Llama4VisionPixelShuffleMLP(nn.Module):
    method __init__ (line 704) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No...
    method forward (line 711) | def forward(self, encoded_patches: torch.Tensor) -> torch.Tensor:
  class Llama4VisionModel (line 716) | class Llama4VisionModel(nn.Module):
    method __init__ (line 719) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No...
    method get_input_embeddings (line 745) | def get_input_embeddings(self):
    method forward (line 750) | def forward(
  class Llama4ForConditionalGeneration (line 796) | class Llama4ForConditionalGeneration(nn.Module, CudaGraphMixin):
    method __init__ (line 798) | def __init__(self,
    method _update_quant_config (line 818) | def _update_quant_config(config: Llama4Config):
    method get_image_features (line 834) | def get_image_features(
    method get_input_embeddings (line 844) | def get_input_embeddings(self):
    method forward (line 848) | def forward(
    method get_logits (line 879) | def get_logits(self, hidden_states: torch.Tensor):
    method prepare_inputs_for_generation (line 883) | def prepare_inputs_for_generation(
    method load_weights (line 921) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_input_processor (line 1004) | def get_input_processor(self) -> BaseModelInputProcessor:
  class Llama4InputProcessor (line 1009) | class Llama4InputProcessor(BaseModelInputProcessor):
    method __init__ (line 1012) | def __init__(self, config: Llama4Config, dtype) -> None:
    method preprocess_input (line 1018) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/llama_eagle.py
  class EagleLlamaDecoderLayer (line 17) | class EagleLlamaDecoderLayer(LlamaDecoderLayer):
    method __init__ (line 19) | def __init__(self,
  class EagleLlamaModel (line 33) | class EagleLlamaModel(nn.Module):
    method __init__ (line 35) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 62) | def forward(
    method get_input_embeddings (line 98) | def get_input_embeddings(self):
  class EagleLlamaForCausalLM (line 103) | class EagleLlamaForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 117) | def __init__(self, config, ctx_mgr, dtype=None, device=None):
    method forward (line 125) | def forward(
    method prepare_inputs_for_generation (line 146) | def prepare_inputs_for_generation(
    method make_buffers_cudagraph (line 166) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method fill_buffers_cudagraph (line 178) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method update_weights (line 199) | def update_weights(self):
    method get_input_embeddings (line 204) | def get_input_embeddings(self):
    method load_weights (line 208) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/llama_eagle3.py
  class Eagle3LlamaDecoderLayer (line 18) | class Eagle3LlamaDecoderLayer(LlamaDecoderLayer):
    method __init__ (line 21) | def __init__(self,
    method forward (line 50) | def forward(
  class Eagle3LlamaModel (line 80) | class Eagle3LlamaModel(nn.Module):
    method __init__ (line 82) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 109) | def forward(
    method get_input_embeddings (line 146) | def get_input_embeddings(self):
  class Eagle3LlamaForCausalLM (line 151) | class Eagle3LlamaForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 165) | def __init__(self, config, ctx_mgr, dtype=None, device=None):
    method forward (line 188) | def forward(
    method prepare_inputs_for_generation (line 209) | def prepare_inputs_for_generation(
    method get_logits (line 229) | def get_logits(self, hidden_states: torch.Tensor):
    method make_buffers_cudagraph (line 234) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method fill_buffers_cudagraph (line 248) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method get_outputs_cudagraph (line 265) | def get_outputs_cudagraph(self, output_buffers: Dict[str, torch.Tensor...
    method get_input_embeddings (line 273) | def get_input_embeddings(self):
    method load_weights (line 277) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/llava.py
  class LlavaMultiModalProjector (line 23) | class LlavaMultiModalProjector(nn.Module):
    method __init__ (line 25) | def __init__(self, config: LlavaConfig, dtype: torch.dtype = None, dev...
    method forward (line 41) | def forward(self, image_features):
  class CLIPVisionEmbeddings (line 48) | class CLIPVisionEmbeddings(nn.Module):
    method __init__ (line 51) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de...
    method interpolate_pos_encoding (line 82) | def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: i...
    method forward (line 123) | def forward(self, pixel_values: torch.FloatTensor, interpolate_pos_enc...
  class CLIPAttention (line 141) | class CLIPAttention(nn.Module):
    method __init__ (line 144) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de...
    method forward (line 174) | def forward(
  class CLIPMLP (line 205) | class CLIPMLP(nn.Module):
    method __init__ (line 208) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de...
    method forward (line 234) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class CLIPEncoderLayer (line 242) | class CLIPEncoderLayer(nn.Module):
    method __init__ (line 245) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de...
    method forward (line 253) | def forward(
  class CLIPEncoder (line 278) | class CLIPEncoder(nn.Module):
    method __init__ (line 281) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de...
    method forward (line 287) | def forward(
  class CLIPVisionTransformer (line 309) | class CLIPVisionTransformer(nn.Module):
    method __init__ (line 312) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de...
    method forward (line 322) | def forward(
  class CLIPVisionModel (line 347) | class CLIPVisionModel(nn.Module):
    method __init__ (line 350) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de...
    method forward (line 354) | def forward(self,
  function build_vision_model (line 365) | def build_vision_model(vision_config, dtype: torch.dtype = None, device:...
  class LlavaForConditionalGeneration (line 375) | class LlavaForConditionalGeneration(nn.Module, CudaGraphMixin, DeployMod...
    method __init__ (line 377) | def __init__(self,
    method get_image_features (line 395) | def get_image_features(self,
    method forward (line 413) | def forward(
    method get_logits (line 442) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 446) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 450) | def prepare_inputs_for_generation(
    method load_weights (line 495) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_input_processor (line 529) | def get_input_processor(self) -> BaseModelInputProcessor:
  class LLavaInputProcessor (line 534) | class LLavaInputProcessor(BaseModelInputProcessor):
    method __init__ (line 537) | def __init__(self, config: PretrainedConfig, dtype) -> None:
    method preprocess_input (line 541) | def preprocess_input(self,
  function get_anyres_image_grid_shape (line 571) | def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size):
  function unpad_image (line 585) | def unpad_image(tensor, original_size):
  function image_size_to_num_patches (line 609) | def image_size_to_num_patches(image_size, grid_pinpoints, patch_size: int):
  class LlavaNextForConditionalGeneration (line 628) | class LlavaNextForConditionalGeneration(LlavaForConditionalGeneration):
    method __init__ (line 630) | def __init__(self,
    method get_image_features (line 639) | def get_image_features(
    method pack_image_features (line 674) | def pack_image_features(self, image_features, image_sizes, vision_feat...
    method forward (line 720) | def forward(
    method get_input_processor (line 759) | def get_input_processor(self) -> BaseModelInputProcessor:
    method prepare_inputs_for_generation (line 763) | def prepare_inputs_for_generation(
  class LLavaNextInputProcessor (line 812) | class LLavaNextInputProcessor(BaseModelInputProcessor):
    method __init__ (line 815) | def __init__(self, config: PretrainedConfig, dtype) -> None:
    method preprocess_input (line 819) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/minicpm3.py
  class MiniCPMAttention (line 22) | class MiniCPMAttention(nn.Module):
    method __init__ (line 25) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 107) | def forward(
  class MiniCPMMLP (line 173) | class MiniCPMMLP(nn.Module):
    method __init__ (line 176) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 202) | def forward(self, x):
  class MiniCPMDecoderLayer (line 209) | class MiniCPMDecoderLayer(nn.Module):
    method __init__ (line 212) | def __init__(self,
    method forward (line 243) | def forward(
  class MiniCPM3Model (line 275) | class MiniCPM3Model(nn.Module):
    method __init__ (line 278) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 328) | def forward(
    method get_input_embeddings (line 363) | def get_input_embeddings(self):
  class MiniCPM3ForCausalLM (line 368) | class MiniCPM3ForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 378) | def __init__(self,
    method forward (line 395) | def forward(
    method update_weights (line 416) | def update_weights(self):
    method get_input_embeddings (line 421) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 425) | def prepare_inputs_for_generation(
    method load_weights (line 454) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/minicpmv26.py
  class MiniCPMV26Attention (line 17) | class MiniCPMV26Attention(nn.Module):
    method __init__ (line 20) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 60) | def forward(
  class MiniCPMV26MLP (line 103) | class MiniCPMV26MLP(nn.Module):
    method __init__ (line 106) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 132) | def forward(self, x):
  class MiniCPMV26DecoderLayer (line 139) | class MiniCPMV26DecoderLayer(nn.Module):
    method __init__ (line 142) | def __init__(self,
    method forward (line 171) | def forward(
  class MiniCPMV26Model (line 202) | class MiniCPMV26Model(nn.Module):
    method __init__ (line 205) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 228) | def forward(
    method get_input_embeddings (line 266) | def get_input_embeddings(self):
  class MiniCPMVForCausalLM (line 271) | class MiniCPMVForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 281) | def __init__(self,
    method forward (line 298) | def forward(
    method get_logits (line 318) | def get_logits(self, hidden_states: torch.Tensor):
    method update_weights (line 322) | def update_weights(self):
    method get_input_embeddings (line 327) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 331) | def prepare_inputs_for_generation(
    method load_weights (line 360) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/mistral.py
  class MistralAttention (line 18) | class MistralAttention(nn.Module):
    method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 64) | def forward(
  class MistralMLP (line 107) | class MistralMLP(nn.Module):
    method __init__ (line 110) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 136) | def forward(self, x):
  class MistralDecoderLayer (line 143) | class MistralDecoderLayer(nn.Module):
    method __init__ (line 146) | def __init__(self,
    method forward (line 175) | def forward(
  class MistralModel (line 206) | class MistralModel(nn.Module):
    method __init__ (line 209) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 232) | def forward(
    method get_input_embeddings (line 270) | def get_input_embeddings(self):
  class MistralForCausalLM (line 275) | class MistralForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 290) | def __init__(self,
    method forward (line 307) | def forward(
    method get_logits (line 326) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 330) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 334) | def prepare_inputs_for_generation(
    method load_weights (line 363) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/mixtral.py
  class MixtralAttention (line 17) | class MixtralAttention(nn.Module):
    method __init__ (line 20) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 62) | def forward(
  class MixtralSparseMoeBlock (line 101) | class MixtralSparseMoeBlock(nn.Module):
    method __init__ (line 104) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 139) | def forward(self, hidden_states: torch.Tensor):
  class MixtralDecoderLayer (line 156) | class MixtralDecoderLayer(nn.Module):
    method __init__ (line 159) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N...
    method forward (line 178) | def forward(
  class MixtralModel (line 209) | class MixtralModel(nn.Module):
    method __init__ (line 212) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 231) | def forward(
    method get_input_embeddings (line 263) | def get_input_embeddings(self):
  class MixtralForCausalLM (line 268) | class MixtralForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 271) | def __init__(self,
    method forward (line 287) | def forward(
    method get_logits (line 305) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 309) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 313) | def prepare_inputs_for_generation(
    method load_weights (line 332) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/patch.py
  function _get_rewrite_qualname (line 24) | def _get_rewrite_qualname(origin_qualname: str, module_map: Dict[str, st...
  function _class_from_qualname (line 41) | def _class_from_qualname(qualname: str) -> Any:
  function _find_rewrite_module_qualname (line 61) | def _find_rewrite_module_qualname(model, module_map: Dict[str, str]):
  function get_rewrite_cls (line 96) | def get_rewrite_cls(model: torch.nn.Module, module_map: Dict[str, str] =...
  function _get_module_map (line 106) | def _get_module_map():
  function update_custom_module_map (line 118) | def update_custom_module_map(module_map_path: str):
  function _get_model_class (line 156) | def _get_model_class(config, module_map):
  function build_model_from_hf_config (line 188) | def build_model_from_hf_config(model_config: PretrainedConfig,
  function build_patched_model (line 210) | def build_patched_model(config: ModelConfig, device: torch.device = None...
  function add_adapters (line 218) | def add_adapters(model: torch.nn.Module,
  function build_model_context (line 323) | def build_model_context(ctx: BuildModelContext):
  function get_build_model_context (line 333) | def get_build_model_context() -> BuildModelContext:
  function add_prefix (line 339) | def add_prefix(name: str, prefix: str) -> str:

FILE: lmdeploy/pytorch/models/phi3.py
  class Phi3Attention (line 19) | class Phi3Attention(nn.Module):
    method __init__ (line 22) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 64) | def forward(
  class Phi3MLP (line 107) | class Phi3MLP(nn.Module):
    method __init__ (line 110) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 136) | def forward(self, x):
  class Phi3DecoderLayer (line 143) | class Phi3DecoderLayer(nn.Module):
    method __init__ (line 146) | def __init__(self,
    method forward (line 175) | def forward(
  class Phi3Model (line 206) | class Phi3Model(nn.Module):
    method __init__ (line 209) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 232) | def forward(
    method get_input_embeddings (line 270) | def get_input_embeddings(self):
  class Phi3ForCausalLM (line 275) | class Phi3ForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    method __init__ (line 285) | def __init__(self,
    method forward (line 298) | def forward(
    method get_input_embeddings (line 317) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 321) | def prepare_inputs_for_generation(
    method load_weights (line 350) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/phi3_moe.py
  function sparsemixer (line 19) | def sparsemixer(scores, top_k, jitter_eps):
  class PhiMoEAttention (line 68) | class PhiMoEAttention(nn.Module):
    method __init__ (line 71) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 112) | def forward(
  class PhiMoESparseMoeBlock (line 150) | class PhiMoESparseMoeBlock(nn.Module):
    method __init__ (line 153) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 183) | def forward(self, hidden_states: torch.Tensor):
  class PhiMoEDecoderLayer (line 203) | class PhiMoEDecoderLayer(nn.Module):
    method __init__ (line 206) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N...
    method forward (line 223) | def forward(
  class PhiMoEModel (line 253) | class PhiMoEModel(nn.Module):
    method __init__ (line 256) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor...
    method forward (line 304) | def forward(
    method get_input_embeddings (line 335) | def get_input_embeddings(self):
  class PhiMoEForCausalLM (line 340) | class PhiMoEForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 343) | def __init__(self,
    method forward (line 359) | def forward(
    method get_logits (line 377) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 381) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 385) | def prepare_inputs_for_generation(
    method load_weights (line 404) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/phi3_v.py
  class Phi3ImageEmbedding (line 34) | class Phi3ImageEmbedding(nn.Module):
    method __init__ (line 38) | def __init__(self,
    method get_img_features (line 108) | def get_img_features(self, img_embeds: torch.FloatTensor) -> torch.Flo...
    method forward (line 124) | def forward(
    method hd_feature_transform (line 143) | def hd_feature_transform(self, image_features, image_sizes):
    method reshape_hd_patches_2x2merge (line 187) | def reshape_hd_patches_2x2merge(self, image_features, h_crop, w_crop):
    method add_image_newline (line 207) | def add_image_newline(self, image_features_hd):
  class Phi3VModel (line 220) | class Phi3VModel(Phi3Model):
    method __init__ (line 223) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 236) | def forward(
  class Phi3VForCausalLM (line 266) | class Phi3VForCausalLM(Phi3ForCausalLM):
    method __init__ (line 268) | def __init__(self,
    method forward (line 283) | def forward(
    method prepare_inputs_for_generation (line 308) | def prepare_inputs_for_generation(
    method load_weights (line 336) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_input_processor (line 352) | def get_input_processor(self) -> BaseModelInputProcessor:
  class Phi3VInputProcessor (line 357) | class Phi3VInputProcessor(BaseModelInputProcessor):
    method __init__ (line 360) | def __init__(self, config: PretrainedConfig, dtype) -> None:
    method preprocess_input (line 364) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/q_modules.py
  class QTensor (line 13) | class QTensor:
    method __post_init__ (line 22) | def __post_init__(self):
    method __getattr__ (line 25) | def __getattr__(self, name: str):
  class QRMSNorm (line 33) | class QRMSNorm(nn.Module):
    method __init__ (line 37) | def __init__(self, hidden_size, eps=1e-6, quant_dtype=torch.int8):
    method from_float (line 44) | def from_float(cls, mod: nn.Module, initialization: bool = True, quant...
    method forward (line 57) | def forward(self, hidden_states):
  class QLinear (line 70) | class QLinear(nn.Module):
    method __init__ (line 81) | def __init__(self,
    method from_float (line 101) | def from_float(cls, mod: nn.Module, initialization: bool = True, quant...
    method forward (line 123) | def forward(self, input):
    method extra_repr (line 145) | def extra_repr(self) -> str:

FILE: lmdeploy/pytorch/models/qwen.py
  class QWenAttention (line 18) | class QWenAttention(torch.nn.Module):
    method __init__ (line 24) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 65) | def forward(
  class QWenMLP (line 108) | class QWenMLP(nn.Module):
    method __init__ (line 111) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 138) | def forward(self, x):
  class QWenBlock (line 145) | class QWenBlock(torch.nn.Module):
    method __init__ (line 151) | def __init__(self,
    method forward (line 183) | def forward(
  class QWenModel (line 214) | class QWenModel(nn.Module):
    method __init__ (line 216) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 245) | def forward(
    method get_input_embeddings (line 283) | def get_input_embeddings(self):
  class QWenLMHeadModel (line 288) | class QWenLMHeadModel(nn.Module, CudaGraphMixin):
    method __init__ (line 298) | def __init__(self,
    method forward (line 316) | def forward(
    method get_logits (line 335) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 339) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 343) | def prepare_inputs_for_generation(
    method load_weights (line 372) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/qwen2.py
  class Qwen2Attention (line 18) | class Qwen2Attention(nn.Module):
    method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 61) | def forward(
  class Qwen2MLP (line 104) | class Qwen2MLP(nn.Module):
    method __init__ (line 107) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 133) | def forward(self, x):
  class Qwen2DecoderLayer (line 140) | class Qwen2DecoderLayer(nn.Module):
    method __init__ (line 143) | def __init__(self,
    method forward (line 172) | def forward(
  class Qwen2Model (line 203) | class Qwen2Model(nn.Module):
    method __init__ (line 206) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 231) | def forward(
    method get_input_embeddings (line 269) | def get_input_embeddings(self):
  class Qwen2ForCausalLM (line 274) | class Qwen2ForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    method __init__ (line 289) | def __init__(self,
    method forward (line 302) | def forward(
    method get_input_embeddings (line 321) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 325) | def prepare_inputs_for_generation(
    method load_weights (line 354) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/qwen2_5_vl.py
  class Qwen2_5_PatchEmbed (line 25) | class Qwen2_5_PatchEmbed(nn.Module):
    method __init__ (line 28) | def __init__(self,
    method forward (line 50) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Qwen2_5_VisionRotaryEmbedding (line 58) | class Qwen2_5_VisionRotaryEmbedding(nn.Module):
    method __init__ (line 61) | def __init__(self, dim: int, theta: float = 10000.0, device: torch.dev...
    method forward (line 66) | def forward(self, seqlen: int) -> torch.Tensor:
  class Qwen2_5_VLVisionAttention (line 72) | class Qwen2_5_VLVisionAttention(nn.Module):
    method __init__ (line 75) | def __init__(self,
    method forward (line 120) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor,
  class Qwen2_5_VLMLP (line 147) | class Qwen2_5_VLMLP(nn.Module):
    method __init__ (line 150) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 176) | def forward(self, x):
  class Qwen2_5_VLVisionBlock (line 181) | class Qwen2_5_VLVisionBlock(nn.Module):
    method __init__ (line 184) | def __init__(self,
    method forward (line 198) | def forward(self,
  class Qwen2_5_VLPatchMerger (line 211) | class Qwen2_5_VLPatchMerger(nn.Module):
    method __init__ (line 214) | def __init__(self,
    method forward (line 230) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class Qwen2_5_VisionTransformerPretrainedModel (line 236) | class Qwen2_5_VisionTransformerPretrainedModel(nn.Module):
    method __init__ (line 239) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method rot_pos_emb (line 268) | def rot_pos_emb(self, grid_thw):
    method get_window_index (line 298) | def get_window_index(self, grid_thw):
    method forward (line 339) | def forward(self,
  class Qwen2_5_VLForConditionalGeneration (line 376) | class Qwen2_5_VLForConditionalGeneration(nn.Module, DeployModelMixinV1, ...
    method __init__ (line 391) | def __init__(self,
    method forward (line 416) | def forward(
    method get_input_embeddings (line 455) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 459) | def prepare_inputs_for_generation(
    method load_weights (line 529) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method make_buffers_cudagraph (line 567) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method fill_buffers_cudagraph (line 578) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method _get_model_metas (line 599) | def _get_model_metas(self, context: StepContext):
    method _update_model_meta_decoding (line 607) | def _update_model_meta_decoding(self, context: StepContext):
    method _get_multimodal_pos_ids (line 620) | def _get_multimodal_pos_ids(self, grid_thw: list, device: torch.device):
    method _update_model_meta_prefilling (line 631) | def _update_model_meta_prefilling(self, context: StepContext):
    method update_model_metas (line 675) | def update_model_metas(self,
    method get_input_processor (line 685) | def get_input_processor(self) -> BaseModelInputProcessor:
  class Qwen2_5_VLInputProcessor (line 690) | class Qwen2_5_VLInputProcessor(BaseModelInputProcessor):
    method __init__ (line 693) | def __init__(self, config: PretrainedConfig) -> None:
    method preprocess_input (line 696) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/qwen2_moe.py
  class Qwen2MoeAttention (line 22) | class Qwen2MoeAttention(nn.Module):
    method __init__ (line 25) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 66) | def forward(
  class Qwen2MoeMLP (line 109) | class Qwen2MoeMLP(nn.Module):
    method __init__ (line 112) | def __init__(self,
    method forward (line 147) | def forward(self, x):
  class Qwen2MoeSparseMoeBlock (line 154) | class Qwen2MoeSparseMoeBlock(nn.Module):
    method __init__ (line 157) | def __init__(self,
    method forward (line 217) | def forward(self, hidden_states: torch.Tensor):
  class Qwen2MoeDecoderLayer (line 242) | class Qwen2MoeDecoderLayer(nn.Module):
    method __init__ (line 245) | def __init__(self,
    method forward (line 274) | def forward(
  class Qwen2MoeModel (line 305) | class Qwen2MoeModel(nn.Module):
    method __init__ (line 308) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 333) | def forward(
    method get_input_embeddings (line 371) | def get_input_embeddings(self):
  class Qwen2MoeForCausalLM (line 376) | class Qwen2MoeForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    method __init__ (line 391) | def __init__(self,
    method forward (line 404) | def forward(
    method get_input_embeddings (line 423) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 427) | def prepare_inputs_for_generation(
    method _load_weight_experts (line 456) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method load_weights (line 470) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/qwen2_reward.py
  class Qwen2ForRewardModel (line 16) | class Qwen2ForRewardModel(nn.Module, CudaGraphMixin):
    method __init__ (line 31) | def __init__(self,
    method forward (line 53) | def forward(
    method get_logits (line 72) | def get_logits(self, hidden_states: torch.Tensor):
    method update_weights (line 77) | def update_weights(self):
    method get_input_embeddings (line 81) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 85) | def prepare_inputs_for_generation(
    method load_weights (line 106) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/qwen2_vl.py
  function _apply_mrope_selection (line 22) | def _apply_mrope_selection(hidden_states: torch.Tensor, mrope_position_i...
  class Qwen2Attention (line 43) | class Qwen2Attention(nn.Module):
    method __init__ (line 46) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 87) | def forward(
  class Qwen2MLP (line 130) | class Qwen2MLP(nn.Module):
    method __init__ (line 133) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 159) | def forward(self, x):
  class Qwen2DecoderLayer (line 166) | class Qwen2DecoderLayer(nn.Module):
    method __init__ (line 169) | def __init__(self,
    method forward (line 198) | def forward(
  class Qwen2Model (line 229) | class Qwen2Model(nn.Module):
    method __init__ (line 232) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 258) | def forward(
    method get_input_embeddings (line 301) | def get_input_embeddings(self):
  class PatchEmbed (line 306) | class PatchEmbed(nn.Module):
    method __init__ (line 309) | def __init__(self,
    method forward (line 331) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class VisionRotaryEmbedding (line 339) | class VisionRotaryEmbedding(nn.Module):
    method __init__ (line 342) | def __init__(self, dim: int, theta: float = 10000.0, device: torch.dev...
    method forward (line 347) | def forward(self, seqlen: int) -> torch.Tensor:
  class VisionAttention (line 353) | class VisionAttention(nn.Module):
    method __init__ (line 356) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 395) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor,
  class VisionMlp (line 422) | class VisionMlp(nn.Module):
    method __init__ (line 425) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 457) | def forward(self, x):
  class Qwen2VLVisionBlock (line 462) | class Qwen2VLVisionBlock(nn.Module):
    method __init__ (line 465) | def __init__(self,
    method forward (line 479) | def forward(self,
  class PatchMerger (line 497) | class PatchMerger(nn.Module):
    method __init__ (line 500) | def __init__(self,
    method forward (line 515) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class Qwen2VisionTransformerPretrainedModel (line 521) | class Qwen2VisionTransformerPretrainedModel(nn.Module):
    method __init__ (line 524) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method rot_pos_emb (line 549) | def rot_pos_emb(self, grid_thw):
    method forward (line 579) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor,
  class Qwen2VLForConditionalGeneration (line 597) | class Qwen2VLForConditionalGeneration(nn.Module, DeployModelMixinV1, Cud...
    method __init__ (line 612) | def __init__(self,
    method forward (line 637) | def forward(
    method get_input_embeddings (line 671) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 675) | def prepare_inputs_for_generation(
    method load_weights (line 732) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method make_buffers_cudagraph (line 770) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method fill_buffers_cudagraph (line 781) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method _get_model_metas (line 802) | def _get_model_metas(self, context: StepContext):
    method _update_model_meta_decoding (line 810) | def _update_model_meta_decoding(self, context: StepContext):
    method _get_multimodal_pos_ids (line 823) | def _get_multimodal_pos_ids(self, grid_thw: list, device: torch.device):
    method _update_model_meta_prefilling (line 834) | def _update_model_meta_prefilling(self, context: StepContext):
    method update_model_metas (line 878) | def update_model_metas(self,
    method get_input_processor (line 888) | def get_input_processor(self) -> BaseModelInputProcessor:
  class Qwen2VLInputProcessor (line 893) | class Qwen2VLInputProcessor(BaseModelInputProcessor):
    method __init__ (line 896) | def __init__(self, config: PretrainedConfig) -> None:
    method preprocess_input (line 899) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/qwen3.py
  class Qwen3Attention (line 19) | class Qwen3Attention(nn.Module):
    method __init__ (line 22) | def __init__(self,
    method forward (line 77) | def forward(
  class Qwen3MLP (line 124) | class Qwen3MLP(nn.Module):
    method __init__ (line 127) | def __init__(self,
    method forward (line 161) | def forward(self, x):
  class Qwen3DecoderLayer (line 168) | class Qwen3DecoderLayer(nn.Module):
    method __init__ (line 171) | def __init__(self,
    method forward (line 207) | def forward(
  class Qwen3model (line 238) | class Qwen3model(nn.Module):
    method __init__ (line 241) | def __init__(self,
    method forward (line 274) | def forward(
    method get_input_embeddings (line 312) | def get_input_embeddings(self):
  class Qwen3ForCausalLM (line 317) | class Qwen3ForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    method __init__ (line 332) | def __init__(self,
    method forward (line 346) | def forward(
    method get_input_embeddings (line 365) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 369) | def prepare_inputs_for_generation(
    method load_weights (line 398) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/qwen3_5.py
  class Qwen3_5VisionPatchEmbed (line 33) | class Qwen3_5VisionPatchEmbed(nn.Module):
    method __init__ (line 35) | def __init__(self, config, dtype: torch.dtype | None = None, device: t...
    method forward (line 51) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Qwen3_5VisionMLP (line 59) | class Qwen3_5VisionMLP(nn.Module):
    method __init__ (line 62) | def __init__(self,
    method forward (line 99) | def forward(self, x):
  class Qwen3_5VisionBlock (line 104) | class Qwen3_5VisionBlock(nn.Module):
    method __init__ (line 107) | def __init__(self,
    method forward (line 122) | def forward(self,
  class Qwen3_5VisionPatchMerger (line 135) | class Qwen3_5VisionPatchMerger(nn.Module):
    method __init__ (line 137) | def __init__(self,
    method forward (line 167) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class Qwen3_5VisionModel (line 174) | class Qwen3_5VisionModel(nn.Module):
    method __init__ (line 177) | def __init__(self,
    method rot_pos_ids (line 205) | def rot_pos_ids(h: int, w: int, spatial_merge_size: int) -> torch.Tensor:
    method rot_pos_emb (line 231) | def rot_pos_emb(self, grid_thw: torch.Tensor) -> torch.Tensor:
    method fast_pos_embed_interpolate (line 247) | def fast_pos_embed_interpolate(self, grid_thw: List[List[int]]) -> tor...
    method forward (line 302) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tenso...
  class Qwen3_5MLP (line 317) | class Qwen3_5MLP(nn.Module):
    method __init__ (line 320) | def __init__(self,
    method forward (line 360) | def forward(self, x, all_routed_experts: torch.Tensor | None = None):
  class Qwen3_5GatedDeltaNet (line 367) | class Qwen3_5GatedDeltaNet(nn.Module):
    method __init__ (line 370) | def __init__(
    method get_A_log_exp (line 444) | def get_A_log_exp(self):
    method make_params (line 450) | def make_params(self, num_v_heads: int, device: torch.device | None):
    method weight_loader_qkv (line 461) | def weight_loader_qkv(self, param: torch.nn.Parameter, loaded_weight: ...
    method weight_loader_a_dt (line 471) | def weight_loader_a_dt(self, param: torch.nn.Parameter, loaded_weight:...
    method fix_zba_ordering (line 477) | def fix_zba_ordering(self, mixed_zba: torch.Tensor):
    method _load_state (line 490) | def _load_state(self, past_key_value: Tuple[torch.Tensor, torch.Tensor...
    method forward (line 494) | def forward(
  class Qwen3_5Attention (line 555) | class Qwen3_5Attention(nn.Module):
    method __init__ (line 558) | def __init__(self,
    method forward (line 630) | def forward(
  class Qwen3_5DecoderLayer (line 680) | class Qwen3_5DecoderLayer(nn.Module):
    method __init__ (line 683) | def __init__(self,
    method forward (line 728) | def forward(
  class Qwen3_5TextRotaryEmbedding (line 768) | class Qwen3_5TextRotaryEmbedding(nn.Module):
    method __init__ (line 771) | def __init__(self, config: PretrainedConfig, device=None):
    method compute_default_rope_parameters (line 793) | def compute_default_rope_parameters(
    method apply_interleaved_mrope (line 823) | def apply_interleaved_mrope(self, freqs, mrope_section):
    method forward (line 843) | def forward(self, x, position_ids):
  class Qwen3_5TextModel (line 860) | class Qwen3_5TextModel(nn.Module):
    method __init__ (line 863) | def __init__(self,
    method forward (line 896) | def forward(
    method get_input_embeddings (line 947) | def get_input_embeddings(self):
  class Qwen3_5Model (line 952) | class Qwen3_5Model(nn.Module):
    method __init__ (line 954) | def __init__(self,
    method forward (line 970) | def forward(
    method get_input_embeddings (line 1024) | def get_input_embeddings(self):
  class Qwen3_5ForConditionalGeneration (line 1029) | class Qwen3_5ForConditionalGeneration(nn.Module, DeployModelMixinV1, Cud...
    method __init__ (line 1044) | def __init__(self,
    method forward (line 1068) | def forward(
    method get_input_embeddings (line 1113) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 1117) | def prepare_inputs_for_generation(
    method load_weights (line 1198) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method make_buffers_cudagraph (line 1264) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method fill_buffers_cudagraph (line 1280) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, *args, **k...
    method _get_model_metas (line 1304) | def _get_model_metas(self, context: StepContext):
    method _update_model_meta_decoding (line 1312) | def _update_model_meta_decoding(self, context: StepContext):
    method _get_multimodal_pos_ids (line 1329) | def _get_multimodal_pos_ids(self, grid_thw: list, device: torch.device):
    method _update_model_meta_prefilling (line 1340) | def _update_model_meta_prefilling(self, context: StepContext):
    method update_model_metas (line 1429) | def update_model_metas(self, past_key_values: List[List[torch.Tensor]]...
    method get_input_processor (line 1437) | def get_input_processor(self) -> BaseModelInputProcessor:

FILE: lmdeploy/pytorch/models/qwen3_5_moe.py
  class Qwen3_5MoeTopKRouter (line 24) | class Qwen3_5MoeTopKRouter(nn.Module):
    method __init__ (line 26) | def __init__(self, config, dtype: torch.dtype | None = None, device: t...
    method forward (line 33) | def forward(self, hidden_states):
  class Qwen3_5MoeSparseMoeBlock (line 44) | class Qwen3_5MoeSparseMoeBlock(nn.Module):
    method __init__ (line 47) | def __init__(self,
    method forward (line 97) | def forward(self, hidden_states: torch.Tensor, all_routed_experts: tor...
  class Qwen3_5MoeDecoderLayer (line 121) | class Qwen3_5MoeDecoderLayer(Qwen3_5DecoderLayer):
    method __init__ (line 124) | def __init__(
  class Qwen3_5MoeTextModel (line 172) | class Qwen3_5MoeTextModel(Qwen3_5TextModel):
    method __init__ (line 174) | def __init__(self,
  class Qwen3_5MoeModel (line 208) | class Qwen3_5MoeModel(Qwen3_5Model):
    method __init__ (line 210) | def __init__(self,
  class Qwen3_5MoeForConditionalGeneration (line 227) | class Qwen3_5MoeForConditionalGeneration(Qwen3_5ForConditionalGeneration):
    method __init__ (line 242) | def __init__(self,
    method _load_weight_experts (line 267) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method _load_weight_fused_experts (line 284) | def _load_weight_fused_experts(self, name: str, loaded_weight: torch.T...
    method load_weights (line 307) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/qwen3_moe.py
  class Qwen3MoeAttention (line 22) | class Qwen3MoeAttention(nn.Module):
    method __init__ (line 25) | def __init__(self,
    method forward (line 94) | def forward(
  class Qwen3MoeMLP (line 141) | class Qwen3MoeMLP(nn.Module):
    method __init__ (line 144) | def __init__(self,
    method forward (line 184) | def forward(self, x):
  class Qwen3MoeSparseMoeBlock (line 191) | class Qwen3MoeSparseMoeBlock(nn.Module):
    method __init__ (line 194) | def __init__(self,
    method forward (line 244) | def forward(
  class Qwen3MoeDecoderLayer (line 268) | class Qwen3MoeDecoderLayer(nn.Module):
    method __init__ (line 271) | def __init__(
    method forward (line 316) | def forward(
  class Qwen3MoeModel (line 348) | class Qwen3MoeModel(nn.Module):
    method __init__ (line 351) | def __init__(self,
    method forward (line 395) | def forward(
    method get_input_embeddings (line 435) | def get_input_embeddings(self):
  class Qwen3MoeForCausalLM (line 440) | class Qwen3MoeForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    method __init__ (line 455) | def __init__(
    method forward (line 481) | def forward(
    method get_input_embeddings (line 514) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 518) | def prepare_inputs_for_generation(
    method _load_weight_experts (line 547) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method _load_weight_fused_experts (line 565) | def _load_weight_fused_experts(self, name: str, loaded_weight: torch.T...
    method load_weights (line 590) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/qwen3_next.py
  class Qwen3NextGatedDeltaNet (line 25) | class Qwen3NextGatedDeltaNet(nn.Module):
    method __init__ (line 28) | def __init__(self,
    method get_A_log_exp (line 96) | def get_A_log_exp(self):
    method make_params (line 102) | def make_params(self, num_v_heads: int, device: torch.device | None):
    method weight_loader_a_dt (line 113) | def weight_loader_a_dt(self, param: torch.nn.Parameter, loaded_weight:...
    method fix_query_key_value_ordering (line 119) | def fix_query_key_value_ordering(self, mixed_qkvz: torch.Tensor, mixed...
    method _load_state (line 145) | def _load_state(self, past_key_value: Tuple[torch.Tensor, torch.Tensor...
    method forward (line 149) | def forward(
  class Qwen3NextAttention (line 209) | class Qwen3NextAttention(nn.Module):
    method __init__ (line 212) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 268) | def forward(
  class Qwen3NextMLP (line 318) | class Qwen3NextMLP(nn.Module):
    method __init__ (line 321) | def __init__(self,
    method forward (line 356) | def forward(self, x):
  class Qwen3NextSparseMoeBlock (line 363) | class Qwen3NextSparseMoeBlock(nn.Module):
    method __init__ (line 366) | def __init__(self,
    method forward (line 427) | def forward(self, hidden_states: torch.Tensor):
  class Qwen3NextDecoderLayer (line 450) | class Qwen3NextDecoderLayer(nn.Module):
    method __init__ (line 453) | def __init__(self,
    method forward (line 486) | def forward(
  class Qwen3NextModel (line 525) | class Qwen3NextModel(nn.Module):
    method __init__ (line 528) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 554) | def forward(
    method get_input_embeddings (line 597) | def get_input_embeddings(self):
  class Qwen3NextForCausalLM (line 602) | class Qwen3NextForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    method __init__ (line 617) | def __init__(self,
    method forward (line 630) | def forward(
    method get_input_embeddings (line 651) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 655) | def prepare_inputs_for_generation(
    method make_buffers_cudagraph (line 696) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method fill_buffers_cudagraph (line 707) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method _load_weight_experts (line 719) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method load_weights (line 734) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/qwen3_vl.py
  class Qwen3VLTextRotaryEmbedding (line 29) | class Qwen3VLTextRotaryEmbedding(nn.Module):
    method __init__ (line 32) | def __init__(self, config: PretrainedConfig, device=None):
    method _pack_for_trans5 (line 53) | def _pack_for_trans5(self, config):
    method apply_interleaved_mrope (line 61) | def apply_interleaved_mrope(self, freqs, mrope_section):
    method forward (line 81) | def forward(self, x, position_ids):
  class Qwen3VLTextModel (line 100) | class Qwen3VLTextModel(Qwen3model):
    method __init__ (line 106) | def __init__(self,
    method forward (line 117) | def forward(
    method _deepstack_process (line 174) | def _deepstack_process(self, hidden_states: torch.Tensor, visual_pos_m...
  class Qwen3VLVisionPatchEmbed (line 184) | class Qwen3VLVisionPatchEmbed(nn.Module):
    method __init__ (line 186) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de...
    method forward (line 202) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Qwen3VLVisionMLP (line 210) | class Qwen3VLVisionMLP(nn.Module):
    method __init__ (line 213) | def __init__(self,
    method forward (line 248) | def forward(self, x):
  class Qwen3VLVisionBlock (line 253) | class Qwen3VLVisionBlock(nn.Module):
    method __init__ (line 256) | def __init__(
    method forward (line 273) | def forward(self,
  class Qwen3VLVisionPatchMerger (line 286) | class Qwen3VLVisionPatchMerger(nn.Module):
    method __init__ (line 288) | def __init__(self,
    method forward (line 321) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class Qwen3VLVisionModel (line 328) | class Qwen3VLVisionModel(nn.Module):
    method __init__ (line 331) | def __init__(self,
    method rot_pos_ids (line 374) | def rot_pos_ids(h: int, w: int, spatial_merge_size: int) -> torch.Tensor:
    method rot_pos_emb (line 400) | def rot_pos_emb(self, grid_thw: torch.Tensor) -> torch.Tensor:
    method fast_pos_embed_interpolate (line 416) | def fast_pos_embed_interpolate(self, grid_thw: List[List[int]]) -> tor...
    method forward (line 471) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tenso...
  class Qwen3VLForConditionalGeneration (line 491) | class Qwen3VLForConditionalGeneration(nn.Module, DeployModelMixinV1, Cud...
    method __init__ (line 506) | def __init__(
    method forward (line 542) | def forward(
    method get_input_embeddings (line 600) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 604) | def prepare_inputs_for_generation(
    method rename_weight (line 673) | def rename_weight(cls, name: str) -> str:
    method load_weights (line 683) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method make_buffers_cudagraph (line 721) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method fill_buffers_cudagraph (line 732) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs):
    method _get_model_metas (line 753) | def _get_model_metas(self, context: StepContext):
    method _update_model_meta_decoding (line 761) | def _update_model_meta_decoding(self, context: StepContext):
    method _get_multimodal_pos_ids (line 774) | def _get_multimodal_pos_ids(self, grid_thw: list, device: torch.device):
    method _update_model_meta_prefilling (line 785) | def _update_model_meta_prefilling(self, context: StepContext):
    method update_model_metas (line 874) | def update_model_metas(self,
    method get_input_processor (line 884) | def get_input_processor(self) -> BaseModelInputProcessor:
  class Qwen3VLInputProcessor (line 889) | class Qwen3VLInputProcessor(BaseModelInputProcessor):
    method __init__ (line 892) | def __init__(self, config: PretrainedConfig) -> None:
    method _make_image_mm_data (line 895) | def _make_image_mm_data(self, input_mm: Dict[str, Any]) -> MultiModalD...
    method _make_video_mm_data (line 913) | def _make_video_mm_data(self, input_mm: Dict[str, Any]) -> MultiModalD...
    method preprocess_input (line 934) | def preprocess_input(self,

FILE: lmdeploy/pytorch/models/qwen3_vl_moe.py
  class Qwen3VLMoeTextModel (line 18) | class Qwen3VLMoeTextModel(Qwen3MoeModel):
    method __init__ (line 24) | def __init__(self,
    method forward (line 35) | def forward(
    method _deepstack_process (line 92) | def _deepstack_process(self, hidden_states: torch.Tensor, visual_pos_m...
  class Qwen3VLMoeForConditionalGeneration (line 102) | class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
    method __init__ (line 117) | def __init__(
    method _load_weight_experts (line 132) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method _load_weight_fused_experts (line 148) | def _load_weight_fused_experts(self, name: str, loaded_weight: torch.T...
    method load_weights (line 172) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/sdar.py
  class SDARAttention (line 18) | class SDARAttention(nn.Module):
    method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 68) | def forward(
  class SDARMLP (line 113) | class SDARMLP(nn.Module):
    method __init__ (line 116) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 142) | def forward(self, x):
  class SDARDecoderLayer (line 149) | class SDARDecoderLayer(nn.Module):
    method __init__ (line 152) | def __init__(self,
    method forward (line 181) | def forward(
  class SDARModel (line 212) | class SDARModel(nn.Module):
    method __init__ (line 215) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 238) | def forward(
    method get_input_embeddings (line 276) | def get_input_embeddings(self):
  class SDARForCausalLM (line 281) | class SDARForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 296) | def __init__(self,
    method forward (line 314) | def forward(
    method get_logits (line 333) | def get_logits(self, hidden_states: torch.Tensor):
    method update_weights (line 337) | def update_weights(self):
    method get_input_embeddings (line 342) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 346) | def prepare_inputs_for_generation(
    method load_weights (line 375) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/sdar_moe.py
  class SDARMoeAttention (line 19) | class SDARMoeAttention(nn.Module):
    method __init__ (line 22) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 69) | def forward(
  class SDARMoeMLP (line 114) | class SDARMoeMLP(nn.Module):
    method __init__ (line 117) | def __init__(self,
    method forward (line 147) | def forward(self, x):
  class SDARMoeSparseMoeBlock (line 154) | class SDARMoeSparseMoeBlock(nn.Module):
    method __init__ (line 157) | def __init__(self,
    method forward (line 198) | def forward(self, hidden_states: torch.Tensor):
  class SDARMoeDecoderLayer (line 214) | class SDARMoeDecoderLayer(nn.Module):
    method __init__ (line 217) | def __init__(self,
    method forward (line 250) | def forward(
  class SDARMoeModel (line 281) | class SDARMoeModel(nn.Module):
    method __init__ (line 284) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 307) | def forward(
    method get_input_embeddings (line 345) | def get_input_embeddings(self):
  class SDARMoeForCausalLM (line 350) | class SDARMoeForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 365) | def __init__(self,
    method forward (line 383) | def forward(
    method get_logits (line 402) | def get_logits(self, hidden_states: torch.Tensor):
    method update_weights (line 406) | def update_weights(self):
    method get_input_embeddings (line 411) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 415) | def prepare_inputs_for_generation(
    method _load_weight_experts (line 444) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,...
    method load_weights (line 459) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/siglip.py
  class SiglipVisionEmbeddings (line 15) | class SiglipVisionEmbeddings(nn.Module):
    method __init__ (line 17) | def __init__(self,
    method interpolate_pos_encoding (line 42) | def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: i...
    method forward (line 81) | def forward(self, pixel_values: torch.FloatTensor, interpolate_pos_enc...
  class SiglipAttention (line 94) | class SiglipAttention(nn.Module):
    method __init__ (line 96) | def __init__(self,
    method forward (line 133) | def forward(
  class SiglipMLP (line 152) | class SiglipMLP(nn.Module):
    method __init__ (line 154) | def __init__(self,
    method forward (line 181) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class SiglipEncoderLayer (line 189) | class SiglipEncoderLayer(nn.Module):
    method __init__ (line 191) | def __init__(self,
    method forward (line 206) | def forward(
  class SiglipEncoder (line 223) | class SiglipEncoder(nn.Module):
    method __init__ (line 225) | def __init__(self,
    method forward (line 241) | def forward(
  class SiglipMultiheadAttentionPoolingHead (line 253) | class SiglipMultiheadAttentionPoolingHead(nn.Module):
    method __init__ (line 256) | def __init__(
    method forward (line 271) | def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
  class SiglipVisionTransformer (line 284) | class SiglipVisionTransformer(nn.Module):
    method __init__ (line 286) | def __init__(
    method forward (line 314) | def forward(
  class SiglipVisionModel (line 331) | class SiglipVisionModel(nn.Module):
    method __init__ (line 335) | def __init__(
    method get_input_embeddings (line 347) | def get_input_embeddings(self) -> nn.Module:
    method forward (line 350) | def forward(
    method load_weights (line 360) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]) ->...

FILE: lmdeploy/pytorch/models/starcoder2.py
  class Starcoder2Attention (line 17) | class Starcoder2Attention(nn.Module):
    method __init__ (line 20) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 62) | def forward(
  class Starcoder2MLP (line 105) | class Starcoder2MLP(nn.Module):
    method __init__ (line 108) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 138) | def forward(self, x):
  class Starcoder2DecoderLayer (line 145) | class Starcoder2DecoderLayer(nn.Module):
    method __init__ (line 148) | def __init__(self,
    method forward (line 171) | def forward(
  class Starcoder2Model (line 201) | class Starcoder2Model(nn.Module):
    method __init__ (line 204) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 227) | def forward(
    method get_input_embeddings (line 265) | def get_input_embeddings(self):
  class Starcoder2ForCausalLM (line 270) | class Starcoder2ForCausalLM(nn.Module, CudaGraphMixin):
    method __init__ (line 281) | def __init__(self,
    method forward (line 298) | def forward(
    method get_logits (line 317) | def get_logits(self, hidden_states: torch.Tensor):
    method update_weights (line 321) | def update_weights(self):
    method get_input_embeddings (line 325) | def get_input_embeddings(self):
    method prepare_inputs_for_generation (line 329) | def prepare_inputs_for_generation(
    method load_weights (line 358) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):

FILE: lmdeploy/pytorch/models/utils/cudagraph.py
  function _get_meta_flashattn (line 14) | def _get_meta_flashattn(
  function next_power_of_2 (line 54) | def next_power_of_2(n: int):
  class CudaGraphMeta (line 68) | class CudaGraphMeta:
  class CudaGraphMixin (line 85) | class CudaGraphMixin:
    method support_cuda_graph (line 88) | def support_cuda_graph(
    method make_output_buffers (line 100) | def make_output_buffers(self, output):
    method update_meta_flashattn (line 109) | def update_meta_flashattn(self, graph_meta: CudaGraphMeta, block_size:...
    method make_buffers_cudagraph (line 141) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, *args, pas...
    method fill_buffers_cudagraph (line 196) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, input_ids:...
    method update_context_cudagraph (line 282) | def update_context_cudagraph(self, graph_meta: CudaGraphMeta, context:...
    method get_outputs_cudagraph (line 296) | def get_outputs_cudagraph(self, output_buffers: Dict[str, torch.Tensor...

FILE: lmdeploy/pytorch/models/utils/micro_batch.py
  function enable_micro_batch (line 7) | def enable_micro_batch(param_name, index=-1):
  function split_batch (line 39) | def split_batch(func, param_name, index=-1, num_splits=2):

FILE: lmdeploy/pytorch/models/utils/model.py
  class BaseModelMetaProcessor (line 15) | class BaseModelMetaProcessor:
    method update_inputs (line 18) | def update_inputs(self, inputs: ModelInputs, device: torch.device) -> ...
    method update_delta (line 22) | def update_delta(self, inputs: ModelInputs, delta: ModelInputsDelta) -...
    method merge (line 26) | def merge(self, inputs: ModelInputs, other: ModelInputs) -> ModelInputs:
  class DeployModelMixin (line 31) | class DeployModelMixin:
    method forward (line 33) | def forward(self, *args, **kwargs):
    method prepare_inputs_for_generation (line 37) | def prepare_inputs_for_generation(
    method load_weights (line 46) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
    method get_logits (line 50) | def get_logits(self, hidden_states: torch.Tensor):
    method rename_weight (line 55) | def rename_weight(cls, name: str) -> str:
    method update_weights (line 59) | def update_weights(self):
    method update_model_metas (line 63) | def update_model_metas(self,
    method get_input_processor (line 70) | def get_input_processor(self) -> BaseModelInputProcessor:
    method get_modelmeta_processor (line 74) | def get_modelmeta_processor(self) -> BaseModelMetaProcessor:
    method update_quant_config (line 79) | def update_quant_config(cls, quant_config: QuantizationConfig):
  class DeployModelMixinV1 (line 112) | class DeployModelMixinV1(DeployModelMixin):
    method get_logits (line 114) | def get_logits(self, hidden_states: torch.Tensor):
    method get_input_embeddings (line 122) | def get_input_embeddings(self):
    method update_weights (line 126) | def update_weights(self):
    method build_lm_head (line 131) | def build_lm_head(self,
  function vlm_model (line 152) | def vlm_model(vlm_cls):
  function build_embedding (line 170) | def build_embedding(vocab_size: int,

FILE: lmdeploy/pytorch/models/whisper.py
  class WhisperAttention (line 13) | class WhisperAttention(nn.Module):
    method __init__ (line 16) | def __init__(
    method forward (line 58) | def forward(self, hidden_states: torch.Tensor, attention_mask: torch.T...
  class WhisperEncoderLayer (line 79) | class WhisperEncoderLayer(nn.Module):
    method __init__ (line 81) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None...
    method forward (line 115) | def forward(

FILE: lmdeploy/pytorch/multimodal/data_type.py
  class MultiModalData (line 13) | class MultiModalData:
    method __post_init__ (line 21) | def __post_init__(self):
    method to_device (line 25) | def to_device(self, device: str, non_blocking: bool = False):

FILE: lmdeploy/pytorch/nn/activation.py
  class SiluAndMul (line 7) | class SiluAndMul(nn.Module):
    method __init__ (line 10) | def __init__(self, inplace: bool = True):
    method forward (line 16) | def forward(self, x: Tensor):
  class GeluAndMul (line 21) | class GeluAndMul(nn.Module):
    method __init__ (line 24) | def __init__(self, approximate: str = 'none'):
    method forward (line 30) | def forward(self, x: Tensor):

FILE: lmdeploy/pytorch/nn/attention.py
  function _update_num_heads (line 12) | def _update_num_heads(num_heads: int, num_kv_heads: int):
  class Attention (line 20) | class Attention(nn.Module):
    method __init__ (line 23) | def __init__(
    method _lazy_init (line 72) | def _lazy_init(self, device):
    method forward (line 87) | def forward(
    method update_meta_flashmla (line 123) | def update_meta_flashmla(attn_metadata: AttentionMetadata, num_attenti...
  class FlashAttention (line 127) | class FlashAttention(nn.Module):
    method __init__ (line 130) | def __init__(
    method forward (line 165) | def forward(self,

FILE: lmdeploy/pytorch/nn/embedding.py
  function pad_vocab_size (line 12) | def pad_vocab_size(vocab_size: int, pad_to: int = DEFAULT_VOCAB_PADDING_...
  class ParallelEmbedding (line 17) | class ParallelEmbedding(nn.Module):
    method __init__ (line 19) | def __init__(
    method create_weight (line 72) | def create_weight(vocab_size: int, hidden_size: int, dtype: torch.dtyp...
    method _weight_loader_tp_rowwise (line 82) | def _weight_loader_tp_rowwise(self, param: torch.nn.Parameter, loaded_...
    method weight_loader (line 93) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method forward (line 105) | def forward(self, x: torch.Tensor):

FILE: lmdeploy/pytorch/nn/eplb.py
  class EPLBDispatchInfo (line 5) | class EPLBDispatchInfo:
    method __init__ (line 7) | def __init__(self, info) -> None:
  class EPLBManager (line 11) | class EPLBManager:
    method init_global_eplb_metadata (line 15) | def init_global_eplb_metadata(cls, ep_size: int, num_routed_experts: i...
    method num_physical_experts (line 24) | def num_physical_experts(cls) -> int:
    method topk_ids_logical_to_physical (line 28) | def topk_ids_logical_to_physical(cls, topk_ids: torch.Tensor, eplb_dis...
    method get_dispatch_info (line 32) | def get_dispatch_info(cls, ep_rank, layer_idx) -> EPLBDispatchInfo:

FILE: lmdeploy/pytorch/nn/gated_delta.py
  function build_rmsnorm_gated (line 17) | def build_rmsnorm_gated(hidden_size: int, eps=1e-6, **kwargs):
  class GatedDeltaMeta (line 31) | class GatedDeltaMeta:
    method __init__ (line 33) | def __init__(self, num_tokens: int, conv_kernel_size: int, state_ids: ...
  class CausalConv1dFunc (line 56) | class CausalConv1dFunc:
    method __init__ (line 58) | def __init__(self, activation: str = 'silu'):
    method conv1d_func (line 66) | def conv1d_func(self, x: torch.Tensor, weight: torch.Tensor, bias: tor...
    method conv1d_update (line 106) | def conv1d_update(
    method __call__ (line 126) | def __call__(
  class GatedDelta (line 140) | class GatedDelta:
    method __init__ (line 142) | def __init__(self, use_qk_l2norm_in_kernel: bool = True):
    method __call__ (line 148) | def __call__(
  class CausalConv1d (line 194) | class CausalConv1d(nn.Module):
    method __init__ (line 197) | def __init__(
    method make_weight (line 232) | def make_weight(
    method register_weight (line 252) | def register_weight(self, weight: torch.Tensor, w_bias: torch.Tensor |...
    method weight_loader (line 261) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method forward (line 270) | def forward(self, x: torch.Tensor, conv_state: torch.Tensor, gated_del...
  function load_state (line 276) | def load_state(past_key_value: Tuple[torch.Tensor, torch.Tensor], gated_...

FILE: lmdeploy/pytorch/nn/linear/__init__.py
  function build_linear (line 18) | def build_linear(
  function build_colwise_linear (line 103) | def build_colwise_linear(
  function build_rowwise_linear (line 147) | def build_rowwise_linear(
  function build_merged_colwise_linear (line 184) | def build_merged_colwise_linear(
  function build_qkv_proj (line 258) | def build_qkv_proj(in_features: int,
  function build_o_proj (line 334) | def build_o_proj(
  function build_gateup_linear (line 366) | def build_gateup_linear(
  function build_down_linear (line 400) | def build_down_linear(

FILE: lmdeploy/pytorch/nn/linear/awq.py
  class AwqLinear (line 14) | class AwqLinear(LinearBase):
    method __init__ (line 17) | def __init__(
    method setup_loaders (line 55) | def setup_loaders(self):
    method register_all_parameters (line 67) | def register_all_parameters(self,
    method _get_io_features (line 84) | def _get_io_features(self, in_features: int, out_features: int, w_bit:...
    method _weight_loader_tp_colwise (line 94) | def _weight_loader_tp_colwise(self, param: torch.nn.Parameter, loaded_...
    method _weight_loader_tp_rowwise (line 113) | def _weight_loader_tp_rowwise(self, param: torch.nn.Parameter, loaded_...
    method weight_loader (line 132) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method create_weights (line 143) | def create_weights(self, in_features: int, out_features: int, w_bit: i...
    method update_weights (line 161) | def update_weights(self):
    method _forward_default (line 166) | def _forward_default(self, x, all_reduce, tp_sizes):
  class MergedAwqLinear (line 171) | class MergedAwqLinear(AwqLinear):
    method __init__ (line 174) | def __init__(self,
    method setup_loaders (line 208) | def setup_loaders(self):
    method _get_io_features (line 224) | def _get_io_features(self, in_features: int, out_features: int, w_bit:...
    method _update_all_out_features (line 228) | def _update_all_out_features(self, all_out_features: List[int], w_bit:...
    method weight_loader (line 238) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method weight_spliter_wz (line 262) | def weight_spliter_wz(self, loaded_weight: torch.Tensor):
    method weight_spliter_s (line 266) | def weight_spliter_s(self, loaded_weight: torch.Tensor):
  class QKVAwqLinear (line 271) | class QKVAwqLinear(MergedAwqLinear, QKVMixin):
    method __init__ (line 274) | def __init__(self,
    method _update_all_out_features (line 312) | def _update_all_out_features(self, all_out_features: List[int], w_bit:...
    method weight_loader (line 316) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method weight_spliter_wz (line 348) | def weight_spliter_wz(self, loaded_weight: torch.Tensor, layout: str =...
    method weight_spliter_s (line 365) | def weight_spliter_s(self, loaded_weight: torch.Tensor, layout: str = ...
    method weight_spliter_lora_b (line 382) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor):

FILE: lmdeploy/pytorch/nn/linear/base.py
  class LinearForwardDPTP (line 16) | class LinearForwardDPTP:
    method __init__ (line 18) | def __init__(self, gemm_func: Callable, max_tokens_per_round: int = 81...
    method all_gather (line 33) | def all_gather(self, hidden_states: torch.Tensor, tp_sizes: List[int]):
    method reduce_scatter (line 38) | def reduce_scatter(self, hidden_states: torch.Tensor, out_states: torc...
    method _gemm_and_reduce_scatter (line 48) | def _gemm_and_reduce_scatter(self, hidden_states: torch.Tensor, output...
    method forward (line 55) | def forward(self, hidden_states: torch.Tensor):
  class LinearBase (line 106) | class LinearBase(nn.Module):
    method __init__ (line 109) | def __init__(
    method init_tp_args (line 135) | def init_tp_args(self, is_tp: bool, all_reduce: bool, colwise: bool, l...
    method get_tp_world_rank (line 171) | def get_tp_world_rank(self):
    method update_weights (line 176) | def update_weights(self):
    method _forward_default (line 180) | def _forward_default(self, x, all_reduce: bool, tp_sizes: List[int]):
    method _forward_lora (line 184) | def _forward_lora(self, x, tp_sizes: List[int] = None):
    method _forward_dp_tp (line 197) | def _forward_dp_tp(self, x):
    method forward (line 214) | def forward(self, x):

FILE: lmdeploy/pytorch/nn/linear/blocked_fp8.py
  class BlockedF8Linear (line 16) | class BlockedF8Linear(LinearBase):
    method __init__ (line 19) | def __init__(
    method setup_loaders (line 59) | def setup_loaders(self):
    method register_all_parameters (line 66) | def register_all_parameters(self,
    method _get_io_features (line 80) | def _get_io_features(self, in_features: int, out_features: int, colwis...
    method _weight_loader_tp_colwise (line 89) | def _weight_loader_tp_colwise(self, param: torch.nn.Parameter, loaded_...
    method _weight_loader_tp_rowwise (line 95) | def _weight_loader_tp_rowwise(self, param: torch.nn.Parameter, loaded_...
    method weight_loader (line 108) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method weight_loader_with_quant (line 119) | def weight_loader_with_quant(self, param: torch.nn.Parameter, loaded_w...
    method create_weights (line 132) | def create_weights(self, in_features: int, out_features: int, bias: bo...
    method update_weights (line 144) | def update_weights(self):
    method _forward_default (line 149) | def _forward_default(self, x, all_reduce, tp_sizes):
  class MergedBlockedF8Linear (line 165) | class MergedBlockedF8Linear(BlockedF8Linear):
    method __init__ (line 168) | def __init__(self,
    method setup_loaders (line 208) | def setup_loaders(self):
    method _get_io_features (line 221) | def _get_io_features(self, in_features: int, out_features: int, colwis...
    method _update_all_out_features (line 225) | def _update_all_out_features(self, all_out_features: List[int], replic...
    method weight_loader (line 236) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method weight_loader_with_quant (line 250) | def weight_loader_with_quant(self, param: torch.nn.Parameter, loaded_w...
    method weight_spliter (line 263) | def weight_spliter(self, loaded_weight: torch.Tensor):
    method weight_spliter_lora_b (line 269) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor):
  class QKVBlockedF8Linear (line 273) | class QKVBlockedF8Linear(MergedBlockedF8Linear, QKVMixin):
    method __init__ (line 276) | def __init__(self,
    method _update_all_out_features (line 316) | def _update_all_out_features(self, all_out_features: List[int], replic...
    method weight_loader (line 320) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method weight_loader_with_quant (line 345) | def weight_loader_with_quant(self, param: torch.nn.Parameter, loaded_w...
    method weight_spliter (line 358) | def weight_spliter(self, loaded_weight: torch.Tensor, layout: str = 'd...

FILE: lmdeploy/pytorch/nn/linear/default.py
  class BaseLinear (line 15) | class BaseLinear(LinearBase):
    method __init__ (line 18) | def __init__(
    method setup_loaders (line 50) | def setup_loaders(self):
    method register_all_parameters (line 56) | def register_all_parameters(self, weight: torch.Tensor, bias: Optional...
    method _get_io_features (line 65) | def _get_io_features(self, in_features: int, out_features: int, colwis...
    method _weight_loader_tp_colwise (line 74) | def _weight_loader_tp_colwise(self, param: torch.nn.Parameter, loaded_...
    method _weight_loader_tp_rowwise (line 80) | def _weight_loader_tp_rowwise(self, param: torch.nn.Parameter, loaded_...
    method weight_loader (line 93) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method create_weights (line 104) | def create_weights(self, in_features: int, out_features: int, bias: bo...
    method update_weights (line 113) | def update_weights(self):
    method _forward_default (line 118) | def _forward_default(self, x, all_reduce, tp_sizes):
  class MergedBaseLinear (line 133) | class MergedBaseLinear(BaseLinear):
    method __init__ (line 136) | def __init__(self,
    method setup_loaders (line 166) | def setup_loaders(self):
    method _get_io_features (line 174) | def _get_io_features(self, in_features: int, out_features: int, colwis...
    method _update_all_out_features (line 178) | def _update_all_out_features(self, all_out_features: List[int]):
    method weight_loader (line 187) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method weight_spliter (line 195) | def weight_spliter(self, loaded_weight: torch.Tensor):
    method weight_spliter_lora_b (line 199) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor):
  class QKVBaseLinear (line 203) | class QKVBaseLinear(MergedBaseLinear, QKVMixin):
    method __init__ (line 206) | def __init__(self,
    method _update_all_out_features (line 239) | def _update_all_out_features(self, all_out_features: List[int]):
    method weight_loader (line 243) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method weight_spliter (line 260) | def weight_spliter(self, loaded_weight: torch.Tensor, layout: str = 'd...
    method weight_spliter_lora_b (line 277) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor):

FILE: lmdeploy/pytorch/nn/linear/lora.py
  class LoRA (line 12) | class LoRA(nn.Module):
    method __init__ (line 15) | def __init__(self,
    method forward (line 49) | def forward(self, x, base_output=None):
    method weight_loader_A (line 60) | def weight_loader_A(self, param: nn.Parameter, loaded_weight: torch.Te...
    method weight_loader_B (line 74) | def weight_loader_B(self, param: nn.Parameter, loaded_weight: torch.Te...

FILE: lmdeploy/pytorch/nn/linear/utils.py
  function check_qkv_split_layout (line 14) | def check_qkv_split_layout(layout: str):
  function update_tp_args (line 20) | def update_tp_args(is_tp: bool, all_reduce: bool, colwise: bool, layer_t...
  class QKVMixin (line 32) | class QKVMixin:
    method __init__ (line 35) | def __init__(self,
    method get_qkv_out_feautures (line 54) | def get_qkv_out_feautures(self):
    method _get_qkv_out_features (line 58) | def _get_qkv_out_features(self,
    method _update_num_heads (line 69) | def _update_num_heads(self, is_tp: bool, tp: int, tp_rank: int, num_q_...
    method split_qkv (line 79) | def split_qkv(self, x: torch.Tensor):

FILE: lmdeploy/pytorch/nn/linear/w8a8.py
  class W8A8Linear (line 14) | class W8A8Linear(LinearBase):
    method __init__ (line 17) | def __init__(self,
    method setup_loaders (line 49) | def setup_loaders(self):
    method register_all_parameters (line 56) | def register_all_parameters(self, weight: torch.Tensor, scale: torch.T...
    method _get_io_features (line 67) | def _get_io_features(self, in_features: int, out_features: int, colwis...
    method _weight_loader_tp_colwise (line 76) | def _weight_loader_tp_colwise(self, param: torch.nn.Parameter, loaded_...
    method _weight_loader_tp_rowwise (line 82) | def _weight_loader_tp_rowwise(self, param: torch.nn.Parameter, loaded_...
    method weight_loader (line 98) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method create_weights (line 109) | def create_weights(self, in_features: int, out_features: int, bias: bo...
    method update_weights (line 119) | def update_weights(self):
    method _forward_default (line 124) | def _forward_default(self, x, all_reduce, tp_sizes):
  class MergedW8A8Linear (line 129) | class MergedW8A8Linear(W8A8Linear):
    method __init__ (line 132) | def __init__(self,
    method setup_loaders (line 162) | def setup_loaders(self):
    method _get_io_features (line 172) | def _get_io_features(self, in_features: int, out_features: int, colwis...
    method _update_all_out_features (line 176) | def _update_all_out_features(self, all_out_features: List[int]):
    method weight_loader (line 185) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method weight_spliter (line 193) | def weight_spliter(self, loaded_weight: torch.Tensor):
    method weight_spliter_lora_b (line 197) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor):
  class QKVW8A8Linear (line 201) | class QKVW8A8Linear(MergedW8A8Linear, QKVMixin):
    method __init__ (line 204) | def __init__(self,
    method _update_all_out_features (line 239) | def _update_all_out_features(self, all_out_features: List[int]):
    method weight_loader (line 243) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc...
    method weight_spliter (line 260) | def weight_spliter(self, loaded_weight: torch.Tensor, layout: str = 'd...
    method weight_spliter_lora_b (line 277) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor):

FILE: lmdeploy/pytorch/nn/moe/__init__.py
  function build_fused_moe (line 11) | def build_fused_moe(

FILE: lmdeploy/pytorch/nn/moe/base.py
  class MoeType (line 16) | class MoeType(Enum):
  class SoftmaxTopK (line 23) | class SoftmaxTopK(nn.Module):
    method __init__ (line 26) | def __init__(self, top_k: int, dim: int = -1, n_groups: int = -1):
    method forward (line 32) | def forward(self, x: torch.Tensor):
  function update_dims (line 37) | def update_dims(hidden_dim: int, ffn_dim: int):
  function split_size (line 45) | def split_size(size: int, world_size: int, align: int):
  function moe_gather_inputs (line 54) | def moe_gather_inputs(hidden_states, topk_weights, topk_ids, group: Opti...
  function moe_reduce (line 76) | def moe_reduce(ret, rank: int, tp_mode: TPMode, group: Optional[dist.Pro...
  class MoEForwardDPTP (line 94) | class MoEForwardDPTP:
    method __init__ (line 96) | def __init__(self, gemm_func: Callable, max_tokens_per_round: int = 81...
    method all_gather (line 111) | def all_gather(self, hidden_states: torch.Tensor, topk_weights: torch....
    method reduce_scatter (line 119) | def reduce_scatter(self, hidden_states: torch.Tensor, out_states: torc...
    method _gemm_and_reduce_scatter (line 129) | def _gemm_and_reduce_scatter(self, hidden_states: torch.Tensor, topk_w...
    method forward (line 137) | def forward(self, hidden_states: torch.Tensor, topk_weights: torch.Ten...
  function _renormalize (line 196) | def _renormalize(topk_weights: torch.Tensor, renormalize: bool):
  class DispatchInputs (line 205) | class DispatchInputs:
    method from_dict (line 213) | def from_dict(cls, input: Dict):
    method to_dict (line 224) | def to_dict(self) -> Dict:
  class FusedMoEBase (line 234) | class FusedMoEBase(nn.Module):
    method __init__ (line 237) | def __init__(self, tp: int, tp_mode: TPMode, do_renormalize: bool):
    method init_dist_args (line 243) | def init_dist_args(self, all_reduce: bool):
    method before_dispatch (line 274) | def before_dispatch(self, state: DispatchInputs):
    method dispatch (line 278) | def dispatch(self, state: Dict):
    method gemm (line 282) | def gemm(self, state: Dict):
    method combine (line 286) | def combine(self, state: Dict):
    method wait (line 290) | def wait(self, state: Dict):
    method forward_dptp (line 295) | def forward_dptp(self) -> MoEForwardDPTP:
    method forward_default (line 299) | def forward_default(self, hidden_states: torch.Tensor, topk_weights: t...
    method forward (line 312) | def forward(self, hidden_states: torch.Tensor, topk_weights: torch.Ten...
    method renormalize (line 319) | def renormalize(self, topk_weights):

FILE: lmdeploy/pytorch/nn/moe/blocked_fp8.py
  class LinearWeightsBlockedF8 (line 16) | class LinearWeightsBlockedF8(LinearWeights):
    method __init__ (line 19) | def __init__(self,
    method update_weight (line 54) | def update_weight(self, weight: torch.Tensor, weight_scale_inv: torch....
    method weight_loader_scale_ep (line 62) | def weight_loader_scale_ep(self, param: torch.nn.Parameter, loaded_wei...
    method _chunk_weight_tp (line 71) | def _chunk_weight_tp(self, weight: torch.Tensor, dim: int, world_size:...
    method weight_loader_tp_blocked_fp8 (line 76) | def weight_loader_tp_blocked_fp8(self, param: torch.nn.Parameter, load...
    method weight_loader_scale_tp (line 107) | def weight_loader_scale_tp(self, param: torch.nn.Parameter, loaded_wei...
    method weight_loader_with_quant (line 127) | def weight_loader_with_quant(self, param: torch.nn.Parameter, loaded_w...
  class FusedMoEBlockedF8 (line 142) | class FusedMoEBlockedF8(FusedMoEBase):
    method __init__ (line 145) | def __init__(self,
    method _update_args (line 226) | def _update_args(hidden_dim: int, ffn_dim: int, align: int):
    method update_weights (line 232) | def update_weights(self):
    method before_dispatch (line 240) | def before_dispatch(self, state: DispatchInputs):
    method dispatch (line 255) | def dispatch(self, state: Dict):
    method gemm (line 318) | def gemm(self, state: Dict):
    method combine (line 367) | def combine(self, state: Dict):
    method wait (line 408) | def wait(self, state):
    method fusedmoe_build (line 418) | def fusedmoe_build(self, low_latency_mode: bool = False):

FILE: lmdeploy/pytorch/nn/moe/default.py
  class LinearWeights (line 14) | class LinearWeights(nn.Module):
    method __init__ (line 17) | def __init__(self,
    method setup_weight_loader (line 45) | def setup_weight_loader(self):
    method update_weight (line 59) | def update_weight(self, weight: torch.Tensor):
    method weight_loader_tp (line 66) | def weight_loader_tp(self, param: torch.nn.Parameter, loaded_weight: t...
    method weight_loader_ep (line 88) | def weight_loader_ep(self, param: torch.nn.Parameter, loaded_weight: t...
  class FusedMoE (line 108) | class FusedMoE(FusedMoEBase):
    method __init__ (line 111) | def __init__(self,
    method update_weights (line 183) | def update_weights(self):
    method before_dispatch (line 189) | def before_dispatch(self, state: DispatchInputs):
    method dispatch (line 202) | def dispatch(self, state: Dict):
    method gemm (line 268) | def gemm(self, state: Dict):
    method combine (line 314) | def combine(self, state: Dict):
    method wait (line 358) | def wait(self, state: Dict):
    method fusedmoe_build (line 369) | def fusedmoe_build(self, low_latency_mode: bool = False):

FILE: lmdeploy/pytorch/nn/moe/route.py
  class NoauxTCRouter (line 9) | class NoauxTCRouter(torch.nn.Module):
    method __init__ (line 11) | def __init__(
    method forward (line 36) | def forward(self, router_logits: torch.Tensor,

FILE: lmdeploy/pytorch/nn/moe/w8a8.py
  class LinearWeightsW8A8 (line 13) | class LinearWeightsW8A8(LinearWeights):
    method __init__ (line 16) | def __init__(self,
    method update_weight (line 42) | def update_weight(self, weight: torch.Tensor, scale: torch.Tensor):
    method weight_loader_scale_tp (line 50) | def weight_loader_scale_tp(self, param: torch.nn.Parameter, loaded_wei...
  class FusedMoEW8A8 (line 69) | class FusedMoEW8A8(FusedMoEBase):
    method __init__ (line 72) | def __init__(self,
    method update_weights (line 128) | def update_weights(self):
    method dispatch (line 136) | def dispatch(self, state: Dict):
    method gemm (line 154) | def gemm(self, state: Dict):
    method combine (line 164) | def combine(self, state: Dict):
    method wait (line 178) | def wait(self, state: Dict):

FILE: lmdeploy/pytorch/nn/multinomial_sampling.py
  function multinomial_sampling (line 7) | def multinomial_sampling(scores: torch.Tensor,

FILE: lmdeploy/pytorch/nn/norm.py
  class RMSNorm (line 14) | class RMSNorm(nn.Module):
    method __init__ (line 17) | def __init__(
    method weight_loader (line 57) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens...
    method create_weight (line 64) | def create_weight(hidden_size: int, dtype: torch.dtype | None = None, ...
    method forward (line 73) | def forward(self, x: torch.Tensor, residual: torch.Tensor = None):
  class LayerNorm (line 78) | class LayerNorm(nn.Module):
    method __init__ (line 81) | def __init__(self,
    method create_weight (line 96) | def create_weight(hidden_size: int,
    method forward (line 113) | def forward(self, x: torch.Tensor, residual: torch.Tensor | None = None):

FILE: lmdeploy/pytorch/nn/nsa.py
  class IndexerTopKFP8 (line 10) | class IndexerTopKFP8(nn.Module):
    method __init__ (line 12) | def __init__(self, topk: int, softmax_scale: float, block_size: int = ...
    method forward (line 18) | def forward(

FILE: lmdeploy/pytorch/nn/rotary_embedding.py
  function get_rope_parameters (line 14) | def get_rope_parameters(config: PretrainedConfig):
  function _get_default_rope_parameters (line 23) | def _get_default_rope_parameters(config: PretrainedConfig):
  function _get_linear_scaling_rope_parameters (line 28) | def _get_linear_scaling_rope_parameters(config: PretrainedConfig):
  function _get_dynamic_ntk_parameters (line 35) | def _get_dynamic_ntk_parameters(config: PretrainedConfig):
  function _get_yarn_parameters (line 42) | def _get_yarn_parameters(config: PretrainedConfig):
  function _get_longrope_parameters (line 78) | def _get_longrope_parameters(config: PretrainedConfig):
  function _get_llama3_parameters (line 96) | def _get_llama3_parameters(config: PretrainedConfig):
  function _get_fope_parameters (line 108) | def _get_fope_parameters(config: PretrainedConfig):
  function build_rotary_params (line 125) | def build_rotary_params(config: PretrainedConfig):
  function build_rotary_embedding (line 153) | def build_rotary_embedding(dim: int,
  function get_rope_theta (line 190) | def get_rope_theta(config: PretrainedConfig, default: int = 10000) -> int:
  function build_rotary_embedding_from_config (line 200) | def build_rotary_embedding_from_config(config: PretrainedConfig, device:...
  class ApplyRotaryEmb (line 215) | class ApplyRotaryEmb(nn.Module):
    method __init__ (line 218) | def __init__(self):
    method forward (line 224) | def forward(self, query: Tensor, key: Tensor, cos: Tensor, sin: Tensor...
  class FopeRotaryEmbedding (line 250) | class FopeRotaryEmbedding(nn.Module):
    method __init__ (line 253) | def __init__(self,
    method update_num_kv_heads (line 287) | def update_num_kv_heads(num_key_value_heads: int):
    method weight_loader (line 298) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens...
    method forward (line 312) | def forward(self, x: Tensor, position_ids: Tensor):

FILE: lmdeploy/pytorch/nn/utils.py
  function div_up (line 5) | def div_up(a: int, b: int):
  function get_distribute_size (line 10) | def get_distribute_size(feature_size: int, world_size: int, rank: int, a...
  function chunk_aligned (line 23) | def chunk_aligned(weight: torch.Tensor, chunks: int, dim: int, align: int):

FILE: lmdeploy/pytorch/paging/block_manager/__init__.py
  function build_block_manager (line 8) | def build_block_manager(cache_config: CacheConfig) -> BaseBlockManager:

FILE: lmdeploy/pytorch/paging/block_manager/base_block_manager.py
  class LogicalMemory (line 10) | class LogicalMemory:
    method __init__ (line 13) | def __init__(self, num_blocks: int) -> None:
    method get_physical_blocks (line 20) | def get_physical_blocks(self, logical_address: np.ndarray):
    method num_blocks (line 26) | def num_blocks(self):
  class PhysicalAllocator (line 31) | class PhysicalAllocator:
    method __init__ (line 37) | def __init__(self, num_blocks: int, offset: int = 0):
    method allocate (line 44) | def allocate(self, num_blocks: int):
    method free (line 54) | def free(self, blocks: np.ndarray):
    method get_num_free_blocks (line 64) | def get_num_free_blocks(self):
  class LogicalAllocator (line 69) | class LogicalAllocator:
    method __init__ (line 72) | def __init__(self, num_cpu_blocks: int, num_gpu_blocks: int, num_gpu_r...
    method get_phy_allocator (line 85) | def get_phy_allocator(self, device: str):
    method allocate (line 94) | def allocate(self, num_blocks: int, device: str = 'gpu'):
    method free (line 113) | def free(self, blocks: np.ndarray):
    method get_num_free_blocks (line 139) | def get_num_free_blocks(self):
    method get_physical_blocks (line 143) | def get_physical_blocks(self, blocks: np.ndarray):
    method get_ref_count (line 147) | def get_ref_count(self, blocks: np.ndarray):
    method add_ref_count (line 151) | def add_ref_count(self, blocks: np.ndarray, value: np.ndarray):
    method get_access_time (line 155) | def get_access_time(self, blocks: np.ndarray):
    method update_access_time (line 159) | def update_access_time(self, blocks: np.ndarray):
    method cpu_mem_offset (line 164) | def cpu_mem_offset(self):
    method count_cpu_blocks (line 168) | def count_cpu_blocks(self, blocks: np.ndarray):
    method count_gpu_blocks (line 173) | def count_gpu_blocks(self, blocks: np.ndarray):
    method update_phy_map (line 178) | def update_phy_map(self, log_blocks: np.ndarray, phy_blocks: np.ndarray):
    method on_device (line 183) | def on_device(self, blocks: np.ndarray, device: str):
  class BaseBlockManager (line 202) | class BaseBlockManager:
    method __init__ (line 210) | def __init__(self, num_gpu_blocks: int, num_cpu_blocks: int, num_gpu_r...
    method num_required_blocks (line 219) | def num_required_blocks(cls, obj: SchedulerSequence, prealloc_size: in...
    method can_allocate (line 223) | def can_allocate(self, msg: SchedulerSequence, prealloc_size: int = 0):
    method allocate_msg (line 227) | def allocate_msg(self, msg: SchedulerSequence, prealloc_size: int = 0):
    method free (line 232) | def free(self, msg: SchedulerSequence):
    method try_swap_out (line 236) | def try_swap_out(self, msg: SchedulerSequence):
    method try_swap_in (line 240) | def try_swap_in(self, msg: SchedulerSequence):
    method get_block_table (line 244) | def get_block_table(self, msg: SchedulerSequence):
    method allocate (line 253) | def allocate(self, data: SchedulerSequence, prealloc_size: int = 0):
    method get_num_free_gpu_blocks (line 257) | def get_num_free_gpu_blocks(self) -> int:
    method get_num_free_cpu_blocks (line 261) | def get_num_free_cpu_blocks(self) -> int:
    method on_device (line 265) | def on_device(self, msg: SchedulerSequence, device: str):

FILE: lmdeploy/pytorch/paging/block_manager/default_block_manager.py
  function _div_up (line 9) | def _div_up(x, n):
  class DefaultBlockManager (line 17) | class DefaultBlockManager(BaseBlockManager):
    method num_required_blocks (line 26) | def num_required_blocks(cls, obj: SchedulerSequence, prealloc_size: in...
    method can_allocate (line 33) | def can_allocate(self, msg: SchedulerSequence, prealloc_size: int = 0):
    method allocate_msg (line 39) | def allocate_msg(self, msg: SchedulerSequence, prealloc_size: int = 0):
    method free (line 48) | def free(self, msg: SchedulerSequence):
    method try_swap_out (line 53) | def try_swap_out(self, msg: SchedulerSequence):
    method try_swap_in (line 99) | def try_swap_in(self, msg: SchedulerSequence):

FILE: lmdeploy/pytorch/paging/block_manager/window_block_manager.py
  function _num_blocks_to_drop (line 11) | def _num_blocks_to_drop(seq: SchedulerSequence, window_size: int):
  class WindowBlockManager (line 24) | class WindowBlockManager(DefaultBlockManager):
    method __init__ (line 32) | def __init__(self, num_gpu_blocks: int, num_cpu_blocks: int, window_si...
    method num_required_blocks (line 38) | def num_required_blocks(self, obj: SchedulerSequence, prealloc_size: i...
    method can_allocate (line 47) | def can_allocate(self, msg: SchedulerSequence, prealloc_size: int = 0):
    method allocate_msg (line 54) | def allocate_msg(self, msg: SchedulerSequence, prealloc_size: int = 0):

FILE: lmdeploy/pytorch/paging/block_trie.py
  class PrefixCacheStats (line 15) | class PrefixCacheStats:
    method reset (line 20) | def reset(self):
    method hit_rate (line 24) | def hit_rate(self):
  class Node (line 28) | class Node:
    method __init__ (line 31) | def __init__(self, hash_key: int, block: int, tokens: np.ndarray, num_...
    method parent (line 40) | def parent(self):
    method parent (line 44) | def parent(self, val: 'Node'):
    method __lt__ (line 52) | def __lt__(self, other):
    method __le__ (line 55) | def __le__(self, other):
  class BlockTrie (line 59) | class BlockTrie:
    method __init__ (line 62) | def __init__(self, cache_config: CacheConfig, block_manager: BaseBlock...
    method hit_rate (line 74) | def hit_rate(self):
    method get_root (line 78) | def get_root(self, adapter_name: str):
    method match (line 84) | def match(self, seq: SchedulerSequence):
    method allocate (line 131) | def allocate(self, seq: SchedulerSequence):
    method evict (line 185) | def evict(self, max_num_blocks: int):

FILE: lmdeploy/pytorch/paging/eviction_helper/__init__.py
  function build_eviction_helper (line 7) | def build_eviction_helper(scheduler, eviction_type: str):

FILE: lmdeploy/pytorch/paging/eviction_helper/base_eviction_helper.py
  class BaseEvictionHelper (line 10) | class BaseEvictionHelper:
    method __init__ (line 13) | def __init__(self, scheduler: Scheduler):
    method need_swap_in (line 20) | def need_swap_in(self, seq: SchedulerSequence):
    method evict_for_seq (line 24) | def evict_for_seq(self, seq: SchedulerSequence, evictable_seqs: List[S...

FILE: lmdeploy/pytorch/paging/eviction_helper/recompute_eviction_helper.py
  class RecomputeEvictionHelper (line 9) | class RecomputeEvictionHelper(BaseEvictionHelper):
    method __init__ (line 12) | def __init__(self, scheduler: Scheduler):
    method _evict_for_seq_default (line 20) | def _evict_for_seq_default(self, seq: SchedulerSequence, evictable_seq...
    method _evict_for_ssm (line 59) | def _evict_for_ssm(self, seq: SchedulerSequence, evictable_seqs: List[...

FILE: lmdeploy/pytorch/paging/scheduler.py
  class SchedulerOutput (line 28) | class SchedulerOutput:
  class Scheduler (line 37) | class Scheduler:
    method __init__ (line 45) | def __init__(
    method create_status_list_property (line 70) | def create_status_list_property(status: MessageStatus):
    method create_num_status_method (line 80) | def create_num_status_method(status: MessageStatus):
    method create_has_status_method (line 89) | def create_has_status_method(status: MessageStatus):
    method add_session (line 118) | def add_session(self, session_id: int):
    method _schedule_migration (line 129) | def _schedule_migration(self):
    method _schedule_prefill (line 169) | def _schedule_prefill(self, prealloc_size: int = 0):
    method _schedule_decoding (line 227) | def _schedule_decoding(self, prealloc_size: int = 0):
    method schedule (line 280) | def schedule(self, is_prefill: bool, prealloc_size: int = 0):
    method schedule_running (line 291) | def schedule_running(self, running: SeqList, num_decode_tokens: int = ...
    method stop_session (line 327) | def stop_session(self, session_id: int):
    method end_session (line 338) | def end_session(self, session_id: int):
    method has_unfinished (line 354) | def has_unfinished(self):
    method get_block_tables (line 358) | def get_block_tables(self, seqs: SeqList):
    method evict_seqs (line 362) | def evict_seqs(self, running: SeqList):
    method activate_seqs (line 367) | def activate_seqs(self, running: SeqList, filter_status: MessageStatus...
    method deactivate_seqs (line 373) | def deactivate_seqs(self, running: SeqList, filter_status: MessageStat...
    method seqs_activation (line 379) | def seqs_activation(self, running: SeqList):
    method activate_migration_seqs (line 387) | def activate_migration_seqs(self, running: SeqList):
    method deactivate_migration_seqs (line 391) | def deactivate_migration_seqs(self, running: SeqList):
    method seqs_migration_activation (line 396) | def seqs_migration_activation(self, running: SeqList):
    method collect_migration_done (line 404) | def collect_migration_done(self):
    method schedule_metrics (line 409) | def schedule_metrics(self):

FILE: lmdeploy/pytorch/paging/seq_states/states.py
  function _free_seq (line 10) | def _free_seq(seq: SchedulerSequence, scheduler: 'Scheduler'):
  class StateBase (line 19) | class StateBase:
    method __init_subclass__ (line 23) | def __init_subclass__(cls, **kargs) -> None:
    method build (line 29) | def build(cls, scheduler: 'Scheduler', seq: 'SchedulerSequence', statu...
    method __init__ (line 35) | def __init__(self, seq: SchedulerSequence, scheduler: 'Scheduler'):
    method to_state (line 39) | def to_state(self, new_state):
    method evict (line 44) | def evict(self):
    method activate (line 48) | def activate(self):
    method deactivate (line 52) | def deactivate(self):
    method finish (line 56) | def finish(self):
    method stop (line 60) | def stop(self):
    method free (line 64) | def free(self):
  class WaitingState (line 69) | class WaitingState(StateBase):
    method activate (line 73) | def activate(self):
    method evict (line 81) | def evict(self):
  class ReadyState (line 85) | class ReadyState(StateBase):
    method activate (line 89) | def activate(self):
    method evict (line 93) | def evict(self):
  class StoppedState (line 97) | class StoppedState(StateBase):
    method activate (line 101) | def activate(self):
    method evict (line 106) | def evict(self):
  class RunningState (line 110) | class RunningState(StateBase):
    method deactivate (line 114) | def deactivate(self):
    method finish (line 117) | def finish(self):
  class ToBeMigratedState (line 124) | class ToBeMigratedState(StateBase):
    method finish (line 128) | def finish(self):
  class MigrationWaitingState (line 132) | class MigrationWaitingState(StateBase):
    method activate (line 136) | def activate(self):
    method evict (line 139) | def evict(self):
  class MigrationReadyState (line 143) | class MigrationReadyState(StateBase):
    method activate (line 147) | def activate(self):
    method evict (line 150) | def evict(self):
  class MigrationDoneState (line 154) | class MigrationDoneState(StateBase):
    method activate (line 158) | def activate(self):
    method finish (line 161) | def finish(self):
  class MigrationRunningState (line 165) | class MigrationRunningState(StateBase):
    method deactivate (line 169) | def deactivate(self):
    method finish (line 172) | def finish(self):
  function build_seq_state (line 176) | def build_seq_state(scheduler: 'Scheduler', seq: 'SchedulerSequence', st...

FILE: lmdeploy/pytorch/paging/state_manager.py
  class StateAllocator (line 8) | class StateAllocator:
    method __init__ (line 11) | def __init__(self, num_states: int, offset: int = 0):
    method allocate (line 16) | def allocate(self):
    method free (line 24) | def free(self, state_id: int):
    method get_num_free (line 31) | def get_num_free(self):
  class StateManager (line 35) | class StateManager:
    method __init__ (line 37) | def __init__(self, num_states: int, num_reserved: int = 0):
    method is_allocated (line 42) | def is_allocated(self, seq: SchedulerSequence):
    method allocate (line 46) | def allocate(self, seq: SchedulerSequence):
    method free (line 52) | def free(self, seq: SchedulerSequence):
    method get_num_free (line 59) | def get_num_free(self):
  function build_state_manager (line 64) | def build_state_manager(cache_config: CacheConfig) -> StateManager:

FILE: lmdeploy/pytorch/ray.py
  function get_device_str (line 16) | def get_device_str(device_type: str = None) -> str:
  function get_resource_kwargs (line 31) | def get_resource_kwargs(device_str: str, resource_used: float = 0.01) ->...
  function _wait_until_pg_ready (line 42) | def _wait_until_pg_ready(current_placement_group: PlacementGroup):
  function _get_obj_store_memory (line 79) | def _get_obj_store_memory(dp: int = 1):
  function init_ray_cluster (line 97) | def init_ray_cluster(world_size: int, ray_address: str = None, dp: int =...
  class RayContext (line 146) | class RayContext:
    method __init__ (line 149) | def __init__(self, world_size: int, ray_address: str = None, dp: int =...
    method get_placement_group (line 159) | def get_placement_group(self):
    method shutdown (line 163) | def shutdown(self):

FILE: lmdeploy/pytorch/spec_decode/__init__.py
  function build_spec_agent (line 7) | def build_spec_agent(specdecode_config: SpecDecodeConfig,

FILE: lmdeploy/pytorch/spec_decode/base.py
  class BaseSpecModelAgent (line 13) | class BaseSpecModelAgent:
    method __init__ (line 16) | def __init__(self, enable: bool = False):
    method is_enabled (line 19) | def is_enabled(self):
    method set_cache_config (line 22) | def set_cache_config(self, cache_config: CacheConfig):
    method set_model_config (line 26) | def set_model_config(self, model_config: ModelConfig):
    method build_model (line 30) | def build_model(self, empty_init: bool, target_model=None, build_model...
    method build_graph_runner (line 34) | def build_graph_runner(self):
    method build_cache_engine (line 38) | def build_cache_engine(self, cache_stream: torch.cuda.Stream):
    method async_model_forward (line 42) | async def async_model_forward(self, next_token_ids: torch.Tensor, mode...
    method warmup (line 47) | def warmup(self, max_batches: int, target_model_config: ModelConfig):
    method reset_graph_runner (line 51) | def reset_graph_runner(self):
    method update_main_model_outputs (line 55) | def update_main_model_outputs(self, output: Dict[str, torch.Tensor], m...

FILE: lmdeploy/pytorch/spec_decode/proposers/base.py
  function draft_model_forward (line 23) | def draft_model_forward(
  class BaseSpecProposer (line 58) | class BaseSpecProposer:
    method __init__ (line 60) | def __init__(self, specdecode_config: SpecDecodeConfig, device: torch....
    method build_model (line 68) | def build_model(self, empty_init: bool, target_model: torch.nn.Module ...
    method get_outputs (line 88) | def get_outputs(self,
    method _forward (line 96) | def _forward(self, model_inputs: ModelInputs, cache_engine: CacheEngin...
    method update_inputs_decoding (line 105) | def update_inputs_decoding(self, model_inputs: ModelInputs, extra_inpu...
    method get_logits (line 124) | def get_logits(self, hidden_states: torch.Tensor):
    method get_target_hidden_size (line 136) | def get_target_hidden_size(self, model_config: ModelConfig):
  function build_specdecode_proposer (line 141) | def build_specdecode_proposer(specdecode_config: SpecDecodeConfig, devic...

FILE: lmdeploy/pytorch/spec_decode/proposers/deepseek_mtp.py
  class DeepseekMTP (line 16) | class DeepseekMTP(BaseSpecProposer):
    method get_outputs (line 18) | def get_outputs(self,

FILE: lmdeploy/pytorch/spec_decode/proposers/eagle.py
  class Eagle (line 8) | class Eagle(DeepseekMTP):

FILE: lmdeploy/pytorch/spec_decode/proposers/eagle3.py
  class Eagle3 (line 18) | class Eagle3(DeepseekMTP):
    method build_model (line 20) | def build_model(self, empty_init: bool, target_model: torch.nn.Module ...
    method get_target_hidden_size (line 28) | def get_target_hidden_size(self, model_config: ModelConfig):
    method get_outputs (line 34) | def get_outputs(self,

FILE: lmdeploy/pytorch/spec_decode/reject_sampler.py
  class SamplePolicy (line 10) | class SamplePolicy(enum.Enum):
  class RejectionSampler (line 16) | class RejectionSampler(nn.Module):
    method __init__ (line 18) | def __init__(self, sample_policy: SamplePolicy = SamplePolicy.ALL_GREE...
    method forward (line 22) | def forward(
  function rejection_sample (line 47) | def rejection_sample(
  function greedy_reject_sampler (line 66) | def greedy_reject_sampler(draft_token_ids, target_token_ids, bonus_token...

FILE: lmdeploy/pytorch/spec_decode/spec_agent.py
  class SpecModelAgent (line 23) | class SpecModelAgent(BaseSpecModelAgent):
    method __init__ (line 26) | def __init__(
    method set_cache_config (line 48) | def set_cache_config(self, cache_config: CacheConfig):
    method set_model_config (line 52) | def set_model_config(self, model_config: ModelConfig):
    method build_model (line 56) | def build_model(self, empty_init: bool, target_model=None, build_model...
    method build_graph_runner (line 60) | def build_graph_runner(self):
    method build_cache_engine (line 69) | def build_cache_engine(self, cache_stream: torch.cuda.Stream):
    method _rejection_sampling (line 79) | def _rejection_sampling(self, next_token_ids, model_inputs: 'ModelInpu...
    method _forward_impl (line 125) | def _forward_impl(self, inputs: ModelInputs):
    method _async_forward (line 130) | async def _async_forward(self, inputs: ModelInputs):
    method _async_model_forward (line 140) | async def _async_model_forward(self, inputs: ModelInputs, extra_inputs...
    method async_model_forward (line 174) | async def async_model_forward(
    method warmup (line 187) | def warmup(self, max_batches: int, target_model_config: ModelConfig):
    method reset_graph_runner (line 228) | def reset_graph_runner(self):

FILE: lmdeploy/pytorch/strategies/__init__.py
  function build_strategy_factory (line 5) | def build_strategy_factory(model_config: ModelConfig,

FILE: lmdeploy/pytorch/strategies/ar/__init__.py
  class ARStrategyFactory (line 18) | class ARStrategyFactory(StrategyFactoryBase):
    method __init__ (line 20) | def __init__(self, model_config: ModelConfig):
    method build_cudagraph_strategy (line 24) | def build_cudagraph_strategy(self) -> 'CudagraphStrategy':
    method build_sampling_strategy (line 29) | def build_sampling_strategy(self) -> 'SamplingStrategy':
    method build_model_inputs_strategy (line 36) | def build_model_inputs_strategy(self) -> 'ModelInputsStrategy':
    method build_model_agent_strategy (line 41) | def build_model_agent_strategy(self) -> 'ModelAgentStrategy':
    method build_engine_strategy (line 46) | def build_engine_strategy(self, cache_config: 'CacheConfig',
    method build_sequence_strategy (line 52) | def build_sequence_strategy(self) -> SequenceStrategy:

FILE: lmdeploy/pytorch/strategies/ar/cudagraph.py
  class ARCudagraphStrategy (line 5) | class ARCudagraphStrategy(CudagraphStrategy):
    method get_max_tokens (line 7) | def get_max_tokens(self, batch_size: int, origin_batch_size: int, num_...

FILE: lmdeploy/pytorch/strategies/ar/engine.py
  class AREngineStrategy (line 7) | class AREngineStrategy(EngineStrategy):
    method __init__ (line 10) | def __init__(self, scheduler_config: SchedulerConfig, cache_config: Ca...
    method get_prealloc_size (line 14) | def get_prealloc_size(self, is_decoding: bool):
    method get_num_loops (line 18) | def get_num_loops(self, is_decoding: bool) -> int:
    method get_num_decode_tokens (line 22) | def get_num_decode_tokens(self) -> int:

FILE: lmdeploy/pytorch/strategies/ar/model_agent.py
  function get_model_inputs_next_decoding (line 20) | def get_model_inputs_next_decoding(inputs: ModelInputs, input_ids: torch...
  class ARExtraInputs (line 45) | class ARExtraInputs(ExtraInputs):
  class ARExtraOutputs (line 50) | class ARExtraOutputs(ExtraOutputs):
  class ARStoppingCriteria (line 55) | class ARStoppingCriteria(StoppingCriteria):
    method clone (line 58) | def clone(self):
    method merge (line 62) | def merge(self, other: 'ARStoppingCriteria'):
    method update (line 67) | def update(self, delta: ModelInputsDelta):
    method step (line 74) | def step(self,
  class ARModelAgentStrategy (line 94) | class ARModelAgentStrategy(ModelAgentStrategy):
    method slice_outputs (line 96) | def slice_outputs(self, inputs: torch.Tensor, seq_length: torch.LongTe...
    method slice_extra_inputs (line 107) | def slice_extra_inputs(self, extra_inputs: ARExtraInputs, model_inputs...
    method step_sampling_inputs (line 113) | def step_sampling_inputs(self, sampling_inputs: SamplingInputs, next_t...
    method make_stopping_criteria (line 127) | def make_stopping_criteria(self, seqs: SeqList) -> ARStoppingCriteria:
    method make_extra_inputs (line 133) | def make_extra_inputs(self, seqs: 'SeqList', model_inputs: 'ModelInput...
    method make_extra_outputs (line 137) | def make_extra_outputs(self, extra_inputs: ARExtraInputs) -> ARExtraOu...
    method update_prefill_for_next_step (line 141) | def update_prefill_for_next_step(
    method update_decoding_for_next_step (line 153) | def update_decoding_for_next_step(self, model_inputs: 'ModelInputs', n...
    method post_sampling (line 161) | def post_sampling(self, inputs: 'ModelInputs', logits: torch.Tensor, n...
    method broadcast_next_token (line 167) | def broadcast_next_token(self, next_token_ids: torch.Tensor, extra_inp...

FILE: lmdeploy/pytorch/strategies/ar/model_inputs.py
  function merge_model_inputs (line 13) | def merge_model_inputs(inputs: ModelInputs, other: ModelInputs) -> Model...
  class ARModelInputsStrategy (line 70) | class ARModelInputsStrategy(ModelInputsStrategy):
    method make_dummy (line 72) | def make_dummy(self,
    method merge (line 87) | def merge(self, inputs: ModelInputs, other: ModelInputs) -> ModelInputs:
    method index_select (line 92) | def index_select(inputs: ModelInputs,
    method update_inputs (line 162) | def update_inputs(self, inputs: ModelInputs, delta: 'ModelInputsDelta'...

FILE: lmdeploy/pytorch/strategies/ar/sampling.py
  function _gather_all_ids (line 16) | def _gather_all_ids(pad_id: int, seqs: SeqList, sampling_inputs: Samplin...
  function _gather_generated_ids (line 32) | def _gather_generated_ids(pad_id: int, seqs: SeqList, sampling_inputs: S...
  function _get_num_ignore_eos (line 48) | def _get_num_ignore_eos(seqs: SeqList):
  class ARSamplingStrategy (line 54) | class ARSamplingStrategy(SamplingStrategy):
    method __init__ (line 57) | def __init__(self, pad_token_id: int) -> None:
    method make_sampling_inputs (line 63) | def make_sampling_inputs(self, seqs: SeqList) -> SamplingInputs:
    method on_session_end (line 230) | def on_session_end(self, session_id: int):
    method merge_sampling_delta (line 233) | def merge_sampling_delta(
    method step_sampling_delta (line 268) | def step_sampling_delta(
    method update_sampling_delta (line 287) | def update_sampling_delta(

FILE: lmdeploy/pytorch/strategies/ar/sequence.py
  class SchedulerSequenceDefault (line 21) | class SchedulerSequenceDefault(SchedulerSequence):
    method update_token_ids (line 23) | def update_token_ids(self,
    method set_step (line 60) | def set_step(self, step: int):
  class ARSequenceStrategy (line 81) | class ARSequenceStrategy(SequenceStrategy):
    method make_sequence (line 83) | def make_sequence(self,
    method update_running (line 102) | def update_running(self, running: SeqList, batched_outputs: BatchedOut...

FILE: lmdeploy/pytorch/strategies/ar_spec/__init__.py
  class ARSpecStrategyFactory (line 18) | class ARSpecStrategyFactory(StrategyFactoryBase):
    method __init__ (line 20) | def __init__(self, model_config: ModelConfig, specdecode_config: SpecD...
    method build_cudagraph_strategy (line 26) | def build_cudagraph_strategy(self) -> 'CudagraphStrategy':
    method build_sampling_strategy (line 31) | def build_sampling_strategy(self) -> 'SamplingStrategy':
    method build_model_inputs_strategy (line 38) | def build_model_inputs_strategy(self) -> 'ModelInputsStrategy':
    method build_model_agent_strategy (line 43) | def build_model_agent_strategy(self) -> 'ModelAgentStrategy':
    method build_engine_strategy (line 48) | def build_engine_strategy(self, cache_config: 'CacheConfig',
    method build_sequence_strategy (line 56) | def build_sequence_strategy(self) -> SequenceStrategy:

FILE: lmdeploy/pytorch/strategies/ar_spec/cudagraph.py
  class ARSpecCudagraphStrategy (line 5) | class ARSpecCudagraphStrategy(CudagraphStrategy):
    method __init__ (line 7) | def __init__(self, num_spec_tokens: int):
    method get_max_tokens (line 11) | def get_max_tokens(self, batch_size: int, origin_batch_size: int, num_...

FILE: lmdeploy/pytorch/strategies/ar_spec/engine.py
  class ARSpecEngineStrategy (line 7) | class ARSpecEngineStrategy(EngineStrategy):
    method __init__ (line 10) | def __init__(self, scheduler_config: SchedulerConfig, cache_config: Ca...
    method get_prealloc_size (line 15) | def get_prealloc_size(self, is_decoding: bool):
    method get_num_loops (line 20) | def get_num_loops(self, is_decoding: bool) -> int:
    method get_num_decode_tokens (line 24) | def get_num_decode_tokens(self) -> int:

FILE: lmdeploy/pytorch/strategies/ar_spec/model_agent.py
  class ARSpecExtraInputs (line 22) | class ARSpecExtraInputs(ExtraInputs):
    method __repr__ (line 36) | def __repr__(self):
    method broadcast (line 43) | def broadcast(self, src: int, group, async_op=False):
    method merge (line 48) | def merge(self, other: 'ARSpecExtraInputs'):
  class ARSpecExtraOutputs (line 55) | class ARSpecExtraOutputs(ExtraOutputs):
    method __repr__ (line 60) | def __repr__(self):
  class ARSpecStoppingCriteria (line 65) | class ARSpecStoppingCriteria(ARStoppingCriteria):
    method clone (line 68) | def clone(self):
    method merge (line 72) | def merge(self, other: 'ARSpecStoppingCriteria'):
    method update (line 77) | def update(self, delta: ModelInputsDelta):
    method step (line 84) | def step(self,
  class ARSpecModelAgentStrategy (line 114) | class ARSpecModelAgentStrategy(ModelAgentStrategy):
    method __init__ (line 116) | def __init__(self, num_spec_tokens: int):
    method slice_outputs (line 119) | def slice_outputs(self, inputs: torch.Tensor, seq_length: torch.LongTe...
    method slice_extra_inputs (line 130) | def slice_extra_inputs(self, extra_inputs: ARSpecExtraInputs, model_in...
    method step_sampling_inputs (line 142) | def step_sampling_inputs(self, sampling_inputs: SamplingInputs, next_t...
    method make_stopping_criteria (line 152) | def make_stopping_criteria(self, seqs: SeqList) -> ARSpecStoppingCrite...
    method make_extra_inputs (line 158) | def make_extra_inputs(self, seqs: 'SeqList', model_inputs: 'ModelInput...
    method update_extra_inputs (line 162) | def update_extra_inputs(self, extra_inputs: ARSpecExtraInputs, delta: ...
    method make_extra_outputs (line 168) | def make_extra_outputs(self, extra_inputs: ARSpecExtraInputs) -> ARSpe...
    method update_prefill_for_next_step (line 174) | def update_prefill_for_next_step(
    method update_decoding_for_next_step (line 194) | def update_decoding_for_next_step(self, model_inputs: 'ModelInputs', n...
    method post_sampling (line 213) | def post_sampling(self, inputs: 'ModelInputs', logits: torch.Tensor, n...
    method make_dummy_next_token (line 218) | def make_dummy_next_token(self, inputs: 'ModelInputs', logits: torch.T...
    method broadcast_next_token (line 227) | def broadcast_next_token(self, next_token_ids: torch.Tensor, extra_inp...

FILE: lmdeploy/pytorch/strategies/ar_spec/model_inputs.py
  class ARSpecModelInputsStrategy (line 11) | class ARSpecModelInputsStrategy(ModelInputsStrategy):
    method __init__ (line 13) | def __init__(self, num_spec_tokens: int):
    method make_dummy (line 16) | def make_dummy(
    method merge (line 41) | def merge(self, inputs: ModelInputs, other: ModelInputs) -> ModelInputs:
    method update_inputs (line 46) | def update_inputs(self, inputs: ModelInputs, delta: 'ModelInputsDelta'...

FILE: lmdeploy/pytorch/strategies/ar_spec/sampling.py
  class ARSpecSamplingStrategy (line 5) | class ARSpecSamplingStrategy(ARSamplingStrategy):

FILE: lmdeploy/pytorch/strategies/ar_spec/sequence.py
  class SchedulerSequenceARSpec (line 21) | class SchedulerSequenceARSpec(SchedulerSequenceDefault):
    method __post_init__ (line 23) | def __post_init__(self):
    method num_valid_ids (line 32) | def num_valid_ids(self):
    method num_spec_ids (line 36) | def num_spec_ids(self):
    method generated_ids (line 40) | def generated_ids(self) -> np.ndarray:
    method set_stop_pos (line 45) | def set_stop_pos(self, pos: int):
    method _update_token_ids_inputs (line 56) | def _update_token_ids_inputs(self, token_ids: np.ndarray):
    method _update_token_ids_prefill (line 67) | def _update_token_ids_prefill(self, token_ids: np.ndarray, draft_token...
    method _update_token_ids_decode (line 80) | def _update_token_ids_decode(self, token_ids: np.ndarray, draft_token_...
    method update_token_ids (line 110) | def update_token_ids(self,
  class ARSpecSequenceStrategy (line 140) | class ARSpecSequenceStrategy(ARSequenceStrategy):
    method make_sequence (line 142) | def make_sequence(self,
    method update_running (line 159) | def update_running(self, running: SeqList, batched_outputs: BatchedOut...

FILE: lmdeploy/pytorch/strategies/base/__init__.py
  class StrategyFactoryBase (line 16) | class StrategyFactoryBase(ABC):
    method build_cudagraph_strategy (line 19) | def build_cudagraph_strategy(self) -> 'CudagraphStrategy':
    method build_sampling_strategy (line 24) | def build_sampling_strategy(self) -> 'SamplingStrategy':
    method build_model_inputs_strategy (line 29) | def build_model_inputs_strategy(self) -> 'ModelInputsStrategy':
    method build_model_agent_strategy (line 34) | def build_model_agent_strategy(self) -> 'ModelAgentStrategy':
    method build_engine_strategy (line 39) | def build_engine_strategy(self, cache_config: 'CacheConfig',
    method build_sequence_strategy (line 45) | def build_sequence_strategy(self) -> 'SequenceStrategy':

FILE: lmdeploy/pytorch/strategies/base/cudagraph.py
  class CudagraphStrategy (line 5) | class CudagraphStrategy(ABC):
    method get_max_tokens (line 8) | def get_max_tokens(self, batch_size: int, origin_batch_size: int, num_...

FILE: lmdeploy/pytorch/strategies/base/engine.py
  class EngineStrategy (line 5) | class EngineStrategy(ABC):
    method get_prealloc_size (line 9) | def get_prealloc_size(self, is_decoding: bool) -> int:
    method get_num_loops (line 14) | def get_num_loops(self, is_decoding: bool) -> int:
    method get_num_decode_tokens (line 19) | def get_num_decode_tokens(self) -> int:
    method get_num_required_tokens (line 23) | def get_num_required_tokens(self) -> int:

FILE: lmdeploy/pytorch/strategies/base/model_agent.py
  function to_device (line 18) | def to_device(self, device: str, non_blocking: bool = False):
  class ExtraInputs (line 32) | class ExtraInputs(ABC):
    method to_device (line 34) | def to_device(self, device: str, non_blocking: bool = False):
    method broadcast (line 38) | def broadcast(self, src: int, group, async_op=False):
    method merge (line 42) | def merge(self, other: 'ExtraInputs'):
  class ExtraOutputs (line 48) | class ExtraOutputs(ABC):
    method to_device (line 50) | def to_device(self, device: str, non_blocking: bool = False):
    method to_cpu (line 54) | def to_cpu(self):
    method to_numpy (line 58) | def to_numpy(self):
    method to_tensor (line 71) | def to_tensor(self):
  class StoppingCriteria (line 86) | class StoppingCriteria(ABC):
    method clone (line 90) | def clone(self) -> 'StoppingCriteria':
    method merge (line 94) | def merge(self, other: 'StoppingCriteria') -> 'StoppingCriteria':
    method update (line 98) | def update(self, delta: 'ModelInputsDelta') -> 'StoppingCriteria':
    method step (line 102) | def step(self,
    method to_device (line 110) | def to_device(self, device: str, non_blocking: bool = False):
  class ModelAgentStrategy (line 115) | class ModelAgentStrategy(ABC):
    method slice_outputs (line 119) | def slice_outputs(self, inputs: torch.Tensor, seq_length: torch.LongTe...
    method slice_extra_inputs (line 124) | def slice_extra_inputs(self, extra_inputs: ExtraInputs, model_inputs: ...
    method make_stopping_criteria (line 130) | def make_stopping_criteria(self, seqs: 'SeqList') -> StoppingCriteria:
    method make_extra_inputs (line 135) | def make_extra_inputs(self, seqs: 'SeqList', model_inputs: 'ModelInput...
    method update_extra_inputs (line 139) | def update_extra_inputs(self, extra_inputs: ExtraInputs, delta: 'Model...
    method make_extra_outputs (line 144) | def make_extra_outputs(self, extra_inputs: ExtraInputs) -> ExtraOutputs:
    method step_sampling_inputs (line 149) | def step_sampling_inputs(
    method update_prefill_for_next_step (line 159) | def update_prefill_for_next_step(
    method update_decoding_for_next_step (line 171) | def update_decoding_for_next_step(self, model_inputs: 'ModelInputs', n...
    method post_sampling (line 178) | def post_sampling(self, inputs: 'ModelInputs', logits: torch.Tensor, n...
    method make_dummy_next_token (line 183) | def make_dummy_next_token(self, inputs: 'ModelInputs', logits: torch.T...
    method broadcast_next_token (line 191) | def broadcast_next_token(self, next_token_ids: torch.Tensor, extra_inp...

FILE: lmdeploy/pytorch/strategies/base/model_inputs.py
  function make_dummy_inputs (line 11) | def make_dummy_inputs(batch_size: int,
  class ModelInputsStrategy (line 47) | class ModelInputsStrategy(ABC):
    method make_dummy (line 50) | def make_dummy(self,
    method merge (line 60) | def merge(self, inputs: ModelInputs, other: ModelInputs) -> ModelInputs:
    method update_inputs (line 65) | def update_inputs(self, inputs: ModelInputs, delta: 'ModelInputsDelta'...

FILE: lmdeploy/pytorch/strategies/base/sampling.py
  class SamplingStrategy (line 16) | class SamplingStrategy(ABC):
    method make_sampling_inputs (line 20) | def make_sampling_inputs(self, seqs: SeqList) -> SamplingInputs:
    method on_session_end (line 25) | def on_session_end(self, session_id: int) -> None:
    method merge_sampling_delta (line 30) | def merge_sampling_delta(
    method step_sampling_delta (line 38) | def step_sampling_delta(
    method update_sampling_delta (line 48) | def update_sampling_delta(

FILE: lmdeploy/pytorch/strategies/base/sequence.py
  class SequenceStrategy (line 14) | class SequenceStrategy(ABC):
    method make_sequence (line 17) | def make_sequence(self,
    method update_running (line 29) | def update_running(self, running: 'SeqList', batched_outputs: 'Batched...

FILE: lmdeploy/pytorch/strategies/dllm/__init__.py
  class DLLMStrategyFactory (line 21) | class DLLMStrategyFactory(StrategyFactoryBase):
    method __init__ (line 23) | def __init__(self, model_config: ModelConfig, dllm_config: DLLMConfig):
    method _update_dllm_block_length (line 31) | def _update_dllm_block_length(self):
    method build_cudagraph_strategy (line 51) | def build_cudagraph_strategy(self) -> 'CudagraphStrategy':
    method build_sampling_strategy (line 56) | def build_sampling_strategy(self) -> 'SamplingStrategy':
    method build_model_inputs_strategy (line 63) | def build_model_inputs_strategy(self) -> 'ModelInputsStrategy':
    method build_model_agent_strategy (line 68) | def build_model_agent_strategy(self) -> 'ModelAgentStrategy':
    method build_engine_strategy (line 73) | def build_engine_strategy(self, cache_config: 'CacheConfig',
    method build_sequence_strategy (line 81) | def build_sequence_strategy(self) -> SequenceStrategy:

FILE: lmdeploy/pytorch/strategies/dllm/cudagraph.py
  class DLLMCudagraphStrategy (line 5) | class DLLMCudagraphStrategy(CudagraphStrategy):
    method __init__ (line 7) | def __init__(self, block_size: int) -> None:
    method get_max_tokens (line 11) | def get_max_tokens(self, batch_size: int, origin_batch_size: int, num_...

FILE: lmdeploy/pytorch/strategies/dllm/engine.py
  class DLLMEngineStrategy (line 12) | class DLLMEngineStrategy(EngineStrategy):
    method __init__ (line 15) | def __init__(self, scheduler_config: SchedulerConfig, cache_config: Ca...
    method _check (line 22) | def _check(self):
    method get_prealloc_size (line 32) | def get_prealloc_size(self, is_decoding: bool) -> int:
    method get_num_loops (line 42) | def get_num_loops(self, is_decoding: bool) -> int:
    method get_num_decode_tokens (line 52) | def get_num_decode_tokens(self) -> int:

FILE: lmdeploy/pytorch/strategies/dllm/model_agent.py
  function get_model_inputs_next_decoding (line 24) | def get_model_inputs_next_decoding(inputs: ModelInputs, input_ids: torch...
  class DLLMExtraInputs (line 47) | class DLLMExtraInputs(ExtraInputs):
    method broadcast (line 51) | def broadcast(self, src: int, group, async_op=False):
    method merge (line 54) | def merge(self, other: 'DLLMExtraInputs'):
  class DLLMExtraOutputs (line 61) | class DLLMExtraOutputs(ExtraOutputs):
  function _check_stopwords_dllm (line 66) | def _check_stopwords_dllm(token_ids: torch.Tensor, stop_words: torch.Ten...
  class DLLMStoppingCriteria (line 100) | class DLLMStoppingCriteria(StoppingCriteria):
    method clone (line 104) | def clone(self) -> 'DLLMStoppingCriteria':
    method merge (line 108) | def merge(self, other: 'DLLMStoppingCriteria') -> 'DLLMStoppingCriteria':
    method update (line 114) | def update(self, delta: 'ModelInputsDelta') -> 'DLLMStoppingCriteria':
    method step (line 121) | def step(self,
  class DLLMModelAgentStrategy (line 157) | class DLLMModelAgentStrategy(ModelAgentStrategy):
    method __init__ (line 159) | def __init__(self, dllm_config: DLLMConfig, dllm_mask_token: int):
    method _update_dllm (line 166) | def _update_dllm(self, next_token_ids: torch.Tensor, dllm_mask: torch....
    method slice_outputs (line 185) | def slice_outputs(self, inputs: torch.Tensor, seq_length: torch.LongTe...
    method slice_extra_inputs (line 200) | def slice_extra_inputs(self, extra_inputs: DLLMExtraInputs, model_inpu...
    method step_sampling_inputs (line 206) | def step_sampling_inputs(self, sampling_inputs: SamplingInputs, next_t...
    method make_stopping_criteria (line 223) | def make_stopping_criteria(self, seqs: SeqList) -> DLLMStoppingCriteria:
    method make_extra_inputs (line 238) | def make_extra_inputs(self, seqs: 'SeqList', model_inputs: 'ModelInput...
    method update_extra_inputs (line 250) | def update_extra_inputs(self, extra_inputs: DLLMExtraInputs, delta: 'M...
    method make_extra_outputs (line 260) | def make_extra_outputs(self, extra_inputs: DLLMExtraInputs) -> DLLMExt...
    method update_prefill_for_next_step (line 265) | def update_prefill_for_next_step(
    method update_decoding_for_next_step (line 285) | def update_decoding_for_next_step(self, model_inputs: 'ModelInputs', n...
    method post_sampling (line 297) | def post_sampling(self, inputs: 'ModelInputs', logits: torch.Tensor, n...
    method make_dummy_next_token (line 309) | def make_dummy_next_token(self, inputs: 'ModelInputs', logits: torch.T...
    method broadcast_next_token (line 316) | def broadcast_next_token(self, next_token_ids: torch.Tensor, extra_inp...

FILE: lmdeploy/pytorch/strategies/dllm/model_inputs.py
  class DLLMModelInputsStrategy (line 8) | class DLLMModelInputsStrategy(ModelInputsStrategy):
    method __init__ (line 10) | def __init__(self, block_size: int):
    method make_dummy (line 13) | def make_dummy(self,
    method merge (line 27) | def merge(self, inputs: ModelInputs, other: ModelInputs) -> ModelInputs:
    method update_inputs (line 31) | def update_inputs(self, inputs: ModelInputs, delta: 'ModelInputsDelta'...

FILE: lmdeploy/pytorch/strategies/dllm/sampling.py
  class DLLMSamplingStrategy (line 18) | class DLLMSamplingStrategy(ARSamplingStrategy):
    method __init__ (line 21) | def __init__(self, pad_token_id: int, dllm_block_length: int) -> None:
    method make_sampling_inputs (line 26) | def make_sampling_inputs(self, seqs: SeqList) -> SamplingInputs:
    method merge_sampling_delta (line 84) | def merge_sampling_delta(
    method update_sampling_delta (line 99) | def update_sampling_delta(
    method step_sampling_delta (line 119) | def step_sampling_delta(

FILE: lmdeploy/pytorch/strategies/dllm/sequence.py
  class HistoryDLLMMask (line 27) | class HistoryDLLMMask(HistoryTokenIds):
    method __init__ (line 29) | def __init__(self, token_ids: np.ndarray = None, dtype: np.dtype = DLL...
  class SchedulerSequenceDLLM (line 34) | class SchedulerSequenceDLLM(SchedulerSequenceDefault):
    method __post_init__ (line 39) | def __post_init__(self):
    method dllm_mask (line 46) | def dllm_mask(self):
    method num_valid_ids (line 52) | def num_valid_ids(self):
    method generated_ids (line 56) | def generated_ids(self) -> np.ndarray:
    method all_dllm_mask (line 62) | def all_dllm_mask(self):
    method dllm_block_length (line 66) | def dllm_block_length(self):
    method dllm_mask_token (line 70) | def dllm_mask_token(self):
    method set_stop_pos (line 73) | def set_stop_pos(self, pos: int):
    method _update_token_ids_inputs (line 79) | def _update_token_ids_inputs(self, token_ids: np.ndarray, dllm_mask: n...
    method _update_token_ids_decode (line 119) | def _update_token_ids_decode(self, token_ids: np.ndarray, dllm_mask: n...
    method _update_token_ids_prefill (line 150) | def _update_token_ids_prefill(self, token_ids: np.ndarray, dllm_mask: ...
    method update_token_ids (line 165) | def update_token_ids(self,
    method set_step (line 197) | def set_step(self, step: int):
  class DLLMSequenceStrategy (line 208) | class DLLMSequenceStrategy(SequenceStrategy):
    method __init__ (line 210) | def __init__(self, block_size: int, dllm_mask_token: int) -> None:
    method make_sequence (line 214) | def make_sequence(self,
    method update_running (line 231) | def update_running(self, running: SeqList, batched_outputs: BatchedOut...

FILE: lmdeploy/pytorch/strategies/dllm/unmasking.py
  class UnmaskingProcessor (line 13) | class UnmaskingProcessor:
    method __init__ (line 15) | def __init__(self, dllm_config: DLLMConfig):
    method _get_scores (line 18) | def _get_scores(self, logits: torch.Tensor, token_ids: torch.Tensor):
    method _get_denoise_num (line 24) | def _get_denoise_num(self):
    method low_confidence_static (line 34) | def low_confidence_static(self, logits: torch.Tensor, token_ids: torch...
    method low_confidence_dynamic (line 51) | def low_confidence_dynamic(self, logits: torch.Tensor, token_ids: torc...
    method sequential (line 69) | def sequential(self, dllm_mask: torch.Tensor):
    method __call__ (line 89) | def __call__(self, logits: torch.Tensor, input_ids: torch.Tensor, toke...

FILE: lmdeploy/pytorch/third_party/deep_gemm/__init__.py
  function _log_jit_build (line 21) | def _log_jit_build(M: int, N: int, K: int):
  function fp8_gemm_nt (line 40) | def fp8_gemm_nt(a, b, d, c, recipe=None, compiled_dim='nk', disable_ue8m...
  function m_grouped_fp8_gemm_nt_contiguous (line 52) | def m_grouped_fp8_gemm_nt_contiguous(a, b, d, m_indices, recipe=None, co...
  function m_grouped_fp8_gemm_nt_masked (line 64) | def m_grouped_fp8_gemm_nt_masked(a,
  function get_mn_major_tma_aligned_tensor (line 83) | def get_mn_major_tma_aligned_tensor(x):

FILE: lmdeploy/pytorch/third_party/flash_attn_interface.py
  function flash_attn_varlen_func (line 9) | def flash_attn_varlen_func(*args, **kwargs):
  function flash_attn_with_kvcache (line 18) | def flash_attn_with_kvcache(*args, **kwargs):

FILE: lmdeploy/pytorch/tools/utils.py
  class Timer (line 6) | class Timer:
    method __init__ (line 9) | def __init__(self):
    method tic_cpu (line 13) | def tic_cpu(self):
    method toc_cpu (line 18) | def toc_cpu(self):
    method tic_cuda (line 25) | def tic_cuda(self):
    method toc_cuda (line 32) | def toc_cuda(self):
    method tic (line 41) | def tic(cls, is_cuda: bool = False) -> 'Timer':
    method toc (line 49) | def toc(self):
    method timing (line 59) | def timing(cls, is_cuda: bool = False) -> 'Timer':
    method format_duration (line 65) | def format_duration(duration: float, acc: int = 3):
    method format_flops (line 78) | def format_flops(flops: float, acc: int = 3):
    method formatted_print (line 96) | def formatted_print(out_info: dict, title: str = None):
    method print (line 108) | def print(self, flop: int = None, title: str = None):
    method toc_print (line 126) | def toc_print(self, flop: int = None, title: str = None):
  function visualize_pipe_out (line 130) | def visualize_pipe_out(outputs, enable_meta: bool = True):
  function visualize_chat_completions (line 209) | def visualize_chat_completions(outputs, enable_meta: bool = True):
  function dump_tilelang_source (line 233) | def dump_tilelang_source(kernel, path: str = 'sources/tvm_kernels.cu'):

FILE: lmdeploy/pytorch/transformers/__init__.py
  function register_config (line 10) | def register_config(model_type: str):
  function config_from_pretrained (line 21) | def config_from_pretrained(pretrained_model_name_or_path: str, **kwargs):

FILE: lmdeploy/pytorch/transformers/configuration_deepseek_v32.py
  class DeepseekV32Config (line 6) | class DeepseekV32Config(DeepseekV3Config):
    method __init__ (line 9) | def __init__(self, index_head_dim=128, index_n_heads=64, index_topk=20...

FILE: lmdeploy/pytorch/utils.py
  function get_gpu_memory (line 16) | def get_gpu_memory(device_id: int = None) -> int:
  function get_cpu_memory (line 24) | def get_cpu_memory() -> int:
  function bind_sigature (line 29) | def bind_sigature(input_names: str, args: Sequence, kwargs: Dict):
  function singleton (line 38) | def singleton(cls):
  class CtxMgrBase (line 59) | class CtxMgrBase(Generic[T]):
    method __init__ (line 62) | def __init__(self, default: Optional[T] = None):
    method current_context (line 65) | def current_context(self) -> Optional[T]:
    method set_context (line 69) | def set_context(self, context: Optional[T]):
    method context (line 74) | def context(self, context: T):
  function maybe_register_config_serialize_by_value (line 85) | def maybe_register_config_serialize_by_value(trust_remote_code: bool) ->...
  function monkey_patch_hf_modules_cache (line 148) | def monkey_patch_hf_modules_cache():
  function wait_for_async_tasks (line 185) | async def wait_for_async_tasks(tasks: Sequence[asyncio.Task],
  function cancel_async_tasks (line 222) | async def cancel_async_tasks(tasks: Sequence[asyncio.Task]):

FILE: lmdeploy/pytorch/weight_loader/model_weight_loader.py
  function load_weight (line 19) | def load_weight(param: torch.nn.Parameter, loaded_weight: torch.Tensor, ...
  function default_weight_loader (line 28) | def default_weight_loader(param: torch.nn.Parameter, loaded_weight: torc...
  function _get_weight_type (line 38) | def _get_weight_type(model_path: str, use_safetensors: bool = None):
  function _get_weight_map (line 62) | def _get_weight_map(model_path: str, weight_type: str):
  function _get_weight_path (line 78) | def _get_weight_path(model_path: str, weight_type: str):
  function _get_safetensors_weights_iterator (line 91) | def _get_safetensors_weights_iterator(file: str, prefix: str):
  function _get_pt_weights_iterator (line 101) | def _get_pt_weights_iterator(file: str, prefix: str):
  class ModelWeightLoader (line 115) | class ModelWeightLoader:
    method __init__ (line 118) | def __init__(self, model_path: str, prefix: str = None):
    method _get_shard_paths (line 128) | def _get_shard_paths(model_path: str, is_sharded: bool, weight_type: s...
    method _get_weights_iterator (line 139) | def _get_weights_iterator(self, path: str):
    method _skip_dummy_iterator (line 148) | def _skip_dummy_iterator(iterator, dummy_prefix: list):
    method _rename_weights_iterator (line 155) | def _rename_weights_iterator(iterator, model: torch.nn.Module):
    method load_model_weights (line 162) | def load_model_weights(
  function load_model_weights (line 193) | def load_model_weights(model: torch.nn.Module, checkpoint_path: str, pre...

FILE: lmdeploy/serve/core/async_engine.py
  class GenOut (line 33) | class GenOut:
    method to_response (line 47) | def to_response(self, index: int = 0) -> Response:
  class AsyncEngine (line 66) | class AsyncEngine:
    method __init__ (line 96) | def __init__(self,
    method close (line 152) | def close(self):
    method __enter__ (line 156) | def __enter__(self):
    method __exit__ (line 159) | def __exit__(self, exc_type, exc_value, traceback):
    method _build_turbomind (line 162) | def _build_turbomind(self, model_path: str, backend_config: TurbomindE...
    method _build_pytorch (line 167) | def _build_pytorch(self,
    method _build_stat_loggers (line 176) | def _build_stat_loggers(self):
    method get_schedule_metrics (line 194) | def get_schedule_metrics(self):
    method do_log_stats (line 197) | async def do_log_stats(self):
    method stop_all_session (line 203) | async def stop_all_session(self):
    method sleep (line 209) | def sleep(self, level: int = 1):
    method wakeup (line 221) | def wakeup(self, tags: List[str] | None = None):
    method _determine_gen_config (line 242) | def _determine_gen_config(self, session, input_ids, gen_config: Genera...
    method safe_run (line 265) | async def safe_run(self, handle, session, **kwargs):
    method generate (line 280) | async def generate(
    method start_loop (line 545) | def start_loop(self, loop, use_async_api=False):
    method free_cache (line 573) | def free_cache(self, session_id: int):
    method p2p_initialize (line 579) | def p2p_initialize(self, init_request: DistServeInitRequest):
    method p2p_connect (line 582) | def p2p_connect(self, conn_request: List[DistServeConnectionRequest]):
    method p2p_drop_connect (line 585) | def p2p_drop_connect(self, drop_conn_request: List[DistServeDropConnec...
    method async_get_reward_score (line 590) | async def async_get_reward_score(self, input_ids: List) -> List[float]:
    method async_get_logits (line 606) | async def async_get_logits(self,

FILE: lmdeploy/serve/core/exceptions.py
  class SafeRunException (line 5) | class SafeRunException(Exception):

FILE: lmdeploy/serve/core/vl_async_engine.py
  class VLAsyncEngine (line 12) | class VLAsyncEngine(AsyncEngine):
    method __init__ (line 15) | def __init__(self,
    method close (line 42) | def close(self):

FILE: lmdeploy/serve/managers/session_manager.py
  class Session (line 17) | class Session:
    method __init__ (line 20) | def __init__(self, session_id: int, session_mgr: SessionManager, **kwa...
    method update (line 33) | def update(self, **kwargs):
    method __repr__ (line 39) | def __repr__(self) -> str:
    method __str__ (line 46) | def __str__(self) -> str:
    method reset (line 57) | def reset(self):
    method request_handle (line 73) | async def request_handle(self):
    method async_abort (line 102) | async def async_abort(self):
    method async_close (line 108) | async def async_close(self):
    method abort (line 122) | def abort(self):
    method close (line 127) | def close(self):
    method _run (line 132) | def _run(self, coro):
  class RequestHandlePool (line 137) | class RequestHandlePool:
    method __init__ (line 161) | def __init__(self, engine, size: int):
    method get (line 167) | async def get(self):
    method put (line 177) | def put(self, handle):
    method clear (line 182) | def clear(self):
  class SessionManager (line 188) | class SessionManager:
    method __init__ (line 191) | def __init__(self):
    method get (line 199) | def get(self, session_id: int | None = None, **kwargs) -> Session:
    method async_abort_all (line 213) | async def async_abort_all(self):
    method has (line 223) | def has(self, session_id):
    method remove (line 226) | def remove(self, session: Session):
    method clear (line 229) | def clear(self):
    method attach_event_loop (line 234) | def attach_event_loop(self, loop):
    method build_request_handle_pool (line 237) | def build_request_handle_pool(self, engine, size):

FILE: lmdeploy/serve/openai/api_client.py
  function get_model_list (line 10) | def get_model_list(api_url: str, headers: dict = None):
  function json_loads (line 27) | def json_loads(content):
  class APIClient (line 38) | class APIClient:
    method __init__ (line 48) | def __init__(self, api_server_url: str, api_key: Optional[str] = None,...
    method available_models (line 61) | def available_models(self):
    method encode (line 68) | def encode(self,
    method chat_completions_v1 (line 90) | def chat_completions_v1(
    method completions_v1 (line 175) | def completions_v1(

FILE: lmdeploy/serve/openai/api_server.py
  class VariableInterface (line 55) | class VariableInterface:
    method get_session (line 70) | def get_session(session_id: int) -> int:
    method get_session_manager (line 78) | def get_session_manager():
    method get_engine_config (line 82) | def get_engine_config():
  function get_model_list (line 90) | def get_model_list():
  function available_models (line 102) | def available_models():
  function create_error_response (line 110) | def create_error_response(status: HTTPStatus, message: str, error_type='...
  function check_request (line 122) | def check_request(request) -> JSONResponse | None:
  function _create_completion_logprobs (line 150) | def _create_completion_logprobs(tokenizer: Tokenizer,
  function _create_chat_completion_logprobs (line 209) | def _create_chat_completion_logprobs(tokenizer: Tokenizer,
  function health (line 246) | async def health() -> Response:
  function terminate (line 252) | async def terminate():
  function logit_bias_logits_processor (line 265) | def logit_bias_logits_processor(logit_bias: dict[int, float] | dict[str,...
  function chat_completions_v1 (line 296) | async def chat_completions_v1(request: ChatCompletionRequest, raw_reques...
  function completions_v1 (line 683) | async def completions_v1(request: CompletionRequest, raw_request: Reques...
  function generate (line 927) | async def generate(request: GenerateReqInput, raw_request: Request = None):
  function create_embeddings (line 1044) | async def create_embeddings(request: EmbeddingsRequest, raw_request: Req...
  function encode (line 1050) | async def encode(request: EncodeRequest, raw_request: Request = None):
  function pooling (line 1080) | async def pooling(request: PoolingRequest, raw_request: Request = None):
  function update_params (line 1134) | def update_params(request: UpdateParamsRequest, raw_request: Request = N...
  function sleep (line 1141) | async def sleep(raw_request: Request = None):
  function wakeup (line 1148) | async def wakeup(raw_request: Request = None):
  function is_sleeping (line 1156) | async def is_sleeping():
  function engine_info (line 1165) | async def engine_info():
  function p2p_initialize (line 1181) | async def p2p_initialize(init_request: DistServeInitRequest):
  function p2p_connect (line 1186) | async def p2p_connect(conn_request: DistServeConnectionRequest):
  function p2p_drop_connect (line 1191) | async def p2p_drop_connect(drop_conn_request: DistServeDropConnectionReq...
  function free_cache (line 1196) | async def free_cache(cache_free_request: DistServeCacheFreeRequest) -> J...
  function abort_request (line 1206) | async def abort_request(request: AbortRequest, raw_request: Request = No...
  function chat_interactive_v1 (line 1222) | async def chat_interactive_v1(request, raw_request: Request = None):
  function handle_torchrun (line 1228) | def handle_torchrun():
  function startup_event (line 1242) | async def startup_event():
  function shutdown_event (line 1267) | async def shutdown_event():
  function validation_exception_handler (line 1273) | async def validation_exception_handler(request: Request, exc: RequestVal...
  class ConcurrencyLimitMiddleware (line 1284) | class ConcurrencyLimitMiddleware(BaseHTTPMiddleware):
    method __init__ (line 1286) | def __init__(self, app: FastAPI, max_concurrent_requests: int):
    method dispatch (line 1290) | async def dispatch(self, request: Request, call_next):
  function set_parsers (line 1296) | def set_parsers(reasoning_parser: str | None = None, tool_parser: str | ...
  function mount_metrics (line 1318) | def mount_metrics(app: FastAPI, backend_config: PytorchEngineConfig | Tu...
  function create_lifespan_handler (line 1333) | def create_lifespan_handler(backend_config: PytorchEngineConfig | Turbom...
  function serve (line 1365) | def serve(model_path: str,

FILE: lmdeploy/serve/openai/harmony_utils.py
  function get_encoding (line 14) | def get_encoding():
  function get_streamable_parser_for_assistant (line 21) | def get_streamable_parser_for_assistant() -> 'StreamableParser':
  class GptOssChatParser (line 25) | class GptOssChatParser:
    method __init__ (line 27) | def __init__(self):
    method parse_streaming (line 30) | def parse_streaming(self, tokens: List[int]) -> DeltaMessage:
    method parse_full (line 79) | def parse_full(self, tokens: List[int]) -> ChatMessage:

FILE: lmdeploy/serve/openai/launch_server.py
  function find_available_ports (line 19) | def find_available_ports(num: int) -> List[int]:
  function get_host_ip (line 42) | def get_host_ip():
  function _run_server (line 50) | def _run_server(gpu_ids: List[int], model_path: str, **kwargs):
  function cleanup_processes (line 59) | def cleanup_processes(processes: List[mp.Process]):
  function launch_server (line 83) | def launch_server(num_nodes: int,

FILE: lmdeploy/serve/openai/protocol.py
  class ErrorResponse (line 11) | class ErrorResponse(BaseModel):
  class ModelPermission (line 20) | class ModelPermission(BaseModel):
  class ModelCard (line 36) | class ModelCard(BaseModel):
  class ModelList (line 47) | class ModelList(BaseModel):
  class UsageInfo (line 53) | class UsageInfo(BaseModel):
  class Function (line 60) | class Function(BaseModel):
  class Tool (line 67) | class Tool(BaseModel):
  class ToolChoiceFuncName (line 73) | class ToolChoiceFuncName(BaseModel):
  class ToolChoice (line 78) | class ToolChoice(BaseModel):
  class StreamOptions (line 84) | class StreamOptions(BaseModel):
  class JsonSchema (line 89) | class JsonSchema(BaseModel):
  class ResponseFormat (line 101) | class ResponseFormat(BaseModel):
  class ChatCompletionRequest (line 108) | class ChatCompletionRequest(BaseModel):
  class FunctionCall (line 173) | class FunctionCall(BaseModel):
  class ToolCall (line 179) | class ToolCall(BaseModel):
  class ExtractedToolCallInformation (line 186) | class ExtractedToolCallInformation(BaseModel):
  class ChatMessage (line 197) | class ChatMessage(BaseModel):
  class LogProbs (line 206) | class LogProbs(BaseModel):
  class TopLogprob (line 213) | class TopLogprob(BaseModel):
  class ChatCompletionTokenLogprob (line 219) | class ChatCompletionTokenLogprob(BaseModel):
  class ChoiceLogprobs (line 226) | class ChoiceLogprobs(BaseModel):
  class ChatCompletionResponseChoice (line 230) | class ChatCompletionResponseChoice(BaseModel):
  class ChatCompletionResponse (line 238) | class ChatCompletionResponse(BaseModel):
  class DeltaFunctionCall (line 248) | class DeltaFunctionCall(BaseModel):
  class DeltaToolCall (line 254) | class DeltaToolCall(BaseModel):
  class DeltaMessage (line 261) | class DeltaMessage(BaseModel):
  class ChatCompletionResponseStreamChoice (line 270) | class ChatCompletionResponseStreamChoice(BaseModel):
  class ChatCompletionStreamResponse (line 278) | class ChatCompletionStreamResponse(BaseModel):
  class CompletionRequest (line 288) | class CompletionRequest(BaseModel):
  class CompletionResponseChoice (line 327) | class CompletionResponseChoice(BaseModel):
  class CompletionResponse (line 336) | class CompletionResponse(BaseModel):
  class CompletionResponseStreamChoice (line 346) | class CompletionResponseStreamChoice(BaseModel):
  class CompletionStreamResponse (line 355) | class CompletionStreamResponse(BaseModel):
  class EmbeddingsRequest (line 365) | class EmbeddingsRequest(BaseModel):
  class EmbeddingsResponse (line 372) | class EmbeddingsResponse(BaseModel):
  class PoolingRequest (line 380) | class PoolingRequest(BaseModel):
  class PoolingResponse (line 397) | class PoolingResponse(BaseModel):
  class EncodeRequest (line 407) | class EncodeRequest(BaseModel):
  class EncodeResponse (line 414) | class EncodeResponse(BaseModel):
  class GenerateResponse (line 420) | class GenerateResponse(BaseModel):
  class UpdateParamsRequest (line 429) | class UpdateParamsRequest(BaseModel):
  class GenerateReqInput (line 442) | class GenerateReqInput(BaseModel):
  class GenerateReqMetaOutput (line 476) | class GenerateReqMetaOutput(BaseModel):
  class GenerateReqOutput (line 485) | class GenerateReqOutput(BaseModel):
  class AbortRequest (line 491) | class AbortRequest(BaseModel):

FILE: lmdeploy/serve/openai/reasoning_parser/deepseek_r1_reasoning_parser.py
  class DeepSeekR1ReasoningParser (line 12) | class DeepSeekR1ReasoningParser(ReasoningParser):
    method __init__ (line 19) | def __init__(self, tokenizer: object):
    method extract_reasoning_content_streaming (line 36) | def extract_reasoning_content_streaming(
    method extract_reasoning_content (line 107) | def extract_reasoning_content(self, model_output: str, request: ChatCo...

FILE: lmdeploy/serve/openai/reasoning_parser/qwen_qwq_reasoning_parser.py
  class QwenQwQReasoningParser (line 11) | class QwenQwQReasoningParser(ReasoningParser):
    method __init__ (line 18) | def __init__(self, tokenizer: object):
    method extract_reasoning_content_streaming (line 29) | def extract_reasoning_content_streaming(
    method extract_reasoning_content (line 97) | def extract_reasoning_content(self, model_output: str, request: ChatCo...

FILE: lmdeploy/serve/openai/reasoning_parser/reasoning_parser.py
  class ReasoningParser (line 13) | class ReasoningParser:
    method __init__ (line 15) | def __init__(self, tokenizer: object):
    method vocab (line 19) | def vocab(self) -> Dict[str, int]:
    method extract_reasoning_content_streaming (line 24) | def extract_reasoning_content_streaming(
    method extract_reasoning_content (line 44) | def extract_reasoning_content(self, model_output: str, request: ChatCo...

FILE: lmdeploy/serve/openai/serving_chat_completion.py
  function check_request (line 10) | def check_request(request: ChatCompletionRequest, server_context: 'Varia...

FILE: lmdeploy/serve/openai/serving_completion.py
  function check_request (line 10) | def check_request(request: CompletionRequest, server_context: 'VariableI...

FILE: lmdeploy/serve/openai/serving_generate.py
  function check_request (line 10) | def check_request(request: GenerateReqInput, server_context: 'VariableIn...

FILE: lmdeploy/serve/openai/tool_parser/internlm2_parser.py
  class Internlm2ToolParser (line 21) | class Internlm2ToolParser(ToolParser):
    method __init__ (line 23) | def __init__(self, tokenizer: object):
    method adjust_request (line 27) | def adjust_request(self, request: ChatCompletionRequest) -> ChatComple...
    method get_argments (line 35) | def get_argments(self, obj):
    method extract_tool_calls_streaming (line 42) | def extract_tool_calls_streaming(
    method extract_tool_calls (line 158) | def extract_tool_calls(

FILE: lmdeploy/serve/openai/tool_parser/llama3_parser.py
  class Llama3JsonToolParser (line 21) | class Llama3JsonToolParser(ToolParser):
    method __init__ (line 28) | def __init__(self, tokenizer: object):
    method extract_tool_calls (line 41) | def extract_tool_calls(self, model_output: str, request: ChatCompletio...
    method extract_tool_calls_streaming (line 65) | def extract_tool_calls_streaming(

FILE: lmdeploy/serve/openai/tool_parser/qwen2d5_parser.py
  class Qwen2d5ToolParser (line 21) | class Qwen2d5ToolParser(ToolParser):
    method __init__ (line 23) | def __init__(self, tokenizer: object):
    method get_argments (line 30) | def get_argments(self, obj):
    method extract_tool_calls_streaming (line 37) | def extract_tool_calls_streaming(
    method extract_tool_calls (line 153) | def extract_tool_calls(

FILE: lmdeploy/serve/openai/tool_parser/qwen3_parser.py
  class ParserState (line 19) | class ParserState(object):
    method reset_tool_call (line 27) | def reset_tool_call(self):
  class Qwen3ToolParser (line 33) | class Qwen3ToolParser(ToolParser):
    method __init__ (line 40) | def __init__(self, tokenizer: object):
    method get_argments (line 46) | def get_argments(self, obj):
    method _split (line 57) | def _split(self, parser_state: ParserState, parsing_content: str):
    method _parse_delta_tool_call (line 80) | def _parse_delta_tool_call(self, parser_state: ParserState, tool_conte...
    method extract_tool_calls_streaming (line 113) | def extract_tool_calls_streaming(
    method extract_tool_calls (line 150) | def extract_tool_calls(

FILE: lmdeploy/serve/openai/tool_parser/qwen3coder_parser.py
  class ParserState (line 19) | class ParserState(object):
    method reset_tool_call (line 26) | def reset_tool_call(self):
  class Qwen3CoderToolParser (line 32) | class Qwen3CoderToolParser(ToolParser):
    method __init__ (line 40) | def __init__(self, tokenizer: object):
    method _split (line 51) | def _split(self, parser_state: ParserState, parsing_content: str) -> T...
    method _extract_params (line 69) | def _extract_params(self, content: str) -> Tuple[Optional[str], Dict[s...
    method extract_tool_calls_streaming (line 120) | def extract_tool_calls_streaming(
    method extract_tool_calls (line 204) | def extract_tool_calls(

FILE: lmdeploy/serve/openai/tool_parser/tool_parser.py
  class ToolParser (line 15) | class ToolParser:
    method __init__ (line 21) | def __init__(self, tokenizer: object):
    method vocab (line 31) | def vocab(self) -> Dict[str, int]:
    method adjust_request (line 36) | def adjust_request(self, request: ChatCompletionRequest) -> ChatComple...
    method extract_tool_calls (line 40) | def extract_tool_calls(self, model_output: str, request: ChatCompletio...
    method extract_tool_calls_streaming (line 49) | def extract_tool_calls_streaming(

FILE: lmdeploy/serve/openai/tool_parser/utils.py
  function find_common_prefix (line 12) | def find_common_prefix(s1: str, s2: str) -> str:
  function find_common_suffix (line 32) | def find_common_suffix(s1: str, s2: str) -> str:
  function extract_intermediate_diff (line 49) | def extract_intermediate_diff(curr: str, old: str) -> str:
  function find_all_indices (line 80) | def find_all_indices(string: str, substring: str) -> List[int]:
  function partial_json_loads (line 97) | def partial_json_loads(input_str: str, flags: Allow) -> Tuple[Any, int]:
  function is_complete_json (line 107) | def is_complete_json(input_str: str) -> bool:
  function consume_space (line 115) | def consume_space(i: int, s: str) -> int:

FILE: lmdeploy/serve/processors/multimodal.py
  class MultimodalProcessor (line 19) | class MultimodalProcessor:
    method __init__ (line 23) | def __init__(self,
    method merge_message_content (line 42) | def merge_message_content(msg: Dict) -> Dict:
    method _parse_multimodal_item (line 93) | def _parse_multimodal_item(i: int, in_messages: List[Dict], out_messag...
    method async_parse_multimodal_item (line 139) | async def async_parse_multimodal_item(messages: List[Dict],
    method get_prompt_input (line 156) | async def get_prompt_input(self,
    method format_prompts (line 229) | def format_prompts(prompts: Any) -> List[Dict]:
    method _is_openai_message (line 247) | def _is_openai_message(message) -> bool:
    method _is_str_images_pair (line 252) | def _is_str_images_pair(message) -> bool:
    method _is_image (line 263) | def _is_image(obj) -> bool:
    method _is_image_list (line 269) | def _is_image_list(obj) -> bool:
    method _re_format_prompt_images_pair (line 273) | def _re_format_prompt_images_pair(prompt: Tuple) -> Dict:
    method _has_multimodal_input (line 305) | def _has_multimodal_input(self, messages: List[Dict]) -> bool:
    method _get_text_prompt_input (line 312) | async def _get_text_prompt_input(self,
    method _get_multimodal_prompt_input (line 345) | async def _get_multimodal_prompt_input(self,

FILE: lmdeploy/serve/proxy/proxy.py
  class Status (line 41) | class Status(BaseModel):
  class Node (line 50) | class Node(BaseModel):
  function heart_beat_controller (line 59) | def heart_beat_controller(proxy_controller):
  class NodeManager (line 66) | class NodeManager:
    method __init__ (line 81) | def __init__(self,
    method get_nodes (line 117) | def get_nodes(self, role: EngineRole) -> dict[str, Status]:
    method hybrid_nodes (line 122) | def hybrid_nodes(self):
    method prefill_nodes (line 126) | def prefill_nodes(self):
    method decode_nodes (line 130) | def decode_nodes(self):
    method update_config_file (line 133) | def update_config_file(self):
    method add (line 147) | def add(self, node_url: str, status: Status | None = None):
    method remove (line 175) | def remove(self, node_url: str):
    method terminate_node (line 182) | def terminate_node(self, node_url: str):
    method terminate_all_nodes (line 205) | def terminate_all_nodes(self):
    method remove_stale_nodes_by_expiration (line 214) | def remove_stale_nodes_by_expiration(self):
    method model_list (line 233) | def model_list(self):
    method status (line 242) | def status(self):
    method get_node_url (line 246) | def get_node_url(self, model_name: str, role: EngineRole = EngineRole....
    method check_request_model (line 315) | async def check_request_model(self, model_name) -> JSONResponse | None:
    method handle_unavailable_model (line 322) | def handle_unavailable_model(self, model_name):
    method handle_api_timeout (line 335) | def handle_api_timeout(self, node_url):
    method stream_generate (line 344) | async def stream_generate(self, request: dict, node_url: str, endpoint...
    method generate (line 363) | async def generate(self, request: dict, node_url: str, endpoint: str):
    method forward_raw_request_stream_generate (line 379) | async def forward_raw_request_stream_generate(self, raw_request: Reque...
    method forward_raw_request_generate (line 401) | async def forward_raw_request_generate(self, raw_request: Request, nod...
    method pre_call (line 414) | def pre_call(self, node_url):
    method post_call (line 423) | def post_call(self, node_url: str, start: int):
    method create_background_tasks (line 434) | def create_background_tasks(self, url: str, start: int):
    method _prepare_headers (line 445) | def _prepare_headers(self, raw_request: Request) -> dict[str, str]:
  function available_models (line 470) | def available_models():
  function node_status (line 479) | def node_status():
  function add_node (line 488) | def add_node(node: Node, raw_request: Request = None):
  function remove_node (line 509) | def remove_node(node: Node):
  function terminate_node (line 522) | def terminate_node(node: Node):
  function terminate_node_all (line 536) | def terminate_node_all():
  function connection_warmup (line 549) | async def connection_warmup():
  function cache_block_gc_to_be_migrated (line 563) | async def cache_block_gc_to_be_migrated():
  function chat_completions_v1 (line 569) | async def chat_completions_v1(request: ChatCompletionRequest, raw_reques...
  function completions_v1 (line 737) | async def completions_v1(request: CompletionRequest, raw_request: Reques...
  function proxy (line 879) | def proxy(server_name: str = '0.0.0.0',

FILE: lmdeploy/serve/proxy/streaming_response.py
  class ProxyStreamingResponse (line 10) | class ProxyStreamingResponse(StreamingResponse):
    method __init__ (line 13) | def __init__(self, content, **kwargs):
    method stream_response (line 16) | async def stream_response(self, send) -> None:
    method _convert_headers_to_asgi (line 69) | def _convert_headers_to_asgi(self, headers: dict) -> list[tuple[bytes,...

FILE: lmdeploy/serve/proxy/utils.py
  class RoutingStrategy (line 18) | class RoutingStrategy(enum.Enum):
    method from_str (line 25) | def from_str(cls, name):
  class ErrorCodes (line 38) | class ErrorCodes(enum.Enum):
  class APIServerException (line 52) | class APIServerException(Exception):
    method __init__ (line 54) | def __init__(self, status_code: int, body: bytes, headers: dict | None...

FILE: lmdeploy/serve/utils/server_utils.py
  function validate_json_request (line 14) | def validate_json_request(raw_request: Request):
  class AuthenticationMiddleware (line 21) | class AuthenticationMiddleware:
    method __init__ (line 32) | def __init__(self, app: ASGIApp, tokens: list[str]) -> None:
    method verify_token (line 43) | def verify_token(self, headers: Headers) -> bool:
    method __call__ (line 60) | def __call__(self, scope: Scope, receive: Receive, send: Send) -> Awai...

FILE: lmdeploy/tokenizer.py
  class DetokenizeState (line 16) | class DetokenizeState:
    method as_tuple (line 34) | def as_tuple(self) -> Tuple:
  class HuggingFaceTokenizer (line 39) | class HuggingFaceTokenizer:
    method __init__ (line 46) | def __init__(self, model_dir: str):
    method _check_transformers_version (line 70) | def _check_transformers_version(self, model_dir: str):
    method get_vocab (line 94) | def get_vocab(self):
    method vocab_size (line 99) | def vocab_size(self):
    method vocab_size_with_added (line 104) | def vocab_size_with_added(self):
    method bos_token_id (line 112) | def bos_token_id(self):
    method eos_token_id (line 117) | def eos_token_id(self):
    method prefix_space_tokens (line 122) | def prefix_space_tokens(self):
    method _maybe_add_prefix_space (line 132) | def _maybe_add_prefix_space(self, tokens: List[int], decoded: str):
    method maybe_decode_bytes (line 141) | def maybe_decode_bytes(self):
    method indexes_containing_token (line 152) | def indexes_containing_token(self, token: str):
    method encode (line 192) | def encode(self, s: str, add_bos: bool = True, add_special_tokens: boo...
    method decode (line 211) | def decode(self, t: Sequence[int], offset: Optional[int] = None, skip_...
    method _convert_tokens_to_string_with_added_encoders (line 233) | def _convert_tokens_to_string_with_added_encoders(
    method detokenize_incrementally (line 267) | def detokenize_incrementally(self,
    method __call__ (line 338) | def __call__(self, s: Union[str, Sequence[str]]):
  class ChatGLM4Tokenizer (line 350) | class ChatGLM4Tokenizer(HuggingFaceTokenizer):
    method __init__ (line 353) | def __init__(self, model_path):
    method encode (line 365) | def encode(self, s: str, add_bos: bool = True, add_special_tokens: boo...
  class ChatGLMTokenizer (line 372) | class ChatGLMTokenizer(HuggingFaceTokenizer):
    method __init__ (line 375) | def __init__(self, model_path):
  class GptOssTokenizer (line 388) | class GptOssTokenizer(HuggingFaceTokenizer):
    method __init__ (line 391) | def __init__(self, model_dir: str):
    method detokenize_incrementally (line 398) | def detokenize_incrementally(self,
  class Tokenizer (line 417) | class Tokenizer:
    method __init__ (line 424) | def __init__(self, model_path: str):
    method vocab_size (line 445) | def vocab_size(self):
    method bos_token_id (line 450) | def bos_token_id(self):
    method eos_token_id (line 455) | def eos_token_id(self):
    method get_vocab (line 459) | def get_vocab(self):
    method encode (line 463) | def encode(self, s: str, add_bos: bool = True, add_special_tokens: boo...
    method decode (line 483) | def decode(
    method detokenize_incrementally (line 502) | def detokenize_incrementally(self,
    method __call__ (line 528) | def __call__(self, s: Union[str, Sequence[str]]):
    method indexes_containing_token (line 538) | def indexes_containing_token(self, token):

FILE: lmdeploy/turbomind/__init__.py
  function bootstrap (line 4) | def bootstrap():

FILE: lmdeploy/turbomind/deploy/config.py
  function config_from_dict (line 16) | def config_from_dict(cls, env):
  function config_to_dict (line 31) | def config_to_dict(config):
  class ModelConfig (line 42) | class ModelConfig:
    method verify (line 109) | def verify(self):
  class RopeParam (line 118) | class RopeParam:
  class AttentionConfig (line 134) | class AttentionConfig:
  class LoraConfig (line 143) | class LoraConfig:
  class TurbomindModelConfig (line 153) | class TurbomindModelConfig:
    method update_from_engine_config (line 159) | def update_from_engine_config(self, config: TurbomindEngineConfig):
    method from_dict (line 210) | def from_dict(cls, config: dict = {}):
    method to_dict (line 218) | def to_dict(self):
    method session_len (line 225) | def session_len(self):
    method weight_type (line 229) | def weight_type(self):
    method group_size (line 233) | def group_size(self):
    method vocab_size (line 237) | def vocab_size(self):
    method __str__ (line 240) | def __str__(self):

FILE: lmdeploy/turbomind/deploy/converter.py
  function get_input_model_registered_name (line 20) | def get_input_model_registered_name(model_path: str, model_format: str):
  function get_output_model_registered_name_and_config (line 34) | def get_output_model_registered_name_and_config(model_path: str, model_f...
  function get_tm_model (line 151) | def get_tm_model(model_path,

FILE: lmdeploy/turbomind/deploy/loader.py
  class BaseLoader (line 24) | class BaseLoader(ABC):
    method __init__ (line 26) | def __init__(self, model_path: str, pattern, mappings: list):
    method get_index (line 32) | def get_index(self, index_name: str, file_pattern: str) -> Tuple[dict,...
    method map_key (line 48) | def map_key(self, key: str):
    method items (line 58) | def items(self) -> Iterator[Tuple[int, dict]]:
  class SafetensorsLoader (line 62) | class SafetensorsLoader(BaseLoader):
    method __init__ (line 64) | def __init__(self, model_path: str, pattern: str, mappings: list, inde...
    method items (line 82) | def items(self):
  class PytorchLoader (line 108) | class PytorchLoader(BaseLoader):
    method __init__ (line 110) | def __init__(self, model_path: str, pattern: str, mappings: list, inde...
    method items (line 118) | def items(self):
  class StateDictLoader (line 148) | class StateDictLoader:
    method __init__ (line 155) | def __init__(self, queue: Queue, pattern: str, mappings: list):
    method items (line 159) | def items(self):
  function create_loader (line 177) | def create_loader(model_path: Union[str, Queue], pattern: str, mappings:...

FILE: lmdeploy/turbomind/deploy/module.py
  function permute_v2 (line 12) | def permute_v2(x: torch.Tensor, size_per_head: int = 128):
  function permute_v2_partial (line 25) | def permute_v2_partial(x: torch.Tensor, size_per_head: int, rotary_dim: ...
  function merge_qkv_v2 (line 51) | def merge_qkv_v2(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, tp: ...
  function merge_qkvg_v2 (line 68) | def merge_qkvg_v2(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, gat...
  function transpose (line 86) | def transpose(x):
  function pad_out_dims (line 90) | def pad_out_dims(x: torch.Tensor, dims: int):
  function pad_in_dims (line 96) | def pad_in_dims(x: torch.Tensor, dims: int):
  function get_lora_flags (line 107) | def get_lora_flags(kind: str):
  class Module (line 111) | class Module(ABC):
    method __init__ (line 113) | def __init__(self, model: BaseOutputModel):
    method __call__ (line 116) | def __call__(self, *args, **kwargs):
    method apply (line 120) | def apply(self, idx: int, r: BaseReader):
  class LayerNorm (line 124) | class LayerNorm(Module):
    method apply (line 126) | def apply(self, i: int, r: BaseReader):
  class Ffn (line 133) | class Ffn(Module):
    method __init__ (line 141) | def __init__(self, model: BaseOutputModel):
    method _export (line 149) | def _export(self, inter_size: int, fmt: str, idx: int, w123, kind: str...
    method apply (line 167) | def apply(self, i: int, r: BaseReader):
  class MoeFfn (line 176) | class MoeFfn(Ffn):
    method __init__ (line 188) | def __init__(self, model: BaseOutputModel):
    method apply (line 194) | def apply(self, i: int, r: BaseReader):
  class Attn (line 224) | class Attn(Module):
    method __init__ (line 232) | def __init__(self, model: BaseOutputModel):
    method _split_q_gate (line 245) | def _split_q_gate(self, q):
    method _reorder_and_merge (line 267) | def _reorder_and_merge(self, qkvo, gs: int):
    method _repeat_kv (line 295) | def _repeat_kv(self, qkvo, gs: int, kind: str):
    method _export (line 320) | def _export(self, idx: int, qkvo, kind: str, pack_fn, apply_gs=[], **k...
    method apply (line 346) | def apply(self, i: int, r: BaseReader):
  class MLA (line 366) | class MLA(Module):
    method __init__ (line 375) | def __init__(self, model: BaseOutputModel):
    method _export (line 378) | def _export(self, idx: int, xs, kind: str, pack_fn, **kwargs):
    method apply (line 473) | def apply(self, i: int, r: BaseReader):
  class LinearAttn (line 484) | class LinearAttn(Module):
    method __init__ (line 487) | def __init__(self, model: BaseOutputModel):
    method _tp_interleave_qkv (line 494) | def _tp_interleave_qkv(self, tensor, dim):
    method apply (line 522) | def apply(self, i: int, r: BaseReader):
  class Misc (line 576) | class Misc(Module):
    method apply (line 584) | def apply(self, i: int, r: BaseReader):
  class Transformer (line 611) | class Transformer:
    method __init__ (line 613) | def __init__(self, model: BaseOutputModel):
    method __call__ (line 629) | def __call__(self, i: int, r: BaseReader):

FILE: lmdeploy/turbomind/deploy/parameter.py
  function identity (line 7) | def identity(x):
  function to_half (line 11) | def to_half(x: torch.Tensor):
  function to_float (line 15) | def to_float(x: torch.Tensor):
  function to_fp8 (line 19) | def to_fp8(x: torch.Tensor):
  function pack_u4_row (line 24) | def pack_u4_row(x: torch.Tensor) -> torch.Tensor:
  function generate_zero_point (line 33) | def generate_zero_point(g):
  class Parameter (line 43) | class Parameter:
    method take (line 47) | def take(cls, keys: list[str]):
    method __call__ (line 59) | def __call__(cls, f, g, i):
  class QuantWeightOnly (line 63) | class QuantWeightOnly(Parameter):
    method __call__ (line 66) | def __call__(self, f, g, i):
  class WeightScaleInv (line 72) | class WeightScaleInv(Parameter):
    method __call__ (line 76) | def __call__(self, f, g, i):
  class CompressedWeight (line 81) | class CompressedWeight(Parameter):
    method __init__ (line 84) | def __init__(self, xs):
    method __call__ (line 89) | def __call__(self, f, g, i):
  class Mxfp4Weight (line 98) | class Mxfp4Weight(Parameter):
    method __call__ (line 101) | def __call__(self, f, g, i):
  class Weight (line 106) | class Weight(Parameter):
    method __call__ (line 109) | def __call__(self, f, g, i):
  class Bias (line 113) | class Bias(Parameter):
    method __call__ (line 116) | def __call__(self, f, g, i):
  class PLora (line 120) | class PLora(Parameter):
    method __call__ (line 123) | def __call__(self, f, g, i):
  function get_params (line 128) | def get_params(keys: list[str], bias=0):

FILE: lmdeploy/turbomind/deploy/policy.py
  function to_cuda (line 7) | def to_cuda(x: torch.Tensor, *args):
  function get_u4_slices (line 11) | def get_u4_slices(x: torch.Tensor, dtype: torch.dtype) -> List[torch.Ten...
  function unpack_awq_gemm (line 20) | def unpack_awq_gemm(x: torch.Tensor) -> torch.Tensor:
  function process_awq_gemm (line 27) | def process_awq_gemm(x: torch.Tensor, kind: str):
  function process_gptq (line 36) | def process_gptq(x: torch.Tensor, kind: str):
  function process_mxfp4 (line 49) | def process_mxfp4(x: torch.Tensor, kind: str):
  function process_fp8 (line 60) | def process_fp8(x: torch.Tensor, kind: str):
  function process_compressed_tensor (line 71) | def process_compressed_tensor(x: torch.Tensor, kind: str):
  function get_input_policy (line 82) | def get_input_policy(model_format):

FILE: lmdeploy/turbomind/deploy/source_model/baichuan.py
  class BaichuanReader (line 9) | class BaichuanReader(LlamaReader):
    method _attn (line 12) | def _attn(self, i: int, kind: str):
  class BaichuanModel (line 25) | class BaichuanModel(LlamaModel):
  class Baichuan2Reader (line 31) | class Baichuan2Reader(BaichuanReader):
    method output_weight (line 34) | def output_weight(self):
  class Baichuan2Model (line 45) | class Baichuan2Model(LlamaModel):

FILE: lmdeploy/turbomind/deploy/source_model/base.py
  class BaseReader (line 11) | class BaseReader(ABC):
    method __init__ (line 14) | def __init__(self):
    method transform (line 17) | def transform(self, x: Union[torch.Tensor, None], kind: str) -> Union[...
    method _transform (line 21) | def _transform(self, x: torch.Tensor, kind: str):
  class BaseInputModel (line 26) | class BaseInputModel(ABC):
    method __init__ (line 29) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
    method model_info (line 40) | def model_info(self) -> Dict:
    method readers (line 45) | def readers(self) -> Iterator[BaseReader]:

FILE: lmdeploy/turbomind/deploy/source_model/deepseek2.py
  class DeepSeek2Reader (line 10) | class DeepSeek2Reader(LlamaReader):
    method moe_ffn_gate (line 12) | def moe_ffn_gate(self, i, kind):
    method moe_ffn_expert (line 15) | def moe_ffn_expert(self, e=None, i=None, kind=None):
    method _ffn (line 26) | def _ffn(self, i: int, kind: str):
    method ffn (line 45) | def ffn(self, i: int, kind: str):
    method mla (line 48) | def mla(self, i: int, kind: str):
    method mla_norm (line 58) | def mla_norm(self, i: int):
  function get_yarn_params (line 66) | def get_yarn_params(rope_scaling: dict):
  class DeepSeek2Model (line 88) | class DeepSeek2Model(LlamaModel):
    method model_info (line 92) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/deepseek_vl.py
  class DeepSeekVLReader (line 10) | class DeepSeekVLReader(LlamaReader):
    method __init__ (line 19) | def __init__(self, new_params: dict, unused_params: dict, last_bin: bo...
    method attn_norm (line 23) | def attn_norm(self, i: int):
    method ffn_norm (line 27) | def ffn_norm(self, i: int):
  class DeepSeekVLModel (line 33) | class DeepSeekVLModel(LlamaModel):
    method model_info (line 38) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/glm4.py
  class Glm4Reader (line 12) | class Glm4Reader(LlamaReader):
    method _attn (line 22) | def _attn(self, i: int, kind: str):
    method attn_norm (line 39) | def attn_norm(self, i: int):
    method _ffn (line 43) | def _ffn(self, i: int, kind: str):
    method ffn_norm (line 52) | def ffn_norm(self, i: int):
  class Glm4Model (line 58) | class Glm4Model(LlamaModel):
    method __init__ (line 63) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
    method model_info (line 69) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/glm4_moe_lite.py
  class Glm4MoeLiteReader (line 12) | class Glm4MoeLiteReader(DeepSeek2Reader):
    method moe_ffn_gate_correction_bias (line 25) | def moe_ffn_gate_correction_bias(self, i: int):
  class Glm4MoeLiteModel (line 31) | class Glm4MoeLiteModel(DeepSeek2Model):
    method model_info (line 39) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/gpt_oss.py
  function map_experts (line 9) | def map_experts(str):
  class GptOssReader (line 17) | class GptOssReader(LlamaReader):
    method moe_ffn_expert (line 21) | def moe_ffn_expert(self, e=None, i=None, kind=None):
    method moe_ffn_gate (line 38) | def moe_ffn_gate(self, i, kind):
    method attn_sinks (line 41) | def attn_sinks(self, i):
  class GptOssModel (line 46) | class GptOssModel(LlamaModel):
    method model_info (line 50) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/internlm2.py
  class InternLM2Reader (line 11) | class InternLM2Reader(LlamaReader):
    method filter (line 25) | def filter(self, pattern: str, i: int | None):
    method _attn (line 63) | def _attn(self, i: int, kind: str):
    method attn_norm (line 88) | def attn_norm(self, i: int):
    method _ffn (line 92) | def _ffn(self, i: int, kind: str):
    method ffn_norm (line 103) | def ffn_norm(self, i: int):
  class InternLM2Model (line 109) | class InternLM2Model(LlamaModel):

FILE: lmdeploy/turbomind/deploy/source_model/internvl.py
  class InternVLReader (line 9) | class InternVLReader(LlamaReader):
  class InternVL2Reader (line 20) | class InternVL2Reader(InternLM2Reader):
  class InternVL3d5Reader (line 30) | class InternVL3d5Reader(Qwen3Reader):
  class InternVL3d5Qwen3MoEReader (line 38) | class InternVL3d5Qwen3MoEReader(Qwen3MoeReader):
  class InternVL3d5GptOSSReader (line 46) | class InternVL3d5GptOSSReader(GptOssReader):
  class InternS1Reader (line 54) | class InternS1Reader(Qwen3MoeReader):
  class InternS1MiniReader (line 64) | class InternS1MiniReader(Qwen3Reader):
  class InternVLModel (line 74) | class InternVLModel(LlamaModel):
    method __init__ (line 77) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
    method model_info (line 100) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/llama.py
  class LlamaReader (line 14) | class LlamaReader(BaseReader):
    method __init__ (line 29) | def __init__(self, new_params: dict, unused_params: dict, last_bin: bo...
    method quant_weight_fp8 (line 44) | def quant_weight_fp8(self):
    method filter (line 65) | def filter(self, pattern: str, i: int | None):
    method tok_embeddings (line 72) | def tok_embeddings(self):
    method norm_weight (line 76) | def norm_weight(self):
    method output_weight (line 80) | def output_weight(self):
    method _transform (line 84) | def _transform(self, x: torch.Tensor, kind: str):
    method _attn (line 87) | def _attn(self, i: int, kind: str):
    method attn (line 96) | def attn(self, i: int, kind: str):
    method attn_norm (line 101) | def attn_norm(self, i: int):
    method _ffn (line 105) | def _ffn(self, i: int, kind: str):
    method ffn (line 116) | def ffn(self, i: int, kind: str):
    method ffn_norm (line 121) | def ffn_norm(self, i: int):
  class LlamaModel (line 127) | class LlamaModel(BaseInputModel):
    method __init__ (line 132) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs: dict):
    method readers (line 146) | def readers(self):
    method model_info (line 154) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/llava.py
  class LlavaReader (line 10) | class LlavaReader(LlamaReader):
    method __init__ (line 19) | def __init__(self, new_params: dict, unused_params: dict, last_bin: bo...
  class LlavaModel (line 25) | class LlavaModel(LlamaModel):
    method __init__ (line 28) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
    method model_info (line 38) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/minicpmv.py
  class MiniCPMVReader (line 10) | class MiniCPMVReader(LlamaReader):
  class MiniCPMVModel (line 21) | class MiniCPMVModel(LlamaModel):
    method model_info (line 25) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/mixtral.py
  class MixtralReader (line 7) | class MixtralReader(LlamaReader):
    method moe_ffn_expert (line 9) | def moe_ffn_expert(self, e=None, i=None, kind=None):
    method moe_ffn_gate (line 20) | def moe_ffn_gate(self, i, kind):
  class MixtralModel (line 25) | class MixtralModel(LlamaModel):
    method model_info (line 29) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/molmo.py
  class MolmoReader (line 12) | class MolmoReader(LlamaReader):
    method tok_embeddings (line 25) | def tok_embeddings(self):
    method attn_norm (line 34) | def attn_norm(self, i: int):
    method _attn (line 38) | def _attn(self, i: int, kind: str):
    method _ffn (line 62) | def _ffn(self, i: int, kind: str):
    method ffn_norm (line 71) | def ffn_norm(self, i: int):
  class MolmoModel (line 77) | class MolmoModel(LlamaModel):
    method __init__ (line 81) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs):
    method model_info (line 87) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/qwen.py
  class QwenReader (line 14) | class QwenReader(LlamaReader):
    method _attn (line 25) | def _attn(self, i: int, kind: str):
    method attn_norm (line 38) | def attn_norm(self, i: int):
    method _ffn (line 42) | def _ffn(self, i: int, kind: str):
    method ffn_norm (line 51) | def ffn_norm(self, i: int):
  class QwenModel (line 57) | class QwenModel(LlamaModel):
    method model_info (line 62) | def model_info(self):
  class Qwen2Model (line 106) | class Qwen2Model(LlamaModel):
    method model_info (line 114) | def model_info(self):
  class Qwen2MoeReader (line 120) | class Qwen2MoeReader(LlamaReader):
    method moe_ffn_expert (line 122) | def moe_ffn_expert(self, e=None, i=None, kind=None):
    method moe_ffn_gate (line 133) | def moe_ffn_gate(self, i, kind):
    method _ffn (line 136) | def _ffn(self, i: int, kind: str):
    method ffn (line 147) | def ffn(self, i: int, kind: str):
    method moe_ffn_shared_gate (line 152) | def moe_ffn_shared_gate(self, i):
  class Qwen2MoeModel (line 157) | class Qwen2MoeModel(LlamaModel):
    method model_info (line 161) | def model_info(self):
  class Qwen3Reader (line 174) | class Qwen3Reader(LlamaReader):
    method qk_norm (line 176) | def qk_norm(self, i: int):
  class Qwen3Model (line 185) | class Qwen3Model(LlamaModel):
    method model_info (line 188) | def model_info(self):
  class Qwen3MoeReader (line 195) | class Qwen3MoeReader(Qwen2MoeReader):
    method qk_norm (line 197) | def qk_norm(self, i: int):
  class Qwen3MoeModel (line 206) | class Qwen3MoeModel(LlamaModel):
    method model_info (line 209) | def model_info(self):
  class Qwen3_5ReaderMixin (line 223) | class Qwen3_5ReaderMixin:
    method __init__ (line 237) | def __init__(self, *args, **kwargs):
    method attn_norm (line 248) | def attn_norm(self, i: int):
    method ffn_norm (line 254) | def ffn_norm(self, i: int):
    method norm_weight (line 260) | def norm_weight(self):
    method qk_norm (line 266) | def qk_norm(self, i: int):
    method _attn (line 272) | def _attn(self, i: int, kind: str):
    method _awq_dequant (line 299) | def _awq_dequant(self, prefix: str):
    method linear_attn (line 315) | def linear_attn(self, i: int, kind: str):
    method linear_norm (line 339) | def linear_norm(self, i: int, kind: str = 'weight'):
  class Qwen3_5Reader (line 346) | class Qwen3_5Reader(Qwen3_5ReaderMixin, Qwen3Reader):
  class Qwen3_5Model (line 351) | class Qwen3_5Model(Qwen3Model):
    method model_info (line 354) | def model_info(self):
  class Qwen3_5MoeReader (line 389) | class Qwen3_5MoeReader(Qwen3_5ReaderMixin, Qwen3MoeReader):
    method _unpacked_moe_expert (line 391) | def _unpacked_moe_expert(self, e: int, i: int, kind: str):
    method moe_ffn_expert (line 406) | def moe_ffn_expert(self, e=None, i=None, kind=None):
  class Qwen3_5MoeModel (line 417) | class Qwen3_5MoeModel(Qwen3MoeModel):
    method map_packed_qwen35_experts (line 421) | def map_packed_qwen35_experts(name: str):
    method readers (line 428) | def readers(self):
    method model_info (line 442) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/source_model/xcomposer2.py
  class Xcomposer2Reader (line 7) | class Xcomposer2Reader(InternLM2Reader):
    method _attn (line 14) | def _attn(self, i, kind):
  class Xcomposer2Model (line 23) | class Xcomposer2Model(InternLM2Model):
    method _lora_cfg_7b (line 28) | def _lora_cfg_7b(self):
    method _lora_cfg_4khd_7b (line 32) | def _lora_cfg_4khd_7b(self, model_info: dict):
    method model_info (line 45) | def model_info(self):

FILE: lmdeploy/turbomind/deploy/target_model/base.py
  function tprint (line 18) | def tprint(*args, **kwargs):
  function _weight_dtype_map (line 28) | def _weight_dtype_map(weight_type: str, default=None):
  function _pad_inter_size (line 36) | def _pad_inter_size(inter_size: int, group_size: int, tp: int):
  class BaseOutputModel (line 44) | class BaseOutputModel(ABC):
    method __init__ (line 47) | def __init__(self, input_model: BaseInputModel, cfg: TurbomindModelCon...
    method single_to_list (line 90) | def single_to_list(self, config: dict, keys):
    method update_model_config (line 98) | def update_model_config(self):
    method update_attention_config (line 108) | def update_attention_config(self):
    method update_lora_config (line 114) | def update_lora_config(self):
    method export_config (line 120) | def export_config(self) -> None:
    method export_weight (line 127) | def export_weight(self, param: torch.Tensor, name: str) -> None:
    method save_split (line 184) | def save_split(self, tensor: torch.Tensor, name: str, split_dim=None, ...
    method export (line 218) | def export(self) -> None:
    method export_iter (line 229) | def export_iter(self):
    method tm_config (line 236) | def tm_config(self):

FILE: lmdeploy/turbomind/deploy/target_model/fp.py
  class TurbomindModel (line 7) | class TurbomindModel(BaseOutputModel):

FILE: lmdeploy/turbomind/supported_models.py
  function is_supported (line 67) | def is_supported(model_path: str):

FILE: lmdeploy/turbomind/tokenizer_info.py
  class VocabType (line 27) | class VocabType(Enum):
  class TokenizerInfo (line 59) | class TokenizerInfo(_xgr.TokenizerInfo):
    method __init__ (line 72) | def __init__(
    method _is_tiktoken_tokenizer (line 107) | def _is_tiktoken_tokenizer(tokenizer: PreTrainedTokenizerBase) -> bool:
    method _is_sentencepiece_tokenizer (line 120) | def _is_sentencepiece_tokenizer(tokenizer: PreTrainedTokenizerBase) ->...
    method from_huggingface (line 134) | def from_huggingface(

FILE: lmdeploy/turbomind/turbomind.py
  function _construct_stop_or_bad_words (line 44) | def _construct_stop_or_bad_words(words: List[int] = None):
  function _np_dict_to_tm_dict (line 52) | def _np_dict_to_tm_dict(np_dict: dict):
  function _tm_dict_to_torch_dict (line 61) | def _tm_dict_to_torch_dict(tm_dict: _tm.TensorMap):
  function complete_parallel_config (line 72) | def complete_parallel_config(cfg: TurbomindEngineConfig):
  function update_parallel_config (line 87) | def update_parallel_config(cfg: TurbomindEngineConfig):
  class TurboMind (line 115) | class TurboMind:
    method __init__ (line 130) | def __init__(self,
    method _check_unloaded_tm_params (line 177) | def _check_unloaded_tm_params(self):
    method _load_weights (line 184) | def _load_weights(self):
    method _process_weights (line 193) | def _process_weights(self):
    method _create_engine (line 199) | def _create_engine(self):
    method _create_weight (line 206) | def _create_weight(self, model_comm):
    method _get_model_params (line 220) | def _get_model_params(self):
    method _postprocess_config (line 248) | def _postprocess_config(self, tm_config: TurbomindModelConfig, engine_...
    method _from_hf (line 267) | def _from_hf(self, model_path: str, engine_config: TurbomindEngineConf...
    method sleep (line 288) | def sleep(self, level: int = 1):
    method wakeup (line 294) | def wakeup(self, tags: Optional[list[str]] = None):
    method update_params (line 302) | def update_params(self, request: UpdateParamsRequest):
    method from_pretrained (line 345) | def from_pretrained(cls,
    method close (line 376) | def close(self):
    method create_instance (line 388) | def create_instance(self, cuda_stream_id=0):
    method get_schedule_metrics (line 398) | def get_schedule_metrics(self):
  function _get_logits (line 408) | def _get_logits(outputs, offset: int):
  function _get_last_hidden_state (line 417) | def _get_last_hidden_state(outputs, offset: int):
  function _get_logprobs_impl (line 426) | def _get_logprobs_impl(logprob_vals: torch.Tensor, logprob_idxs: torch.T...
  function _get_logprobs (line 460) | def _get_logprobs(outputs, output_logprobs: int):
  function _get_metrics (line 475) | def _get_metrics(metrics):
  class StreamingSemaphore (line 497) | class StreamingSemaphore:
    method __init__ (line 499) | def __init__(self):
    method acquire (line 504) | async def acquire(self):
    method release (line 513) | def release(self):
  class TurboMindInstance (line 520) | class TurboMindInstance:
    method __init__ (line 528) | def __init__(self, tm_model: TurboMind, config: TurbomindModelConfig, ...
    method model_inst (line 556) | def model_inst(self):
    method _create_model_instance (line 561) | def _create_model_instance(self):
    method _get_extra_output_processors (line 565) | def _get_extra_output_processors(self, outputs: Dict[str, torch.Tensor...
    method prepare_embeddings (line 584) | def prepare_embeddings(self, input_embeddings=None, input_embedding_ra...
    method prepare_mrope (line 608) | def prepare_mrope(self, input_meta: Dict[str, Any], input_len: int):
    method prepare_inputs (line 615) | def prepare_inputs(self,
    method async_cancel (line 642) | async def async_cancel(self, session_id: int = None):
    method async_end_cb (line 645) | def async_end_cb(self, fut: asyncio.Future, status: int):
    method async_end (line 650) | async def async_end(self, session_id):
    method async_signal_cb (line 655) | def async_signal_cb(self, s: StreamingSemaphore):
    method async_stream_infer (line 659) | async def async_stream_infer(self,
    method _get_error_output (line 799) | def _get_error_output(self, status):
    method _get_generation_config (line 802) | def _get_generation_config(self, cfg: GenerationConfig):

FILE: lmdeploy/utils.py
  class _ASNI_COLOR (line 18) | class _ASNI_COLOR:
  function can_colorize (line 28) | def can_colorize(*, no_color: bool | None = None, force_color: bool | No...
  class ColorFormatter (line 56) | class ColorFormatter(logging.Formatter):
    method format (line 67) | def format(self, record: LogRecord):
  class FilterDuplicateWarning (line 79) | class FilterDuplicateWarning(logging.Filter):
    method __init__ (line 86) | def __init__(self, name: str = 'lmdeploy'):
    method filter (line 90) | def filter(self, record: LogRecord) -> bool:
  function get_logger (line 112) | def get_logger(name: str | None = None,
  function filter_suffix (line 180) | def filter_suffix(response: str, suffixes: list[str] | None = None) -> str:
  function _stop_words (line 199) | def _stop_words(stop_words: list[int | str], tokenizer: object):
  function get_hf_gen_cfg (line 222) | def get_hf_gen_cfg(path: str):
  function get_model (line 231) | def get_model(pretrained_model_name_or_path: str, download_dir: str = No...
  function logging_timer (line 253) | def logging_timer(op_name: str, logger: Logger, level: int = logging.DEB...
  function _get_and_verify_max_len (line 297) | def _get_and_verify_max_len(
  function get_max_batch_size (line 366) | def get_max_batch_size(device_type: str):
  function is_bf16_supported (line 392) | def is_bf16_supported(device_type: str = 'cuda'):
  function try_import_deeplink (line 433) | def try_import_deeplink(device_type: str):
  function serialize_state_dict (line 449) | def serialize_state_dict(state_dict: dict) -> str:
  function is_dlblas_installed (line 481) | def is_dlblas_installed():
  class FlattenedTensorMetadata (line 494) | class FlattenedTensorMetadata:
  class FlattenedTensorBucket (line 504) | class FlattenedTensorBucket:
    method __init__ (line 507) | def __init__(
    method get_flattened_tensor (line 550) | def get_flattened_tensor(self) -> torch.Tensor:
    method get_metadata (line 554) | def get_metadata(self) -> list[FlattenedTensorMetadata]:
    method reconstruct_tensors (line 558) | def reconstruct_tensors(self) -> list[tuple[str, torch.Tensor]]:

FILE: lmdeploy/version.py
  function parse_version_info (line 8) | def parse_version_info(version_str: str) -> Tuple:

FILE: lmdeploy/vl/constants.py
  class Modality (line 7) | class Modality(str, Enum):

FILE: lmdeploy/vl/engine.py
  function _raise_exception_on_finish (line 17) | def _raise_exception_on_finish(task: asyncio.Task) -> None:
  function _accepts_arg (line 27) | def _accepts_arg(func, arg_name: str) -> bool:
  class ImageEncoder (line 32) | class ImageEncoder:
    method __init__ (line 35) | def __init__(
    method preprocess (line 50) | async def preprocess(self,
    method async_infer (line 63) | async def async_infer(self, messages: List[Dict]) -> List[Dict]:
    method wrap_for_pytorch (line 76) | async def wrap_for_pytorch(
    method wrap_for_turbomind (line 117) | async def wrap_for_turbomind(

FILE: lmdeploy/vl/media/base.py
  class MediaIO (line 11) | class MediaIO(ABC, Generic[_T]):
    method load_bytes (line 14) | def load_bytes(self, data: bytes) -> _T:
    method load_base64 (line 18) | def load_base64(self, media_type: str, data: str) -> _T:
    method load_file (line 22) | def load_file(self, filepath: Path) -> _T:

FILE: lmdeploy/vl/media/connection.py
  function _load_http_url (line 23) | def _load_http_url(url_spec: ParseResult, media_io: MediaIO[_M]) -> _M:
  function _load_data_url (line 40) | def _load_data_url(url_spec: ParseResult, media_io: MediaIO[_M]) -> _M:
  function _load_file_url (line 54) | def _load_file_url(url_spec: ParseResult, media_io: MediaIO[_M]) -> _M:
  function load_from_url (line 61) | def load_from_url(url: str, media_io: MediaIO[_M]) -> _M:

FILE: lmdeploy/vl/media/image.py
  class ImageMediaIO (line 15) | class ImageMediaIO(MediaIO[Image.Image]):
    method __init__ (line 17) | def __init__(self, image_mode: str = 'RGB', **kwargs) -> None:
    method load_bytes (line 24) | def load_bytes(self, data: bytes) -> Image.Image:
    method load_base64 (line 28) | def load_base64(self, media_type: str, data: str) -> Image.Image:
    method load_file (line 31) | def load_file(self, file_path: Path) -> Image.Image:
    method encode_base64 (line 37) | def encode_base64(self, image: Image.Image, image_format: str = 'PNG')...

FILE: lmdeploy/vl/media/time_series.py
  class TimeSeriesMediaIO (line 16) | class TimeSeriesMediaIO(MediaIO[npt.NDArray]):
    method __init__ (line 18) | def __init__(self, **kwargs):
    method load_bytes (line 24) | def load_bytes(self, data: bytes) -> npt.NDArray:
    method load_base64 (line 28) | def load_base64(self, media_type: str, data: str) -> npt.NDArray:
    method load_file (line 31) | def load_file(self, filepath: Path) -> npt.NDArray:
    method encode_base64 (line 56) | def encode_base64(self, data: npt.NDArray) -> str:

FILE: lmdeploy/vl/media/video.py
  class VideoMediaIO (line 23) | class VideoMediaIO(MediaIO[tuple[npt.NDArray, dict[str, Any]]]):
    method __init__ (line 25) | def __init__(
    method _get_video_loader_backend (line 40) | def _get_video_loader_backend(self) -> VideoLoader:
    method load_bytes (line 63) | def load_bytes(self, data: bytes) -> tuple[npt.NDArray, dict[str, Any]]:
    method load_base64 (line 66) | def load_base64(self, media_type: str, data: str) -> tuple[npt.NDArray...
    method load_file (line 97) | def load_file(self, filepath: Path) -> tuple[npt.NDArray, dict[str, An...
    method encode_base64 (line 100) | def encode_base64(

FILE: lmdeploy/vl/media/video_loader.py
  class VideoLoader (line 21) | class VideoLoader:
    method load_bytes (line 25) | def load_bytes(self, data: bytes, num_frames: int = -1, **kwargs) -> t...
    method smart_nframes (line 29) | def smart_nframes(self, total_frames_num: int, num_frames: int, fps: i...
  class OpenCVVideoLoader (line 47) | class OpenCVVideoLoader(VideoLoader):
    method get_cv2_video_api (line 49) | def get_cv2_video_api(self):
    method _read_frames (line 65) | def _read_frames(
    method load_file (line 117) | def load_file(
    method load_bytes (line 130) | def load_bytes(
  class DecordVideoLoader (line 183) | class DecordVideoLoader(VideoLoader):
    method load_file (line 186) | def load_file(self,
    method load_bytes (line 211) | def load_bytes(self,
  class TorchCodecVideoLoader (line 234) | class TorchCodecVideoLoader(VideoLoader):
    method load_file (line 237) | def load_file(self,
    method load_bytes (line 266) | def load_bytes(self,
  class TorchVisionVideoLoader (line 289) | class TorchVisionVideoLoader(VideoLoader):
    method load_file (line 292) | def load_file(self,
    method load_bytes (line 322) | def load_bytes(self,

FILE: lmdeploy/vl/model/base.py
  class VisionModel (line 15) | class VisionModel(ABC):
    method __init__ (line 19) | def __init__(self,
    method get_pad_token_id (line 35) | def get_pad_token_id(self, model_path, hf_config):
    method build_preprocessor (line 48) | def build_preprocessor(self, ):
    method build_model (line 56) | def build_model(self, ):
    method preprocess (line 65) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method has_input_ids (line 108) | def has_input_ids(self, messages: List[Dict]) -> bool:
    method forward (line 120) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method to_pytorch (line 135) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 151) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...
    method collect_multimodal_items (line 168) | def collect_multimodal_items(messages):
    method IMAGE_TOKEN_included (line 198) | def IMAGE_TOKEN_included(messages):
    method to_pytorch_with_input_ids (line 218) | def to_pytorch_with_input_ids(self, messages):
    method to_pytorch_aux (line 255) | def to_pytorch_aux(self, messages, prompt, IMAGE_TOKEN, tokenizer, seq...
    method to_turbomind_aux (line 290) | def to_turbomind_aux(self, messages, prompt, IMAGE_TOKEN, tokenizer, s...
    method match (line 327) | def match(cls, config: AutoConfig):

FILE: lmdeploy/vl/model/builder.py
  function load_vl_model (line 40) | def load_vl_model(model_path: str,

FILE: lmdeploy/vl/model/cogvlm.py
  class CogVLMVisionModel (line 11) | class CogVLMVisionModel(VisionModel):
    method build_preprocessor (line 16) | def build_preprocessor(self):
    method build_model (line 33) | def build_model(self):
    method preprocess (line 42) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method proc_messages (line 58) | def proc_messages(messages, chat_template, sequence_start):
    method to_pytorch (line 88) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...

FILE: lmdeploy/vl/model/deepseek.py
  function check_deepseek_vl_install (line 15) | def check_deepseek_vl_install():
  class DeepSeekVisionModel (line 26) | class DeepSeekVisionModel(VisionModel):
    method build_preprocessor (line 31) | def build_preprocessor(self):
    method build_model (line 39) | def build_model(self):
    method preprocess (line 89) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 108) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method proc_messages (line 135) | def proc_messages(messages, chat_template, sequence_start):
    method to_pytorch (line 167) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 171) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...

FILE: lmdeploy/vl/model/deepseek_vl2.py
  function check_deepseek_vl2_install (line 15) | def check_deepseek_vl2_install():
  function check_trans_version (line 25) | def check_trans_version():
  class DeepSeek2VisionModel (line 39) | class DeepSeek2VisionModel(VisionModel):
    method match (line 45) | def match(cls, config: AutoConfig):
    method build_preprocessor (line 52) | def build_preprocessor(self):
    method build_model (line 64) | def build_model(self):
    method preprocess (line 70) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 106) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method proc_single_message (line 121) | def proc_single_message(message):
    method proc_messages (line 150) | def proc_messages(messages, chat_template, sequence_start):
    method to_pytorch (line 162) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 166) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...

FILE: lmdeploy/vl/model/gemma3_vl.py
  class Gemma3ImagesKwargs (line 14) | class Gemma3ImagesKwargs(ImagesKwargs):
  class Gemma3ProcessorKwargs (line 22) | class Gemma3ProcessorKwargs(ProcessingKwargs, total=False):
  class Gemma3VisionModel (line 38) | class Gemma3VisionModel(VisionModel):
    method __init__ (line 43) | def __init__(self,
    method build_preprocessor (line 51) | def build_preprocessor(self):
    method build_model (line 58) | def build_model(self):
    method preprocess (line 64) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 94) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method proc_messages (line 109) | def proc_messages(messages, chat_template, sequence_start):
    method to_pytorch (line 126) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 130) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...

FILE: lmdeploy/vl/model/glm4_1v.py
  class GLM4_1_VisionModel (line 13) | class GLM4_1_VisionModel(VisionModel):
    method match (line 19) | def match(cls, config: AutoConfig):
    method build_preprocessor (line 26) | def build_preprocessor(self):
    method build_model (line 33) | def build_model(self):
    method preprocess (line 36) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method proc_messages (line 55) | def proc_messages(messages, chat_template, sequence_start):
    method to_pytorch (line 77) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...

FILE: lmdeploy/vl/model/glm4_v.py
  class GLM4VisionModel (line 13) | class GLM4VisionModel(VisionModel):
    method match (line 19) | def match(cls, config: AutoConfig):
    method build_preprocessor (line 26) | def build_preprocessor(self):
    method build_model (line 38) | def build_model(self):
    method preprocess (line 47) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method proc_messages (line 71) | def proc_messages(messages, chat_template, sequence_start):
    method to_pytorch (line 89) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...

FILE: lmdeploy/vl/model/interns1_pro.py
  function check_transformers (line 15) | def check_transformers():
  class InternS1ProVisionModel (line 24) | class InternS1ProVisionModel(VisionModel):
    method build_preprocessor (line 32) | def build_preprocessor(self):
    method get_processor_args (line 52) | def get_processor_args(self, mm_processor_kwargs: Optional[Dict[str, A...
    method check_time_series_input (line 88) | def check_time_series_input(self, messages):
    method _preprocess_image (line 94) | def _preprocess_image(self,
    method _preprocess_video (line 113) | def _preprocess_video(self,
    method _preprocess_time_series (line 144) | def _preprocess_time_series(self,
    method preprocess (line 185) | def preprocess(self, messages: List[Dict], mm_processor_kwargs: Dict[s...
    method proc_messages (line 209) | def proc_messages(self,
    method to_pytorch_aux_video (line 242) | def to_pytorch_aux_video(self, messages, prompt, VIDEO_TOKEN, tokenize...
    method to_pytorch_aux_ts (line 286) | def to_pytorch_aux_ts(self, messages, prompt, TS_TOKEN, tokenizer, seq...
    method to_pytorch (line 320) | def to_pytorch(self,
    method build_model (line 342) | def build_model(self):
    method forward (line 347) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method to_turbomind (line 351) | def to_turbomind(self,

FILE: lmdeploy/vl/model/internvl.py
  function find_closest_aspect_ratio (line 14) | def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height...
  function dynamic_preprocess (line 31) | def dynamic_preprocess(image, min_num=1, max_num=6, image_size=448, use_...
  class InternVLVisionModel (line 67) | class InternVLVisionModel(VisionModel):
    method __init__ (line 72) | def __init__(self,
    method build_preprocessor (line 83) | def build_preprocessor(self):
    method build_model (line 117) | def build_model(self):
    method _preprocess_v1_5 (line 143) | def _preprocess_v1_5(self, image, params=None):
    method _forward_v1_5 (line 159) | def _forward_v1_5(self, inputs, max_batch_size):
    method _preprocess (line 174) | def _preprocess(self, image, params=None):
    method _forward (line 179) | def _forward(self, inputs, max_batch_size):
    method preprocess (line 193) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 210) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method proc_messages (line 227) | def proc_messages(
    method to_pytorch (line 270) | def to_pytorch(self,
    method to_turbomind (line 285) | def to_turbomind(self,

FILE: lmdeploy/vl/model/internvl3_hf.py
  class InternVLImagesKwargs (line 15) | class InternVLImagesKwargs(ImagesKwargs, total=False):
  class InternVLProcessorKwargs (line 21) | class InternVLProcessorKwargs(ProcessingKwargs, total=False):
  class InternVL3VisionModel (line 35) | class InternVL3VisionModel(InternVLVisionModel):
    method __init__ (line 40) | def __init__(self,
    method build_preprocessor (line 49) | def build_preprocessor(self):
    method build_model (line 57) | def build_model(self):
    method preprocess (line 86) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 119) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...

FILE: lmdeploy/vl/model/internvl_llava.py
  function check_llava_install (line 19) | def check_llava_install():
  function _intern_vision_model__from_pretrained (line 28) | def _intern_vision_model__from_pretrained(vision_tower_name: str):
  function _intern_vl_model__from_pretrained (line 37) | def _intern_vl_model__from_pretrained(vision_tower_name: str):
  function init_empty_vit (line 49) | def init_empty_vit():
  class InternVLLlavaVisionModel (line 61) | class InternVLLlavaVisionModel(LlavaVisionModel):
    method match (line 65) | def match(cls, config: AutoConfig):
    method build_preprocessor (line 74) | def build_preprocessor(self):
    method build_model (line 77) | def build_model(self):
    method preprocess (line 128) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 133) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...

FILE: lmdeploy/vl/model/llama4.py
  function check_trans_version (line 13) | def check_trans_version():
  class LLama4VisionModel (line 27) | class LLama4VisionModel(VisionModel):
    method match (line 33) | def match(cls, config: AutoConfig):
    method build_preprocessor (line 38) | def build_preprocessor(self):
    method build_model (line 55) | def build_model(self):
    method preprocess (line 61) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 87) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method proc_messages (line 102) | def proc_messages(messages, chat_template, sequence_start):
    method to_pytorch_aux (line 121) | def to_pytorch_aux(self, messages, prompt, IMAGE_TOKEN, tokenizer, seq...
    method to_pytorch (line 156) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 160) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...

FILE: lmdeploy/vl/model/llava.py
  function check_llava_install (line 21) | def check_llava_install():
  function _clip_vision_tower_load_model (line 31) | def _clip_vision_tower_load_model(self, **kwargs):
  function init_llava_vision_tower (line 42) | def init_llava_vision_tower(config):
  function select_best_resolution (line 55) | def select_best_resolution(original_size, possible_resolutions):
  function resize_and_pad_image (line 86) | def resize_and_pad_image(image, target_resolution):
  function divide_to_patches (line 121) | def divide_to_patches(image, patch_size):
  function process_anyres_image (line 142) | def process_anyres_image(image, processor, grid_pinpoints):
  function expand2square (line 171) | def expand2square(pil_img, background_color):
  function process_images (line 185) | def process_images(images, image_processor, model_cfg):
  class LlavaVisionModel (line 205) | class LlavaVisionModel(LlavaHfVisionModel):
    method match (line 209) | def match(cls, config: AutoConfig):
    method build_preprocessor (line 224) | def build_preprocessor(self):
    method build_model (line 234) | def build_model(self):
    method encode_images (line 292) | def encode_images(self, images: torch.Tensor) -> torch.Tensor:
    method preprocess (line 298) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 314) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...

FILE: lmdeploy/vl/model/llava_hf.py
  class LlavaHfVisionModel (line 16) | class LlavaHfVisionModel(VisionModel):
    method build_preprocessor (line 21) | def build_preprocessor(self):
    method build_model (line 33) | def build_model(self):
    method preprocess (line 58) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 74) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method proc_messages (line 109) | def proc_messages(messages, chat_template, sequence_start):
    method to_pytorch (line 126) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 130) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...

FILE: lmdeploy/vl/model/llava_next.py
  class LlavaNextVisionModel (line 16) | class LlavaNextVisionModel(LlavaHfVisionModel):
    method build_preprocessor (line 21) | def build_preprocessor(self):
    method build_model (line 34) | def build_model(self):
    method preprocess (line 66) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 102) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...

FILE: lmdeploy/vl/model/minicpmv.py
  class MiniCPMVModel (line 18) | class MiniCPMVModel(VisionModel):
    method __init__ (line 23) | def __init__(self,
    method build_preprocessor (line 38) | def build_preprocessor(self):
    method build_model (line 44) | def build_model(self):
    method _get_slice_image (line 72) | def _get_slice_image(self, image: Image):
    method _reshape_by_patch (line 82) | def _reshape_by_patch(self, slice_images):
    method _preprocess_v2_5 (line 97) | def _preprocess_v2_5(self, image: Image, params: Dict = None) -> Dict:
    method _preprocess_v2_6 (line 111) | def _preprocess_v2_6(self, image: Image, params: Dict = None) -> Dict:
    method preprocess (line 133) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 149) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method proc_messages (line 202) | def proc_messages(self, messages, chat_template, sequence_start):
    method to_pytorch (line 237) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 241) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...

FILE: lmdeploy/vl/model/mllama.py
  function check_transformers (line 8) | def check_transformers():
  class MllamaVLModel (line 17) | class MllamaVLModel(VisionModel):
    method build_preprocessor (line 22) | def build_preprocessor(self):
    method preprocess (line 27) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method build_model (line 39) | def build_model(self):
    method proc_messages (line 49) | def proc_messages(messages, chat_template, sequence_start):
    method to_pytorch (line 66) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...

FILE: lmdeploy/vl/model/molmo.py
  class MolmoVisionModel (line 16) | class MolmoVisionModel(VisionModel):
    method build_preprocessor (line 21) | def build_preprocessor(self):
    method build_model (line 27) | def build_model(self):
    method preprocess (line 53) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 78) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method proc_messages (line 129) | def proc_messages(messages):
    method to_pytorch (line 148) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 151) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...

FILE: lmdeploy/vl/model/phi3_vision.py
  class Phi3VisionModel (line 11) | class Phi3VisionModel(LlavaHfVisionModel):
    method build_preprocessor (line 16) | def build_preprocessor(self):
    method build_model (line 23) | def build_model(self):
    method preprocess (line 32) | def preprocess(self, messages: List[Dict]) -> List[Dict]:

FILE: lmdeploy/vl/model/qwen.py
  class QwenVisionModel (line 16) | class QwenVisionModel(VisionModel):
    method build_preprocessor (line 21) | def build_preprocessor(self):
    method build_model (line 33) | def build_model(self):
    method preprocess (line 72) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 88) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method proc_messages (line 113) | def proc_messages(messages, chat_template, sequence_start):
    method to_pytorch (line 134) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 138) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...

FILE: lmdeploy/vl/model/qwen2.py
  function check_qwen_vl_deps_install (line 8) | def check_qwen_vl_deps_install():
  class Qwen2VLModel (line 23) | class Qwen2VLModel(VisionModel):
    method build_preprocessor (line 28) | def build_preprocessor(self):
    method preprocess (line 36) | def preprocess(self, messages: list[dict]) -> list[dict]:
    method build_model (line 57) | def build_model(self):
    method forward (line 94) | def forward(self, messages: list[dict], max_batch_size: int = 1) -> li...
    method proc_messages (line 126) | def proc_messages(self, messages, chat_template, sequence_start, chat_...
    method get_mrope_info (line 163) | def get_mrope_info(seq_len: int,
    method to_pytorch (line 187) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 192) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...

FILE: lmdeploy/vl/model/qwen3.py
  function check_transformers (line 14) | def check_transformers():
  class Qwen3VLModel (line 23) | class Qwen3VLModel(VisionModel):
    method build_preprocessor (line 28) | def build_preprocessor(self):
    method get_processor_args (line 44) | def get_processor_args(self, mm_processor_kwargs: Dict[str, Any] | Non...
    method _preprocess_image (line 80) | def _preprocess_image(self,
    method _preprocess_video (line 99) | def _preprocess_video(self,
    method preprocess (line 130) | def preprocess(self, messages: List[Dict], mm_processor_kwargs: Dict[s...
    method proc_messages (line 150) | def proc_messages(self, messages, chat_template, sequence_start, chat_...
    method to_pytorch_aux_video (line 172) | def to_pytorch_aux_video(self, messages, prompt, VIDEO_TOKEN, tokenize...
    method to_pytorch (line 216) | def to_pytorch(self,
    method build_model (line 231) | def build_model(self):
    method forward (line 236) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method to_turbomind (line 240) | def to_turbomind(self,

FILE: lmdeploy/vl/model/qwen3_5.py
  function check_transformers (line 12) | def check_transformers():
  class Qwen3_5Model (line 21) | class Qwen3_5Model(Qwen3VLModel):
    method build_preprocessor (line 26) | def build_preprocessor(self):

FILE: lmdeploy/vl/model/utils.py
  function disable_transformers_logging (line 11) | def disable_transformers_logging():
  function disable_logging (line 21) | def disable_logging():
  function _set_func (line 29) | def _set_func(origin_func_path: str | None, rewrite_func: Callable, orig...
  function rewrite_ctx (line 75) | def rewrite_ctx(origin_func_path: list[str | Callable], rewrite_func: li...
  function add_device_hook (line 93) | def add_device_hook(module: torch.nn.Module, device: torch.device, fn: C...

FILE: lmdeploy/vl/model/xcomposer2.py
  function check_xcomposer_install (line 21) | def check_xcomposer_install():
  class ModelType (line 31) | class ModelType(enum.Enum):
  function get_xcomposer_type (line 38) | def get_xcomposer_type(model_path: str) -> Tuple[ModelType, Any]:
  function _CLIPVisionModel_from_pretrained (line 54) | def _CLIPVisionModel_from_pretrained(vision_tower_name):
  function init_empty_vit (line 62) | def init_empty_vit(model_path):
  class Xcomposer2VisionModel (line 87) | class Xcomposer2VisionModel(VisionModel):
    method __init__ (line 90) | def __init__(self,
    method match (line 103) | def match(cls, config: AutoConfig):
    method build_preprocessor (line 114) | def build_preprocessor(self):
    method build_model (line 136) | def build_model(self):
    method _preprocess_2d5 (line 183) | def _preprocess_2d5(self, image: Image, params: Dict) -> Dict:
    method _preprocess_7b (line 193) | def _preprocess_7b(self, image: Image, params: Dict) -> Dict:
    method _preprocess_4khd_7b (line 198) | def _preprocess_4khd_7b(self, image: Image, params: Dict) -> Dict:
    method preprocess (line 207) | def preprocess(self, messages: List[Dict]) -> List[Dict]:
    method forward (line 223) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li...
    method proc_messages (line 257) | def proc_messages(messages, chat_template, sequence_start, model_type):
    method to_pytorch (line 284) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star...
    method to_turbomind (line 288) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st...

FILE: lmdeploy/vl/model/yi.py
  function _build_vision_projector (line 19) | def _build_vision_projector(config, delay_load=False, **kwargs):
  function _build_vision_tower (line 57) | def _build_vision_tower(vision_tower_cfg, **kwargs):
  function init_yi_model (line 74) | def init_yi_model():
  class YiVisionModel (line 85) | class YiVisionModel(LlavaVisionModel):
    method match (line 89) | def match(cls, config: AutoConfig):
    method build_preprocessor (line 98) | def build_preprocessor(self):
    method build_model (line 109) | def build_model(self):
    method preprocess (line 120) | def preprocess(self, messages: List[Dict]) -> List[Dict]:

FILE: lmdeploy/vl/tools/merge_xcomposer2d5_task.py
  function main (line 11) | def main(src_path: str, dst_path: str, task: str):

FILE: lmdeploy/vl/utils.py
  function load_image (line 13) | def load_image(image_url: str, **kwargs) -> Image.Image:
  function load_video (line 19) | def load_video(video_url: str, **kwargs) -> Tuple[npt.NDArray, Dict[str,...
  function load_time_series (line 26) | def load_time_series(ts_url: str, **kwargs) -> npt.NDArray:
  function encode_image_base64 (line 32) | def encode_image_base64(image: str | Image.Image, format: str = 'PNG', *...
  function encode_video_base64 (line 40) | def encode_video_base64(video: str | npt.NDArray, format: str = 'JPEG', ...
  function encode_time_series_base64 (line 49) | def encode_time_series_base64(data: str | npt.NDArray, **kwargs) -> str:

FILE: setup.py
  function get_target_device (line 13) | def get_target_device():
  function readme (line 17) | def readme():
  function get_version (line 23) | def get_version():
  function get_turbomind_deps (line 35) | def get_turbomind_deps():
  function parse_requirements (line 58) | def parse_requirements(fname='requirements.txt', with_version=True):

FILE: src/turbomind/comm/barrier.h
  function class (line 13) | class Barrier {
  function namespace (line 47) | namespace turbomind::comm {

FILE: src/turbomind/comm/cuda_ipc/bootstrap.h
  function namespace (line 12) | namespace turbomind::comm {
  function getNranks (line 53) | int getNranks()
  function getNranksPerNode (line 58) | int getNranksPerNode()
  function send (line 63) | void send(void* data, int size, int peer, int tag)

FILE: src/turbomind/comm/cuda_ipc/common.h
  function namespace (line 5) | namespace turbomind::comm {

FILE: src/turbomind/comm/cuda_ipc/cuda_ipc_comm.h
  function namespace (line 17) | namespace turbomind::comm {
  function multicast_capability_ (line 211) | int multicast_capability_{false};

FILE: src/turbomind/comm/cuda_ipc/group_sum.h
  function namespace (line 7) | namespace turbomind::comm {

FILE: src/turbomind/comm/cuda_ipc/mscclpp.h
  function namespace (line 9) | namespace mscclpp {

FILE: src/turbomind/comm/cuda_ipc/semaphore.h
  function namespace (line 8) | namespace turbomind::comm {
  function SystemSemaphoreInfo (line 57) | SystemSemaphoreInfo* handle()

FILE: src/turbomind/comm/device_comm.cc
  type turbomind::comm (line 6) | namespace turbomind::comm {
    function DeviceComm (line 14) | DeviceComm CreateDeviceCommunicator(const std::string& backend, int n_...

FILE: src/turbomind/comm/device_comm.h
  function namespace (line 13) | namespace turbomind::comm {

FILE: src/turbomind/comm/env.h
  function is_set (line 17) | static auto value = [] {

FILE: src/turbomind/comm/gloo/gloo_comm.cc
  type turbomind::comm (line 28) | namespace turbomind::comm {
    function createGlooDevice (line 33) | std::shared_ptr<::gloo::transport::Device> createGlooDevice()
    class Store (line 55) | class Store: public ::gloo::rendezvous::PrefixStore {
      method Store (line 57) | explicit Store(const std::string& host, int port, const std::string&...
      method New (line 65) | std::shared_ptr<Store> New(const std::string& prefix)
    class GlobalStoreFactory (line 79) | class GlobalStoreFactory {
      method GlobalStoreFactory (line 81) | static GlobalStoreFactory& Instance()
      method New (line 87) | std::string New()
      method Load (line 101) | std::shared_ptr<Store> Load(const std::string& info)
      method GlobalStoreFactory (line 119) | GlobalStoreFactory() {}
    type GlooCommImpl (line 127) | struct GlooCommImpl: public HostCommImpl {
      type SplitInfo (line 129) | struct SplitInfo {
      method GlooCommImpl (line 144) | GlooCommImpl(std::shared_ptr<Store> store, int n_ranks, int rank):
      method rank (line 155) | int rank() const override
      method n_ranks (line 160) | int n_ranks() const override
      method is_same_process (line 165) | bool is_same_process() const override
      method Split (line 170) | std::shared_ptr<HostCommImpl> Split(int color, int key) override
      method Sync (line 188) | void Sync(bool blocking) override
      method Broadcast (line 194) | void Broadcast(void* data, int count, DataType dtype, int root, copy...
      method AllGather (line 224) | void AllGather(void* data, int count, DataType dtype, copy_fn copy, ...
      method Broadcast (line 254) | void Broadcast(void* data, int count, DataType dtype, int root)
      method AllGather (line 262) | void AllGather(void* data, int count, DataType dtype)
      method ReduceFunc (line 269) | static ReduceFunc getReduceFunc(DataType dtype, RedOp red_op)
      method AllReduce (line 310) | void AllReduce(void* data, int count, DataType dtype, RedOp red_op) ...
    class GlooGroupId (line 344) | class GlooGroupId: public HostGroupId {
      method Initialize (line 346) | void Initialize() override
      method Export (line 352) | void Export(std::ostream& os) override
      method Import (line 357) | void Import(std::istream& is) override
      method HostComm (line 364) | HostComm CreateCommunicator(int n_ranks, int rank, int node_rank = 0...
    function CreateGlooGroupId (line 376) | std::unique_ptr<HostGroupId> CreateGlooGroupId()

FILE: src/turbomind/comm/gloo/hybrid_comm.cc
  type turbomind::comm (line 6) | namespace turbomind::comm {
    type HybridCommImpl (line 11) | struct HybridCommImpl: public HostCommImpl {
      method HybridCommImpl (line 13) | HybridCommImpl(int n_ranks, int rank, int node_rank, HostGroupId* gl...
      method HybridCommImpl (line 30) | HybridCommImpl(std::shared_ptr<HostCommImpl> gloo_comm, std::shared_...
      method init_inter_comm (line 45) | void init_inter_comm()
      method Split (line 68) | std::shared_ptr<HostCommImpl> Split(int color, int key) override
      method rank (line 80) | int rank() const override
      method n_ranks (line 85) | int n_ranks() const override
      method is_same_process (line 90) | bool is_same_process() const override
      method Sync (line 95) | void Sync(bool blocking) override
      method Broadcast (line 103) | void Broadcast(void* data, int count, DataType dtype, int root, copy...
      method Broadcast (line 115) | void Broadcast(void* data, int count, DataType dtype, int root, copy...
      method AllGather (line 127) | void AllGather(void* data, int count, DataType dtype, copy_fn copy, ...
      method AllGather (line 136) | void AllGather(void* data, int count, DataType dtype, copy_fn copy)
      method AllReduce (line 146) | void AllReduce(void* data, int count, DataType dtype, RedOp red_op) ...
    class HybridGroupId (line 175) | class HybridGroupId: public HostGroupId {
      method HybridGroupId (line 177) | HybridGroupId()
      method Initialize (line 183) | void Initialize() override
      method Export (line 189) | void Export(std::ostream& os) override
      method Import (line 195) | void Import(std::istream& is) override
      method HostComm (line 201) | HostComm CreateCommunicator(int n_ranks, int rank, int node_rank)
    function CreateHybridGroupId (line 215) | std::unique_ptr<HostGroupId> CreateHybridGroupId()

FILE: src/turbomind/comm/gloo/tcp_store.cc
  type turbomind::comm (line 14) | namespace turbomind::comm {
    type CheckResponseType (line 22) | enum class CheckResponseType : uint8_t
    type QueryType (line 28) | enum class QueryType : uint8_t
    type Buffer (line 51) | struct Buffer {
      method append (line 55) | void append(T val)
      method append (line 61) | void append(const std::vector<char>& vec)
      method append (line 67) | void append(const std::string& str)
      method count (line 78) | size_t count() const
    function validate (line 84) | void validate(std::shared_ptr<::gloo::transport::tcp::Socket>& socket)
    function ping (line 92) | void ping(std::shared_ptr<::gloo::transport::tcp::Socket>& socket)

FILE: src/turbomind/comm/gloo/tcp_store.h
  function namespace (line 11) | namespace turbomind::comm {

FILE: src/turbomind/comm/gloo/test_ipc_comm.cc
  type Store (line 22) | struct Store {
    method Store (line 32) | Store(const std::string& hostname, const std::string& port, int nnodes...
    method start (line 75) | void start()
    method stop (line 81) | void stop()
  type TestGlooComm (line 88) | struct TestGlooComm {
    method TestGlooComm (line 97) | TestGlooComm(const std::string& host, const std::string& port, int nno...
    method init (line 103) | void init()
    method test_broadcast (line 132) | void test_broadcast()
    method test_allgather (line 180) | void test_allgather()
    method test_allreduce (line 241) | void test_allreduce()
    method test_perf (line 272) | void test_perf()
  function main (line 367) | int main(int argc, char* argv[])

FILE: src/turbomind/comm/host_comm.cc
  type turbomind::comm (line 5) | namespace turbomind::comm {
    function CreateHostGroupId (line 15) | std::unique_ptr<HostGroupId> CreateHostGroupId(const std::string& back...

FILE: src/turbomind/comm/host_comm.h
  type class (line 19) | enum class
  function noexcept (line 72) | const noexcept
  function namespace (line 86) | namespace detail {
  function class (line 190) | class HostGroupId {

FILE: src/turbomind/comm/test_host_comm.cc
  function main (line 11) | int main(int argc, char* argv[])

FILE: src/turbomind/comm/thread_comm.cc
  type turbomind::comm (line 15) | namespace turbomind::comm {
    type ThreadCommImpl (line 17) | struct ThreadCommImpl: public HostCommImpl {
      class State (line 19) | class State {
        method State (line 21) | explicit State(int n): n_{n}, channels_(n * n), barrier_{n} {}
        method sync (line 28) | void sync()
      method ThreadCommImpl (line 44) | ThreadCommImpl(int n_ranks, std::shared_ptr<State> state, int rank):
      method rank (line 49) | int rank() const override
      method n_ranks (line 54) | int n_ranks() const override
      method is_same_process (line 59) | bool is_same_process() const override
      method Split (line 69) | std::shared_ptr<HostCommImpl> Split(int color, int key) override
      method Sync (line 104) | void Sync(bool blocking) override
      method Broadcast (line 135) | void Broadcast(void* data, int count, DataType dtype, int root, copy...
      method AllGather (line 168) | void AllGather(void* data, int count, DataType dtype, copy_fn copy, ...
      method reduce (line 201) | static void reduce(void* src, int n, void* dst, int offset)
      method reduce_fn (line 221) | static reduce_fn get_reduce(DataType dtype, RedOp red_op)
      method AllReduce (line 259) | void AllReduce(void* data, int count, DataType dtype, RedOp red_op) ...
    class ThreadGroupId (line 295) | class ThreadGroupId: public HostGroupId {
      method Initialize (line 297) | void Initialize() override
      method Export (line 302) | void Export(std::ostream& os) override
      method Import (line 310) | void Import(std::istream& is) override
      method HostComm (line 319) | HostComm CreateCommunicator(int n_ranks, int rank, int node_rank = 0...
      type Internal (line 338) | struct Internal {
    function CreateThreadGroupId (line 347) | std::unique_ptr<HostGroupId> CreateThreadGroupId()
    function save (line 353) | void save(Archive& ar, const std::shared_ptr<ThreadCommImpl::State>& p)
    function load (line 359) | void load(Archive& ar, std::shared_ptr<ThreadCommImpl::State>& p)

FILE: src/turbomind/core/allocator.cc
  type turbomind::core (line 8) | namespace turbomind::core {
    function Stream (line 12) | Stream AllocatorImpl::stream() const noexcept
    class CudaMemPoolAllocator (line 17) | class CudaMemPoolAllocator: public AllocatorImpl {
      method CudaMemPoolAllocator (line 19) | CudaMemPoolAllocator(Stream stream, bool use_default_pool):
      method deallocate (line 53) | void deallocate(void* p, ssize_t) override
      method Device (line 58) | Device device() const noexcept override
      method Stream (line 63) | Stream stream() const noexcept override
      method trim (line 68) | void trim(size_t bytes_to_keep)
    class CudaAllocator (line 80) | class CudaAllocator: public AllocatorImpl {
      method deallocate (line 89) | void deallocate(void* p, ssize_t) override
      method Device (line 94) | Device device() const noexcept override
    class CudaHostAllocator (line 100) | class CudaHostAllocator: public AllocatorImpl {
      method deallocate (line 109) | void deallocate(void* p, ssize_t) override
      method Device (line 114) | Device device() const noexcept override
    class HostAllocator (line 120) | class HostAllocator: public AllocatorImpl {
      method deallocate (line 127) | void deallocate(void* p, ssize_t) override
      method Device (line 132) | Device device() const noexcept override

FILE: src/turbomind/core/allocator.h
  function DeviceType (line 14) | enum class DeviceType : int
  function namespace (line 45) | namespace turbomind::core {
  function class (line 63) | class AllocatorImpl {
  function explicit (line 124) | explicit StackAllocatorImpl(shared_ptr<AllocatorImpl> underlying_impl): ...
  function deallocate (line 231) | void deallocate(void* p, ssize_t size) override

FILE: src/turbomind/core/buffer.cc
  type turbomind::core (line 7) | namespace turbomind::core {
    function Buffer (line 9) | Buffer Buffer::view(DataType dtype) const
    function Buffer (line 23) | Buffer Buffer::slice(ssize_t base, ssize_t size) const
    function Copy (line 46) | void Copy(const Buffer& a, ssize_t n, Ref<Buffer> b_, const Stream& st...
    function Copy (line 57) | void Copy(const Buffer& a, ssize_t n, Ref<Buffer> b_)
    function Copy (line 62) | void Copy(const Buffer& a, Ref<Buffer> b_, const Stream& stream)
    function Copy (line 68) | void Copy(const Buffer& a, Ref<Buffer> b_)
    type detail (line 73) | namespace detail {
    function Clear (line 85) | void Clear(Ref<Buffer> b_, const Stream& stream)
    function Clear (line 93) | void Clear(Ref<Buffer> b_)

FILE: src/turbomind/core/buffer.h
  function namespace (line 15) | namespace turbomind::core {
  function explicit (line 22) | explicit Buffer(DataType dtype): Buffer()
  function Buffer (line 130) | Buffer borrow() const
  function Buffer (line 169) | inline Buffer empty_like(const Buffer& buffer, Device device)

FILE: src/turbomind/core/check.cc
  type turbomind::core (line 10) | namespace turbomind::core {
    function StripSrcPrefix (line 14) | std::string StripSrcPrefix(const char* file)
    function ReportNullError (line 83) | void ReportNullError(const char* file, int line, const char* expr)

FILE: src/turbomind/core/check.h
  function class (line 25) | class CheckErrorStream {
  function class (line 59) | class CheckOpStringBuilder {
  function string (line 74) | string* MakeCheckOpString(const T1& v1, const T2& v2)

FILE: src/turbomind/core/common.h
  function namespace (line 11) | namespace turbomind::core {

FILE: src/turbomind/core/context.cc
  type turbomind::core (line 7) | namespace turbomind::core {
    type ContextStorage (line 11) | struct ContextStorage {
      method ContextStorage (line 26) | ContextStorage()
      method push (line 31) | void push(const Stream& stream)
      method push (line 41) | void push(const Allocator& alloc)
      method pop (line 62) | void pop()
      method ContextStorage (line 79) | static ContextStorage& instance()
    function Stream (line 103) | Stream& Context::stream()
    function Allocator (line 110) | Allocator& Context::host_alloc()
    function Allocator (line 117) | Allocator& Context::device_alloc()
    function Allocator (line 124) | Allocator& Context::pinned_alloc()
    function Allocator (line 131) | Allocator& Context::alloc(Device device)

FILE: src/turbomind/core/context.h
  function namespace (line 7) | namespace turbomind::core {

FILE: src/turbomind/core/copy.cc
  type turbomind::core (line 12) | namespace turbomind::core {
    type CUmemcpyFlags_enum (line 16) | enum CUmemcpyFlags_enum
    type CUmemcpySrcAccessOrder_enum (line 22) | enum CUmemcpySrcAccessOrder_enum
    type CUmemcpyAttributes_st (line 31) | struct CUmemcpyAttributes_st {

FILE: src/turbomind/core/copy.h
  function namespace (line 8) | namespace turbomind::core {

FILE: src/turbomind/core/core.h
  function namespace (line 14) | namespace turbomind {

FILE: src/turbomind/core/cuda_data_type.h
  function namespace (line 14) | namespace turbomind {
  function DataType (line 40) | constexpr DataType from_cuda_dtype(cudaDataType type) {

FILE: src/turbomind/core/data_type.h
  type __half (line 12) | struct __half
  type __nv_bfloat16 (line 13) | struct __nv_bfloat16
  type __nv_fp8_e4m3 (line 14) | struct __nv_fp8_e4m3
  type __nv_fp8_e5m2 (line 15) | struct __nv_fp8_e5m2
  function namespace (line 17) | namespace turbomind {
  function numel (line 233) | ptrdiff_t numel(std::ptrdiff_t size) { return numel(data_type_v<T>, size...

FILE: src/turbomind/core/interval.h
  function class (line 11) | class Interval {
  function explicit (line 27) | explicit Interval(int first): first_{first}, last_{INT_MAX} {};
  function Size (line 43) | Size size() const noexcept

FILE: src/turbomind/core/layout.cc
  type turbomind::core (line 7) | namespace turbomind::core {
    function Layout (line 42) | Layout Layout::coalesce() const noexcept
    function Layout (line 68) | Layout Layout::view(vector<ssize_t> shape) const

FILE: src/turbomind/core/layout.h
  function namespace (line 9) | namespace turbomind::core {
  function Layout (line 82) | Layout permute(const vector<int>& dims) const
  function Layout (line 93) | Layout transpose(int a, int b) const
  function offset (line 103) | ssize_t offset(const vector<ssize_t>& idxs) const
  function offset (line 114) | ssize_t offset(ssize_t idx0) const
  function Layout (line 127) | Layout squeeze(int dim) const
  function std (line 169) | inline std::string to_string(const Layout& x)

FILE: src/turbomind/core/module.cc
  type turbomind::core (line 6) | namespace turbomind::core {

FILE: src/turbomind/core/module.h
  function namespace (line 7) | namespace turbomind::core {

FILE: src/turbomind/core/ranges.h
  function namespace (line 3) | namespace turbomind::core {

FILE: src/turbomind/core/serdes.h
  function namespace (line 9) | namespace turbomind::core {
  function T (line 45) | T* data() const
  function size_ (line 104) | size_t size_{}
  function size (line 106) | size_t size()

FILE: src/turbomind/core/state.h
  function namespace (line 10) | namespace turbomind {
  function Swap (line 38) | void Swap()

FILE: src/turbomind/core/stream.cc
  type turbomind::core (line 5) | namespace turbomind::core {
    function Stream (line 7) | Stream Stream::create(int priority)

FILE: src/turbomind/core/stream.h
  function namespace (line 8) | namespace turbomind::core {

FILE: src/turbomind/core/tensor.cc
  type turbomind::core (line 7) | namespace turbomind::core {
    function Tensor (line 15) | Tensor& TensorMap::at(const std::string& key)
    function Tensor (line 34) | Tensor* TensorMap::try_(const std::string& key)
    function Copy (line 43) | void Copy(const Tensor& src, Ref<Tensor> dst_, const Stream& stream)
    function Copy (line 55) | void Copy(const Tensor& src, Ref<Tensor> dst_)
    function Clear (line 60) | void Clear(Ref<Tensor> a_, const Stream& stream)
    function Clear (line 69) | void Clear(Ref<Tensor> a_)
    function Copy (line 76) | void Copy(const Tensor& src, Tensor& dst, Stream& stream)
    function Copy (line 167) | void Copy(const Tensor& src, Tensor&& dst, Stream& stream)

FILE: src/turbomind/core/tensor.h
  function namespace (line 12) | namespace turbomind::core {
  function byte_size (line 69) | ssize_t byte_size() const noexcept
  function Tensor (line 113) | Tensor view(std::vector<ssize_t> shape) const
  function Tensor (line 160) | Tensor slice(std::vector<ssize_t> base, std::vector<ssize_t> shape) const
  function Tensor (line 177) | Tensor borrow() const

FILE: src/turbomind/engine/batch.h
  function namespace (line 9) | namespace turbomind {

FILE: src/turbomind/engine/engine.cc
  type turbomind (line 28) | namespace turbomind {
    type RequestData (line 34) | struct RequestData {
    function serdes (line 43) | void serdes(Archive& ar, RequestData& r)
    type Engine::Impl (line 51) | struct Engine::Impl {
      method Run (line 90) | void Run(BatchOp op, int phase, Ref<TensorMap> env)
      method Start (line 95) | void Start()
      type State (line 138) | struct State {
        method size (line 147) | int size() const noexcept
      type Data (line 155) | struct Data {

FILE: src/turbomind/engine/engine.h
  function namespace (line 12) | namespace turbomind {

FILE: src/turbomind/engine/gateway.cc
  type turbomind (line 9) | namespace turbomind {

FILE: src/turbomind/engine/gateway.h
  function bind (line 31) | void bind(const std::vector<uint64_t>& seq_ids, int rank)
  function unbind (line 41) | void unbind(const std::vector<uint64_t>& seq_ids, int rank)

FILE: src/turbomind/engine/model_executor.cc
  type turbomind (line 16) | namespace turbomind {
    type ModelExecutor::Impl (line 21) | struct ModelExecutor::Impl {
      method InternalThreadEntry (line 33) | void InternalThreadEntry()
      method Run (line 56) | void Run(BatchData& d)
      method Impl (line 78) | Impl(LanguageModel&                model,
      method Start (line 94) | void Start()

FILE: src/turbomind/engine/model_executor.h
  function namespace (line 13) | namespace turbomind {

FILE: src/turbomind/engine/model_request.cc
  type turbomind (line 17) | namespace turbomind {

FILE: src/turbomind/engine/model_request.h
  function namespace (line 10) | namespace xgrammar {
  function namespace (line 14) | namespace turbomind {

FILE: src/turbomind/engine/queue.h
  function close (line 36) | void close()

FILE: src/turbomind/engine/request.cc
  type turbomind (line 7) | namespace turbomind {
    function UpdateState (line 46) | void UpdateState(Request& r, int status, int seq_len)

FILE: src/turbomind/engine/request.h
  function namespace (line 16) | namespace xgrammar {
  function namespace (line 21) | namespace turbomind {
  type Request (line 87) | struct Request {
  type RequestCache (line 139) | struct RequestCache {

FILE: src/turbomind/engine/request_queue.cc
  type turbomind (line 8) | namespace turbomind {

FILE: src/turbomind/engine/request_queue.h
  function push (line 18) | void push(std::shared_ptr<Request> r)

FILE: src/turbomind/engine/signal_buffer.h
  function aborted_ (line 58) | bool aborted_{false};

FILE: src/turbomind/generation/base_param.h
  function namespace (line 5) | namespace turbomind {

FILE: src/turbomind/generation/generation.cc
  type turbomind (line 25) | namespace turbomind {
    type GenerationData (line 31) | struct GenerationData {
    type Generation::Impl (line 43) | struct Generation::Impl {
      method Impl (line 77) | Impl(DataType              dtype,
      method Setup (line 124) | void Setup(int phase, TensorMap& env)
      method Prepare (line 203) | void Prepare(int phase, TensorMap& env)
      method Unprep (line 216) | void Unprep(int phase, TensorMap& env)
      method Fetch (line 227) | void Fetch(int phase, TensorMap& env)
      method Update (line 241) | void Update(int phase, TensorMap& env)
      method Forward (line 246) | void Forward(int phase, TensorMap& env)

FILE: src/turbomind/generation/generation.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/generation/guided_decoding.cc
  type turbomind (line 10) | namespace turbomind {
    type GuidedDecoding::Data (line 12) | struct GuidedDecoding::Data {

FILE: src/turbomind/generation/guided_decoding.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/generation/logits_processor.cc
  type turbomind (line 29) | namespace turbomind {
    type LogitsProcessor::Data (line 31) | struct LogitsProcessor::Data {
      method Data (line 33) | Data(int max_batch_size, DeviceType device)

FILE: src/turbomind/generation/logits_processor.h
  function namespace (line 25) | namespace turbomind {

FILE: src/turbomind/generation/sampling.cc
  type turbomind (line 29) | namespace turbomind {
    type SamplingData (line 31) | struct SamplingData {
      method SamplingData (line 33) | explicit SamplingData(int max_batch_size, DeviceType device)

FILE: src/turbomind/generation/sampling.h
  function namespace (line 7) | namespace turbomind {

FILE: src/turbomind/generation/stop_criteria.cc
  type turbomind (line 11) | namespace turbomind {
    type StopCriteriaData (line 13) | struct StopCriteriaData {
      method StopCriteriaData (line 14) | explicit StopCriteriaData(int batch_size)

FILE: src/turbomind/generation/stop_criteria.h
  function namespace (line 23) | namespace turbomind {

FILE: src/turbomind/generation/utils.h
  function namespace (line 7) | namespace turbomind {

FILE: src/turbomind/kernels/activation.h
  function namespace (line 5) | namespace turbomind {

FILE: src/turbomind/kernels/activation_kernels.h
  function namespace (line 23) | namespace turbomind {

FILE: src/turbomind/kernels/apply_token_bitmask_inplace_cuda.h
  function namespace (line 3) | namespace turbomind {

FILE: src/turbomind/kernels/attention/arch.h
  function namespace (line 5) | namespace turbomind::arch {

FILE: src/turbomind/kernels/attention/attention.h
  function namespace (line 7) | namespace turbomind {

FILE: src/turbomind/kernels/attention/attention_params.h
  type LinearIteratorParams (line 14) | struct LinearIteratorParams {
  type BlockIteratorParams (line 20) | struct BlockIteratorParams {
  function cp_rank (line 85) | int                 cp_rank{0}
  function offset_q (line 87) | int                 offset_q{0}
  function CacheIterFactory (line 102) | CacheIterFactory apply(const Param& param)

FILE: src/turbomind/kernels/attention/attention_template.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/kernels/attention/attention_universal.h
  function namespace (line 19) | namespace attention {
  function hi_end_ (line 65) | int hi_end_{1}
  function __device__ (line 67) | __device__ bool check_h(int hi)
  function Vec (line 194) | Vec vec_Q[ITER_S][ITER_C]{}
  function __device__ (line 340) | __device__ AttentionUniversal(int q_group_size, int q_head_per_cta, int ...
  function attention_kernel (line 606) | void attention_kernel(typename Kernel::ParamType            params,

FILE: src/turbomind/kernels/attention/block.h
  function namespace (line 11) | namespace turbomind {
  function TM_HOST_DEVICE (line 66) | TM_HOST_DEVICE auto k_data(char* block, int ti) const

FILE: src/turbomind/kernels/attention/block_iterator.h
  function namespace (line 8) | namespace turbomind {

FILE: src/turbomind/kernels/attention/cp_utils.h
  function namespace (line 6) | namespace turbomind {

FILE: src/turbomind/kernels/attention/cta_map.h
  function namespace (line 5) | namespace turbomind::attention {
  type ReduceCtaMap (line 130) | struct ReduceCtaMap {
  function query_idx (line 135) | int query_idx()
  function __device__ (line 139) | static __device__ int head_idx()
  function __device__ (line 143) | static __device__ int split_idx()

FILE: src/turbomind/kernels/attention/decoding.h
  function namespace (line 7) | namespace turbomind {

FILE: src/turbomind/kernels/attention/decoding_template.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/kernels/attention/desc.h
  function namespace (line 10) | namespace turbomind::attention {

FILE: src/turbomind/kernels/attention/impl.h
  function namespace (line 5) | namespace turbomind {

FILE: src/turbomind/kernels/attention/impl_16816.h
  function namespace (line 13) | namespace turbomind::attention {
  type StateQK (line 150) | struct StateQK {
  function __device__ (line 173) | __device__ void Load(int k, int pipe_iter)
  function ComputeQK (line 203) | void
  function Transform (line 231) | struct StatePV {
  function ComputePV (line 277) | void

FILE: src/turbomind/kernels/attention/impl_1688.h
  function namespace (line 12) | namespace turbomind::attention {
  function Load (line 130) | struct StateQK {
  function __device__ (line 159) | __device__ void Transform(int k) {}
  type StatePV (line 185) | struct StatePV {
  function Load (line 196) | void Load(int k, int pipe_iter)
  function __device__ (line 207) | __device__ void Transform(int k) {}

FILE: src/turbomind/kernels/attention/impl_81616.h
  function __device__ (line 149) | __device__ static void Sync()
  function __device__ (line 173) | static __device__ int2 get_warp_ids()
  function Load (line 262) | struct StateQK {
  function __device__ (line 402) | __device__ void Load(int m, int pipe_iter)
  function tmp_L (line 571) | float tmp_L{}

FILE: src/turbomind/kernels/attention/impl_884.h
  function namespace (line 14) | namespace turbomind::attention {
  function Transform (line 95) | struct SharedStorage {
  function Load (line 230) | struct StatePV {
  function __device__ (line 260) | __device__ void Transform(int k) {}
  function tmp_L (line 344) | float tmp_L{}

FILE: src/turbomind/kernels/attention/impl_m16n8.h
  function namespace (line 7) | namespace turbomind::attention {

FILE: src/turbomind/kernels/attention/impl_simt.h
  function namespace (line 17) | namespace turbomind::attention {
  function __device__ (line 289) | __device__ void Load(int n, int pipe_iter)
  function __device__ (line 314) | __device__ void Transform(int n)
  function ComputeQK (line 325) | void
  type StatePV (line 374) | struct StatePV {
  function Load (line 388) | void Load(int k, int pipe_iter)
  function __device__ (line 413) | __device__ void Transform(int k)
  function ComputePV (line 424) | void
  function tmp_L (line 492) | float tmp_L{}

FILE: src/turbomind/kernels/attention/iterator.h
  function namespace (line 11) | namespace turbomind {
  function __device__ (line 78) | __device__ explicit BaseSmemIterator(T* smem): smem_{smem}
  type Fragment (line 86) | struct Fragment {
  function Prefetch (line 93) | void Prefetch(Args... args)
  function Load (line 101) | void Load(const CacheIter& cache_iter, Fragment& frag, int max_s)
  function __device__ (line 107) | __device__ void Save(const Fragment& frag)

FILE: src/turbomind/kernels/attention/iterator_sm70.h
  function namespace (line 8) | namespace turbomind {

FILE: src/turbomind/kernels/attention/iterator_sm80.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/kernels/attention/kernel.h
  function namespace (line 7) | namespace turbomind::attention {

FILE: src/turbomind/kernels/attention/kernel_impl.h
  function namespace (line 14) | namespace turbomind::attention {

FILE: src/turbomind/kernels/attention/kv_cache_utils_v2.h
  function namespace (line 8) | namespace turbomind {

FILE: src/turbomind/kernels/attention/linear_iterator.h
  function namespace (line 5) | namespace turbomind {

FILE: src/turbomind/kernels/attention/mainloop.h
  function namespace (line 5) | namespace turbomind::attention {

FILE: src/turbomind/kernels/attention/mainloop_sm70.h
  function namespace (line 9) | namespace turbomind::attention {

FILE: src/turbomind/kernels/attention/mainloop_sm80.h
  function namespace (line 11) | namespace turbomind::attention {

FILE: src/turbomind/kernels/attention/quantization.h
  function T (line 21) | T Infinity()
  function T (line 41) | constexpr T Max(T a, T b)
  function T (line 65) | constexpr T Min(T a, T b)
  function y (line 216) | uint32_t y{}
  function y (line 238) | uint32_t y{}
  function y (line 264) | uint32_t y{}
  function y (line 285) | uint32_t y{}
  function __device__ (line 420) | __device__ auto operator()(const Array<T, N>& v) const -> Array<T, N>
  function __device__ (line 497) | __device__ ConvertKvCache(half scale, half zero)
  function __device__ (line 668) | __device__ ConvertKvCache(T scale, T zero): scale_{scale}, zero_{zero} {}

FILE: src/turbomind/kernels/attention/reduce.h
  function namespace (line 12) | namespace turbomind::attention {

FILE: src/turbomind/kernels/attention/reference.h
  function namespace (line 12) | namespace turbomind {

FILE: src/turbomind/kernels/attention/registrar.h
  function namespace (line 11) | namespace turbomind::attention {
  function std (line 33) | inline std::vector<RegisterFn>& gKernelFactories()
  type Registrar (line 39) | struct Registrar {

FILE: src/turbomind/kernels/attention/registry.h
  function namespace (line 10) | namespace turbomind::attention {

FILE: src/turbomind/kernels/attention/rotary_embedding.h
  function namespace (line 8) | namespace turbomind {

FILE: src/turbomind/kernels/attention/test_utils.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/kernels/attention/utils.cc
  type turbomind (line 9) | namespace turbomind {
    function GetSplitCount (line 11) | int GetSplitCount(

FILE: src/turbomind/kernels/attention/utils.h
  function namespace (line 5) | namespace turbomind {

FILE: src/turbomind/kernels/ban_bad_words.h
  function namespace (line 23) | namespace turbomind {

FILE: src/turbomind/kernels/core/array.h
  function namespace (line 11) | namespace turbomind {

FILE: src/turbomind/kernels/core/array_ops.h
  function namespace (line 10) | namespace turbomind {
  function copy (line 160) | void copy(const Array<T, N>& src, Array<T, N>& dst)
  function __device__ (line 166) | inline __device__ void copy(const Array<T, N> (&src)[M], Array<T, N> (&d...
  function Store (line 175) | void Store(T* dst, const Array<T, N>& src)
  function __device__ (line 206) | inline __device__ void Stcs(T* __restrict__ dst, const Array<T, N>& src)
  function Stcg (line 231) | void Stcg(T* __restrict__ dst, const Array<T, N>& src)
  function else (line 266) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint)) {
  function else (line 269) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint16_t)) {
  function else (line 272) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint8_t)) {
  function else (line 291) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint)) {
  function else (line 294) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint16_t)) {
  function else (line 297) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint8_t)) {
  function else (line 316) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint)) {
  function else (line 319) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint16_t)) {
  function else (line 322) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint8_t)) {
  function else (line 339) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint)) {
  function else (line 342) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint16_t)) {
  function else (line 345) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint8_t)) {
  function else (line 381) | else if constexpr (sizeof(Array<T, N>) == sizeof(uint)) {
  function __device__ (line 391) | inline __device__ void StShared(uint32_t uintptr, Array<T, N>& src)
  function __device__ (line 454) | __inline__ __device__ uint transpose_m8n8_b16_warp_shuffle(uint value)
  function __device__ (line 474) | __inline__ __device__ uint transpose_m8n8_b16_movmatrix(uint a)
  function __device__ (line 487) | __inline__ __device__ uint32_t transpose_m8n8_b16(uint32_t a)

FILE: src/turbomind/kernels/core/data_type.h
  function namespace (line 14) | namespace turbomind {

FILE: src/turbomind/kernels/core/floating_point.h
  function namespace (line 7) | namespace turbomind {

FILE: src/turbomind/kernels/core/layout.h
  function namespace (line 6) | namespace turbomind {
  type Offset (line 96) | struct Offset {
  function __device__ (line 115) | __device__ SmemAccessor(Pointer ptr): ptr_{ptr}
  function __device__ (line 117) | __device__ T& operator()(int s, int c)
  function __device__ (line 122) | __device__ T& operator()(int s, int c, int offset)
  function __device__ (line 127) | __device__ T& operator()(int idx)
  function __host__ (line 139) | __host__ __device__ Stride(T0 v0, T1 v1): v0{v0}, v1{v1} {}

FILE: src/turbomind/kernels/core/math.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/kernels/core/meta.h
  function value_type (line 22) | constexpr value_type operator()() const noexcept

FILE: src/turbomind/kernels/core/mma.h
  function namespace (line 9) | namespace turbomind {

FILE: src/turbomind/kernels/core/pipe_iter.h
  function namespace (line 5) | namespace turbomind {

FILE: src/turbomind/kernels/core/smem.h
  function namespace (line 9) | namespace turbomind {

FILE: src/turbomind/kernels/core/sub_byte_ptr.h
  function __host__ (line 16) | constexpr __host__ __device__ SubBytePtr(char* ptr): ptr_(ptr) {}

FILE: src/turbomind/kernels/core/sync.h
  function namespace (line 5) | namespace turbomind {

FILE: src/turbomind/kernels/core/thread_map.h
  function namespace (line 10) | namespace turbomind {
  function Print (line 117) | Print(TMap)

FILE: src/turbomind/kernels/decoding_kernels.h
  function namespace (line 23) | namespace turbomind {

FILE: src/turbomind/kernels/gemm/arch.h
  function namespace (line 5) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/arch/config_simt.h
  function namespace (line 16) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/arch/config_sm70_s884.h
  function namespace (line 20) | namespace turbomind::gemm::sm70_s884 {

FILE: src/turbomind/kernels/gemm/arch/config_sm75_s16816.h
  function namespace (line 18) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/arch/config_sm80_s16816.h
  function namespace (line 20) | namespace turbomind::gemm::sm80_s16816 {

FILE: src/turbomind/kernels/gemm/arch/mma_simt.h
  function namespace (line 10) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/arch/mma_sm70.h
  function namespace (line 10) | namespace turbomind::gemm {
  function __device__ (line 38) | __device__ static constexpr OffsetC static_offset_C()

FILE: src/turbomind/kernels/gemm/arch/mma_sm80.h
  function namespace (line 10) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/arch/operand_simt.h
  function namespace (line 14) | namespace turbomind::gemm {
  type GetSmemLayout (line 93) | struct GetSmemLayout {  // m-major
  type GetSmemLayout_Pack (line 104) | struct GetSmemLayout_Pack {
  type GetSmemLayout (line 137) | struct GetSmemLayout {  // m-major

FILE: src/turbomind/kernels/gemm/arch/operand_sm70_s884.h
  function namespace (line 13) | namespace turbomind::gemm {
  type GetSmemLayout (line 131) | struct GetSmemLayout {  // m-major

FILE: src/turbomind/kernels/gemm/arch/operand_sm80_s16816.h
  function namespace (line 15) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/arch/smem_copy_simt.h
  function namespace (line 11) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/arch/smem_copy_sm70.h
  function __device__ (line 21) | __device__ static int2 unique(int thread_idx, int pack_idx)
  function __device__ (line 33) | __device__ static int2 get_offset(int thread_idx)
  function __device__ (line 56) | __device__ static int2 unique(int thread_idx, int pack_idx)
  function __device__ (line 68) | __device__ static int2 get_offset(int thread_idx)
  function __device__ (line 90) | __device__ static int2 unique(int thread_idx, int pack_idx)
  function __device__ (line 101) | __device__ static int2 get_offset(int thread_idx)

FILE: src/turbomind/kernels/gemm/arch/smem_copy_sm80.h
  function apply (line 15) | static void apply(S src_ptr, D dst_ptr)
  function apply (line 30) | static void apply(S src_ptr, D dst_ptr)
  function apply (line 45) | static void apply(S src_ptr, D dst_ptr)
  function __device__ (line 66) | __device__ static int2 get_offset(int thread_idx)  // -> (m, k)
  function __device__ (line 97) | __device__ static int2 get_offset(int thread_idx)
  function __device__ (line 131) | __device__ static int2 get_offset(int thread_idx)
  function copy (line 152) | static void copy(S&& src_ptr, D&& dst_ptr, bool)
  function __device__ (line 169) | __device__ static int2 unique(int thread_idx, int pack_idx)
  function __device__ (line 184) | __device__ static int2 get_offset(int thread_idx)
  function copy (line 192) | static void copy(S&& src_ptr, D&& dst_ptr, bool mask)
  function __device__ (line 200) | __device__ static int2 unique(int thread_idx, int pack_idx)

FILE: src/turbomind/kernels/gemm/cast.h
  function namespace (line 8) | namespace turbomind {

FILE: src/turbomind/kernels/gemm/context.h
  function namespace (line 8) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/convert.h
  function namespace (line 8) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/cp_async.h
  function namespace (line 13) | namespace turbomind {
  function __device__ (line 178) | __device__ static void apply(int smem_ptr, const void* __restrict__ src,...
  function __device__ (line 184) | __device__ static void apply(int smem_ptr, const void* __restrict__ src,...
  function __device__ (line 196) | __device__ static void apply(int smem_ptr, const void* __restrict__ src,...
  function __device__ (line 202) | __device__ static void apply(int smem_ptr, const void* __restrict__ src,...

FILE: src/turbomind/kernels/gemm/cta_map.h
  function namespace (line 9) | namespace turbomind::gemm {
  function TM_HOST_DEVICE (line 38) | TM_HOST_DEVICE static int get_log_tile(int2 tiled_mn, int N)
  function TM_HOST_DEVICE (line 43) | TM_HOST_DEVICE static dim3 get_grid_shape(int3 tiled_shape, int log_tile)
  function TM_DEVICE (line 51) | TM_DEVICE static int3 get_tile_offset(int log_tile)
  function TM_HOST_DEVICE (line 90) | TM_HOST_DEVICE static int get_log_tile(int2 tiled_mn, int tile_size)
  function TM_HOST_DEVICE (line 111) | TM_HOST_DEVICE std::true_type init(int block_idx_x, int block_idx_y, int...
  function TM_HOST_DEVICE (line 194) | TM_HOST_DEVICE static int get_log_tile(int2 tiled_mn, int tile_size)
  function TM_HOST_DEVICE (line 199) | TM_HOST_DEVICE dim3 get_grid_shape()

FILE: src/turbomind/kernels/gemm/desc.h
  function namespace (line 12) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/dispatch_cache.h
  function namespace (line 9) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/epilogue.h
  function namespace (line 16) | namespace turbomind::gemm {
  type MatrixCombination_v3 (line 108) | struct MatrixCombination_v3 {
  function apply (line 161) | static void apply(Array<T, N>& x)
  type Silu (line 171) | struct Silu {
  type EpilogueParam (line 178) | struct EpilogueParam {

FILE: src/turbomind/kernels/gemm/format.h
  function namespace (line 7) | namespace turbomind::gemm {
  type Converter (line 23) | struct Converter

FILE: src/turbomind/kernels/gemm/gemm.h
  function namespace (line 12) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/gemm_universal.h
  type GemmParam (line 22) | struct GemmParam {
  function __device__ (line 30) | __device__ MatrixData resolve_op(const MatrixParam& param, int gemm_id)
  type SharedStorage (line 77) | struct SharedStorage {
  function __device__ (line 95) | __device__ void operator()(const Param& param, const EpilogueParam& epi_...
  function typename (line 134) | typename OperandB::GmemIter gmem_B{mat_B, {offset_n, offset_k}, {extent_...
  function typename (line 140) | typename OperandV::GmemIter gmem_V{mat_V, offset_V, extent_V};
  function gemm_kernel (line 172) | void gemm_kernel(Param param, EpilogueParam epi_param, Scheduler sched)

FILE: src/turbomind/kernels/gemm/gemm_universal_sm90.h
  function namespace (line 28) | namespace turbomind::gemm {
  function wgmma_impl (line 84) | void
  function wgmma (line 91) | void wgmma(uint64_t desc_a, uint64_t desc_b, float (&frag_C)[N], bool cl...
  function typename (line 121) | static constexpr typename cute::MMA_Traits<MMA_Atom>::Shape_MNK MMA_Shape{}
  function __device__ (line 193) | __device__ void operator()(const CUtensorMap& tm_a,
  function scale_U (line 395) | float scale_U{}

FILE: src/turbomind/kernels/gemm/gemm_universal_sm90_v2.h
  function namespace (line 35) | namespace turbomind::gemm {
  function wgmma_impl (line 96) | void
  function wgmma (line 103) | void wgmma(uint64_t desc_a, uint64_t desc_b, float (&frag_C)[N], bool cl...
  function namespace (line 146) | namespace arch {
  function __device__ (line 294) | __device__ void operator()(const CUtensorMap& tm_a,
  function pred_V (line 500) | uint32_t pred_V{}
  function iter_V (line 501) | int      iter_V{}
  function scale_accum (line 541) | auto scale_accum = [&](int m) {  // cta_n = mma_iter_n * wg_n * mma_atom_n
  function gmma (line 567) | auto gmma = [&](int m) {

FILE: src/turbomind/kernels/gemm/gemm_universal_sm90_v3.h
  function namespace (line 39) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/gemm_universal_sm90_v4.h
  function namespace (line 37) | namespace turbomind::gemm {
  function wgmma_impl (line 98) | void
  function wgmma (line 105) | void wgmma(uint64_t desc_a, uint64_t desc_b, float (&frag_C)[N], bool cl...
  function namespace (line 148) | namespace arch {
  function __device__ (line 305) | __device__ void operator()(const CUtensorMap& tm_a,
  function epi_barrier (line 445) | auto epi_barrier = [&](int phase) {  // 0, 1
  function pred_V (line 518) | uint32_t pred_V{}
  function iter_V (line 519) | int      iter_V{}
  function scale_accum (line 568) | auto scale_accum = [&](int m) {  // cta_n = mma_iter_n * wg_n * mma_atom_n
  function gmma (line 594) | auto gmma = [&](int m) {
  type EmptyBarrier (line 744) | struct EmptyBarrier {

FILE: src/turbomind/kernels/gemm/gemm_universal_sm90_v5.h
  function namespace (line 41) | namespace turbomind::gemm {
  function __device__ (line 139) | __device__ void operator()(const CUtensorMap& tm_a,
  function __device__ (line 665) | __device__ auto Fetch_V(const MatrixParam&        param_V,

FILE: src/turbomind/kernels/gemm/gpu_metric.h
  function namespace (line 7) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/iterator.h
  function namespace (line 13) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/iterator_sm70.h
  function namespace (line 17) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/iterator_sm80.h
  function namespace (line 18) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/iterator_sm90.h
  function namespace (line 6) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/kernel.h
  function namespace (line 16) | namespace turbomind::gemm {
  type ClusteringParam (line 113) | struct ClusteringParam {

FILE: src/turbomind/kernels/gemm/kernel_impl.h
  function transpose (line 138) | auto transpose = [](MatrixLayout x) {
  function GemmParam (line 198) | GemmParam param{
  function GetMaxSplits (line 233) | int GetMaxSplits(const int4& shape, int swizzle, size_t bsize, size_t ps...

FILE: src/turbomind/kernels/gemm/kernel_impl_sm90.h
  function __launch_bounds__ (line 38) | void __launch_bounds__(Kernel::CTA_SIZE, 1) gemm_kernel_name(const __gri...
  function Launch (line 148) | int Launch(const Operation&    operation,

FILE: src/turbomind/kernels/gemm/mainloop_sm70.h
  function namespace (line 17) | namespace turbomind::gemm {
  function Binding (line 238) | Binding gmem_iters{gmem_A, gmem_B, gmem_U, gmem_V};

FILE: src/turbomind/kernels/gemm/mainloop_sm80_v2.h
  function __device__ (line 26) | __device__ void Advance()
  function operator (line 31) | operator bool()
  type GroupIter (line 38) | struct GroupIter
  function __device__ (line 39) | __device__ void               Advance() {}
  function operator (line 40) | operator bool()
  function __device__ (line 52) | __device__ SmemIter(Pointer base): base_{base}, pointer{base}, pipe_iter...
  function Binding (line 258) | Binding gmem_iters{gmem_A, gmem_B, gmem_U, gmem_V};
  function SmemCopyA (line 309) | SmemCopyA smem_copy_A{{offset_m, offset_k}}
  function SmemCopyU (line 310) | SmemCopyU smem_copy_U{{offset_m, offset_k}}
  function SmemCopyB (line 311) | SmemCopyB smem_copy_B{{offset_n, offset_k}}
  function SmemCopyV (line 312) | SmemCopyV smem_copy_V{{offset_n, offset_k}}
  function preload (line 314) | auto preload = [&](int k) {

FILE: src/turbomind/kernels/gemm/matrix_ptr.h
  type __align__ (line 9) | struct __align__
  type MatrixParam (line 15) | struct MatrixParam {
  type MatrixData (line 22) | struct MatrixData {
  function MatrixParam (line 27) | inline MatrixParam to_param(void* ptr, MatrixLayout layout)
  function StridedPtr (line 40) | StridedPtr ptr{param.ptr, param.stride};
  function else (line 46) | else if constexpr (mode == Striding::kIndexed) {
  function __device__ (line 62) | __device__ MatrixData resolve(const MatrixParam& param, int g)

FILE: src/turbomind/kernels/gemm/moe_utils_v2.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/kernels/gemm/operand.h
  type GetSmemLayout (line 20) | struct GetSmemLayout {
  type GetGmemIter (line 29) | struct GetGmemIter {

FILE: src/turbomind/kernels/gemm/predicate.h
  function namespace (line 8) | namespace turbomind::gemm {
  function __device__ (line 47) | __device__ void set(int, int) {}
  function __device__ (line 49) | __device__ void clear()

FILE: src/turbomind/kernels/gemm/registry.h
  function namespace (line 8) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/scaled_gmma_fp8_sm90.h
  function namespace (line 13) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/simt.h
  function namespace (line 5) | namespace turbomind::gemm::simt {

FILE: src/turbomind/kernels/gemm/sm90_utils.h
  function namespace (line 15) | namespace turbomind::gemm {
  function wgmma_impl (line 76) | void
  function wgmma (line 83) | void wgmma(uint64_t desc_a, uint64_t desc_b, float (&frag_C)[N], bool cl...
  function namespace (line 126) | namespace arch {

FILE: src/turbomind/kernels/gemm/smem_copy.h
  function namespace (line 14) | namespace turbomind::gemm {
  function __device__ (line 54) | __device__ SmemAccessorV2(get_pointer_type<T> ptr): base_{ptr}
  function __device__ (line 55) | __device__ T& operator()(int m, int k)
  function __device__ (line 70) | __device__ static int2 get_offset(int thread_idx)  // -> (m, k)
  type SmemCopyAtom_Pack_v3 (line 90) | struct SmemCopyAtom_Pack_v3 {
  type SmemCopy (line 116) | struct SmemCopy {
  function Accessor (line 172) | Accessor   smem{src_ptr};

FILE: src/turbomind/kernels/gemm/test/quantization.h
  function namespace (line 9) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/test/quantization_impl.h
  function namespace (line 15) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/test/reference.h
  function namespace (line 9) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/test/test_gemm_v2.cc
  type TestParameter (line 10) | struct TestParameter: Testbed_v3::Parameter {
    method TestParameter (line 11) | TestParameter(DataType dtype, DataType wtype, DataType itype, int grou...
  function main (line 21) | int main()

FILE: src/turbomind/kernels/gemm/test/test_utils.h
  function namespace (line 14) | namespace turbomind {

FILE: src/turbomind/kernels/gemm/test/testbed_v3.h
  type Parameter (line 29) | struct Parameter {
  function invoke (line 57) | auto invoke = [&](auto t) {
  type Testbed_v3 (line 80) | struct Testbed_v3
  function Route (line 156) | void Route()
  function GenerateWeight (line 226) | void GenerateWeight()
  function GenerateWeight (line 240) | void GenerateWeight(DenseWeight& original, DenseWeight& quant, DenseWeig...
  function GetReference (line 290) | void GetReference()
  function MatrixLayout (line 310) | const MatrixLayout desc_D{d.dtype(), kRowMajor, (int)d.shape(0), (int)d....
  function Run (line 346) | void Run()

FILE: src/turbomind/kernels/gemm/thread_group_map.h
  function namespace (line 12) | namespace turbomind::gemm {
  function __device__ (line 87) | __device__ static int3 get_offset(int group_id)
  function Print_ (line 101) | Print_(TMap)

FILE: src/turbomind/kernels/gemm/thread_map.h
  function namespace (line 13) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/tiled_mma.h
  function namespace (line 17) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/tma.h
  function namespace (line 9) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/transform.h
  function namespace (line 13) | namespace turbomind::gemm {
  type Transform_HMMA_SIMT_B (line 108) | struct Transform_HMMA_SIMT_B {

FILE: src/turbomind/kernels/gemm/tuner/cache_utils.h
  function namespace (line 7) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/tuner/measurer.h
  function namespace (line 10) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/tuner/params.cc
  type turbomind::gemm (line 9) | namespace turbomind::gemm {
    function ParseTuningParams (line 11) | void ParseTuningParams(TuningParams& params, const std::string& str)
    function ParseTuningSequence (line 37) | std::vector<int> ParseTuningSequence(const std::string& str)
    function GenerateTuningSequence (line 66) | std::vector<int> GenerateTuningSequence(const std::vector<std::array<i...
    function GetDefaultTuningGenerators (line 98) | std::vector<std::array<int, 3>> GetDefaultTuningGenerators()

FILE: src/turbomind/kernels/gemm/tuner/sampler.h
  function namespace (line 10) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/tuner/stats.h
  function namespace (line 5) | namespace turbomind::gemm {
  function add_sample (line 31) | void add_sample(float x) noexcept

FILE: src/turbomind/kernels/gemm/tuner/stopping_criterion.cc
  type turbomind::gemm (line 6) | namespace turbomind::gemm {
    type stopping_criterions (line 8) | namespace stopping_criterions {
      class Optimistic (line 10) | class Optimistic: public StoppingCriterion {
        method Optimistic (line 12) | Optimistic(int min_iter, int max_iter, float max_ms)
        method should_stop (line 18) | bool should_stop(const Stats& stats) override
    function CreateStoppingCriterion (line 31) | std::unique_ptr<StoppingCriterion> CreateStoppingCriterion(int min_ite...

FILE: src/turbomind/kernels/gemm/tuner/stopping_criterion.h
  function namespace (line 6) | namespace turbomind::gemm {

FILE: src/turbomind/kernels/gemm/types.h
  function Order (line 15) | enum class Order : int
  type MMA_Tag (line 42) | typedef enum MMA_Tag
  type Op_Tag (line 50) | typedef enum Op_Tag
  function MMA_Tag (line 60) | constexpr MMA_Tag get_mma_tag(Pack pack)
  function Op_Tag (line 65) | constexpr Op_Tag get_operand_tag(Pack pack)
  function get_pack_num (line 70) | constexpr int get_pack_num(Pack pack)
  type class (line 75) | enum class
  type class (line 99) | enum class
  type class (line 124) | enum class
  type QuantDesc (line 131) | struct QuantDesc {
  function std (line 141) | inline std::string to_string(QuantDesc desc)
  function DispatchPolicy (line 151) | enum class DispatchPolicy : int

FILE: src/turbomind/kernels/gemm/utils.h
  function namespace (line 8) | namespace turbomind::gemm {
  function int2 (line 115) | static constexpr int2 apply(int2 mk)

FILE: src/turbomind/kernels/gpt_kernels.h
  function namespace (line 26) | namespace turbomind {

FILE: src/turbomind/kernels/logprob_kernels.h
  function namespace (line 19) | namespace turbomind {

FILE: src/turbomind/kernels/norm/rms_norm.h
  function namespace (line 7) | namespace turbomind {

FILE: src/turbomind/kernels/penalty_types.h
  function namespace (line 24) | namespace turbomind {

FILE: src/turbomind/kernels/quantization.h
  function namespace (line 3) | namespace turbomind {

FILE: src/turbomind/kernels/sampling_kernels.h
  function namespace (line 24) | namespace turbomind {

FILE: src/turbomind/kernels/sampling_penalty_kernels.h
  function namespace (line 24) | namespace turbomind {

FILE: src/turbomind/kernels/sampling_topk_kernels.h
  function namespace (line 21) | namespace turbomind {

FILE: src/turbomind/kernels/sampling_topp_kernels.h
  function namespace (line 20) | namespace turbomind {

FILE: src/turbomind/kernels/stop_criteria_kernels.h
  function namespace (line 22) | namespace turbomind {

FILE: src/turbomind/kernels/test_quantization.cc
  function main (line 13) | int main()

FILE: src/turbomind/kernels/unfused_attention_kernels.h
  function namespace (line 18) | namespace turbomind {

FILE: src/turbomind/macro.h
  type uint (line 9) | typedef unsigned int uint;

FILE: src/turbomind/models/input_processor.cc
  type turbomind (line 11) | namespace turbomind {
    type InputProcessor::Impl (line 15) | struct InputProcessor::Impl {
      method Impl (line 17) | Impl(const EngineParam& engine, const ModelParam& model, int phases):
      method Add (line 38) | int Add(RequestCache& c)
      method Add (line 106) | void Add(int phase, TensorMap& env)
      method Setup (line 117) | void Setup(int phase, TensorMap& env)
      method Prepare (line 179) | void Prepare(int phase, TensorMap& env)
      method PatchEmbedding (line 204) | void PatchEmbedding(int phase, Tensor& embeds, BatchCopy& copy)
      type Data (line 217) | struct Data {

FILE: src/turbomind/models/input_processor.h
  function namespace (line 6) | namespace turbomind {

FILE: src/turbomind/models/language_model.cc
  type turbomind (line 29) | namespace turbomind {
    type LanguageModel::Impl (line 35) | struct LanguageModel::Impl {
      type Data (line 67) | struct Data {
      method Run (line 84) | void Run(BatchOp op, int phase, TensorMap& env)
    function Tensor (line 201) | Tensor LanguageModel::Impl::LookupEmbedding(const Buffer_<int>& input_...
    function Tensor (line 270) | Tensor LanguageModel::Impl::PostEmbedding(const Tensor& features, Buff...
    function ModelParam (line 511) | const ModelParam& LanguageModel::model_param() const noexcept
    function AttentionParam (line 516) | const AttentionParam& LanguageModel::attn_param() const noexcept

FILE: src/turbomind/models/language_model.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/models/llama/Barrier.h
  function namespace (line 11) | namespace turbomind {

FILE: src/turbomind/models/llama/BlockManager.cc
  type turbomind (line 10) | namespace turbomind {
    function Snapshot (line 251) | Snapshot BlockManager::TakeSnapshot()

FILE: src/turbomind/models/llama/BlockManager.h
  function namespace (line 21) | namespace turbomind {

FILE: src/turbomind/models/llama/BlockTrie.cc
  type turbomind (line 6) | namespace turbomind {
    function hash (line 8) | size_t hash(const std::vector<int>& vec)

FILE: src/turbomind/models/llama/BlockTrie.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/models/llama/GatedDeltaNetLayer.cc
  type turbomind (line 10) | namespace turbomind {
    function linear_layer_index (line 136) | static int linear_layer_index(int layer_id, const std::vector<int>& la...

FILE: src/turbomind/models/llama/GatedDeltaNetLayer.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/models/llama/GatedDeltaNetWeight.cc
  type turbomind (line 5) | namespace turbomind {
    function concat_weights_4 (line 73) | static void

FILE: src/turbomind/models/llama/GatedDeltaNetWeight.h
  function namespace (line 7) | namespace turbomind {

FILE: src/turbomind/models/llama/LlamaDecoderLayerWeight.cc
  type turbomind (line 34) | namespace turbomind {
    function is_fuse_silu_act (line 36) | static bool is_fuse_silu_act()

FILE: src/turbomind/models/llama/LlamaDecoderLayerWeight.h
  function namespace (line 29) | namespace turbomind {

FILE: src/turbomind/models/llama/LlamaDenseWeight.cc
  type turbomind (line 19) | namespace turbomind {
    function Convert (line 89) | static void Convert(LlamaDenseWeight& dense, bool is_grouped, cudaStre...
    function ConvertBlockscaleFP8Native (line 221) | static void ConvertBlockscaleFP8Native(LlamaDenseWeight& dense, cudaSt...
    function Interleave (line 381) | static void Interleave(const Tensor& a, const Tensor& b, Tensor& c, cu...
    function interleave (line 420) | void interleave(LlamaDenseWeight& c, LlamaDenseWeight& a, LlamaDenseWe...
    function Chunk (line 446) | static void Chunk(const Tensor& a, const Tensor& b, Tensor& c, cudaStr...
    function chunk (line 481) | void chunk(LlamaDenseWeight& c, LlamaDenseWeight& a, LlamaDenseWeight&...
    function LinkExperts (line 630) | void LinkExperts(std::function<LlamaDenseWeight*(int)> experts, int n,...

FILE: src/turbomind/models/llama/LlamaDenseWeight.h
  function namespace (line 30) | namespace turbomind {
  type LlamaAttentionWeight (line 85) | struct LlamaAttentionWeight
  function window_size (line 121) | int window_size{}
  function Module (line 124) | struct LlamaFfnWeight: core::Module {
  function Module (line 156) | struct MoeFfnWeight: core::Module {

FILE: src/turbomind/models/llama/LlamaFfnLayer.cc
  type turbomind (line 25) | namespace turbomind {

FILE: src/turbomind/models/llama/LlamaFfnLayer.h
  function namespace (line 28) | namespace turbomind {

FILE: src/turbomind/models/llama/LlamaLinear.h
  function namespace (line 11) | namespace turbomind {

FILE: src/turbomind/models/llama/LlamaWeight.cc
  type turbomind (line 30) | namespace turbomind {

FILE: src/turbomind/models/llama/LlamaWeight.h
  function namespace (line 30) | namespace turbomind {

FILE: src/turbomind/models/llama/SequenceManager.cc
  type turbomind (line 15) | namespace turbomind {
    function vector2string (line 18) | std::string vector2string(const std::vector<T>& data)
    function Sequence (line 142) | const Sequence* SequenceManager::Create(uint64_t id)
    function Sequence (line 159) | const Sequence* SequenceManager::Get(uint64_t id)
    type Schedule (line 366) | struct Schedule {
      method Schedule (line 384) | Schedule(Snapshot snapshot, int size, int max_fwd_tokens, int max_tm...
      method Unlock (line 396) | int Unlock(const Sequences& seqs, int vidx)
    type Transaction (line 434) | struct Transaction {
      method Transaction (line 449) | explicit Transaction(
      method Process (line 460) | void Process()
      method Commit (line 496) | void Commit()
    function SortByKey (line 533) | static void SortByKey(const std::vector<Key>& keys, std::vector<Ts>&.....

FILE: src/turbomind/models/llama/SequenceManager.h
  function namespace (line 15) | namespace turbomind {
  type Outcome (line 124) | struct Outcome {

FILE: src/turbomind/models/llama/bench_conv1d_silu.cc
  type Args (line 18) | struct Args {
    method DataType (line 28) | static DataType ParseDtype(const char* s)
    method Args (line 38) | static Args Parse(int argc, char** argv)
    method Print (line 70) | void Print() const
  function benchmark_kernel (line 85) | static float
  function cpu_conv1d_silu (line 126) | static void cpu_conv1d_silu(T*         h_out,
  function main (line 170) | int main(int argc, char** argv)

FILE: src/turbomind/models/llama/bench_gated_delta_net.cc
  type Args (line 15) | struct Args {
    method DataType (line 25) | static DataType ParseDtype(const char* s)
    method Args (line 37) | static Args Parse(int argc, char** argv)
    method Print (line 69) | void Print() const
  function benchmark_kernel (line 84) | static float
  function main (line 112) | int main(int argc, char** argv)

FILE: src/turbomind/models/llama/context.h
  function namespace (line 17) | namespace turbomind {

FILE: src/turbomind/models/llama/gated_delta_net_kernels.h
  function namespace (line 9) | namespace turbomind {

FILE: src/turbomind/models/llama/llama_kernels.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/models/llama/llama_params.h
  function namespace (line 15) | namespace turbomind {
  function HasLinearAttention (line 81) | inline bool HasLinearAttention(const ModelParam& model_param)
  type MoeParam (line 92) | struct MoeParam {
  type AttentionParam (line 116) | struct AttentionParam {
  type EngineParam (line 126) | struct EngineParam {

FILE: src/turbomind/models/llama/llama_rope.h
  function namespace (line 11) | namespace turbomind {
  type YarnRopeParam (line 35) | struct YarnRopeParam {
  type Llama3RopeParam (line 41) | struct Llama3RopeParam {
  type MropeRopeParam (line 47) | struct MropeRopeParam {
  type RopeParam (line 51) | struct RopeParam {
  type YarnRopeKernelParam (line 66) | struct YarnRopeKernelParam {
  type Llama3RopeKernelParam (line 73) | struct Llama3RopeKernelParam {
  type MropeRopeKernelParam (line 79) | struct MropeRopeKernelParam {
  type RopeKernelParam (line 88) | struct RopeKernelParam {
  function init_rope_kernel_param (line 101) | inline void init_rope_kernel_param(const RopeParam& rope, RopeKernelPara...
  function else (line 140) | else if (rope.type == RopeType::kLlama3) {
  function else (line 150) | else if (rope.type == RopeType::kMrope) {

FILE: src/turbomind/models/llama/llama_utils.h
  function namespace (line 10) | namespace turbomind {

FILE: src/turbomind/models/llama/mla_utils.h
  function namespace (line 8) | namespace turbomind {

FILE: src/turbomind/models/llama/moe_ffn_layer.cc
  type turbomind (line 20) | namespace turbomind {

FILE: src/turbomind/models/llama/moe_ffn_layer.h
  function namespace (line 11) | namespace turbomind {

FILE: src/turbomind/models/llama/unified_attention_layer.cc
  type turbomind (line 54) | namespace turbomind {
    type AttentionData (line 56) | struct AttentionData {
      type Stat (line 57) | struct Stat {
    function init_dynamic_ntk (line 183) | static void init_dynamic_ntk(RequestCache& cache, const RopeParam& rope)
    function Tensor (line 377) | Tensor UnifiedAttentionLayer::core_attention(Tensor& qkv, const Forwar...
    function Tensor (line 592) | Tensor UnifiedAttentionLayer::forward_mla(const Tensor& hidden_state, ...

FILE: src/turbomind/models/llama/unified_attention_layer.h
  function namespace (line 35) | namespace turbomind {

FILE: src/turbomind/models/llama/unified_decoder.cc
  type turbomind (line 23) | namespace turbomind {

FILE: src/turbomind/models/llama/unified_decoder.h
  function namespace (line 12) | namespace turbomind {

FILE: src/turbomind/models/output_processor.cc
  type turbomind (line 10) | namespace turbomind {
    type OutputProcessor::Impl (line 15) | struct OutputProcessor::Impl {
      method Impl (line 25) | Impl(const ModelParam&                    model,
      type Data (line 40) | struct Data {
      type Matching (line 50) | struct Matching {
      method Add (line 69) | void Add(int phase, TensorMap& env)
      method Setup (line 90) | void Setup(int phase, TensorMap& env)
      method Prepare (line 159) | void Prepare(int phase, TensorMap& env)
      method OutputHiddenStates (line 168) | void OutputHiddenStates(const Ranges& ranges, const Tensor& h, int t...
      method ComputeAndOutputLogits (line 181) | void ComputeAndOutputLogits(const Data& data, const Tensor& h, const...
      method OutputLogits (line 210) | void OutputLogits(Ranges& ranges_, const Tensor& l, int type, const ...
      method OutputLogitsImpl (line 220) | bool OutputLogitsImpl(
      method OutputHiddenStatesAndLogits (line 260) | void OutputHiddenStatesAndLogits(int phase, TensorMap& env, int type)

FILE: src/turbomind/models/output_processor.h
  function namespace (line 6) | namespace turbomind {

FILE: src/turbomind/python/bind.cpp
  function DLDevice (line 36) | DLDevice getDLDevice(const Tensor& tensor)
  function DLManagedTensor (line 64) | DLManagedTensor* TritonTensorToDLManagedTensor(Tensor& tensor)
  function getMemoryType (line 142) | ft::DeviceType getMemoryType(DLDevice device)
  function getDataType (line 155) | ft::DataType getDataType(DLDataType data_type)
  function DLManagedTensorToTritonTensor (line 214) | std::shared_ptr<Tensor> DLManagedTensorToTritonTensor(DLManagedTensor* t...
  function safe_memcpy (line 230) | static void safe_memcpy(void* dst, const void* src, size_t size)
  type ScopedGIL (line 277) | struct ScopedGIL {
    method ScopedGIL (line 278) | ScopedGIL(const ScopedGIL&) = delete;
    method ScopedGIL (line 279) | ScopedGIL& operator=(const ScopedGIL&) = delete;
    method ScopedGIL (line 280) | ScopedGIL(ScopedGIL&&)                 = delete;
    method ScopedGIL (line 281) | ScopedGIL& operator=(ScopedGIL&&) = delete;
    method ScopedGIL (line 282) | ScopedGIL()
  function PYBIND11_MODULE (line 295) | PYBIND11_MODULE(_turbomind, m)

FILE: src/turbomind/python/dlpack.h
  type DLPackVersion (line 61) | typedef struct {
  type DLDeviceType (line 75) | typedef enum
  type DLDevice (line 126) | typedef struct {
  type DLDataTypeCode (line 139) | typedef enum
  type DLDataType (line 176) | typedef struct {
  type DLTensor (line 194) | typedef struct {
  type DLManagedTensor (line 253) | typedef struct DLManagedTensor {
  type DLManagedTensorVersioned (line 284) | struct DLManagedTensorVersioned {

FILE: src/turbomind/python/xgrammar_bind.cpp
  function CommonEncodedVocabType (line 27) | static const std::vector<std::string>
  function TokenizerInfo (line 46) | TokenizerInfo TokenizerInfo_Init(const std::vector<std::string>&     enc...
  function TokenizerInfo_GetVocabType (line 57) | int TokenizerInfo_GetVocabType(const TokenizerInfo& tokenizer)
  function TokenizerInfo_GetDecodedVocab (line 62) | std::vector<py::bytes> TokenizerInfo_GetDecodedVocab(const TokenizerInfo...
  function PYBIND11_MODULE (line 75) | PYBIND11_MODULE(_xgrammar, m)

FILE: src/turbomind/turbomind.cc
  type turbomind (line 35) | namespace turbomind {
    function get_moe_method (line 42) | static std::optional<MoeParam::Method> get_moe_method()
    function parse_default_rope_param (line 67) | static void parse_default_rope_param(const YAML::Node& node, RopeParam...
    function parse_linear_rope_param (line 77) | static void parse_linear_rope_param(const YAML::Node& node, RopeParam&...
    function parse_dynamic_rope_param (line 83) | static void parse_dynamic_rope_param(const YAML::Node& node, RopeParam...
    function parse_yarn_rope_param (line 89) | static void parse_yarn_rope_param(const YAML::Node& node, RopeParam& p...
    function parse_llama3_rope_param (line 97) | static void parse_llama3_rope_param(const YAML::Node& node, RopeParam&...
    function parse_mrope_rope_param (line 105) | static void parse_mrope_rope_param(const YAML::Node& node, RopeParam& ...
    function parse_rope_param (line 113) | static void parse_rope_param(const YAML::Node& node, RopeParam& rope)
    function DataType (line 142) | static DataType data_type_from_string(std::string str)
    type TurboMind::Impl (line 169) | struct TurboMind::Impl {
      method CreateRequest (line 207) | unique_ptr<ModelRequest> CreateRequest()
      method CreateWeights (line 216) | void CreateWeights(int index)
      method TensorMap (line 228) | TensorMap GetWeights(int index)
      method ProcessWeights (line 238) | void ProcessWeights(int index)
      method Sleep (line 256) | void Sleep(int index, int level)
      method WakeUp (line 282) | void WakeUp(int index, const std::vector<std::string>& tags)
      method HandleMissingParams (line 308) | void HandleMissingParams()
    function Join (line 644) | static std::string Join(Iter first, Iter last, const std::string& delim)
    type Channel (line 728) | struct Channel {
    function TensorMap (line 791) | TensorMap TurboMind::GetWeights(int index)

FILE: src/turbomind/turbomind.h
  function namespace (line 13) | namespace turbomind {

FILE: src/turbomind/utils/anomaly_handler.h
  function namespace (line 15) | namespace turbomind {

FILE: src/turbomind/utils/constant.h
  function namespace (line 5) | namespace turbomind {

FILE: src/turbomind/utils/cuda_utils.cc
  type turbomind (line 22) | namespace turbomind {
    function syncAndCheck (line 24) | void syncAndCheck(const char* const file, int const line)
    function printMatrix (line 47) | void printMatrix(T* ptr, int m, int k, int stride, bool is_device_ptr)
    function printMatrix (line 89) | void printMatrix(unsigned long long* ptr, int m, int k, int stride, bo...
    function printMatrix (line 126) | void printMatrix(int* ptr, int m, int k, int stride, bool is_device_ptr)
    function printMatrix (line 165) | void printMatrix(size_t* ptr, int m, int k, int stride, bool is_device...
    function check_max_val (line 204) | void check_max_val(const T* result, const int size)
    function check_abs_mean_val (line 226) | void check_abs_mean_val(const T* result, const int size)
    function getSMVersion (line 246) | int getSMVersion()
    function getSMCount (line 257) | int getSMCount()
    function getDeviceName (line 266) | std::string getDeviceName()
    function getDevice (line 275) | int getDevice()
    function getDeviceCount (line 282) | int getDeviceCount()
    function trim_default_mempool (line 289) | void trim_default_mempool(int device_id)

FILE: src/turbomind/utils/cuda_utils.h
  function namespace (line 39) | namespace turbomind {

FILE: src/turbomind/utils/logger.cc
  type turbomind (line 20) | namespace turbomind {
    function Logger (line 22) | Logger& Logger::getLogger()

FILE: src/turbomind/utils/logger.h
  function namespace (line 25) | namespace turbomind {
  function setLevel (line 71) | void setLevel(const Level level)
  function std (line 96) | inline const std::string getLevelName(const Level level)
  function std (line 101) | inline const std::string getPrefix(const Level level)
  function std (line 106) | inline const std::string getPrefix(const Level level, const int rank)

FILE: src/turbomind/utils/memory_utils.h
  function namespace (line 21) | namespace turbomind {

FILE: src/turbomind/utils/metrics.h
  function namespace (line 8) | namespace turbomind {

FILE: src/turbomind/utils/monotonic.h
  function namespace (line 7) | namespace turbomind {

FILE: src/turbomind/utils/nvtx_utils.cc
  type ft_nvtx (line 24) | namespace ft_nvtx {
    function getScope (line 25) | std::string getScope()
    function addScope (line 29) | void addScope(std::string name)
    function setScope (line 34) | void setScope(std::string name)
    function resetScope (line 39) | void resetScope()
    function setDeviceDomain (line 44) | void setDeviceDomain(int deviceId)
    function resetDeviceDomain (line 49) | void resetDeviceDomain()
    function getDeviceDomain (line 54) | int getDeviceDomain()
    function isEnableNvtx (line 59) | bool isEnableNvtx()
    function ftNvtxRangePush (line 69) | void ftNvtxRangePush(std::string name)
    function ftNvtxRangePop (line 82) | void ftNvtxRangePop()

FILE: src/turbomind/utils/nvtx_utils.h
  function namespace (line 19) | namespace ft_nvtx {

FILE: src/turbomind/utils/parser.cc
  type turbomind (line 8) | namespace turbomind {
    function ParseArgsList (line 10) | std::vector<std::pair<std::string, std::string>> ParseArgsList(const s...
    function ParseListOrTuple (line 26) | std::vector<std::string> ParseListOrTuple(const std::string& str)

FILE: src/turbomind/utils/parser.h
  function namespace (line 4) | namespace turbomind {

FILE: src/turbomind/utils/string_utils.h
  function namespace (line 24) | namespace turbomind {

FILE: src/turbomind/utils/test_utils.h
  function namespace (line 23) | namespace turbomind {

FILE: tests/csrc/unittests/gtest_utils.h
  function initRandomInt (line 116) | void initRandomInt(int* ptr, size_t size, int minval, int maxval) {
  function namespace (line 143) | namespace math {
  type testing (line 165) | typedef testing::Types<float, half, __nv_bfloat16> SamplingTypes;
  type testing (line 167) | typedef testing::Types<float, half> SamplingTypes;
  type testing (line 171) | typedef testing::Types<half, __nv_bfloat16>        SamplingTypes;
  type testing (line 173) | typedef testing::Types<half> SamplingTypes;
  type testing (line 177) | typedef testing::Types<float> FloatType;
  type testing (line 178) | typedef testing::Types<float, half> FloatAndHalfTypes;
  type FloatAndHalfTypes (line 180) | typedef FloatAndHalfTypes SupportTypes;
  type testing (line 182) | typedef testing::Types<float, half, __nv_bfloat16> FloatHalfBf16Types;
  type FloatHalfBf16Types (line 183) | typedef FloatHalfBf16Types SupportTypes;
  function class (line 186) | class FtTestBase: public testing::Test {

FILE: tests/csrc/unittests/unittest_utils.h
  function class (line 39) | class TestFailureError: public std::exception {
  function initRandomInt (line 167) | void initRandomInt(int* ptr, size_t size, int minval, int maxval)
  function printMatrixWithLimit (line 205) | void printMatrixWithLimit(T* ptr, int m, int k, int stride, bool is_devi...

FILE: tests/pytorch/config/test_hf_overrides.py
  class TestHFOverrides (line 4) | class TestHFOverrides:
    method hf_config (line 7) | def hf_config(self):
    method test_hf_overrides (line 11) | def test_hf_overrides(self, hf_config):

FILE: tests/pytorch/engine/test_logits_process.py
  function test_process_temperature (line 9) | def test_process_temperature():
  function test_process_bad_words (line 27) | def test_process_bad_words():
  function test_processrepetition_penalty (line 52) | def test_processrepetition_penalty():
  function test_filter_topk_sorted (line 75) | def test_filter_topk_sorted():
  function test_filter_topp_sorted (line 93) | def test_filter_topp_sorted():
  function test_filter_minp_sorted (line 111) | def test_filter_minp_sorted():
  function test_filter_ngram (line 129) | def test_filter_ngram():

FILE: tests/pytorch/engine/test_request.py
  class TestRequestHander (line 11) | class TestRequestHander:
    method event_loop (line 14) | def event_loop(self):
    method manager (line 25) | def manager(self):
    method test_bind (line 28) | def test_bind(self, manager, event_loop):

FILE: tests/pytorch/engine/test_zmq_rpc.py
  class TestZMQRPC (line 5) | class TestZMQRPC:
    method sub_proc (line 7) | def sub_proc(self, shared_dict=None, condition=None):
    method async_main (line 38) | async def async_main(self, port):
    method test_zmq_rpc (line 58) | def test_zmq_rpc(self):

FILE: tests/pytorch/kernel/test_activation.py
  class TestSiluAndMul (line 5) | class TestSiluAndMul:
    method seqlen (line 8) | def seqlen(self, request):
    method feat_size (line 12) | def feat_size(self, request):
    method x (line 16) | def x(self, seqlen, feat_size):
    method gt (line 20) | def gt(self, x):
    method test_silu_and_mul (line 27) | def test_silu_and_mul(self, x, gt):
  class TestSiluAndMulMoEEP (line 34) | class TestSiluAndMulMoEEP:
    method num_experts (line 37) | def num_experts(self, request):
    method seqlen (line 41) | def seqlen(self, request):
    method feat_size (line 45) | def feat_size(self, request):
    method dtype (line 49) | def dtype(self):
    method x (line 53) | def x(self, num_experts, seqlen, feat_size, dtype):
    method mask_m (line 57) | def mask_m(self, num_experts, seqlen):
    method elem_mask (line 62) | def elem_mask(self, mask_m, seqlen):
    method gt (line 67) | def gt(self, x):
    method test_silu_and_mul (line 75) | def test_silu_and_mul(self, x, mask_m, elem_mask, gt):

FILE: tests/pytorch/kernel/test_apply_rotary.py
  function _rotate_half (line 7) | def _rotate_half(x):
  function _bf16_mark (line 14) | def _bf16_mark():
  class TestApplyRotary (line 18) | class TestApplyRotary:
    method dtype (line 21) | def dtype(self, request):
    method batch_size (line 25) | def batch_size(self):
    method num_heads_q (line 29) | def num_heads_q(self, request):
    method num_heads_k (line 33) | def num_heads_k(self, request):
    method feature_dim (line 37) | def feature_dim(self):
    method seq_length (line 41) | def seq_length(self, batch_size):
    method max_seqlen (line 45) | def max_seqlen(self, seq_length):
    method q_states (line 49) | def q_states(self, seq_length, num_heads_q, feature_dim, dtype):
    method k_states (line 53) | def k_states(self, seq_length, num_heads_k, feature_dim, dtype):
    method position_ids_1d (line 57) | def position_ids_1d(self, seq_length, max_seqlen):
    method cached_cos (line 61) | def cached_cos(self, max_seqlen, feature_dim, dtype):
    method cached_sin (line 65) | def cached_sin(self, max_seqlen, feature_dim, dtype):
    method cos (line 69) | def cos(self, cached_cos, position_ids_1d):
    method sin (line 73) | def sin(self, cached_sin, position_ids_1d):
    method gt (line 77) | def gt(self, q_states, k_states, cos, sin, position_ids_1d):
    method test_apply_rotary (line 87) | def test_apply_rotary(self, q_states, k_states, cos, sin, gt):

FILE: tests/pytorch/kernel/test_bitonic_topk.py
  class TestBitonicTopk (line 5) | class TestBitonicTopk:
    method device (line 8) | def device(self):
    method k (line 12) | def k(self):
    method q_seqlens (line 16) | def q_seqlens(self, device):
    method kv_seqlens (line 22) | def kv_seqlens(self, device):
    method batch_size (line 28) | def batch_size(self, kv_seqlens):
    method max_kv_len (line 32) | def max_kv_len(self, kv_seqlens):
    method scores (line 36) | def scores(self, q_seqlens, max_kv_len, device):
    method gt (line 41) | def gt(self, scores, q_seqlens, kv_seqlens, k):
    method test_bitonic_topk (line 60) | def test_bitonic_topk(self, scores, q_seqlens, kv_seqlens, k, gt):

FILE: tests/pytorch/kernel/test_causal_conv1d.py
  function do_test (line 5) | def do_test():
  class TestCausalConv1dUpdate (line 17) | class TestCausalConv1dUpdate:
    method device (line 20) | def device(self):
    method batch (line 24) | def batch(self):
    method hidden_size (line 28) | def hidden_size(self):
    method width (line 32) | def width(self):
    method x (line 36) | def x(self, batch, hidden_size, device):
    method weight (line 40) | def weight(self, hidden_size, width, device):
    method conv_state (line 44) | def conv_state(self, batch, hidden_size, width, device):
    method bias (line 50) | def bias(self, hidden_size, device):
    method conv_state_indices (line 54) | def conv_state_indices(self, batch, device):
    method activation (line 59) | def activation(self, request):
    method test_causal_conv1d_update (line 62) | def test_causal_conv1d_update(self, x, conv_state, weight, bias, activ...
  class TestCausalConv1dFn (line 85) | class TestCausalConv1dFn:
    method device (line 88) | def device(self):
    method hidden_size (line 92) | def hidden_size(self):
    method seqlen (line 96) | def seqlen(self):
    method seq_idx (line 100) | def seq_idx(self, seqlen, device):
    method x (line 107) | def x(self, hidden_size, seqlen, device):
    method weight (line 111) | def weight(self, hidden_size, device):
    method bias (line 115) | def bias(self, hidden_size, device):
    method activation (line 119) | def activation(self, request):
    method test_causal_conv1d_fn (line 122) | def test_causal_conv1d_fn(self, x, weight, bias, activation, seq_idx):

FILE: tests/pytorch/kernel/test_ds_index.py
  function _make_A (line 5) | def _make_A(M, K, group_size, out_dtype, device):
  class TestDSIndex (line 28) | class TestDSIndex:
    method num_heads (line 31) | def num_heads(self):
    method head_dim (line 35) | def head_dim(self):
    method block_size (line 39) | def block_size(self):
    method device (line 43) | def device(self):
    method q_seqlens (line 47) | def q_seqlens(self, request):
    method kv_seqlens (line 51) | def kv_seqlens(self, request):
    method k_seqlens (line 55) | def k_seqlens(self, kv_seqlens, device):
    method cu_seqlen_q (line 59) | def cu_seqlen_q(self, q_seqlens, device):
    method cu_seqlen_kv (line 63) | def cu_seqlen_kv(self, kv_seqlens, device):
    method query (line 67) | def query(self, q_seqlens, num_heads, head_dim, device):
    method q (line 76) | def q(self, query):
    method q_s (line 80) | def q_s(self, query):
    method key (line 84) | def key(self, kv_seqlens, head_dim):
    method k (line 93) | def k(self, key):
    method k_s (line 97) | def k_s(self, key):
    method cache_key (line 101) | def cache_key(self, k, k_s, kv_seqlens, block_size, head_dim):
    method k_cache (line 130) | def k_cache(self, cache_key):
    method k_s_cache (line 134) | def k_s_cache(self, cache_key):
    method block_offset (line 138) | def block_offset(self, cache_key):
    method test_fp8_index (line 143) | def test_fp8_index(self, q, q_s, k_cache, k_s_cache, cu_seqlen_q, k_se...

FILE: tests/pytorch/kernel/test_fill_kv_cache.py
  function _div_up (line 5) | def _div_up(a, b):
  function quant (line 9) | def quant(kv: torch.Tensor, nbits: int = 8):
  class TestFillKVCache (line 22) | class TestFillKVCache:
    method num_heads (line 25) | def num_heads(self):
    method head_dim (line 29) | def head_dim(self):
    method block_size (line 33) | def block_size(self):
    method seq_lens (line 37) | def seq_lens(self, request):
    method history_lens (line 41) | def history_lens(self, request):
    method batch_size (line 45) | def batch_size(self, seq_lens):
    method kv_lens (line 49) | def kv_lens(self, seq_lens, history_lens):
    method max_q_seq_length (line 53) | def max_q_seq_length(self, seq_lens):
    method num_tokens (line 57) | def num_tokens(self, seq_lens):
    method num_blocks_per_input (line 61) | def num_blocks_per_input(self, kv_lens, block_size):
    method max_num_blocks (line 65) | def max_num_blocks(self, num_blocks_per_input):
    method q_seq_length (line 69) | def q_seq_length(self, seq_lens):
    method q_start_loc (line 73) | def q_start_loc(self, q_seq_length):
    method kv_seq_length (line 78) | def kv_seq_length(self, kv_lens):
    method k_states (line 82) | def k_states(self, num_tokens, num_heads, head_dim):
    method v_states (line 86) | def v_states(self, k_states):
    method k_caches (line 90) | def k_caches(self, batch_size, max_num_blocks, block_size, num_heads, ...
    method v_caches (line 95) | def v_caches(self, k_caches):
    method block_offsets (line 99) | def block_offsets(self, num_blocks_per_input):
    method gt (line 108) | def gt(self, k_states, v_states, k_caches, v_caches, seq_lens, history...
    method test_fill_kv_cache (line 144) | def test_fill_kv_cache(self, k_states, v_states, k_caches, v_caches, b...
  class TestFillKVCacheInt8 (line 154) | class TestFillKVCacheInt8(TestFillKVCache):
    method head_dim (line 157) | def head_dim(self, request):
    method k_caches (line 161) | def k_caches(self, batch_size, max_num_blocks, block_size, num_heads, ...
    method v_caches (line 166) | def v_caches(self, k_caches):
    method k_scales_zeros (line 170) | def k_scales_zeros(self, batch_size, max_num_blocks, block_size, num_h...
    method v_scales_zeros (line 175) | def v_scales_zeros(self, k_scales_zeros):
    method nbits (line 179) | def nbits(self):
    method gt (line 183) | def gt(self, k_states, v_states, k_caches, v_caches, seq_lens, history...
    method test_fill_kv_cache (line 233) | def test_fill_kv_cache(self, k_states, v_states, k_caches, v_caches, k...
  class TestFillKVCacheInt4 (line 245) | class TestFillKVCacheInt4(TestFillKVCacheInt8):
    method k_caches (line 248) | def k_caches(self, batch_size, max_num_blocks, block_size, num_heads, ...
    method nbits (line 253) | def nbits(self):
    method test_fill_kv_cache (line 262) | def test_fill_kv_cache(self, k_states, v_states, k_caches, v_caches, k...
  class TestFillKVCacheBlockedFP8 (line 277) | class TestFillKVCacheBlockedFP8(TestFillKVCache):
    method initialize (line 280) | def initialize(self):
    method scale_fmt (line 287) | def scale_fmt(self, request):
    method quant_dtype (line 291) | def quant_dtype(self):
    method num_heads (line 295) | def num_heads(self):
    method head_dim (line 299) | def head_dim(self):
    method block_size (line 303) | def block_size(self):
    method group_size (line 307) | def group_size(self):
    method cu_seqlen_q (line 311) | def cu_seqlen_q(self, q_start_loc, q_seq_length):
    method k_caches (line 318) | def k_caches(self, batch_size, max_num_blocks, block_size, num_heads, ...
    method v_caches (line 323) | def v_caches(self, k_caches):
    method ks_caches (line 327) | def ks_caches(self, batch_size, max_num_blocks, block_size, num_heads,...
    method vs_caches (line 332) | def vs_caches(self, ks_caches):
    method gt (line 336) | def gt(self, k_states, v_states, group_size, quant_dtype, scale_fmt):
    method uncache (line 354) | def uncache(self, k_caches, ks_caches, v_caches, vs_caches, cu_seqlen_...
    method test_fill_kv_cache (line 386) | def test_fill_kv_cache(self, k_states, v_states, k_caches, v_caches, k...

FILE: tests/pytorch/kernel/test_flash_attention.py
  function _conti_input (line 7) | def _conti_input(data, q_seqlens):
  function _make_bias (line 13) | def _make_bias(q_seqlens, history_lens, neg_val, causal):
  function _make_bias_alibi (line 36) | def _make_bias_alibi(q_seqlens, history_lens, neg_val, causal, alibi_slo...
  function _make_block_sparse_bias (line 59) | def _make_block_sparse_bias(q_seqlens: torch.Tensor, history_lens: torch...
  function _naive_attention (line 80) | def _naive_attention(batched_q, batched_kv, bias, sinks=None):
  function _naive_window_attention (line 117) | def _naive_window_attention(q, k, v, seqlens_q, seqlens_k, window_size):
  class TestFlashAttention (line 149) | class TestFlashAttention:
    method dtype (line 152) | def dtype(self):
    method head_dim_k (line 156) | def head_dim_k(self, request):
    method head_dim_v (line 160) | def head_dim_v(self, request):
    method num_heads_q (line 164) | def num_heads_q(self, request):
    method num_heads_k (line 168) | def num_heads_k(self, request):
    method causal (line 172) | def causal(self, request):
    method q_seqlens (line 176) | def q_seqlens(self, request):
    method cu_seqlens_q (line 180) | def cu_seqlens_q(self, q_seqlens):
    method history_lens (line 186) | def history_lens(self, request):
    method kv_seqlens (line 190) | def kv_seqlens(self, q_seqlens, history_lens):
    method cu_seqlens_k (line 194) | def cu_seqlens_k(self, kv_seqlens):
    method batched_q (line 200) | def batched_q(self, q_seqlens, num_heads_q, head_dim_k, dtype):
    method batched_kv (line 208) | def batched_kv(self, q_seqlens, history_lens, num_heads_k, head_dim_k,...
    method conti_q (line 218) | def conti_q(self, q_seqlens, batched_q):
    method conti_kv (line 222) | def conti_kv(self, kv_seqlens, batched_kv):
    method mask (line 230) | def mask(self, q_seqlens, history_lens, causal):
    method gt (line 235) | def gt(self, batched_q, batched_kv, mask):
    method conti_gt (line 239) | def conti_gt(self, gt, q_seqlens):
    method test_flash_attention (line 248) | def test_flash_attention(self, conti_q, conti_kv, q_seqlens, cu_seqlen...
    method win_size (line 263) | def win_size(self, request):
    method window_gt (line 267) | def window_gt(self, conti_q, conti_kv, q_seqlens, kv_seqlens, win_size):
    method test_window_attention (line 283) | def test_window_attention(self, conti_q, conti_kv, q_seqlens, cu_seqle...
    method sinks (line 299) | def sinks(self, num_heads_q, dtype):
    method sink_gt (line 303) | def sink_gt(self, batched_q, batched_kv, mask, sinks):
    method conti_sink_gt (line 307) | def conti_sink_gt(self, sink_gt, q_seqlens):
    method test_sinks (line 316) | def test_sinks(self, conti_q, conti_kv, q_seqlens, cu_seqlens_q, cu_se...
    method block_sparse_size (line 333) | def block_sparse_size(self):
    method block_sparse_mask (line 337) | def block_sparse_mask(self, q_seqlens, history_lens, block_sparse_size):
    method block_sparse_gt (line 342) | def block_sparse_gt(self, batched_q, batched_kv, block_sparse_mask):
    method test_block_sparse_attention (line 350) | def test_block_sparse_attention(self, conti_q, conti_kv, q_seqlens, cu...
    method alibi_slopes (line 368) | def alibi_slopes(self, num_heads_q):
    method alibi_bias (line 372) | def alibi_bias(self, q_seqlens, history_lens, causal, alibi_slopes):
    method alibi_gt (line 377) | def alibi_gt(self, batched_q, batched_kv, alibi_bias):
    method conti_alibi_gt (line 381) | def conti_alibi_gt(self, alibi_gt, q_seqlens):
    method test_alibi (line 392) | def test_alibi(self, conti_q, conti_kv, q_seqlens, cu_seqlens_q, cu_se...

FILE: tests/pytorch/kernel/test_flatten_kv_cache.py
  function _div_up (line 5) | def _div_up(a, b):
  class TestFlattenKVCache (line 9) | class TestFlattenKVCache:
    method out_dtype (line 12) | def out_dtype(self):
    method num_heads (line 16) | def num_heads(self):
    method head_dim (line 20) | def head_dim(self):
    method block_size (line 24) | def block_size(self):
    method kv_lens (line 28) | def kv_lens(self):
    method batch_size (line 32) | def batch_size(self, kv_lens):
    method num_blocks_per_input (line 36) | def num_blocks_per_input(self, kv_lens, block_size):
    method max_num_blocks (line 40) | def max_num_blocks(self, num_blocks_per_input):
    method out_size (line 44) | def out_size(self, kv_lens):
    method kv_seqlens (line 48) | def kv_seqlens(self, kv_lens):
    method k_caches (line 52) | def k_caches(self, batch_size, max_num_blocks, block_size, num_heads, ...
    method v_caches (line 57) | def v_caches(self, k_caches):
    method block_offsets (line 61) | def block_offsets(self, num_blocks_per_input):
    method gt (line 70) | def gt(self, k_caches, v_caches, kv_lens, block_offsets, block_size, n...
    method test_flatten_kv_cache (line 89) | def test_flatten_kv_cache(self, k_caches, v_caches, kv_seqlens, block_...
  function precise_round (line 97) | def precise_round(x: torch.Tensor):
  function quant (line 101) | def quant(kv: torch.Tensor, nbits: int = 8):
  class TestFlattenKVCacheQuant8 (line 114) | class TestFlattenKVCacheQuant8(TestFlattenKVCache):
    method nbits (line 117) | def nbits(self):
    method atol (line 121) | def atol(self):
    method rtol (line 125) | def rtol(self):
    method k_quant (line 129) | def k_quant(self, k_caches, nbits):
    method v_quant (line 133) | def v_quant(self, v_caches, nbits):
    method test_flatten_kv_cache (line 136) | def test_flatten_kv_cache(self, k_quant, v_quant, kv_seqlens, block_of...
  class TestFlattenKVCacheQuant4 (line 160) | class TestFlattenKVCacheQuant4(TestFlattenKVCacheQuant8):
    method nbits (line 163) | def nbits(self):
    method atol (line 167) | def atol(self):
    method rtol (line 171) | def rtol(self):
  class TestFlattenKVCacheMLAFP8 (line 176) | class TestFlattenKVCacheMLAFP8(TestFlattenKVCache):
    method out_dtype (line 179) | def out_dtype(self):
    method num_heads (line 183) | def num_heads(self):
    method head_dim (line 187) | def head_dim(self):
    method block_size (line 191) | def block_size(self):
    method k_cache_mla (line 195) | def k_cache_mla(self, k_caches):
    method _dequant (line 207) | def _dequant(self, k_cache_mla):
    method gt (line 219) | def gt(self, k_cache_mla, kv_lens, block_offsets, block_size, num_head...
    method test_flatten_kv_cache (line 236) | def test_flatten_kv_cache(self, k_cache_mla, kv_seqlens, block_offsets...

FILE: tests/pytorch/kernel/test_fuse_moe_blocked_fp8.py
  function _make_A (line 5) | def _make_A(M, K, group_size, out_dtype, device='cuda'):
  function _make_B (line 26) | def _make_B(E, K, N, group_size, out_dtype, device='cuda'):
  function _get_sorted_idx (line 55) | def _get_sorted_idx(topk_idx: torch.Tensor, num_experts: int):
  class TestFusedMoEFP8KernelLauncher (line 64) | class TestFusedMoEFP8KernelLauncher:
    method dtype (line 67) | def dtype(self):
    method quant_dtype (line 71) | def quant_dtype(self):
    method device (line 75) | def device(self):
    method N (line 79) | def N(self):
    method K (line 83) | def K(self):
    method M (line 87) | def M(self):
    method num_experts (line 91) | def num_experts(self):
    method top_k (line 95) | def top_k(self):
    method group_size (line 99) | def group_size(self):
    method build_A (line 103) | def build_A(self, M, K, group_size, quant_dtype, device):
    method A (line 107) | def A(self, build_A, dtype):
    method A_quant (line 111) | def A_quant(self, build_A):
    method A_scale (line 115) | def A_scale(self, build_A):
    method build_B (line 119) | def build_B(self, num_experts, N, K, group_size, quant_dtype, device):
    method B (line 123) | def B(self, build_B, dtype):
    method B_quant (line 127) | def B_quant(self, build_B):
    method B_scale (line 131) | def B_scale(self, build_B):
    method bias (line 135) | def bias(self, build_B, dtype):
    method router_weights (line 140) | def router_weights(self, M, num_experts, device, dtype):
    method topk_weights (line 144) | def topk_weights(self, router_weights, top_k):
    method topk_idx (line 148) | def topk_idx(self, topk_weights):
    method sort_and_cnt (line 152) | def sort_and_cnt(self, topk_idx, num_experts):
    method sorted_idx (line 156) | def sorted_idx(self, sort_and_cnt):
    method exp_tok_cnt (line 160) | def exp_tok_cnt(self, sort_and_cnt):
    method exp_end (line 164) | def exp_end(self, exp_tok_cnt):
    method exp_start (line 168) | def exp_start(self, exp_end, exp_tok_cnt):
    method gt (line 172) | def gt(self, A, B, bias, top_k, sorted_idx, exp_start, exp_end, M):
    method test_launcher (line 191) | def test_launcher(self, A_quant, A_scale, B, B_quant, B_scale, bias, s...
  class TestFusedMoeBlockedFP8 (line 216) | class TestFusedMoeBlockedFP8:
    method dtype (line 219) | def dtype(self):
    method quant_dtype (line 223) | def quant_dtype(self):
    method device (line 227) | def device(self):
    method in_size (line 231) | def in_size(self):
    method seq_len (line 235) | def seq_len(seq_len):
    method hidden_size (line 239) | def hidden_size(self):
    method out_size (line 243) | def out_size(self):
    method num_experts (line 247) | def num_experts(self):
    method top_k (line 251) | def top_k(self):
    method group_size (line 255) | def group_size(self):
    method renormalize (line 259) | def renormalize(self):
    method build_hidden_states (line 263) | def build_hidden_states(self, seq_len, in_size, group_size, quant_dtyp...
    method hidden_states (line 267) | def hidden_states(self, build_hidden_states, dtype):
    method states_quanted (line 271) | def states_quanted(self, build_hidden_states):
    method states_scale (line 275) | def states_scale(self, build_hidden_states):
    method build_w1 (line 279) | def build_w1(self, num_experts, hidden_size, in_size, group_size, quan...
    method w1 (line 283) | def w1(self, build_w1, dtype):
    method w1_quant (line 287) | def w1_quant(self, build_w1):
    method w1_scale (line 291) | def w1_scale(self, build_w1):
    method build_w2 (line 295) | def build_w2(self, num_experts, out_size, hidden_size, group_size, qua...
    method w2 (line 304) | def w2(self, build_w2, dtype):
    method w2_quant (line 308) | def w2_quant(self, build_w2):
    method w2_scale (line 312) | def w2_scale(self, build_w2):
    method router_logits (line 316) | def router_logits(self, seq_len, num_experts, dtype, device):
    method topk_logits (line 320) | def topk_logits(self, router_logits, top_k):
    method topk_weights (line 325) | def topk_weights(self, topk_logits):
    method topk_idx (line 329) | def topk_idx(self, topk_logits):
    method gt (line 333) | def gt(self, hidden_states, w1, w2, topk_weights, topk_idx, top_k, ren...
    method test_fused_moe (line 339) | def test_fused_moe(self, states_quanted, states_scale, w1_quant, w1_sc...

FILE: tests/pytorch/kernel/test_fused_lora.py
  class TestFusedLoRA (line 7) | class TestFusedLoRA:
    method dtype (line 10) | def dtype(self):
    method head_size (line 14) | def head_size(self):
    method out_head_size (line 18) | def out_head_size(self):
    method seq_lens (line 22) | def seq_lens(self, request):
    method ranks (line 26) | def ranks(self):
    method start_loc (line 30) | def start_loc(self, seq_lens):
    method input (line 34) | def input(self, seq_lens, head_size, dtype):
    method adapter_ids (line 39) | def adapter_ids(self, seq_lens, ranks):
    method scaling (line 47) | def scaling(self, ranks):
    method lora_a (line 51) | def lora_a(self, ranks, head_size, dtype):
    method lora_b (line 59) | def lora_b(self, ranks, out_head_size, dtype):
    method fused_lora_a (line 67) | def fused_lora_a(self, lora_a):
    method fused_lora_b (line 71) | def fused_lora_b(self, lora_b):
    method gt (line 75) | def gt(self, input, start_loc, seq_lens, adapter_ids, lora_a, lora_b, ...
    method test_fused_lora (line 90) | def test_fused_lora(self, input, fused_lora_a, fused_lora_b, start_loc...

FILE: tests/pytorch/kernel/test_fused_moe.py
  function _get_sorted_idx (line 6) | def _get_sorted_idx(topk_idx: torch.Tensor, num_experts: int):
  class TestFusedMoEKernelLauncher (line 14) | class TestFusedMoEKernelLauncher:
    method dtype (line 17) | def dtype(self):
    method device (line 21) | def device(self):
    method N (line 25) | def N(self):
    method K (line 29) | def K(self):
    method M (line 33) | def M(self):
    method num_experts (line 37) | def num_experts(self):
    method top_k (line 41) | def top_k(self):
    method A (line 45) | def A(self, M, K, device, dtype):
    method B (line 50) | def B(self, num_experts, N, K, device, dtype):
    method bias (line 55) | def bias(self, num_experts, N, device, dtype):
    method router_weights (line 59) | def router_weights(self, M, num_experts, device, dtype):
    method topk_weights (line 63) | def topk_weights(self, router_weights, top_k):
    method topk_idx (line 67) | def topk_idx(self, topk_weights):
    method sort_and_cnt (line 71) | def sort_and_cnt(self, topk_idx, num_experts):
    method sorted_idx (line 75) | def sorted_idx(self, sort_and_cnt):
    method exp_tok_cnt (line 79) | def exp_tok_cnt(self, sort_and_cnt):
    method exp_end (line 83) | def exp_end(self, exp_tok_cnt):
    method exp_start (line 87) | def exp_start(self, exp_end, exp_tok_cnt):
    method gt (line 91) | def gt(self, A, B, bias, top_k, topk_idx):
    method test_launcher (line 107) | def test_launcher(self, A, B, bias, sorted_idx, exp_start, exp_end, to...
  function _mlp_forward (line 126) | def _mlp_forward(hidden_states, gate_proj, up_proj, down_proj):
  class TestFusedMoe (line 132) | class TestFusedMoe:
    method dtype (line 135) | def dtype(self):
    method device (line 139) | def device(self):
    method in_size (line 143) | def in_size(self):
    method seq_len (line 147) | def seq_len(seq_len):
    method hidden_size (line 151) | def hidden_size(self):
    method out_size (line 155) | def out_size(self):
    method num_experts (line 159) | def num_experts(self):
    method top_k (line 163) | def top_k(self):
    method renormalize (line 167) | def renormalize(self):
    method hidden_states (line 171) | def hidden_states(self, seq_len, in_size, dtype, device):
    method w1 (line 176) | def w1(self, num_experts, hidden_size, in_size, dtype, device):
    method w2 (line 181) | def w2(self, num_experts, out_size, hidden_size, dtype, device):
    method router_logits (line 186) | def router_logits(self, seq_len, num_experts, dtype, device):
    method topk_logits (line 190) | def topk_logits(self, router_logits, top_k):
    method topk_weights (line 195) | def topk_weights(self, topk_logits):
    method topk_idx (line 199) | def topk_idx(self, topk_logits):
    method gt (line 203) | def gt(self, hidden_states, w1, w2, topk_weights, topk_idx, renormalize):
    method test_fused_moe (line 221) | def test_fused_moe(self, hidden_states, w1, w2, topk_weights, topk_idx...
  class TestFusedMoeW8A8 (line 227) | class TestFusedMoeW8A8(TestFusedMoe):
    method quant_states (line 230) | def quant_states(self, hidden_states):
    method quant_weight (line 235) | def quant_weight(self, w):
    method quant_w1 (line 245) | def quant_w1(self, w1):
    method quant_w2 (line 250) | def quant_w2(self, w2):
    method test_fused_moe (line 255) | def test_fused_moe(self, quant_states, quant_w1, quant_w2, topk_weight...

FILE: tests/pytorch/kernel/test_gated_delta_rule.py
  function do_test (line 5) | def do_test():
  function naive_recurrent_gdr (line 13) | def naive_recurrent_gdr(
  class TestRecurrentGatedDeltaRule (line 59) | class TestRecurrentGatedDeltaRule:
    method auto_context (line 62) | def auto_context(self):
    method batch (line 75) | def batch(self):
    method num_heads (line 79) | def num_heads(self):
    method seqlen (line 83) | def seqlen(self):
    method head_dim (line 87) | def head_dim(self):
    method use_qk_l2norm_in_kernel (line 91) | def use_qk_l2norm_in_kernel(self, request):
    method q (line 95) | def q(self, batch, seqlen, num_heads, head_dim):
    method k (line 99) | def k(self, batch, seqlen, num_heads, head_dim):
    method v (line 103) | def v(self, batch, seqlen, num_heads, head_dim):
    method g (line 107) | def g(self, batch, seqlen, num_heads):
    method beta (line 111) | def beta(self, batch, seqlen, num_heads):
    method initial_state (line 115) | def initial_state(self, batch, num_heads, head_dim):
    method gt (line 119) | def gt(self, q, k, v, g, beta, initial_state, use_qk_l2norm_in_kernel):
    method test_fused_gated_delta_rule (line 130) | def test_fused_gated_delta_rule(self, q, k, v, g, beta, initial_state,...

FILE: tests/pytorch/kernel/test_gemm_fp8.py
  function _make_quant_val (line 5) | def _make_quant_val(shape, out_dtype):
  function fast_log2_ceil_torch (line 17) | def fast_log2_ceil_torch(x: torch.Tensor) -> torch.Tensor:
  function fast_pow2_torch (line 27) | def fast_pow2_torch(x: torch.Tensor) -> torch.Tensor:
  function fast_round_scale_torch (line 32) | def fast_round_scale_torch(amax: torch.Tensor, fp8_max_inv: torch.Tensor...
  function _make_quant_scale_ue8m0 (line 36) | def _make_quant_scale_ue8m0(shape, out_dtype):
  function _make_quant_scale (line 44) | def _make_quant_scale(shape, out_dtype, scale_fmt: str = None):
  function _make_A (line 56) | def _make_A(M, K, group_size, out_dtype, scale_fmt: str = None):
  function _aligned_size (line 69) | def _aligned_size(a, b):
  function _make_B (line 73) | def _make_B(K, N, group_size, out_dtype, scale_fmt: str = None):
  class TestQuantFP8 (line 91) | class TestQuantFP8:
    method M (line 94) | def M(self, request):
    method K (line 98) | def K(self):
    method group_size (line 102) | def group_size(self):
    method out_dtype (line 106) | def out_dtype(self):
    method scale_fmt (line 110) | def scale_fmt(self, request):
    method build_A (line 114) | def build_A(self, M, K, group_size, out_dtype, scale_fmt):
    method A (line 118) | def A(self, build_A):
    method quant_A (line 122) | def quant_A(self, build_A):
    method scale (line 126) | def scale(self, build_A):
    method gt (line 130) | def gt(self, quant_A, scale):
    method test_quant_fp8 (line 135) | def test_quant_fp8(self, A, group_size, out_dtype, scale_fmt, gt):
  class TestGemmFP8 (line 147) | class TestGemmFP8:
    method M (line 150) | def M(self):
    method N (line 154) | def N(self):
    method K (line 159) | def K(self):
    method group_size (line 163) | def group_size(self):
    method quant_dtype (line 167) | def quant_dtype(self):
    method out_dtype (line 171) | def out_dtype(self):
    method build_A (line 175) | def build_A(self, M, K, group_size, quant_dtype):
    method A (line 179) | def A(self, build_A, out_dtype):
    method quant_A (line 183) | def quant_A(self, build_A):
    method scale_A (line 187) | def scale_A(self, build_A):
    method build_B (line 191) | def build_B(self, K, N, group_size, quant_dtype):
    method B (line 195) | def B(self, build_B, out_dtype):
    method quant_B (line 199) | def quant_B(self, build_B):
    method scale_B (line 203) | def scale_B(self, build_B):
    method gt (line 207) | def gt(self, A, B):
    method test_gemm_fp8 (line 210) | def test_gemm_fp8(self, quant_A, scale_A, quant_B, scale_B, out_dtype,...

FILE: tests/pytorch/kernel/test_moe_route.py
  function reference_noaux_tc_routing (line 5) | def reference_noaux_tc_routing(
  class TestNoauxTC (line 40) | class TestNoauxTC:
    method auto_context (line 43) | def auto_context(self):
    method batch_size (line 56) | def batch_size(self):
    method num_experts (line 60) | def num_experts(self):
    method logits (line 64) | def logits(self, batch_size, num_experts):
    method bias (line 68) | def bias(self, num_experts):
    method kwargs (line 72) | def kwargs(self):
    method gt (line 83) | def gt(self, logits, bias, kwargs):
    method test_noaux_tc_router (line 86) | def test_noaux_tc_router(self, logits, bias, kwargs, gt):

FILE: tests/pytorch/kernel/test_multinomial_sampling.py
  function _bf16_mark (line 7) | def _bf16_mark():
  class TestMultinomialSampling (line 11) | class TestMultinomialSampling:
    method num_tokens (line 14) | def num_tokens(self, request):
    method select_ids (line 18) | def select_ids(self, request):
    method batch_size (line 22) | def batch_size(self, select_ids):
    method dtype (line 26) | def dtype(self, request):
    method scores (line 30) | def scores(self, num_tokens, batch_size, select_ids, dtype):
    method seeds (line 38) | def seeds(self, batch_size):
    method offsets (line 42) | def offsets(self, batch_size):
    method indices (line 46) | def indices(self, scores):
    method gt (line 53) | def gt(self, batch_size, select_ids, indices):
    method test_multinomial_sampling (line 62) | def test_multinomial_sampling(self, scores, seeds, offsets, indices, gt):

FILE: tests/pytorch/kernel/test_paged_attention.py
  function _conti_input (line 7) | def _conti_input(data, seq_lens):
  function _make_bias (line 13) | def _make_bias(q_seqlens, history_lens, neg_val):
  function _make_alibi_bias (line 28) | def _make_alibi_bias(q_seqlens, history_lens, neg_val, alibi_slopes):
  function _make_block_sparse_bias (line 49) | def _make_block_sparse_bias(q_seqlens: torch.Tensor, history_lens: torch...
  function _make_blocked_cache (line 70) | def _make_blocked_cache(batched_k,
  function _naive_attention (line 104) | def _naive_attention(batched_q, batched_kv, bias, sinks=None):
  function _naive_window_attention (line 141) | def _naive_window_attention(q, k, v, seqlens_q, seqlens_k, window_size):
  class TestPagedAttentionBase (line 173) | class TestPagedAttentionBase:
    method dtype (line 176) | def dtype(self):
    method feat_dim (line 180) | def feat_dim(self, request):
    method feat_dim_v (line 184) | def feat_dim_v(self, request):
    method num_heads_q (line 188) | def num_heads_q(self, request):
    method num_heads_k (line 192) | def num_heads_k(self, request):
    method block_size (line 196) | def block_size(self, request):
    method layout (line 200) | def layout(self, request):
    method history_lens (line 204) | def history_lens(self, request):
    method seq_len (line 208) | def seq_len(self):
    method seq_lens (line 212) | def seq_lens(self, seq_len, history_lens):
    method kv_seqlens (line 216) | def kv_seqlens(self, seq_lens, history_lens):
    method batched_q (line 220) | def batched_q(self, seq_len, kv_seqlens, num_heads_q, feat_dim, dtype):
    method batched_kv (line 227) | def batched_kv(self, kv_seqlens, num_heads_k, feat_dim, feat_dim_v, dt...
    method conti_q (line 236) | def conti_q(self, seq_lens, batched_q):
    method block_offsets (line 240) | def block_offsets(self, kv_seqlens, block_size):
    method conti_kv (line 254) | def conti_kv(self, batched_kv, history_lens):
    method blocked_kv (line 261) | def blocked_kv(self, batched_kv, kv_seqlens, history_lens, block_offse...
    method mask (line 269) | def mask(self, history_lens):
    method gt (line 275) | def gt(self, batched_q, batched_kv, mask):
    method conti_gt (line 279) | def conti_gt(self, gt, seq_lens):
  class TestPagedAttention (line 283) | class TestPagedAttention(TestPagedAttentionBase):
    method test_paged_attention (line 291) | def test_paged_attention(self, conti_q, blocked_kv, block_offsets, kv_...
    method win_size (line 304) | def win_size(self, request):
    method window_gt (line 308) | def window_gt(self, conti_q, conti_kv, seq_lens, history_lens, win_size):
    method test_window_attention (line 326) | def test_window_attention(self, conti_q, blocked_kv, block_offsets, kv...
  class TestPagedAttentionSink (line 340) | class TestPagedAttentionSink(TestPagedAttentionBase):
    method sinks (line 343) | def sinks(self, num_heads_q, dtype):
    method sink_gt (line 347) | def sink_gt(self, batched_q, batched_kv, mask, sinks):
    method conti_sink_gt (line 351) | def conti_sink_gt(self, sink_gt, seq_lens):
    method test_paged_attention (line 360) | def test_paged_attention(self, conti_q, blocked_kv, block_offsets, kv_...
  function quant (line 375) | def quant(kv: torch.Tensor, nbits: int = 8):
  function _make_blocked_cache_quant (line 388) | def _make_blocked_cache_quant(batched_k, batched_v, seq_lens, history_le...
  class TestPagedAttentionInt8 (line 423) | class TestPagedAttentionInt8(TestPagedAttention):
    method nbits (line 426) | def nbits(self):
    method blocked_kv (line 430) | def blocked_kv(self, batched_kv, seq_lens, history_lens, block_offsets...
    method test_paged_attention (line 441) | def test_paged_attention(self, conti_q, blocked_kv, block_offsets, kv_...
    method test_window_attention (line 467) | def test_window_attention(self, conti_q, blocked_kv, block_offsets, kv...
  class TestPagedAttentionInt4 (line 486) | class TestPagedAttentionInt4(TestPagedAttentionInt8):
    method nbits (line 489) | def nbits(self):
  class TestPagedAttentionBlockDecoding (line 493) | class TestPagedAttentionBlockDecoding(TestPagedAttentionBase):
    method seq_len (line 496) | def seq_len(self):
    method mask (line 500) | def mask(self, seq_lens, history_lens, seq_len):
    method gt (line 505) | def gt(self, batched_q, batched_kv, mask):
    method conti_gt (line 509) | def conti_gt(self, gt, seq_lens):
    method test_paged_attention (line 518) | def test_paged_attention(self, conti_q, blocked_kv, block_offsets, kv_...
  class TestPagedAttentionAlibi (line 532) | class TestPagedAttentionAlibi(TestPagedAttentionBase):
    method alibi_slopes (line 535) | def alibi_slopes(self, num_heads_q):
    method mask (line 539) | def mask(self, seq_lens, history_lens, alibi_slopes):
    method test_paged_attention (line 549) | def test_paged_attention(self, conti_q, blocked_kv, block_offsets, kv_...

FILE: tests/pytorch/kernel/test_rms_norm.py
  function _bf16_mark (line 7) | def _bf16_mark():
  class TestRMSNorm (line 11) | class TestRMSNorm:
    method initialize (line 14) | def initialize(self):
    method dtype (line 21) | def dtype(self, request):
    method input_shape (line 25) | def input_shape(self, request):
    method hidden_size (line 29) | def hidden_size(self, input_shape):
    method input (line 33) | def input(self, dtype, input_shape):
    method weight (line 37) | def weight(self, dtype, hidden_size):
    method eps (line 41) | def eps(self):
    method gt (line 45) | def gt(self, input, weight, eps):
    method test_rms_norm (line 54) | def test_rms_norm(self, input, weight, eps, gt):
    method residual (line 61) | def residual(self, dtype, input_shape):
    method gt_residual (line 65) | def gt_residual(self, input, residual, weight, eps):
    method test_rms_norm_residual (line 77) | def test_rms_norm_residual(self, input, residual, weight, eps, gt_resi...

FILE: tests/pytorch/nn/test_embedding.py
  function parallel_emb (line 14) | def parallel_emb(rank: int, world_size: int, vocab_size: int, feat_size:...
  class TestEmbedding (line 45) | class TestEmbedding:
    method vocab_size (line 48) | def vocab_size(self, request):
    method feat_size (line 52) | def feat_size(self, request):
    method padding_idx (line 56) | def padding_idx(self, request):
    method dtype (line 60) | def dtype(self, request):
    method tp (line 64) | def tp(self, request):
    method seqlen (line 68) | def seqlen(self, request):
    method weight (line 72) | def weight(self, vocab_size, feat_size, dtype):
    method x (line 76) | def x(self, seqlen, vocab_size):
    method gt (line 80) | def gt(self, x, vocab_size, feat_size, padding_idx, dtype, weight):
    method test_embedding (line 97) | def test_embedding(self, vocab_size, feat_size, padding_idx, seqlen, t...

FILE: tests/pytorch/paging/test_block_manager.py
  class TestAllocator (line 13) | class TestAllocator:
    method num_gpu_blocks (line 16) | def num_gpu_blocks(self):
    method num_cpu_blocks (line 20) | def num_cpu_blocks(self):
    method allocator (line 24) | def allocator(self, num_cpu_blocks, num_gpu_blocks):
    method test_alloc (line 27) | def test_alloc(self, allocator, num_cpu_blocks, num_gpu_blocks):
    method test_full (line 53) | def test_full(self, allocator, num_cpu_blocks, num_gpu_blocks):
  class TestDefaultBlockManager (line 75) | class TestDefaultBlockManager:
    method block_size (line 78) | def block_size(self):
    method num_cpu_blocks (line 82) | def num_cpu_blocks(self):
    method num_gpu_blocks (line 86) | def num_gpu_blocks(self):
    method max_batch_size (line 90) | def max_batch_size(self):
    method cache_config (line 94) | def cache_config(self, block_size, num_cpu_blocks, num_gpu_blocks, max...
    method scheduler_config (line 101) | def scheduler_config(self, max_batch_size):
    method seq_meta (line 108) | def seq_meta(self, block_size):
    method scheduler (line 114) | def scheduler(self, cache_config, scheduler_config, seq_meta):
    method block_mgr (line 118) | def block_mgr(self, scheduler):
    method test_alloc (line 121) | def test_alloc(self, scheduler, block_mgr, num_gpu_blocks):
    method test_num_required_blocks (line 146) | def test_num_required_blocks(self, scheduler, block_mgr):
    method test_append_slot (line 167) | def test_append_slot(self, scheduler, block_mgr, num_gpu_blocks):
    method test_swap (line 193) | def test_swap(self, scheduler, block_mgr, num_gpu_blocks):
  class TestWindowBlockManager (line 233) | class TestWindowBlockManager:
    method window_size (line 236) | def window_size(self):
    method block_size (line 240) | def block_size(self):
    method num_cpu_blocks (line 244) | def num_cpu_blocks(self):
    method num_gpu_blocks (line 248) | def num_gpu_blocks(self):
    method max_batch_size (line 252) | def max_batch_size(self):
    method cache_config (line 256) | def cache_config(self, block_size, num_cpu_blocks, num_gpu_blocks, max...
    method scheduler_config (line 264) | def scheduler_config(self, max_batch_size):
    method seq_meta (line 271) | def seq_meta(self, block_size):
    method scheduler (line 277) | def scheduler(self, cache_config, scheduler_config, seq_meta):
    method block_mgr (line 281) | def block_mgr(self, scheduler):
    method test_alloc (line 284) | def test_alloc(self, scheduler, block_mgr, num_gpu_blocks):
    method test_win_alloc (line 309) | def test_win_alloc(self, scheduler, block_mgr, num_gpu_blocks, window_...

FILE: tests/pytorch/paging/test_block_trie.py
  class TestBlockTire (line 9) | class TestBlockTire:
    method block_size (line 12) | def block_size(self):
    method num_cpu_blocks (line 16) | def num_cpu_blocks(self):
    method num_gpu_blocks (line 20) | def num_gpu_blocks(self):
    method max_batch_size (line 24) | def max_batch_size(self):
    method cache_config (line 28) | def cache_config(self, block_size, num_cpu_blocks, num_gpu_blocks, max...
    method scheduler_config (line 36) | def scheduler_config(self, max_batch_size):
    method seq_meta (line 43) | def seq_meta(self, block_size):
    method scheduler (line 49) | def scheduler(self, cache_config, scheduler_config, seq_meta):
    method block_mgr (line 53) | def block_mgr(self, scheduler):
    method block_trie (line 57) | def block_trie(self, scheduler):
    method test_allocate (line 60) | def test_allocate(self, block_trie, block_mgr, scheduler):
    method test_match (line 99) | def test_match(self, block_trie, block_mgr, scheduler):
    method test_evict (line 137) | def test_evict(self, block_trie, scheduler, num_gpu_blocks):

FILE: tests/pytorch/paging/test_scheduler.py
  class TestScheduler (line 9) | class TestScheduler:
    method block_size (line 12) | def block_size(self):
    method num_cpu_blocks (line 16) | def num_cpu_blocks(self):
    method num_gpu_blocks (line 20) | def num_gpu_blocks(self):
    method max_batch_size (line 24) | def max_batch_size(self):
    method cache_config (line 28) | def cache_config(self, block_size, num_cpu_blocks, num_gpu_blocks, max...
    method scheduler_config (line 35) | def scheduler_config(self, max_batch_size):
    method seq_meta (line 42) | def seq_meta(self, block_size):
    method scheduler (line 48) | def scheduler(self, cache_config, scheduler_config, seq_meta):
    method test_schedule_base (line 51) | def test_schedule_base(self, scheduler, block_size, num_gpu_blocks):
    method test_update (line 76) | def test_update(self, scheduler, block_size, num_gpu_blocks):
    method test_evict (line 119) | def test_evict(self, scheduler, block_size, num_gpu_blocks, num_cpu_bl...

FILE: tests/test_lmdeploy/test_auto_backend.py
  class TestAutoBackend (line 8) | class TestAutoBackend:
    method turbomind_workspace (line 11) | def turbomind_workspace(self):
    method models (line 17) | def models(self):
    method test_turbomind_is_supported (line 41) | def test_turbomind_is_supported(self, turbomind_workspace, models):
    method test_autoget_backend (line 47) | def test_autoget_backend(self, turbomind_workspace, models):

FILE: tests/test_lmdeploy/test_content_merge.py
  class TestMergeMessageContent (line 6) | class TestMergeMessageContent:
    method test_missing_content_field (line 9) | def test_missing_content_field(self):
    method test_explicit_none_content (line 33) | def test_explicit_none_content(self):
    method test_string_content_unchanged (line 57) | def test_string_content_unchanged(self):
    method test_single_text_block (line 65) | def test_single_text_block(self):
    method test_multiple_text_blocks_newline_join (line 72) | def test_multiple_text_blocks_newline_join(self):
    method test_mixed_content_types (line 95) | def test_mixed_content_types(self):
    method test_empty_list_content (line 120) | def test_empty_list_content(self):
    method test_list_with_non_text_blocks_only (line 127) | def test_list_with_non_text_blocks_only(self):
    method test_preserve_all_message_fields (line 148) | def test_preserve_all_message_fields(self):
    method test_text_block_with_missing_text_field (line 171) | def test_text_block_with_missing_text_field(self):
    method test_gpt_oss_tool_call_scenario (line 195) | def test_gpt_oss_tool_call_scenario(self):
  function test_merge_message_content_parametrized (line 269) | def test_merge_message_content_parametrized(msg, expected_content):
  function test_batch_message_processing (line 275) | def test_batch_message_processing():

FILE: tests/test_lmdeploy/test_grammar.py
  function test_guided_matrix (line 62) | def test_guided_matrix(model_id, backend_name, backend_factory, schema_t...
  function test_mix_guided_matrix (line 102) | def test_mix_guided_matrix(model_id, backend_name, backend_factory):

FILE: tests/test_lmdeploy/test_harmony_gpt_oss_parser.py
  function _install_openai_harmony_stub (line 18) | def _install_openai_harmony_stub():
  class DummyParser (line 53) | class DummyParser:
    class _Msg (line 66) | class _Msg:
      method __init__ (line 68) | def __init__(self, channel, recipient):
    method __init__ (line 72) | def __init__(self):
    method process (line 78) | def process(self, token):
  function _chat_completion_v1 (line 117) | def _chat_completion_v1(request, token_chunks: List[List[int]]):
  function _stream_parse (line 163) | def _stream_parse(request, token_chunks: List[List[int]]):
  function _t (line 193) | def _t(s: str) -> List[int]:
  function test_parser_stream_basic (line 226) | def test_parser_stream_basic(token_chunks: List[List[int]], expects: Lis...
  function test_parser_stream_multiple_calls_indices (line 242) | def test_parser_stream_multiple_calls_indices():
  function test_parser_stream_interleaved_channels (line 261) | def test_parser_stream_interleaved_channels():
  function test_parser_stream_two_calls_same_func (line 277) | def test_parser_stream_two_calls_same_func(token_chunks: List[List[int]]...
  function test_open_tool_call_no_args (line 291) | def test_open_tool_call_no_args():
  function test_parser_nonstream (line 310) | def test_parser_nonstream(token_chunks: List[List[int]], expects: List[T...

FILE: tests/test_lmdeploy/test_lite/test_quantization/test_utils/test_cal_qparams.py
  function test_cal_qparams (line 10) | def test_cal_qparams():

FILE: tests/test_lmdeploy/test_messages.py
  function test_engine_generation_config (line 9) | def test_engine_generation_config():
  function test_update_from_hf_gen_cfg (line 24) | def test_update_from_hf_gen_cfg(model_path):

FILE: tests/test_lmdeploy/test_model.py
  function test_HFChatTemplate_get_prompt_sequence_start_True (line 61) | def test_HFChatTemplate_get_prompt_sequence_start_True(model_path):
  function test_HFChatTemplate_message2prompt_sequence_start_True (line 73) | def test_HFChatTemplate_message2prompt_sequence_start_True(model_path):
  function test_base_model (line 85) | def test_base_model():
  function test_vicuna (line 92) | def test_vicuna():
  function test_prefix_response (line 110) | def test_prefix_response():
  function test_internlm_chat (line 117) | def test_internlm_chat():
  function test_baichuan (line 137) | def test_baichuan():
  function test_llama2 (line 149) | def test_llama2():
  function test_codellama_completion (line 169) | def test_codellama_completion():
  function test_codellama_infilling (line 180) | def test_codellama_infilling():
  function test_codellama_chat (line 195) | def test_codellama_chat():
  function test_codellama_python_specialist (line 206) | def test_codellama_python_specialist():
  function test_codellama_others (line 216) | def test_codellama_others():
  function test_deepseek_vl2 (line 226) | def test_deepseek_vl2(model_path_or_name):
  function test_qwen3 (line 251) | def test_qwen3(model_path, enable_thinking):
  function test_HFChatTemplate_get_prompt_sequence_start_False_Qwen (line 322) | def test_HFChatTemplate_get_prompt_sequence_start_False_Qwen(model_path):
  function test_HFChatTemplate_get_prompt_sequence_start_False_Qwen3_5 (line 332) | def test_HFChatTemplate_get_prompt_sequence_start_False_Qwen3_5(model_pa...
  function test_HFChatTemplate_DeepSeek_V3 (line 342) | def test_HFChatTemplate_DeepSeek_V3(model_path):
  function test_HFChatTemplate_DeepSeek_thinking (line 351) | def test_HFChatTemplate_DeepSeek_thinking(model_path):
  function test_HFChatTemplate_Qwen3_VL_with_vision_id (line 360) | def test_HFChatTemplate_Qwen3_VL_with_vision_id(model_path):
  function test_gemma_chat_template (line 423) | def test_gemma_chat_template(model_path):

FILE: tests/test_lmdeploy/test_pipeline.py
  class TestBackendInference (line 13) | class TestBackendInference:
    method backend_config (line 17) | def backend_config(self, backend):
    method pipe (line 28) | def pipe(self, backend_config):
    method test_infer_single_string (line 39) | def test_infer_single_string(self, pipe):
    method test_infer_batch_strings (line 50) | def test_infer_batch_strings(self, pipe):
    method test_infer_openai_format (line 61) | def test_infer_openai_format(self, pipe):
    method test_infer_with_generation_config (line 76) | def test_infer_with_generation_config(self, pipe):
    method test_call_method (line 85) | def test_call_method(self, pipe):
    method test_stream_infer_single (line 93) | def test_stream_infer_single(self, pipe):
    method test_stream_infer_batch (line 107) | def test_stream_infer_batch(self, pipe):
    method test_stream_infer_with_session (line 123) | def test_stream_infer_with_session(self, pipe):
    method test_chat_streaming (line 166) | def test_chat_streaming(self, pipe):
    method test_chat_non_streaming (line 185) | def test_chat_non_streaming(self, pipe):
    method test_chat_multi_turn (line 199) | def test_chat_multi_turn(self, pipe):
    method test_session_creation (line 217) | def test_session_creation(self, pipe):
    method test_get_ppl_single (line 226) | def test_get_ppl_single(self, pipe):
    method test_get_ppl_batch (line 241) | def test_get_ppl_batch(self, pipe):
    method test_stream_infer_stream_response_parameter (line 257) | def test_stream_infer_stream_response_parameter(self, pipe):
    method test_infer_different_max_tokens (line 267) | def test_infer_different_max_tokens(self, pipe, max_new_tokens):
    method test_batch_infer_different_gen_configs (line 275) | def test_batch_infer_different_gen_configs(self, pipe):
    method test_infer_zero_tokens (line 285) | def test_infer_zero_tokens(self, pipe):

FILE: tests/test_lmdeploy/test_qwen3_parser.py
  class DummyTokenizer (line 19) | class DummyTokenizer:
    method decode (line 21) | def decode(self, token_ids: List[int]) -> str:
    method encode (line 24) | def encode(self, text: str) -> List[int]:
  function _chat_completion_v1 (line 175) | def _chat_completion_v1(
  function _stream_parse (line 266) | def _stream_parse(request: ChatCompletionRequest, text_sequence: List[st...
  function test_parser_stream (line 300) | def test_parser_stream(text_sequence: List[str], expects: List[TestExpec...
  function test_parser_nonstream (line 320) | def test_parser_nonstream(text_sequence: List[str], expects: List[TestEx...
  function test_no_think_nonstream (line 338) | def test_no_think_nonstream():

FILE: tests/test_lmdeploy/test_qwen3coder_parser.py
  class DummyTokenizer (line 18) | class DummyTokenizer:
    method decode (line 20) | def decode(self, token_ids: List[int]) -> str:
    method encode (line 23) | def encode(self, text: str) -> List[int]:
  function _chat_completion_v1 (line 56) | def _chat_completion_v1(
  function _stream_parse (line 149) | def _stream_parse(request: ChatCompletionRequest, text_sequence: List[st...
  function test_parser_stream (line 188) | def test_parser_stream(text_sequence: List[str], expects: List[TestExpec...
  function test_parser_nonstream (line 215) | def test_parser_nonstream(text_sequence: List[str], expects: List[TestEx...
  function test_no_think_nonstream (line 233) | def test_no_think_nonstream():

FILE: tests/test_lmdeploy/test_tokenizer.py
  function test_tokenizer (line 17) | def test_tokenizer(model_path, input, interval, add_special_tokens, skip...
  function test_tokenizer_with_stop_words (line 39) | def test_tokenizer_with_stop_words(model_path, stop_words):
  function test_qwen_vl_decode_special (line 45) | def test_qwen_vl_decode_special():
  function test_glm4_special_token (line 55) | def test_glm4_special_token():
  function test_check_transformers_version (line 73) | def test_check_transformers_version(model_path):

FILE: tests/test_lmdeploy/test_turbomind/test_converter.py
  function test_torch_dtype_fallback (line 11) | def test_torch_dtype_fallback():
  function test_ffn_reader_kind_none (line 27) | def test_ffn_reader_kind_none():
  function test_registered_models (line 70) | def test_registered_models():
  function test_update_from_engine_config (line 109) | def test_update_from_engine_config():
  function test_dtype (line 148) | def test_dtype():

FILE: tests/test_lmdeploy/test_utils.py
  function test_get_and_verify_max_len (line 6) | def test_get_and_verify_max_len():

FILE: tests/test_lmdeploy/test_vl/test_hf_chat_template.py
  function get_model_and_chat_template (line 9) | def get_model_and_chat_template(model_path):
  function mock_messages (line 23) | def mock_messages():
  function mock_pure_img_messages (line 36) | def mock_pure_img_messages():
  function mock_pure_text_messages (line 46) | def mock_pure_text_messages():
  class TestInternVLHFChatTemplate (line 56) | class TestInternVLHFChatTemplate:
    method models (line 59) | def models(self):
    method test_proc_messages (line 73) | def test_proc_messages(self, models, mock_messages):
    method test_proc_pure_img_messages (line 85) | def test_proc_pure_img_messages(self, models, mock_pure_img_messages):
    method test_proc_pure_text_messages (line 97) | def test_proc_pure_text_messages(self, models, mock_pure_text_messages):
  class TestQwenVLChatTemplate (line 108) | class TestQwenVLChatTemplate:
    method models (line 111) | def models(self):
    method test_proc_messages (line 136) | def test_proc_messages(self, models, mock_messages):
    method test_pure_img_messages (line 146) | def test_pure_img_messages(self, models, mock_pure_img_messages):
    method test_pure_text_messages (line 156) | def test_pure_text_messages(self, models, mock_pure_text_messages):

FILE: tests/test_lmdeploy/test_vl/test_nonhf_chat_template.py
  function get_model_and_chat_template (line 9) | def get_model_and_chat_template(model_path):
  class TestInternVLChatTemplate (line 22) | class TestInternVLChatTemplate:
    method internvl3_5 (line 25) | def internvl3_5(self):
    method internvl3 (line 40) | def internvl3(self):
    method internvl2_5 (line 54) | def internvl2_5(self):
    method internvl2 (line 68) | def internvl2(self):
    method mock_messages (line 82) | def mock_messages(self):
    method mock_IMAGE_TOKEN_messages (line 94) | def mock_IMAGE_TOKEN_messages(self):
    method test_internvl3_5 (line 104) | def test_internvl3_5(self, internvl3_5, mock_messages):
    method test_internvl3_5_backward_compatibility (line 116) | def test_internvl3_5_backward_compatibility(self, internvl3_5, mock_IM...
    method test_internvl3 (line 128) | def test_internvl3(self, internvl3, mock_messages):
    method test_internvl3_backward_compatibility (line 141) | def test_internvl3_backward_compatibility(self, internvl3, mock_IMAGE_...
    method test_internvl2_5 (line 153) | def test_internvl2_5(self, internvl2_5, mock_messages):
    method test_internvl2_5_backward_compatibility (line 166) | def test_internvl2_5_backward_compatibility(self, internvl2_5, mock_IM...
    method test_internvl2 (line 178) | def test_internvl2(self, internvl2, mock_messages):
    method test_internvl2_backward_compatibility (line 190) | def test_internvl2_backward_compatibility(self, internvl2, mock_IMAGE_...

FILE: tests/test_lmdeploy/test_vl/test_qwen3vl_processor.py
  function qwen3vl_model (line 17) | def qwen3vl_model(request):
  function sample_messages (line 25) | def sample_messages():
  function test_qwen3vl_preprocess_with_custom_pixels (line 44) | def test_qwen3vl_preprocess_with_custom_pixels(qwen3vl_model, sample_mes...

FILE: tests/test_lmdeploy/test_vl/test_vl_encode.py
  function test_image_encode_decode (line 9) | def test_image_encode_decode():
  function test_video_encode_decode (line 22) | def test_video_encode_decode():
  function test_time_series_encode_decode (line 45) | def test_time_series_encode_decode():
  function test_image_modes (line 57) | def test_image_modes():
  function test_truncated_image (line 68) | def test_truncated_image():
  function test_single_frame_video (line 75) | def test_single_frame_video():
  function test_video_sampling_params (line 85) | def test_video_sampling_params():
  function test_invalid_inputs (line 115) | def test_invalid_inputs():