SYMBOL INDEX (7894 symbols across 838 files) FILE: .github/scripts/action_tools.py function run_cmd (line 17) | def run_cmd(cmd_lines: List[str], log_path: str, cwd: str = None): function _append_summary (line 52) | def _append_summary(content): function add_summary (line 58) | def add_summary(csv_path: str): function evaluate (line 78) | def evaluate(models: List[str], function create_model_links (line 187) | def create_model_links(src_dir: str, dst_dir: str): function generate_benchmark_report (line 201) | def generate_benchmark_report(report_path: str): function generate_csv_from_profile_result (line 255) | def generate_csv_from_profile_result(file_path: str, out_path: str): function generate_output_for_evaluation (line 277) | def generate_output_for_evaluation(result_dir: str): function find_csv_files (line 291) | def find_csv_files(directory): FILE: .github/scripts/check_lmdeploy.py function check_module_init (line 8) | def check_module_init(root: str): FILE: .github/scripts/doc_link_checker.py function make_parser (line 9) | def make_parser(): function analyze_doc (line 19) | def analyze_doc(home, path): function traverse (line 66) | def traverse(target): FILE: autotest/benchmark/test_apiserver_performance.py function get_models (line 6) | def get_models(backend, parallel_config): function test_turbomind_apiserver_tp1 (line 14) | def test_turbomind_apiserver_tp1(config, run_config, worker_id): function test_turbomind_apiserver_tp2 (line 23) | def test_turbomind_apiserver_tp2(config, run_config, worker_id): function test_turbomind_apiserver_tp4 (line 32) | def test_turbomind_apiserver_tp4(config, run_config, worker_id): function test_turbomind_apiserver_tp8 (line 41) | def test_turbomind_apiserver_tp8(config, run_config, worker_id): function test_pytorch_apiserver_tp1 (line 50) | def test_pytorch_apiserver_tp1(config, run_config, worker_id): function test_pytorch_apiserver_tp2 (line 59) | def test_pytorch_apiserver_tp2(config, run_config, worker_id): function test_pytorch_apiserver_tp4 (line 68) | def test_pytorch_apiserver_tp4(config, run_config, worker_id): function test_pytorch_apiserver_tp8 (line 77) | def test_pytorch_apiserver_tp8(config, run_config, worker_id): function test_pytorch_apiserver_tp16 (line 86) | def test_pytorch_apiserver_tp16(config, run_config, worker_id): function test_restful_func_tp2 (line 131) | def test_restful_func_tp2(config, run_config, worker_id): FILE: autotest/benchmark/test_longtext_performance.py function get_models (line 6) | def get_models(backend, parallel_config): function test_turbomind_longtext_throughput_tp1 (line 14) | def test_turbomind_longtext_throughput_tp1(config, run_config, worker_id): function test_turbomind_longtext_throughput_tp2 (line 23) | def test_turbomind_longtext_throughput_tp2(config, run_config, worker_id): function test_turbomind_longtext_throughput_tp4 (line 32) | def test_turbomind_longtext_throughput_tp4(config, run_config, worker_id): function test_turbomind_longtext_throughput_tp8 (line 41) | def test_turbomind_longtext_throughput_tp8(config, run_config, worker_id): function test_pytorch_longtext_throughput_tp1 (line 50) | def test_pytorch_longtext_throughput_tp1(config, run_config, worker_id): function test_pytorch_longtext_throughput_tp2 (line 59) | def test_pytorch_longtext_throughput_tp2(config, run_config, worker_id): function test_pytorch_longtext_throughput_tp4 (line 68) | def test_pytorch_longtext_throughput_tp4(config, run_config, worker_id): function test_pytorch_longtext_throughput_tp8 (line 77) | def test_pytorch_longtext_throughput_tp8(config, run_config, worker_id): function test_pytorch_longtext_throughput_tp16 (line 86) | def test_pytorch_longtext_throughput_tp16(config, run_config, worker_id): FILE: autotest/benchmark/test_mllm_apiserver_performance.py function get_models (line 6) | def get_models(backend, parallel_config): function test_turbomind_mllm_apiserver_tp1 (line 14) | def test_turbomind_mllm_apiserver_tp1(config, run_config, worker_id): function test_turbomind_mllm_apiserver_tp2 (line 23) | def test_turbomind_mllm_apiserver_tp2(config, run_config, worker_id): function test_turbomind_mllm_apiserver_tp4 (line 32) | def test_turbomind_mllm_apiserver_tp4(config, run_config, worker_id): function test_turbomind_mllm_apiserver_tp8 (line 41) | def test_turbomind_mllm_apiserver_tp8(config, run_config, worker_id): function test_pytorch_mllm_apiserver_tp1 (line 50) | def test_pytorch_mllm_apiserver_tp1(config, run_config, worker_id): function test_pytorch_mllm_apiserver_tp2 (line 59) | def test_pytorch_mllm_apiserver_tp2(config, run_config, worker_id): function test_pytorch_mllm_apiserver_tp4 (line 68) | def test_pytorch_mllm_apiserver_tp4(config, run_config, worker_id): function test_pytorch_mllm_apiserver_tp8 (line 77) | def test_pytorch_mllm_apiserver_tp8(config, run_config, worker_id): function test_pytorch_mllm_apiserver_tp16 (line 86) | def test_pytorch_mllm_apiserver_tp16(config, run_config, worker_id): FILE: autotest/benchmark/test_prefixcache_performance.py function get_models (line 6) | def get_models(backend, parallel_config): function test_turbomind_prefix_tp1 (line 14) | def test_turbomind_prefix_tp1(config, run_config, worker_id): function test_turbomind_prefix_tp2 (line 23) | def test_turbomind_prefix_tp2(config, run_config, worker_id): function test_turbomind_prefix_tp4 (line 32) | def test_turbomind_prefix_tp4(config, run_config, worker_id): function test_turbomind_prefix_tp8 (line 41) | def test_turbomind_prefix_tp8(config, run_config, worker_id): function test_pytorch_prefix_tp1 (line 50) | def test_pytorch_prefix_tp1(config, run_config, worker_id): function test_pytorch_prefix_tp2 (line 59) | def test_pytorch_prefix_tp2(config, run_config, worker_id): function test_pytorch_prefix_tp4 (line 68) | def test_pytorch_prefix_tp4(config, run_config, worker_id): function test_pytorch_prefix_tp8 (line 77) | def test_pytorch_prefix_tp8(config, run_config, worker_id): function test_pytorch_prefix_tp16 (line 86) | def test_pytorch_prefix_tp16(config, run_config, worker_id): function test_pytorch_prefix_pr_test_tp1 (line 113) | def test_pytorch_prefix_pr_test_tp1(config, run_config, worker_id): FILE: autotest/benchmark/test_throughput_performance.py function get_models (line 6) | def get_models(backend, parallel_config): function test_turbomind_throughput_tp1 (line 16) | def test_turbomind_throughput_tp1(config, run_config, worker_id): function test_turbomind_throughput_tp2 (line 25) | def test_turbomind_throughput_tp2(config, run_config, worker_id): function test_turbomind_throughput_tp4 (line 34) | def test_turbomind_throughput_tp4(config, run_config, worker_id): function test_turbomind_throughput_tp8 (line 43) | def test_turbomind_throughput_tp8(config, run_config, worker_id): function test_pytorch_throughput_tp1 (line 52) | def test_pytorch_throughput_tp1(config, run_config, worker_id): function test_pytorch_throughput_tp2 (line 61) | def test_pytorch_throughput_tp2(config, run_config, worker_id): function test_pytorch_throughput_tp4 (line 70) | def test_pytorch_throughput_tp4(config, run_config, worker_id): function test_pytorch_throughput_tp8 (line 79) | def test_pytorch_throughput_tp8(config, run_config, worker_id): function test_pytorch_throughput_tp16 (line 88) | def test_pytorch_throughput_tp16(config, run_config, worker_id): function test_throughput_func_tp2 (line 114) | def test_throughput_func_tp2(config, run_config, worker_id): function test_throughput_prtest_tp1 (line 141) | def test_throughput_prtest_tp1(config, run_config, worker_id): FILE: autotest/conftest.py function config (line 18) | def config(): function cli_case_config (line 24) | def cli_case_config(): function common_case_config (line 32) | def common_case_config(): function shared_ray_manager (line 40) | def shared_ray_manager(): function shared_proxy_manager (line 71) | def shared_proxy_manager(): FILE: autotest/evaluate/test_api_evaluate.py function _run_ray_distributed_test (line 13) | def _run_ray_distributed_test( function _run_proxy_distributed_test (line 59) | def _run_proxy_distributed_test(config, function run_eval_test (line 111) | def run_eval_test(config, run_config, worker_id, test_type='infer', eval... function get_models (line 192) | def get_models(backend, parallel_config): function test_turbomind_infer_tp1 (line 201) | def test_turbomind_infer_tp1(config, run_config, worker_id): function test_turbomind_infer_tp2 (line 210) | def test_turbomind_infer_tp2(config, run_config, worker_id): function test_turbomind_infer_tp4 (line 219) | def test_turbomind_infer_tp4(config, run_config, worker_id): function test_turbomind_infer_tp8 (line 228) | def test_turbomind_infer_tp8(config, run_config, worker_id): function test_turbomind_infer_cp2tp8 (line 237) | def test_turbomind_infer_cp2tp8(config, run_config, worker_id): function test_pytorch_restful_tp1 (line 247) | def test_pytorch_restful_tp1(config, run_config, worker_id): function test_pytorch_restful_tp2 (line 257) | def test_pytorch_restful_tp2(config, run_config, worker_id): function test_pytorch_restful_tp4 (line 267) | def test_pytorch_restful_tp4(config, run_config, worker_id): function test_pytorch_restful_tp8 (line 277) | def test_pytorch_restful_tp8(config, run_config, worker_id): function test_pytorch_restful_tp16 (line 287) | def test_pytorch_restful_tp16(config, run_config, worker_id): function test_pytorch_restful_distributed_tp16 (line 296) | def test_pytorch_restful_distributed_tp16(shared_ray_manager, config, ru... function test_pytorch_restful_distributed_dpep8 (line 309) | def test_pytorch_restful_distributed_dpep8(shared_proxy_manager, config,... function test_pytorch_restful_distributed_dpep16 (line 322) | def test_pytorch_restful_distributed_dpep16(shared_proxy_manager, config... function test_turbomind_eval_tp1 (line 335) | def test_turbomind_eval_tp1(config, run_config, worker_id): function test_turbomind_eval_tp2 (line 344) | def test_turbomind_eval_tp2(config, run_config, worker_id): function test_turbomind_eval_tp4 (line 353) | def test_turbomind_eval_tp4(config, run_config, worker_id): function test_turbomind_eval_tp8 (line 362) | def test_turbomind_eval_tp8(config, run_config, worker_id): function test_pytorch_eval_tp1 (line 372) | def test_pytorch_eval_tp1(config, run_config, worker_id): function test_pytorch_eval_tp2 (line 382) | def test_pytorch_eval_tp2(config, run_config, worker_id): function test_pytorch_eval_tp4 (line 392) | def test_pytorch_eval_tp4(config, run_config, worker_id): function test_pytorch_eval_tp8 (line 402) | def test_pytorch_eval_tp8(config, run_config, worker_id): function test_pytorch_eval_tp16 (line 412) | def test_pytorch_eval_tp16(config, run_config, worker_id): function test_pytorch_eval_distributed_tp16 (line 421) | def test_pytorch_eval_distributed_tp16(config, run_config, worker_id): function test_pytorch_eval_distributed_dpep8 (line 430) | def test_pytorch_eval_distributed_dpep8(config, run_config, worker_id): function test_pytorch_eval_distributed_dpep16 (line 439) | def test_pytorch_eval_distributed_dpep16(config, run_config, worker_id): function test_turbomind_eval_cp2tp8 (line 448) | def test_turbomind_eval_cp2tp8(config, run_config, worker_id): FILE: autotest/evaluate/test_mllm_api_evaluate.py function run_eval_test (line 10) | def run_eval_test(config, run_config, worker_id, test_type='infer', eval... function get_models (line 69) | def get_models(backend, parallel_config): function test_turbomind_vl_eval_tp1 (line 85) | def test_turbomind_vl_eval_tp1(config, run_config, worker_id): function test_turbomind_vl_eval_tp2 (line 94) | def test_turbomind_vl_eval_tp2(config, run_config, worker_id): function test_turbomind_vl_eval_tp4 (line 103) | def test_turbomind_vl_eval_tp4(config, run_config, worker_id): function test_turbomind_vl_eval_tp8 (line 112) | def test_turbomind_vl_eval_tp8(config, run_config, worker_id): function test_pytorch_vl_eval_tp1 (line 122) | def test_pytorch_vl_eval_tp1(config, run_config, worker_id): function test_pytorch_vl_eval_tp2 (line 132) | def test_pytorch_vl_eval_tp2(config, run_config, worker_id): function test_pytorch_vl_eval_tp4 (line 142) | def test_pytorch_vl_eval_tp4(config, run_config, worker_id): function test_pytorch_vl_eval_tp8 (line 152) | def test_pytorch_vl_eval_tp8(config, run_config, worker_id): function test_pytorch_vl_eval_tp16 (line 162) | def test_pytorch_vl_eval_tp16(config, run_config, worker_id): function test_turbomind_eval_tp1 (line 171) | def test_turbomind_eval_tp1(config, run_config, worker_id): function test_turbomind_eval_tp2 (line 180) | def test_turbomind_eval_tp2(config, run_config, worker_id): function test_turbomind_eval_tp4 (line 189) | def test_turbomind_eval_tp4(config, run_config, worker_id): function test_turbomind_eval_tp8 (line 198) | def test_turbomind_eval_tp8(config, run_config, worker_id): function test_pytorch_eval_tp1 (line 208) | def test_pytorch_eval_tp1(config, run_config, worker_id): function test_pytorch_eval_tp2 (line 218) | def test_pytorch_eval_tp2(config, run_config, worker_id): function test_pytorch_eval_tp4 (line 228) | def test_pytorch_eval_tp4(config, run_config, worker_id): function test_pytorch_eval_tp8 (line 238) | def test_pytorch_eval_tp8(config, run_config, worker_id): function test_pytorch_eval_tp16 (line 248) | def test_pytorch_eval_tp16(config, run_config, worker_id): FILE: autotest/interface/pipeline/test_pipeline_func.py function init_pipeline (line 15) | def init_pipeline(model_path, backend_config): function run_case_in_spawn (line 21) | def run_case_in_spawn(worker_id, target, args): function run_pipeline_testcase_prompt (line 33) | def run_pipeline_testcase_prompt(config, model, backend, file_name): function run_pipeline_testcase_prompt_stream (line 43) | def run_pipeline_testcase_prompt_stream(config, model, backend, file_name): function run_pipeline_testcase_multi_prompt (line 55) | def run_pipeline_testcase_multi_prompt(config, model, backend, file_name): function run_pipeline_testcase_multi_prompt_stream (line 65) | def run_pipeline_testcase_multi_prompt_stream(config, model, backend, fi... function run_pipeline_testcase_message (line 77) | def run_pipeline_testcase_message(config, model, backend, file_name): function run_pipeline_testcase_message_stream (line 88) | def run_pipeline_testcase_message_stream(config, model, backend, file_na... function run_pipeline_testcase_message_batch (line 101) | def run_pipeline_testcase_message_batch(config, model, backend, file_name): function run_pipeline_testcase_message_batch_stream (line 112) | def run_pipeline_testcase_message_batch_stream(config, model, backend, f... function run_pipeline_testcase_logprobs (line 125) | def run_pipeline_testcase_logprobs(config, model, backend, file_name): function run_pipeline_testcase_logprobs_stream (line 136) | def run_pipeline_testcase_logprobs_stream(config, model, backend, file_n... function run_pipeline_testcase_session_len (line 149) | def run_pipeline_testcase_session_len(config, model, backend, file_name): function run_pipeline_testcase_min_new_tokens (line 163) | def run_pipeline_testcase_min_new_tokens(config, model, backend, file_na... function run_pipeline_testcase_stop_words (line 177) | def run_pipeline_testcase_stop_words(config, model, backend, file_name): function run_pipeline_testcase_bad_words (line 192) | def run_pipeline_testcase_bad_words(config, model, backend, file_name): function run_pipeline_testcase_special_words_false (line 205) | def run_pipeline_testcase_special_words_false(config, model, backend, fi... function run_pipeline_testcase_special_words_true (line 225) | def run_pipeline_testcase_special_words_true(config, model, backend, fil... function run_pipeline_testcase_repetition_penalty (line 245) | def run_pipeline_testcase_repetition_penalty(config, model, backend, fil... function run_pipeline_testcase_repetition_penalty_bigger (line 256) | def run_pipeline_testcase_repetition_penalty_bigger(config, model, backe... function run_pipeline_testcase_min_top_p (line 267) | def run_pipeline_testcase_min_top_p(config, model, backend, file_name): function run_pipeline_testcase_min_top_k (line 278) | def run_pipeline_testcase_min_top_k(config, model, backend, file_name): function run_pipeline_testcase_diff_random_seed (line 291) | def run_pipeline_testcase_diff_random_seed(config, model, backend, file_... function run_pipeline_testcase_same_random_seed (line 304) | def run_pipeline_testcase_same_random_seed(config, model, backend, file_... function run_pipeline_testcase_do_sample_batch (line 317) | def run_pipeline_testcase_do_sample_batch(config, model, backend, file_n... function run_pipeline_testcase_max_new_tokens (line 328) | def run_pipeline_testcase_max_new_tokens(config, model, backend, file_na... function run_pipeline_testcase_ignore_eos (line 342) | def run_pipeline_testcase_ignore_eos(config, model, backend, file_name): function test_return_with_prompt (line 358) | def test_return_with_prompt(config, model, backend, worker_id): function test_return_with_prompt_stream (line 367) | def test_return_with_prompt_stream(config, model, backend, worker_id): function test_return_with_multi_prompt (line 376) | def test_return_with_multi_prompt(config, model, backend, worker_id): function test_return_with_multi_prompt_stream (line 385) | def test_return_with_multi_prompt_stream(config, model, backend, worker_... function test_return_with_message (line 394) | def test_return_with_message(config, model, backend, worker_id): function test_return_with_message_stream (line 402) | def test_return_with_message_stream(config, model, backend, worker_id): function test_return_with_message_batch (line 410) | def test_return_with_message_batch(config, model, backend, worker_id): function test_return_with_message_batch_stream (line 418) | def test_return_with_message_batch_stream(config, model, backend, worker... function test_return_check_logprobs (line 426) | def test_return_check_logprobs(config, model, backend, worker_id): function test_return_check_logprobs_stream (line 434) | def test_return_check_logprobs_stream(config, model, backend, worker_id): function test_backend_config_session_len (line 442) | def test_backend_config_session_len(config, model, backend, worker_id): function test_gen_config_min_new_tokens (line 450) | def test_gen_config_min_new_tokens(config, model, backend, worker_id): function test_gen_config_stop_words (line 458) | def test_gen_config_stop_words(config, model, backend, worker_id): function test_gen_config_bad_words (line 466) | def test_gen_config_bad_words(config, model, backend, worker_id): function test_gen_config_special_words_false (line 474) | def test_gen_config_special_words_false(config, model, backend, worker_id): function test_gen_config_special_words_true (line 482) | def test_gen_config_special_words_true(config, model, backend, worker_id): function test_gen_config_minimum_repetition_penalty (line 490) | def test_gen_config_minimum_repetition_penalty(config, model, backend, w... function test_gen_config_repetition_penalty_bigger_than_1 (line 498) | def test_gen_config_repetition_penalty_bigger_than_1(config, model, back... function test_gen_config_minimun_topp (line 506) | def test_gen_config_minimun_topp(config, model, backend, worker_id): function test_gen_config_minimun_topk (line 514) | def test_gen_config_minimun_topk(config, model, backend, worker_id): function test_gen_config_diff_random_seed (line 522) | def test_gen_config_diff_random_seed(config, model, backend, worker_id): function test_gen_config_same_random_seed (line 530) | def test_gen_config_same_random_seed(config, model, backend, worker_id): function test_gen_config_do_sample_batch (line 538) | def test_gen_config_do_sample_batch(config, model, backend, worker_id): function test_gen_config_max_new_tokens (line 546) | def test_gen_config_max_new_tokens(config, model, backend, worker_id): function test_gen_config_ignore_eos (line 554) | def test_gen_config_ignore_eos(config, model, backend, worker_id): function test_backend_config_input_validation (line 562) | def test_backend_config_input_validation(config, model, backend, worker_... function test_backend_config_validate_turbomind (line 599) | def test_backend_config_validate_turbomind(config, model, backend, worke... function test_backend_config_validate_pytorch (line 637) | def test_backend_config_validate_pytorch(config, model, backend, worker_... function test_backend_config_tp (line 667) | def test_backend_config_tp(config, model, backend, worker_id): FILE: autotest/interface/pipeline/test_pipeline_longtext_func.py function run_case_in_spawn (line 24) | def run_case_in_spawn(target, args): function test_history_issue_tp1 (line 33) | def test_history_issue_tp1(config, model, worker_id): function test_history_issue_tp2 (line 43) | def test_history_issue_tp2(config, model, worker_id): function stream_infer_worker (line 52) | def stream_infer_worker(config, model, tp_num): function test_long_test_passkey_tp1 (line 77) | def test_long_test_passkey_tp1(config, model, backend, worker_id): function test_long_test_passkey_tp2 (line 90) | def test_long_test_passkey_tp2(config, model, backend, worker_id): function test_long_test_passkey_tp8 (line 104) | def test_long_test_passkey_tp8(config, model, backend, worker_id): function passkey_retrival_worker (line 125) | def passkey_retrival_worker(config, model, backend, log_name, tp_num, se... function get_passkey_prompt (line 177) | def get_passkey_prompt(pipe, session_len): FILE: autotest/interface/restful/test_restful_chat_completions_v1.py class TestRestfulInterfaceBase (line 22) | class TestRestfulInterfaceBase: method test_get_model (line 25) | def test_get_model(self, config, backend, model_case): method test_encode_s1 (line 34) | def test_encode_s1(self, backend, model_case): method test_encode (line 54) | def test_encode(self, backend, model_case): class TestRestfulInterfaceChatCompletions (line 78) | class TestRestfulInterfaceChatCompletions: method test_return_info_with_prompt (line 80) | def test_return_info_with_prompt(self, backend, model_case): method test_return_info_with_messegae (line 94) | def test_return_info_with_messegae(self, backend, model_case): method test_return_info_with_prompt_streaming (line 106) | def test_return_info_with_prompt_streaming(self, backend, model_case): method test_return_info_with_messegae_streaming (line 125) | def test_return_info_with_messegae_streaming(self, backend, model_case): method test_single_stopword (line 142) | def test_single_stopword(self, backend, model_case): method test_single_stopword_streaming (line 159) | def test_single_stopword_streaming(self, backend, model_case): method test_array_stopwords (line 181) | def test_array_stopwords(self, backend, model_case): method test_array_stopwords_streaming (line 200) | def test_array_stopwords_streaming(self, backend, model_case): method test_special_words (line 225) | def test_special_words(self, backend, model_case): method test_minimum_repetition_penalty (line 253) | def test_minimum_repetition_penalty(self, backend, model_case): method test_minimum_repetition_penalty_streaming (line 272) | def test_minimum_repetition_penalty_streaming(self, backend, model_case): method test_repetition_penalty_bigger_than_1 (line 297) | def test_repetition_penalty_bigger_than_1(self, backend, model_case): method test_repetition_penalty_bigger_than_1_streaming (line 313) | def test_repetition_penalty_bigger_than_1_streaming(self, backend, mod... method test_minimum_topp (line 334) | def test_minimum_topp(self, backend, model_case): method test_minimum_topp_streaming (line 355) | def test_minimum_topp_streaming(self, backend, model_case): method test_mistake_modelname_return (line 381) | def test_mistake_modelname_return(self, backend, model_case): method test_mistake_modelname_return_streaming (line 396) | def test_mistake_modelname_return_streaming(self, backend, model_case): method test_mutilple_times_response_should_not_same (line 415) | def test_mutilple_times_response_should_not_same(self, backend, model_... method test_mutilple_times_response_should_not_same_streaming (line 434) | def test_mutilple_times_response_should_not_same_streaming(self, backe... method test_longtext_input (line 458) | def test_longtext_input(self, backend, model_case): method test_longtext_input_streaming (line 473) | def test_longtext_input_streaming(self, backend, model_case): method test_ignore_eos (line 492) | def test_ignore_eos(self, backend, model_case): method test_ignore_eos_streaming (line 511) | def test_ignore_eos_streaming(self, backend, model_case): method __test_max_tokens_or_max_completion_tokens (line 536) | def __test_max_tokens_or_max_completion_tokens( method test_max_tokens (line 572) | def test_max_tokens(self, backend, model_case): method test_max_completion_tokens (line 575) | def test_max_completion_tokens(self, backend, model_case): method __test_max_tokens_streaming_or_max_completion_tokens_streaming (line 578) | def __test_max_tokens_streaming_or_max_completion_tokens_streaming( method test_max_tokens_streaming (line 622) | def test_max_tokens_streaming(self, backend, model_case): method test_max_completion_tokens_streaming (line 625) | def test_max_completion_tokens_streaming(self, backend, model_case): method test_logprobs (line 629) | def test_logprobs(self, backend, model_case): method test_logprobs_streaming (line 649) | def test_logprobs_streaming(self, backend, model_case): class TestRestfulOpenAI (line 680) | class TestRestfulOpenAI: method test_return_info (line 683) | def test_return_info(self, backend, model_case): method test_return_info_streaming (line 699) | def test_return_info_streaming(self, backend, model_case): method test_single_stopword (line 720) | def test_single_stopword(self, backend, model_case): method test_single_stopword_streaming (line 739) | def test_single_stopword_streaming(self, backend, model_case): method test_array_stopwords (line 763) | def test_array_stopwords(self, backend, model_case): method test_array_stopwords_streaming (line 785) | def test_array_stopwords_streaming(self, backend, model_case): method test_minimum_topp (line 812) | def test_minimum_topp(self, backend, model_case): method test_minimum_topp_streaming (line 835) | def test_minimum_topp_streaming(self, backend, model_case): method test_mistake_modelname_return (line 863) | def test_mistake_modelname_return(self, backend, model_case): method test_mistake_modelname_return_streaming (line 878) | def test_mistake_modelname_return_streaming(self, backend, model_case): method test_mutilple_times_response_should_not_same (line 894) | def test_mutilple_times_response_should_not_same(self, backend, model_... method test_mutilple_times_response_should_not_same_streaming (line 914) | def test_mutilple_times_response_should_not_same_streaming(self, backe... method test_longtext_input (line 940) | def test_longtext_input(self, backend, model_case): method test_longtext_input_streaming (line 958) | def test_longtext_input_streaming(self, backend, model_case): method test_max_tokens (line 983) | def test_max_tokens(self, backend, model_case): method test_max_tokens_streaming (line 1000) | def test_max_tokens_streaming(self, backend, model_case): method test_logprobs (line 1031) | def test_logprobs(self, backend, model_case): method test_logprobs_streaming (line 1052) | def test_logprobs_streaming(self, backend, model_case): method test_input_validation (line 1083) | def test_input_validation(self, backend, model_case): method test_input_validation_streaming (line 1116) | def test_input_validation_streaming(self, backend, model_case): method test_disable_think (line 1150) | def test_disable_think(self, backend, model_case): method test_disable_think_with_image (line 1183) | def test_disable_think_with_image(self, backend, model_case): FILE: autotest/interface/restful/test_restful_completions_v1.py class TestRestfulInterfaceBase (line 15) | class TestRestfulInterfaceBase: method test_get_model (line 18) | def test_get_model(self, config, backend, model_case): method test_encode (line 24) | def test_encode(self, backend, model_case): method test_return (line 42) | def test_return(self, backend, model_case): method test_return_streaming (line 58) | def test_return_streaming(self, backend, model_case): method test_max_tokens (line 72) | def test_max_tokens(self, backend, model_case): method test_single_stopword (line 85) | def test_single_stopword(self, backend, model_case): method test_array_stopwords (line 96) | def test_array_stopwords(self, backend, model_case): method test_completions_stream (line 109) | def test_completions_stream(self, backend, model_case): method test_completions_stream_stopword (line 127) | def test_completions_stream_stopword(self, backend, model_case): method test_completions_stream_stopwords (line 151) | def test_completions_stream_stopwords(self, backend, model_case): method test_batch_prompt_order (line 177) | def test_batch_prompt_order(self, backend, model_case): FILE: autotest/interface/restful/test_restful_generate.py class TestGenerateComprehensive (line 22) | class TestGenerateComprehensive: method setup_api (line 25) | def setup_api(self, request, config, model_name, backend): method _log_request_response (line 38) | def _log_request_response(self, payload, response_data, stream_raw=None): method _post (line 55) | def _post(self, payload, stream=False): method _validate_generation_response (line 117) | def _validate_generation_response(self, method test_basic_generation (line 235) | def test_basic_generation(self): method test_input_ids_mode (line 294) | def test_input_ids_mode(self, config): method test_conflict_prompt_and_input_ids (line 349) | def test_conflict_prompt_and_input_ids(self): method test_input_ids_with_logprob (line 437) | def test_input_ids_with_logprob(self, config): method test_stop_str_with_include_flag (line 497) | def test_stop_str_with_include_flag(self): method test_streaming_mode (line 542) | def test_streaming_mode(self): method test_streaming_incremental_correctness (line 572) | def test_streaming_incremental_correctness(self): method test_return_logprob (line 625) | def test_return_logprob(self): method test_same_session_id_allowed (line 635) | def test_same_session_id_allowed(self): method test_empty_prompt_rejected (line 658) | def test_empty_prompt_rejected(self): method test_input_ids_rejected (line 673) | def test_input_ids_rejected(self): method test_stress_concurrent_requests (line 706) | def test_stress_concurrent_requests(self): method test_stress_long_prompt_and_generation (line 761) | def test_stress_long_prompt_and_generation(self): method test_stress_streaming_under_load (line 771) | def test_stress_streaming_under_load(self): method test_temperature_parameter (line 824) | def test_temperature_parameter(self): method test_top_p_parameter (line 844) | def test_top_p_parameter(self): method test_top_k_parameter (line 857) | def test_top_k_parameter(self): method test_min_p_parameter (line 870) | def test_min_p_parameter(self): method test_repetition_penalty (line 878) | def test_repetition_penalty(self): method test_ignore_eos_parameter (line 900) | def test_ignore_eos_parameter(self): method test_skip_special_tokens (line 917) | def test_skip_special_tokens(self, config): method test_stop_token_ids (line 941) | def test_stop_token_ids(self): method test_combined_parameters (line 968) | def test_combined_parameters(self): method test_streaming_with_all_parameters (line 984) | def test_streaming_with_all_parameters(self): method test_invalid_temperature_values (line 1008) | def test_invalid_temperature_values(self): method test_invalid_top_p_values (line 1019) | def test_invalid_top_p_values(self): method test_invalid_top_k_values (line 1027) | def test_invalid_top_k_values(self): method test_boundary_max_tokens (line 1035) | def test_boundary_max_tokens(self): method test_parameter_interactions (line 1057) | def test_parameter_interactions(self): method test_session_id_with_all_parameters (line 1074) | def test_session_id_with_all_parameters(self): method test_edge_cases_stop_conditions (line 1105) | def test_edge_cases_stop_conditions(self): method test_spaces_between_special_tokens (line 1134) | def test_spaces_between_special_tokens(self, config): method test_request_returns_experts (line 1160) | def test_request_returns_experts(self): FILE: autotest/toolchain/test_lagent.py function test_repeat (line 8) | def test_repeat(config, model): FILE: autotest/tools/chat/test_command_chat_hf_pytorch.py function test_hf_pytorch_chat_tp1 (line 15) | def test_hf_pytorch_chat_tp1(config, run_config, cli_case_config, worker... function test_hf_pytorch_chat_tp2 (line 23) | def test_hf_pytorch_chat_tp2(config, run_config, cli_case_config, worker... function test_hf_pytorch_chat_tp4 (line 31) | def test_hf_pytorch_chat_tp4(config, run_config, cli_case_config, worker... function test_hf_pytorch_chat_tp8 (line 39) | def test_hf_pytorch_chat_tp8(config, run_config, cli_case_config, worker... function test_hf_pytorch_chat_tp16 (line 47) | def test_hf_pytorch_chat_tp16(config, run_config, cli_case_config, worke... function test_hf_pytorch_base_tp1 (line 55) | def test_hf_pytorch_base_tp1(config, run_config, cli_case_config, worker... function test_hf_pytorch_base_tp2 (line 63) | def test_hf_pytorch_base_tp2(config, run_config, cli_case_config, worker... function test_hf_pytorch_chat_pr_tp2 (line 71) | def test_hf_pytorch_chat_pr_tp2(config, run_config, cli_case_config, wor... function test_hf_pytorch_chat_pr_tp1 (line 80) | def test_hf_pytorch_chat_pr_tp1(config, run_config, cli_case_config, wor... function test_modelscope_pytorch_chat_tp1 (line 88) | def test_modelscope_pytorch_chat_tp1(config, run_config, cli_case_config... function test_pytorch_chat_with_lora_tp1 (line 99) | def test_pytorch_chat_with_lora_tp1(config, run_config, cli_case_config,... function test_pytorch_chat_with_lora_tp2 (line 109) | def test_pytorch_chat_with_lora_tp2(config, run_config, cli_case_config,... FILE: autotest/tools/chat/test_command_chat_hf_turbomind.py function test_hf_turbomind_chat_tp1 (line 15) | def test_hf_turbomind_chat_tp1(config, run_config, cli_case_config, work... function test_hf_turbomind_chat_tp2 (line 22) | def test_hf_turbomind_chat_tp2(config, run_config, cli_case_config, work... function test_hf_turbomind_chat_tp4 (line 29) | def test_hf_turbomind_chat_tp4(config, run_config, cli_case_config, work... function test_hf_turbomind_chat_tp8 (line 36) | def test_hf_turbomind_chat_tp8(config, run_config, cli_case_config, work... function test_hf_turbomind_chat_fallback_backend_tp1 (line 43) | def test_hf_turbomind_chat_fallback_backend_tp1(config, run_config, cli_... function test_hf_turbomind_chat_fallback_backend_tp2 (line 50) | def test_hf_turbomind_chat_fallback_backend_tp2(config, run_config, cli_... function test_hf_turbomind_base_tp1 (line 57) | def test_hf_turbomind_base_tp1(config, run_config, cli_case_config, work... function test_hf_turbomind_base_tp2 (line 64) | def test_hf_turbomind_base_tp2(config, run_config, cli_case_config, work... function test_hf_turbomind_chat_pr_tp2 (line 72) | def test_hf_turbomind_chat_pr_tp2(config, run_config, cli_case_config, w... function test_hf_turbomind_chat_pr_tp1 (line 81) | def test_hf_turbomind_chat_pr_tp1(config, run_config, cli_case_config, w... function test_modelscope_turbomind_chat_tp1 (line 89) | def test_modelscope_turbomind_chat_tp1(config, run_config, cli_case_conf... FILE: autotest/tools/pipeline/llm_case.py function run_pipeline_chat_test (line 13) | def run_pipeline_chat_test(model_path, run_config, cases_path, is_pr_tes... FILE: autotest/tools/pipeline/mllm_case.py function run_pipeline_mllm_test (line 23) | def run_pipeline_mllm_test(model_path, run_config, resource_path, is_pr_... function internvl_vl_testcase (line 125) | def internvl_vl_testcase(pipe, resource_path, lang='en'): function MiniCPM_vl_testcase (line 245) | def MiniCPM_vl_testcase(pipe, resource_path): function Qwen_vl_testcase (line 343) | def Qwen_vl_testcase(pipe, resource_path): FILE: autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py function test_pipeline_chat_tp1 (line 16) | def test_pipeline_chat_tp1(config, run_config, common_case_config, worke... function test_pipeline_chat_tp2 (line 24) | def test_pipeline_chat_tp2(config, run_config, common_case_config, worke... function test_pipeline_chat_tp4 (line 32) | def test_pipeline_chat_tp4(config, run_config, common_case_config, worke... function test_pipeline_chat_tp8 (line 40) | def test_pipeline_chat_tp8(config, run_config, common_case_config, worke... function test_pipeline_chat_tp16 (line 48) | def test_pipeline_chat_tp16(config, run_config, common_case_config, work... function test_pipeline_chat_pytorch_prefix_cache_tp2 (line 56) | def test_pipeline_chat_pytorch_prefix_cache_tp2(config, run_config, comm... function test_hf_pytorch_chat_pr_tp2 (line 64) | def test_hf_pytorch_chat_pr_tp2(config, run_config, common_case_config, ... function test_hf_pytorch_chat_pr_tp1 (line 73) | def test_hf_pytorch_chat_pr_tp1(config, run_config, common_case_config, ... function test_modelscope_pipeline_chat_tp1 (line 81) | def test_modelscope_pipeline_chat_tp1(config, run_config, common_case_co... function test_pytorch_chat_with_lora_tp1 (line 89) | def test_pytorch_chat_with_lora_tp1(config, run_config, common_case_conf... function test_pytorch_chat_with_lora_tp2 (line 96) | def test_pytorch_chat_with_lora_tp2(config, run_config, common_case_conf... function test_pipeline_chat_speculative_decoding_tp1 (line 105) | def test_pipeline_chat_speculative_decoding_tp1(config, run_config, comm... FILE: autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py function get_models (line 8) | def get_models(parallel_config): function test_restful_chat_tp1 (line 15) | def test_restful_chat_tp1(config, run_config, worker_id): function test_restful_chat_tp2 (line 21) | def test_restful_chat_tp2(config, run_config, worker_id): function test_restful_chat_tp4 (line 27) | def test_restful_chat_tp4(config, run_config, worker_id): function test_restful_chat_tp8 (line 33) | def test_restful_chat_tp8(config, run_config, worker_id): function test_restful_chat_tp16 (line 39) | def test_restful_chat_tp16(config, run_config, worker_id): FILE: autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py function test_pipeline_chat_tp1 (line 15) | def test_pipeline_chat_tp1(config, run_config, common_case_config, worke... function test_pipeline_chat_tp2 (line 22) | def test_pipeline_chat_tp2(config, run_config, common_case_config, worke... function test_pipeline_chat_tp4 (line 29) | def test_pipeline_chat_tp4(config, run_config, common_case_config, worke... function test_pipeline_chat_tp8 (line 36) | def test_pipeline_chat_tp8(config, run_config, common_case_config, worke... function test_pipeline_chat_prefix_cache_tp2 (line 43) | def test_pipeline_chat_prefix_cache_tp2(config, run_config, common_case_... function test_pipeline_chat_fallback_backend_tp1 (line 50) | def test_pipeline_chat_fallback_backend_tp1(config, run_config, common_c... function test_pipeline_chat_fallback_backend_tp2 (line 58) | def test_pipeline_chat_fallback_backend_tp2(config, run_config, common_c... function test_pipeline_chat_pr_tp2 (line 68) | def test_pipeline_chat_pr_tp2(config, run_config, common_case_config, wo... function test_pipeline_chat_pr_tp1 (line 79) | def test_pipeline_chat_pr_tp1(config, run_config, common_case_config, wo... function test_modelscope_restful_chat_tp1 (line 88) | def test_modelscope_restful_chat_tp1(config, run_config, common_case_con... FILE: autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py function get_models (line 10) | def get_models(parallel_config): function test_restful_chat_tp1 (line 17) | def test_restful_chat_tp1(config, run_config, worker_id): function test_restful_chat_tp2 (line 23) | def test_restful_chat_tp2(config, run_config, worker_id): function test_restful_chat_tp4 (line 29) | def test_restful_chat_tp4(config, run_config, worker_id): function test_restful_chat_tp8 (line 35) | def test_restful_chat_tp8(config, run_config, worker_id): function test_restful_chat_tp16 (line 41) | def test_restful_chat_tp16(config, run_config, worker_id): function test_restful_chat_fallback_backend_tp1 (line 48) | def test_restful_chat_fallback_backend_tp1(config, run_config, worker_id): function test_pipeline_pr_test (line 56) | def test_pipeline_pr_test(config, run_config, worker_id): function test_pipeline_pr_tp2_test (line 65) | def test_pipeline_pr_tp2_test(config, run_config, worker_id): FILE: autotest/tools/quantization/test_quantization_awq.py function test_quantization_awq (line 13) | def test_quantization_awq(config, model, worker_id): function test_quantization_gptq (line 22) | def test_quantization_gptq(config, model, worker_id): function test_quantization_awq_pr (line 34) | def test_quantization_awq_pr(config, model): function quantization_all (line 39) | def quantization_all(config, quantization_model_name, origin_model_name,... FILE: autotest/tools/quantization/test_quantization_w8a8.py function test_quantization_w8a8 (line 13) | def test_quantization_w8a8(config, model, worker_id): function quantization_w8a8 (line 17) | def quantization_w8a8(config, quantization_model_name, origin_model_name... FILE: autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py function _run_ray_distributed_test (line 16) | def _run_ray_distributed_test( function _run_proxy_distributed_test (line 41) | def _run_proxy_distributed_test( function test_restful_chat_tp1 (line 74) | def test_restful_chat_tp1(config, run_config, common_case_config, worker... function test_restful_chat_tp2 (line 82) | def test_restful_chat_tp2(config, run_config, common_case_config, worker... function test_restful_chat_tp4 (line 90) | def test_restful_chat_tp4(config, run_config, common_case_config, worker... function test_restful_chat_tp8 (line 98) | def test_restful_chat_tp8(config, run_config, common_case_config, worker... function test_restful_chat_tp16 (line 106) | def test_restful_chat_tp16(config, run_config, common_case_config, worke... function test_restful_chat_distributed_tp16 (line 115) | def test_restful_chat_distributed_tp16(shared_ray_manager, config, run_c... function test_restful_chat_distributed_dpep16 (line 127) | def test_restful_chat_distributed_dpep16(shared_proxy_manager, config, r... function test_restful_chat_pytorch_prefix_cache_tp2 (line 138) | def test_restful_chat_pytorch_prefix_cache_tp2(config, run_config, commo... function test_hf_pytorch_chat_pr_tp2 (line 146) | def test_hf_pytorch_chat_pr_tp2(config, run_config, common_case_config, ... function test_hf_pytorch_chat_pr_tp1 (line 155) | def test_hf_pytorch_chat_pr_tp1(config, run_config, common_case_config, ... function test_modelscope_restful_chat_tp1 (line 163) | def test_modelscope_restful_chat_tp1(config, run_config, common_case_con... function test_pytorch_chat_with_lora_tp1 (line 171) | def test_pytorch_chat_with_lora_tp1(config, run_config, common_case_conf... function test_pytorch_chat_with_lora_tp2 (line 178) | def test_pytorch_chat_with_lora_tp2(config, run_config, common_case_conf... function test_restful_chat_reasoning_tp1 (line 188) | def test_restful_chat_reasoning_tp1(config, run_config, worker_id): function test_restful_chat_reasoning_tp2 (line 198) | def test_restful_chat_reasoning_tp2(config, run_config, worker_id): function test_restful_chat_tools_tp1 (line 208) | def test_restful_chat_tools_tp1(config, run_config, worker_id): function test_restful_chat_tools_tp2 (line 218) | def test_restful_chat_tools_tp2(config, run_config, worker_id): function test_restful_chat_tools_tp4 (line 228) | def test_restful_chat_tools_tp4(config, run_config, worker_id): function test_restful_chat_speculative_decoding_tp1 (line 237) | def test_restful_chat_speculative_decoding_tp1(config, run_config, commo... function test_restful_chat_speculative_decoding_tp16 (line 247) | def test_restful_chat_speculative_decoding_tp16(shared_ray_manager, conf... FILE: autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py function test_restful_chat_tp1 (line 11) | def test_restful_chat_tp1(config, run_config, worker_id): function test_restful_chat_tp2 (line 17) | def test_restful_chat_tp2(config, run_config, worker_id): function test_restful_chat_tp4 (line 23) | def test_restful_chat_tp4(config, run_config, worker_id): function test_restful_chat_tp8 (line 29) | def test_restful_chat_tp8(config, run_config, worker_id): function test_restful_chat_tp16 (line 35) | def test_restful_chat_tp16(config, run_config, worker_id): FILE: autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py function test_restful_chat_tp1 (line 16) | def test_restful_chat_tp1(config, run_config, common_case_config, worker... function test_restful_chat_tp2 (line 23) | def test_restful_chat_tp2(config, run_config, common_case_config, worker... function test_restful_chat_tp4 (line 30) | def test_restful_chat_tp4(config, run_config, common_case_config, worker... function test_restful_chat_tp8 (line 37) | def test_restful_chat_tp8(config, run_config, common_case_config, worker... function test_restful_chat_prefix_cache_tp2 (line 44) | def test_restful_chat_prefix_cache_tp2(config, run_config, common_case_c... function test_restful_chat_fallback_backend_tp1 (line 51) | def test_restful_chat_fallback_backend_tp1(config, run_config, common_ca... function test_restful_chat_fallback_backend_tp2 (line 59) | def test_restful_chat_fallback_backend_tp2(config, run_config, common_ca... function test_restful_chat_pr_tp2 (line 69) | def test_restful_chat_pr_tp2(config, run_config, common_case_config, wor... function test_restful_chat_pr_tp1 (line 80) | def test_restful_chat_pr_tp1(config, run_config, common_case_config, wor... function test_restful_logprobs (line 90) | def test_restful_logprobs(config, run_config, worker_id): function test_modelscope_restful_chat_tp1 (line 98) | def test_modelscope_restful_chat_tp1(config, run_config, common_case_con... function test_restful_chat_reasoning_tp1 (line 109) | def test_restful_chat_reasoning_tp1(config, run_config, worker_id): function test_restful_chat_reasoning_tp2 (line 119) | def test_restful_chat_reasoning_tp2(config, run_config, worker_id): function test_restful_chat_tools_tp1 (line 129) | def test_restful_chat_tools_tp1(config, run_config, worker_id): function test_restful_chat_tools_tp2 (line 139) | def test_restful_chat_tools_tp2(config, run_config, worker_id): function test_restful_chat_tools_tp4 (line 149) | def test_restful_chat_tools_tp4(config, run_config, worker_id): FILE: autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py function test_restful_chat_tp1 (line 12) | def test_restful_chat_tp1(config, run_config, worker_id): function test_restful_chat_tp2 (line 18) | def test_restful_chat_tp2(config, run_config, worker_id): function test_restful_chat_tp4 (line 24) | def test_restful_chat_tp4(config, run_config, worker_id): function test_restful_chat_tp8 (line 30) | def test_restful_chat_tp8(config, run_config, worker_id): function test_restful_chat_tp16 (line 36) | def test_restful_chat_tp16(config, run_config, worker_id): function test_restful_chat_fallback_backend_tp1 (line 43) | def test_restful_chat_fallback_backend_tp1(config, run_config, worker_id): FILE: autotest/utils/benchmark_utils.py function throughput_test (line 11) | def throughput_test(config, run_config, worker_id: str = '', is_smoke: b... function longtext_throughput_test (line 56) | def longtext_throughput_test(config, run_config, worker_id: str = ''): function restful_test (line 103) | def restful_test(config, run_config, worker_id: str = '', is_smoke: bool... function restful_profile (line 133) | def restful_profile(config, run_config, port, is_smoke: bool = False): function mllm_restful_profile (line 165) | def mllm_restful_profile(config, run_config, port, is_smoke: bool = False): function prefixcache_throughput_test (line 196) | def prefixcache_throughput_test(config, run_config, worker_id: str = '',... function get_max_cache_entry (line 257) | def get_max_cache_entry(model, backend): FILE: autotest/utils/common_utils.py function execute_command_with_logging (line 6) | def execute_command_with_logging(cmd, FILE: autotest/utils/config_utils.py function resolve_extra_params (line 15) | def resolve_extra_params(extra_params: dict[str, Any], model_base_path: ... function get_func_config_list (line 39) | def get_func_config_list(backend: str, function get_cli_common_param (line 134) | def get_cli_common_param(run_config: dict[str, Any]) -> str: function get_cli_str (line 169) | def get_cli_str(config: dict[str, Any]) -> str: function get_parallel_config (line 188) | def get_parallel_config(config: dict[str, Any], model_name: str) -> list... function _extract_models_from_config (line 208) | def _extract_models_from_config(config_value: Any) -> list[str]: function get_model_list (line 220) | def get_model_list(config: dict[str, Any], function _filter_by_test_func_type (line 259) | def _filter_by_test_func_type(config: dict[str, Any], model_list: list[s... function _extend_turbomind_quant_models (line 273) | def _extend_turbomind_quant_models(quant_config: dict[str, Any], base_mo... function _extend_pytorch_quant_models (line 288) | def _extend_pytorch_quant_models(quant_config: dict[str, Any], base_mode... function _is_kvint_model (line 300) | def _is_kvint_model(config: dict[str, Any], backend: str, model: str, qu... function _base_model_name (line 310) | def _base_model_name(model: str) -> str: function get_quantization_model_list (line 316) | def get_quantization_model_list(type: str) -> list[str]: function get_config (line 348) | def get_config() -> dict[str, Any]: function get_cuda_prefix_by_workerid (line 378) | def get_cuda_prefix_by_workerid(worker_id: str | None, parallel_config: ... function get_cuda_id_by_workerid (line 395) | def get_cuda_id_by_workerid(worker_id: str | None, tp_num: int = 1) -> s... function get_workerid (line 406) | def get_workerid(worker_id: str | None) -> int: function is_quantization_model (line 415) | def is_quantization_model(model: str) -> bool: function _get_communicator_list (line 421) | def _get_communicator_list(config: dict[str, Any], function set_device_env_variable (line 439) | def set_device_env_variable(worker_id: str | None, parallel_config: dict... function unset_device_env_variable (line 460) | def unset_device_env_variable(): function is_model_in_list (line 470) | def is_model_in_list(config: dict[str, Any], parallel_config: dict[str, ... function get_case_str_by_config (line 476) | def get_case_str_by_config(run_config: dict[str, Any], is_simple: bool =... function parse_config_by_case (line 501) | def parse_config_by_case(case_str: str) -> dict[str, Any]: function test_config (line 531) | def test_config(): function test_get_case_str_by_config (line 574) | def test_get_case_str_by_config(): function test_cli_common_param (line 596) | def test_cli_common_param(): function test_return_info_turbomind (line 637) | def test_return_info_turbomind(): function test_return_info_pytorch (line 741) | def test_return_info_pytorch(): function test_run_config (line 845) | def test_run_config(): function test_get_parallel_config (line 880) | def test_get_parallel_config(): FILE: autotest/utils/evaluate_utils.py function write_to_summary (line 16) | def write_to_summary(case_name, result, msg, metrics, result_dir): function llm_summary (line 67) | def llm_summary(case_name, result, msg, work_dir, result_dir=None): function mllm_summary (line 107) | def mllm_summary(case_name, function eval_test (line 146) | def eval_test(model_path, eval_path, case_name, port=DEFAULT_PORT, test_... function mllm_eval_test (line 268) | def mllm_eval_test(model_path, eval_path, case_name, port=DEFAULT_PORT, ... FILE: autotest/utils/get_run_config.py function get_model_name (line 5) | def get_model_name(model): function _simple_model_name (line 51) | def _simple_model_name(model): FILE: autotest/utils/mp_log_utils.py function write_log (line 7) | def write_log(config, result, msg, is_new: bool = True, case_path_tag: s... function assert_log (line 22) | def assert_log(config, case_path_tag: str = 'default'): FILE: autotest/utils/pipeline_chat.py function run_pipeline_llm_test (line 13) | def run_pipeline_llm_test(config, run_config, common_case_config, worker... function run_pipeline_mllm_test (line 73) | def run_pipeline_mllm_test(config, run_config, worker_id: str = '', is_s... function get_response_from_output (line 165) | def get_response_from_output(output_text, case): function get_response_from_output_by_prompt (line 169) | def get_response_from_output_by_prompt(output_text, case, prompt): function assert_pipeline_single_return (line 178) | def assert_pipeline_single_return(output, logprobs_num: int = 0): function assert_pipeline_batch_return (line 186) | def assert_pipeline_batch_return(output, size: int = 1): function assert_pipeline_single_stream_return (line 196) | def assert_pipeline_single_stream_return(output, logprobs_num: int = 0): function assert_pipeline_batch_stream_return (line 205) | def assert_pipeline_batch_stream_return(output, size: int = 1): function assert_pipeline_single_element (line 214) | def assert_pipeline_single_element(output, is_stream: bool = False, is_l... function internvl_vl_testcase (line 246) | def internvl_vl_testcase(output_text, file, lang: str = 'en'): function MiniCPM_vl_testcase (line 288) | def MiniCPM_vl_testcase(output_text, file): function Qwen_vl_testcase (line 315) | def Qwen_vl_testcase(output_text, file): function save_pipeline_common_log (line 342) | def save_pipeline_common_log(config, log_name, result, content, msg: str... function assert_pipeline_common_log (line 351) | def assert_pipeline_common_log(config, log_name): FILE: autotest/utils/proxy_distributed_utils.py function is_port_open (line 18) | def is_port_open(host: str, port: int, timeout: float = 1.0) -> bool: function check_nodes_status (line 29) | def check_nodes_status(host: str, proxy_port: int, model_name: str, expe... function wait_for_model_service_ready (line 79) | def wait_for_model_service_ready(host: str, function proxy_worker_node_wait (line 147) | def proxy_worker_node_wait(manager, timeout_minutes: int = 120): class ProxyDistributedManager (line 183) | class ProxyDistributedManager: method __init__ (line 185) | def __init__(self): method start (line 193) | def start(self): method cleanup (line 206) | def cleanup(self): class ApiServerPerTest (line 216) | class ApiServerPerTest: method __init__ (line 218) | def __init__(self, proxy_manager: ProxyDistributedManager, config: dic... method start (line 236) | def start(self): method wait_until_ready (line 269) | def wait_until_ready(self): method cleanup (line 280) | def cleanup(self): FILE: autotest/utils/quantization_utils.py function quantization (line 6) | def quantization(config, FILE: autotest/utils/ray_distributed_utils.py function wait_for_model_service_ready (line 20) | def wait_for_model_service_ready( function verify_service_functionality (line 72) | def verify_service_functionality(host: str, api_port: int, model_name: s... class RayLMDeployManager (line 102) | class RayLMDeployManager: method __init__ (line 104) | def __init__( method start_ray_cluster (line 137) | def start_ray_cluster(self): method start_lmdeploy_api_server (line 153) | def start_lmdeploy_api_server(self, config: dict[str, Any], run_config... method cleanup (line 219) | def cleanup(self, force: bool = True): method get_cluster_info (line 255) | def get_cluster_info(self) -> dict[str, Any]: method __enter__ (line 266) | def __enter__(self): method __exit__ (line 269) | def __exit__(self, exc_type, exc_val, exc_tb): function ray_worker_node_wait (line 273) | def ray_worker_node_wait(manager: RayLMDeployManager, timeout_minutes: i... FILE: autotest/utils/restful_return_check.py function assert_chat_completions_batch_return (line 4) | def assert_chat_completions_batch_return(output, model_name, check_logpr... function assert_completions_batch_return (line 22) | def assert_completions_batch_return(output, model_name, check_logprobs: ... function assert_usage (line 39) | def assert_usage(usage): function assert_logprobs (line 46) | def assert_logprobs(logprobs, logprobs_num): function assert_logprob_element (line 55) | def assert_logprob_element(logprob): function assert_chat_completions_stream_return (line 61) | def assert_chat_completions_stream_return(output, function assert_completions_stream_return (line 89) | def assert_completions_stream_return(output, function has_repeated_fragment (line 117) | def has_repeated_fragment(text, repeat_count=5): FILE: autotest/utils/rule_condition_assert.py function assert_result (line 1) | def assert_result(input, rule_condition, model_name: str = None): FILE: autotest/utils/run_client_chat.py function run_tests (line 12) | def run_tests(config, usercase, cli_case_config, run_config, worker_id): function hf_command_line_test (line 23) | def hf_command_line_test(config, case, case_info, run_config, cuda_prefi... function command_test (line 46) | def command_test(config, cmd, run_config, case_info, need_extract_output): function parse_dialogue (line 117) | def parse_dialogue(inputs: str): function extract_output (line 126) | def extract_output(output: str, model: str): FILE: autotest/utils/run_restful_chat.py function start_openai_service (line 22) | def start_openai_service(config, run_config, worker_id, timeout: int = 1... function stop_restful_api (line 96) | def stop_restful_api(pid, startRes): function terminate_restful_api (line 104) | def terminate_restful_api(worker_id): function run_all_step (line 119) | def run_all_step(log_path, case_name, cases_info, port: int = DEFAULT_PO... function open_chat_test (line 137) | def open_chat_test(log_path, case_name, case_info, url): function health_check (line 194) | def health_check(url, model_name): function get_model (line 210) | def get_model(url): function _run_logprobs_test (line 220) | def _run_logprobs_test(port: int = DEFAULT_PORT): function run_vl_testcase (line 244) | def run_vl_testcase(log_path, resource_path, port: int = DEFAULT_PORT): function _run_reasoning_case (line 297) | def _run_reasoning_case(log_path, port: int = DEFAULT_PORT): function test_internlm_multiple_round_prompt (line 342) | def test_internlm_multiple_round_prompt(client, model): function test_qwen_multiple_round_prompt (line 443) | def test_qwen_multiple_round_prompt(client, model): function _run_tools_case (line 588) | def _run_tools_case(log_path, port: int = DEFAULT_PORT): function proxy_health_check (line 691) | def proxy_health_check(url): function start_proxy_server (line 704) | def start_proxy_server(log_path, port, case_name: str = 'default'): function run_llm_test (line 770) | def run_llm_test(config, run_config, common_case_config, worker_id): function run_mllm_test (line 786) | def run_mllm_test(config, run_config, worker_id): function run_reasoning_case (line 800) | def run_reasoning_case(config, run_config, worker_id): function run_tools_case (line 812) | def run_tools_case(config, run_config, worker_id): function run_logprob_test (line 824) | def run_logprob_test(config, run_config, worker_id): FILE: autotest/utils/toolkit.py function parse_sse_stream (line 6) | def parse_sse_stream(content: str) -> list[str]: function _load_tokenizer_cached (line 25) | def _load_tokenizer_cached(model_path: str): function encode_text (line 33) | def encode_text(model_path: str, text: str) -> list[int]: FILE: benchmark/benchmark_decode.py function benchmark (line 13) | def benchmark(model_path, share_gpt_path, downsample=100, accel=None, sa... FILE: benchmark/benchmark_pipeline.py function get_cmd (line 9) | def get_cmd(model_path, backend, engine_config, data_config): function benchmark (line 36) | def benchmark(model_path, backend, engine_config, data_config): function main (line 63) | def main(model_path=None, backend=None, config_path=None): FILE: benchmark/benchmark_serving.py function get_launching_server_cmd (line 10) | def get_launching_server_cmd(model_path, backend, server_config): function get_output_file (line 31) | def get_output_file(model_path, backend, server_config): function get_server_ip_port (line 58) | def get_server_ip_port(backend: str, server_config: Dict) -> Tuple[str, ... function wait_server_ready (line 78) | def wait_server_ready(server_ip: str, server_port: int) -> bool: function get_client_cmd (line 93) | def get_client_cmd(backend: str, server_ip: str, server_port: int, clien... function benchmark (line 115) | def benchmark(model_path: str, backend: str, server_config: Dict, data_c... function validate_config (line 169) | def validate_config(config: Dict) -> None: function main (line 190) | def main(backend: str, config_path: str, model_path: Optional[str] = None): FILE: benchmark/benchmark_throughput.py function get_cmd (line 9) | def get_cmd(model_path, backend, engine_config, data_config): function benchmark (line 36) | def benchmark(model_path, backend, engine_config, data_config): function main (line 63) | def main(model_path=None, backend=None, config_path=None): FILE: benchmark/profile_pipeline_api.py function sample_sharegpt_requests (line 20) | def sample_sharegpt_requests( function sample_random_requests (line 66) | def sample_random_requests( class Engine (line 132) | class Engine: method __init__ (line 134) | def __init__(self, model_path: str, engine_config, csv: str): method process_request (line 140) | def process_request(self, requests, profiler: Profiler, temperature, t... function parse_args (line 199) | def parse_args(): function main (line 284) | def main(): FILE: benchmark/profile_restful_api.py class RequestFuncInput (line 55) | class RequestFuncInput: class RequestFuncOutput (line 66) | class RequestFuncOutput: function remove_prefix (line 77) | def remove_prefix(text: str, prefix: str) -> str: function async_request_trt_llm (line 83) | async def async_request_trt_llm( function async_request_openai_completions (line 153) | async def async_request_openai_completions( function async_request_openai_chat_completions (line 231) | async def async_request_openai_chat_completions( function async_request_sglang_generate (line 339) | async def async_request_sglang_generate( function async_request_gserver (line 416) | async def async_request_gserver( function get_model (line 423) | def get_model(pretrained_model_name_or_path: str) -> str: function get_tokenizer (line 438) | def get_tokenizer(pretrained_model_name_or_path: str, ) -> Union[PreTrai... function get_processor (line 449) | def get_processor(pretrained_model_name_or_path: str, ) -> Union[PreTrai... class BenchmarkMetrics (line 476) | class BenchmarkMetrics: function download_and_cache_file (line 506) | def download_and_cache_file(url: str, filename: Optional[str] = None): class DatasetRow (line 541) | class DatasetRow: method __post_init__ (line 549) | def __post_init__(self): function sample_sharegpt_requests (line 556) | def sample_sharegpt_requests(dataset_path: str, function compute_random_lens (line 609) | def compute_random_lens(full_len: int, range_ratio: float, num: int): function sample_random_requests (line 617) | def sample_random_requests( function parse_image_resolution (line 686) | def parse_image_resolution(image_resolution: str) -> Tuple[int, int]: function gen_mm_prompt (line 714) | def gen_mm_prompt(tokenizer, image_pad_id, token_num): function create_mm_data_row (line 724) | def create_mm_data_row(text_prompt, images: list, images_base64, output_... function sample_image_requests (line 794) | def sample_image_requests( function get_request (line 887) | async def get_request( function calculate_metrics (line 905) | def calculate_metrics( function benchmark (line 980) | async def benchmark( function parse_request_rate_range (line 1161) | def parse_request_rate_range(request_rate_range): function check_chat_template (line 1169) | def check_chat_template(model_path): function run_benchmark (line 1178) | def run_benchmark(args_: argparse.Namespace): function set_ulimit (line 1330) | def set_ulimit(target_soft_limit=65535): FILE: benchmark/profile_throughput.py function sample_sharegpt_requests (line 24) | def sample_sharegpt_requests( function sample_random_requests (line 69) | def sample_random_requests( class Engine (line 135) | class Engine: method __init__ (line 137) | def __init__(self, model_path: str, engine_config: Union[PytorchEngine... method _inference (line 151) | async def _inference(self, req_queue: Queue, session_id: int, temperat... method process_request (line 199) | def process_request(self, requests, profiler: Profiler, concurrency, t... function parse_args (line 237) | def parse_args(): function main (line 337) | def main(): FILE: docs/en/conf.py function metrics (line 62) | def metrics(): FILE: docs/zh_cn/conf.py function metrics (line 62) | def metrics(): FILE: eval/eval.py class ProcessManager (line 9) | class ProcessManager: method __init__ (line 12) | def __init__(self): method __enter__ (line 16) | def __enter__(self): method __exit__ (line 27) | def __exit__(self, exc_type, exc_val, exc_tb): method _signal_handler (line 33) | def _signal_handler(self, sig, frame): method start_process (line 40) | def start_process(self, cmd): method cleanup (line 44) | def cleanup(self): function read_config (line 58) | def read_config(): function update_datasets (line 80) | def update_datasets(config, datasets): function get_model_name_from_server (line 118) | def get_model_name_from_server(server: str, tag: str) -> str: function save_config (line 128) | def save_config(work_dir: str, config: str): function perform_evaluation (line 144) | def perform_evaluation(config, api_server, judger_server, mode, work_dir... function main (line 195) | def main(): FILE: examples/lite/qwen3_30b_a3b_awq.py function parse_args (line 9) | def parse_args(): function main (line 25) | def main(): FILE: examples/lite/qwen3_30b_a3b_gptq.py function parse_args (line 9) | def parse_args(): function main (line 25) | def main(): FILE: lmdeploy/api.py function pipeline (line 15) | def pipeline(model_path: str, function serve (line 78) | def serve(model_path: str, function client (line 101) | def client(api_server_url: str = 'http://0.0.0.0:23333', api_key: str | ... FILE: lmdeploy/archs.py function autoget_backend (line 13) | def autoget_backend(model_path: str) -> Literal['turbomind', 'pytorch']: function autoget_backend_config (line 58) | def autoget_backend_config( function check_vl_llm (line 96) | def check_vl_llm(backend: str, config: dict) -> bool: function get_task (line 131) | def get_task(backend: str, model_path: str): function get_model_arch (line 147) | def get_model_arch(model_path: str): function search_nested_config (line 176) | def search_nested_config(config, key): FILE: lmdeploy/cli/chat.py function input_prompt (line 10) | def input_prompt(): function build_pipe (line 17) | def build_pipe(model_path, backend, **kwargs): function build_gen_config (line 55) | def build_gen_config(**kwargs): function get_adapter_name (line 63) | def get_adapter_name(adapters=None, **kwargs): function main (line 71) | def main(model_path, backend, **kwargs): FILE: lmdeploy/cli/cli.py class CLI (line 10) | class CLI(object): method add_parser_chat (line 18) | def add_parser_chat(): method add_parser_checkenv (line 78) | def add_parser_checkenv(): method check_env (line 93) | def check_env(args): method chat (line 157) | def chat(args): method add_parsers (line 169) | def add_parsers(): FILE: lmdeploy/cli/entrypoint.py function run (line 10) | def run(): FILE: lmdeploy/cli/lite.py class SubCliLite (line 6) | class SubCliLite(object): method add_parser_auto_awq (line 18) | def add_parser_auto_awq(): method add_parser_auto_gptq (line 44) | def add_parser_auto_gptq(): method add_parser_calibrate (line 66) | def add_parser_calibrate(): method add_parser_smooth_quant (line 83) | def add_parser_smooth_quant(): method auto_awq (line 107) | def auto_awq(args): method auto_gptq (line 114) | def auto_gptq(args): method calibrate (line 121) | def calibrate(args): method smooth_quant (line 128) | def smooth_quant(args): method add_parsers (line 135) | def add_parsers(): FILE: lmdeploy/cli/serve.py class SubCliServe (line 10) | class SubCliServe: method add_parser_api_server (line 22) | def add_parser_api_server(): method add_parser_proxy (line 161) | def add_parser_proxy(): method api_server (line 201) | def api_server(args): method proxy (line 337) | def proxy(args): method add_parsers (line 344) | def add_parsers(): FILE: lmdeploy/cli/utils.py class DefaultsAndTypesHelpFormatter (line 15) | class DefaultsAndTypesHelpFormatter(argparse.HelpFormatter): method _get_help_string (line 18) | def _get_help_string(self, action): function convert_args (line 35) | def convert_args(args): function get_lora_adapters (line 42) | def get_lora_adapters(adapters: List[str]): function get_chat_template (line 71) | def get_chat_template(chat_template: str, model_path: str = None): function get_speculative_config (line 102) | def get_speculative_config(args): class ArgumentHelper (line 115) | class ArgumentHelper: method model_name (line 119) | def model_name(parser): method dtype (line 130) | def dtype(parser, default: str = 'auto'): method quant_dtype (line 142) | def quant_dtype(parser, default: str = 'int8'): method model_format (line 151) | def model_format(parser, default: str = None): method revision (line 161) | def revision(parser, default: str = None): method download_dir (line 170) | def download_dir(parser, default: str = None): method tp (line 178) | def tp(parser): method dp (line 187) | def dp(parser): method ep (line 196) | def ep(parser): method cp (line 205) | def cp(parser): method dp_rank (line 215) | def dp_rank(parser): method node_rank (line 224) | def node_rank(parser): method num_nodes (line 230) | def num_nodes(parser): method dist_init_addr (line 236) | def dist_init_addr(parser): method session_id (line 242) | def session_id(parser): method session_len (line 248) | def session_len(parser, default: int = None): method max_batch_size (line 255) | def max_batch_size(parser): method quant_policy (line 265) | def quant_policy(parser, default: int = 0): method rope_scaling_factor (line 275) | def rope_scaling_factor(parser): method hf_overrides (line 281) | def hf_overrides(parser): method use_logn_attn (line 289) | def use_logn_attn(parser): method block_size (line 298) | def block_size(parser): method top_p (line 304) | def top_p(parser): method top_k (line 316) | def top_k(parser): method temperature (line 327) | def temperature(parser, default: float = 0.8): method repetition_penalty (line 331) | def repetition_penalty(parser): method log_level (line 340) | def log_level(parser): method api_keys (line 351) | def api_keys(parser): method ssl (line 361) | def ssl(parser): method backend (line 372) | def backend(parser): method stream_output (line 382) | def stream_output(parser): method calib_dataset (line 388) | def calib_dataset(parser): method calib_samples (line 399) | def calib_samples(parser): method calib_seqlen (line 408) | def calib_seqlen(parser): method calib_batchsize (line 414) | def calib_batchsize(parser): method calib_search_scale (line 426) | def calib_search_scale(parser): method device (line 438) | def device(parser, default: str = 'cuda', choices: List[str] = ['cuda'... method chat_template (line 448) | def chat_template(parser): method reasoning_parser (line 461) | def reasoning_parser(parser): method tool_call_parser (line 472) | def tool_call_parser(parser): method allow_terminate_by_client (line 483) | def allow_terminate_by_client(parser): method enable_abort_handling (line 492) | def enable_abort_handling(parser): method cache_max_entry_count (line 502) | def cache_max_entry_count(parser): method adapters (line 512) | def adapters(parser): method work_dir (line 525) | def work_dir(parser): method cache_block_seq_len (line 534) | def cache_block_seq_len(parser): method enable_prefix_caching (line 548) | def enable_prefix_caching(parser): method num_tokens_per_iter (line 557) | def num_tokens_per_iter(parser): method max_prefill_iters (line 564) | def max_prefill_iters(parser): method async_ (line 571) | def async_(parser): method max_prefill_token_num (line 581) | def max_prefill_token_num(parser): method vision_max_batch_size (line 588) | def vision_max_batch_size(parser): method max_log_len (line 592) | def max_log_len(parser): method disable_fastapi_docs (line 600) | def disable_fastapi_docs(parser): method eager_mode (line 608) | def eager_mode(parser): method communicator (line 618) | def communicator(parser): method enable_microbatch (line 627) | def enable_microbatch(parser): method enable_eplb (line 635) | def enable_eplb(parser): method disable_metrics (line 641) | def disable_metrics(parser): method role (line 650) | def role(parser): method migration_backend (line 660) | def migration_backend(parser): method disable_vision_encoder (line 668) | def disable_vision_encoder(parser): method logprobs_mode (line 676) | def logprobs_mode(parser): method dllm_block_length (line 685) | def dllm_block_length(parser): method dllm_unmasking_strategy (line 690) | def dllm_unmasking_strategy(parser): method dllm_denoising_steps (line 699) | def dllm_denoising_steps(parser): method dllm_confidence_threshold (line 707) | def dllm_confidence_threshold(parser): method enable_return_routed_experts (line 715) | def enable_return_routed_experts(parser): method add_spec_group (line 724) | def add_spec_group(parser): method distributed_executor_backend (line 745) | def distributed_executor_backend(parser): class FlexibleArgumentParser (line 755) | class FlexibleArgumentParser(argparse.ArgumentParser): method parse_args (line 758) | def parse_args(self, args=None, namespace=None): FILE: lmdeploy/lite/apis/auto_awq.py function save_vl_model (line 18) | def save_vl_model(vl_model, model_path, dst_path): function auto_awq (line 41) | def auto_awq(model: str, FILE: lmdeploy/lite/apis/calibrate.py function _prepare_for_calibrate (line 78) | def _prepare_for_calibrate(model: nn.Module, function make_compatible_internvl_config (line 149) | def make_compatible_internvl_config(model_path): function update_moe_mapping (line 166) | def update_moe_mapping(model, model_type): function calibrate (line 198) | def calibrate(model: str, FILE: lmdeploy/lite/apis/get_small_sharded_hf.py function parse_args (line 12) | def parse_args(): function main (line 20) | def main(): FILE: lmdeploy/lite/apis/gptq.py function auto_gptq (line 11) | def auto_gptq(model: str, FILE: lmdeploy/lite/apis/smooth_quant.py function smooth_quant (line 17) | def smooth_quant(model: str, FILE: lmdeploy/lite/modeling/internlm2_gptq.py class InternLM2GPTQForCausalLM (line 5) | class InternLM2GPTQForCausalLM(BaseGPTQForCausalLM): FILE: lmdeploy/lite/modeling/internlm3_gptq.py class InternLM3GPTQForCausalLM (line 5) | class InternLM3GPTQForCausalLM(BaseGPTQForCausalLM): FILE: lmdeploy/lite/quantization/activation/observer.py class KVCacheObserver (line 8) | class KVCacheObserver(GlobalAvailMixin): method __init__ (line 12) | def __init__(self, num_head: int, head_dim: int) -> None: method observe (line 26) | def observe(self, x: torch.Tensor) -> None: class ActivationObserver (line 53) | class ActivationObserver(GlobalAvailMixin): method __init__ (line 61) | def __init__(self, dim: int) -> None: method disable (line 79) | def disable(cls): method enable (line 84) | def enable(cls): method observe (line 89) | def observe(self, x: torch.Tensor, save_input: bool = False) -> None: method save_ratio (line 127) | def save_ratio(self, ratio: float) -> None: FILE: lmdeploy/lite/quantization/awq.py function skipped_module (line 128) | def skipped_module(name: str): function get_weight_scale (line 137) | def get_weight_scale(weight, q_group_size=-1): function smooth_ln_fcs (line 153) | def smooth_ln_fcs(ln: torch.nn.Module, function smooth_fc_fcs (line 206) | def smooth_fc_fcs(pre_fc: torch.nn.Module, function check_awq_supported (line 269) | def check_awq_supported(layer_type): function quant_weights (line 296) | def quant_weights(model, fcs, bits, symmetry, group_size=-1, device='cud... function smooth_layers (line 323) | def smooth_layers(layers, fc2fcs, norm2fcs, a_scales, group_size=-1, dev... function pseudo_quantize_tensor (line 351) | def pseudo_quantize_tensor(w, w_bit=8, w_group_size=-1, return_scale_zer... function awq_layers (line 380) | def awq_layers(layers, fc2fcs, norm2fcs, a_scales, a_ratios=None, group_... FILE: lmdeploy/lite/quantization/calibration.py class CalibrationContext (line 16) | class CalibrationContext(): method __init__ (line 30) | def __init__(self, method _guess_num_heads (line 81) | def _guess_num_heads(self, model): method _init_input_observers (line 92) | def _init_input_observers(self, name2mod): method _init_output_observers (line 98) | def _init_output_observers(self, name2mod): method _insert_input_observers (line 104) | def _insert_input_observers(self): method _insert_output_observers (line 121) | def _insert_output_observers(self): method _wrap_decoder_layers (line 138) | def _wrap_decoder_layers(self): method collect_inputs_stats (line 168) | def collect_inputs_stats(self): method collect_outputs_stats (line 183) | def collect_outputs_stats(self): method export (line 199) | def export(self, out_dir): method calibrate (line 216) | def calibrate(self, data): method __enter__ (line 227) | def __enter__(self): method __exit__ (line 241) | def __exit__(self, exc_type, exc_value, traceback): function auto_scale_block (line 253) | def auto_scale_block(module, module_kwargs, w_bit, w_group_size, input_f... class CalibrationContextV2 (line 337) | class CalibrationContextV2(CalibrationContext): method __init__ (line 339) | def __init__(self, method _insert_input_observers (line 355) | def _insert_input_observers(self): method export (line 372) | def export(self, out_dir): method _wrap_decoder_layers_for_search (line 399) | def _wrap_decoder_layers_for_search(self): method __enter__ (line 441) | def __enter__(self): FILE: lmdeploy/lite/quantization/modules/linear.py class WeightOnlyQLinear (line 15) | class WeightOnlyQLinear(nn.Module): method __init__ (line 28) | def __init__( method from_linear (line 74) | def from_linear(cls: Type['WeightOnlyQLinear'], method forward (line 141) | def forward(self, x): FILE: lmdeploy/lite/quantization/weight/quant_utils.py function _aligned_size (line 7) | def _aligned_size(a, b): function fast_log2_ceil_torch (line 11) | def fast_log2_ceil_torch(x: torch.Tensor) -> torch.Tensor: function fast_pow2_torch (line 21) | def fast_pow2_torch(x: torch.Tensor) -> torch.Tensor: function fast_round_scale_torch (line 26) | def fast_round_scale_torch(amax: torch.Tensor, fp8_max: torch.Tensor) ->... function _get_quant_scaling (line 30) | def _get_quant_scaling(weight: torch.Tensor, function quant_blocked_fp8 (line 47) | def quant_blocked_fp8(weight: torch.Tensor, FILE: lmdeploy/lite/quantization/weight/quantizer.py class WeightQuantizer (line 13) | class WeightQuantizer(GlobalAvailMixin): method __init__ (line 59) | def __init__(self, bits: int, symmetry: bool, granularity: str, group_... method calculate_qparams (line 81) | def calculate_qparams(self, weight: torch.Tensor) -> QParams: method quant (line 98) | def quant(self, weight: torch.Tensor, qparams: Optional[QParams] = Non... FILE: lmdeploy/lite/utils/batch_split.py function split_decoder_layer_inputs (line 7) | def split_decoder_layer_inputs(batch_size, *args: Union[torch.Tensor, Any], function concat_decoder_layer_outputs (line 61) | def concat_decoder_layer_outputs(batch_outputs: List[Any]) -> Any: FILE: lmdeploy/lite/utils/cal_qparams.py class QParams (line 7) | class QParams(NamedTuple): function precise_round (line 15) | def precise_round(x): function cal_qparams_per_channel_absmax (line 20) | def cal_qparams_per_channel_absmax(w: torch.Tensor, n_bits: int, return_... function cal_qparams_per_channel_minmax (line 36) | def cal_qparams_per_channel_minmax(w: torch.Tensor, n_bits: int, return_... function cal_qparams_per_group_absmax (line 58) | def cal_qparams_per_group_absmax(w: torch.Tensor, n_bits: int, group_siz... function cal_qparams_per_group_minmax (line 79) | def cal_qparams_per_group_minmax(w: torch.Tensor, n_bits: int, group_siz... function cal_qparams_per_tensor_minmax (line 105) | def cal_qparams_per_tensor_minmax(w: torch.Tensor, n_bits: int, return_s... function cal_qparams_per_tensor_absmax (line 125) | def cal_qparams_per_tensor_absmax(w: torch.Tensor, n_bits: int, return_s... FILE: lmdeploy/lite/utils/calib_dataloader.py function set_seed (line 8) | def set_seed(seed): function process_dataset (line 14) | def process_dataset(ds, tokenizer, max_seq_length): function get_wikitext2 (line 102) | def get_wikitext2(dataset, tokenizer, nsamples, seed, seqlen): function get_c4 (line 128) | def get_c4(dataset, tokenizer, nsamples, seed, seqlen): function get_pileval (line 158) | def get_pileval(dataset, tokenizer, nsamples, seed, seqlen=512): function get_gsm8k (line 211) | def get_gsm8k(dataset, tokenizer, nsamples, seed, seqlen): function get_neuralmagic_calibration (line 250) | def get_neuralmagic_calibration(dataset, tokenizer, nsamples, seed, seql... function get_open_platypus (line 289) | def get_open_platypus(dataset, tokenizer, nsamples, seed, seqlen): function get_openwebtext (line 328) | def get_openwebtext(dataset, tokenizer, nsamples, seed, seqlen): function get_calib_loaders (line 362) | def get_calib_loaders(name, tokenizer, nsamples=128, seed=0, seqlen=2048): FILE: lmdeploy/lite/utils/collect.py function collect_target_modules (line 7) | def collect_target_modules(model: nn.Module, function collect_target_weights (line 41) | def collect_target_weights(model: nn.Module, target: Union[str, type], s... function bimap_name_mod (line 64) | def bimap_name_mod(name2mod_mappings: List[Dict[str, nn.Module]]) -> Tup... FILE: lmdeploy/lite/utils/global_avail.py class GlobalAvailMixin (line 7) | class GlobalAvailMixin: method global_available (line 12) | def global_available(self, key: Union[str, nn.Module] = 'default', gro... method _save_instance (line 24) | def _save_instance(cls, method find (line 44) | def find(cls, key: Union[str, nn.Module] = 'default', group: str = 'de... method find_group (line 60) | def find_group(cls, group: str) -> Dict[Union[str, nn.Module], 'Global... method instances (line 73) | def instances(cls) -> Dict[str, Dict[Union[str, nn.Module], 'GlobalAva... FILE: lmdeploy/lite/utils/load.py class LoadNoInit (line 9) | class LoadNoInit: method __init__ (line 12) | def __init__(self): method __enter__ (line 22) | def __enter__(self, *args, **kwargs): method __exit__ (line 34) | def __exit__(self, *args, **kwargs): function load_hf_from_pretrained (line 47) | def load_hf_from_pretrained(pretrained_model_name_or_path, dtype: Litera... FILE: lmdeploy/lite/utils/memory_efficient.py function extract_return_values (line 15) | def extract_return_values(module: nn.Module) -> List[str]: function find_kv_cache_idx (line 36) | def find_kv_cache_idx(module: nn.Module) -> int: function find_modules_by_return_value (line 46) | def find_modules_by_return_value(model: nn.Module, value: str) -> List[n... function offload_kv_cache (line 79) | def offload_kv_cache(model: nn.Module, device: str = 'cuda') -> None: function offload_weights (line 141) | def offload_weights(model: nn.Module, device: str = 'cuda') -> None: function memory_efficient_inference (line 198) | def memory_efficient_inference(model: nn.Module, offload: bool = True, d... FILE: lmdeploy/logger.py class RequestLogger (line 11) | class RequestLogger: method __init__ (line 20) | def __init__(self, max_log_len: Optional[int]) -> None: method log_prompt (line 23) | def log_prompt(self, session_id: int, prompt: str) -> None: method log_inputs (line 34) | def log_inputs(self, session_id: int, prompt: Optional[str], prompt_to... FILE: lmdeploy/messages.py class GenerationConfig (line 25) | class GenerationConfig: method convert_stop_bad_words_to_ids (line 138) | def convert_stop_bad_words_to_ids(self, tokenizer: Tokenizer): method update_from_hf_gen_cfg (line 160) | def update_from_hf_gen_cfg(self, generation_config, tokenizer_eos_toke... method __post_init__ (line 179) | def __post_init__(self): class TurbomindEngineConfig (line 190) | class TurbomindEngineConfig: method __post_init__ (line 290) | def __post_init__(self): class PytorchEngineConfig (line 304) | class PytorchEngineConfig: method __post_init__ (line 425) | def __post_init__(self): class ResponseType (line 450) | class ResponseType(enum.Enum): class Response (line 467) | class Response: method __str__ (line 499) | def __str__(self): method __repr__ (line 502) | def __repr__(self): method _format_none_text_fields (line 505) | def _format_none_text_fields(self): method extend (line 529) | def extend(self, other: 'Response') -> 'Response': class EventType (line 557) | class EventType(enum.IntEnum): class EngineEvent (line 572) | class EngineEvent: method new_event (line 583) | def new_event(cls, event_type: EventType, timestamp: Optional[float] =... class ScheduleMetrics (line 591) | class ScheduleMetrics: class RequestMetrics (line 602) | class RequestMetrics: class EngineOutput (line 615) | class EngineOutput: class VisionConfig (line 638) | class VisionConfig: class SpeculativeConfig (line 654) | class SpeculativeConfig: FILE: lmdeploy/metrics/loggers.py class StatLoggerBase (line 17) | class StatLoggerBase(ABC): method record_schedule (line 20) | def record_schedule(self, stats: SchedulerStats) -> None: method record_iteration (line 24) | def record_iteration(self, stats: IterationStats) -> None: method record_specdecode (line 28) | def record_specdecode(self, stats: SpeculativeDecodingStats) -> None: method log (line 31) | def log(self): # noqa class LoggingStatLogger (line 35) | class LoggingStatLogger(StatLoggerBase): method __init__ (line 37) | def __init__(self, dp_rank: int = 0): method _reset (line 42) | def _reset(self, now): method record_schedule (line 52) | def record_schedule(self, stats: SchedulerStats): method record_iteration (line 55) | def record_iteration(self, stats: IterationStats): method record_specdecode (line 62) | def record_specdecode(self, stats: SpeculativeDecodingStats): method record_finish (line 73) | def record_finish(self, stats: RequestStats): method get_spec_msg (line 76) | def get_spec_msg(self): method log (line 98) | def log(self): class PrometheusStatLogger (line 133) | class PrometheusStatLogger(StatLoggerBase): method __init__ (line 135) | def __init__(self, model_name: str, max_model_len: int, dp_rank: int =... method record_schedule (line 309) | def record_schedule(self, stats: SchedulerStats) -> None: method record_iteration (line 319) | def record_iteration(self, stats: IterationStats) -> None: method record_finish (line 335) | def record_finish(self, stats: RequestStats) -> None: method record_specdecode (line 345) | def record_specdecode(self, stats: SpeculativeDecodingStats) -> None: function build_buckets (line 349) | def build_buckets(mantissa_lst: List[int], max_value: int) -> List[int]: function build_1_2_5_buckets (line 364) | def build_1_2_5_buckets(max_value: int) -> List[int]: FILE: lmdeploy/metrics/metrics_processor.py class MetricsProcessor (line 14) | class MetricsProcessor(): method __init__ (line 17) | def __init__(self): method start_metrics_handler (line 25) | def start_metrics_handler(self, enable_metrics: bool): method stop_metrics_handler (line 33) | async def stop_metrics_handler(self): method _run_metrics_handler (line 45) | async def _run_metrics_handler(self): method update_schedule_stats (line 83) | async def update_schedule_stats(self, schedule_metrics: ScheduleMetrics): method queue_update (line 90) | def queue_update(self, update_data: tuple): method increase_total_requests (line 96) | def increase_total_requests(self): method increase_completed_requests (line 100) | def increase_completed_requests(self): method increase_api_routed_requests (line 104) | def increase_api_routed_requests(self): method decrease_api_routed_requests (line 108) | def decrease_api_routed_requests(self): FILE: lmdeploy/metrics/stats.py class SchedulerStats (line 14) | class SchedulerStats: method __repr__ (line 44) | def __repr__(self): method update_from_schedule_metrics (line 56) | def update_from_schedule_metrics(self, scheduled_metrics: ScheduleMetr... class RequestStats (line 63) | class RequestStats: method __init__ (line 66) | def __init__(self, arrival_time: float = None, prompt_tokens: int = 0): method __repr__ (line 100) | def __repr__(self): method update_from_events (line 111) | def update_from_events(self, engine_events: List[EngineEvent]): method e2e_latency (line 126) | def e2e_latency(self) -> float: method queued_time_interval (line 131) | def queued_time_interval(self) -> float: method prefill_time_interval (line 136) | def prefill_time_interval(self) -> float: method decode_time_interval (line 144) | def decode_time_interval(self) -> float: method inference_time_interval (line 152) | def inference_time_interval(self) -> float: class IterationStats (line 160) | class IterationStats: method __init__ (line 163) | def __init__(self): method __repr__ (line 181) | def __repr__(self): method _time_since (line 191) | def _time_since(self, start: float) -> float: method update_from_output (line 195) | def update_from_output(self, outputs: EngineOutput, req_stats: Request... class SpeculativeDecodingStats (line 231) | class SpeculativeDecodingStats: method __post_init__ (line 240) | def __post_init__(self): method update_from_output (line 244) | def update_from_output(self, outputs: EngineOutput): method update_per_draft (line 253) | def update_per_draft(self, num_draft_tokens: int, num_accepted_tokens:... method __repr__ (line 261) | def __repr__(self): FILE: lmdeploy/model.py function random_uuid (line 16) | def random_uuid() -> str: function get_text (line 21) | def get_text(content: Union[str, List[dict]]): class ChatTemplateConfig (line 35) | class ChatTemplateConfig: method chat_template (line 69) | def chat_template(self): method to_json (line 80) | def to_json(self, file_path=None): method from_json (line 90) | def from_json(cls, file_or_string): class BaseChatTemplate (line 111) | class BaseChatTemplate: method __init__ (line 114) | def __init__(self, method get_prompt (line 141) | def get_prompt(self, prompt, sequence_start=True): method messages2prompt (line 167) | def messages2prompt(self, messages, sequence_start=True, **kwargs): method match (line 194) | def match(cls, model_path: str) -> Optional[str]: class CogVLM (line 204) | class CogVLM(BaseChatTemplate): method __init__ (line 207) | def __init__(self, method match (line 228) | def match(cls, model_path: str) -> Optional[str]: class Vicuna (line 240) | class Vicuna(BaseChatTemplate): method __init__ (line 243) | def __init__( method get_prompt (line 262) | def get_prompt(self, prompt, sequence_start=True): method messages2prompt (line 267) | def messages2prompt(self, messages, sequence_start=True, **kwargs): method match (line 273) | def match(cls, model_path: str) -> Optional[str]: class Llavav1 (line 287) | class Llavav1(Vicuna): method __init__ (line 290) | def __init__( method match (line 297) | def match(cls, model_path: str) -> Optional[str]: class InternLMChat7B (line 312) | class InternLMChat7B(BaseChatTemplate): method __init__ (line 315) | def __init__( method match (line 342) | def match(cls, model_path: str) -> Optional[str]: class Baichuan2 (line 355) | class Baichuan2(BaseChatTemplate): method __init__ (line 359) | def __init__(self, user='', assistant='', ... method match (line 363) | def match(cls, model_path: str) -> Optional[str]: class Llama2 (line 375) | class Llama2(BaseChatTemplate): method __init__ (line 378) | def __init__( method match (line 401) | def match(cls, model_path: str) -> Optional[str]: class CodeLlama (line 412) | class CodeLlama(Llama2): method __init__ (line 414) | def __init__(self, meta_instruction='', suffix_first=False, stop_words... method get_prompt (line 427) | def get_prompt(self, prompt, sequence_start=True): method _infill_prompt (line 435) | def _infill_prompt(self, prompt): method match (line 446) | def match(cls, model_path: str) -> Optional[str]: class ChatGLM2 (line 457) | class ChatGLM2(BaseChatTemplate): method __init__ (line 459) | def __init__(self, user='问:', eoh='\n\n', assistant='答:', eoa='\n\n', ... method get_prompt (line 467) | def get_prompt(self, prompt, sequence_start=True): method messages2prompt (line 478) | def messages2prompt(self, messages, sequence_start=True, **kwargs): method match (line 497) | def match(cls, model_path: str) -> Optional[str]: class MistralChat (line 509) | class MistralChat(BaseChatTemplate): method __init__ (line 516) | def __init__(self, user='[INST] ', eoh=' [/INST]', eoa='', **kwargs): method match (line 520) | def match(cls, model_path: str) -> Optional[str]: class InternVLZH (line 535) | class InternVLZH(BaseChatTemplate): method __init__ (line 537) | def __init__(self, user=': ', eoh=' ', assistant=': ', eoa... method get_prompt (line 540) | def get_prompt(self, prompt, sequence_start=True): method messages2prompt (line 545) | def messages2prompt(self, messages, sequence_start=True, **kwargs): method match (line 551) | def match(cls, model_path: str) -> Optional[str]: class DeepseekVL (line 563) | class DeepseekVL(BaseChatTemplate): method __init__ (line 565) | def __init__( method get_prompt (line 582) | def get_prompt(self, prompt, sequence_start=True): method messages2prompt (line 587) | def messages2prompt(self, messages, sequence_start=True, **kwargs): method match (line 593) | def match(cls, model_path: str) -> Optional[str]: class DeepseekVL2 (line 605) | class DeepseekVL2(BaseChatTemplate): method __init__ (line 607) | def __init__(self, method get_prompt (line 623) | def get_prompt(self, prompt, sequence_start=True): method messages2prompt (line 626) | def messages2prompt(self, messages, sequence_start=True, **kwargs): method match (line 632) | def match(cls, model_path: str) -> Optional[str]: class ChatmlDirect (line 644) | class ChatmlDirect(BaseChatTemplate): method __init__ (line 646) | def __init__(self, method match (line 667) | def match(cls, model_path: str) -> Optional[str]: class HFChatTemplate (line 679) | class HFChatTemplate(BaseChatTemplate): method __init__ (line 685) | def __init__(self, model_path: str = '', **kwargs): method get_prompt (line 706) | def get_prompt(self, prompt, sequence_start=True, **kwargs): method messages2prompt (line 710) | def messages2prompt(self, messages, sequence_start=True, **kwargs): method _user_instruction (line 745) | def _user_instruction(self): method _assistant_instruction (line 756) | def _assistant_instruction(self): method _system_instruction (line 773) | def _system_instruction(self): method match (line 790) | def match(cls, model_path: str) -> Optional[str]: function get_chat_template (line 798) | def get_chat_template(model_path: str, config: Optional[ChatTemplateConf... FILE: lmdeploy/pipeline.py class Pipeline (line 30) | class Pipeline: method __init__ (line 33) | def __init__(self, method infer (line 83) | def infer(self, method batch_infer (line 125) | def batch_infer(self, *args, **kwargs): method stream_infer (line 128) | def stream_infer(self, method close (line 164) | def close(self): method chat (line 169) | def chat(self, method session (line 230) | def session(self) -> 'Session': method get_reward_score (line 234) | def get_reward_score(self, input_ids: List) -> List[float]: method get_ppl (line 256) | def get_ppl(self, input_ids: List[int] | List[List[int]]) -> List[float]: method __call__ (line 306) | def __call__(self, method __enter__ (line 312) | def __enter__(self): method __exit__ (line 315) | def __exit__(self, exc_type, exc_value, traceback): method generate (line 319) | async def generate(self, *args, **kwargs): method _is_single (line 328) | def _is_single(prompts): method _request_generator (line 333) | def _request_generator(self, method _get_limiter (line 370) | def _get_limiter(self): method _infer (line 375) | def _infer(self, requests: Iterator[Dict], multiplex: bool, pbar=None,... method _run (line 413) | def _run(self, fn=None, coro=None): method _batch_iterator (line 424) | def _batch_iterator(self, sizes, max_value): method _get_long_text_ppl (line 446) | def _get_long_text_ppl(self, session, input_ids, max_input_len): method _get_ppl (line 472) | def _get_ppl(self, class _EventLoopThread (line 523) | class _EventLoopThread: method __init__ (line 525) | def __init__(self, daemon=False): method _thread_entry (line 534) | def _thread_entry(self, fut): method _cancel_all_tasks (line 550) | def _cancel_all_tasks(self): method close (line 574) | def close(self): FILE: lmdeploy/profiler.py class Session (line 10) | class Session: method __init__ (line 16) | def __init__(self, input_len, req_output_len): method tick (line 23) | def tick(self, n_token): method finish (line 27) | def finish(self, status): class Profiler (line 31) | class Profiler: method __init__ (line 33) | def __init__(self, stream_output: bool, percentages: List[int]): method new_session (line 38) | def new_session(self, *args, **kwargs): method start (line 43) | def start(self): method finish (line 46) | def finish(self): method compute_metrics (line 49) | def compute_metrics(self): method summarize (line 106) | def summarize(self, title: str, hyperparams: List = None, header=40, d... method save_csv (line 140) | def save_csv(self, csv_file: str, hyperparams): FILE: lmdeploy/pytorch/adapter/adapter.py function get_ranks_and_scalings (line 10) | def get_ranks_and_scalings(target_name: str, cfgs: Iterable, device: tor... function find_all_target (line 26) | def find_all_target(model: torch.nn.Module, target_name: str): function get_layer_index (line 48) | def get_layer_index(key: str, layers_pattern: str = None): function _get_reverse_pack_map (line 63) | def _get_reverse_pack_map(model: nn.Module): function _get_key_map (line 73) | def _get_key_map(reverse_map: Dict[str, str]): function load_lora_weights (line 84) | def load_lora_weights(model: nn.Module, weights: Iterable[Tuple[str, tor... class AdapterManager (line 111) | class AdapterManager: method __init__ (line 114) | def __init__(self, adapters: Dict[str, str]): method get_adapter_ids (line 125) | def get_adapter_ids(self, names: List[str]): method num_adapters (line 128) | def num_adapters(self): FILE: lmdeploy/pytorch/backends/activation.py class SiluAndMulImpl (line 5) | class SiluAndMulImpl(ABC): method forward (line 9) | def forward(self, x): class SiluAndMulBuilder (line 14) | class SiluAndMulBuilder(ABC): method build (line 19) | def build(inplace: bool = False): class GeluAndMulImpl (line 24) | class GeluAndMulImpl(ABC): method forward (line 28) | def forward(self, x): class GeluAndMulBuilder (line 33) | class GeluAndMulBuilder(ABC): method build (line 38) | def build(approximate: str = 'none'): FILE: lmdeploy/pytorch/backends/apply_rotary_emb.py class ApplyRotaryEmbImpl (line 7) | class ApplyRotaryEmbImpl(ABC): method forward (line 11) | def forward(self, query: Tensor, key: Tensor, cos: Tensor, sin: Tensor... class ApplyRotaryEmbBuilder (line 16) | class ApplyRotaryEmbBuilder(ABC): method build (line 21) | def build(): FILE: lmdeploy/pytorch/backends/attention.py class AttentionMetadata (line 11) | class AttentionMetadata: class AttentionImpl (line 27) | class AttentionImpl(ABC, Generic[T]): method __init__ (line 30) | def __init__( method make_alibi_slopes (line 67) | def make_alibi_slopes(head_start: int, head_end: int, num_heads: int, ... method set_alibi_slopes (line 85) | def set_alibi_slopes(self, slopes: torch.Tensor): method forward (line 89) | def forward( class AttentionBuilder (line 107) | class AttentionBuilder(ABC, Generic[T]): method build (line 112) | def build( FILE: lmdeploy/pytorch/backends/awq_modules.py class LinearW4A16Impl (line 8) | class LinearW4A16Impl(ABC): method update_weights (line 11) | def update_weights(self, method forward (line 20) | def forward(self, class LinearW4A16Builder (line 30) | class LinearW4A16Builder(ABC): method build (line 35) | def build(in_features: int, FILE: lmdeploy/pytorch/backends/base.py class OpType (line 13) | class OpType(Enum): class OpsBackend (line 45) | class OpsBackend(ABC): method get_name (line 50) | def get_name() -> str: method get_layer_impl_builder (line 56) | def get_layer_impl_builder(cls, layer_type: OpType): method get_attention_metadata_cls (line 62) | def get_attention_metadata_cls(): method get_k_block_shape (line 68) | def get_k_block_shape( method get_v_block_shape (line 79) | def get_v_block_shape( method update_step_context (line 89) | def update_step_context(cls, step_context): method build_graph_runner (line 97) | def build_graph_runner(model: torch.nn.Module, model_config: ModelConf... method device_count (line 104) | def device_count(): method support_ray (line 109) | def support_ray(): FILE: lmdeploy/pytorch/backends/blockedf8_modules.py class LinearBlockedF8Impl (line 9) | class LinearBlockedF8Impl(ABC): method __init__ (line 12) | def __init__(self): method update_weights (line 15) | def update_weights(self, weight: torch.Tensor, scale: torch.Tensor, bi... method set_scale_fmt (line 19) | def set_scale_fmt(self, scale_fmt: Optional[str]): method forward (line 24) | def forward(self, class LinearBlockedF8Builder (line 37) | class LinearBlockedF8Builder(ABC): method build (line 42) | def build(in_features: int, out_features: int, bias: bool = True, dtyp... FILE: lmdeploy/pytorch/backends/causal_conv1d.py class CausalConv1dImpl (line 7) | class CausalConv1dImpl(ABC): method conv1d_fn (line 11) | def conv1d_fn(self, method update_fn (line 22) | def update_fn(self, class CausalConv1dBuilder (line 33) | class CausalConv1dBuilder(ABC): method build (line 38) | def build(): FILE: lmdeploy/pytorch/backends/cuda/activation.py class TritonSiluAndMulImpl (line 7) | class TritonSiluAndMulImpl(SiluAndMulImpl): method __init__ (line 10) | def __init__(self, inplace: bool): method forward (line 13) | def forward(self, x): class TritonSiluAndMulBuilder (line 30) | class TritonSiluAndMulBuilder(SiluAndMulBuilder): method build (line 34) | def build(inplace: bool = False): FILE: lmdeploy/pytorch/backends/cuda/apply_rotary_emb.py class TritonApplyRotaryEmbImpl (line 10) | class TritonApplyRotaryEmbImpl(ApplyRotaryEmbImpl): method forward (line 13) | def forward(self, query: Tensor, key: Tensor, cos: Tensor, sin: Tensor... class TritonApplyRotaryEmbBuilder (line 24) | class TritonApplyRotaryEmbBuilder(ApplyRotaryEmbBuilder): method build (line 28) | def build(): FILE: lmdeploy/pytorch/backends/cuda/attention/__init__.py function use_fa3_warning (line 26) | def use_fa3_warning(): function _enable_fa3 (line 35) | def _enable_fa3(alibi: bool, learnable_sink: bool, block_sparse_size: in... function _normalize_sliding_window (line 53) | def _normalize_sliding_window(sliding_window): class TritonAttentionBuilder (line 69) | class TritonAttentionBuilder(AttentionBuilder[TritonAttentionMetadata]): method build (line 79) | def build( FILE: lmdeploy/pytorch/backends/cuda/attention/default.py class TritonAttentionMetadata (line 14) | class TritonAttentionMetadata(AttentionMetadata): function _cdiv (line 56) | def _cdiv(a, b): class TritonAttentionImpl (line 69) | class TritonAttentionImpl(AttentionImpl[TritonAttentionMetadata]): method __init__ (line 72) | def __init__( method _get_max_q_seqlen (line 111) | def _get_max_q_seqlen( method _get_fill_meta (line 126) | def _get_fill_meta( method _fill_kv_cache_impl (line 138) | def _fill_kv_cache_impl( method _forward_decoding (line 177) | def _forward_decoding( method _forward_prefill (line 226) | def _forward_prefill( method forward (line 298) | def forward( FILE: lmdeploy/pytorch/backends/cuda/attention/fa3.py class FA3Impl (line 11) | class FA3Impl(TritonAttentionImpl): method __init__ (line 24) | def __init__( method _get_max_q_seqlen (line 54) | def _get_max_q_seqlen( method _normalize_sliding_window (line 66) | def _normalize_sliding_window(self, sliding_window): method _decoding_speculative (line 81) | def _decoding_speculative( method _decoding_standard (line 126) | def _decoding_standard( method _forward_decoding (line 176) | def _forward_decoding( method _forward_prefill (line 210) | def _forward_prefill( method forward (line 275) | def forward( FILE: lmdeploy/pytorch/backends/cuda/attention/mla.py function _cdiv (line 14) | def _cdiv(a, b): function _try_dynamic_compile (line 19) | def _try_dynamic_compile(func, *args, **kwargs): class NSAIndicesUpdater (line 29) | class NSAIndicesUpdater: method __init__ (line 36) | def __init__(self): method _update_decode_impl (line 40) | def _update_decode_impl(self, nsa_indices: torch.Tensor, block_offsets... method update_decode (line 51) | def update_decode(self, nsa_indices: torch.Tensor, block_offsets: torc... method _update_prefill_impl (line 59) | def _update_prefill_impl(self, nsa_indices: torch.Tensor, q_seqlens: t... method update_prefill (line 68) | def update_prefill(self, nsa_indices: torch.Tensor, q_seqlens: torch.T... method build (line 78) | def build(): class FlashMLAImpl (line 82) | class FlashMLAImpl(TritonAttentionImpl): method __init__ (line 97) | def __init__( method _get_flash_mla_sparse_fwd (line 143) | def _get_flash_mla_sparse_fwd(self): method flash_mla_decoding (line 154) | def flash_mla_decoding( method _prefill_sparse (line 196) | def _prefill_sparse(self, query: torch.Tensor, flatten_k: torch.Tensor... method _prefill_triton (line 232) | def _prefill_triton( method _prefill_fa3 (line 271) | def _prefill_fa3( method run_flatten_kv_cache (line 315) | def run_flatten_kv_cache(self, method _get_max_q_seqlen (line 369) | def _get_max_q_seqlen( method _fill_kv_cache_impl (line 382) | def _fill_kv_cache_impl(self, method _forward_decoding (line 449) | def _forward_decoding( method _forward_prefill (line 472) | def _forward_prefill( method forward (line 520) | def forward( FILE: lmdeploy/pytorch/backends/cuda/awq_modules.py function wq_gemm_forward (line 11) | def wq_gemm_forward( class AwqLinearW4A16Impl (line 43) | class AwqLinearW4A16Impl(LinearW4A16Impl): method __init__ (line 46) | def __init__(self, in_features: int, out_features: int, w_bit: int, gr... method forward (line 52) | def forward(self, class AwqLinearW4A16Builder (line 68) | class AwqLinearW4A16Builder(LinearW4A16Builder): method build (line 72) | def build(in_features: int, FILE: lmdeploy/pytorch/backends/cuda/blockedf8_modules.py class TritonLinearBlockedF8Impl (line 16) | class TritonLinearBlockedF8Impl(LinearBlockedF8Impl): method __init__ (line 19) | def __init__(self, in_features: int, out_features: int, block_size: in... method forward (line 26) | def forward(self, class TritonLinearBlockedF8Builder (line 58) | class TritonLinearBlockedF8Builder(LinearBlockedF8Builder): method build (line 62) | def build(in_features: int, out_features: int, block_size: int = 128, ... class DeepGemmLinearBlockedF8Impl (line 73) | class DeepGemmLinearBlockedF8Impl(LinearBlockedF8Impl): method __init__ (line 76) | def __init__(self, in_features: int, out_features: int, block_size: in... method warmup (line 89) | def warmup(self, warmup_meta: WarmupMeta): method forward (line 112) | def forward(self, FILE: lmdeploy/pytorch/backends/cuda/causal_conv1d.py class CausalConv1dTilelangImpl (line 10) | class CausalConv1dTilelangImpl(CausalConv1dImpl): method __init__ (line 13) | def __init__(self): method conv1d_fn (line 18) | def conv1d_fn(self, method update_fn (line 32) | def update_fn(self, class CausalConv1dDaoImpl (line 48) | class CausalConv1dDaoImpl(CausalConv1dTilelangImpl): method __init__ (line 50) | def __init__(self): function has_dao (line 61) | def has_dao(): class CausalConv1dCudaBuilder (line 71) | class CausalConv1dCudaBuilder(CausalConv1dBuilder): method build (line 75) | def build() -> CausalConv1dImpl: FILE: lmdeploy/pytorch/backends/cuda/flash_attention.py class TritonFlashAttentionImpl (line 7) | class TritonFlashAttentionImpl(FlashAttentionImpl): method __init__ (line 10) | def __init__( method forward (line 42) | def forward(self, class TritonFlashAttentionBuilder (line 71) | class TritonFlashAttentionBuilder(FlashAttentionBuilder): method build (line 75) | def build( FILE: lmdeploy/pytorch/backends/cuda/gated_delta_rule.py function has_fla (line 11) | def has_fla(): class CudaGatedDeltaRuleImpl (line 19) | class CudaGatedDeltaRuleImpl(GatedDeltaRuleImpl): method __init__ (line 21) | def __init__(self): method chunk_gated_delta_rule (line 30) | def chunk_gated_delta_rule(self, method fused_recurrent_gated_delta_rule (line 68) | def fused_recurrent_gated_delta_rule(self, class CudaGatedDeltaRuleBuilder (line 93) | class CudaGatedDeltaRuleBuilder(GatedDeltaRuleBuilder): method build (line 96) | def build() -> GatedDeltaRuleImpl: FILE: lmdeploy/pytorch/backends/cuda/graph_runner.py function next_power_of_2 (line 22) | def next_power_of_2(n: int): function _get_capture_batch_size_impl (line 36) | def _get_capture_batch_size_impl(max_batches: int): function _false (line 54) | def _false(*args, **kwargs): class CUDASingleGraphRunner (line 59) | class CUDASingleGraphRunner: method __init__ (line 62) | def __init__( method capture (line 102) | def capture(self, **kwargs): method forward (line 127) | def forward(self, **kwargs): method __del__ (line 138) | def __del__(self): class CUDAGraphRunner (line 143) | class CUDAGraphRunner(GraphRunner): method __init__ (line 146) | def __init__(self, model: torch.nn.Module, model_config: ModelConfig, ... method check_enable_graph (line 164) | def check_enable_graph(self): method _try_compile_model_once (line 171) | def _try_compile_model_once(self): method _get_capture_tokens (line 182) | def _get_capture_tokens(self, batch_size: int): method get_graph_key (line 190) | def get_graph_key(self, input_ids: torch.Tensor, position_ids: torch.T... method _prepare_inputs (line 206) | def _prepare_inputs(self, **kwargs): method _get_max_tokens (line 214) | def _get_max_tokens(self, graph_key: tuple, input_ids: torch.Tensor, q... method __call__ (line 222) | def __call__(self, **kwargs): method prepare_inputs_for_generation (line 262) | def prepare_inputs_for_generation( method reset (line 281) | def reset(self): method update_inputs (line 293) | def update_inputs(self, inputs): method get_capture_batch_sizes (line 306) | def get_capture_batch_sizes(self) -> List[int]: FILE: lmdeploy/pytorch/backends/cuda/lora.py class PackedLoRAInput (line 13) | class PackedLoRAInput: class TritonLoRAImpl (line 23) | class TritonLoRAImpl(LoRAImpl): method _make_packed_lora_input (line 27) | def _make_packed_lora_input(x, ctx_mgr): method forward (line 41) | def forward(self, class TritonLoRABuilder (line 84) | class TritonLoRABuilder(LoRABuilder): method build (line 88) | def build(): FILE: lmdeploy/pytorch/backends/cuda/moe/blocked_fp8.py class TritonFusedMoEBlockedF8Impl (line 22) | class TritonFusedMoEBlockedF8Impl(FusedMoEBlockedF8Impl): method __init__ (line 25) | def __init__(self, method ep_expert_list (line 38) | def ep_expert_list(self, world_size: int, rank: int): method forward (line 46) | def forward(self, class FusedDeepEpMoEBlockedF8Impl (line 90) | class FusedDeepEpMoEBlockedF8Impl(TritonFusedMoEBlockedF8Impl): method __init__ (line 92) | def __init__(self, method ep_expert_list (line 128) | def ep_expert_list(self, world_size: int, rank: int): method forward (line 141) | def forward(self, method do_renormalize (line 168) | def do_renormalize(self, topk_weights): method fusedmoe_build (line 171) | def fusedmoe_build(self, low_latency_mode: bool = False): class TritonFusedMoEBlockedF8Builder (line 186) | class TritonFusedMoEBlockedF8Builder(FusedMoEBlockedF8Builder): method build (line 190) | def build(top_k: int, FILE: lmdeploy/pytorch/backends/cuda/moe/default.py class TritonFusedMoEImpl (line 21) | class TritonFusedMoEImpl(FusedMoEImpl): method __init__ (line 24) | def __init__(self, top_k: int, num_experts: int, renormalize: bool = F... method update_weights (line 29) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ... method ep_expert_list (line 34) | def ep_expert_list(self, world_size: int, rank: int): method forward (line 42) | def forward(self, class FusedMoENormal (line 73) | class FusedMoENormal: method __init__ (line 75) | def __init__( method forward (line 99) | def forward( method capture (line 121) | def capture(self): method wait (line 124) | def wait(self, event): method dispatch_async (line 128) | def dispatch_async(self, method combine_async (line 138) | def combine_async(self, x: torch.Tensor, handle: tuple, previous_event... method release (line 141) | def release(self): method fusedmoe_forward (line 144) | def fusedmoe_forward(self, state, up_weight, down_weight): function _disposible_tensor (line 150) | def _disposible_tensor(tensor): function dispatch_ll (line 159) | def dispatch_ll( function dispatch_async_ll (line 200) | def dispatch_async_ll( class FusedMoELowLatency (line 230) | class FusedMoELowLatency: method __init__ (line 232) | def __init__( method experts (line 253) | def experts( method forward (line 279) | def forward(self, method wait (line 300) | def wait(self, event): method dispatch_async (line 303) | def dispatch_async( method combine_async (line 313) | def combine_async( method fusedmoe_forward (line 323) | def fusedmoe_forward(self, state, up_weight, down_weight): function build_deepep_moe (line 333) | def build_deepep_moe( class FusedMoEEPImpl (line 360) | class FusedMoEEPImpl(TritonFusedMoEImpl): method __init__ (line 363) | def __init__( method update_weights (line 398) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ... method forward (line 401) | def forward(self, method ep_expert_list (line 425) | def ep_expert_list(self, world_size: int, rank: int): method do_renormalize (line 432) | def do_renormalize(self, topk_weights): method fusedmoe_build (line 435) | def fusedmoe_build(self, low_latency_mode: bool = False): class TritonFusedMoEBuilder (line 447) | class TritonFusedMoEBuilder(FusedMoEBuilder): method build (line 451) | def build( FILE: lmdeploy/pytorch/backends/cuda/moe/ep_utils.py function split_inputs_by_attn_tp (line 10) | def split_inputs_by_attn_tp( function gather_outputs_by_attn_tp (line 37) | def gather_outputs_by_attn_tp(out_states: torch.Tensor, split_size: List... FILE: lmdeploy/pytorch/backends/cuda/moe/w8a8.py class TritonFusedMoEW8A8Impl (line 16) | class TritonFusedMoEW8A8Impl(FusedMoEW8A8Impl): method __init__ (line 19) | def __init__( method update_weights (line 33) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ... method forward (line 38) | def forward(self, class TritonFusedMoEW8A8Builder (line 77) | class TritonFusedMoEW8A8Builder(FusedMoEW8A8Builder): method build (line 81) | def build( FILE: lmdeploy/pytorch/backends/cuda/moe_router.py function is_power_of_two (line 12) | def is_power_of_two(n): class TritonRouterNoauxTCImpl (line 16) | class TritonRouterNoauxTCImpl(DefaultRouterNoauxTCImpl): method __init__ (line 18) | def __init__( method should_enable_custom_kernel (line 42) | def should_enable_custom_kernel(self) -> bool: method forward (line 60) | def forward(self, logits: torch.Tensor, bias: torch.Tensor) -> Tuple[t... class TritonRouterNoauxTCBuilder (line 77) | class TritonRouterNoauxTCBuilder(RouterNoauxTCBuilder): method build (line 80) | def build( FILE: lmdeploy/pytorch/backends/cuda/multinomial_sampling.py class TritonMultinomialSamplingImpl (line 10) | class TritonMultinomialSamplingImpl(MultinomialSamplingImpl): method forward (line 12) | def forward(self, class TritonMultinomialSamplingBuilder (line 21) | class TritonMultinomialSamplingBuilder(MultinomialSamplingBuilder): method build (line 24) | def build(): FILE: lmdeploy/pytorch/backends/cuda/norm.py class TritonRMSNormImpl (line 9) | class TritonRMSNormImpl(RMSNormImpl): method __init__ (line 12) | def __init__(self, hidden_size: int, eps: float = 1e-6): method forward (line 16) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor... class TritonRMSNormBuilder (line 26) | class TritonRMSNormBuilder(RMSNormBuilder): method build (line 30) | def build(weight: torch.Tensor, eps: float = 1e-6): FILE: lmdeploy/pytorch/backends/cuda/nsa.py class TritonNSAIndexFP8 (line 12) | class TritonNSAIndexFP8(BaseNSAIndexFP8): method __init__ (line 14) | def __init__(self, topk: int, softmax_scale: float, block_size: int, f... method forward (line 23) | def forward(self, q: Tensor, k: Tensor, weights: Tensor, k_cache: Tens... class TritonNSAIndexFP8Builder (line 68) | class TritonNSAIndexFP8Builder(BaseNSAIndexFP8Builder): method build (line 71) | def build(topk: int, softmax_scale: float, block_size: int = 128, fill... FILE: lmdeploy/pytorch/backends/cuda/op_backend.py class CudaOpsBackend (line 15) | class CudaOpsBackend(DefaultOpsBackend): method get_name (line 19) | def get_name() -> str: method get_layer_impl_builder (line 24) | def get_layer_impl_builder(cls, layer_type: OpType): method get_attention_metadata_cls (line 85) | def get_attention_metadata_cls(): method get_k_block_shape (line 91) | def get_k_block_shape( method get_v_block_shape (line 105) | def get_v_block_shape( method update_meta_flashmla (line 119) | def update_meta_flashmla(cls, attn_metadata, model_config: ModelConfig... method update_meta_flashattn (line 139) | def update_meta_flashattn(cls, attn_metadata, step_context): method update_step_context (line 162) | def update_step_context(cls, step_context): method build_graph_runner (line 207) | def build_graph_runner(model: torch.nn.Module, model_config: ModelConf... method device_count (line 225) | def device_count(): method support_ray (line 230) | def support_ray(): FILE: lmdeploy/pytorch/backends/cuda/qmodules.py class TritonRMSNormW8A8Impl (line 14) | class TritonRMSNormW8A8Impl(RMSNormW8A8Impl): method __init__ (line 17) | def __init__(self, hidden_size: int, eps: float = 1e-6, quant_dtype: t... method forward (line 23) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor... class TritonRMSNormBuilder (line 39) | class TritonRMSNormBuilder(RMSNormW8A8Builder): method build (line 43) | def build(hidden_size: int, eps: float = 1e-6, quant_dtype: torch.dtyp... class TritonLinearW8A8Impl (line 48) | class TritonLinearW8A8Impl(LinearW8A8Impl): method __init__ (line 51) | def __init__(self, method forward (line 61) | def forward(self, class TritonLinearW8A8Builder (line 87) | class TritonLinearW8A8Builder(LinearW8A8Builder): method build (line 91) | def build(in_features: int, FILE: lmdeploy/pytorch/backends/cuda/token_dispatcher.py function get_buffer_common (line 25) | def get_buffer_common( function get_buffer_normal (line 57) | def get_buffer_normal(group: dist.ProcessGroup, hidden_bytes: int): function get_buffer_low_latency (line 77) | def get_buffer_low_latency( class DeepEPTokenDispatcher (line 105) | class DeepEPTokenDispatcher(TokenDispatcherImpl): method __init__ (line 110) | def __init__( method dispatch (line 135) | def dispatch( method dispatch_normal (line 166) | def dispatch_normal( method dispatch_normal_async (line 217) | def dispatch_normal_async(self, method combine (line 267) | def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: method combine_normal (line 274) | def combine_normal(self, x: torch.Tensor, handle: Tuple, previous_even... method combine_normal_async (line 284) | def combine_normal_async(self, x: torch.Tensor, handle: Tuple, previou... method release (line 294) | def release(self): method get_number_of_tokens_per_expert (line 304) | def get_number_of_tokens_per_expert(self) -> torch.Tensor: method get_permuted_hidden_states_by_experts (line 308) | def get_permuted_hidden_states_by_experts(self, method get_restored_hidden_states_by_experts (line 328) | def get_restored_hidden_states_by_experts( class DeepEPTokenDispatcherLowLatency (line 350) | class DeepEPTokenDispatcherLowLatency(TokenDispatcherImpl): method __init__ (line 352) | def __init__( method dispatch (line 378) | def dispatch( method dispatch_async (line 407) | def dispatch_async( method combine (line 427) | def combine( method combine_async (line 444) | def combine_async( class TokenDispatcherBuilder (line 465) | class TokenDispatcherBuilder: method build (line 469) | def build( FILE: lmdeploy/pytorch/backends/cuda/utils.py function has_tilelang (line 6) | def has_tilelang(): FILE: lmdeploy/pytorch/backends/cuda/warmup_manager.py class WarmupMeta (line 13) | class WarmupMeta: class WarmupManager (line 21) | class WarmupManager: method __init__ (line 23) | def __init__(self): method __contains__ (line 26) | def __contains__(self, key: str): method __getitem__ (line 30) | def __getitem__(self, key: str): method __setitem__ (line 34) | def __setitem__(self, key: str, val): method warmup (line 38) | def warmup(self, warmup_meta: WarmupMeta): function get_warmup_manager (line 50) | def get_warmup_manager(): FILE: lmdeploy/pytorch/backends/deepep_moe_checker.py class MoEBackend (line 6) | class MoEBackend: method __init__ (line 8) | def __init__(self): method set_deepep_moe_backend (line 12) | def set_deepep_moe_backend(self): method use_deepep_moe_backend (line 16) | def use_deepep_moe_backend(self): function get_moe_backend (line 21) | def get_moe_backend(): FILE: lmdeploy/pytorch/backends/default/activation.py class DefaultSiluAndMulImpl (line 8) | class DefaultSiluAndMulImpl(SiluAndMulImpl): method __init__ (line 11) | def __init__(self, inplace: bool): method forward (line 15) | def forward(self, x): class DefaultSiluAndMulBuilder (line 21) | class DefaultSiluAndMulBuilder(SiluAndMulBuilder): method build (line 25) | def build(inplace: bool = False): class DefaultGeluAndMulImpl (line 30) | class DefaultGeluAndMulImpl(GeluAndMulImpl): method __init__ (line 33) | def __init__(self, approximate: str = 'none'): method forward (line 36) | def forward(self, x): class DefaultGeluAndMulBuilder (line 42) | class DefaultGeluAndMulBuilder(GeluAndMulBuilder): method build (line 46) | def build(approximate: str = 'none'): FILE: lmdeploy/pytorch/backends/default/apply_rotary_emb.py function rotate_half (line 8) | def rotate_half(x): class DefaultApplyRotaryEmbImpl (line 19) | class DefaultApplyRotaryEmbImpl(ApplyRotaryEmbImpl): method forward (line 22) | def forward(self, query: Tensor, key: Tensor, cos: Tensor, sin: Tensor... class DefaultApplyRotaryEmbBuilder (line 42) | class DefaultApplyRotaryEmbBuilder(ApplyRotaryEmbBuilder): method build (line 46) | def build(): FILE: lmdeploy/pytorch/backends/default/awq_modules.py function get_shifts (line 13) | def get_shifts(bits: int, device: torch.device): function unpack_awq (line 20) | def unpack_awq(qweight: torch.Tensor, qzeros: torch.Tensor, bits: int): function dequantize_gemm (line 38) | def dequantize_gemm(qweight, qzeros, scales, bits, group_size): class DefaultLinearW4A16Impl (line 50) | class DefaultLinearW4A16Impl(LinearW4A16Impl): method __init__ (line 53) | def __init__(self, in_features: int, out_features: int, w_bit: int, gr... method forward (line 59) | def forward(self, class DefaultLinearW4A16Builder (line 85) | class DefaultLinearW4A16Builder(LinearW4A16Builder): method build (line 89) | def build(in_features: int, FILE: lmdeploy/pytorch/backends/default/embedding.py function get_masked_input_and_mask (line 9) | def get_masked_input_and_mask(input: torch.Tensor, start_index: int, end... class DefaultEmbeddingImpl (line 16) | class DefaultEmbeddingImpl(EmbeddingImpl): method __init__ (line 19) | def __init__(self, start_index: int, end_index: int): method forward (line 23) | def forward(self, x, weight: torch.Tensor, all_reduce: bool = False, g... class DefaultEmbeddingBuilder (line 36) | class DefaultEmbeddingBuilder(EmbeddingBuilder): method build (line 40) | def build(start_index: int, end_index: int): FILE: lmdeploy/pytorch/backends/default/linear.py class DefaultLinearImpl (line 11) | class DefaultLinearImpl(LinearImpl): method forward (line 14) | def forward(self, class DefaultLinearBuilder (line 33) | class DefaultLinearBuilder(LinearBuilder): method build (line 37) | def build(in_features: int, out_features: int, bias: bool = True, dtyp... FILE: lmdeploy/pytorch/backends/default/moe.py class DefaultSoftmaxTopKImpl (line 7) | class DefaultSoftmaxTopKImpl(SoftmaxTopKImpl): method __init__ (line 10) | def __init__(self, top_k: int, dim: int = -1, n_groups: int = -1): method forward (line 16) | def forward(self, x: torch.Tensor): class DefaultSoftmaxTopKBuilder (line 35) | class DefaultSoftmaxTopKBuilder(SoftmaxTopKBuilder): method build (line 39) | def build(top_k: int, dim: int = -1, n_groups: int = -1): FILE: lmdeploy/pytorch/backends/default/moe_router.py function _compute_scores (line 10) | def _compute_scores(scoring_func: str, logits: torch.Tensor): function get_group_offsets (line 23) | def get_group_offsets(n_groups: int, group_size: int, device: str | torc... class DefaultRouterNoauxTCImpl (line 28) | class DefaultRouterNoauxTCImpl(RouterNoauxTCImpl): method __init__ (line 30) | def __init__( method _forward_router_n_groups (line 55) | def _forward_router_n_groups(self, scores_for_choice: torch.Tensor) ->... method _forward_default (line 67) | def _forward_default(self, scores: torch.Tensor, scores_for_choice: to... method renorm (line 83) | def renorm(self, topk_weight: torch.Tensor) -> torch.Tensor: method forward (line 93) | def forward(self, logits: torch.Tensor, bias: torch.Tensor) -> Tuple[t... class DefaultRouterNoauxTCBuilder (line 108) | class DefaultRouterNoauxTCBuilder(RouterNoauxTCBuilder): method build (line 111) | def build( FILE: lmdeploy/pytorch/backends/default/multinomial_sampling.py class DefaultMultinomialSamplingImpl (line 8) | class DefaultMultinomialSamplingImpl(MultinomialSamplingImpl): method forward (line 11) | def forward(self, class DefaultMultinomialSamplingBuilder (line 22) | class DefaultMultinomialSamplingBuilder(MultinomialSamplingBuilder): method build (line 26) | def build(): FILE: lmdeploy/pytorch/backends/default/norm.py class DefaultRMSNormImpl (line 7) | class DefaultRMSNormImpl(RMSNormImpl): method __init__ (line 10) | def __init__(self, hidden_size: int, eps: float = 1e-6): method forward (line 14) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor... class DefaultRMSNormBuilder (line 29) | class DefaultRMSNormBuilder(RMSNormBuilder): method build (line 33) | def build(hidden_size: int, eps: float = 1e-6): class DefaultLayerNormImpl (line 38) | class DefaultLayerNormImpl(LayerNormImpl): method __init__ (line 41) | def __init__(self, normalized_shape: int, eps: float = 1e-6): method forward (line 47) | def forward(self, class DefaultLayerNormBuilder (line 62) | class DefaultLayerNormBuilder(LayerNormBuilder): method build (line 66) | def build(normalized_shape: int, eps: float = 1e-6): FILE: lmdeploy/pytorch/backends/default/op_backend.py class DefaultOpsBackend (line 9) | class DefaultOpsBackend(OpsBackend): method get_name (line 12) | def get_name() -> str: method get_layer_impl_builder (line 16) | def get_layer_impl_builder(cls, layer_type: OpType): method get_k_block_shape (line 58) | def get_k_block_shape( method get_v_block_shape (line 72) | def get_v_block_shape( method init (line 86) | def init(): method ccl_backend (line 90) | def ccl_backend() -> str: FILE: lmdeploy/pytorch/backends/default/rotary_embedding.py function safe_torch_compile (line 14) | def safe_torch_compile(**compile_kwargs): function _rotary_embedding_fwd (line 44) | def _rotary_embedding_fwd(position_ids: torch.Tensor, class RotaryEmbeddingImpl (line 74) | class RotaryEmbeddingImpl(RotaryEmbeddingImpl, nn.Module): method __init__ (line 77) | def __init__(self, dim: int, base: int = 10000, scaling_factor: float ... method forward (line 85) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor): class LlamaDynamicNTKScalingRotaryEmbedding (line 98) | class LlamaDynamicNTKScalingRotaryEmbedding(RotaryEmbeddingImpl): method __init__ (line 104) | def __init__(self, dim: int, base: int = 10000, scaling_factor: float ... method _ntk_inv_freq (line 108) | def _ntk_inv_freq(self, seq_len: torch.Tensor): method forward (line 116) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor): class Llama3RotaryEmbeddingImpl (line 134) | class Llama3RotaryEmbeddingImpl(RotaryEmbeddingImpl): method __init__ (line 137) | def __init__( function yarn_find_correction_dim (line 167) | def yarn_find_correction_dim(num_rotations, dim, base=10000, max_positio... function yarn_find_correction_range (line 173) | def yarn_find_correction_range(low_rot, high_rot, dim, base=10000, max_p... function yarn_get_mscale (line 183) | def yarn_get_mscale(scale=1, mscale=1): function yarn_linear_ramp_mask (line 190) | def yarn_linear_ramp_mask(min, max, dim): class YarnRotaryEmbeddingImpl (line 200) | class YarnRotaryEmbeddingImpl(RotaryEmbeddingImpl): method __init__ (line 203) | def __init__(self, method forward (line 244) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor): class LongRoPEScalingRotaryEmbeddingImpl (line 258) | class LongRoPEScalingRotaryEmbeddingImpl(RotaryEmbeddingImpl): method __init__ (line 261) | def __init__( method forward (line 285) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor): class FopeRotaryEmbeddingImpl (line 310) | class FopeRotaryEmbeddingImpl(RotaryEmbeddingImpl): method __init__ (line 312) | def __init__(self, method forward (line 335) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor, sin_coe... class DefaultRotaryEmbeddingBuilder (line 372) | class DefaultRotaryEmbeddingBuilder(RotaryEmbeddingBuilder): method build (line 376) | def build( FILE: lmdeploy/pytorch/backends/default/token_dispatcher.py class AlltoAllTokenDispatcher (line 9) | class AlltoAllTokenDispatcher(TokenDispatcherImpl): method __init__ (line 11) | def __init__( method sort_chunks_by_idxs (line 30) | def sort_chunks_by_idxs(self, input: torch.Tensor, split_sizes: torch.... method all_to_all (line 37) | def all_to_all(self, group: torch.distributed.group, input_: torch.Ten... method preprocess (line 55) | def preprocess(self, routing_map: torch.Tensor, local_expert_indices) ... method dispatch (line 82) | def dispatch(self, hidden_states: torch.Tensor, topk_ids: torch.Tensor... method combine (line 108) | def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: FILE: lmdeploy/pytorch/backends/dlinfer/activation.py class DlinferSiluAndMulImpl (line 7) | class DlinferSiluAndMulImpl(SiluAndMulImpl): method forward (line 10) | def forward(self, x): class DlinferSiluAndMulBuilder (line 15) | class DlinferSiluAndMulBuilder(SiluAndMulBuilder): method build (line 19) | def build(inplace: bool = False): FILE: lmdeploy/pytorch/backends/dlinfer/apply_rotary_emb.py class DlinferApplyRotaryEmbImpl (line 9) | class DlinferApplyRotaryEmbImpl(ApplyRotaryEmbImpl): method forward (line 12) | def forward(self, query: Tensor, key: Tensor, cos: Tensor, sin: Tensor... class DlinferApplyRotaryEmbBuilder (line 23) | class DlinferApplyRotaryEmbBuilder(ApplyRotaryEmbBuilder): method build (line 27) | def build(): FILE: lmdeploy/pytorch/backends/dlinfer/ascend/op_backend.py class SocVersion (line 25) | class SocVersion: method device_name (line 31) | def device_name(cls) -> str: method is_Ascend310P (line 41) | def is_Ascend310P(cls) -> bool: method is_Ascend910 (line 45) | def is_Ascend910(cls) -> bool: method soc_version (line 50) | def soc_version(cls) -> int: method is_A2 (line 54) | def is_A2(cls) -> bool: method is_A3 (line 58) | def is_A3(cls) -> bool: class DistMeta (line 63) | class DistMeta: class AscendKVQuantMeta (line 73) | class AscendKVQuantMeta: method set_value (line 78) | def set_value(cls, device: str, dtype: torch.dtype, record_file: str, ... class AscendOpsBackend (line 118) | class AscendOpsBackend(DlinferOpsBackend): method get_name (line 126) | def get_name() -> str: method get_k_block_shape (line 131) | def get_k_block_shape( method get_v_block_shape (line 143) | def get_v_block_shape( method update_step_context (line 155) | def update_step_context(cls, step_context): method build_graph_runner (line 432) | def build_graph_runner(model: torch.nn.Module, model_config: ModelConf... method init (line 441) | def init(): method ccl_backend (line 453) | def ccl_backend(): method device_count (line 457) | def device_count(): method support_ray (line 462) | def support_ray(): FILE: lmdeploy/pytorch/backends/dlinfer/ascend/utils.py function nd_to_nz_spec (line 8) | def nd_to_nz_spec(tensor: torch.Tensor) -> torch.Tensor: FILE: lmdeploy/pytorch/backends/dlinfer/attention.py class DlinferAttentionMetadata (line 12) | class DlinferAttentionMetadata(AttentionMetadata): class DlinferAttentionImpl (line 23) | class DlinferAttentionImpl(AttentionImpl[DlinferAttentionMetadata]): method __init__ (line 26) | def __init__( method forward (line 58) | def forward( class DlinferAttentionBuilder (line 150) | class DlinferAttentionBuilder(AttentionBuilder[DlinferAttentionMetadata]): method build (line 154) | def build( FILE: lmdeploy/pytorch/backends/dlinfer/awq_modules.py class AwqLinearW4A16Impl (line 11) | class AwqLinearW4A16Impl(LinearW4A16Impl): method __init__ (line 14) | def __init__(self, in_features: int, out_features: int, w_bit: int, gr... method forward (line 20) | def forward(self, class AwqLinearW4A16Builder (line 33) | class AwqLinearW4A16Builder(LinearW4A16Builder): method build (line 37) | def build(in_features: int, FILE: lmdeploy/pytorch/backends/dlinfer/camb/op_backend.py class CambOpsBackend (line 14) | class CambOpsBackend(DlinferOpsBackend): method get_name (line 19) | def get_name() -> str: method get_k_block_shape (line 24) | def get_k_block_shape( method get_v_block_shape (line 37) | def get_v_block_shape( method update_step_context (line 50) | def update_step_context(cls, step_context): method build_graph_runner (line 121) | def build_graph_runner(model: torch.nn.Module, model_config: ModelConf... method support_ray (line 128) | def support_ray(): FILE: lmdeploy/pytorch/backends/dlinfer/flash_attention.py class DlinferFlashAttentionImpl (line 7) | class DlinferFlashAttentionImpl(FlashAttentionImpl): method __init__ (line 10) | def __init__( method forward (line 38) | def forward(self, class DlinferFlashAttentionBuilder (line 71) | class DlinferFlashAttentionBuilder(FlashAttentionBuilder): method build (line 75) | def build( FILE: lmdeploy/pytorch/backends/dlinfer/linear.py class DlinferLinearImpl (line 13) | class DlinferLinearImpl(LinearImpl): method update_weights (line 16) | def update_weights(self, weight: torch.Tensor, bias: Optional[torch.Te... method forward (line 22) | def forward(self, class DlinferLinearBuilder (line 37) | class DlinferLinearBuilder(LinearBuilder): method build (line 41) | def build(in_features: int, out_features: int, bias: bool = True, dtyp... FILE: lmdeploy/pytorch/backends/dlinfer/maca/op_backend.py class MacaOpsBackend (line 14) | class MacaOpsBackend(DlinferOpsBackend): method get_name (line 19) | def get_name() -> str: method get_k_block_shape (line 24) | def get_k_block_shape( method get_v_block_shape (line 33) | def get_v_block_shape( method update_step_context (line 42) | def update_step_context(cls, step_context): method build_graph_runner (line 112) | def build_graph_runner(model: torch.nn.Module, model_config: ModelConf... method support_ray (line 119) | def support_ray(): FILE: lmdeploy/pytorch/backends/dlinfer/moe.py class DlinferSoftmaxTopKImpl (line 15) | class DlinferSoftmaxTopKImpl(SoftmaxTopKImpl): method __init__ (line 18) | def __init__(self, top_k: int, dim: int = -1, n_groups: int = -1): method forward (line 23) | def forward(self, x: torch.Tensor): class DlinferSoftmaxTopKBuilder (line 32) | class DlinferSoftmaxTopKBuilder(SoftmaxTopKBuilder): method build (line 36) | def build(top_k: int, dim: int = -1, n_groups: int = -1): class DlinferFusedMoEImpl (line 41) | class DlinferFusedMoEImpl(FusedMoEImpl): method __init__ (line 44) | def __init__(self, method update_weights (line 63) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ... method ep_expert_list (line 72) | def ep_expert_list(self, world_size: int, rank: int): method forward (line 80) | def forward(self, class DlinferFusedMoEBuilder (line 102) | class DlinferFusedMoEBuilder(FusedMoEBuilder): method build (line 106) | def build(top_k: int, FILE: lmdeploy/pytorch/backends/dlinfer/norm.py class DlinferRMSNormImpl (line 9) | class DlinferRMSNormImpl(RMSNormImpl): method __init__ (line 12) | def __init__(self, hidden_size: int, eps: float = 1e-6): method forward (line 16) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor... class DlinferRMSNormBuilder (line 26) | class DlinferRMSNormBuilder(RMSNormBuilder): method build (line 30) | def build(weight: torch.Tensor, eps: float = 1e-6): FILE: lmdeploy/pytorch/backends/dlinfer/op_backend.py class DlinferOpsBackend (line 14) | class DlinferOpsBackend(DefaultOpsBackend): method get_name (line 18) | def get_name() -> str: method get_layer_impl_builder (line 23) | def get_layer_impl_builder(cls, layer_type: OpType): method get_attention_metadata_cls (line 66) | def get_attention_metadata_cls(): method get_k_block_shape (line 71) | def get_k_block_shape( method get_v_block_shape (line 84) | def get_v_block_shape( method update_step_context (line 97) | def update_step_context(cls, step_context): FILE: lmdeploy/pytorch/backends/dlinfer/qmodules.py class DlinferLinearW8A8Impl (line 14) | class DlinferLinearW8A8Impl(LinearW8A8Impl): method __init__ (line 17) | def __init__(self, method update_weights (line 27) | def update_weights(self, weight: torch.Tensor, scale: torch.Tensor, bi... method forward (line 34) | def forward(self, class DlinferLinearW8A8Builder (line 54) | class DlinferLinearW8A8Builder(LinearW8A8Builder): method build (line 58) | def build(in_features: int, class DlinferRMSNormW8A8Impl (line 67) | class DlinferRMSNormW8A8Impl(RMSNormW8A8Impl): method __init__ (line 70) | def __init__(self, hidden_size: int, eps: float = 1e-6, quant_dtype: t... method forward (line 76) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor... class DlinferRMSNormW8A8Builder (line 88) | class DlinferRMSNormW8A8Builder(RMSNormW8A8Builder): method build (line 92) | def build(hidden_size: int, eps: float = 1e-6, quant_dtype: torch.dtyp... FILE: lmdeploy/pytorch/backends/dlinfer/rotary_embedding.py function _rotary_embedding_fwd (line 14) | def _rotary_embedding_fwd(position_ids: torch.Tensor, class DlinferRotaryEmbeddingImpl (line 41) | class DlinferRotaryEmbeddingImpl(RotaryEmbeddingImpl, nn.Module): method __init__ (line 44) | def __init__(self, dim: int, base: int = 10000, scaling_factor: float ... method forward (line 54) | def forward(self, x, position_ids): class DlinferLlamaDynamicNTKScalingRotaryEmbedding (line 63) | class DlinferLlamaDynamicNTKScalingRotaryEmbedding(LlamaDynamicNTKScalin... method __init__ (line 69) | def __init__(self, dim: int, base: int = 10000, scaling_factor: float ... method _ntk_inv_freq (line 77) | def _ntk_inv_freq(self, seq_len: torch.Tensor): method forward (line 83) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor): class DlinferLlama3RotaryEmbeddingImpl (line 96) | class DlinferLlama3RotaryEmbeddingImpl(DlinferRotaryEmbeddingImpl): method __init__ (line 99) | def __init__( class DlinferYarnRotaryEmbeddingImpl (line 129) | class DlinferYarnRotaryEmbeddingImpl(YarnRotaryEmbeddingImpl): method __init__ (line 132) | def __init__(self, method forward (line 140) | def forward(self, x: torch.Tensor, position_ids: torch.Tensor): class DlinferRotaryEmbeddingBuilder (line 148) | class DlinferRotaryEmbeddingBuilder(RotaryEmbeddingBuilder): method build (line 152) | def build( FILE: lmdeploy/pytorch/backends/embedding.py class EmbeddingImpl (line 8) | class EmbeddingImpl(ABC): method forward (line 12) | def forward(self, x, weight: torch.Tensor, all_reduce: bool = False, g... class EmbeddingBuilder (line 17) | class EmbeddingBuilder(ABC): method build (line 22) | def build(start_index: int, end_index: int): FILE: lmdeploy/pytorch/backends/flash_attention.py class FlashAttentionImpl (line 7) | class FlashAttentionImpl(ABC): method forward (line 10) | def forward(self, class FlashAttentionBuilder (line 23) | class FlashAttentionBuilder(ABC): method build (line 28) | def build( FILE: lmdeploy/pytorch/backends/gated_delta_rule.py class GatedDeltaRuleImpl (line 7) | class GatedDeltaRuleImpl(ABC): method chunk_gated_delta_rule (line 11) | def chunk_gated_delta_rule(self, method fused_recurrent_gated_delta_rule (line 27) | def fused_recurrent_gated_delta_rule(self, class GatedDeltaRuleBuilder (line 42) | class GatedDeltaRuleBuilder(ABC): method build (line 47) | def build() -> GatedDeltaRuleImpl: FILE: lmdeploy/pytorch/backends/graph_runner.py class GraphRunnerMeta (line 13) | class GraphRunnerMeta: function _get_capture_batch_size_impl (line 18) | def _get_capture_batch_size_impl(max_batches: int): class GraphRunner (line 29) | class GraphRunner: method __init__ (line 32) | def __init__(self, model: torch.nn.Module, model_config: ModelConfig, ... method __call__ (line 42) | def __call__(self, **kwargs): method get_model (line 46) | def get_model(self): method get_logits (line 50) | def get_logits(self, hidden_states: torch.Tensor): method prepare_inputs_for_generation (line 56) | def prepare_inputs_for_generation( method update_model_metas (line 69) | def update_model_metas( method get_input_processor (line 85) | def get_input_processor(self): method reset (line 92) | def reset(self): method get_meta (line 96) | def get_meta(self): method update_inputs (line 100) | def update_inputs(self, inputs): method get_capture_batch_sizes (line 103) | def get_capture_batch_sizes(self) -> List[int]: FILE: lmdeploy/pytorch/backends/linear.py class LinearImpl (line 9) | class LinearImpl(ABC): method update_weights (line 12) | def update_weights(self, weight: torch.Tensor, bias: Optional[torch.Te... method forward (line 17) | def forward(self, class LinearBuilder (line 29) | class LinearBuilder(ABC): method build (line 34) | def build(in_features: int, out_features: int, bias: bool = True, dtyp... FILE: lmdeploy/pytorch/backends/lora.py class AdapterInfo (line 11) | class AdapterInfo: method __post_init__ (line 21) | def __post_init__(self): class LoRAImpl (line 30) | class LoRAImpl(ABC): method forward (line 34) | def forward(self, class LoRABuilder (line 47) | class LoRABuilder(ABC): method build (line 52) | def build(): FILE: lmdeploy/pytorch/backends/moe.py class SoftmaxTopKImpl (line 10) | class SoftmaxTopKImpl(ABC): method get_group_offsets (line 15) | def get_group_offsets(n_groups: int, group_size: int, device: str): method forward (line 20) | def forward(self, x: torch.Tensor): class SoftmaxTopKBuilder (line 25) | class SoftmaxTopKBuilder(ABC): method build (line 30) | def build(top_k: int, dim: int = -1, n_groups: int = -1): class FusedMoEImpl (line 35) | class FusedMoEImpl(ABC): method update_weights (line 38) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ... method ep_expert_list (line 42) | def ep_expert_list(self, world_size: int, rank: int): method forward (line 47) | def forward(self, class FusedMoEBuilder (line 61) | class FusedMoEBuilder(ABC): method build (line 66) | def build(top_k: int, class FusedMoEW8A8Impl (line 78) | class FusedMoEW8A8Impl(ABC): method update_weights (line 81) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ... method ep_expert_list (line 86) | def ep_expert_list(self, world_size: int, rank: int): method forward (line 91) | def forward(self, class FusedMoEW8A8Builder (line 105) | class FusedMoEW8A8Builder(ABC): method build (line 110) | def build(top_k: int, class FusedMoEBlockedF8Impl (line 119) | class FusedMoEBlockedF8Impl(ABC): method __init__ (line 122) | def __init__(self): method update_weights (line 125) | def update_weights(self, gate_up_weights: torch.Tensor, down_weights: ... method ep_expert_list (line 130) | def ep_expert_list(self, world_size: int, rank: int): method set_scale_fmt (line 134) | def set_scale_fmt(self, scale_fmt: Optional[str]): method forward (line 139) | def forward(self, class FusedMoEBlockedF8Builder (line 156) | class FusedMoEBlockedF8Builder(ABC): method build (line 161) | def build(top_k: int, FILE: lmdeploy/pytorch/backends/moe_router.py class RouterNoauxTCImpl (line 8) | class RouterNoauxTCImpl(ABC): method forward (line 12) | def forward(self, logits: torch.Tensor, bias: torch.Tensor) -> Tuple[t... class RouterNoauxTCBuilder (line 17) | class RouterNoauxTCBuilder(ABC): method build (line 22) | def build( FILE: lmdeploy/pytorch/backends/multinomial_sampling.py class MultinomialSamplingImpl (line 7) | class MultinomialSamplingImpl(ABC): method forward (line 11) | def forward(scores: torch.Tensor, seeds: torch.LongTensor, offsets: to... class MultinomialSamplingBuilder (line 16) | class MultinomialSamplingBuilder(ABC): method build (line 21) | def build(): FILE: lmdeploy/pytorch/backends/norm.py class RMSNormImpl (line 7) | class RMSNormImpl(ABC): method forward (line 11) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor... class RMSNormBuilder (line 16) | class RMSNormBuilder(ABC): method build (line 21) | def build(hidden_size: int, eps: float = 1e-6): class LayerNormImpl (line 26) | class LayerNormImpl(ABC): method forward (line 30) | def forward(self, x: torch.Tensor, weight: torch.Tensor, bias: torch.T... class LayerNormBuilder (line 35) | class LayerNormBuilder(ABC): method build (line 40) | def build(normalized_shape: int, eps: float = 1e-6): FILE: lmdeploy/pytorch/backends/nsa.py class NSAIndexMeta (line 9) | class NSAIndexMeta: class BaseNSAIndexFP8 (line 19) | class BaseNSAIndexFP8(ABC): method forward (line 22) | def forward(self, q: Tensor, k: Tensor, weights: Tensor, k_cache: Tens... class BaseNSAIndexFP8Builder (line 28) | class BaseNSAIndexFP8Builder: method build (line 32) | def build(topk: int, softmax_scale: float, block_size: int = 128, fill... FILE: lmdeploy/pytorch/backends/qmodules.py class RMSNormW8A8Impl (line 8) | class RMSNormW8A8Impl(ABC): method create_weight (line 12) | def create_weight(hidden_size: int, dtype: torch.dtype = None, device:... method forward (line 22) | def forward(self, x: torch.Tensor, weight: torch.Tensor, residual: tor... class RMSNormW8A8Builder (line 27) | class RMSNormW8A8Builder(ABC): method build (line 32) | def build(hidden_size: int, eps: float = 1e-6, quant_dtype: torch.dtyp... class LinearW8A8Impl (line 37) | class LinearW8A8Impl(ABC): method update_weights (line 40) | def update_weights(self, weight: torch.Tensor, scale: torch.Tensor, bi... method forward (line 45) | def forward(self, class LinearW8A8Builder (line 56) | class LinearW8A8Builder(ABC): method build (line 61) | def build(in_features: int, FILE: lmdeploy/pytorch/backends/rotary_embedding.py class RopeType (line 10) | class RopeType(Enum): class YarnParameters (line 22) | class YarnParameters: class LongRoPEScalingParameters (line 33) | class LongRoPEScalingParameters: class Llama3Parameters (line 43) | class Llama3Parameters: class FopeParameters (line 51) | class FopeParameters: class RotaryEmbeddingImpl (line 59) | class RotaryEmbeddingImpl(ABC): method forward (line 63) | def forward(self, x, position_ids, **kwargs): class RotaryEmbeddingBuilder (line 68) | class RotaryEmbeddingBuilder(ABC): method build (line 73) | def build( FILE: lmdeploy/pytorch/backends/selector.py function _get_backend (line 5) | def _get_backend(): function get_backend (line 28) | def get_backend(backend_type: str = None): function init_backend (line 39) | def init_backend(backend_type: str): FILE: lmdeploy/pytorch/backends/token_dispatcher.py class TokenDispatcherImpl (line 8) | class TokenDispatcherImpl(ABC): method permute (line 11) | def permute( method unpermute (line 25) | def unpermute( method indices_to_multihot (line 43) | def indices_to_multihot(self, topk_ids, topk_weight, num_experts): method dispatch (line 65) | def dispatch(self, hidden_states: torch.Tensor, probs: torch.Tensor, t... method combine (line 71) | def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: FILE: lmdeploy/pytorch/block.py function _div_up (line 5) | def _div_up(x, n): function _round_up (line 10) | def _round_up(x, n): class LogicalTokenBlocks (line 15) | class LogicalTokenBlocks: method __init__ (line 19) | def __init__(self, blocks: np.ndarray = None): method reserve (line 29) | def reserve(self, size: int): method __setitem__ (line 37) | def __setitem__(self, *args, **kwargs): method __getitem__ (line 41) | def __getitem__(self, *args, **kwargs): method get_real_blocks (line 45) | def get_real_blocks(self): method append (line 49) | def append(self, blocks: np.ndarray): method __len__ (line 58) | def __len__(self): method resize (line 62) | def resize(self, num_blocks: int): method reset (line 67) | def reset(self): method clone (line 72) | def clone(self): FILE: lmdeploy/pytorch/check_env/adapter.py class AdapterChecker (line 5) | class AdapterChecker(BaseChecker): method __init__ (line 8) | def __init__(self, adapter_path: str, logger=None): method check (line 12) | def check(self): FILE: lmdeploy/pytorch/check_env/base.py function _red_text (line 11) | def _red_text(text: str): class BaseChecker (line 18) | class BaseChecker: method __init__ (line 21) | def __init__(self, logger: Logger = None): method get_logger (line 28) | def get_logger(self): method register_required_checker (line 32) | def register_required_checker(self, checker: 'BaseChecker'): method handle (line 36) | def handle(self): method log_and_exit (line 47) | def log_and_exit(self, e: Exception = None, mod_name: str = None, mess... method check (line 59) | def check(self): FILE: lmdeploy/pytorch/check_env/cuda.py class CudaChecker (line 5) | class CudaChecker(BaseChecker): method __init__ (line 8) | def __init__(self, model_format: str = None, logger=None) -> None: method check (line 12) | def check(self): FILE: lmdeploy/pytorch/check_env/deeplink.py class DeeplinkChecker (line 7) | class DeeplinkChecker(BaseChecker): method __init__ (line 10) | def __init__(self, device_type: str, logger=None) -> None: method check (line 14) | def check(self): FILE: lmdeploy/pytorch/check_env/dist.py class DistChecker (line 9) | class DistChecker(BaseChecker): method __init__ (line 12) | def __init__(self, tp: int, dp: int, ep: int, distributed_executor_bac... method check (line 22) | def check(self): FILE: lmdeploy/pytorch/check_env/model.py class ModelChecker (line 7) | class ModelChecker(BaseChecker): method __init__ (line 10) | def __init__(self, model_path: str, trust_remote_code: bool, dtype: st... method check_config (line 17) | def check_config(self, trans_version): method check_trans_version (line 31) | def check_trans_version(self, config, trans_version): method check_dtype (line 44) | def check_dtype(self, config): method check (line 72) | def check(self): FILE: lmdeploy/pytorch/check_env/torch.py class TorchChecker (line 5) | class TorchChecker(BaseChecker): method __init__ (line 8) | def __init__(self, device: str = 'cuda', logger=None) -> None: method check (line 12) | def check(self): FILE: lmdeploy/pytorch/check_env/transformers.py class TransformersChecker (line 10) | class TransformersChecker(BaseChecker): method check (line 13) | def check(self): FILE: lmdeploy/pytorch/check_env/triton.py class TritonChecker (line 10) | class TritonChecker(BaseChecker): method check_version (line 13) | def check_version(self): method check (line 31) | def check(self): FILE: lmdeploy/pytorch/check_env/triton_custom_add.py function _add_kernel (line 8) | def _add_kernel(A, B, C, size, BLOCK: tl.constexpr): function custom_add (line 17) | def custom_add(a, b): FILE: lmdeploy/pytorch/config.py function _update_torch_dtype (line 16) | def _update_torch_dtype(config: 'ModelConfig', dtype: str, device_type: ... class BackendConfig (line 64) | class BackendConfig: class SchedulerConfig (line 71) | class SchedulerConfig: class CacheConfig (line 83) | class CacheConfig: method __post_init__ (line 106) | def __post_init__(self): class TPMode (line 113) | class TPMode(enum.Enum): class DistConfig (line 120) | class DistConfig: method __post_init__ (line 138) | def __post_init__(self): method get_tp_by_layer (line 183) | def get_tp_by_layer(self, layer_type: str): method from_engine_config (line 198) | def from_engine_config(cls, engine_config: PytorchEngineConfig): function _override_hf_config_dict (line 214) | def _override_hf_config_dict(hf_config: dict, key: str, hf_overrides): function _overide_hf_config_cfg (line 234) | def _overide_hf_config_cfg(hf_config: list, key: str, hf_overrides): function _override_hf_config (line 252) | def _override_hf_config(hf_config: Any, key: str, hf_overrides): function override_hf_config (line 260) | def override_hf_config(hf_config: Any, hf_overrides: Dict[str, Any]): function _default_check_env (line 266) | def _default_check_env(device: str): function _patch_quantization_config (line 270) | def _patch_quantization_config(hf_config: Any, model_format: str = None): class ModelConfig (line 300) | class ModelConfig: method get_head_size (line 347) | def get_head_size(self): method from_pretrained (line 352) | def from_pretrained( method from_hf_config (line 413) | def from_hf_config( class UnmaskingStrategy (line 459) | class UnmaskingStrategy(enum.Enum): method from_str (line 470) | def from_str(cls, strategy: str): class DLLMConfig (line 484) | class DLLMConfig: class MiscConfig (line 492) | class MiscConfig: method from_engine_config (line 505) | def from_engine_config(cls, engine_config: PytorchEngineConfig): class SpecDecodeConfig (line 528) | class SpecDecodeConfig: method from_config (line 536) | def from_config( class QuantizationConfig (line 574) | class QuantizationConfig: method from_config (line 586) | def from_config(cls, hf_config: Any): method get_quant_method (line 644) | def get_quant_method(self, prefix: str = ''): method get (line 653) | def get(self, key, default=None): FILE: lmdeploy/pytorch/configurations/builder.py class AutoModelConfigBuilder (line 9) | class AutoModelConfigBuilder(ABC): method __init_subclass__ (line 13) | def __init_subclass__(cls) -> None: method register_builder (line 18) | def register_builder(cls, sub_cls): method condition (line 24) | def condition(cls, hf_config): method build (line 29) | def build(cls, hf_config, model_path: str = None, **kwargs): method update_num_kv_heads (line 56) | def update_num_kv_heads(cls, hf_config, tp, num_key_value_heads): FILE: lmdeploy/pytorch/configurations/chatglm.py class ChatGLMModelConfigBuilder (line 7) | class ChatGLMModelConfigBuilder(AutoModelConfigBuilder): method condition (line 10) | def condition(cls, hf_config): method build (line 15) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/cogvlm.py class CogVLMModelConfigBuilder (line 6) | class CogVLMModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 15) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/deepseek_v2.py class DeepseekV2ModelConfigBuilder (line 8) | class DeepseekV2ModelConfigBuilder(AutoModelConfigBuilder): method condition (line 11) | def condition(cls, hf_config): method build (line 16) | def build(cls, hf_config, model_path: str = None, is_draft_model: bool... FILE: lmdeploy/pytorch/configurations/deepseek_v32.py function _check_env_v32 (line 7) | def _check_env_v32(device: str = 'cuda'): class DeepseekV32ModelConfigBuilder (line 27) | class DeepseekV32ModelConfigBuilder(DeepseekV2ModelConfigBuilder): method condition (line 30) | def condition(cls, hf_config): method build (line 35) | def build(cls, hf_config, model_path: str | None = None, **kwargs): FILE: lmdeploy/pytorch/configurations/deepseek_vl2.py class DeepseekVLV2ModelConfigBuilder (line 6) | class DeepseekVLV2ModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/default.py class DefaultModelConfigBuilder (line 7) | class DefaultModelConfigBuilder(AutoModelConfigBuilder): method condition (line 10) | def condition(cls, hf_config): method build (line 15) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/gemma.py class GemmaModelConfigBuilder (line 6) | class GemmaModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs): class GemmaVLModelConfigBuilder (line 21) | class GemmaVLModelConfigBuilder(AutoModelConfigBuilder): method condition (line 24) | def condition(cls, hf_config): method build (line 30) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/glm4.py class Glm4MoeLiteModelConfigBuilder (line 6) | class Glm4MoeLiteModelConfigBuilder(DeepseekV2ModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, is_draft_model: bool... class Glm4MoeModelConfigBuilder (line 28) | class Glm4MoeModelConfigBuilder(DefaultModelConfigBuilder): method condition (line 31) | def condition(cls, hf_config): method build (line 36) | def build(cls, hf_config, model_path: str = None, is_draft_model: bool... FILE: lmdeploy/pytorch/configurations/gpt_oss.py class GptOSSModelConfigBuilder (line 6) | class GptOSSModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/interns1_pro.py class InterS1ProModelConfigBuilder (line 6) | class InterS1ProModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/internvl.py class InternVLModelConfigBuilder (line 6) | class InternVLModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/internvl3_hf.py class InternVL3ModelConfigBuilder (line 6) | class InternVL3ModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/llama.py class LlamaModelConfigBuilder (line 6) | class LlamaModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, is_draft_model: bool... FILE: lmdeploy/pytorch/configurations/llama4.py class Llama4ModelConfigBuilder (line 6) | class Llama4ModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/llava_hf.py class LlavaHfModelConfigBuilder (line 7) | class LlavaHfModelConfigBuilder(AutoModelConfigBuilder): method condition (line 10) | def condition(cls, hf_config): method build (line 15) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/minicpm3.py class MiniCPM3ModelConfigBuilder (line 7) | class MiniCPM3ModelConfigBuilder(AutoModelConfigBuilder): method condition (line 10) | def condition(cls, hf_config): method build (line 15) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/qwen.py class QwenModelConfigBuilder (line 6) | class QwenModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/qwen3_5.py class Qwen3_5ModelConfigBuilder (line 11) | class Qwen3_5ModelConfigBuilder(AutoModelConfigBuilder): method condition (line 14) | def condition(cls, hf_config): method build (line 19) | def build(cls, hf_config, model_path: str = None, tp: int = 1, **kwargs): FILE: lmdeploy/pytorch/configurations/qwen3_next.py function _check_env_qwen3_next (line 8) | def _check_env_qwen3_next(device: str): class Qwen3NextModelConfigBuilder (line 19) | class Qwen3NextModelConfigBuilder(AutoModelConfigBuilder): method condition (line 22) | def condition(cls, hf_config): method build (line 27) | def build(cls, hf_config, model_path: str = None, tp: int = 1, **kwargs): FILE: lmdeploy/pytorch/configurations/qwen3_vl.py class Qwen3VLModelConfigBuilder (line 6) | class Qwen3VLModelConfigBuilder(AutoModelConfigBuilder): method condition (line 9) | def condition(cls, hf_config): method build (line 14) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/sdar.py class SDARModelConfigBuilder (line 5) | class SDARModelConfigBuilder(AutoModelConfigBuilder): method condition (line 8) | def condition(cls, hf_config): method build (line 13) | def build(cls, hf_config, model_path: str = None, **kwargs): FILE: lmdeploy/pytorch/configurations/utils.py function flash_mla_available (line 9) | def flash_mla_available(): function flash_attn_v3_available (line 26) | def flash_attn_v3_available(): FILE: lmdeploy/pytorch/devices/device_manager.py class DeviceContext (line 9) | class DeviceContext: class DeviceManager (line 17) | class DeviceManager(CtxMgrBase[DeviceContext]): method __init__ (line 19) | def __init__(self): method register_context_callback (line 24) | def register_context_callback(self, callback: Callable): method unregister_context_callback (line 31) | def unregister_context_callback(self, handle: int): function get_device_manager (line 36) | def get_device_manager(): FILE: lmdeploy/pytorch/disagg/backend/base.py class MigrationBackendImpl (line 9) | class MigrationBackendImpl: method p2p_initialize (line 12) | def p2p_initialize(self, init_request: DistServeInitRequest): method register_memory_region (line 16) | def register_memory_region(self, register_mr_request: DistServeRegiste... method endpoint_info (line 20) | def endpoint_info(self, remote_engine_id: str, protocol: MigrationProt... method p2p_connect (line 24) | def p2p_connect(self, remote_engine_id: str, conn_req: DistServeKVTran... method p2p_migrate (line 28) | def p2p_migrate(self, assignment: MigrationAssignment, async_op: bool ... method store (line 32) | def store(self, assignment: MigrationAssignment, async_op: bool = False): method load (line 36) | def load(self, assignment: MigrationAssignment, async_op: bool = False): FILE: lmdeploy/pytorch/disagg/backend/dlslime.py class DLSlimeMigrationManagement (line 22) | class DLSlimeMigrationManagement: method __init__ (line 24) | def __init__(self, init_request: DistServeInitRequest): method register_memory_region (line 46) | def register_memory_region(self, register_mr_request: DistServeRegiste... method connect (line 54) | def connect(self, kvtransfer_endpoint_info: DistServeKVTransferEndpoin... method p2p_migrate (line 57) | async def p2p_migrate(self, assignment: MigrationAssignment): class DLSlimeBackend (line 75) | class DLSlimeBackend(MigrationBackendImpl): method __init__ (line 78) | def __init__(self): method p2p_initialize (line 81) | def p2p_initialize(self, init_request: DistServeInitRequest): method register_memory_region (line 84) | def register_memory_region(self, register_mr_request: DistServeRegiste... method endpoint_info (line 87) | def endpoint_info(self, remote_engine_id: str, protocol: MigrationProt... method p2p_connect (line 90) | def p2p_connect(self, remote_engine_id: str, conn_req: DistServeKVTran... method p2p_migrate (line 93) | async def p2p_migrate(self, assignment: MigrationAssignment, async_op:... method store (line 96) | def store(self, assignment: MigrationAssignment, async_op: bool = False): method load (line 99) | def load(self, assignment: MigrationAssignment, async_op: bool = False): FILE: lmdeploy/pytorch/disagg/backend/mooncake.py function get_rdma_nics (line 22) | def get_rdma_nics(): function get_local_ip_by_remote (line 48) | def get_local_ip_by_remote() -> str: class MooncakeMigrationManagement (line 68) | class MooncakeMigrationManagement: method __init__ (line 71) | def __init__(self, init_request: DistServeInitRequest): method _initialize_p2p (line 100) | def _initialize_p2p(self, init_request: DistServeInitRequest): method register_memory_region (line 123) | def register_memory_region(self, register_mr_request: DistServeRegiste... method endpoint_info (line 145) | def endpoint_info(self) -> Dict: method connect (line 164) | def connect(self, connect_request: DistServeKVTransferEndpointInfo): method p2p_migrate (line 178) | async def p2p_migrate(self, assignment: MigrationAssignment, async_op:... method _migrate (line 195) | def _migrate(self, assignment: MigrationAssignment): class MooncakeBackend (line 236) | class MooncakeBackend(MigrationBackendImpl): method __init__ (line 239) | def __init__(self): method p2p_initialize (line 242) | def p2p_initialize(self, init_request: DistServeInitRequest): method register_memory_region (line 245) | def register_memory_region(self, register_mr_request: DistServeRegiste... method endpoint_info (line 248) | def endpoint_info(self, remote_engine_id: int, protocol: MigrationProt... method p2p_connect (line 251) | def p2p_connect(self, remote_engine_id: str, connect_request: DistServ... method p2p_migrate (line 254) | async def p2p_migrate(self, assignment: MigrationAssignment, async_op:... method store (line 257) | def store(self, assignment: MigrationAssignment, async_op: bool = False): method load (line 260) | def load(self, assignment: MigrationAssignment, async_op: bool = False): FILE: lmdeploy/pytorch/disagg/config.py class ServingStrategy (line 8) | class ServingStrategy(enum.Enum): class EngineRole (line 22) | class EngineRole(enum.Enum): class MigrationBackend (line 40) | class MigrationBackend(enum.Enum): class RDMALinkType (line 47) | class RDMALinkType(enum.Enum): class DistServeRDMAConfig (line 54) | class DistServeRDMAConfig(BaseModel): class DistServeTCPConfig (line 72) | class DistServeTCPConfig(BaseModel): class DistServeNVLinkConfig (line 76) | class DistServeNVLinkConfig(BaseModel): class DistServeEngineConfig (line 80) | class DistServeEngineConfig(BaseModel): class MooncakeEngineConfig (line 112) | class MooncakeEngineConfig(DistServeEngineConfig): FILE: lmdeploy/pytorch/disagg/conn/engine_conn.py class EngineP2PConnection (line 24) | class EngineP2PConnection: method __init__ (line 26) | def __init__(self, engine: 'Engine'): method p2p_initialize (line 34) | def p2p_initialize(self, init_request: DistServeInitRequest): method p2p_connect (line 54) | def p2p_connect(self, conn_request: DistServeConnectionRequest): method p2p_drop_connect (line 62) | def p2p_drop_connect(self, drop_conn_request: DistServeDropConnectionR... method zmq_send (line 67) | async def zmq_send(self, remote_engine_id: str, remote_session_id: int): method handle_zmq_recv (line 71) | async def handle_zmq_recv(self, remote_engine_id: str): method zmq_disconnect (line 83) | async def zmq_disconnect(self, remote_engine_id: str): FILE: lmdeploy/pytorch/disagg/conn/protocol.py class MigrationProtocol (line 11) | class MigrationProtocol(enum.Enum): class DistServeConnectionStatus (line 27) | class DistServeConnectionStatus(enum.Enum): class DistServeInitRequest (line 33) | class DistServeInitRequest(BaseModel): class DistServeEngineEndpointInfo (line 49) | class DistServeEngineEndpointInfo(BaseModel): class DistServeKVTransferEndpointInfo (line 53) | class DistServeKVTransferEndpointInfo(BaseModel): class DistServeInitResponse (line 58) | class DistServeInitResponse(BaseModel): class DistServeConnectionRequest (line 69) | class DistServeConnectionRequest(BaseModel): class DistServeConnectionResponse (line 76) | class DistServeConnectionResponse(BaseModel): class MigrationRequest (line 80) | class MigrationRequest(BaseModel): class DistServeCacheFreeRequest (line 91) | class DistServeCacheFreeRequest(BaseModel): class DistServeDropConnectionRequest (line 96) | class DistServeDropConnectionRequest(BaseModel): FILE: lmdeploy/pytorch/disagg/conn/proxy_conn.py class PDConnectionStatus (line 23) | class PDConnectionStatus(enum.Enum): class PDConnectionState (line 29) | class PDConnectionState: method __init__ (line 32) | def __init__(self, status: PDConnectionStatus, event: asyncio.Event): method wait (line 36) | async def wait(self): method set_status (line 39) | def set_status(self, status: PDConnectionStatus): function get_server_api (line 43) | def get_server_api(url: str, api: str): class PDConnectionPool (line 47) | class PDConnectionPool: method __init__ (line 65) | def __init__(self): method reg_instance (line 94) | def reg_instance(self, role: EngineRole, endpoint: str): method dereg_instance (line 102) | def dereg_instance(self, endpoint: str): method shelf_prefill_session (line 115) | def shelf_prefill_session(self, conn_key: Tuple[str, str], session_id:... method unshelf_prefill_session (line 118) | def unshelf_prefill_session(self, conn_key: Tuple[str, str], session_i... method connect (line 121) | async def connect(self, conn_req: PDConnectionMessage): method is_connected (line 261) | def is_connected(self, p_url: str, d_url: str): method drop (line 267) | def drop(self, pd_key: Tuple[str, str]): FILE: lmdeploy/pytorch/disagg/messages.py class MigrationExecutionBatch (line 10) | class MigrationExecutionBatch(BaseModel): class AssignmentInstruct (line 17) | class AssignmentInstruct(BaseModel): class MigrationAssignment (line 25) | class MigrationAssignment(BaseModel): class PDConnectionMessage (line 32) | class PDConnectionMessage(BaseModel): class DistServeRegisterMRMessage (line 41) | class DistServeRegisterMRMessage(BaseModel): FILE: lmdeploy/pytorch/distributed.py class DistGroup (line 16) | class DistGroup: method close (line 25) | def close(self): function _build_tp_group_impl (line 39) | def _build_tp_group_impl(tp: int, function _build_attn_tp_group (line 89) | def _build_attn_tp_group(context: 'DistContext', function _build_mlp_tp_group (line 114) | def _build_mlp_tp_group(context: 'DistContext', function _build_moe_tp_group (line 144) | def _build_moe_tp_group(context: 'DistContext', function _build_tp_group (line 179) | def _build_tp_group(context: 'DistContext', timeout: timedelta, cpu_back... class DistContext (line 188) | class DistContext: method _build_ep_group (line 204) | def _build_ep_group(cls, context: 'DistContext', timeout: timedelta, c... method build (line 228) | def build(cls, rank: int = 0, dist_config: DistConfig = None, ccl_back... method close (line 261) | def close(self): class DistManager (line 281) | class DistManager(CtxMgrBase[DistContext]): method __init__ (line 284) | def __init__(self): method current_config (line 287) | def current_config(self) -> DistConfig: function get_dist_manager (line 292) | def get_dist_manager(): function get_world_rank (line 297) | def get_world_rank(): function get_tp_world_rank (line 306) | def get_tp_world_rank(layer_type: Optional[str] = None): function get_dp_world_rank (line 320) | def get_dp_world_rank(): function get_ep_world_rank (line 325) | def get_ep_world_rank(): function _check_group_device (line 330) | def _check_group_device(device: str): function get_process_group (line 336) | def get_process_group(device: str = None): function get_dist_group (line 341) | def get_dist_group(layer_type: str = 'attn'): function get_tp_group (line 355) | def get_tp_group(device: str = 'gpu', layer_type: str = 'attn'): function get_group (line 369) | def get_group(group_type: str, device: str): function all_reduce (line 379) | def all_reduce(tensor, op=ReduceOp.SUM, group='tp', async_op=False): function broadcast (line 386) | def broadcast(tensor, src, group='tp', async_op=False): function all_gather_object (line 393) | def all_gather_object(object_list, obj, group='tp'): function all_gather (line 399) | def all_gather(tensor_list, tensor, group='tp', async_op=False): function all_gather_into_tensor (line 405) | def all_gather_into_tensor(output_tensor, input_tensor, group='tp', asyn... function reduce_scatter (line 411) | def reduce_scatter(output, input_list, op=ReduceOp.SUM, group='tp', asyn... function gather_by_tp_sizes (line 418) | def gather_by_tp_sizes(x: torch.Tensor, function reduce_scatter_by_tp_sizes (line 433) | def reduce_scatter_by_tp_sizes(out: torch.Tensor, rank: int, tp_sizes: L... FILE: lmdeploy/pytorch/engine/base.py class EngineBase (line 6) | class EngineBase: method close (line 8) | def close(self) -> None: method start_loop (line 12) | def start_loop(self) -> None: method end_session (line 15) | def end_session(self, session_id: int): method p2p_initialize (line 19) | def p2p_initialize(self, conn_request: DistServeInitRequest): method p2p_connect (line 23) | def p2p_connect(self, conn_request: DistServeConnectionRequest): method p2p_drop_connect (line 27) | def p2p_drop_connect(self, drop_conn_request: DistServeDropConnectionR... method create_instance (line 35) | def create_instance(self, cuda_stream_id=0): class EngineInstanceBase (line 40) | class EngineInstanceBase: method async_end (line 42) | async def async_end(self, session_id: int): method async_cancel (line 46) | async def async_cancel(self, session_id: int): method async_stream_infer (line 50) | async def async_stream_infer(self, *args, **kwargs): FILE: lmdeploy/pytorch/engine/cache_engine.py function round_up (line 25) | def round_up(x: int, alignment: int) -> int: class CacheDesc (line 31) | class CacheDesc: method __post_init__ (line 37) | def __post_init__(self): function _get_kv_cache_dtype (line 43) | def _get_kv_cache_dtype(model_config: ModelConfig): class CacheEngine (line 54) | class CacheEngine: method __init__ (line 67) | def __init__( method cpu_cache (line 113) | def cpu_cache(self): method gpu_cache (line 118) | def gpu_cache(self): method num_gpu_blocks (line 123) | def num_gpu_blocks(self): method num_cpu_blocks (line 128) | def num_cpu_blocks(self): method _get_key_block_shape_impl (line 133) | def _get_key_block_shape_impl(cls, method _get_value_block_shape_impl (line 160) | def _get_value_block_shape_impl(cls, method get_k_cache_desc (line 189) | def get_k_cache_desc(cls, model_config: ModelConfig, cache_config: Cac... method get_v_cache_desc (line 208) | def get_v_cache_desc(cls, model_config: ModelConfig, cache_config: Cac... method get_quant_cache_descs (line 227) | def get_quant_cache_descs(cls, k_cache_desc: CacheDesc, v_cache_desc: ... method get_custom_cache_descs (line 241) | def get_custom_cache_descs(cls, model_config: ModelConfig, cache_confi... method allocate_caches (line 256) | def allocate_caches(cls, num_blocks: int, model_config: ModelConfig, c... method allocate_gpu_cache (line 286) | def allocate_gpu_cache(self): method allocate_cpu_cache (line 299) | def allocate_cpu_cache(self): method get_custom_cache_shape_impl (line 313) | def get_custom_cache_shape_impl(num_layers: int, num_blocks: int, bloc... method _allocate_single_custom_cache (line 318) | def _allocate_single_custom_cache(shape: Sequence[int], dtype: torch.d... method allocate_custom_cache (line 322) | def allocate_custom_cache(self, device: str): method _swap (line 338) | def _swap(self, src: List[torch.Tensor], dst: List[torch.Tensor], src_... method swap_in (line 360) | def swap_in(self, src_to_dst: Dict[int, int]) -> None: method swap_out (line 368) | def swap_out(self, src_to_dst: Dict[int, int]) -> None: method get_cache_block_size (line 377) | def get_cache_block_size(cls, cache_config: CacheConfig, model_config:... method p2p_initialize (line 399) | def p2p_initialize(self, migration_init_request: DistServeInitRequest)... method p2p_connect (line 420) | def p2p_connect(self, remote_engine_id: str, migration_conn_request: L... method migrate (line 423) | async def migrate(self, migration_execution_inputs: MigrationExecution... class StateCacheEngine (line 459) | class StateCacheEngine: method __init__ (line 462) | def __init__(self, cache_config: CacheConfig): method allocate_caches (line 469) | def allocate_caches(num_caches: int, state_shapes: List[Tuple[Tuple[in... method get_cache_state_size (line 495) | def get_cache_state_size(state_shapes: List[Tuple[Tuple[int], torch.dt... method state_caches (line 508) | def state_caches(self): method init_caches (line 512) | def init_caches(self, idx: torch.Tensor, mask: torch.Tensor): FILE: lmdeploy/pytorch/engine/config_builder.py class ConfigBuilder (line 11) | class ConfigBuilder: method update_engine_config (line 14) | def update_engine_config(engine_config: PytorchEngineConfig): method build_scheduler_config (line 46) | def build_scheduler_config(engine_config: PytorchEngineConfig): method build_cache_config (line 54) | def build_cache_config(engine_config: PytorchEngineConfig): method build_backend_config (line 73) | def build_backend_config(engine_config: PytorchEngineConfig): method build_dist_config (line 82) | def build_dist_config(engine_config: PytorchEngineConfig): method build_misc_config (line 88) | def build_misc_config(engine_config: PytorchEngineConfig): method build_specdecode_config (line 94) | def build_specdecode_config(target_model, speculative_config: Speculat... FILE: lmdeploy/pytorch/engine/engine.py class InferOutput (line 35) | class InferOutput: function _build_seq_meta (line 57) | def _build_seq_meta(cache_config: CacheConfig, seq_strategy: Any, sampli... function response_reqs (line 64) | def response_reqs(req_manager: RequestManager, class Engine (line 78) | class Engine(EngineBase): method __init__ (line 87) | def __init__( method from_pretrained (line 191) | def from_pretrained(cls, method _download_adapters (line 232) | def _download_adapters(self, adapters: Dict[str, str], engine_config: ... method _build_adapter_manager (line 246) | def _build_adapter_manager(self, adapters): method _bind_request_manager (line 249) | def _bind_request_manager(self): method _response (line 258) | def _response(self, resp: Response, resp_type: ResponseType, data: Any... method _get_max_session_len (line 262) | def _get_max_session_len(self): method _on_add_session (line 277) | def _on_add_session(self, reqs: List[Request], **kwargs): method _on_stop_session (line 289) | def _on_stop_session(self, reqs: List[Request], **kwargs): method _on_end_session (line 308) | def _on_end_session(self, reqs: List[Request], **kwargs): method _on_add_message (line 324) | def _on_add_message(self, reqs: List[Request], **kwargs): method _add_message (line 362) | def _add_message(self, reqs: List[Request]): method model_config (line 416) | def model_config(self) -> ModelConfig: method p2p_initialize (line 420) | def p2p_initialize(self, init_request: DistServeInitRequest): method p2p_connect (line 423) | def p2p_connect(self, conn_request: DistServeConnectionRequest): method p2p_drop_connect (line 426) | def p2p_drop_connect(self, drop_conn_request: DistServeDropConnectionR... method _loop_finally (line 429) | def _loop_finally(self): method update_params (line 435) | def update_params(self, request: Any): method sleep (line 439) | def sleep(self, level: int = 1): method wakeup (line 443) | def wakeup(self, tags: Optional[List[str]] = None): method async_loop (line 447) | async def async_loop(self): method close (line 475) | def close(self): method start (line 486) | def start(self): method stop (line 493) | def stop(self): method wait_tasks (line 498) | async def wait_tasks(self): method create_instance (line 511) | def create_instance(self, cuda_stream_id=0): method start_loop (line 522) | def start_loop(self): method end_session (line 526) | def end_session(self, session_id: int): method get_engine_config (line 533) | def get_engine_config(self): method get_schedule_metrics (line 536) | def get_schedule_metrics(self): FILE: lmdeploy/pytorch/engine/engine_checker.py class EngineChecker (line 12) | class EngineChecker(BaseChecker): method __init__ (line 15) | def __init__(self, method check (line 77) | def check(self): method _handle_impl (line 100) | def _handle_impl(self): method handle (line 103) | def handle(self): FILE: lmdeploy/pytorch/engine/engine_instance.py function _check_resp (line 17) | def _check_resp(resp: Response, state: ResponseType, warning_msg: str = ... function _check_resp_success (line 27) | def _check_resp_success(resp: Response, warning_msg: str = None): function async_try_add_session (line 32) | async def async_try_add_session(req_sender: RequestSender, session_id: i... function async_cancel (line 43) | async def async_cancel(req_sender: RequestSender, session_id: int): function try_add_session (line 50) | def try_add_session(req_sender: RequestSender, session_id: int): function end (line 61) | def end(req_sender: RequestSender, session_id: int): function cancel (line 67) | def cancel(req_sender: RequestSender, session_id: int): class EngineInstance (line 75) | class EngineInstance(EngineInstanceBase): method __init__ (line 82) | def __init__(self, engine: Engine): method __del__ (line 90) | def __del__(self): method _get_extra_outputs (line 94) | def _get_extra_outputs(self, resp: Response): method _async_try_add_session (line 110) | async def _async_try_add_session(self, session_id: int): method _try_add_session (line 118) | def _try_add_session(self, session_id: int): method async_stream_infer (line 126) | async def async_stream_infer(self, method async_infer (line 211) | async def async_infer(self, method stream_infer (line 240) | def stream_infer(self, method infer (line 277) | def infer(self, method async_end (line 298) | async def async_end(self, session_id: int): method end (line 302) | def end(self, session_id: int): method async_cancel (line 306) | async def async_cancel(self, session_id: int): method cancel (line 310) | def cancel(self, session_id: int): FILE: lmdeploy/pytorch/engine/engine_loop.py class CounterEvent (line 37) | class CounterEvent(asyncio.Event): method __init__ (line 39) | def __init__(self): method set (line 43) | def set(self): method clear (line 49) | def clear(self): class RunableEventAsync (line 55) | class RunableEventAsync: method __init__ (line 58) | def __init__(self, scheduler: 'Scheduler'): method wait (line 62) | async def wait(self): method set (line 66) | def set(self): function build_runable_event (line 74) | def build_runable_event(scheduler: 'Scheduler'): class EngineLoopConfig (line 80) | class EngineLoopConfig: method from_engine (line 91) | def from_engine(engine: 'Engine'): class EngineLoop (line 106) | class EngineLoop: method __init__ (line 109) | def __init__(self, method preprocess_loop (line 137) | async def preprocess_loop(self): method _log_resps (line 144) | def _log_resps(outputs: List[InferOutput]): method _send_resp (line 151) | def _send_resp(self, out: InferOutput): method _update_logprobs (line 169) | def _update_logprobs(step_outputs: List[InferOutput]): method _send_resps (line 186) | def _send_resps(self, step_outputs: List[InferOutput]): method send_response_loop (line 198) | async def send_response_loop(self): method _make_infer_outputs (line 212) | def _make_infer_outputs( method _main_loop_try_send_next_inputs (line 301) | async def _main_loop_try_send_next_inputs(self): method _main_loop_get_outputs (line 310) | async def _main_loop_get_outputs( method main_loop (line 332) | async def main_loop(self): method update_running_migration (line 365) | def update_running_migration(self, running: 'SeqList', next_token_ids:... method _migration_loop_migrate (line 382) | async def _migration_loop_migrate(self, migration_ready: 'SeqList'): method _migration_loop_get_outputs (line 410) | async def _migration_loop_get_outputs(self, migration_ready: 'SeqList'): method _migration_loop_process_ready (line 431) | async def _migration_loop_process_ready(self, migration_ready: 'SeqLis... method migration_loop (line 440) | async def migration_loop(self): method start (line 453) | def start(self, event_loop: asyncio.AbstractEventLoop): method wait_tasks (line 473) | async def wait_tasks(self): method stop (line 494) | def stop(self): method cancel (line 503) | def cancel(self): function build_engine_loop (line 511) | def build_engine_loop(engine: 'Engine'): FILE: lmdeploy/pytorch/engine/executor/__init__.py function get_distributed_executor_backend (line 12) | def get_distributed_executor_backend(world_size: int, dp: int, device_ty... function build_executor (line 56) | def build_executor( FILE: lmdeploy/pytorch/engine/executor/base.py class ExecutorBase (line 16) | class ExecutorBase: method __init__ (line 19) | def __init__(self, method download_models (line 45) | def download_models(self): method build_model (line 49) | def build_model(self): method gather_free_mem (line 53) | def gather_free_mem(self): method set_cache_config (line 57) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi... method set_model_config (line 61) | def set_model_config(self, model_config: ModelConfig, spec_model_confi... method build_graph_runner (line 65) | def build_graph_runner(self): method build_cache_engine (line 69) | def build_cache_engine(self): method warmup (line 73) | def warmup(self): method sleep (line 77) | async def sleep(self, level: int = 1): method wakeup (line 81) | def wakeup(self, tags: Optional[List[str]] = None): method update_params (line 85) | def update_params(self, request: Any): method get_input_processor (line 89) | def get_input_processor(self): method start (line 93) | def start(self, forward_event: asyncio.Event): method wait_tasks (line 97) | async def wait_tasks(self): method stop (line 101) | def stop(self): method release (line 105) | def release(self): method forward_async (line 109) | async def forward_async(self, inputs): method get_output_async (line 113) | async def get_output_async(self): method p2p_initialize (line 119) | def p2p_initialize(self, remote_engine_config: DistServeInitRequest): method p2p_connect (line 123) | def p2p_connect(self, conn_request: List[DistServeKVTransferEndpointIn... method migrate (line 127) | async def migrate(self, batch: MigrationExecutionBatch): method _get_runtime_size (line 133) | def _get_runtime_size(self, num_free_gpu_mem: int, cache_block_size: i... method _adjust_block_size (line 148) | def _adjust_block_size(self): method _get_state_cache_mem (line 161) | def _get_state_cache_mem(self): method update_configs (line 185) | def update_configs(self): method init (line 241) | def init(self): method remote_log (line 258) | def remote_log(self, msg: str): FILE: lmdeploy/pytorch/engine/executor/base_worker.py class WorkerWrapperBase (line 20) | class WorkerWrapperBase: method __init__ (line 23) | def __init__( method init_process_group (line 57) | def init_process_group(self, rank: int, master_addr: str = None, maste... method pack_output (line 69) | def pack_output(self, output: Dict): method get_outputs (line 73) | async def get_outputs(self): method build_model (line 77) | def build_model(self): method get_free_mem (line 94) | def get_free_mem(self): method set_cache_config (line 98) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi... method set_model_config (line 102) | def set_model_config(self, model_config: ModelConfig, spec_model_confi... method build_graph_runner (line 106) | def build_graph_runner(self): method build_cache_engine (line 110) | def build_cache_engine(self): method update_params (line 114) | def update_params(self, request: Any): method warmup (line 118) | def warmup(self): method sleep (line 122) | async def sleep(self, level: int = 1): method wakeup (line 126) | def wakeup(self, tags: Optional[List[str]] = None): method get_input_processor (line 130) | def get_input_processor(self): method start (line 134) | def start(self): method wait_tasks (line 139) | async def wait_tasks(self): method stop (line 152) | def stop(self): method stop_async (line 156) | async def stop_async(self): method forward_async (line 159) | async def forward_async(self, inputs): method get_output_async (line 163) | async def get_output_async(self): method release (line 169) | def release(self): method p2p_initialize (line 175) | def p2p_initialize(self, init_request: DistServeInitRequest): method p2p_connect (line 178) | def p2p_connect(self, remote_engine_id: str, conn_request: List[DistSe... method migrate (line 181) | async def migrate(self, inputs: MigrationExecutionBatch): FILE: lmdeploy/pytorch/engine/executor/dist_utils.py function find_available_port (line 11) | def find_available_port() -> bool: function setup_master_addr (line 20) | def setup_master_addr(addr: str, port: str): function init_dist_environ (line 32) | def init_dist_environ(rank: int, world_size: int): function init_process_group (line 38) | def init_process_group(rank: int, world_size: int): FILE: lmdeploy/pytorch/engine/executor/mp_executor.py function get_num_packages (line 37) | def get_num_packages(data_size): class Notifier (line 42) | class Notifier: method __init__ (line 44) | def __init__(self, num_receiver: int, mp_ctx: SpawnContext): method _update_event_id (line 49) | def _update_event_id(self): method set (line 52) | def set(self): method set_async (line 60) | async def set_async(self): method wait (line 71) | def wait(self): method wait_async (line 80) | async def wait_async(self): method close (line 89) | def close(self): class SharedBuffer (line 95) | class SharedBuffer: method __init__ (line 98) | def __init__(self, proc_id: int, notifier: Notifier, name: str = None): method acquire_buf (line 117) | def acquire_buf(self): method name (line 125) | def name(self): method pack_data (line 128) | def pack_data(self, data, receiver_mask): method send (line 144) | def send(self, data, receiver_mask: int = 0xff): method send_async (line 149) | async def send_async(self, data, receiver_mask: int = 0xff): method _receive_step0 (line 154) | def _receive_step0(self): method _receive_step1 (line 170) | def _receive_step1(self, dumped_data, is_receiver, remain_size): method receive (line 185) | def receive(self): method receive_async (line 191) | async def receive_async(self): method close (line 197) | def close(self): class MPExecutor (line 207) | class MPExecutor(ExecutorBase): method setup_master_addr (line 211) | def setup_master_addr(cls): method __init__ (line 220) | def __init__(self, method collective_rpc (line 286) | def collective_rpc(self, method collective_rpc_async (line 315) | async def collective_rpc_async(self, method download_models (line 343) | def download_models(self): method build_model (line 347) | def build_model(self): method gather_free_mem (line 351) | def gather_free_mem(self): method set_cache_config (line 356) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi... method set_model_config (line 360) | def set_model_config(self, model_config: ModelConfig, spec_model_confi... method build_graph_runner (line 364) | def build_graph_runner(self): method build_cache_engine (line 368) | def build_cache_engine(self): method warmup (line 372) | def warmup(self): method _prefetch_outputs (line 376) | async def _prefetch_outputs(self): method start (line 381) | def start(self, forward_event: asyncio.Event): method wait_tasks (line 389) | async def wait_tasks(self): method forward_async (line 394) | async def forward_async(self, inputs): method get_output_async (line 398) | async def get_output_async(self): method get_input_processor (line 402) | def get_input_processor(self): method stop (line 406) | def stop(self): method release (line 411) | def release(self): class MPWorkerWrapper (line 424) | class MPWorkerWrapper(WorkerWrapperBase): method __init__ (line 427) | def __init__( class ExecutorProc (line 454) | class ExecutorProc: method __init__ (line 456) | def __init__(self, proc_id: int, mp_ctx: SpawnContext): method start (line 462) | def start(self, **kwargs): method close (line 472) | def close(self): method join (line 480) | def join(self): method _main_loop (line 485) | def _main_loop( method _task_wrapper (line 557) | async def _task_wrapper(func, args: List, kwargs: Dict, need_return: b... method _main_loop_impl (line 562) | async def _main_loop_impl(self, proc_id: int, comm_buf: SharedBuffer, ... FILE: lmdeploy/pytorch/engine/executor/ray_executor.py function _get_master_addr (line 31) | def _get_master_addr(): function _get_master_port (line 41) | def _get_master_port(): function get_ascend_device_rank_mapping (line 49) | def get_ascend_device_rank_mapping(master_addr): function _update_env_cuda_alloc_conf (line 85) | def _update_env_cuda_alloc_conf(env_vars: Dict): function _update_runtime_envs (line 108) | def _update_runtime_envs(runtime_env: Dict): function _update_runtime_env_nsys (line 118) | def _update_runtime_env_nsys(runtime_env: Dict): class RemoteLogger (line 132) | class RemoteLogger: method __init__ (line 135) | def __init__(self): method start (line 139) | def start(self, msg: str): method end (line 148) | def end(self, handle: int): class RayWorkerWrapper (line 155) | class RayWorkerWrapper(WorkerWrapperBase): method __init__ (line 158) | def __init__( method set_device (line 190) | def set_device(self, local_rank): method set_env (line 194) | def set_env(self, envs: Dict[str, str]): method get_node_ip (line 198) | def get_node_ip(self): method warmup_dist (line 202) | def warmup_dist(self): method pack_output (line 214) | def pack_output(self, output: Dict): method remote_log_start (line 218) | def remote_log_start(self, msg: str): method remote_log_end (line 222) | def remote_log_end(self, handle: int): method exit (line 226) | def exit(self): class RayExecutor (line 231) | class RayExecutor(ExecutorBase): method __init__ (line 234) | def __init__( method collective_rpc (line 312) | def collective_rpc(self, method build_model (line 324) | def build_model(self): method gather_free_mem (line 328) | def gather_free_mem(self): method set_cache_config (line 332) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi... method set_model_config (line 336) | def set_model_config(self, model_config: ModelConfig, spec_model_confi... method build_graph_runner (line 340) | def build_graph_runner(self): method build_cache_engine (line 344) | def build_cache_engine(self): method update_params (line 348) | def update_params(self, request: Any): method warmup (line 352) | def warmup(self): method sleep (line 356) | def sleep(self, level: int = 1): method wakeup (line 360) | def wakeup(self, tags: Optional[List[str]] = None): method get_input_processor (line 366) | def get_input_processor(self): method _prefetch_task_callback (line 370) | def _prefetch_task_callback(self, task: asyncio.Task): method start (line 380) | def start(self, forward_event: asyncio.Event): method wait_tasks (line 388) | async def wait_tasks(self): method stop (line 427) | def stop(self): method release (line 445) | def release(self): method _compile_dag (line 465) | def _compile_dag(self): method forward_async (line 475) | async def forward_async(self, inputs): method get_output_async (line 500) | async def get_output_async(self): method remote_log (line 507) | def remote_log(self, msg: str): method _sort_workers (line 517) | def _sort_workers(self, driver_ip: str, workers: List[RayWorkerWrapper]): method _sort_workers_by_ip (line 547) | def _sort_workers_by_ip(self, ips, workers: List[RayWorkerWrapper]): method _valid_bundle_id (line 569) | def _valid_bundle_id(self, bundle_id: int): method _init_workers_ray (line 576) | def _init_workers_ray(self, placement_group: PlacementGroup, worker_kw... method _init_distributed_environment_by_device (line 615) | def _init_distributed_environment_by_device(self, device_str: str): method _init_ascend_distributed_environment (line 629) | def _init_ascend_distributed_environment(self, driver_ip): method p2p_initialize (line 661) | def p2p_initialize(self, init_request: DistServeInitRequest): method p2p_connect (line 664) | def p2p_connect(self, remote_engine_id: str, conn_request: List[DistSe... method migrate (line 671) | async def migrate(self, batch: MigrationExecutionBatch): FILE: lmdeploy/pytorch/engine/executor/uni_executor.py class UniExecutor (line 17) | class UniExecutor(ExecutorBase): method __init__ (line 20) | def __init__( method download_models (line 54) | def download_models(self): method build_model (line 58) | def build_model(self): method gather_free_mem (line 62) | def gather_free_mem(self): method set_cache_config (line 66) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi... method set_model_config (line 70) | def set_model_config(self, model_config: ModelConfig, spec_model_confi... method build_graph_runner (line 74) | def build_graph_runner(self): method build_cache_engine (line 78) | def build_cache_engine(self): method warmup (line 82) | def warmup(self): method start (line 85) | def start(self, forward_event: asyncio.Event): method wait_tasks (line 89) | async def wait_tasks(self): method stop (line 93) | def stop(self): method release (line 97) | def release(self): method forward_async (line 101) | async def forward_async(self, inputs): method get_output_async (line 107) | async def get_output_async(self, dp_rank: int = 0): method get_input_processor (line 112) | def get_input_processor(self): method p2p_initialize (line 118) | def p2p_initialize(self, init_request: DistServeInitRequest): method p2p_connect (line 125) | def p2p_connect(self, remote_engine_id: str, conn_request: List[DistSe... method migrate (line 129) | async def migrate(self, batch: MigrationExecutionBatch): FILE: lmdeploy/pytorch/engine/guided_process.py class GuidedDecodingManager (line 13) | class GuidedDecodingManager: method __init__ (line 16) | def __init__(self, tokenizer: PreTrainedTokenizerBase, vocab_size: Opt... method get_processors (line 24) | def get_processors(self, session_ctx: List[Dict[str, Any]], method get_processor (line 55) | def get_processor(self, session_id: int, seq_id: int, schema: str, typ... method remove_processor (line 81) | def remove_processor(self, session_id: int): method allocate_batched_bitmap (line 87) | def allocate_batched_bitmap(self, batch_size: int) -> torch.Tensor: method fill_bitmap (line 90) | def fill_bitmap(self, processor: xgr.GrammarMatcher, guided_bitmask: t... method accept_token (line 93) | def accept_token(self, processor: xgr.GrammarMatcher, token: int) -> N... method apply_batched_bitmap (line 96) | def apply_batched_bitmap(self, logits: torch.Tensor, guided_bitmask: t... method clear (line 108) | def clear(self) -> None: FILE: lmdeploy/pytorch/engine/input_process.py class PreprocessInputResult (line 14) | class PreprocessInputResult: class BaseModelInputProcessor (line 21) | class BaseModelInputProcessor(ABC): method preprocess_input (line 25) | def preprocess_input(self, class DefaultModelInputProcessor (line 33) | class DefaultModelInputProcessor(BaseModelInputProcessor): method preprocess_input (line 36) | def preprocess_input(self, FILE: lmdeploy/pytorch/engine/inputs_maker.py function _tensorlize_block_offsets (line 31) | def _tensorlize_block_offsets(block_offsets, dtype=torch.int32): class InputsMakerConfig (line 45) | class InputsMakerConfig: method from_engine (line 59) | def from_engine(engine: 'Engine'): class LongContextChunker (line 72) | class LongContextChunker: method __init__ (line 75) | def __init__(self, max_prefill_token_num: int): method enabled (line 81) | def enabled(self): method is_long_context (line 85) | def is_long_context(self, seq: 'SchedulerSequence'): method set_seq (line 89) | def set_seq(self, seq: 'SchedulerSequence'): method multimodal_iter (line 108) | def multimodal_iter(self): method next_chunk_size (line 120) | def next_chunk_size(self): method is_last_chunk (line 153) | def is_last_chunk(self): method clear (line 159) | def clear(self): method update_step (line 166) | def update_step(self, inputs: ModelInputs): method check_enable (line 184) | def check_enable(self): class InputsMakerAsync (line 191) | class InputsMakerAsync: method __init__ (line 193) | def __init__( method _init_do_prefill (line 228) | def _init_do_prefill(self, config: InputsMakerConfig): method _create_vision_model_inputs (line 236) | def _create_vision_model_inputs(self, messages: 'SeqList', model_input... method torch_int_dtype (line 301) | def torch_int_dtype(self): method _set_adapter_ids (line 307) | def _set_adapter_ids(self, model_inputs: ModelInputs, messages: 'SeqLi... method create_model_inputs (line 318) | def create_model_inputs(self, messages: 'SeqList', is_prefill: bool): method create_model_inputs_long_context (line 386) | def create_model_inputs_long_context(self, method create_model_inputs_delta (line 443) | def create_model_inputs_delta(self): method create_model_inputs_delta_valid_only (line 487) | def create_model_inputs_delta_valid_only(self): method update_running_seqs (line 526) | def update_running_seqs(self, running: 'SeqList', inputs: Optional[Mod... method deactivate_evict_seqs (line 543) | def deactivate_evict_seqs(self): method _make_forward_inputs (line 557) | def _make_forward_inputs(self, prefill: bool, enable_empty: bool = Fal... method do_prefill_pnode (line 674) | def do_prefill_pnode(self): method do_prefill_default (line 677) | def do_prefill_default(self): method do_prefill_chunked (line 703) | def do_prefill_chunked(self): method _send_next_inputs_impl (line 711) | async def _send_next_inputs_impl(self, prefill: bool = None, enable_em... method send_next_inputs (line 725) | async def send_next_inputs(self): method prefetch_next_inputs (line 729) | async def prefetch_next_inputs(self): function build_inputs_maker (line 736) | def build_inputs_maker(engine: 'Engine'): FILE: lmdeploy/pytorch/engine/logits_process.py function _process_temperature_ (line 17) | def _process_temperature_(scores: torch.Tensor, temperature: torch.Tensor): function _process_bad_words_ (line 24) | def _process_bad_words_(scores: torch.Tensor, function _process_repetition_penalty_ (line 59) | def _process_repetition_penalty_(scores: torch.Tensor, input_ids: torch.... function _filter_topk_sorted_ (line 68) | def _filter_topk_sorted_(scores: torch.Tensor, topk: torch.LongTensor, f... function _filter_topp_sorted_ (line 78) | def _filter_topp_sorted_(scores: torch.Tensor, topp: torch.Tensor, filte... function _filter_minp_sorted_ (line 88) | def _filter_minp_sorted_(scores: torch.Tensor, minp: torch.Tensor, filte... function _ngram_one (line 99) | def _ngram_one(dtype: torch.dtype, device: torch.device, fill: int = 1): function ngram (line 103) | def ngram( function _filter_repetition_ngram_ (line 196) | def _filter_repetition_ngram_( function _multinomial_sampling (line 223) | def _multinomial_sampling(scores: torch.Tensor, class SamplingInputsDelta (line 236) | class SamplingInputsDelta: class SamplingInputs (line 243) | class SamplingInputs: method to_device (line 274) | def to_device(self, device: str, non_blocking: bool = False): method get_delta (line 288) | def get_delta(self) -> SamplingInputsDelta: method update_delta (line 298) | def update_delta(self, delta: SamplingInputsDelta): function _apply_custom_logits_processors (line 307) | def _apply_custom_logits_processors(batched_logits_processors, all_ids, ... function _torch_topk (line 316) | def _torch_topk(x: torch.Tensor, k: int, dim: int = -1, largest: bool = ... class FusedLogitsProcessor (line 327) | class FusedLogitsProcessor: method __init__ (line 330) | def __init__( method _wait_stream_once (line 348) | async def _wait_stream_once(self): method __call__ (line 354) | async def __call__(self, scores: torch.Tensor) -> torch.Tensor: method sampling (line 440) | def sampling(self, logits: torch.Tensor): method compute_logprobs (line 488) | def compute_logprobs(self, raw_logprobs: torch.Tensor, token_ids: torc... method cleanup_sessions (line 503) | def cleanup_sessions(self, session_ids: list[int]): FILE: lmdeploy/pytorch/engine/model_agent/__init__.py function build_model_agent (line 11) | def build_model_agent( FILE: lmdeploy/pytorch/engine/model_agent/agent.py class SleepWakeupState (line 42) | class SleepWakeupState: class BatchedLogProbs (line 49) | class BatchedLogProbs: method to_cpu (line 53) | def to_cpu(self): method to_numpy (line 57) | def to_numpy(self): method to_tensor (line 65) | def to_tensor(self): class BatchedOutputs (line 75) | class BatchedOutputs: method to_cpu (line 86) | def to_cpu(self): method to_numpy (line 99) | def to_numpy(self): method to_tensor (line 112) | def to_tensor(self): function msg_with_rank (line 126) | def msg_with_rank(rank: int, msg: str): function cache_swapping (line 131) | def cache_swapping(cache_engine: CacheEngine, swap_in_map: dict, swap_ou... function model_forward (line 148) | def model_forward( function _try_to_cuda (line 191) | def _try_to_cuda(val, non_blocking: bool = False): class DistGatherScalar (line 202) | class DistGatherScalar: method __init__ (line 205) | def __init__(self, val, size: int, device: str = 'cpu', group: dist.Pr... method async_wait (line 216) | async def async_wait(self, timeout: float = 0.001): class StepInputs (line 227) | class StepInputs: method merge (line 235) | def merge( method update_delta (line 270) | def update_delta( method step (line 282) | def step( class BaseModelAgent (line 312) | class BaseModelAgent: method __init__ (line 324) | def __init__( method all_context (line 419) | def all_context(self): method set_cache_config (line 425) | def set_cache_config(self, cache_config: CacheConfig, spec_cache_confi... method set_model_config (line 430) | def set_model_config(self, model_config: ModelConfig, spec_model_confi... method get_free_mem (line 435) | def get_free_mem(self): method warmup (line 442) | def warmup(self): method _slice_outs (line 495) | def _slice_outs(self, inputs: torch.Tensor, seq_length: torch.LongTens... method _postprocess_forward_output (line 499) | def _postprocess_forward_output(self, output: dict, inputs: ModelInputs): method _async_model_forward (line 507) | async def _async_model_forward( method async_sampling_logits (line 525) | async def async_sampling_logits(self, logits: torch.Tensor, sampling_i... method _push_output (line 548) | def _push_output(self, output: BatchedOutputs): method _broadcast_next_token (line 555) | def _broadcast_next_token(self, next_token_ids: torch.Tensor, extra_in... method _prepare_dp_v1 (line 565) | async def _prepare_dp_v1(self, inputs: ModelInputs): method _get_inputs_from_delta (line 624) | def _get_inputs_from_delta( method _prepare_inputs_prefill (line 637) | def _prepare_inputs_prefill( method _step_postprocess_with_output (line 664) | async def _step_postprocess_with_output(self, method _step_postprocess_without_output (line 721) | async def _step_postprocess_without_output( method _async_step (line 741) | async def _async_step( method _async_loop_background (line 916) | async def _async_loop_background(self, forward_event: asyncio.Event = ... method _async_loop_inputs_preprocess (line 932) | async def _async_loop_inputs_preprocess(self, forward_event: asyncio.E... method start (line 952) | def start(self, forward_event: asyncio.Event = None): method wait_tasks (line 977) | async def wait_tasks(self): method stop (line 991) | def stop(self): method stop_async (line 1006) | async def stop_async(self): method set_forward_inputs (line 1030) | def set_forward_inputs(self, inputs): method get_output_async (line 1035) | async def get_output_async(self): method _build_model (line 1051) | def _build_model(self): method build_model (line 1083) | def build_model(self): method build_graph_runner (line 1091) | def build_graph_runner(self): method build_cache_engine (line 1102) | def build_cache_engine(self): method _forward_impl (line 1119) | def _forward_impl(self, inputs: ModelInputs): method async_forward (line 1129) | async def async_forward(self, inputs: ModelInputs): method get_logits (line 1142) | def get_logits(self, hidden_states: torch.Tensor): method get_input_processor (line 1146) | def get_input_processor(self): method reset_graph_runner (line 1150) | def reset_graph_runner(self): method update_params (line 1158) | def update_params(self, request: UpdateParamsRequest): method sleep (line 1199) | async def sleep(self, level: int = 1): method wakeup (line 1213) | def wakeup(self, tags: Optional[List[str]] = None): method release (line 1237) | def release(self): FILE: lmdeploy/pytorch/engine/model_agent/inputs_maker.py class DefaultForwardInputsMaker (line 14) | class DefaultForwardInputsMaker: method __init__ (line 17) | def __init__(self, model_agent: 'BaseModelAgent'): method get (line 20) | async def get(self): method step (line 24) | def step(self): class DPForwardInputsMaker (line 30) | class DPForwardInputsMaker: method __init__ (line 33) | def __init__(self, model_agent: 'BaseModelAgent'): method _make_dummy_forward_inputs (line 46) | def _make_dummy_forward_inputs(self): method _gather_has_inputs (line 59) | async def _gather_has_inputs(self, has_inputs: bool = False): method _get_inputs (line 75) | async def _get_inputs(self): method get (line 89) | async def get(self): method step (line 104) | def step(self): function build_inputs_maker (line 111) | def build_inputs_maker(model_agent: 'BaseModelAgent'): FILE: lmdeploy/pytorch/engine/model_agent/profiler.py class AgentProfiler (line 13) | class AgentProfiler: method __init__ (line 15) | def __init__(self, dist_ctx: DistContext, stream: torch.Stream): method _build_profiler (line 35) | def _build_profiler(self): method dump (line 50) | def dump(self): method profile_task (line 70) | async def profile_task(self): method create_task (line 87) | def create_task(self): FILE: lmdeploy/pytorch/engine/mp_engine/__init__.py function build_mp_engine (line 5) | def build_mp_engine(backend: str, model_path: str, engine_config: Pytorc... FILE: lmdeploy/pytorch/engine/mp_engine/base.py class SessionState (line 18) | class SessionState: class MPEngine (line 22) | class MPEngine(EngineBase): method __init__ (line 24) | def __init__(self) -> None: method _collective_rpc (line 29) | def _collective_rpc(self, func, *args, **kwargs): method _collective_rpc_async (line 33) | async def _collective_rpc_async(self, func, *args, **kwargs): method _collective_rpc_streaming_async (line 37) | async def _collective_rpc_streaming_async(self, func, *args, **kwargs): method close (line 41) | def close(self) -> None: method start_loop (line 45) | def start_loop(self) -> None: method end_session (line 49) | def end_session(self, session_id: int): method sleep (line 53) | def sleep(self, level: int): method wakeup (line 57) | def wakeup(self, tags: Optional[List[str]] = None): method update_params (line 61) | def update_params(self, request: Any): method get_schedule_metrics (line 65) | def get_schedule_metrics(self): method p2p_initialize (line 69) | def p2p_initialize(self, conn_request: DistServeInitRequest): method p2p_connect (line 73) | def p2p_connect(self, conn_request: DistServeConnectionRequest): method p2p_drop_connect (line 77) | def p2p_drop_connect(self, drop_conn_request: DistServeDropConnectionR... method create_instance (line 85) | def create_instance(self, cuda_stream_id=0): class MPEngineInstance (line 90) | class MPEngineInstance(EngineInstanceBase): method __init__ (line 93) | def __init__(self, engine: MPEngine): method async_end (line 97) | async def async_end(self, session_id: int): method async_cancel (line 107) | async def async_cancel(self, session_id: int): method async_stream_infer (line 115) | async def async_stream_infer(self, session_id: int, *args, **kwargs): FILE: lmdeploy/pytorch/engine/mp_engine/base_worker.py class EngineInstancePool (line 17) | class EngineInstancePool: method __init__ (line 20) | def __init__(self, engine): method create_instance_pool (line 27) | def create_instance_pool(self, num_instance: int): method instance (line 36) | async def instance(self): method async_end (line 47) | async def async_end(self, session_id: int): method async_cancel (line 52) | async def async_cancel(self, session_id: int): method async_stream_infer (line 57) | async def async_stream_infer(self, *args, **kwargs): class EngineWorkerBase (line 64) | class EngineWorkerBase: method __init__ (line 67) | def __init__(self, engine: 'Engine'): method end_session (line 72) | def end_session(self, session_id: int): method get_engine_config (line 76) | def get_engine_config(self): method get_schedule_metrics (line 80) | def get_schedule_metrics(self): method p2p_initialize (line 84) | def p2p_initialize(self, conn_request: DistServeInitRequest): method p2p_connect (line 88) | def p2p_connect(self, conn_request: DistServeConnectionRequest): method p2p_drop_connect (line 92) | def p2p_drop_connect(self, drop_conn_request: DistServeDropConnectionR... method sleep (line 100) | def sleep(self, level: int = 1): method wakeup (line 104) | def wakeup(self, tags: Optional[List[str]] = None): method update_params (line 108) | def update_params(self, request: Any): method close (line 112) | def close(self) -> None: method instance_async_end (line 116) | async def instance_async_end(self, session_id: int): method instance_async_cancel (line 120) | async def instance_async_cancel(self, session_id: int): method instance_async_stream_infer (line 124) | async def instance_async_stream_infer(self, *args, **kwargs): class EngineOutputGather (line 130) | class EngineOutputGather: method __init__ (line 133) | def __init__(self): method get (line 136) | def get(self, stream_id): method add (line 141) | def add(self, stream_id, result): method pop (line 148) | def pop(self, stream_id, result): FILE: lmdeploy/pytorch/engine/mp_engine/ray_engine.py class RayEngineWorker (line 19) | class RayEngineWorker(EngineWorkerBase): method __init__ (line 21) | def __init__(self, method _stream_task_wrapper (line 40) | async def _stream_task_wrapper(self, stream_id: int, init_event: async... method create_stream_task (line 56) | async def create_stream_task(self, func, *args, **kwargs): method get_stream_task_result (line 69) | async def get_stream_task_result(self, stream_id: int): function _update_runtime_envs (line 87) | def _update_runtime_envs(runtime_env: Dict): class RayMPEngine (line 96) | class RayMPEngine(MPEngine): method __init__ (line 98) | def __init__(self, model_path: str, engine_config: PytorchEngineConfig... method _init_ray (line 107) | def _init_ray(self, engine_config: PytorchEngineConfig = None): method _create_worker (line 119) | def _create_worker(self, model_path: str, engine_config: PytorchEngine... method _collective_rpc (line 141) | def _collective_rpc(self, func, *args, **kwargs): method _collective_rpc_async (line 146) | async def _collective_rpc_async(self, func, *args, **kwargs): method _collective_rpc_streaming_async (line 151) | async def _collective_rpc_streaming_async(self, func, *args, **kwargs): method close (line 161) | def close(self) -> None: method start_loop (line 167) | def start_loop(self) -> None: FILE: lmdeploy/pytorch/engine/mp_engine/zmq_engine.py function cancel_async_tasks (line 20) | def cancel_async_tasks(loop: asyncio.AbstractEventLoop): class ZMQMPEngine (line 30) | class ZMQMPEngine(MPEngine): method __init__ (line 32) | def __init__(self, method _start_mp_proc (line 49) | def _start_mp_proc( method _mp_proc (line 84) | def _mp_proc( method _mp_proc_async (line 125) | async def _mp_proc_async(server, engine: 'Engine'): method _collective_rpc (line 167) | def _collective_rpc(self, func, *args, **kwargs): method _collective_rpc_async (line 171) | async def _collective_rpc_async(self, func, *args, **kwargs): method _collective_rpc_streaming_async (line 175) | async def _collective_rpc_streaming_async(self, func, *args, **kwargs): method close (line 180) | def close(self) -> None: method start_loop (line 195) | def start_loop(self) -> None: FILE: lmdeploy/pytorch/engine/mp_engine/zmq_rpc.py function _task_callback (line 19) | def _task_callback(task: asyncio.Task) -> None: class AsyncRPCServer (line 33) | class AsyncRPCServer: method __init__ (line 35) | def __init__(self): method get_port (line 52) | def get_port(self): method _get_next_stream_id (line 55) | def _get_next_stream_id(self): method register_method (line 60) | def register_method(self, name: str, func: Callable): method send_multipart (line 70) | def send_multipart(self, client_id: bytes, data: bytes): method call_method_default (line 77) | def call_method_default(self, client_id, method: Callable, request: Di... method _method_async_task (line 88) | async def _method_async_task(self, client_id, request_id, method: Call... method _method_async_streaming_task (line 97) | async def _method_async_streaming_task(self, stream_id: int, request_i... method get_stream_output (line 127) | async def get_stream_output(self, stream_id: int): method call_method_async (line 144) | async def call_method_async(self, client_id, method: Callable, request... method call_and_response (line 166) | async def call_and_response(self): method run (line 185) | async def run(self): method stop (line 210) | def stop(self): class AsyncRPCClient (line 216) | class AsyncRPCClient: method __init__ (line 218) | def __init__(self, port: int = 5555): method _set_reply_default (line 240) | def _set_reply_default(self, request_id: int, reply: Dict): method _set_reply (line 252) | def _set_reply(self, reply: Dict): method _poll_recv (line 256) | def _poll_recv(self, timeout: float = 3): method _try_start_listen (line 264) | def _try_start_listen(self): method call (line 271) | def call(self, method, *args, **kwargs): method _async_call_impl (line 290) | async def _async_call_impl(self, method, streaming, *args, **kwargs): method async_call (line 302) | async def async_call(self, method, *args, **kwargs): method async_stream_call (line 306) | async def async_stream_call(self, method, *args, **kwargs): method listen (line 315) | async def listen(self): method stop (line 329) | def stop(self): method close_sockets (line 336) | def close_sockets(self): FILE: lmdeploy/pytorch/engine/request.py class RequestType (line 14) | class RequestType(enum.Enum): class Response (line 26) | class Response: class Request (line 39) | class Request: function _run_until_complete (line 51) | def _run_until_complete(future: Awaitable): class RequestSender (line 64) | class RequestSender: method new (line 75) | def new(cls, sender_id: int, manager: 'RequestManager'): method req_que (line 81) | def req_que(self): method event_loop (line 86) | def event_loop(self): method is_loop_alive (line 90) | def is_loop_alive(self): method run_until_complete (line 94) | def run_until_complete(self, future: Awaitable): method _req_put (line 98) | def _req_put(self, reqs: Any): method _gather_request (line 102) | def _gather_request(self, req_types: List[RequestType], data: List[Any]): method batched_send_async (line 122) | def batched_send_async(self, req_types: List[RequestType], data: List[... method send_async (line 128) | def send_async(self, req_type: RequestType, data: Any): method async_recv (line 132) | async def async_recv(self, resp: Response, wait_main: bool = False) ->... method recv (line 149) | def recv(self, resp: Response) -> Response: method async_send (line 154) | async def async_send(self, req_type: RequestType, data: Any): method send (line 159) | def send(self, req_type: RequestType, data: Any) -> Response: class RequestManager (line 165) | class RequestManager: method __init__ (line 168) | def __init__(self): method prepare_send (line 186) | async def prepare_send(self): method sender_wait_loop (line 198) | async def sender_wait_loop(self): method create_loop_task (line 218) | def create_loop_task(self): method wait_tasks (line 232) | async def wait_tasks(self): method event_loop (line 251) | def event_loop(self): method set_main_loop_func (line 258) | def set_main_loop_func(self, loop: Callable[[Coroutine], asyncio.Task]): method stop_loop (line 262) | def stop_loop(self): method is_loop_alive (line 270) | def is_loop_alive(self): method build_sender (line 282) | def build_sender(self): method has_requests (line 290) | def has_requests(self): method get_all_requests (line 296) | async def get_all_requests(self) -> Dict[RequestType, List[Request]]: method bind_func (line 323) | def bind_func(self, req_type: RequestType, callback: Callable): method set_request_priority (line 327) | def set_request_priority(self, priority: List[RequestType]): method response (line 331) | def response(self, resp: Response): method process_request (line 335) | def process_request(self, req_type: RequestType, reqs: ReqList, **kwar... method step (line 350) | async def step(self, **kwargs): method run_until_complete (line 378) | def run_until_complete(self, future: Awaitable): FILE: lmdeploy/pytorch/envs.py function env_to_bool (line 7) | def env_to_bool( function env_to_int (line 28) | def env_to_int( function env_to_list_int (line 43) | def env_to_list_int( function env_to_float (line 59) | def env_to_float( function set_envs (line 78) | def set_envs(): function get_all_envs (line 164) | def get_all_envs(): FILE: lmdeploy/pytorch/kernels/cuda/activation.py function _silu_and_mul_kernel (line 18) | def _silu_and_mul_kernel( function silu_and_mul (line 63) | def silu_and_mul(gate_up: torch.Tensor, out: torch.Tensor = None): function _silu_and_mul_moe_ep_kernel (line 101) | def _silu_and_mul_moe_ep_kernel( function silu_and_mul_moe_ep (line 153) | def silu_and_mul_moe_ep(gate_up: torch.Tensor, mask_m: torch.Tensor, out... FILE: lmdeploy/pytorch/kernels/cuda/apply_rotary_pos_emb.py function _apply_rotary_impl (line 9) | def _apply_rotary_impl(x_l, x_h, cos_l, cos_h, sin_l, sin_h): function apply_rotary_pos_emb_qk_kernel (line 28) | def apply_rotary_pos_emb_qk_kernel( function apply_rotary_pos_emb (line 115) | def apply_rotary_pos_emb(q: Tensor, FILE: lmdeploy/pytorch/kernels/cuda/awq_kernels.py function get_cuda_autotune_config (line 7) | def get_cuda_autotune_config(): function _dequant_s4_to_f16x2 (line 21) | def _dequant_s4_to_f16x2(weight, shift: tl.constexpr, is_top: tl.constex... function _unpack_weight (line 61) | def _unpack_weight(weight): function awq_linear_kernel (line 91) | def awq_linear_kernel( function awq_linear (line 212) | def awq_linear(x, qweight, scales, qzeros): FILE: lmdeploy/pytorch/kernels/cuda/bitonic_topk.py function _indicator (line 20) | def _indicator(n_dims: core.constexpr, j: core.constexpr): function _flip_along_middle (line 27) | def _flip_along_middle(x, n_dims, i): function _compare_and_swap (line 36) | def _compare_and_swap(x, ids, flip, i: core.constexpr): function _bitonic_merge_hypercube (line 55) | def _bitonic_merge_hypercube(x, ids, stage: core.constexpr, order: core.... function _bitonic_merge (line 74) | def _bitonic_merge(x, ids, stage: tl.constexpr, order: tl.constexpr, n_d... function argsort (line 86) | def argsort(x, ids, dim: tl.constexpr = None, descending: tl.constexpr =... function _bitonic_topk_kernel0 (line 99) | def _bitonic_topk_kernel0(score_ptr, function _concate (line 135) | def _concate(a, b): function _split (line 145) | def _split(a, k): function _bitonic_topk_kernel1 (line 153) | def _bitonic_topk_kernel1(score_ptr, function bitonic_topk (line 202) | def bitonic_topk(scores: torch.Tensor, FILE: lmdeploy/pytorch/kernels/cuda/blocked_fp8_fused_moe.py function get_cuda_autotune_config (line 14) | def get_cuda_autotune_config(): function fused_moe_blocked_f8_kernel (line 28) | def fused_moe_blocked_f8_kernel( function fused_moe_blocked_fp8_kernel_launcher (line 173) | def fused_moe_blocked_fp8_kernel_launcher( function fused_moe_blocked_fp8 (line 260) | def fused_moe_blocked_fp8(input: torch.Tensor, FILE: lmdeploy/pytorch/kernels/cuda/blocked_gemm_fp8.py function fast_log2_ceil (line 17) | def fast_log2_ceil(x): function fast_pow2 (line 26) | def fast_pow2(x): function fast_round_scale (line 32) | def fast_round_scale(amax, fp8_max_inv): function _quant_fp8_kernel (line 37) | def _quant_fp8_kernel( function _quant_fp8_launcher (line 105) | def _quant_fp8_launcher(A: Tensor, group_size: int, out: Tensor, scales:... function quant_fp8 (line 159) | def quant_fp8(A: Tensor, function quant_fp8_tma (line 177) | def quant_fp8_tma(A: Tensor, function _gemm_fp8_tma_pre_hook (line 194) | def _gemm_fp8_tma_pre_hook(nargs): function _gemm_fp8_tma_kernel (line 214) | def _gemm_fp8_tma_kernel( function _gemm_fp8_kernel (line 302) | def _gemm_fp8_kernel( function blocked_gemm_fp8 (line 384) | def blocked_gemm_fp8(A: Tensor, function deep_gemm_fp8 (line 480) | def deep_gemm_fp8(A: Tensor, FILE: lmdeploy/pytorch/kernels/cuda/causal_conv1d.py function causal_conv1d_fwd (line 12) | def causal_conv1d_fwd(hidden_size, width, has_bias, activation, dtype, s... function causal_conv1d_fn (line 115) | def causal_conv1d_fn( function causal_conv1d_update_fwd (line 185) | def causal_conv1d_update_fwd(hidden_size: int, seqlen: int, state_len: i... function causal_conv1d_update (line 268) | def causal_conv1d_update(x, FILE: lmdeploy/pytorch/kernels/cuda/ds_index.py function _fp8_index_kernel (line 10) | def _fp8_index_kernel( function fp8_index (line 96) | def fp8_index(q: torch.Tensor, FILE: lmdeploy/pytorch/kernels/cuda/fill_kv_cache.py function _quant_int8 (line 11) | def _quant_int8(val): function _quant_int4 (line 21) | def _quant_int4(val1, val2): function _fill_kv_cache_kernel (line 35) | def _fill_kv_cache_kernel( function _fill_page_quant_int8 (line 126) | def _fill_page_quant_int8( function _fill_page_quant_int4 (line 170) | def _fill_page_quant_int4( function _fill_page_quant (line 215) | def _fill_page_quant(state_ptr, cache_ptr, scales_zeros_ptr, block_off, ... function _fill_kv_cache_quant_kernel (line 270) | def _fill_kv_cache_quant_kernel( function fill_kv_cache (line 401) | def fill_kv_cache(k_states: Tensor, function fast_log2_ceil (line 528) | def fast_log2_ceil(x): function fast_pow2 (line 537) | def fast_pow2(x): function fast_round_scale (line 543) | def fast_round_scale(amax, fp8_max_inv): function _quant_blocked_fp8 (line 548) | def _quant_blocked_fp8(x, function _fill_kv_cache_blocked_fp8_kernel (line 574) | def _fill_kv_cache_blocked_fp8_kernel( function fill_kv_cache_blocked_fp8 (line 692) | def fill_kv_cache_blocked_fp8(k_states: Tensor, FILE: lmdeploy/pytorch/kernels/cuda/flashattention.py function _get_block_d (line 26) | def _get_block_d(head_dim_k, head_dim_v): function softcapping (line 38) | def softcapping(qk, logit_softcapping: tl.constexpr): function _load_kv (line 48) | def _load_kv(ptrs, boundary_check: tl.constexpr): function _prefill_fwd_inner (line 57) | def _prefill_fwd_inner(acc, l_i, m_i, q, k_ptrs, v_ptrs, q1, k1_ptrs, lo... function _flash_prefill_fwd_kernel (line 164) | def _flash_prefill_fwd_kernel( function _kernel_meta_sm7x (line 385) | def _kernel_meta_sm7x(BLOCK_DK): function _kernel_meta_sm8x (line 393) | def _kernel_meta_sm8x(BLOCK_DK: int, shared_kv: bool): function _kernel_meta_sm86 (line 404) | def _kernel_meta_sm86(BLOCK_DK: int, shared_kv: bool): function _kernel_meta_sm9x (line 423) | def _kernel_meta_sm9x(BLOCK_DK: int, shared_kv: bool): function _kernel_meta_sm12x (line 441) | def _kernel_meta_sm12x(BLOCK_DK: int, shared_kv: bool): function _kernel_meta_rocm (line 467) | def _kernel_meta_rocm(BLOCK_DK: int, shared_kv: bool): function flash_attn_varlen_func (line 475) | def flash_attn_varlen_func( FILE: lmdeploy/pytorch/kernels/cuda/flatten_kv_cache.py function _flatten_kv_cache (line 11) | def _flatten_kv_cache( function _dequant_int4 (line 84) | def _dequant_int4(val, HEAD_DIM: tl.constexpr, BLOCK: tl.constexpr): function _flatten_kv_cache_quant (line 92) | def _flatten_kv_cache_quant( function flatten_kv_cache (line 195) | def flatten_kv_cache(k_caches: Tensor, function dequant_fp8 (line 339) | def dequant_fp8(x, scale, GROUP_SIZE: tl.constexpr): function flatten_kv_cache_mla_fp8_kernel (line 352) | def flatten_kv_cache_mla_fp8_kernel( function flatten_kv_cache_mla_fp8 (line 431) | def flatten_kv_cache_mla_fp8(k_caches: Tensor, FILE: lmdeploy/pytorch/kernels/cuda/fused_lora.py function get_autotune_config (line 7) | def get_autotune_config(): function _atomic_store (line 24) | def _atomic_store(ptrs, val, mask): function _fused_lora_kernel (line 43) | def _fused_lora_kernel( function fused_lora (line 142) | def fused_lora(input: torch.Tensor, FILE: lmdeploy/pytorch/kernels/cuda/fused_moe.py function get_cuda_autotune_config (line 12) | def get_cuda_autotune_config(): function _config_prune_func (line 83) | def _config_prune_func(config: list, *args, **kwargs): function fused_moe_kernel (line 103) | def fused_moe_kernel( function fused_moe_kernel_launcher (line 201) | def fused_moe_kernel_launcher( function _get_exp_mask_kernel (line 262) | def _get_exp_mask_kernel( function _get_exp_mask (line 297) | def _get_exp_mask(topk_ids: torch.Tensor, num_experts: int): function _get_start_end_kernel (line 327) | def _get_start_end_kernel( function get_start_end (line 377) | def get_start_end(exp_cum: torch.Tensor, exp_topk: torch.Tensor, topk: i... function _get_sorted_idx (line 410) | def _get_sorted_idx(topk_ids: torch.Tensor, num_experts: int): function _renormalize (line 426) | def _renormalize(topk_weights: torch.Tensor, renormalize: bool): function _make_intermediate (line 434) | def _make_intermediate(shape: tuple, dtype: torch.dtype, device: torch.d... function _moe_reduce_kernel (line 443) | def _moe_reduce_kernel( function moe_reduce (line 489) | def moe_reduce(hidden_states: torch.Tensor, topk_weights: torch.Tensor, ... function fused_moe (line 526) | def fused_moe(hidden_states: torch.Tensor, FILE: lmdeploy/pytorch/kernels/cuda/fused_moe_ep.py function _fwd_kernel_ep_scatter_step1 (line 13) | def _fwd_kernel_ep_scatter_step1( function _fwd_kernel_ep_scatter_step2 (line 42) | def _fwd_kernel_ep_scatter_step2( function ep_scatter (line 78) | def ep_scatter( function _fwd_kernel_ep_gather (line 127) | def _fwd_kernel_ep_gather( function ep_gather (line 171) | def ep_gather( function _deepgemm_grouped_bf16_nt_contiguous (line 208) | def _deepgemm_grouped_bf16_nt_contiguous( function fused_moe_v3 (line 218) | def fused_moe_v3( FILE: lmdeploy/pytorch/kernels/cuda/fused_noaux_tc.py function _noaux_routing_kernel (line 29) | def _noaux_routing_kernel( function fused_noaux_tc_routing (line 102) | def fused_noaux_tc_routing( FILE: lmdeploy/pytorch/kernels/cuda/gated_delta_rule.py function normalize_qk (line 13) | def normalize_qk(k_local: T.Buffer, q_local: T.Buffer, k_per_thr: int) -... function fused_recurrent_gated_delta_rule_fwd (line 33) | def fused_recurrent_gated_delta_rule_fwd(SEQLEN, function fused_recurrent_gated_delta_rule (line 240) | def fused_recurrent_gated_delta_rule( FILE: lmdeploy/pytorch/kernels/cuda/multinomial_sampling.py function _multinomial_sampling_kernel (line 8) | def _multinomial_sampling_kernel(Scores, Seeds, Offsets, Indices, Output... function multinomial_sampling (line 50) | def multinomial_sampling(scores: torch.Tensor, FILE: lmdeploy/pytorch/kernels/cuda/pagedattention.py function _fwd_grouped_split_kernel (line 37) | def _fwd_grouped_split_kernel( function _fwd_grouped_split_quant_kernel (line 222) | def _fwd_grouped_split_quant_kernel( function _reduce_split_kernel (line 453) | def _reduce_split_kernel( function _convert_pv (line 503) | def _convert_pv(p, v): function _kernel_meta_default (line 512) | def _kernel_meta_default(BLOCK_DMODEL: int, BLOCK_H: int): function _kernel_meta_sm8x (line 517) | def _kernel_meta_sm8x(BLOCK_DMODEL: int, BLOCK_H: int): function _kernel_meta_sm9x (line 527) | def _kernel_meta_sm9x(BLOCK_DMODEL: int, BLOCK_H: int): function _get_split_k (line 537) | def _get_split_k(device_idx: int, head_grid: int, batch_size: int, num_w... function flash_attn_with_kvcache (line 553) | def flash_attn_with_kvcache( FILE: lmdeploy/pytorch/kernels/cuda/rms_norm.py function _compute_rms_norm (line 11) | def _compute_rms_norm(x, w, eps: tl.constexpr, N_COLS: tl.constexpr): function add_rms_norm_kernel (line 22) | def add_rms_norm_kernel(input, weight, residual, output, out_residual, n... function _unsqueeze_to_3d (line 55) | def _unsqueeze_to_3d(tensor: Tensor) -> Tensor: function _squeeze_to_origin_dim (line 67) | def _squeeze_to_origin_dim(tensor: Tensor, origin_dim: int) -> Tensor: function rms_norm (line 79) | def rms_norm(hidden_states: Tensor, function torch_forward (line 165) | def torch_forward(hidden_states, weight, variance_epsilon=1e-6): function test_rms_norm (line 173) | def test_rms_norm(bsz, ctx_len, feat_len, dtype): FILE: lmdeploy/pytorch/kernels/cuda/utils.py function get_device_props (line 36) | def get_device_props(device=None): function is_cuda (line 52) | def is_cuda(): function supports_tma (line 57) | def supports_tma(): FILE: lmdeploy/pytorch/kernels/cuda/w8a8_fused_moe.py function get_cuda_autotune_config (line 12) | def get_cuda_autotune_config(): function fused_moe_w8a8_kernel (line 54) | def fused_moe_w8a8_kernel( function fused_moe_w8a8_kernel_launcher (line 155) | def fused_moe_w8a8_kernel_launcher( function fused_moe_w8a8 (line 218) | def fused_moe_w8a8(input: torch.Tensor, FILE: lmdeploy/pytorch/kernels/cuda/w8a8_triton_kernels.py function _linear (line 33) | def _linear( function _linear_add (line 112) | def _linear_add(A, B, C, residual_ptr, M, N, K, stride_am, stride_ak, st... function matmul_kernel_dynamic_quant (line 162) | def matmul_kernel_dynamic_quant(a, b, rms_scale, linear_scale, residual=... function _per_token_quant_int8 (line 225) | def _per_token_quant_int8( function per_token_quant_int8 (line 263) | def per_token_quant_int8(x, eps, quant_dtype=torch.int8): function _compute_rms_norm (line 299) | def _compute_rms_norm(x, w, eps: tl.constexpr, N_COLS: tl.constexpr): function rms_norm_quant_kernel (line 310) | def rms_norm_quant_kernel( function add_rms_norm_quant_kernel (line 345) | def add_rms_norm_quant_kernel( function rms_norm_dynamic_quant (line 390) | def rms_norm_dynamic_quant(x, w, eps, residual=None, quant_dtype=torch.i... function test_rms_and_linear (line 443) | def test_rms_and_linear(x, rms_weight, linear_weight, output_dtype=torch... function test_per_token_quant (line 473) | def test_per_token_quant(x, eps, quant_dtype=torch.int8): function bench_rms_and_linear (line 495) | def bench_rms_and_linear(M: int, provider: str, dtype: torch.dtype = tor... FILE: lmdeploy/pytorch/kernels/default/multinomial_sampling.py function multinomial_sampling (line 6) | def multinomial_sampling(scores: Tensor, seeds: LongTensor, offsets: Lon... FILE: lmdeploy/pytorch/kernels/default/w8a8_kernels.py function per_channel_quant (line 5) | def per_channel_quant(x: torch.Tensor, dtype: torch.dtype): FILE: lmdeploy/pytorch/kernels/dispatcher.py function _default_api (line 13) | def _default_api(*args, **kwargs): class ParamParser (line 18) | class ParamParser: method __init__ (line 20) | def __init__(self, param: inspect.Parameter) -> None: method name (line 23) | def name(self): method func_arg (line 27) | def func_arg(self): method func_input (line 44) | def func_input(self): class FunctionDispatcher (line 59) | class FunctionDispatcher: method __init__ (line 61) | def __init__(self, func_name: str): method device_callback (line 69) | def device_callback(self, context: DeviceContext): method load_func (line 73) | def load_func(self, device: str): method load_and_call (line 90) | def load_and_call(self, *args, **kwargs): method make_caller (line 98) | def make_caller(self, api: Callable = _default_api, globals=None): FILE: lmdeploy/pytorch/kernels/dlinfer/activation.py function silu_and_mul (line 6) | def silu_and_mul(input_tensor: Tensor, ) -> Tensor: FILE: lmdeploy/pytorch/kernels/dlinfer/apply_rotary_pos_emb.py function apply_rotary_pos_emb (line 8) | def apply_rotary_pos_emb( FILE: lmdeploy/pytorch/kernels/dlinfer/awq_kernels.py function awq_linear (line 8) | def awq_linear(x: Tensor, FILE: lmdeploy/pytorch/kernels/dlinfer/fill_kv_cache.py function fill_kv_cache (line 8) | def fill_kv_cache( FILE: lmdeploy/pytorch/kernels/dlinfer/flash_attention.py function flash_attention_fwd (line 6) | def flash_attention_fwd( FILE: lmdeploy/pytorch/kernels/dlinfer/fused_moe.py function fused_moe (line 8) | def fused_moe( FILE: lmdeploy/pytorch/kernels/dlinfer/fused_rotary_emb.py function fused_rotary_emb (line 7) | def fused_rotary_emb( FILE: lmdeploy/pytorch/kernels/dlinfer/linear.py function linear (line 8) | def linear(x: Tensor, weight: Tensor, bias: Optional[Tensor] = None, all... FILE: lmdeploy/pytorch/kernels/dlinfer/moe_gating_topk_softmax.py function moe_gating_topk_softmax (line 9) | def moe_gating_topk_softmax(router_logits: Tensor, topk: int, FILE: lmdeploy/pytorch/kernels/dlinfer/pagedattention.py function prefill_attention (line 8) | def prefill_attention( function paged_token_attention (line 77) | def paged_token_attention( function paged_attention_fwd (line 113) | def paged_attention_fwd( FILE: lmdeploy/pytorch/kernels/dlinfer/rms_norm.py function rms_norm (line 6) | def rms_norm(hidden_states: Tensor, weight: Tensor, epsilon: float = 1e-... FILE: lmdeploy/pytorch/kernels/dlinfer/w8a8_kernels.py function dynamic_quant (line 7) | def dynamic_quant(x: Tensor, quant_dtype: torch.dtype, quant_granularity... function linear_w8a8 (line 12) | def linear_w8a8( function rms_norm_w8a8 (line 29) | def rms_norm_w8a8( FILE: lmdeploy/pytorch/messages.py class InputEmbeddings (line 32) | class InputEmbeddings: method move_position (line 38) | def move_position(self, offset: int = 0): class SamplingParam (line 46) | class SamplingParam: method from_gen_config (line 71) | def from_gen_config(cls, gen_config: GenerationConfig): class MessageStatus (line 156) | class MessageStatus(enum.Enum): class SequenceMeta (line 180) | class SequenceMeta: class SequenceManager (line 187) | class SequenceManager: method __init__ (line 190) | def __init__(self, seq_meta: SequenceMeta) -> None: method _new_seq_id (line 197) | def _new_seq_id(self): method get_all_sequences (line 202) | def get_all_sequences(self): method get_sequences (line 206) | def get_sequences(self, states: MessageStatus): method num_sequences (line 210) | def num_sequences(self, status: MessageStatus): method add_sequence (line 214) | def add_sequence(self, seq: 'SchedulerSequence'): method remove_sequence (line 222) | def remove_sequence(self, seq: 'SchedulerSequence'): method update_sequence_status (line 230) | def update_sequence_status(self, seq: 'SchedulerSequence', new_status:... function _to_ndarray (line 244) | def _to_ndarray(token_ids) -> np.ndarray: class SchedulerSession (line 255) | class SchedulerSession: method __init__ (line 258) | def __init__(self, session_id: int, seq_manager: SequenceManager, sche... method add_sequence (line 265) | def add_sequence(self, method remove_sequence (line 307) | def remove_sequence(self, seq: 'SchedulerSequence'): function _div_up (line 315) | def _div_up(x, n): function _round_up (line 320) | def _round_up(x, n): class HistoryEmbeddings (line 325) | class HistoryEmbeddings: method __init__ (line 328) | def __init__(self, embeddings: List[InputEmbeddings] = None): method append (line 333) | def append(self, embeddings: List[InputEmbeddings]): method clone (line 336) | def clone(self): method copy (line 340) | def copy(self): method get_step (line 343) | def get_step(self, step: int) -> int: method embeddings (line 359) | def embeddings(self): method __len__ (line 363) | def __len__(self): method __getitem__ (line 367) | def __getitem__(self, *args, **kwargs): class _HistoryDataBase (line 372) | class _HistoryDataBase: method __init__ (line 377) | def __init__(self, data: np.ndarray = None, dtype: np.dtype = np.int64): method _create_empty_array (line 388) | def _create_empty_array(self, dtype): method _get_pad_width (line 395) | def _get_pad_width(self, reserve_size: int): method reserve (line 402) | def reserve(self, size: int): method get_real (line 413) | def get_real(self): method resize (line 419) | def resize(self, size: int): method append (line 426) | def append(self, new_data: np.ndarray): method __setitem__ (line 439) | def __setitem__(self, *args, **kwargs): method __getitem__ (line 443) | def __getitem__(self, *args, **kwargs): method __len__ (line 447) | def __len__(self): method clone (line 451) | def clone(self): method copy (line 457) | def copy(self): class HistoryTokenIds (line 462) | class HistoryTokenIds(_HistoryDataBase): method __init__ (line 466) | def __init__(self, token_ids: np.ndarray = None, dtype: np.dtype = np.... method _token_ids (line 470) | def _token_ids(self): method _token_ids (line 475) | def _token_ids(self, value): class HistoryRouterExperts (line 480) | class HistoryRouterExperts(_HistoryDataBase): method __init__ (line 485) | def __init__(self, expert_ids: np.ndarray = None, dtype: np.dtype = np... method _create_empty_array (line 488) | def _create_empty_array(self, dtype): method _get_pad_width (line 495) | def _get_pad_width(self, reserve_size: int): class HistoryLogits (line 500) | class HistoryLogits(_HistoryDataBase): method __init__ (line 505) | def __init__(self, logits: np.ndarray = None, dtype: np.dtype = np.int... method _create_empty_array (line 509) | def _create_empty_array(self, dtype): method _get_pad_width (line 516) | def _get_pad_width(self, reserve_size: int): method set_torch_dtype (line 520) | def set_torch_dtype(self, torch_dtype): method get_logits (line 524) | def get_logits(self): method clone (line 534) | def clone(self): class HistoryMultiModals (line 541) | class HistoryMultiModals: method __init__ (line 543) | def __init__(self, multimodals: MultiModalInputs = None): method get_datas (line 548) | def get_datas(self, start=0, end=-1): method add_inputs (line 562) | def add_inputs(self, input_mms: MultiModalInputs): method empty (line 570) | def empty(self): method update_multimodals (line 577) | def update_multimodals(input_mms: MultiModalInputs, prev_len: int): class UpdateTokenMode (line 586) | class UpdateTokenMode(enum.Enum): class SchedulerSequence (line 594) | class SchedulerSequence: method __post_init__ (line 626) | def __post_init__(self): method block_size (line 638) | def block_size(self) -> int: method history_image_num (line 643) | def history_image_num(self) -> int: method history_image_token_len (line 648) | def history_image_token_len(self) -> int: method session_id (line 653) | def session_id(self) -> int: method token_ids (line 658) | def token_ids(self) -> np.ndarray: method input_embeddings (line 665) | def input_embeddings(self) -> List[InputEmbeddings]: method history_ids (line 672) | def history_ids(self) -> np.ndarray: method all_ids (line 677) | def all_ids(self) -> np.ndarray: method valid_ids (line 682) | def valid_ids(self) -> np.ndarray: method generated_ids (line 687) | def generated_ids(self) -> np.ndarray: method return_routed_experts (line 693) | def return_routed_experts(self) -> bool: method routed_experts (line 697) | def routed_experts(self) -> np.ndarray: method append_routed_experts (line 707) | def append_routed_experts(self, routed_experts: Tensor | np.ndarray): method num_history_ids (line 718) | def num_history_ids(self): method num_token_ids (line 723) | def num_token_ids(self): method num_valid_ids (line 727) | def num_valid_ids(self): method num_images (line 731) | def num_images(self): method num_all_ids (line 735) | def num_all_ids(self): method num_blocks (line 740) | def num_blocks(self): method state (line 745) | def state(self) -> 'StateBase': method set_state (line 748) | def set_state(self, state: 'StateBase'): method status (line 753) | def status(self): method return_logits (line 757) | def return_logits(self): method logits (line 761) | def logits(self): method append_logits (line 765) | def append_logits(self, logits: Tensor | np.ndarray): method get_input_multimodals (line 776) | def get_input_multimodals(self): method record_event (line 782) | def record_event( method _update_embeddings (line 789) | def _update_embeddings(self, embeddings: List[InputEmbeddings]): method _update_multimodals (line 799) | def _update_multimodals(self, multimodals: MultiModalInputs): method update_token_ids (line 806) | def update_token_ids(self, method set_step (line 816) | def set_step(self, step: int): FILE: lmdeploy/pytorch/model_inputs.py class DPMeta (line 22) | class DPMeta: method _gather_tp_sizes (line 27) | def _gather_tp_sizes(tp: int, seqlen: int, num_tokens: List[int], dist... method build (line 41) | def build(cls, seqlen: int, num_tokens: List[int]): method sync_tp_size (line 57) | def sync_tp_size(self, tp_size: int): class VisionModelInputs (line 63) | class VisionModelInputs: method to_device (line 71) | def to_device(self, device: str, non_blocking: bool = False): method get_inputs (line 98) | def get_inputs(self, history_lengths: torch.Tensor, seq_lengths: torch... class ModelInputsDelta (line 125) | class ModelInputsDelta: method seq_length (line 141) | def seq_length(self): method fill_tensors (line 146) | def fill_tensors(self): method to_device (line 153) | def to_device(self, device: str, non_blocking: bool = False): method log_info (line 166) | def log_info(self): class ModelInputs (line 174) | class ModelInputs: method step (line 197) | def step(self, input_ids: torch.Tensor, step_seqlens: torch.Tensor = N... method to_device (line 211) | def to_device(self, device: str, non_blocking: bool = False): method build_dp_meta (line 225) | def build_dp_meta(self, num_tokens: List[int]): method log_info (line 229) | def log_info(self): class StepContext (line 237) | class StepContext: method new (line 275) | def new( method get_mask_and_position_ids (line 343) | def get_mask_and_position_ids(cls, inputs: ModelInputs): class BuildModelContext (line 387) | class BuildModelContext: class StepContextManager (line 398) | class StepContextManager(CtxMgrBase[StepContext]): method __init__ (line 400) | def __init__(self, build_ctx: BuildModelContext = None): method build_context (line 406) | def build_context( class StepCtxMgrApi (line 427) | class StepCtxMgrApi(CtxMgrBase[StepContextManager]): method __init__ (line 430) | def __init__(self): FILE: lmdeploy/pytorch/models/baichuan.py function _is_baichuan_13b (line 17) | def _is_baichuan_13b(config: Any): class BaichuanAttention (line 22) | class BaichuanAttention(nn.Module): method __init__ (line 25) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 67) | def forward( class MLP (line 111) | class MLP(nn.Module): method __init__ (line 113) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 139) | def forward(self, x): class DecoderLayer (line 146) | class DecoderLayer(nn.Module): method __init__ (line 149) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N... method forward (line 174) | def forward( class BaichuanModel (line 205) | class BaichuanModel(nn.Module): method __init__ (line 208) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 243) | def forward( method get_input_embeddings (line 283) | def get_input_embeddings(self): class BaichuanForCausalLM (line 288) | class BaichuanForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 298) | def __init__(self, method forward (line 315) | def forward( method get_logits (line 334) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 338) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 342) | def prepare_inputs_for_generation( method load_weights (line 371) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/chatglm2.py class SelfAttention (line 26) | class SelfAttention(torch.nn.Module): method __init__ (line 32) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method _extract_rope (line 71) | def _extract_rope(states: torch.Tensor): method _fill_rope (line 79) | def _fill_rope(states: torch.Tensor, rope: torch.Tensor): method forward (line 87) | def forward( class MLP (line 134) | class MLP(nn.Module): method __init__ (line 137) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 165) | def forward(self, x): class GLMBlock (line 172) | class GLMBlock(torch.nn.Module): method __init__ (line 178) | def __init__(self, method forward (line 211) | def forward( class GLMTransformer (line 242) | class GLMTransformer(nn.Module): method __init__ (line 245) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method _get_layer (line 260) | def _get_layer(self, layer_number: int): method forward (line 264) | def forward( class Embedding (line 288) | class Embedding(nn.Module): method __init__ (line 291) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 298) | def forward(self, input_ids): class PatchEmbedding (line 307) | class PatchEmbedding(nn.Module): method __init__ (line 310) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 321) | def forward(self, images): class EVA2CLIPAttention (line 331) | class EVA2CLIPAttention(nn.Module): method __init__ (line 334) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 363) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class EVA2CLIPMLP (line 382) | class EVA2CLIPMLP(nn.Module): method __init__ (line 385) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 416) | def forward(self, x: torch.Tensor) -> torch.Tensor: class EVA2CLIPTransformerLayer (line 424) | class EVA2CLIPTransformerLayer(nn.Module): method __init__ (line 427) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 437) | def forward(self, hidden_states): class EVA2CLIPTransformer (line 448) | class EVA2CLIPTransformer(nn.Module): method __init__ (line 451) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 456) | def forward(self, hidden_states): class GLU (line 463) | class GLU(nn.Module): method __init__ (line 466) | def __init__(self, method forward (line 488) | def forward(self, x): class EVA2CLIPModel (line 497) | class EVA2CLIPModel(nn.Module): method __init__ (line 500) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 518) | def forward(self, images): class ChatGLMModel (line 539) | class ChatGLMModel(nn.Module): method __init__ (line 541) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 573) | def forward( method get_input_embeddings (line 611) | def get_input_embeddings(self): class ChatGLMForConditionalGeneration (line 616) | class ChatGLMForConditionalGeneration(nn.Module, DeployModelMixin, CudaG... method __init__ (line 619) | def __init__(self, method forward (line 632) | def forward( method get_logits (line 655) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 659) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 663) | def prepare_inputs_for_generation( method _get_model_metas (line 708) | def _get_model_metas(self, context: StepContext): method update_model_metas (line 716) | def update_model_metas(self, method load_weights (line 793) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_input_processor (line 832) | def get_input_processor(self) -> BaseModelInputProcessor: class ChatGLMInputProcessor (line 837) | class ChatGLMInputProcessor(BaseModelInputProcessor): method __init__ (line 840) | def __init__(self, config: PretrainedConfig, dtype) -> None: method preprocess_input (line 852) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/cogvlm.py class VisionExpertAttention (line 25) | class VisionExpertAttention(nn.Module): method __init__ (line 28) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 92) | def forward( class MLP (line 160) | class MLP(nn.Module): method __init__ (line 163) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 193) | def forward(self, x): class VisionExpertMLP (line 200) | class VisionExpertMLP(nn.Module): method __init__ (line 203) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 210) | def forward( class CogVLMDecoderLayer (line 230) | class CogVLMDecoderLayer(nn.Module): method __init__ (line 233) | def __init__(self, method forward (line 262) | def forward( class PatchEmbedding (line 301) | class PatchEmbedding(nn.Module): method __init__ (line 304) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 315) | def forward(self, images): class EVA2CLIPAttention (line 325) | class EVA2CLIPAttention(nn.Module): method __init__ (line 328) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 357) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class EVA2CLIPMLP (line 376) | class EVA2CLIPMLP(nn.Module): method __init__ (line 379) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 410) | def forward(self, x: torch.Tensor) -> torch.Tensor: class EVA2CLIPTransformerLayer (line 418) | class EVA2CLIPTransformerLayer(nn.Module): method __init__ (line 421) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 431) | def forward(self, hidden_states): class EVA2CLIPTransformer (line 442) | class EVA2CLIPTransformer(nn.Module): method __init__ (line 445) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 450) | def forward(self, hidden_states): class GLU (line 457) | class GLU(nn.Module): method __init__ (line 460) | def __init__(self, method forward (line 482) | def forward(self, x): class EVA2CLIPModel (line 491) | class EVA2CLIPModel(nn.Module): method __init__ (line 494) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 515) | def forward(self, images): class CogVLMModel (line 536) | class CogVLMModel(nn.Module): method __init__ (line 539) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 574) | def forward( method get_input_embeddings (line 622) | def get_input_embeddings(self): class CogVLMForCausalLM (line 631) | class CogVLMForCausalLM(nn.Module, CudaGraphMixin, DeployModelMixin): method __init__ (line 641) | def __init__(self, method forward (line 660) | def forward( method get_logits (line 685) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 689) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 693) | def prepare_inputs_for_generation( method load_weights (line 749) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method _get_model_metas (line 792) | def _get_model_metas(self, context: StepContext): method update_model_metas (line 800) | def update_model_metas(self, method get_input_processor (line 870) | def get_input_processor(self) -> BaseModelInputProcessor: class CogVLMInputProcessor (line 875) | class CogVLMInputProcessor(BaseModelInputProcessor): method __init__ (line 878) | def __init__(self, config: PretrainedConfig, dtype) -> None: method preprocess_input (line 890) | def preprocess_input(self, input_ids: List[int], input_multimodals=Non... FILE: lmdeploy/pytorch/models/deepseek.py class DeepseekAttention (line 20) | class DeepseekAttention(nn.Module): method __init__ (line 23) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 62) | def forward( class DeepseekMoE (line 105) | class DeepseekMoE(nn.Module): method __init__ (line 108) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 159) | def forward(self, hidden_states: torch.Tensor): class DeepseekMLP (line 183) | class DeepseekMLP(nn.Module): method __init__ (line 186) | def __init__(self, method forward (line 223) | def forward(self, x): class DeepseekDecoderLayer (line 230) | class DeepseekDecoderLayer(nn.Module): method __init__ (line 233) | def __init__(self, method forward (line 260) | def forward( class DeepseekModel (line 291) | class DeepseekModel(nn.Module): method __init__ (line 294) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 317) | def forward( method get_input_embeddings (line 355) | def get_input_embeddings(self): class DeepseekForCausalLM (line 360) | class DeepseekForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 375) | def __init__(self, method forward (line 392) | def forward( method get_logits (line 411) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 415) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 419) | def prepare_inputs_for_generation( method _load_weight_experts (line 448) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method load_weights (line 462) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/deepseek_mtp.py class DeepseekV2BMM (line 25) | class DeepseekV2BMM(nn.Module): method __init__ (line 28) | def __init__(self, batch: int, in_features: int, out_features: int, dt... method create_weight (line 42) | def create_weight(self, batch: int, in_features: int, out_features: in... method weight_loader (line 46) | def weight_loader(self, param: nn.Parameter, weight: torch.Tensor): method forward (line 50) | def forward(self, x: torch.Tensor, output: torch.Tensor): class DeepseekV2Attention (line 55) | class DeepseekV2Attention(DeepseekV2Attention): method __init__ (line 58) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 161) | def forward( class DeepseekV2MoE (line 207) | class DeepseekV2MoE(nn.Module): method __init__ (line 210) | def __init__(self, config: Any, layer_idx, dtype: torch.dtype = None, ... method forward (line 247) | def forward(self, hidden_states: torch.Tensor): class DeepseekV2MLP (line 267) | class DeepseekV2MLP(nn.Module): method __init__ (line 270) | def __init__(self, method forward (line 306) | def forward(self, x): class DeepseekV2DecoderLayer (line 313) | class DeepseekV2DecoderLayer(DeepseekV2DecoderLayer): method __init__ (line 316) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N... class SharedHead (line 343) | class SharedHead(nn.Module): method __init__ (line 346) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 352) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: function build_deepseek_rotary_embedding (line 356) | def build_deepseek_rotary_embedding(config: PretrainedConfig): class DeepSeekMultiTokenPredictorLayer (line 370) | class DeepSeekMultiTokenPredictorLayer(nn.Module): method __init__ (line 372) | def __init__( method forward (line 411) | def forward( class DeepSeekMultiTokenPredictor (line 447) | class DeepSeekMultiTokenPredictor(nn.Module): method __init__ (line 449) | def __init__( method forward (line 475) | def forward( method get_logits (line 498) | def get_logits( class DeepseekMTPModel (line 511) | class DeepseekMTPModel(nn.Module, CudaGraphMixin): method __init__ (line 513) | def __init__( method get_logits (line 535) | def get_logits(self, hidden_states: torch.Tensor, spec_step_idx: int =... method forward (line 539) | def forward( method make_buffers_cudagraph (line 558) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method fill_buffers_cudagraph (line 570) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, input_ids:... method prepare_inputs_for_generation (line 583) | def prepare_inputs_for_generation( method _load_weight_experts (line 603) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method _load_weight_attention (line 617) | def _load_weight_attention(self, name: str, loaded_weight: torch.Tenso... method load_weights (line 710) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method _rewrite_spec_layer_name (line 774) | def _rewrite_spec_layer_name(self, spec_layer: int, name: str) -> str: FILE: lmdeploy/pytorch/models/deepseek_v2.py class ExecType (line 29) | class ExecType(Enum): class BatchWorker (line 39) | class BatchWorker: method __init__ (line 41) | def __init__(self, tag: str, generator): method next (line 47) | def next(self): method done (line 59) | def done(self): function execute_batch (line 63) | def execute_batch(inputs: list, fn, delta_stages: int = 0, exec_type: Ex... function get_new_meta (line 177) | def get_new_meta(attn_metadata, start_idx: int, end_idx: int): function get_new_rotary_pos_emb (line 195) | def get_new_rotary_pos_emb(rotary_pos_emb, start_loc, end_loc): function get_new_input (line 201) | def get_new_input(hidden_states, rotary_pos_emb, past_key_values, residu... function get_split_flags (line 211) | def get_split_flags(attn_metadata, num=2): function split_input (line 254) | def split_input(hidden_states, function merge_output (line 309) | def merge_output(output_list): function yarn_get_mscale (line 321) | def yarn_get_mscale(scale=1, mscale=1): class DeepseekV2BMM (line 327) | class DeepseekV2BMM(nn.Module): method __init__ (line 330) | def __init__(self, batch: int, in_features: int, out_features: int, dt... method _update_batch (line 345) | def _update_batch(self, batch: int): method create_weight (line 351) | def create_weight(self, batch: int, in_features: int, out_features: in... method weight_loader (line 355) | def weight_loader(self, param: nn.Parameter, weight: torch.Tensor): method forward (line 361) | def forward(self, x: torch.Tensor, output: torch.Tensor): class DeepseekV2Attention (line 366) | class DeepseekV2Attention(nn.Module): method __init__ (line 369) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method _q_proj (line 472) | def _q_proj(self, hidden_states, num_heads: int, nope_size: int, pe_si... method _kv_proj (line 490) | def _kv_proj(self, hidden_states, nope_size: int): method _qkv_proj (line 502) | def _qkv_proj(self, hidden_states: torch.Tensor, num_heads: int): method forward (line 511) | def forward( class MoEGate (line 562) | class MoEGate(nn.Module): method __init__ (line 565) | def __init__(self, method _compute_scores (line 604) | def _compute_scores(self, logits: torch.Tensor): method _postprocess_topk_weight (line 615) | def _postprocess_topk_weight(self, topk_weight: torch.Tensor): method forward (line 625) | def forward(self, hidden_states: torch.Tensor): class DeepseekV2MoE (line 661) | class DeepseekV2MoE(nn.Module): method __init__ (line 664) | def __init__(self, config: Any, layer_idx, dtype: torch.dtype = None, ... method forward (line 720) | def forward(self, hidden_states: torch.Tensor): class DeepseekV2MLP (line 743) | class DeepseekV2MLP(nn.Module): method __init__ (line 746) | def __init__(self, method forward (line 798) | def forward(self, x): class DeepseekV2DecoderLayer (line 805) | class DeepseekV2DecoderLayer(nn.Module): method __init__ (line 808) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N... method forward (line 836) | def forward( method forward_yield (line 866) | def forward_yield( class DeepseekV2Model (line 950) | class DeepseekV2Model(nn.Module): method __init__ (line 953) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 987) | def forward( method forward_microbatch (line 1018) | def forward_microbatch( method forward_yieldlayers (line 1072) | def forward_yieldlayers(self, method get_input_embeddings (line 1092) | def get_input_embeddings(self): class DeepseekV2ForCausalLM (line 1097) | class DeepseekV2ForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 1100) | def __init__(self, method forward (line 1119) | def forward( method get_logits (line 1146) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 1150) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 1154) | def prepare_inputs_for_generation( method _load_weight_experts (line 1173) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method _load_weight_attention (line 1187) | def _load_weight_attention(self, name: str, loaded_weight: torch.Tenso... method load_weights (line 1280) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/deepseek_v32.py function rotate_activation (line 21) | def rotate_activation(x: torch.Tensor) -> torch.Tensor: class LayerNorm (line 28) | class LayerNorm(nn.Module): method __init__ (line 31) | def __init__(self, dim: int, eps: float = 1e-6, device: torch.device =... method forward (line 40) | def forward(self, x: torch.Tensor): class Indexer (line 44) | class Indexer(nn.Module): method __init__ (line 46) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N... method forward (line 87) | def forward(self, class DeepseekV32Attention (line 121) | class DeepseekV32Attention(DeepseekV2Attention): method __init__ (line 123) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N... method _q_proj (line 229) | def _q_proj(self, hidden_states, num_heads: int, nope_size: int, pe_si... method _kv_proj (line 249) | def _kv_proj(self, hidden_states, nope_size: int): method _qkv_proj (line 261) | def _qkv_proj(self, hidden_states: torch.Tensor, num_heads: int): method forward (line 270) | def forward( class DeepseekV32DecoderLayer (line 320) | class DeepseekV32DecoderLayer(DeepseekV2DecoderLayer): method __init__ (line 322) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N... class DeepseekV32Model (line 354) | class DeepseekV32Model(DeepseekV2Model): method __init__ (line 356) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... class DeepseekV32ForCausalLM (line 393) | class DeepseekV32ForCausalLM(DeepseekV2ForCausalLM): method __init__ (line 395) | def __init__(self, FILE: lmdeploy/pytorch/models/deepseek_vl2.py class MlpProjector (line 23) | class MlpProjector(nn.Module): method __init__ (line 25) | def __init__(self, cfg, dtype): method forward (line 68) | def forward(self, x): class DeepseekVLV2ForCausalLM (line 105) | class DeepseekVLV2ForCausalLM(nn.Module, CudaGraphMixin, DeployModelMixin): method __init__ (line 107) | def __init__(self, method _init_vision_module (line 154) | def _init_vision_module( method prepare_inputs_embeds (line 173) | def prepare_inputs_embeds(self, method forward (line 305) | def forward( method get_logits (line 333) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 337) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 341) | def prepare_inputs_for_generation( method load_weights (line 385) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_input_processor (line 411) | def get_input_processor(self) -> BaseModelInputProcessor: class DeepSeekVLV2InputProcessor (line 416) | class DeepSeekVLV2InputProcessor(BaseModelInputProcessor): method __init__ (line 419) | def __init__(self, config: PretrainedConfig, dtype) -> None: method preprocess_input (line 425) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/gemma.py class GemmaAttention (line 21) | class GemmaAttention(nn.Module): method __init__ (line 24) | def __init__(self, method forward (line 96) | def forward( method naive_attn_with_masks (line 162) | def naive_attn_with_masks( class GemmaMLP (line 205) | class GemmaMLP(nn.Module): method __init__ (line 208) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 237) | def forward(self, x): class GemmaDecoderLayer (line 245) | class GemmaDecoderLayer(nn.Module): method __init__ (line 248) | def __init__(self, method forward (line 290) | def forward( class Gemma3TextScaledWordEmbedding (line 334) | class Gemma3TextScaledWordEmbedding(nn.Embedding): method __init__ (line 338) | def __init__(self, method forward (line 347) | def forward(self, input_ids: torch.Tensor): class GemmaModel (line 351) | class GemmaModel(nn.Module): method __init__ (line 354) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method build_rope_emb (line 386) | def build_rope_emb(self, config: PretrainedConfig): method forward (line 428) | def forward( method get_input_embeddings (line 478) | def get_input_embeddings(self): class GemmaForCausalLM (line 483) | class GemmaForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 498) | def __init__(self, method forward (line 516) | def forward( method get_logits (line 539) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 548) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 552) | def prepare_inputs_for_generation( method update_weights (line 581) | def update_weights(self): method load_weights (line 585) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/gemma3_vl.py class Gemma3RMSNorm (line 20) | class Gemma3RMSNorm(nn.Module): method __init__ (line 22) | def __init__(self, dim: int, eps: float = 1e-6): method _norm (line 27) | def _norm(self, x): method forward (line 30) | def forward(self, x): method extra_repr (line 37) | def extra_repr(self): class Gemma3MultiModalProjector (line 41) | class Gemma3MultiModalProjector(nn.Module): method __init__ (line 43) | def __init__(self, method forward (line 62) | def forward(self, vision_outputs: torch.Tensor): class Gemma3VLInputProcessor (line 80) | class Gemma3VLInputProcessor(BaseModelInputProcessor): method __init__ (line 83) | def __init__(self, config: PretrainedConfig, dtype) -> None: method preprocess_input (line 94) | def preprocess_input(self, class Gemma3ForConditionalGeneration (line 124) | class Gemma3ForConditionalGeneration(nn.Module, CudaGraphMixin, DeployMo... method __init__ (line 126) | def __init__(self, method get_input_embeddings (line 141) | def get_input_embeddings(self): method get_logits (line 144) | def get_logits(self, hidden_states: torch.Tensor): method get_image_features (line 148) | def get_image_features(self, pixel_values: torch.Tensor): method forward (line 162) | def forward( method prepare_attn_masks (line 212) | def prepare_attn_masks( method prepare_inputs_for_generation (line 269) | def prepare_inputs_for_generation( method tie_weights (line 302) | def tie_weights(self): method load_weights (line 305) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_input_processor (line 337) | def get_input_processor(self) -> BaseModelInputProcessor: FILE: lmdeploy/pytorch/models/glm4.py class Glm4Attention (line 17) | class Glm4Attention(nn.Module): method __init__ (line 19) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method _extract_rope (line 55) | def _extract_rope(states: torch.Tensor): method _fill_rope (line 63) | def _fill_rope(states: torch.Tensor, rope: torch.Tensor): method forward (line 71) | def forward( class Glm4MLP (line 119) | class Glm4MLP(nn.Module): method __init__ (line 121) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 147) | def forward(self, x): class Glm4DecoderLayer (line 154) | class Glm4DecoderLayer(nn.Module): method __init__ (line 156) | def __init__(self, method forward (line 199) | def forward( class Glm4Model (line 236) | class Glm4Model(nn.Module): method __init__ (line 238) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 261) | def forward( class Glm4ForCausalLM (line 300) | class Glm4ForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 303) | def __init__(self, method forward (line 320) | def forward( method get_logits (line 339) | def get_logits(self, hidden_states: torch.Tensor): method update_weights (line 343) | def update_weights(self): method get_input_embeddings (line 348) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 352) | def prepare_inputs_for_generation( method load_weights (line 381) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/glm4_1v.py function _apply_mrope_selection (line 24) | def _apply_mrope_selection(hidden_states: torch.Tensor, mrope_position_i... class Glm4vTextModel (line 45) | class Glm4vTextModel(nn.Module): method __init__ (line 47) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 71) | def forward( class Glm4VisionMLP (line 115) | class Glm4VisionMLP(nn.Module): method __init__ (line 118) | def __init__(self, method forward (line 148) | def forward(self, x): class Glm4vVisionPatchEmbed (line 153) | class Glm4vVisionPatchEmbed(nn.Module): method __init__ (line 155) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 170) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Glm4vVisionRotaryEmbedding (line 178) | class Glm4vVisionRotaryEmbedding(nn.Module): method __init__ (line 181) | def __init__(self, dim: int, theta: float = 10000.0, device: torch.dev... method forward (line 186) | def forward(self, seqlen: int) -> torch.Tensor: class Glm4vVisionPatchMerger (line 192) | class Glm4vVisionPatchMerger(nn.Module): method __init__ (line 194) | def __init__(self, method forward (line 230) | def forward(self, hidden_state: torch.Tensor) -> torch.Tensor: class Glm4vVisionEmbeddings (line 236) | class Glm4vVisionEmbeddings(nn.Module): method __init__ (line 238) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 250) | def forward(self, embeddings, lengths, image_shapes, h_coords, w_coord... class Glm4vVisionAttention (line 321) | class Glm4vVisionAttention(nn.Module): method __init__ (line 324) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 363) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor, class Glm4vVisionBlock (line 390) | class Glm4vVisionBlock(nn.Module): method __init__ (line 392) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 399) | def forward(self, class Glm4vVisionModel (line 417) | class Glm4vVisionModel(nn.Module): method __init__ (line 420) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method rot_pos_emb (line 450) | def rot_pos_emb(self, grid_thw): method forward (line 480) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tenso... class Glm4vForConditionalGeneration (line 511) | class Glm4vForConditionalGeneration(nn.Module, DeployModelMixin, CudaGra... method __init__ (line 525) | def __init__(self, method forward (line 550) | def forward( method get_logits (line 590) | def get_logits(self, hidden_states: torch.Tensor): method update_weights (line 594) | def update_weights(self): method get_input_embeddings (line 599) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 603) | def prepare_inputs_for_generation( method rename_weight (line 665) | def rename_weight(cls, name: str) -> str: method load_weights (line 675) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method make_buffers_cudagraph (line 720) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method fill_buffers_cudagraph (line 731) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method _get_model_metas (line 752) | def _get_model_metas(self, context: StepContext): method _update_model_meta_decoding (line 760) | def _update_model_meta_decoding(self, context: StepContext): method _get_multimodal_pos_ids (line 773) | def _get_multimodal_pos_ids(self, grid_thw: list, device: torch.device): method _update_model_meta_prefilling (line 784) | def _update_model_meta_prefilling(self, context: StepContext): method update_model_metas (line 828) | def update_model_metas(self, method get_input_processor (line 838) | def get_input_processor(self) -> BaseModelInputProcessor: class Glm4vInputProcessor (line 843) | class Glm4vInputProcessor(BaseModelInputProcessor): method __init__ (line 846) | def __init__(self, config: PretrainedConfig) -> None: method preprocess_input (line 849) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/glm4_moe.py class Glm4MoeAttention (line 19) | class Glm4MoeAttention(nn.Module): method __init__ (line 22) | def __init__(self, method forward (line 78) | def forward( class Glm4MoeMLP (line 126) | class Glm4MoeMLP(nn.Module): method __init__ (line 129) | def __init__(self, method forward (line 165) | def forward(self, x): class Glm4MoE (line 172) | class Glm4MoE(nn.Module): method __init__ (line 175) | def __init__(self, method forward (line 241) | def forward(self, hidden_states: torch.Tensor): class Glm4MoeDecoderLayer (line 268) | class Glm4MoeDecoderLayer(nn.Module): method __init__ (line 271) | def __init__(self, method forward (line 298) | def forward( class Glm4MoeModel (line 328) | class Glm4MoeModel(nn.Module): method __init__ (line 331) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method _build_rotary_embedding (line 354) | def _build_rotary_embedding(self, config: PretrainedConfig): method forward (line 358) | def forward( class Glm4MoeForCausalLM (line 397) | class Glm4MoeForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 412) | def __init__(self, method forward (line 437) | def forward( method get_logits (line 456) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 460) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 464) | def prepare_inputs_for_generation( method _load_weight_experts (line 493) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method _load_weight_fused_experts (line 511) | def _load_weight_fused_experts(self, name: str, loaded_weight: torch.T... method load_weights (line 536) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/glm4moe_mtp.py class Glm4MoeMTPDecoderLayer (line 17) | class Glm4MoeMTPDecoderLayer(Glm4MoeDecoderLayer): method __init__ (line 20) | def __init__(self, class Glm4MoeMTPModel (line 49) | class Glm4MoeMTPModel(DeepseekMTPModel): method __init__ (line 64) | def __init__(self, method _load_weight_experts (line 78) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method load_weights (line 92) | def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/gpt_oss.py class GptOssAttention (line 22) | class GptOssAttention(nn.Module): method __init__ (line 25) | def __init__(self, method build_sinks (line 84) | def build_sinks(cls, config: PretrainedConfig, device): method weight_loader_sinks (line 97) | def weight_loader_sinks(cls, param: nn.Parameter, loaded_weight: torch... method forward (line 104) | def forward( class GateupAct (line 147) | class GateupAct: method __init__ (line 149) | def __init__(self, limit: float = 7.0, alpha: float = 1.702): method _impl (line 154) | def _impl(self, gateup: torch.Tensor) -> torch.Tensor: method build (line 164) | def build(limit: float, alpha: float): method _try_compile (line 167) | def _try_compile(self, gateup: torch.Tensor) -> Callable: method __call__ (line 175) | def __call__(self, gateup: torch.Tensor) -> torch.Tensor: class GptOssExperts (line 183) | class GptOssExperts(nn.Module): method __init__ (line 186) | def __init__(self, method forward (line 218) | def forward(self, hidden_states: torch.Tensor, router_indices, routing... class GptOssTopKRouter (line 231) | class GptOssTopKRouter(nn.Module): method __init__ (line 234) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 242) | def forward(self, hidden_states): class GptOssMLP (line 251) | class GptOssMLP(nn.Module): method __init__ (line 254) | def __init__(self, method forward (line 264) | def forward(self, hidden_states, all_routed_experts: torch.Tensor = No... class GptOssDecoderLayer (line 272) | class GptOssDecoderLayer(nn.Module): method __init__ (line 275) | def __init__(self, method forward (line 306) | def forward( class GptOssModel (line 338) | class GptOssModel(nn.Module): method __init__ (line 340) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 363) | def forward( method get_input_embeddings (line 403) | def get_input_embeddings(self): class GptOssForCausalLM (line 408) | class GptOssForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin): method __init__ (line 419) | def __init__(self, method forward (line 436) | def forward( method get_input_embeddings (line 469) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 473) | def prepare_inputs_for_generation( method _load_weight_experts_gate_up (line 502) | def _load_weight_experts_gate_up(self, name: str, loaded_weight: torch... method _load_weight_experts_down (line 520) | def _load_weight_experts_down(self, name: str, loaded_weight: torch.Te... method _load_weight_experts (line 535) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method load_weights (line 543) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/internlm.py class InternLMAttention (line 18) | class InternLMAttention(nn.Module): method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 60) | def forward( class InternLMMLP (line 103) | class InternLMMLP(nn.Module): method __init__ (line 106) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 132) | def forward(self, x): class InternLMDecoderLayer (line 139) | class InternLMDecoderLayer(nn.Module): method __init__ (line 142) | def __init__(self, method forward (line 171) | def forward( class InternLMModel (line 202) | class InternLMModel(nn.Module): method __init__ (line 205) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 248) | def forward( method get_input_embeddings (line 286) | def get_input_embeddings(self): class InternLMForCausalLM (line 291) | class InternLMForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 306) | def __init__(self, method forward (line 323) | def forward( method get_logits (line 342) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 346) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 350) | def prepare_inputs_for_generation( method load_weights (line 379) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/internlm2.py class InternLM2Attention (line 18) | class InternLM2Attention(nn.Module): method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 61) | def forward( class InternLM2MLP (line 104) | class InternLM2MLP(nn.Module): method __init__ (line 107) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 133) | def forward(self, x): class InternLM2DecoderLayer (line 140) | class InternLM2DecoderLayer(nn.Module): method __init__ (line 143) | def __init__(self, method forward (line 172) | def forward( class InternLM2Model (line 203) | class InternLM2Model(nn.Module): method __init__ (line 206) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 230) | def forward( method get_input_embeddings (line 268) | def get_input_embeddings(self): class InternLM2ForCausalLM (line 273) | class InternLM2ForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin): method __init__ (line 283) | def __init__(self, method forward (line 297) | def forward( method get_input_embeddings (line 316) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 320) | def prepare_inputs_for_generation( method load_lora_weights (line 349) | def load_lora_weights(self, weights: Iterable[Tuple[str, torch.Tensor]... method load_weights (line 373) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/internlm2_reward.py class InternLM2ForRewardModel (line 17) | class InternLM2ForRewardModel(nn.Module, CudaGraphMixin): method __init__ (line 27) | def __init__(self, method forward (line 40) | def forward( method get_logits (line 59) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 63) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 67) | def prepare_inputs_for_generation( method load_lora_weights (line 92) | def load_lora_weights(self, weights: Iterable[Tuple[str, torch.Tensor]... method load_weights (line 116) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/internlm2_ve.py class InternLM2VEDecoderLayer (line 18) | class InternLM2VEDecoderLayer(nn.Module): method __init__ (line 21) | def __init__(self, method forward (line 54) | def forward( class InternLM2VEModel (line 94) | class InternLM2VEModel(nn.Module): method __init__ (line 97) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 141) | def forward( method get_input_embeddings (line 183) | def get_input_embeddings(self): class InternLM2VEForCausalLM (line 188) | class InternLM2VEForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 198) | def __init__(self, method forward (line 215) | def forward( method get_logits (line 238) | def get_logits(self, hidden_states: torch.Tensor): method support_cuda_graph (line 242) | def support_cuda_graph( method get_input_embeddings (line 256) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 260) | def prepare_inputs_for_generation( method load_weights (line 289) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/internlm3.py class InternLM3Attention (line 18) | class InternLM3Attention(nn.Module): method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 62) | def forward( class InternLM3MLP (line 105) | class InternLM3MLP(nn.Module): method __init__ (line 108) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 135) | def forward(self, x): class InternLM3DecoderLayer (line 142) | class InternLM3DecoderLayer(nn.Module): method __init__ (line 145) | def __init__(self, method forward (line 174) | def forward( class InternLM3Model (line 205) | class InternLM3Model(nn.Module): method __init__ (line 208) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 233) | def forward( method get_input_embeddings (line 271) | def get_input_embeddings(self): class InternLM3ForCausalLM (line 276) | class InternLM3ForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin): method __init__ (line 291) | def __init__(self, method forward (line 304) | def forward( method get_input_embeddings (line 323) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 327) | def prepare_inputs_for_generation( method load_weights (line 356) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/interns1_pro.py class InternS1ProForConditionalGeneration (line 22) | class InternS1ProForConditionalGeneration(nn.Module, DeployModelMixinV1,... method __init__ (line 37) | def __init__(self, method forward (line 80) | def forward( method get_input_embeddings (line 148) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 152) | def prepare_inputs_for_generation( method rename_weight (line 236) | def rename_weight(cls, name: str) -> str: method _load_weight_experts (line 246) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method _load_weight_fused_experts (line 262) | def _load_weight_fused_experts(self, name: str, loaded_weight: torch.T... method load_weights (line 286) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_input_processor (line 360) | def get_input_processor(self) -> BaseModelInputProcessor: class InternS1ProInputProcessor (line 365) | class InternS1ProInputProcessor(BaseModelInputProcessor): method __init__ (line 368) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype) -> None: method _make_image_mm_data (line 372) | def _make_image_mm_data(self, input_mm: Dict[str, Any]) -> MultiModalD... method _make_video_mm_data (line 390) | def _make_video_mm_data(self, input_mm: Dict[str, Any]) -> MultiModalD... method _make_time_series_mm_data (line 411) | def _make_time_series_mm_data(self, input_mm: Dict[str, Any]) -> Multi... method preprocess_input (line 429) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/interns1_pro_ts.py class InternS1ProTimeSeriesEncoder (line 17) | class InternS1ProTimeSeriesEncoder(nn.Module): method __init__ (line 19) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method _make_causal_mask (line 58) | def _make_causal_mask(self, method _prepare_decoder_attention_mask (line 74) | def _prepare_decoder_attention_mask(self, input_shape, inputs_embeds, ... method forward (line 89) | def forward(self, input_features): class InternS1ProTimeSeriesConcatSubsampling (line 131) | class InternS1ProTimeSeriesConcatSubsampling(nn.Module): method __init__ (line 133) | def __init__(self, in_channels: int, concat_size: int): method forward (line 138) | def forward(self, ts_signals: torch.Tensor, ts_lens: torch.Tensor): class InternS1ProTimeSeriesFixPositionalEncoding (line 148) | class InternS1ProTimeSeriesFixPositionalEncoding(nn.Module): method __init__ (line 150) | def __init__(self, d_model, max_len=20000, dtype: torch.dtype = None, ... method forward (line 161) | def forward(self, x): class InternS1ProTimeSeriesMultiChannelAdaptiveSubsampling (line 167) | class InternS1ProTimeSeriesMultiChannelAdaptiveSubsampling(nn.Module): method __init__ (line 169) | def __init__(self, method forward (line 188) | def forward(self, inputs, input_lens, sr): method forward_encoder (line 222) | def forward_encoder(self, x): class InternS1ProTimeSeriesProjector (line 239) | class InternS1ProTimeSeriesProjector(nn.Module): method __init__ (line 241) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 256) | def forward(self, ts_features): class InternS1ProTimeSeriesModel (line 264) | class InternS1ProTimeSeriesModel(nn.Module): method __init__ (line 266) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 273) | def forward( FILE: lmdeploy/pytorch/models/internvl.py class Gating (line 25) | class Gating(nn.Module): method __init__ (line 27) | def __init__(self, hidden_size=2048, expansion_factor=4, dtype=None, d... method forward (line 52) | def forward(self, x): class CrossAttentionPooling (line 63) | class CrossAttentionPooling(nn.Module): method __init__ (line 65) | def __init__(self, dim, num_heads=16, dtype=None, device=None): method forward (line 97) | def forward(self, batched_tokens: list[torch.Tensor]): class InternVisionEmbeddings (line 136) | class InternVisionEmbeddings(nn.Module): method __init__ (line 139) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method _get_pos_embed (line 161) | def _get_pos_embed(self, pos_embed, H, W): method forward (line 169) | def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor: function pre_rms_norm (line 191) | def pre_rms_norm(q: torch.Tensor, k: torch.Tensor) -> torch.Tensor: function post_rms_norm (line 202) | def post_rms_norm(q: torch.Tensor, k: torch.Tensor, weight_q: torch.Tens... class InternAttention (line 216) | class InternAttention(nn.Module): method __init__ (line 219) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method pre_rms_norm (line 270) | def pre_rms_norm(self, q: torch.Tensor, k: torch.Tensor) -> torch.Tensor: method post_rms_norm (line 274) | def post_rms_norm(self, q: torch.Tensor, k: torch.Tensor, variance: to... method qkv_norm (line 280) | def qkv_norm(self, q: torch.Tensor, k: torch.Tensor) -> Tuple[torch.Te... method forward (line 302) | def forward(self, hidden_states): class InternMLP (line 325) | class InternMLP(nn.Module): method __init__ (line 328) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 357) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class InternVisionEncoderLayer (line 364) | class InternVisionEncoderLayer(nn.Module): method __init__ (line 367) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method _attn (line 383) | def _attn(self, hidden_states): method _mlp (line 388) | def _mlp(self, hidden_states): method forward (line 392) | def forward( class InternVisionEncoder (line 401) | class InternVisionEncoder(nn.Module): method __init__ (line 404) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 410) | def forward( class InternVisionModel (line 423) | class InternVisionModel(nn.Module): method __init__ (line 426) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 433) | def forward( class InternVLChatModel (line 447) | class InternVLChatModel(nn.Module, DeployModelMixinV1, CudaGraphMixin): method __init__ (line 449) | def __init__(self, method compile_model (line 515) | def compile_model(self): method _mark_dynamic_once (line 532) | def _mark_dynamic_once(self, pixel_values, dims): method pixel_shuffle (line 540) | def pixel_shuffle(self, x, scale_factor=0.5): method extract_feature (line 552) | def extract_feature(self, pixel_values): method compress_visual_tokens_in_sentence (line 569) | def compress_visual_tokens_in_sentence( method get_image_num_per_sample (line 627) | def get_image_num_per_sample(self, input_ids: torch.Tensor, img_contex... method split_and_merge (line 642) | def split_and_merge(self, features: torch.Tensor, split_sizes: torch.T... method extract_feature_flash (line 657) | def extract_feature_flash(self, pixel_values, lengths): method extract_and_compress (line 685) | def extract_and_compress(self, pixel_values: torch.Tensor, input_ids: ... method update_forward_inputs (line 716) | def update_forward_inputs(self, input_ids: torch.Tensor, new_seqlens: ... method forward (line 757) | def forward( method get_input_embeddings (line 805) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 809) | def prepare_inputs_for_generation( method load_lora_weights (line 924) | def load_lora_weights(self, weights: Iterable[Tuple[str, torch.Tensor]... method load_weights (line 934) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_input_processor (line 959) | def get_input_processor(self) -> BaseModelInputProcessor: class InternVLInputProcessor (line 964) | class InternVLInputProcessor(BaseModelInputProcessor): method __init__ (line 967) | def __init__(self, config: PretrainedConfig, dtype) -> None: method preprocess_input (line 978) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/internvl3_hf.py function pre_rms_norm (line 27) | def pre_rms_norm(q: torch.Tensor, k: torch.Tensor) -> torch.Tensor: function post_rms_norm (line 38) | def post_rms_norm(q: torch.Tensor, k: torch.Tensor, weight_q: torch.Tens... class InternVLVisionPatchEmbeddings (line 52) | class InternVLVisionPatchEmbeddings(nn.Module): method __init__ (line 58) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 78) | def forward(self, pixel_values: torch.Tensor) -> torch.Tensor: class InternVLVisionEmbeddings (line 90) | class InternVLVisionEmbeddings(nn.Module): method __init__ (line 93) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method interpolate_pos_encoding (line 115) | def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: i... method forward (line 140) | def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor: class InternVLVisionAttention (line 158) | class InternVLVisionAttention(nn.Module): method __init__ (line 161) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method pre_rms_norm (line 212) | def pre_rms_norm(self, q: torch.Tensor, k: torch.Tensor) -> torch.Tensor: method post_rms_norm (line 216) | def post_rms_norm(self, q: torch.Tensor, k: torch.Tensor, variance: to... method qkv_norm (line 221) | def qkv_norm(self, q: torch.Tensor, k: torch.Tensor) -> Tuple[torch.Te... method forward (line 243) | def forward(self, hidden_states): class InternVLVisionMLP (line 266) | class InternVLVisionMLP(nn.Module): method __init__ (line 269) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 298) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class InternVLVisionLayer (line 305) | class InternVLVisionLayer(nn.Module): method __init__ (line 308) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method _attn (line 330) | def _attn(self, hidden_states): method _mlp (line 336) | def _mlp(self, hidden_states): method forward (line 341) | def forward( class InternVLVisionEncoder (line 350) | class InternVLVisionEncoder(nn.Module): method __init__ (line 353) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 359) | def forward( class InternVLVisionModel (line 372) | class InternVLVisionModel(nn.Module): method __init__ (line 375) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method get_input_embeddings (line 385) | def get_input_embeddings(self): method forward (line 388) | def forward( class InternVLMultiModalProjector (line 403) | class InternVLMultiModalProjector(nn.Module): method __init__ (line 405) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 434) | def forward(self, image_features): class InternVLForConditionalGeneration (line 442) | class InternVLForConditionalGeneration(nn.Module, DeployModelMixinV1, Cu... method __init__ (line 444) | def __init__(self, method compile_model (line 464) | def compile_model(self): method _mark_dynamic_once (line 481) | def _mark_dynamic_once(self, pixel_values, dims): method get_input_embeddings (line 489) | def get_input_embeddings(self): method get_image_features (line 493) | def get_image_features( method pixel_shuffle (line 539) | def pixel_shuffle(self, vision_features: torch.Tensor, scale_factor: f... method forward (line 573) | def forward( method prepare_inputs_for_generation (line 611) | def prepare_inputs_for_generation( method load_lora_weights (line 656) | def load_lora_weights(self, weights: Iterable[Tuple[str, torch.Tensor]... method rename_weight (line 667) | def rename_weight(cls, name: str) -> str: method load_weights (line 677) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_input_processor (line 710) | def get_input_processor(self) -> BaseModelInputProcessor: class InternVLProcessor (line 715) | class InternVLProcessor(BaseModelInputProcessor): method __init__ (line 718) | def __init__(self, config: PretrainedConfig, dtype) -> None: method preprocess_input (line 722) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/internvl_patch.py class InternVisionEmbeddings (line 10) | class InternVisionEmbeddings(nn.Module): method __init__ (line 13) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method _get_pos_embed (line 35) | def _get_pos_embed(self, pos_embed, H, W): method forward (line 43) | def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor: class InternVisionPatchModel (line 58) | class InternVisionPatchModel(nn.Module): method __init__ (line 61) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 66) | def forward( FILE: lmdeploy/pytorch/models/llama.py class LlamaAttention (line 18) | class LlamaAttention(nn.Module): method __init__ (line 21) | def __init__(self, config: LlamaConfig, dtype: torch.dtype = None, dev... method forward (line 63) | def forward( class LlamaMLP (line 106) | class LlamaMLP(nn.Module): method __init__ (line 109) | def __init__(self, config: LlamaConfig, dtype: torch.dtype = None, dev... method forward (line 136) | def forward(self, x): class LlamaDecoderLayer (line 143) | class LlamaDecoderLayer(nn.Module): method __init__ (line 146) | def __init__(self, method forward (line 176) | def forward( class LlamaModel (line 207) | class LlamaModel(nn.Module): method __init__ (line 210) | def __init__(self, config: LlamaConfig, dtype: torch.dtype = None, dev... method forward (line 233) | def forward( method get_input_embeddings (line 278) | def get_input_embeddings(self): class LlamaForCausalLM (line 283) | class LlamaForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 298) | def __init__(self, method forward (line 316) | def forward( method update_weights (line 335) | def update_weights(self): method get_logits (line 340) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 345) | def get_input_embeddings(self): method get_outputs_cudagraph (line 349) | def get_outputs_cudagraph(self, output_buffers: Dict[str, torch.Tensor... method prepare_inputs_for_generation (line 358) | def prepare_inputs_for_generation( method load_weights (line 387) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/llama4.py class Llama4TextAttention (line 22) | class Llama4TextAttention(nn.Module): method __init__ (line 25) | def __init__(self, method forward (line 77) | def forward( class Llama4TextMLP (line 126) | class Llama4TextMLP(nn.Module): method __init__ (line 129) | def __init__(self, method forward (line 168) | def forward(self, x): class Llama4TextMoe (line 175) | class Llama4TextMoe(nn.Module): method __init__ (line 178) | def __init__(self, config: Llama4TextConfig, dtype: torch.dtype = None... method forward (line 212) | def forward(self, hidden_states: torch.Tensor): class Llama4TextDecoderLayer (line 245) | class Llama4TextDecoderLayer(nn.Module): method __init__ (line 248) | def __init__(self, method forward (line 271) | def forward( class Llama4TextModel (line 303) | class Llama4TextModel(nn.Module): method __init__ (line 306) | def __init__(self, config: Llama4TextConfig, dtype: torch.dtype = None... method build_llama4_rotary_embedding (line 326) | def build_llama4_rotary_embedding(config: Llama4TextConfig): method forward (line 330) | def forward( class Llama4ForCausalLM (line 363) | class Llama4ForCausalLM(nn.Module): method __init__ (line 365) | def __init__(self, method forward (line 381) | def forward( method get_input_embeddings (line 400) | def get_input_embeddings(self): method get_logits (line 404) | def get_logits(self, hidden_states: torch.Tensor): class Llama4MultiModalProjector (line 409) | class Llama4MultiModalProjector(nn.Module): method __init__ (line 411) | def __init__(self, config: Llama4Config, dtype: torch.dtype = None, de... method forward (line 421) | def forward(self, image_features): class Llama4UnfoldConvolution (line 427) | class Llama4UnfoldConvolution(nn.Module): method __init__ (line 430) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No... method forward (line 444) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Llama4VisionRotaryEmbedding (line 452) | class Llama4VisionRotaryEmbedding(nn.Module): method __init__ (line 454) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No... method forward (line 471) | def forward(self, hidden_states): function reshape_for_broadcast (line 475) | def reshape_for_broadcast(freqs_ci: torch.Tensor, query: torch.Tensor): function vision_apply_rotary_emb (line 481) | def vision_apply_rotary_emb( class Llama4VisionAttention (line 495) | class Llama4VisionAttention(nn.Module): method __init__ (line 498) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No... method forward (line 525) | def forward( class Llama4VisionMLP (line 565) | class Llama4VisionMLP(nn.Module): method __init__ (line 568) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No... method forward (line 585) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Llama4VisionEncoderLayer (line 593) | class Llama4VisionEncoderLayer(nn.Module): method __init__ (line 596) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No... method forward (line 606) | def forward( class Llama4VisionEncoder (line 632) | class Llama4VisionEncoder(nn.Module): method __init__ (line 635) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No... method forward (line 641) | def forward( function pixel_shuffle (line 655) | def pixel_shuffle(input_tensor: torch.Tensor, shuffle_ratio: int): class Llama4VisionMLP2 (line 675) | class Llama4VisionMLP2(torch.nn.Module): method __init__ (line 677) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No... method forward (line 695) | def forward(self, hidden_states): class Llama4VisionPixelShuffleMLP (line 702) | class Llama4VisionPixelShuffleMLP(nn.Module): method __init__ (line 704) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No... method forward (line 711) | def forward(self, encoded_patches: torch.Tensor) -> torch.Tensor: class Llama4VisionModel (line 716) | class Llama4VisionModel(nn.Module): method __init__ (line 719) | def __init__(self, config: Llama4VisionConfig, dtype: torch.dtype = No... method get_input_embeddings (line 745) | def get_input_embeddings(self): method forward (line 750) | def forward( class Llama4ForConditionalGeneration (line 796) | class Llama4ForConditionalGeneration(nn.Module, CudaGraphMixin): method __init__ (line 798) | def __init__(self, method _update_quant_config (line 818) | def _update_quant_config(config: Llama4Config): method get_image_features (line 834) | def get_image_features( method get_input_embeddings (line 844) | def get_input_embeddings(self): method forward (line 848) | def forward( method get_logits (line 879) | def get_logits(self, hidden_states: torch.Tensor): method prepare_inputs_for_generation (line 883) | def prepare_inputs_for_generation( method load_weights (line 921) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_input_processor (line 1004) | def get_input_processor(self) -> BaseModelInputProcessor: class Llama4InputProcessor (line 1009) | class Llama4InputProcessor(BaseModelInputProcessor): method __init__ (line 1012) | def __init__(self, config: Llama4Config, dtype) -> None: method preprocess_input (line 1018) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/llama_eagle.py class EagleLlamaDecoderLayer (line 17) | class EagleLlamaDecoderLayer(LlamaDecoderLayer): method __init__ (line 19) | def __init__(self, class EagleLlamaModel (line 33) | class EagleLlamaModel(nn.Module): method __init__ (line 35) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 62) | def forward( method get_input_embeddings (line 98) | def get_input_embeddings(self): class EagleLlamaForCausalLM (line 103) | class EagleLlamaForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 117) | def __init__(self, config, ctx_mgr, dtype=None, device=None): method forward (line 125) | def forward( method prepare_inputs_for_generation (line 146) | def prepare_inputs_for_generation( method make_buffers_cudagraph (line 166) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method fill_buffers_cudagraph (line 178) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method update_weights (line 199) | def update_weights(self): method get_input_embeddings (line 204) | def get_input_embeddings(self): method load_weights (line 208) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/llama_eagle3.py class Eagle3LlamaDecoderLayer (line 18) | class Eagle3LlamaDecoderLayer(LlamaDecoderLayer): method __init__ (line 21) | def __init__(self, method forward (line 50) | def forward( class Eagle3LlamaModel (line 80) | class Eagle3LlamaModel(nn.Module): method __init__ (line 82) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 109) | def forward( method get_input_embeddings (line 146) | def get_input_embeddings(self): class Eagle3LlamaForCausalLM (line 151) | class Eagle3LlamaForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 165) | def __init__(self, config, ctx_mgr, dtype=None, device=None): method forward (line 188) | def forward( method prepare_inputs_for_generation (line 209) | def prepare_inputs_for_generation( method get_logits (line 229) | def get_logits(self, hidden_states: torch.Tensor): method make_buffers_cudagraph (line 234) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method fill_buffers_cudagraph (line 248) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method get_outputs_cudagraph (line 265) | def get_outputs_cudagraph(self, output_buffers: Dict[str, torch.Tensor... method get_input_embeddings (line 273) | def get_input_embeddings(self): method load_weights (line 277) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/llava.py class LlavaMultiModalProjector (line 23) | class LlavaMultiModalProjector(nn.Module): method __init__ (line 25) | def __init__(self, config: LlavaConfig, dtype: torch.dtype = None, dev... method forward (line 41) | def forward(self, image_features): class CLIPVisionEmbeddings (line 48) | class CLIPVisionEmbeddings(nn.Module): method __init__ (line 51) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de... method interpolate_pos_encoding (line 82) | def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: i... method forward (line 123) | def forward(self, pixel_values: torch.FloatTensor, interpolate_pos_enc... class CLIPAttention (line 141) | class CLIPAttention(nn.Module): method __init__ (line 144) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de... method forward (line 174) | def forward( class CLIPMLP (line 205) | class CLIPMLP(nn.Module): method __init__ (line 208) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de... method forward (line 234) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class CLIPEncoderLayer (line 242) | class CLIPEncoderLayer(nn.Module): method __init__ (line 245) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de... method forward (line 253) | def forward( class CLIPEncoder (line 278) | class CLIPEncoder(nn.Module): method __init__ (line 281) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de... method forward (line 287) | def forward( class CLIPVisionTransformer (line 309) | class CLIPVisionTransformer(nn.Module): method __init__ (line 312) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de... method forward (line 322) | def forward( class CLIPVisionModel (line 347) | class CLIPVisionModel(nn.Module): method __init__ (line 350) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de... method forward (line 354) | def forward(self, function build_vision_model (line 365) | def build_vision_model(vision_config, dtype: torch.dtype = None, device:... class LlavaForConditionalGeneration (line 375) | class LlavaForConditionalGeneration(nn.Module, CudaGraphMixin, DeployMod... method __init__ (line 377) | def __init__(self, method get_image_features (line 395) | def get_image_features(self, method forward (line 413) | def forward( method get_logits (line 442) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 446) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 450) | def prepare_inputs_for_generation( method load_weights (line 495) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_input_processor (line 529) | def get_input_processor(self) -> BaseModelInputProcessor: class LLavaInputProcessor (line 534) | class LLavaInputProcessor(BaseModelInputProcessor): method __init__ (line 537) | def __init__(self, config: PretrainedConfig, dtype) -> None: method preprocess_input (line 541) | def preprocess_input(self, function get_anyres_image_grid_shape (line 571) | def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size): function unpad_image (line 585) | def unpad_image(tensor, original_size): function image_size_to_num_patches (line 609) | def image_size_to_num_patches(image_size, grid_pinpoints, patch_size: int): class LlavaNextForConditionalGeneration (line 628) | class LlavaNextForConditionalGeneration(LlavaForConditionalGeneration): method __init__ (line 630) | def __init__(self, method get_image_features (line 639) | def get_image_features( method pack_image_features (line 674) | def pack_image_features(self, image_features, image_sizes, vision_feat... method forward (line 720) | def forward( method get_input_processor (line 759) | def get_input_processor(self) -> BaseModelInputProcessor: method prepare_inputs_for_generation (line 763) | def prepare_inputs_for_generation( class LLavaNextInputProcessor (line 812) | class LLavaNextInputProcessor(BaseModelInputProcessor): method __init__ (line 815) | def __init__(self, config: PretrainedConfig, dtype) -> None: method preprocess_input (line 819) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/minicpm3.py class MiniCPMAttention (line 22) | class MiniCPMAttention(nn.Module): method __init__ (line 25) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 107) | def forward( class MiniCPMMLP (line 173) | class MiniCPMMLP(nn.Module): method __init__ (line 176) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 202) | def forward(self, x): class MiniCPMDecoderLayer (line 209) | class MiniCPMDecoderLayer(nn.Module): method __init__ (line 212) | def __init__(self, method forward (line 243) | def forward( class MiniCPM3Model (line 275) | class MiniCPM3Model(nn.Module): method __init__ (line 278) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 328) | def forward( method get_input_embeddings (line 363) | def get_input_embeddings(self): class MiniCPM3ForCausalLM (line 368) | class MiniCPM3ForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 378) | def __init__(self, method forward (line 395) | def forward( method update_weights (line 416) | def update_weights(self): method get_input_embeddings (line 421) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 425) | def prepare_inputs_for_generation( method load_weights (line 454) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/minicpmv26.py class MiniCPMV26Attention (line 17) | class MiniCPMV26Attention(nn.Module): method __init__ (line 20) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 60) | def forward( class MiniCPMV26MLP (line 103) | class MiniCPMV26MLP(nn.Module): method __init__ (line 106) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 132) | def forward(self, x): class MiniCPMV26DecoderLayer (line 139) | class MiniCPMV26DecoderLayer(nn.Module): method __init__ (line 142) | def __init__(self, method forward (line 171) | def forward( class MiniCPMV26Model (line 202) | class MiniCPMV26Model(nn.Module): method __init__ (line 205) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 228) | def forward( method get_input_embeddings (line 266) | def get_input_embeddings(self): class MiniCPMVForCausalLM (line 271) | class MiniCPMVForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 281) | def __init__(self, method forward (line 298) | def forward( method get_logits (line 318) | def get_logits(self, hidden_states: torch.Tensor): method update_weights (line 322) | def update_weights(self): method get_input_embeddings (line 327) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 331) | def prepare_inputs_for_generation( method load_weights (line 360) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/mistral.py class MistralAttention (line 18) | class MistralAttention(nn.Module): method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 64) | def forward( class MistralMLP (line 107) | class MistralMLP(nn.Module): method __init__ (line 110) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 136) | def forward(self, x): class MistralDecoderLayer (line 143) | class MistralDecoderLayer(nn.Module): method __init__ (line 146) | def __init__(self, method forward (line 175) | def forward( class MistralModel (line 206) | class MistralModel(nn.Module): method __init__ (line 209) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 232) | def forward( method get_input_embeddings (line 270) | def get_input_embeddings(self): class MistralForCausalLM (line 275) | class MistralForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 290) | def __init__(self, method forward (line 307) | def forward( method get_logits (line 326) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 330) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 334) | def prepare_inputs_for_generation( method load_weights (line 363) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/mixtral.py class MixtralAttention (line 17) | class MixtralAttention(nn.Module): method __init__ (line 20) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 62) | def forward( class MixtralSparseMoeBlock (line 101) | class MixtralSparseMoeBlock(nn.Module): method __init__ (line 104) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 139) | def forward(self, hidden_states: torch.Tensor): class MixtralDecoderLayer (line 156) | class MixtralDecoderLayer(nn.Module): method __init__ (line 159) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N... method forward (line 178) | def forward( class MixtralModel (line 209) | class MixtralModel(nn.Module): method __init__ (line 212) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 231) | def forward( method get_input_embeddings (line 263) | def get_input_embeddings(self): class MixtralForCausalLM (line 268) | class MixtralForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 271) | def __init__(self, method forward (line 287) | def forward( method get_logits (line 305) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 309) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 313) | def prepare_inputs_for_generation( method load_weights (line 332) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/patch.py function _get_rewrite_qualname (line 24) | def _get_rewrite_qualname(origin_qualname: str, module_map: Dict[str, st... function _class_from_qualname (line 41) | def _class_from_qualname(qualname: str) -> Any: function _find_rewrite_module_qualname (line 61) | def _find_rewrite_module_qualname(model, module_map: Dict[str, str]): function get_rewrite_cls (line 96) | def get_rewrite_cls(model: torch.nn.Module, module_map: Dict[str, str] =... function _get_module_map (line 106) | def _get_module_map(): function update_custom_module_map (line 118) | def update_custom_module_map(module_map_path: str): function _get_model_class (line 156) | def _get_model_class(config, module_map): function build_model_from_hf_config (line 188) | def build_model_from_hf_config(model_config: PretrainedConfig, function build_patched_model (line 210) | def build_patched_model(config: ModelConfig, device: torch.device = None... function add_adapters (line 218) | def add_adapters(model: torch.nn.Module, function build_model_context (line 323) | def build_model_context(ctx: BuildModelContext): function get_build_model_context (line 333) | def get_build_model_context() -> BuildModelContext: function add_prefix (line 339) | def add_prefix(name: str, prefix: str) -> str: FILE: lmdeploy/pytorch/models/phi3.py class Phi3Attention (line 19) | class Phi3Attention(nn.Module): method __init__ (line 22) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 64) | def forward( class Phi3MLP (line 107) | class Phi3MLP(nn.Module): method __init__ (line 110) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 136) | def forward(self, x): class Phi3DecoderLayer (line 143) | class Phi3DecoderLayer(nn.Module): method __init__ (line 146) | def __init__(self, method forward (line 175) | def forward( class Phi3Model (line 206) | class Phi3Model(nn.Module): method __init__ (line 209) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 232) | def forward( method get_input_embeddings (line 270) | def get_input_embeddings(self): class Phi3ForCausalLM (line 275) | class Phi3ForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin): method __init__ (line 285) | def __init__(self, method forward (line 298) | def forward( method get_input_embeddings (line 317) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 321) | def prepare_inputs_for_generation( method load_weights (line 350) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/phi3_moe.py function sparsemixer (line 19) | def sparsemixer(scores, top_k, jitter_eps): class PhiMoEAttention (line 68) | class PhiMoEAttention(nn.Module): method __init__ (line 71) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 112) | def forward( class PhiMoESparseMoeBlock (line 150) | class PhiMoESparseMoeBlock(nn.Module): method __init__ (line 153) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 183) | def forward(self, hidden_states: torch.Tensor): class PhiMoEDecoderLayer (line 203) | class PhiMoEDecoderLayer(nn.Module): method __init__ (line 206) | def __init__(self, config: Any, layer_idx: int, dtype: torch.dtype = N... method forward (line 223) | def forward( class PhiMoEModel (line 253) | class PhiMoEModel(nn.Module): method __init__ (line 256) | def __init__(self, config: Any, dtype: torch.dtype = None, device: tor... method forward (line 304) | def forward( method get_input_embeddings (line 335) | def get_input_embeddings(self): class PhiMoEForCausalLM (line 340) | class PhiMoEForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 343) | def __init__(self, method forward (line 359) | def forward( method get_logits (line 377) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 381) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 385) | def prepare_inputs_for_generation( method load_weights (line 404) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/phi3_v.py class Phi3ImageEmbedding (line 34) | class Phi3ImageEmbedding(nn.Module): method __init__ (line 38) | def __init__(self, method get_img_features (line 108) | def get_img_features(self, img_embeds: torch.FloatTensor) -> torch.Flo... method forward (line 124) | def forward( method hd_feature_transform (line 143) | def hd_feature_transform(self, image_features, image_sizes): method reshape_hd_patches_2x2merge (line 187) | def reshape_hd_patches_2x2merge(self, image_features, h_crop, w_crop): method add_image_newline (line 207) | def add_image_newline(self, image_features_hd): class Phi3VModel (line 220) | class Phi3VModel(Phi3Model): method __init__ (line 223) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 236) | def forward( class Phi3VForCausalLM (line 266) | class Phi3VForCausalLM(Phi3ForCausalLM): method __init__ (line 268) | def __init__(self, method forward (line 283) | def forward( method prepare_inputs_for_generation (line 308) | def prepare_inputs_for_generation( method load_weights (line 336) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_input_processor (line 352) | def get_input_processor(self) -> BaseModelInputProcessor: class Phi3VInputProcessor (line 357) | class Phi3VInputProcessor(BaseModelInputProcessor): method __init__ (line 360) | def __init__(self, config: PretrainedConfig, dtype) -> None: method preprocess_input (line 364) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/q_modules.py class QTensor (line 13) | class QTensor: method __post_init__ (line 22) | def __post_init__(self): method __getattr__ (line 25) | def __getattr__(self, name: str): class QRMSNorm (line 33) | class QRMSNorm(nn.Module): method __init__ (line 37) | def __init__(self, hidden_size, eps=1e-6, quant_dtype=torch.int8): method from_float (line 44) | def from_float(cls, mod: nn.Module, initialization: bool = True, quant... method forward (line 57) | def forward(self, hidden_states): class QLinear (line 70) | class QLinear(nn.Module): method __init__ (line 81) | def __init__(self, method from_float (line 101) | def from_float(cls, mod: nn.Module, initialization: bool = True, quant... method forward (line 123) | def forward(self, input): method extra_repr (line 145) | def extra_repr(self) -> str: FILE: lmdeploy/pytorch/models/qwen.py class QWenAttention (line 18) | class QWenAttention(torch.nn.Module): method __init__ (line 24) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 65) | def forward( class QWenMLP (line 108) | class QWenMLP(nn.Module): method __init__ (line 111) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 138) | def forward(self, x): class QWenBlock (line 145) | class QWenBlock(torch.nn.Module): method __init__ (line 151) | def __init__(self, method forward (line 183) | def forward( class QWenModel (line 214) | class QWenModel(nn.Module): method __init__ (line 216) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 245) | def forward( method get_input_embeddings (line 283) | def get_input_embeddings(self): class QWenLMHeadModel (line 288) | class QWenLMHeadModel(nn.Module, CudaGraphMixin): method __init__ (line 298) | def __init__(self, method forward (line 316) | def forward( method get_logits (line 335) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 339) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 343) | def prepare_inputs_for_generation( method load_weights (line 372) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/qwen2.py class Qwen2Attention (line 18) | class Qwen2Attention(nn.Module): method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 61) | def forward( class Qwen2MLP (line 104) | class Qwen2MLP(nn.Module): method __init__ (line 107) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 133) | def forward(self, x): class Qwen2DecoderLayer (line 140) | class Qwen2DecoderLayer(nn.Module): method __init__ (line 143) | def __init__(self, method forward (line 172) | def forward( class Qwen2Model (line 203) | class Qwen2Model(nn.Module): method __init__ (line 206) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 231) | def forward( method get_input_embeddings (line 269) | def get_input_embeddings(self): class Qwen2ForCausalLM (line 274) | class Qwen2ForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin): method __init__ (line 289) | def __init__(self, method forward (line 302) | def forward( method get_input_embeddings (line 321) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 325) | def prepare_inputs_for_generation( method load_weights (line 354) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/qwen2_5_vl.py class Qwen2_5_PatchEmbed (line 25) | class Qwen2_5_PatchEmbed(nn.Module): method __init__ (line 28) | def __init__(self, method forward (line 50) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Qwen2_5_VisionRotaryEmbedding (line 58) | class Qwen2_5_VisionRotaryEmbedding(nn.Module): method __init__ (line 61) | def __init__(self, dim: int, theta: float = 10000.0, device: torch.dev... method forward (line 66) | def forward(self, seqlen: int) -> torch.Tensor: class Qwen2_5_VLVisionAttention (line 72) | class Qwen2_5_VLVisionAttention(nn.Module): method __init__ (line 75) | def __init__(self, method forward (line 120) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor, class Qwen2_5_VLMLP (line 147) | class Qwen2_5_VLMLP(nn.Module): method __init__ (line 150) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 176) | def forward(self, x): class Qwen2_5_VLVisionBlock (line 181) | class Qwen2_5_VLVisionBlock(nn.Module): method __init__ (line 184) | def __init__(self, method forward (line 198) | def forward(self, class Qwen2_5_VLPatchMerger (line 211) | class Qwen2_5_VLPatchMerger(nn.Module): method __init__ (line 214) | def __init__(self, method forward (line 230) | def forward(self, x: torch.Tensor) -> torch.Tensor: class Qwen2_5_VisionTransformerPretrainedModel (line 236) | class Qwen2_5_VisionTransformerPretrainedModel(nn.Module): method __init__ (line 239) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method rot_pos_emb (line 268) | def rot_pos_emb(self, grid_thw): method get_window_index (line 298) | def get_window_index(self, grid_thw): method forward (line 339) | def forward(self, class Qwen2_5_VLForConditionalGeneration (line 376) | class Qwen2_5_VLForConditionalGeneration(nn.Module, DeployModelMixinV1, ... method __init__ (line 391) | def __init__(self, method forward (line 416) | def forward( method get_input_embeddings (line 455) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 459) | def prepare_inputs_for_generation( method load_weights (line 529) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method make_buffers_cudagraph (line 567) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method fill_buffers_cudagraph (line 578) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method _get_model_metas (line 599) | def _get_model_metas(self, context: StepContext): method _update_model_meta_decoding (line 607) | def _update_model_meta_decoding(self, context: StepContext): method _get_multimodal_pos_ids (line 620) | def _get_multimodal_pos_ids(self, grid_thw: list, device: torch.device): method _update_model_meta_prefilling (line 631) | def _update_model_meta_prefilling(self, context: StepContext): method update_model_metas (line 675) | def update_model_metas(self, method get_input_processor (line 685) | def get_input_processor(self) -> BaseModelInputProcessor: class Qwen2_5_VLInputProcessor (line 690) | class Qwen2_5_VLInputProcessor(BaseModelInputProcessor): method __init__ (line 693) | def __init__(self, config: PretrainedConfig) -> None: method preprocess_input (line 696) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/qwen2_moe.py class Qwen2MoeAttention (line 22) | class Qwen2MoeAttention(nn.Module): method __init__ (line 25) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 66) | def forward( class Qwen2MoeMLP (line 109) | class Qwen2MoeMLP(nn.Module): method __init__ (line 112) | def __init__(self, method forward (line 147) | def forward(self, x): class Qwen2MoeSparseMoeBlock (line 154) | class Qwen2MoeSparseMoeBlock(nn.Module): method __init__ (line 157) | def __init__(self, method forward (line 217) | def forward(self, hidden_states: torch.Tensor): class Qwen2MoeDecoderLayer (line 242) | class Qwen2MoeDecoderLayer(nn.Module): method __init__ (line 245) | def __init__(self, method forward (line 274) | def forward( class Qwen2MoeModel (line 305) | class Qwen2MoeModel(nn.Module): method __init__ (line 308) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 333) | def forward( method get_input_embeddings (line 371) | def get_input_embeddings(self): class Qwen2MoeForCausalLM (line 376) | class Qwen2MoeForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin): method __init__ (line 391) | def __init__(self, method forward (line 404) | def forward( method get_input_embeddings (line 423) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 427) | def prepare_inputs_for_generation( method _load_weight_experts (line 456) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method load_weights (line 470) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/qwen2_reward.py class Qwen2ForRewardModel (line 16) | class Qwen2ForRewardModel(nn.Module, CudaGraphMixin): method __init__ (line 31) | def __init__(self, method forward (line 53) | def forward( method get_logits (line 72) | def get_logits(self, hidden_states: torch.Tensor): method update_weights (line 77) | def update_weights(self): method get_input_embeddings (line 81) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 85) | def prepare_inputs_for_generation( method load_weights (line 106) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/qwen2_vl.py function _apply_mrope_selection (line 22) | def _apply_mrope_selection(hidden_states: torch.Tensor, mrope_position_i... class Qwen2Attention (line 43) | class Qwen2Attention(nn.Module): method __init__ (line 46) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 87) | def forward( class Qwen2MLP (line 130) | class Qwen2MLP(nn.Module): method __init__ (line 133) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 159) | def forward(self, x): class Qwen2DecoderLayer (line 166) | class Qwen2DecoderLayer(nn.Module): method __init__ (line 169) | def __init__(self, method forward (line 198) | def forward( class Qwen2Model (line 229) | class Qwen2Model(nn.Module): method __init__ (line 232) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 258) | def forward( method get_input_embeddings (line 301) | def get_input_embeddings(self): class PatchEmbed (line 306) | class PatchEmbed(nn.Module): method __init__ (line 309) | def __init__(self, method forward (line 331) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class VisionRotaryEmbedding (line 339) | class VisionRotaryEmbedding(nn.Module): method __init__ (line 342) | def __init__(self, dim: int, theta: float = 10000.0, device: torch.dev... method forward (line 347) | def forward(self, seqlen: int) -> torch.Tensor: class VisionAttention (line 353) | class VisionAttention(nn.Module): method __init__ (line 356) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 395) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor, class VisionMlp (line 422) | class VisionMlp(nn.Module): method __init__ (line 425) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 457) | def forward(self, x): class Qwen2VLVisionBlock (line 462) | class Qwen2VLVisionBlock(nn.Module): method __init__ (line 465) | def __init__(self, method forward (line 479) | def forward(self, class PatchMerger (line 497) | class PatchMerger(nn.Module): method __init__ (line 500) | def __init__(self, method forward (line 515) | def forward(self, x: torch.Tensor) -> torch.Tensor: class Qwen2VisionTransformerPretrainedModel (line 521) | class Qwen2VisionTransformerPretrainedModel(nn.Module): method __init__ (line 524) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method rot_pos_emb (line 549) | def rot_pos_emb(self, grid_thw): method forward (line 579) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor, class Qwen2VLForConditionalGeneration (line 597) | class Qwen2VLForConditionalGeneration(nn.Module, DeployModelMixinV1, Cud... method __init__ (line 612) | def __init__(self, method forward (line 637) | def forward( method get_input_embeddings (line 671) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 675) | def prepare_inputs_for_generation( method load_weights (line 732) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method make_buffers_cudagraph (line 770) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method fill_buffers_cudagraph (line 781) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method _get_model_metas (line 802) | def _get_model_metas(self, context: StepContext): method _update_model_meta_decoding (line 810) | def _update_model_meta_decoding(self, context: StepContext): method _get_multimodal_pos_ids (line 823) | def _get_multimodal_pos_ids(self, grid_thw: list, device: torch.device): method _update_model_meta_prefilling (line 834) | def _update_model_meta_prefilling(self, context: StepContext): method update_model_metas (line 878) | def update_model_metas(self, method get_input_processor (line 888) | def get_input_processor(self) -> BaseModelInputProcessor: class Qwen2VLInputProcessor (line 893) | class Qwen2VLInputProcessor(BaseModelInputProcessor): method __init__ (line 896) | def __init__(self, config: PretrainedConfig) -> None: method preprocess_input (line 899) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/qwen3.py class Qwen3Attention (line 19) | class Qwen3Attention(nn.Module): method __init__ (line 22) | def __init__(self, method forward (line 77) | def forward( class Qwen3MLP (line 124) | class Qwen3MLP(nn.Module): method __init__ (line 127) | def __init__(self, method forward (line 161) | def forward(self, x): class Qwen3DecoderLayer (line 168) | class Qwen3DecoderLayer(nn.Module): method __init__ (line 171) | def __init__(self, method forward (line 207) | def forward( class Qwen3model (line 238) | class Qwen3model(nn.Module): method __init__ (line 241) | def __init__(self, method forward (line 274) | def forward( method get_input_embeddings (line 312) | def get_input_embeddings(self): class Qwen3ForCausalLM (line 317) | class Qwen3ForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin): method __init__ (line 332) | def __init__(self, method forward (line 346) | def forward( method get_input_embeddings (line 365) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 369) | def prepare_inputs_for_generation( method load_weights (line 398) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/qwen3_5.py class Qwen3_5VisionPatchEmbed (line 33) | class Qwen3_5VisionPatchEmbed(nn.Module): method __init__ (line 35) | def __init__(self, config, dtype: torch.dtype | None = None, device: t... method forward (line 51) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Qwen3_5VisionMLP (line 59) | class Qwen3_5VisionMLP(nn.Module): method __init__ (line 62) | def __init__(self, method forward (line 99) | def forward(self, x): class Qwen3_5VisionBlock (line 104) | class Qwen3_5VisionBlock(nn.Module): method __init__ (line 107) | def __init__(self, method forward (line 122) | def forward(self, class Qwen3_5VisionPatchMerger (line 135) | class Qwen3_5VisionPatchMerger(nn.Module): method __init__ (line 137) | def __init__(self, method forward (line 167) | def forward(self, x: torch.Tensor) -> torch.Tensor: class Qwen3_5VisionModel (line 174) | class Qwen3_5VisionModel(nn.Module): method __init__ (line 177) | def __init__(self, method rot_pos_ids (line 205) | def rot_pos_ids(h: int, w: int, spatial_merge_size: int) -> torch.Tensor: method rot_pos_emb (line 231) | def rot_pos_emb(self, grid_thw: torch.Tensor) -> torch.Tensor: method fast_pos_embed_interpolate (line 247) | def fast_pos_embed_interpolate(self, grid_thw: List[List[int]]) -> tor... method forward (line 302) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tenso... class Qwen3_5MLP (line 317) | class Qwen3_5MLP(nn.Module): method __init__ (line 320) | def __init__(self, method forward (line 360) | def forward(self, x, all_routed_experts: torch.Tensor | None = None): class Qwen3_5GatedDeltaNet (line 367) | class Qwen3_5GatedDeltaNet(nn.Module): method __init__ (line 370) | def __init__( method get_A_log_exp (line 444) | def get_A_log_exp(self): method make_params (line 450) | def make_params(self, num_v_heads: int, device: torch.device | None): method weight_loader_qkv (line 461) | def weight_loader_qkv(self, param: torch.nn.Parameter, loaded_weight: ... method weight_loader_a_dt (line 471) | def weight_loader_a_dt(self, param: torch.nn.Parameter, loaded_weight:... method fix_zba_ordering (line 477) | def fix_zba_ordering(self, mixed_zba: torch.Tensor): method _load_state (line 490) | def _load_state(self, past_key_value: Tuple[torch.Tensor, torch.Tensor... method forward (line 494) | def forward( class Qwen3_5Attention (line 555) | class Qwen3_5Attention(nn.Module): method __init__ (line 558) | def __init__(self, method forward (line 630) | def forward( class Qwen3_5DecoderLayer (line 680) | class Qwen3_5DecoderLayer(nn.Module): method __init__ (line 683) | def __init__(self, method forward (line 728) | def forward( class Qwen3_5TextRotaryEmbedding (line 768) | class Qwen3_5TextRotaryEmbedding(nn.Module): method __init__ (line 771) | def __init__(self, config: PretrainedConfig, device=None): method compute_default_rope_parameters (line 793) | def compute_default_rope_parameters( method apply_interleaved_mrope (line 823) | def apply_interleaved_mrope(self, freqs, mrope_section): method forward (line 843) | def forward(self, x, position_ids): class Qwen3_5TextModel (line 860) | class Qwen3_5TextModel(nn.Module): method __init__ (line 863) | def __init__(self, method forward (line 896) | def forward( method get_input_embeddings (line 947) | def get_input_embeddings(self): class Qwen3_5Model (line 952) | class Qwen3_5Model(nn.Module): method __init__ (line 954) | def __init__(self, method forward (line 970) | def forward( method get_input_embeddings (line 1024) | def get_input_embeddings(self): class Qwen3_5ForConditionalGeneration (line 1029) | class Qwen3_5ForConditionalGeneration(nn.Module, DeployModelMixinV1, Cud... method __init__ (line 1044) | def __init__(self, method forward (line 1068) | def forward( method get_input_embeddings (line 1113) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 1117) | def prepare_inputs_for_generation( method load_weights (line 1198) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method make_buffers_cudagraph (line 1264) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method fill_buffers_cudagraph (line 1280) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, *args, **k... method _get_model_metas (line 1304) | def _get_model_metas(self, context: StepContext): method _update_model_meta_decoding (line 1312) | def _update_model_meta_decoding(self, context: StepContext): method _get_multimodal_pos_ids (line 1329) | def _get_multimodal_pos_ids(self, grid_thw: list, device: torch.device): method _update_model_meta_prefilling (line 1340) | def _update_model_meta_prefilling(self, context: StepContext): method update_model_metas (line 1429) | def update_model_metas(self, past_key_values: List[List[torch.Tensor]]... method get_input_processor (line 1437) | def get_input_processor(self) -> BaseModelInputProcessor: FILE: lmdeploy/pytorch/models/qwen3_5_moe.py class Qwen3_5MoeTopKRouter (line 24) | class Qwen3_5MoeTopKRouter(nn.Module): method __init__ (line 26) | def __init__(self, config, dtype: torch.dtype | None = None, device: t... method forward (line 33) | def forward(self, hidden_states): class Qwen3_5MoeSparseMoeBlock (line 44) | class Qwen3_5MoeSparseMoeBlock(nn.Module): method __init__ (line 47) | def __init__(self, method forward (line 97) | def forward(self, hidden_states: torch.Tensor, all_routed_experts: tor... class Qwen3_5MoeDecoderLayer (line 121) | class Qwen3_5MoeDecoderLayer(Qwen3_5DecoderLayer): method __init__ (line 124) | def __init__( class Qwen3_5MoeTextModel (line 172) | class Qwen3_5MoeTextModel(Qwen3_5TextModel): method __init__ (line 174) | def __init__(self, class Qwen3_5MoeModel (line 208) | class Qwen3_5MoeModel(Qwen3_5Model): method __init__ (line 210) | def __init__(self, class Qwen3_5MoeForConditionalGeneration (line 227) | class Qwen3_5MoeForConditionalGeneration(Qwen3_5ForConditionalGeneration): method __init__ (line 242) | def __init__(self, method _load_weight_experts (line 267) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method _load_weight_fused_experts (line 284) | def _load_weight_fused_experts(self, name: str, loaded_weight: torch.T... method load_weights (line 307) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/qwen3_moe.py class Qwen3MoeAttention (line 22) | class Qwen3MoeAttention(nn.Module): method __init__ (line 25) | def __init__(self, method forward (line 94) | def forward( class Qwen3MoeMLP (line 141) | class Qwen3MoeMLP(nn.Module): method __init__ (line 144) | def __init__(self, method forward (line 184) | def forward(self, x): class Qwen3MoeSparseMoeBlock (line 191) | class Qwen3MoeSparseMoeBlock(nn.Module): method __init__ (line 194) | def __init__(self, method forward (line 244) | def forward( class Qwen3MoeDecoderLayer (line 268) | class Qwen3MoeDecoderLayer(nn.Module): method __init__ (line 271) | def __init__( method forward (line 316) | def forward( class Qwen3MoeModel (line 348) | class Qwen3MoeModel(nn.Module): method __init__ (line 351) | def __init__(self, method forward (line 395) | def forward( method get_input_embeddings (line 435) | def get_input_embeddings(self): class Qwen3MoeForCausalLM (line 440) | class Qwen3MoeForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin): method __init__ (line 455) | def __init__( method forward (line 481) | def forward( method get_input_embeddings (line 514) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 518) | def prepare_inputs_for_generation( method _load_weight_experts (line 547) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method _load_weight_fused_experts (line 565) | def _load_weight_fused_experts(self, name: str, loaded_weight: torch.T... method load_weights (line 590) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/qwen3_next.py class Qwen3NextGatedDeltaNet (line 25) | class Qwen3NextGatedDeltaNet(nn.Module): method __init__ (line 28) | def __init__(self, method get_A_log_exp (line 96) | def get_A_log_exp(self): method make_params (line 102) | def make_params(self, num_v_heads: int, device: torch.device | None): method weight_loader_a_dt (line 113) | def weight_loader_a_dt(self, param: torch.nn.Parameter, loaded_weight:... method fix_query_key_value_ordering (line 119) | def fix_query_key_value_ordering(self, mixed_qkvz: torch.Tensor, mixed... method _load_state (line 145) | def _load_state(self, past_key_value: Tuple[torch.Tensor, torch.Tensor... method forward (line 149) | def forward( class Qwen3NextAttention (line 209) | class Qwen3NextAttention(nn.Module): method __init__ (line 212) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 268) | def forward( class Qwen3NextMLP (line 318) | class Qwen3NextMLP(nn.Module): method __init__ (line 321) | def __init__(self, method forward (line 356) | def forward(self, x): class Qwen3NextSparseMoeBlock (line 363) | class Qwen3NextSparseMoeBlock(nn.Module): method __init__ (line 366) | def __init__(self, method forward (line 427) | def forward(self, hidden_states: torch.Tensor): class Qwen3NextDecoderLayer (line 450) | class Qwen3NextDecoderLayer(nn.Module): method __init__ (line 453) | def __init__(self, method forward (line 486) | def forward( class Qwen3NextModel (line 525) | class Qwen3NextModel(nn.Module): method __init__ (line 528) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 554) | def forward( method get_input_embeddings (line 597) | def get_input_embeddings(self): class Qwen3NextForCausalLM (line 602) | class Qwen3NextForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin): method __init__ (line 617) | def __init__(self, method forward (line 630) | def forward( method get_input_embeddings (line 651) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 655) | def prepare_inputs_for_generation( method make_buffers_cudagraph (line 696) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method fill_buffers_cudagraph (line 707) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method _load_weight_experts (line 719) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method load_weights (line 734) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/qwen3_vl.py class Qwen3VLTextRotaryEmbedding (line 29) | class Qwen3VLTextRotaryEmbedding(nn.Module): method __init__ (line 32) | def __init__(self, config: PretrainedConfig, device=None): method _pack_for_trans5 (line 53) | def _pack_for_trans5(self, config): method apply_interleaved_mrope (line 61) | def apply_interleaved_mrope(self, freqs, mrope_section): method forward (line 81) | def forward(self, x, position_ids): class Qwen3VLTextModel (line 100) | class Qwen3VLTextModel(Qwen3model): method __init__ (line 106) | def __init__(self, method forward (line 117) | def forward( method _deepstack_process (line 174) | def _deepstack_process(self, hidden_states: torch.Tensor, visual_pos_m... class Qwen3VLVisionPatchEmbed (line 184) | class Qwen3VLVisionPatchEmbed(nn.Module): method __init__ (line 186) | def __init__(self, config, dtype: torch.dtype = None, device: torch.de... method forward (line 202) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Qwen3VLVisionMLP (line 210) | class Qwen3VLVisionMLP(nn.Module): method __init__ (line 213) | def __init__(self, method forward (line 248) | def forward(self, x): class Qwen3VLVisionBlock (line 253) | class Qwen3VLVisionBlock(nn.Module): method __init__ (line 256) | def __init__( method forward (line 273) | def forward(self, class Qwen3VLVisionPatchMerger (line 286) | class Qwen3VLVisionPatchMerger(nn.Module): method __init__ (line 288) | def __init__(self, method forward (line 321) | def forward(self, x: torch.Tensor) -> torch.Tensor: class Qwen3VLVisionModel (line 328) | class Qwen3VLVisionModel(nn.Module): method __init__ (line 331) | def __init__(self, method rot_pos_ids (line 374) | def rot_pos_ids(h: int, w: int, spatial_merge_size: int) -> torch.Tensor: method rot_pos_emb (line 400) | def rot_pos_emb(self, grid_thw: torch.Tensor) -> torch.Tensor: method fast_pos_embed_interpolate (line 416) | def fast_pos_embed_interpolate(self, grid_thw: List[List[int]]) -> tor... method forward (line 471) | def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tenso... class Qwen3VLForConditionalGeneration (line 491) | class Qwen3VLForConditionalGeneration(nn.Module, DeployModelMixinV1, Cud... method __init__ (line 506) | def __init__( method forward (line 542) | def forward( method get_input_embeddings (line 600) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 604) | def prepare_inputs_for_generation( method rename_weight (line 673) | def rename_weight(cls, name: str) -> str: method load_weights (line 683) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method make_buffers_cudagraph (line 721) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method fill_buffers_cudagraph (line 732) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, **kwargs): method _get_model_metas (line 753) | def _get_model_metas(self, context: StepContext): method _update_model_meta_decoding (line 761) | def _update_model_meta_decoding(self, context: StepContext): method _get_multimodal_pos_ids (line 774) | def _get_multimodal_pos_ids(self, grid_thw: list, device: torch.device): method _update_model_meta_prefilling (line 785) | def _update_model_meta_prefilling(self, context: StepContext): method update_model_metas (line 874) | def update_model_metas(self, method get_input_processor (line 884) | def get_input_processor(self) -> BaseModelInputProcessor: class Qwen3VLInputProcessor (line 889) | class Qwen3VLInputProcessor(BaseModelInputProcessor): method __init__ (line 892) | def __init__(self, config: PretrainedConfig) -> None: method _make_image_mm_data (line 895) | def _make_image_mm_data(self, input_mm: Dict[str, Any]) -> MultiModalD... method _make_video_mm_data (line 913) | def _make_video_mm_data(self, input_mm: Dict[str, Any]) -> MultiModalD... method preprocess_input (line 934) | def preprocess_input(self, FILE: lmdeploy/pytorch/models/qwen3_vl_moe.py class Qwen3VLMoeTextModel (line 18) | class Qwen3VLMoeTextModel(Qwen3MoeModel): method __init__ (line 24) | def __init__(self, method forward (line 35) | def forward( method _deepstack_process (line 92) | def _deepstack_process(self, hidden_states: torch.Tensor, visual_pos_m... class Qwen3VLMoeForConditionalGeneration (line 102) | class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration): method __init__ (line 117) | def __init__( method _load_weight_experts (line 132) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method _load_weight_fused_experts (line 148) | def _load_weight_fused_experts(self, name: str, loaded_weight: torch.T... method load_weights (line 172) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/sdar.py class SDARAttention (line 18) | class SDARAttention(nn.Module): method __init__ (line 21) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 68) | def forward( class SDARMLP (line 113) | class SDARMLP(nn.Module): method __init__ (line 116) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 142) | def forward(self, x): class SDARDecoderLayer (line 149) | class SDARDecoderLayer(nn.Module): method __init__ (line 152) | def __init__(self, method forward (line 181) | def forward( class SDARModel (line 212) | class SDARModel(nn.Module): method __init__ (line 215) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 238) | def forward( method get_input_embeddings (line 276) | def get_input_embeddings(self): class SDARForCausalLM (line 281) | class SDARForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 296) | def __init__(self, method forward (line 314) | def forward( method get_logits (line 333) | def get_logits(self, hidden_states: torch.Tensor): method update_weights (line 337) | def update_weights(self): method get_input_embeddings (line 342) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 346) | def prepare_inputs_for_generation( method load_weights (line 375) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/sdar_moe.py class SDARMoeAttention (line 19) | class SDARMoeAttention(nn.Module): method __init__ (line 22) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 69) | def forward( class SDARMoeMLP (line 114) | class SDARMoeMLP(nn.Module): method __init__ (line 117) | def __init__(self, method forward (line 147) | def forward(self, x): class SDARMoeSparseMoeBlock (line 154) | class SDARMoeSparseMoeBlock(nn.Module): method __init__ (line 157) | def __init__(self, method forward (line 198) | def forward(self, hidden_states: torch.Tensor): class SDARMoeDecoderLayer (line 214) | class SDARMoeDecoderLayer(nn.Module): method __init__ (line 217) | def __init__(self, method forward (line 250) | def forward( class SDARMoeModel (line 281) | class SDARMoeModel(nn.Module): method __init__ (line 284) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 307) | def forward( method get_input_embeddings (line 345) | def get_input_embeddings(self): class SDARMoeForCausalLM (line 350) | class SDARMoeForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 365) | def __init__(self, method forward (line 383) | def forward( method get_logits (line 402) | def get_logits(self, hidden_states: torch.Tensor): method update_weights (line 406) | def update_weights(self): method get_input_embeddings (line 411) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 415) | def prepare_inputs_for_generation( method _load_weight_experts (line 444) | def _load_weight_experts(self, name: str, loaded_weight: torch.Tensor,... method load_weights (line 459) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/siglip.py class SiglipVisionEmbeddings (line 15) | class SiglipVisionEmbeddings(nn.Module): method __init__ (line 17) | def __init__(self, method interpolate_pos_encoding (line 42) | def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: i... method forward (line 81) | def forward(self, pixel_values: torch.FloatTensor, interpolate_pos_enc... class SiglipAttention (line 94) | class SiglipAttention(nn.Module): method __init__ (line 96) | def __init__(self, method forward (line 133) | def forward( class SiglipMLP (line 152) | class SiglipMLP(nn.Module): method __init__ (line 154) | def __init__(self, method forward (line 181) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class SiglipEncoderLayer (line 189) | class SiglipEncoderLayer(nn.Module): method __init__ (line 191) | def __init__(self, method forward (line 206) | def forward( class SiglipEncoder (line 223) | class SiglipEncoder(nn.Module): method __init__ (line 225) | def __init__(self, method forward (line 241) | def forward( class SiglipMultiheadAttentionPoolingHead (line 253) | class SiglipMultiheadAttentionPoolingHead(nn.Module): method __init__ (line 256) | def __init__( method forward (line 271) | def forward(self, hidden_state: torch.Tensor) -> torch.Tensor: class SiglipVisionTransformer (line 284) | class SiglipVisionTransformer(nn.Module): method __init__ (line 286) | def __init__( method forward (line 314) | def forward( class SiglipVisionModel (line 331) | class SiglipVisionModel(nn.Module): method __init__ (line 335) | def __init__( method get_input_embeddings (line 347) | def get_input_embeddings(self) -> nn.Module: method forward (line 350) | def forward( method load_weights (line 360) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]) ->... FILE: lmdeploy/pytorch/models/starcoder2.py class Starcoder2Attention (line 17) | class Starcoder2Attention(nn.Module): method __init__ (line 20) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 62) | def forward( class Starcoder2MLP (line 105) | class Starcoder2MLP(nn.Module): method __init__ (line 108) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 138) | def forward(self, x): class Starcoder2DecoderLayer (line 145) | class Starcoder2DecoderLayer(nn.Module): method __init__ (line 148) | def __init__(self, method forward (line 171) | def forward( class Starcoder2Model (line 201) | class Starcoder2Model(nn.Module): method __init__ (line 204) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 227) | def forward( method get_input_embeddings (line 265) | def get_input_embeddings(self): class Starcoder2ForCausalLM (line 270) | class Starcoder2ForCausalLM(nn.Module, CudaGraphMixin): method __init__ (line 281) | def __init__(self, method forward (line 298) | def forward( method get_logits (line 317) | def get_logits(self, hidden_states: torch.Tensor): method update_weights (line 321) | def update_weights(self): method get_input_embeddings (line 325) | def get_input_embeddings(self): method prepare_inputs_for_generation (line 329) | def prepare_inputs_for_generation( method load_weights (line 358) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): FILE: lmdeploy/pytorch/models/utils/cudagraph.py function _get_meta_flashattn (line 14) | def _get_meta_flashattn( function next_power_of_2 (line 54) | def next_power_of_2(n: int): class CudaGraphMeta (line 68) | class CudaGraphMeta: class CudaGraphMixin (line 85) | class CudaGraphMixin: method support_cuda_graph (line 88) | def support_cuda_graph( method make_output_buffers (line 100) | def make_output_buffers(self, output): method update_meta_flashattn (line 109) | def update_meta_flashattn(self, graph_meta: CudaGraphMeta, block_size:... method make_buffers_cudagraph (line 141) | def make_buffers_cudagraph(self, graph_meta: CudaGraphMeta, *args, pas... method fill_buffers_cudagraph (line 196) | def fill_buffers_cudagraph(self, graph_meta: CudaGraphMeta, input_ids:... method update_context_cudagraph (line 282) | def update_context_cudagraph(self, graph_meta: CudaGraphMeta, context:... method get_outputs_cudagraph (line 296) | def get_outputs_cudagraph(self, output_buffers: Dict[str, torch.Tensor... FILE: lmdeploy/pytorch/models/utils/micro_batch.py function enable_micro_batch (line 7) | def enable_micro_batch(param_name, index=-1): function split_batch (line 39) | def split_batch(func, param_name, index=-1, num_splits=2): FILE: lmdeploy/pytorch/models/utils/model.py class BaseModelMetaProcessor (line 15) | class BaseModelMetaProcessor: method update_inputs (line 18) | def update_inputs(self, inputs: ModelInputs, device: torch.device) -> ... method update_delta (line 22) | def update_delta(self, inputs: ModelInputs, delta: ModelInputsDelta) -... method merge (line 26) | def merge(self, inputs: ModelInputs, other: ModelInputs) -> ModelInputs: class DeployModelMixin (line 31) | class DeployModelMixin: method forward (line 33) | def forward(self, *args, **kwargs): method prepare_inputs_for_generation (line 37) | def prepare_inputs_for_generation( method load_weights (line 46) | def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): method get_logits (line 50) | def get_logits(self, hidden_states: torch.Tensor): method rename_weight (line 55) | def rename_weight(cls, name: str) -> str: method update_weights (line 59) | def update_weights(self): method update_model_metas (line 63) | def update_model_metas(self, method get_input_processor (line 70) | def get_input_processor(self) -> BaseModelInputProcessor: method get_modelmeta_processor (line 74) | def get_modelmeta_processor(self) -> BaseModelMetaProcessor: method update_quant_config (line 79) | def update_quant_config(cls, quant_config: QuantizationConfig): class DeployModelMixinV1 (line 112) | class DeployModelMixinV1(DeployModelMixin): method get_logits (line 114) | def get_logits(self, hidden_states: torch.Tensor): method get_input_embeddings (line 122) | def get_input_embeddings(self): method update_weights (line 126) | def update_weights(self): method build_lm_head (line 131) | def build_lm_head(self, function vlm_model (line 152) | def vlm_model(vlm_cls): function build_embedding (line 170) | def build_embedding(vocab_size: int, FILE: lmdeploy/pytorch/models/whisper.py class WhisperAttention (line 13) | class WhisperAttention(nn.Module): method __init__ (line 16) | def __init__( method forward (line 58) | def forward(self, hidden_states: torch.Tensor, attention_mask: torch.T... class WhisperEncoderLayer (line 79) | class WhisperEncoderLayer(nn.Module): method __init__ (line 81) | def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None... method forward (line 115) | def forward( FILE: lmdeploy/pytorch/multimodal/data_type.py class MultiModalData (line 13) | class MultiModalData: method __post_init__ (line 21) | def __post_init__(self): method to_device (line 25) | def to_device(self, device: str, non_blocking: bool = False): FILE: lmdeploy/pytorch/nn/activation.py class SiluAndMul (line 7) | class SiluAndMul(nn.Module): method __init__ (line 10) | def __init__(self, inplace: bool = True): method forward (line 16) | def forward(self, x: Tensor): class GeluAndMul (line 21) | class GeluAndMul(nn.Module): method __init__ (line 24) | def __init__(self, approximate: str = 'none'): method forward (line 30) | def forward(self, x: Tensor): FILE: lmdeploy/pytorch/nn/attention.py function _update_num_heads (line 12) | def _update_num_heads(num_heads: int, num_kv_heads: int): class Attention (line 20) | class Attention(nn.Module): method __init__ (line 23) | def __init__( method _lazy_init (line 72) | def _lazy_init(self, device): method forward (line 87) | def forward( method update_meta_flashmla (line 123) | def update_meta_flashmla(attn_metadata: AttentionMetadata, num_attenti... class FlashAttention (line 127) | class FlashAttention(nn.Module): method __init__ (line 130) | def __init__( method forward (line 165) | def forward(self, FILE: lmdeploy/pytorch/nn/embedding.py function pad_vocab_size (line 12) | def pad_vocab_size(vocab_size: int, pad_to: int = DEFAULT_VOCAB_PADDING_... class ParallelEmbedding (line 17) | class ParallelEmbedding(nn.Module): method __init__ (line 19) | def __init__( method create_weight (line 72) | def create_weight(vocab_size: int, hidden_size: int, dtype: torch.dtyp... method _weight_loader_tp_rowwise (line 82) | def _weight_loader_tp_rowwise(self, param: torch.nn.Parameter, loaded_... method weight_loader (line 93) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method forward (line 105) | def forward(self, x: torch.Tensor): FILE: lmdeploy/pytorch/nn/eplb.py class EPLBDispatchInfo (line 5) | class EPLBDispatchInfo: method __init__ (line 7) | def __init__(self, info) -> None: class EPLBManager (line 11) | class EPLBManager: method init_global_eplb_metadata (line 15) | def init_global_eplb_metadata(cls, ep_size: int, num_routed_experts: i... method num_physical_experts (line 24) | def num_physical_experts(cls) -> int: method topk_ids_logical_to_physical (line 28) | def topk_ids_logical_to_physical(cls, topk_ids: torch.Tensor, eplb_dis... method get_dispatch_info (line 32) | def get_dispatch_info(cls, ep_rank, layer_idx) -> EPLBDispatchInfo: FILE: lmdeploy/pytorch/nn/gated_delta.py function build_rmsnorm_gated (line 17) | def build_rmsnorm_gated(hidden_size: int, eps=1e-6, **kwargs): class GatedDeltaMeta (line 31) | class GatedDeltaMeta: method __init__ (line 33) | def __init__(self, num_tokens: int, conv_kernel_size: int, state_ids: ... class CausalConv1dFunc (line 56) | class CausalConv1dFunc: method __init__ (line 58) | def __init__(self, activation: str = 'silu'): method conv1d_func (line 66) | def conv1d_func(self, x: torch.Tensor, weight: torch.Tensor, bias: tor... method conv1d_update (line 106) | def conv1d_update( method __call__ (line 126) | def __call__( class GatedDelta (line 140) | class GatedDelta: method __init__ (line 142) | def __init__(self, use_qk_l2norm_in_kernel: bool = True): method __call__ (line 148) | def __call__( class CausalConv1d (line 194) | class CausalConv1d(nn.Module): method __init__ (line 197) | def __init__( method make_weight (line 232) | def make_weight( method register_weight (line 252) | def register_weight(self, weight: torch.Tensor, w_bias: torch.Tensor |... method weight_loader (line 261) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method forward (line 270) | def forward(self, x: torch.Tensor, conv_state: torch.Tensor, gated_del... function load_state (line 276) | def load_state(past_key_value: Tuple[torch.Tensor, torch.Tensor], gated_... FILE: lmdeploy/pytorch/nn/linear/__init__.py function build_linear (line 18) | def build_linear( function build_colwise_linear (line 103) | def build_colwise_linear( function build_rowwise_linear (line 147) | def build_rowwise_linear( function build_merged_colwise_linear (line 184) | def build_merged_colwise_linear( function build_qkv_proj (line 258) | def build_qkv_proj(in_features: int, function build_o_proj (line 334) | def build_o_proj( function build_gateup_linear (line 366) | def build_gateup_linear( function build_down_linear (line 400) | def build_down_linear( FILE: lmdeploy/pytorch/nn/linear/awq.py class AwqLinear (line 14) | class AwqLinear(LinearBase): method __init__ (line 17) | def __init__( method setup_loaders (line 55) | def setup_loaders(self): method register_all_parameters (line 67) | def register_all_parameters(self, method _get_io_features (line 84) | def _get_io_features(self, in_features: int, out_features: int, w_bit:... method _weight_loader_tp_colwise (line 94) | def _weight_loader_tp_colwise(self, param: torch.nn.Parameter, loaded_... method _weight_loader_tp_rowwise (line 113) | def _weight_loader_tp_rowwise(self, param: torch.nn.Parameter, loaded_... method weight_loader (line 132) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method create_weights (line 143) | def create_weights(self, in_features: int, out_features: int, w_bit: i... method update_weights (line 161) | def update_weights(self): method _forward_default (line 166) | def _forward_default(self, x, all_reduce, tp_sizes): class MergedAwqLinear (line 171) | class MergedAwqLinear(AwqLinear): method __init__ (line 174) | def __init__(self, method setup_loaders (line 208) | def setup_loaders(self): method _get_io_features (line 224) | def _get_io_features(self, in_features: int, out_features: int, w_bit:... method _update_all_out_features (line 228) | def _update_all_out_features(self, all_out_features: List[int], w_bit:... method weight_loader (line 238) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method weight_spliter_wz (line 262) | def weight_spliter_wz(self, loaded_weight: torch.Tensor): method weight_spliter_s (line 266) | def weight_spliter_s(self, loaded_weight: torch.Tensor): class QKVAwqLinear (line 271) | class QKVAwqLinear(MergedAwqLinear, QKVMixin): method __init__ (line 274) | def __init__(self, method _update_all_out_features (line 312) | def _update_all_out_features(self, all_out_features: List[int], w_bit:... method weight_loader (line 316) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method weight_spliter_wz (line 348) | def weight_spliter_wz(self, loaded_weight: torch.Tensor, layout: str =... method weight_spliter_s (line 365) | def weight_spliter_s(self, loaded_weight: torch.Tensor, layout: str = ... method weight_spliter_lora_b (line 382) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor): FILE: lmdeploy/pytorch/nn/linear/base.py class LinearForwardDPTP (line 16) | class LinearForwardDPTP: method __init__ (line 18) | def __init__(self, gemm_func: Callable, max_tokens_per_round: int = 81... method all_gather (line 33) | def all_gather(self, hidden_states: torch.Tensor, tp_sizes: List[int]): method reduce_scatter (line 38) | def reduce_scatter(self, hidden_states: torch.Tensor, out_states: torc... method _gemm_and_reduce_scatter (line 48) | def _gemm_and_reduce_scatter(self, hidden_states: torch.Tensor, output... method forward (line 55) | def forward(self, hidden_states: torch.Tensor): class LinearBase (line 106) | class LinearBase(nn.Module): method __init__ (line 109) | def __init__( method init_tp_args (line 135) | def init_tp_args(self, is_tp: bool, all_reduce: bool, colwise: bool, l... method get_tp_world_rank (line 171) | def get_tp_world_rank(self): method update_weights (line 176) | def update_weights(self): method _forward_default (line 180) | def _forward_default(self, x, all_reduce: bool, tp_sizes: List[int]): method _forward_lora (line 184) | def _forward_lora(self, x, tp_sizes: List[int] = None): method _forward_dp_tp (line 197) | def _forward_dp_tp(self, x): method forward (line 214) | def forward(self, x): FILE: lmdeploy/pytorch/nn/linear/blocked_fp8.py class BlockedF8Linear (line 16) | class BlockedF8Linear(LinearBase): method __init__ (line 19) | def __init__( method setup_loaders (line 59) | def setup_loaders(self): method register_all_parameters (line 66) | def register_all_parameters(self, method _get_io_features (line 80) | def _get_io_features(self, in_features: int, out_features: int, colwis... method _weight_loader_tp_colwise (line 89) | def _weight_loader_tp_colwise(self, param: torch.nn.Parameter, loaded_... method _weight_loader_tp_rowwise (line 95) | def _weight_loader_tp_rowwise(self, param: torch.nn.Parameter, loaded_... method weight_loader (line 108) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method weight_loader_with_quant (line 119) | def weight_loader_with_quant(self, param: torch.nn.Parameter, loaded_w... method create_weights (line 132) | def create_weights(self, in_features: int, out_features: int, bias: bo... method update_weights (line 144) | def update_weights(self): method _forward_default (line 149) | def _forward_default(self, x, all_reduce, tp_sizes): class MergedBlockedF8Linear (line 165) | class MergedBlockedF8Linear(BlockedF8Linear): method __init__ (line 168) | def __init__(self, method setup_loaders (line 208) | def setup_loaders(self): method _get_io_features (line 221) | def _get_io_features(self, in_features: int, out_features: int, colwis... method _update_all_out_features (line 225) | def _update_all_out_features(self, all_out_features: List[int], replic... method weight_loader (line 236) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method weight_loader_with_quant (line 250) | def weight_loader_with_quant(self, param: torch.nn.Parameter, loaded_w... method weight_spliter (line 263) | def weight_spliter(self, loaded_weight: torch.Tensor): method weight_spliter_lora_b (line 269) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor): class QKVBlockedF8Linear (line 273) | class QKVBlockedF8Linear(MergedBlockedF8Linear, QKVMixin): method __init__ (line 276) | def __init__(self, method _update_all_out_features (line 316) | def _update_all_out_features(self, all_out_features: List[int], replic... method weight_loader (line 320) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method weight_loader_with_quant (line 345) | def weight_loader_with_quant(self, param: torch.nn.Parameter, loaded_w... method weight_spliter (line 358) | def weight_spliter(self, loaded_weight: torch.Tensor, layout: str = 'd... FILE: lmdeploy/pytorch/nn/linear/default.py class BaseLinear (line 15) | class BaseLinear(LinearBase): method __init__ (line 18) | def __init__( method setup_loaders (line 50) | def setup_loaders(self): method register_all_parameters (line 56) | def register_all_parameters(self, weight: torch.Tensor, bias: Optional... method _get_io_features (line 65) | def _get_io_features(self, in_features: int, out_features: int, colwis... method _weight_loader_tp_colwise (line 74) | def _weight_loader_tp_colwise(self, param: torch.nn.Parameter, loaded_... method _weight_loader_tp_rowwise (line 80) | def _weight_loader_tp_rowwise(self, param: torch.nn.Parameter, loaded_... method weight_loader (line 93) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method create_weights (line 104) | def create_weights(self, in_features: int, out_features: int, bias: bo... method update_weights (line 113) | def update_weights(self): method _forward_default (line 118) | def _forward_default(self, x, all_reduce, tp_sizes): class MergedBaseLinear (line 133) | class MergedBaseLinear(BaseLinear): method __init__ (line 136) | def __init__(self, method setup_loaders (line 166) | def setup_loaders(self): method _get_io_features (line 174) | def _get_io_features(self, in_features: int, out_features: int, colwis... method _update_all_out_features (line 178) | def _update_all_out_features(self, all_out_features: List[int]): method weight_loader (line 187) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method weight_spliter (line 195) | def weight_spliter(self, loaded_weight: torch.Tensor): method weight_spliter_lora_b (line 199) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor): class QKVBaseLinear (line 203) | class QKVBaseLinear(MergedBaseLinear, QKVMixin): method __init__ (line 206) | def __init__(self, method _update_all_out_features (line 239) | def _update_all_out_features(self, all_out_features: List[int]): method weight_loader (line 243) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method weight_spliter (line 260) | def weight_spliter(self, loaded_weight: torch.Tensor, layout: str = 'd... method weight_spliter_lora_b (line 277) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor): FILE: lmdeploy/pytorch/nn/linear/lora.py class LoRA (line 12) | class LoRA(nn.Module): method __init__ (line 15) | def __init__(self, method forward (line 49) | def forward(self, x, base_output=None): method weight_loader_A (line 60) | def weight_loader_A(self, param: nn.Parameter, loaded_weight: torch.Te... method weight_loader_B (line 74) | def weight_loader_B(self, param: nn.Parameter, loaded_weight: torch.Te... FILE: lmdeploy/pytorch/nn/linear/utils.py function check_qkv_split_layout (line 14) | def check_qkv_split_layout(layout: str): function update_tp_args (line 20) | def update_tp_args(is_tp: bool, all_reduce: bool, colwise: bool, layer_t... class QKVMixin (line 32) | class QKVMixin: method __init__ (line 35) | def __init__(self, method get_qkv_out_feautures (line 54) | def get_qkv_out_feautures(self): method _get_qkv_out_features (line 58) | def _get_qkv_out_features(self, method _update_num_heads (line 69) | def _update_num_heads(self, is_tp: bool, tp: int, tp_rank: int, num_q_... method split_qkv (line 79) | def split_qkv(self, x: torch.Tensor): FILE: lmdeploy/pytorch/nn/linear/w8a8.py class W8A8Linear (line 14) | class W8A8Linear(LinearBase): method __init__ (line 17) | def __init__(self, method setup_loaders (line 49) | def setup_loaders(self): method register_all_parameters (line 56) | def register_all_parameters(self, weight: torch.Tensor, scale: torch.T... method _get_io_features (line 67) | def _get_io_features(self, in_features: int, out_features: int, colwis... method _weight_loader_tp_colwise (line 76) | def _weight_loader_tp_colwise(self, param: torch.nn.Parameter, loaded_... method _weight_loader_tp_rowwise (line 82) | def _weight_loader_tp_rowwise(self, param: torch.nn.Parameter, loaded_... method weight_loader (line 98) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method create_weights (line 109) | def create_weights(self, in_features: int, out_features: int, bias: bo... method update_weights (line 119) | def update_weights(self): method _forward_default (line 124) | def _forward_default(self, x, all_reduce, tp_sizes): class MergedW8A8Linear (line 129) | class MergedW8A8Linear(W8A8Linear): method __init__ (line 132) | def __init__(self, method setup_loaders (line 162) | def setup_loaders(self): method _get_io_features (line 172) | def _get_io_features(self, in_features: int, out_features: int, colwis... method _update_all_out_features (line 176) | def _update_all_out_features(self, all_out_features: List[int]): method weight_loader (line 185) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method weight_spliter (line 193) | def weight_spliter(self, loaded_weight: torch.Tensor): method weight_spliter_lora_b (line 197) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor): class QKVW8A8Linear (line 201) | class QKVW8A8Linear(MergedW8A8Linear, QKVMixin): method __init__ (line 204) | def __init__(self, method _update_all_out_features (line 239) | def _update_all_out_features(self, all_out_features: List[int]): method weight_loader (line 243) | def weight_loader(self, param: torch.nn.Parameter, loaded_weight: torc... method weight_spliter (line 260) | def weight_spliter(self, loaded_weight: torch.Tensor, layout: str = 'd... method weight_spliter_lora_b (line 277) | def weight_spliter_lora_b(self, loaded_weight: torch.Tensor): FILE: lmdeploy/pytorch/nn/moe/__init__.py function build_fused_moe (line 11) | def build_fused_moe( FILE: lmdeploy/pytorch/nn/moe/base.py class MoeType (line 16) | class MoeType(Enum): class SoftmaxTopK (line 23) | class SoftmaxTopK(nn.Module): method __init__ (line 26) | def __init__(self, top_k: int, dim: int = -1, n_groups: int = -1): method forward (line 32) | def forward(self, x: torch.Tensor): function update_dims (line 37) | def update_dims(hidden_dim: int, ffn_dim: int): function split_size (line 45) | def split_size(size: int, world_size: int, align: int): function moe_gather_inputs (line 54) | def moe_gather_inputs(hidden_states, topk_weights, topk_ids, group: Opti... function moe_reduce (line 76) | def moe_reduce(ret, rank: int, tp_mode: TPMode, group: Optional[dist.Pro... class MoEForwardDPTP (line 94) | class MoEForwardDPTP: method __init__ (line 96) | def __init__(self, gemm_func: Callable, max_tokens_per_round: int = 81... method all_gather (line 111) | def all_gather(self, hidden_states: torch.Tensor, topk_weights: torch.... method reduce_scatter (line 119) | def reduce_scatter(self, hidden_states: torch.Tensor, out_states: torc... method _gemm_and_reduce_scatter (line 129) | def _gemm_and_reduce_scatter(self, hidden_states: torch.Tensor, topk_w... method forward (line 137) | def forward(self, hidden_states: torch.Tensor, topk_weights: torch.Ten... function _renormalize (line 196) | def _renormalize(topk_weights: torch.Tensor, renormalize: bool): class DispatchInputs (line 205) | class DispatchInputs: method from_dict (line 213) | def from_dict(cls, input: Dict): method to_dict (line 224) | def to_dict(self) -> Dict: class FusedMoEBase (line 234) | class FusedMoEBase(nn.Module): method __init__ (line 237) | def __init__(self, tp: int, tp_mode: TPMode, do_renormalize: bool): method init_dist_args (line 243) | def init_dist_args(self, all_reduce: bool): method before_dispatch (line 274) | def before_dispatch(self, state: DispatchInputs): method dispatch (line 278) | def dispatch(self, state: Dict): method gemm (line 282) | def gemm(self, state: Dict): method combine (line 286) | def combine(self, state: Dict): method wait (line 290) | def wait(self, state: Dict): method forward_dptp (line 295) | def forward_dptp(self) -> MoEForwardDPTP: method forward_default (line 299) | def forward_default(self, hidden_states: torch.Tensor, topk_weights: t... method forward (line 312) | def forward(self, hidden_states: torch.Tensor, topk_weights: torch.Ten... method renormalize (line 319) | def renormalize(self, topk_weights): FILE: lmdeploy/pytorch/nn/moe/blocked_fp8.py class LinearWeightsBlockedF8 (line 16) | class LinearWeightsBlockedF8(LinearWeights): method __init__ (line 19) | def __init__(self, method update_weight (line 54) | def update_weight(self, weight: torch.Tensor, weight_scale_inv: torch.... method weight_loader_scale_ep (line 62) | def weight_loader_scale_ep(self, param: torch.nn.Parameter, loaded_wei... method _chunk_weight_tp (line 71) | def _chunk_weight_tp(self, weight: torch.Tensor, dim: int, world_size:... method weight_loader_tp_blocked_fp8 (line 76) | def weight_loader_tp_blocked_fp8(self, param: torch.nn.Parameter, load... method weight_loader_scale_tp (line 107) | def weight_loader_scale_tp(self, param: torch.nn.Parameter, loaded_wei... method weight_loader_with_quant (line 127) | def weight_loader_with_quant(self, param: torch.nn.Parameter, loaded_w... class FusedMoEBlockedF8 (line 142) | class FusedMoEBlockedF8(FusedMoEBase): method __init__ (line 145) | def __init__(self, method _update_args (line 226) | def _update_args(hidden_dim: int, ffn_dim: int, align: int): method update_weights (line 232) | def update_weights(self): method before_dispatch (line 240) | def before_dispatch(self, state: DispatchInputs): method dispatch (line 255) | def dispatch(self, state: Dict): method gemm (line 318) | def gemm(self, state: Dict): method combine (line 367) | def combine(self, state: Dict): method wait (line 408) | def wait(self, state): method fusedmoe_build (line 418) | def fusedmoe_build(self, low_latency_mode: bool = False): FILE: lmdeploy/pytorch/nn/moe/default.py class LinearWeights (line 14) | class LinearWeights(nn.Module): method __init__ (line 17) | def __init__(self, method setup_weight_loader (line 45) | def setup_weight_loader(self): method update_weight (line 59) | def update_weight(self, weight: torch.Tensor): method weight_loader_tp (line 66) | def weight_loader_tp(self, param: torch.nn.Parameter, loaded_weight: t... method weight_loader_ep (line 88) | def weight_loader_ep(self, param: torch.nn.Parameter, loaded_weight: t... class FusedMoE (line 108) | class FusedMoE(FusedMoEBase): method __init__ (line 111) | def __init__(self, method update_weights (line 183) | def update_weights(self): method before_dispatch (line 189) | def before_dispatch(self, state: DispatchInputs): method dispatch (line 202) | def dispatch(self, state: Dict): method gemm (line 268) | def gemm(self, state: Dict): method combine (line 314) | def combine(self, state: Dict): method wait (line 358) | def wait(self, state: Dict): method fusedmoe_build (line 369) | def fusedmoe_build(self, low_latency_mode: bool = False): FILE: lmdeploy/pytorch/nn/moe/route.py class NoauxTCRouter (line 9) | class NoauxTCRouter(torch.nn.Module): method __init__ (line 11) | def __init__( method forward (line 36) | def forward(self, router_logits: torch.Tensor, FILE: lmdeploy/pytorch/nn/moe/w8a8.py class LinearWeightsW8A8 (line 13) | class LinearWeightsW8A8(LinearWeights): method __init__ (line 16) | def __init__(self, method update_weight (line 42) | def update_weight(self, weight: torch.Tensor, scale: torch.Tensor): method weight_loader_scale_tp (line 50) | def weight_loader_scale_tp(self, param: torch.nn.Parameter, loaded_wei... class FusedMoEW8A8 (line 69) | class FusedMoEW8A8(FusedMoEBase): method __init__ (line 72) | def __init__(self, method update_weights (line 128) | def update_weights(self): method dispatch (line 136) | def dispatch(self, state: Dict): method gemm (line 154) | def gemm(self, state: Dict): method combine (line 164) | def combine(self, state: Dict): method wait (line 178) | def wait(self, state: Dict): FILE: lmdeploy/pytorch/nn/multinomial_sampling.py function multinomial_sampling (line 7) | def multinomial_sampling(scores: torch.Tensor, FILE: lmdeploy/pytorch/nn/norm.py class RMSNorm (line 14) | class RMSNorm(nn.Module): method __init__ (line 17) | def __init__( method weight_loader (line 57) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens... method create_weight (line 64) | def create_weight(hidden_size: int, dtype: torch.dtype | None = None, ... method forward (line 73) | def forward(self, x: torch.Tensor, residual: torch.Tensor = None): class LayerNorm (line 78) | class LayerNorm(nn.Module): method __init__ (line 81) | def __init__(self, method create_weight (line 96) | def create_weight(hidden_size: int, method forward (line 113) | def forward(self, x: torch.Tensor, residual: torch.Tensor | None = None): FILE: lmdeploy/pytorch/nn/nsa.py class IndexerTopKFP8 (line 10) | class IndexerTopKFP8(nn.Module): method __init__ (line 12) | def __init__(self, topk: int, softmax_scale: float, block_size: int = ... method forward (line 18) | def forward( FILE: lmdeploy/pytorch/nn/rotary_embedding.py function get_rope_parameters (line 14) | def get_rope_parameters(config: PretrainedConfig): function _get_default_rope_parameters (line 23) | def _get_default_rope_parameters(config: PretrainedConfig): function _get_linear_scaling_rope_parameters (line 28) | def _get_linear_scaling_rope_parameters(config: PretrainedConfig): function _get_dynamic_ntk_parameters (line 35) | def _get_dynamic_ntk_parameters(config: PretrainedConfig): function _get_yarn_parameters (line 42) | def _get_yarn_parameters(config: PretrainedConfig): function _get_longrope_parameters (line 78) | def _get_longrope_parameters(config: PretrainedConfig): function _get_llama3_parameters (line 96) | def _get_llama3_parameters(config: PretrainedConfig): function _get_fope_parameters (line 108) | def _get_fope_parameters(config: PretrainedConfig): function build_rotary_params (line 125) | def build_rotary_params(config: PretrainedConfig): function build_rotary_embedding (line 153) | def build_rotary_embedding(dim: int, function get_rope_theta (line 190) | def get_rope_theta(config: PretrainedConfig, default: int = 10000) -> int: function build_rotary_embedding_from_config (line 200) | def build_rotary_embedding_from_config(config: PretrainedConfig, device:... class ApplyRotaryEmb (line 215) | class ApplyRotaryEmb(nn.Module): method __init__ (line 218) | def __init__(self): method forward (line 224) | def forward(self, query: Tensor, key: Tensor, cos: Tensor, sin: Tensor... class FopeRotaryEmbedding (line 250) | class FopeRotaryEmbedding(nn.Module): method __init__ (line 253) | def __init__(self, method update_num_kv_heads (line 287) | def update_num_kv_heads(num_key_value_heads: int): method weight_loader (line 298) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens... method forward (line 312) | def forward(self, x: Tensor, position_ids: Tensor): FILE: lmdeploy/pytorch/nn/utils.py function div_up (line 5) | def div_up(a: int, b: int): function get_distribute_size (line 10) | def get_distribute_size(feature_size: int, world_size: int, rank: int, a... function chunk_aligned (line 23) | def chunk_aligned(weight: torch.Tensor, chunks: int, dim: int, align: int): FILE: lmdeploy/pytorch/paging/block_manager/__init__.py function build_block_manager (line 8) | def build_block_manager(cache_config: CacheConfig) -> BaseBlockManager: FILE: lmdeploy/pytorch/paging/block_manager/base_block_manager.py class LogicalMemory (line 10) | class LogicalMemory: method __init__ (line 13) | def __init__(self, num_blocks: int) -> None: method get_physical_blocks (line 20) | def get_physical_blocks(self, logical_address: np.ndarray): method num_blocks (line 26) | def num_blocks(self): class PhysicalAllocator (line 31) | class PhysicalAllocator: method __init__ (line 37) | def __init__(self, num_blocks: int, offset: int = 0): method allocate (line 44) | def allocate(self, num_blocks: int): method free (line 54) | def free(self, blocks: np.ndarray): method get_num_free_blocks (line 64) | def get_num_free_blocks(self): class LogicalAllocator (line 69) | class LogicalAllocator: method __init__ (line 72) | def __init__(self, num_cpu_blocks: int, num_gpu_blocks: int, num_gpu_r... method get_phy_allocator (line 85) | def get_phy_allocator(self, device: str): method allocate (line 94) | def allocate(self, num_blocks: int, device: str = 'gpu'): method free (line 113) | def free(self, blocks: np.ndarray): method get_num_free_blocks (line 139) | def get_num_free_blocks(self): method get_physical_blocks (line 143) | def get_physical_blocks(self, blocks: np.ndarray): method get_ref_count (line 147) | def get_ref_count(self, blocks: np.ndarray): method add_ref_count (line 151) | def add_ref_count(self, blocks: np.ndarray, value: np.ndarray): method get_access_time (line 155) | def get_access_time(self, blocks: np.ndarray): method update_access_time (line 159) | def update_access_time(self, blocks: np.ndarray): method cpu_mem_offset (line 164) | def cpu_mem_offset(self): method count_cpu_blocks (line 168) | def count_cpu_blocks(self, blocks: np.ndarray): method count_gpu_blocks (line 173) | def count_gpu_blocks(self, blocks: np.ndarray): method update_phy_map (line 178) | def update_phy_map(self, log_blocks: np.ndarray, phy_blocks: np.ndarray): method on_device (line 183) | def on_device(self, blocks: np.ndarray, device: str): class BaseBlockManager (line 202) | class BaseBlockManager: method __init__ (line 210) | def __init__(self, num_gpu_blocks: int, num_cpu_blocks: int, num_gpu_r... method num_required_blocks (line 219) | def num_required_blocks(cls, obj: SchedulerSequence, prealloc_size: in... method can_allocate (line 223) | def can_allocate(self, msg: SchedulerSequence, prealloc_size: int = 0): method allocate_msg (line 227) | def allocate_msg(self, msg: SchedulerSequence, prealloc_size: int = 0): method free (line 232) | def free(self, msg: SchedulerSequence): method try_swap_out (line 236) | def try_swap_out(self, msg: SchedulerSequence): method try_swap_in (line 240) | def try_swap_in(self, msg: SchedulerSequence): method get_block_table (line 244) | def get_block_table(self, msg: SchedulerSequence): method allocate (line 253) | def allocate(self, data: SchedulerSequence, prealloc_size: int = 0): method get_num_free_gpu_blocks (line 257) | def get_num_free_gpu_blocks(self) -> int: method get_num_free_cpu_blocks (line 261) | def get_num_free_cpu_blocks(self) -> int: method on_device (line 265) | def on_device(self, msg: SchedulerSequence, device: str): FILE: lmdeploy/pytorch/paging/block_manager/default_block_manager.py function _div_up (line 9) | def _div_up(x, n): class DefaultBlockManager (line 17) | class DefaultBlockManager(BaseBlockManager): method num_required_blocks (line 26) | def num_required_blocks(cls, obj: SchedulerSequence, prealloc_size: in... method can_allocate (line 33) | def can_allocate(self, msg: SchedulerSequence, prealloc_size: int = 0): method allocate_msg (line 39) | def allocate_msg(self, msg: SchedulerSequence, prealloc_size: int = 0): method free (line 48) | def free(self, msg: SchedulerSequence): method try_swap_out (line 53) | def try_swap_out(self, msg: SchedulerSequence): method try_swap_in (line 99) | def try_swap_in(self, msg: SchedulerSequence): FILE: lmdeploy/pytorch/paging/block_manager/window_block_manager.py function _num_blocks_to_drop (line 11) | def _num_blocks_to_drop(seq: SchedulerSequence, window_size: int): class WindowBlockManager (line 24) | class WindowBlockManager(DefaultBlockManager): method __init__ (line 32) | def __init__(self, num_gpu_blocks: int, num_cpu_blocks: int, window_si... method num_required_blocks (line 38) | def num_required_blocks(self, obj: SchedulerSequence, prealloc_size: i... method can_allocate (line 47) | def can_allocate(self, msg: SchedulerSequence, prealloc_size: int = 0): method allocate_msg (line 54) | def allocate_msg(self, msg: SchedulerSequence, prealloc_size: int = 0): FILE: lmdeploy/pytorch/paging/block_trie.py class PrefixCacheStats (line 15) | class PrefixCacheStats: method reset (line 20) | def reset(self): method hit_rate (line 24) | def hit_rate(self): class Node (line 28) | class Node: method __init__ (line 31) | def __init__(self, hash_key: int, block: int, tokens: np.ndarray, num_... method parent (line 40) | def parent(self): method parent (line 44) | def parent(self, val: 'Node'): method __lt__ (line 52) | def __lt__(self, other): method __le__ (line 55) | def __le__(self, other): class BlockTrie (line 59) | class BlockTrie: method __init__ (line 62) | def __init__(self, cache_config: CacheConfig, block_manager: BaseBlock... method hit_rate (line 74) | def hit_rate(self): method get_root (line 78) | def get_root(self, adapter_name: str): method match (line 84) | def match(self, seq: SchedulerSequence): method allocate (line 131) | def allocate(self, seq: SchedulerSequence): method evict (line 185) | def evict(self, max_num_blocks: int): FILE: lmdeploy/pytorch/paging/eviction_helper/__init__.py function build_eviction_helper (line 7) | def build_eviction_helper(scheduler, eviction_type: str): FILE: lmdeploy/pytorch/paging/eviction_helper/base_eviction_helper.py class BaseEvictionHelper (line 10) | class BaseEvictionHelper: method __init__ (line 13) | def __init__(self, scheduler: Scheduler): method need_swap_in (line 20) | def need_swap_in(self, seq: SchedulerSequence): method evict_for_seq (line 24) | def evict_for_seq(self, seq: SchedulerSequence, evictable_seqs: List[S... FILE: lmdeploy/pytorch/paging/eviction_helper/recompute_eviction_helper.py class RecomputeEvictionHelper (line 9) | class RecomputeEvictionHelper(BaseEvictionHelper): method __init__ (line 12) | def __init__(self, scheduler: Scheduler): method _evict_for_seq_default (line 20) | def _evict_for_seq_default(self, seq: SchedulerSequence, evictable_seq... method _evict_for_ssm (line 59) | def _evict_for_ssm(self, seq: SchedulerSequence, evictable_seqs: List[... FILE: lmdeploy/pytorch/paging/scheduler.py class SchedulerOutput (line 28) | class SchedulerOutput: class Scheduler (line 37) | class Scheduler: method __init__ (line 45) | def __init__( method create_status_list_property (line 70) | def create_status_list_property(status: MessageStatus): method create_num_status_method (line 80) | def create_num_status_method(status: MessageStatus): method create_has_status_method (line 89) | def create_has_status_method(status: MessageStatus): method add_session (line 118) | def add_session(self, session_id: int): method _schedule_migration (line 129) | def _schedule_migration(self): method _schedule_prefill (line 169) | def _schedule_prefill(self, prealloc_size: int = 0): method _schedule_decoding (line 227) | def _schedule_decoding(self, prealloc_size: int = 0): method schedule (line 280) | def schedule(self, is_prefill: bool, prealloc_size: int = 0): method schedule_running (line 291) | def schedule_running(self, running: SeqList, num_decode_tokens: int = ... method stop_session (line 327) | def stop_session(self, session_id: int): method end_session (line 338) | def end_session(self, session_id: int): method has_unfinished (line 354) | def has_unfinished(self): method get_block_tables (line 358) | def get_block_tables(self, seqs: SeqList): method evict_seqs (line 362) | def evict_seqs(self, running: SeqList): method activate_seqs (line 367) | def activate_seqs(self, running: SeqList, filter_status: MessageStatus... method deactivate_seqs (line 373) | def deactivate_seqs(self, running: SeqList, filter_status: MessageStat... method seqs_activation (line 379) | def seqs_activation(self, running: SeqList): method activate_migration_seqs (line 387) | def activate_migration_seqs(self, running: SeqList): method deactivate_migration_seqs (line 391) | def deactivate_migration_seqs(self, running: SeqList): method seqs_migration_activation (line 396) | def seqs_migration_activation(self, running: SeqList): method collect_migration_done (line 404) | def collect_migration_done(self): method schedule_metrics (line 409) | def schedule_metrics(self): FILE: lmdeploy/pytorch/paging/seq_states/states.py function _free_seq (line 10) | def _free_seq(seq: SchedulerSequence, scheduler: 'Scheduler'): class StateBase (line 19) | class StateBase: method __init_subclass__ (line 23) | def __init_subclass__(cls, **kargs) -> None: method build (line 29) | def build(cls, scheduler: 'Scheduler', seq: 'SchedulerSequence', statu... method __init__ (line 35) | def __init__(self, seq: SchedulerSequence, scheduler: 'Scheduler'): method to_state (line 39) | def to_state(self, new_state): method evict (line 44) | def evict(self): method activate (line 48) | def activate(self): method deactivate (line 52) | def deactivate(self): method finish (line 56) | def finish(self): method stop (line 60) | def stop(self): method free (line 64) | def free(self): class WaitingState (line 69) | class WaitingState(StateBase): method activate (line 73) | def activate(self): method evict (line 81) | def evict(self): class ReadyState (line 85) | class ReadyState(StateBase): method activate (line 89) | def activate(self): method evict (line 93) | def evict(self): class StoppedState (line 97) | class StoppedState(StateBase): method activate (line 101) | def activate(self): method evict (line 106) | def evict(self): class RunningState (line 110) | class RunningState(StateBase): method deactivate (line 114) | def deactivate(self): method finish (line 117) | def finish(self): class ToBeMigratedState (line 124) | class ToBeMigratedState(StateBase): method finish (line 128) | def finish(self): class MigrationWaitingState (line 132) | class MigrationWaitingState(StateBase): method activate (line 136) | def activate(self): method evict (line 139) | def evict(self): class MigrationReadyState (line 143) | class MigrationReadyState(StateBase): method activate (line 147) | def activate(self): method evict (line 150) | def evict(self): class MigrationDoneState (line 154) | class MigrationDoneState(StateBase): method activate (line 158) | def activate(self): method finish (line 161) | def finish(self): class MigrationRunningState (line 165) | class MigrationRunningState(StateBase): method deactivate (line 169) | def deactivate(self): method finish (line 172) | def finish(self): function build_seq_state (line 176) | def build_seq_state(scheduler: 'Scheduler', seq: 'SchedulerSequence', st... FILE: lmdeploy/pytorch/paging/state_manager.py class StateAllocator (line 8) | class StateAllocator: method __init__ (line 11) | def __init__(self, num_states: int, offset: int = 0): method allocate (line 16) | def allocate(self): method free (line 24) | def free(self, state_id: int): method get_num_free (line 31) | def get_num_free(self): class StateManager (line 35) | class StateManager: method __init__ (line 37) | def __init__(self, num_states: int, num_reserved: int = 0): method is_allocated (line 42) | def is_allocated(self, seq: SchedulerSequence): method allocate (line 46) | def allocate(self, seq: SchedulerSequence): method free (line 52) | def free(self, seq: SchedulerSequence): method get_num_free (line 59) | def get_num_free(self): function build_state_manager (line 64) | def build_state_manager(cache_config: CacheConfig) -> StateManager: FILE: lmdeploy/pytorch/ray.py function get_device_str (line 16) | def get_device_str(device_type: str = None) -> str: function get_resource_kwargs (line 31) | def get_resource_kwargs(device_str: str, resource_used: float = 0.01) ->... function _wait_until_pg_ready (line 42) | def _wait_until_pg_ready(current_placement_group: PlacementGroup): function _get_obj_store_memory (line 79) | def _get_obj_store_memory(dp: int = 1): function init_ray_cluster (line 97) | def init_ray_cluster(world_size: int, ray_address: str = None, dp: int =... class RayContext (line 146) | class RayContext: method __init__ (line 149) | def __init__(self, world_size: int, ray_address: str = None, dp: int =... method get_placement_group (line 159) | def get_placement_group(self): method shutdown (line 163) | def shutdown(self): FILE: lmdeploy/pytorch/spec_decode/__init__.py function build_spec_agent (line 7) | def build_spec_agent(specdecode_config: SpecDecodeConfig, FILE: lmdeploy/pytorch/spec_decode/base.py class BaseSpecModelAgent (line 13) | class BaseSpecModelAgent: method __init__ (line 16) | def __init__(self, enable: bool = False): method is_enabled (line 19) | def is_enabled(self): method set_cache_config (line 22) | def set_cache_config(self, cache_config: CacheConfig): method set_model_config (line 26) | def set_model_config(self, model_config: ModelConfig): method build_model (line 30) | def build_model(self, empty_init: bool, target_model=None, build_model... method build_graph_runner (line 34) | def build_graph_runner(self): method build_cache_engine (line 38) | def build_cache_engine(self, cache_stream: torch.cuda.Stream): method async_model_forward (line 42) | async def async_model_forward(self, next_token_ids: torch.Tensor, mode... method warmup (line 47) | def warmup(self, max_batches: int, target_model_config: ModelConfig): method reset_graph_runner (line 51) | def reset_graph_runner(self): method update_main_model_outputs (line 55) | def update_main_model_outputs(self, output: Dict[str, torch.Tensor], m... FILE: lmdeploy/pytorch/spec_decode/proposers/base.py function draft_model_forward (line 23) | def draft_model_forward( class BaseSpecProposer (line 58) | class BaseSpecProposer: method __init__ (line 60) | def __init__(self, specdecode_config: SpecDecodeConfig, device: torch.... method build_model (line 68) | def build_model(self, empty_init: bool, target_model: torch.nn.Module ... method get_outputs (line 88) | def get_outputs(self, method _forward (line 96) | def _forward(self, model_inputs: ModelInputs, cache_engine: CacheEngin... method update_inputs_decoding (line 105) | def update_inputs_decoding(self, model_inputs: ModelInputs, extra_inpu... method get_logits (line 124) | def get_logits(self, hidden_states: torch.Tensor): method get_target_hidden_size (line 136) | def get_target_hidden_size(self, model_config: ModelConfig): function build_specdecode_proposer (line 141) | def build_specdecode_proposer(specdecode_config: SpecDecodeConfig, devic... FILE: lmdeploy/pytorch/spec_decode/proposers/deepseek_mtp.py class DeepseekMTP (line 16) | class DeepseekMTP(BaseSpecProposer): method get_outputs (line 18) | def get_outputs(self, FILE: lmdeploy/pytorch/spec_decode/proposers/eagle.py class Eagle (line 8) | class Eagle(DeepseekMTP): FILE: lmdeploy/pytorch/spec_decode/proposers/eagle3.py class Eagle3 (line 18) | class Eagle3(DeepseekMTP): method build_model (line 20) | def build_model(self, empty_init: bool, target_model: torch.nn.Module ... method get_target_hidden_size (line 28) | def get_target_hidden_size(self, model_config: ModelConfig): method get_outputs (line 34) | def get_outputs(self, FILE: lmdeploy/pytorch/spec_decode/reject_sampler.py class SamplePolicy (line 10) | class SamplePolicy(enum.Enum): class RejectionSampler (line 16) | class RejectionSampler(nn.Module): method __init__ (line 18) | def __init__(self, sample_policy: SamplePolicy = SamplePolicy.ALL_GREE... method forward (line 22) | def forward( function rejection_sample (line 47) | def rejection_sample( function greedy_reject_sampler (line 66) | def greedy_reject_sampler(draft_token_ids, target_token_ids, bonus_token... FILE: lmdeploy/pytorch/spec_decode/spec_agent.py class SpecModelAgent (line 23) | class SpecModelAgent(BaseSpecModelAgent): method __init__ (line 26) | def __init__( method set_cache_config (line 48) | def set_cache_config(self, cache_config: CacheConfig): method set_model_config (line 52) | def set_model_config(self, model_config: ModelConfig): method build_model (line 56) | def build_model(self, empty_init: bool, target_model=None, build_model... method build_graph_runner (line 60) | def build_graph_runner(self): method build_cache_engine (line 69) | def build_cache_engine(self, cache_stream: torch.cuda.Stream): method _rejection_sampling (line 79) | def _rejection_sampling(self, next_token_ids, model_inputs: 'ModelInpu... method _forward_impl (line 125) | def _forward_impl(self, inputs: ModelInputs): method _async_forward (line 130) | async def _async_forward(self, inputs: ModelInputs): method _async_model_forward (line 140) | async def _async_model_forward(self, inputs: ModelInputs, extra_inputs... method async_model_forward (line 174) | async def async_model_forward( method warmup (line 187) | def warmup(self, max_batches: int, target_model_config: ModelConfig): method reset_graph_runner (line 228) | def reset_graph_runner(self): FILE: lmdeploy/pytorch/strategies/__init__.py function build_strategy_factory (line 5) | def build_strategy_factory(model_config: ModelConfig, FILE: lmdeploy/pytorch/strategies/ar/__init__.py class ARStrategyFactory (line 18) | class ARStrategyFactory(StrategyFactoryBase): method __init__ (line 20) | def __init__(self, model_config: ModelConfig): method build_cudagraph_strategy (line 24) | def build_cudagraph_strategy(self) -> 'CudagraphStrategy': method build_sampling_strategy (line 29) | def build_sampling_strategy(self) -> 'SamplingStrategy': method build_model_inputs_strategy (line 36) | def build_model_inputs_strategy(self) -> 'ModelInputsStrategy': method build_model_agent_strategy (line 41) | def build_model_agent_strategy(self) -> 'ModelAgentStrategy': method build_engine_strategy (line 46) | def build_engine_strategy(self, cache_config: 'CacheConfig', method build_sequence_strategy (line 52) | def build_sequence_strategy(self) -> SequenceStrategy: FILE: lmdeploy/pytorch/strategies/ar/cudagraph.py class ARCudagraphStrategy (line 5) | class ARCudagraphStrategy(CudagraphStrategy): method get_max_tokens (line 7) | def get_max_tokens(self, batch_size: int, origin_batch_size: int, num_... FILE: lmdeploy/pytorch/strategies/ar/engine.py class AREngineStrategy (line 7) | class AREngineStrategy(EngineStrategy): method __init__ (line 10) | def __init__(self, scheduler_config: SchedulerConfig, cache_config: Ca... method get_prealloc_size (line 14) | def get_prealloc_size(self, is_decoding: bool): method get_num_loops (line 18) | def get_num_loops(self, is_decoding: bool) -> int: method get_num_decode_tokens (line 22) | def get_num_decode_tokens(self) -> int: FILE: lmdeploy/pytorch/strategies/ar/model_agent.py function get_model_inputs_next_decoding (line 20) | def get_model_inputs_next_decoding(inputs: ModelInputs, input_ids: torch... class ARExtraInputs (line 45) | class ARExtraInputs(ExtraInputs): class ARExtraOutputs (line 50) | class ARExtraOutputs(ExtraOutputs): class ARStoppingCriteria (line 55) | class ARStoppingCriteria(StoppingCriteria): method clone (line 58) | def clone(self): method merge (line 62) | def merge(self, other: 'ARStoppingCriteria'): method update (line 67) | def update(self, delta: ModelInputsDelta): method step (line 74) | def step(self, class ARModelAgentStrategy (line 94) | class ARModelAgentStrategy(ModelAgentStrategy): method slice_outputs (line 96) | def slice_outputs(self, inputs: torch.Tensor, seq_length: torch.LongTe... method slice_extra_inputs (line 107) | def slice_extra_inputs(self, extra_inputs: ARExtraInputs, model_inputs... method step_sampling_inputs (line 113) | def step_sampling_inputs(self, sampling_inputs: SamplingInputs, next_t... method make_stopping_criteria (line 127) | def make_stopping_criteria(self, seqs: SeqList) -> ARStoppingCriteria: method make_extra_inputs (line 133) | def make_extra_inputs(self, seqs: 'SeqList', model_inputs: 'ModelInput... method make_extra_outputs (line 137) | def make_extra_outputs(self, extra_inputs: ARExtraInputs) -> ARExtraOu... method update_prefill_for_next_step (line 141) | def update_prefill_for_next_step( method update_decoding_for_next_step (line 153) | def update_decoding_for_next_step(self, model_inputs: 'ModelInputs', n... method post_sampling (line 161) | def post_sampling(self, inputs: 'ModelInputs', logits: torch.Tensor, n... method broadcast_next_token (line 167) | def broadcast_next_token(self, next_token_ids: torch.Tensor, extra_inp... FILE: lmdeploy/pytorch/strategies/ar/model_inputs.py function merge_model_inputs (line 13) | def merge_model_inputs(inputs: ModelInputs, other: ModelInputs) -> Model... class ARModelInputsStrategy (line 70) | class ARModelInputsStrategy(ModelInputsStrategy): method make_dummy (line 72) | def make_dummy(self, method merge (line 87) | def merge(self, inputs: ModelInputs, other: ModelInputs) -> ModelInputs: method index_select (line 92) | def index_select(inputs: ModelInputs, method update_inputs (line 162) | def update_inputs(self, inputs: ModelInputs, delta: 'ModelInputsDelta'... FILE: lmdeploy/pytorch/strategies/ar/sampling.py function _gather_all_ids (line 16) | def _gather_all_ids(pad_id: int, seqs: SeqList, sampling_inputs: Samplin... function _gather_generated_ids (line 32) | def _gather_generated_ids(pad_id: int, seqs: SeqList, sampling_inputs: S... function _get_num_ignore_eos (line 48) | def _get_num_ignore_eos(seqs: SeqList): class ARSamplingStrategy (line 54) | class ARSamplingStrategy(SamplingStrategy): method __init__ (line 57) | def __init__(self, pad_token_id: int) -> None: method make_sampling_inputs (line 63) | def make_sampling_inputs(self, seqs: SeqList) -> SamplingInputs: method on_session_end (line 230) | def on_session_end(self, session_id: int): method merge_sampling_delta (line 233) | def merge_sampling_delta( method step_sampling_delta (line 268) | def step_sampling_delta( method update_sampling_delta (line 287) | def update_sampling_delta( FILE: lmdeploy/pytorch/strategies/ar/sequence.py class SchedulerSequenceDefault (line 21) | class SchedulerSequenceDefault(SchedulerSequence): method update_token_ids (line 23) | def update_token_ids(self, method set_step (line 60) | def set_step(self, step: int): class ARSequenceStrategy (line 81) | class ARSequenceStrategy(SequenceStrategy): method make_sequence (line 83) | def make_sequence(self, method update_running (line 102) | def update_running(self, running: SeqList, batched_outputs: BatchedOut... FILE: lmdeploy/pytorch/strategies/ar_spec/__init__.py class ARSpecStrategyFactory (line 18) | class ARSpecStrategyFactory(StrategyFactoryBase): method __init__ (line 20) | def __init__(self, model_config: ModelConfig, specdecode_config: SpecD... method build_cudagraph_strategy (line 26) | def build_cudagraph_strategy(self) -> 'CudagraphStrategy': method build_sampling_strategy (line 31) | def build_sampling_strategy(self) -> 'SamplingStrategy': method build_model_inputs_strategy (line 38) | def build_model_inputs_strategy(self) -> 'ModelInputsStrategy': method build_model_agent_strategy (line 43) | def build_model_agent_strategy(self) -> 'ModelAgentStrategy': method build_engine_strategy (line 48) | def build_engine_strategy(self, cache_config: 'CacheConfig', method build_sequence_strategy (line 56) | def build_sequence_strategy(self) -> SequenceStrategy: FILE: lmdeploy/pytorch/strategies/ar_spec/cudagraph.py class ARSpecCudagraphStrategy (line 5) | class ARSpecCudagraphStrategy(CudagraphStrategy): method __init__ (line 7) | def __init__(self, num_spec_tokens: int): method get_max_tokens (line 11) | def get_max_tokens(self, batch_size: int, origin_batch_size: int, num_... FILE: lmdeploy/pytorch/strategies/ar_spec/engine.py class ARSpecEngineStrategy (line 7) | class ARSpecEngineStrategy(EngineStrategy): method __init__ (line 10) | def __init__(self, scheduler_config: SchedulerConfig, cache_config: Ca... method get_prealloc_size (line 15) | def get_prealloc_size(self, is_decoding: bool): method get_num_loops (line 20) | def get_num_loops(self, is_decoding: bool) -> int: method get_num_decode_tokens (line 24) | def get_num_decode_tokens(self) -> int: FILE: lmdeploy/pytorch/strategies/ar_spec/model_agent.py class ARSpecExtraInputs (line 22) | class ARSpecExtraInputs(ExtraInputs): method __repr__ (line 36) | def __repr__(self): method broadcast (line 43) | def broadcast(self, src: int, group, async_op=False): method merge (line 48) | def merge(self, other: 'ARSpecExtraInputs'): class ARSpecExtraOutputs (line 55) | class ARSpecExtraOutputs(ExtraOutputs): method __repr__ (line 60) | def __repr__(self): class ARSpecStoppingCriteria (line 65) | class ARSpecStoppingCriteria(ARStoppingCriteria): method clone (line 68) | def clone(self): method merge (line 72) | def merge(self, other: 'ARSpecStoppingCriteria'): method update (line 77) | def update(self, delta: ModelInputsDelta): method step (line 84) | def step(self, class ARSpecModelAgentStrategy (line 114) | class ARSpecModelAgentStrategy(ModelAgentStrategy): method __init__ (line 116) | def __init__(self, num_spec_tokens: int): method slice_outputs (line 119) | def slice_outputs(self, inputs: torch.Tensor, seq_length: torch.LongTe... method slice_extra_inputs (line 130) | def slice_extra_inputs(self, extra_inputs: ARSpecExtraInputs, model_in... method step_sampling_inputs (line 142) | def step_sampling_inputs(self, sampling_inputs: SamplingInputs, next_t... method make_stopping_criteria (line 152) | def make_stopping_criteria(self, seqs: SeqList) -> ARSpecStoppingCrite... method make_extra_inputs (line 158) | def make_extra_inputs(self, seqs: 'SeqList', model_inputs: 'ModelInput... method update_extra_inputs (line 162) | def update_extra_inputs(self, extra_inputs: ARSpecExtraInputs, delta: ... method make_extra_outputs (line 168) | def make_extra_outputs(self, extra_inputs: ARSpecExtraInputs) -> ARSpe... method update_prefill_for_next_step (line 174) | def update_prefill_for_next_step( method update_decoding_for_next_step (line 194) | def update_decoding_for_next_step(self, model_inputs: 'ModelInputs', n... method post_sampling (line 213) | def post_sampling(self, inputs: 'ModelInputs', logits: torch.Tensor, n... method make_dummy_next_token (line 218) | def make_dummy_next_token(self, inputs: 'ModelInputs', logits: torch.T... method broadcast_next_token (line 227) | def broadcast_next_token(self, next_token_ids: torch.Tensor, extra_inp... FILE: lmdeploy/pytorch/strategies/ar_spec/model_inputs.py class ARSpecModelInputsStrategy (line 11) | class ARSpecModelInputsStrategy(ModelInputsStrategy): method __init__ (line 13) | def __init__(self, num_spec_tokens: int): method make_dummy (line 16) | def make_dummy( method merge (line 41) | def merge(self, inputs: ModelInputs, other: ModelInputs) -> ModelInputs: method update_inputs (line 46) | def update_inputs(self, inputs: ModelInputs, delta: 'ModelInputsDelta'... FILE: lmdeploy/pytorch/strategies/ar_spec/sampling.py class ARSpecSamplingStrategy (line 5) | class ARSpecSamplingStrategy(ARSamplingStrategy): FILE: lmdeploy/pytorch/strategies/ar_spec/sequence.py class SchedulerSequenceARSpec (line 21) | class SchedulerSequenceARSpec(SchedulerSequenceDefault): method __post_init__ (line 23) | def __post_init__(self): method num_valid_ids (line 32) | def num_valid_ids(self): method num_spec_ids (line 36) | def num_spec_ids(self): method generated_ids (line 40) | def generated_ids(self) -> np.ndarray: method set_stop_pos (line 45) | def set_stop_pos(self, pos: int): method _update_token_ids_inputs (line 56) | def _update_token_ids_inputs(self, token_ids: np.ndarray): method _update_token_ids_prefill (line 67) | def _update_token_ids_prefill(self, token_ids: np.ndarray, draft_token... method _update_token_ids_decode (line 80) | def _update_token_ids_decode(self, token_ids: np.ndarray, draft_token_... method update_token_ids (line 110) | def update_token_ids(self, class ARSpecSequenceStrategy (line 140) | class ARSpecSequenceStrategy(ARSequenceStrategy): method make_sequence (line 142) | def make_sequence(self, method update_running (line 159) | def update_running(self, running: SeqList, batched_outputs: BatchedOut... FILE: lmdeploy/pytorch/strategies/base/__init__.py class StrategyFactoryBase (line 16) | class StrategyFactoryBase(ABC): method build_cudagraph_strategy (line 19) | def build_cudagraph_strategy(self) -> 'CudagraphStrategy': method build_sampling_strategy (line 24) | def build_sampling_strategy(self) -> 'SamplingStrategy': method build_model_inputs_strategy (line 29) | def build_model_inputs_strategy(self) -> 'ModelInputsStrategy': method build_model_agent_strategy (line 34) | def build_model_agent_strategy(self) -> 'ModelAgentStrategy': method build_engine_strategy (line 39) | def build_engine_strategy(self, cache_config: 'CacheConfig', method build_sequence_strategy (line 45) | def build_sequence_strategy(self) -> 'SequenceStrategy': FILE: lmdeploy/pytorch/strategies/base/cudagraph.py class CudagraphStrategy (line 5) | class CudagraphStrategy(ABC): method get_max_tokens (line 8) | def get_max_tokens(self, batch_size: int, origin_batch_size: int, num_... FILE: lmdeploy/pytorch/strategies/base/engine.py class EngineStrategy (line 5) | class EngineStrategy(ABC): method get_prealloc_size (line 9) | def get_prealloc_size(self, is_decoding: bool) -> int: method get_num_loops (line 14) | def get_num_loops(self, is_decoding: bool) -> int: method get_num_decode_tokens (line 19) | def get_num_decode_tokens(self) -> int: method get_num_required_tokens (line 23) | def get_num_required_tokens(self) -> int: FILE: lmdeploy/pytorch/strategies/base/model_agent.py function to_device (line 18) | def to_device(self, device: str, non_blocking: bool = False): class ExtraInputs (line 32) | class ExtraInputs(ABC): method to_device (line 34) | def to_device(self, device: str, non_blocking: bool = False): method broadcast (line 38) | def broadcast(self, src: int, group, async_op=False): method merge (line 42) | def merge(self, other: 'ExtraInputs'): class ExtraOutputs (line 48) | class ExtraOutputs(ABC): method to_device (line 50) | def to_device(self, device: str, non_blocking: bool = False): method to_cpu (line 54) | def to_cpu(self): method to_numpy (line 58) | def to_numpy(self): method to_tensor (line 71) | def to_tensor(self): class StoppingCriteria (line 86) | class StoppingCriteria(ABC): method clone (line 90) | def clone(self) -> 'StoppingCriteria': method merge (line 94) | def merge(self, other: 'StoppingCriteria') -> 'StoppingCriteria': method update (line 98) | def update(self, delta: 'ModelInputsDelta') -> 'StoppingCriteria': method step (line 102) | def step(self, method to_device (line 110) | def to_device(self, device: str, non_blocking: bool = False): class ModelAgentStrategy (line 115) | class ModelAgentStrategy(ABC): method slice_outputs (line 119) | def slice_outputs(self, inputs: torch.Tensor, seq_length: torch.LongTe... method slice_extra_inputs (line 124) | def slice_extra_inputs(self, extra_inputs: ExtraInputs, model_inputs: ... method make_stopping_criteria (line 130) | def make_stopping_criteria(self, seqs: 'SeqList') -> StoppingCriteria: method make_extra_inputs (line 135) | def make_extra_inputs(self, seqs: 'SeqList', model_inputs: 'ModelInput... method update_extra_inputs (line 139) | def update_extra_inputs(self, extra_inputs: ExtraInputs, delta: 'Model... method make_extra_outputs (line 144) | def make_extra_outputs(self, extra_inputs: ExtraInputs) -> ExtraOutputs: method step_sampling_inputs (line 149) | def step_sampling_inputs( method update_prefill_for_next_step (line 159) | def update_prefill_for_next_step( method update_decoding_for_next_step (line 171) | def update_decoding_for_next_step(self, model_inputs: 'ModelInputs', n... method post_sampling (line 178) | def post_sampling(self, inputs: 'ModelInputs', logits: torch.Tensor, n... method make_dummy_next_token (line 183) | def make_dummy_next_token(self, inputs: 'ModelInputs', logits: torch.T... method broadcast_next_token (line 191) | def broadcast_next_token(self, next_token_ids: torch.Tensor, extra_inp... FILE: lmdeploy/pytorch/strategies/base/model_inputs.py function make_dummy_inputs (line 11) | def make_dummy_inputs(batch_size: int, class ModelInputsStrategy (line 47) | class ModelInputsStrategy(ABC): method make_dummy (line 50) | def make_dummy(self, method merge (line 60) | def merge(self, inputs: ModelInputs, other: ModelInputs) -> ModelInputs: method update_inputs (line 65) | def update_inputs(self, inputs: ModelInputs, delta: 'ModelInputsDelta'... FILE: lmdeploy/pytorch/strategies/base/sampling.py class SamplingStrategy (line 16) | class SamplingStrategy(ABC): method make_sampling_inputs (line 20) | def make_sampling_inputs(self, seqs: SeqList) -> SamplingInputs: method on_session_end (line 25) | def on_session_end(self, session_id: int) -> None: method merge_sampling_delta (line 30) | def merge_sampling_delta( method step_sampling_delta (line 38) | def step_sampling_delta( method update_sampling_delta (line 48) | def update_sampling_delta( FILE: lmdeploy/pytorch/strategies/base/sequence.py class SequenceStrategy (line 14) | class SequenceStrategy(ABC): method make_sequence (line 17) | def make_sequence(self, method update_running (line 29) | def update_running(self, running: 'SeqList', batched_outputs: 'Batched... FILE: lmdeploy/pytorch/strategies/dllm/__init__.py class DLLMStrategyFactory (line 21) | class DLLMStrategyFactory(StrategyFactoryBase): method __init__ (line 23) | def __init__(self, model_config: ModelConfig, dllm_config: DLLMConfig): method _update_dllm_block_length (line 31) | def _update_dllm_block_length(self): method build_cudagraph_strategy (line 51) | def build_cudagraph_strategy(self) -> 'CudagraphStrategy': method build_sampling_strategy (line 56) | def build_sampling_strategy(self) -> 'SamplingStrategy': method build_model_inputs_strategy (line 63) | def build_model_inputs_strategy(self) -> 'ModelInputsStrategy': method build_model_agent_strategy (line 68) | def build_model_agent_strategy(self) -> 'ModelAgentStrategy': method build_engine_strategy (line 73) | def build_engine_strategy(self, cache_config: 'CacheConfig', method build_sequence_strategy (line 81) | def build_sequence_strategy(self) -> SequenceStrategy: FILE: lmdeploy/pytorch/strategies/dllm/cudagraph.py class DLLMCudagraphStrategy (line 5) | class DLLMCudagraphStrategy(CudagraphStrategy): method __init__ (line 7) | def __init__(self, block_size: int) -> None: method get_max_tokens (line 11) | def get_max_tokens(self, batch_size: int, origin_batch_size: int, num_... FILE: lmdeploy/pytorch/strategies/dllm/engine.py class DLLMEngineStrategy (line 12) | class DLLMEngineStrategy(EngineStrategy): method __init__ (line 15) | def __init__(self, scheduler_config: SchedulerConfig, cache_config: Ca... method _check (line 22) | def _check(self): method get_prealloc_size (line 32) | def get_prealloc_size(self, is_decoding: bool) -> int: method get_num_loops (line 42) | def get_num_loops(self, is_decoding: bool) -> int: method get_num_decode_tokens (line 52) | def get_num_decode_tokens(self) -> int: FILE: lmdeploy/pytorch/strategies/dllm/model_agent.py function get_model_inputs_next_decoding (line 24) | def get_model_inputs_next_decoding(inputs: ModelInputs, input_ids: torch... class DLLMExtraInputs (line 47) | class DLLMExtraInputs(ExtraInputs): method broadcast (line 51) | def broadcast(self, src: int, group, async_op=False): method merge (line 54) | def merge(self, other: 'DLLMExtraInputs'): class DLLMExtraOutputs (line 61) | class DLLMExtraOutputs(ExtraOutputs): function _check_stopwords_dllm (line 66) | def _check_stopwords_dllm(token_ids: torch.Tensor, stop_words: torch.Ten... class DLLMStoppingCriteria (line 100) | class DLLMStoppingCriteria(StoppingCriteria): method clone (line 104) | def clone(self) -> 'DLLMStoppingCriteria': method merge (line 108) | def merge(self, other: 'DLLMStoppingCriteria') -> 'DLLMStoppingCriteria': method update (line 114) | def update(self, delta: 'ModelInputsDelta') -> 'DLLMStoppingCriteria': method step (line 121) | def step(self, class DLLMModelAgentStrategy (line 157) | class DLLMModelAgentStrategy(ModelAgentStrategy): method __init__ (line 159) | def __init__(self, dllm_config: DLLMConfig, dllm_mask_token: int): method _update_dllm (line 166) | def _update_dllm(self, next_token_ids: torch.Tensor, dllm_mask: torch.... method slice_outputs (line 185) | def slice_outputs(self, inputs: torch.Tensor, seq_length: torch.LongTe... method slice_extra_inputs (line 200) | def slice_extra_inputs(self, extra_inputs: DLLMExtraInputs, model_inpu... method step_sampling_inputs (line 206) | def step_sampling_inputs(self, sampling_inputs: SamplingInputs, next_t... method make_stopping_criteria (line 223) | def make_stopping_criteria(self, seqs: SeqList) -> DLLMStoppingCriteria: method make_extra_inputs (line 238) | def make_extra_inputs(self, seqs: 'SeqList', model_inputs: 'ModelInput... method update_extra_inputs (line 250) | def update_extra_inputs(self, extra_inputs: DLLMExtraInputs, delta: 'M... method make_extra_outputs (line 260) | def make_extra_outputs(self, extra_inputs: DLLMExtraInputs) -> DLLMExt... method update_prefill_for_next_step (line 265) | def update_prefill_for_next_step( method update_decoding_for_next_step (line 285) | def update_decoding_for_next_step(self, model_inputs: 'ModelInputs', n... method post_sampling (line 297) | def post_sampling(self, inputs: 'ModelInputs', logits: torch.Tensor, n... method make_dummy_next_token (line 309) | def make_dummy_next_token(self, inputs: 'ModelInputs', logits: torch.T... method broadcast_next_token (line 316) | def broadcast_next_token(self, next_token_ids: torch.Tensor, extra_inp... FILE: lmdeploy/pytorch/strategies/dllm/model_inputs.py class DLLMModelInputsStrategy (line 8) | class DLLMModelInputsStrategy(ModelInputsStrategy): method __init__ (line 10) | def __init__(self, block_size: int): method make_dummy (line 13) | def make_dummy(self, method merge (line 27) | def merge(self, inputs: ModelInputs, other: ModelInputs) -> ModelInputs: method update_inputs (line 31) | def update_inputs(self, inputs: ModelInputs, delta: 'ModelInputsDelta'... FILE: lmdeploy/pytorch/strategies/dllm/sampling.py class DLLMSamplingStrategy (line 18) | class DLLMSamplingStrategy(ARSamplingStrategy): method __init__ (line 21) | def __init__(self, pad_token_id: int, dllm_block_length: int) -> None: method make_sampling_inputs (line 26) | def make_sampling_inputs(self, seqs: SeqList) -> SamplingInputs: method merge_sampling_delta (line 84) | def merge_sampling_delta( method update_sampling_delta (line 99) | def update_sampling_delta( method step_sampling_delta (line 119) | def step_sampling_delta( FILE: lmdeploy/pytorch/strategies/dllm/sequence.py class HistoryDLLMMask (line 27) | class HistoryDLLMMask(HistoryTokenIds): method __init__ (line 29) | def __init__(self, token_ids: np.ndarray = None, dtype: np.dtype = DLL... class SchedulerSequenceDLLM (line 34) | class SchedulerSequenceDLLM(SchedulerSequenceDefault): method __post_init__ (line 39) | def __post_init__(self): method dllm_mask (line 46) | def dllm_mask(self): method num_valid_ids (line 52) | def num_valid_ids(self): method generated_ids (line 56) | def generated_ids(self) -> np.ndarray: method all_dllm_mask (line 62) | def all_dllm_mask(self): method dllm_block_length (line 66) | def dllm_block_length(self): method dllm_mask_token (line 70) | def dllm_mask_token(self): method set_stop_pos (line 73) | def set_stop_pos(self, pos: int): method _update_token_ids_inputs (line 79) | def _update_token_ids_inputs(self, token_ids: np.ndarray, dllm_mask: n... method _update_token_ids_decode (line 119) | def _update_token_ids_decode(self, token_ids: np.ndarray, dllm_mask: n... method _update_token_ids_prefill (line 150) | def _update_token_ids_prefill(self, token_ids: np.ndarray, dllm_mask: ... method update_token_ids (line 165) | def update_token_ids(self, method set_step (line 197) | def set_step(self, step: int): class DLLMSequenceStrategy (line 208) | class DLLMSequenceStrategy(SequenceStrategy): method __init__ (line 210) | def __init__(self, block_size: int, dllm_mask_token: int) -> None: method make_sequence (line 214) | def make_sequence(self, method update_running (line 231) | def update_running(self, running: SeqList, batched_outputs: BatchedOut... FILE: lmdeploy/pytorch/strategies/dllm/unmasking.py class UnmaskingProcessor (line 13) | class UnmaskingProcessor: method __init__ (line 15) | def __init__(self, dllm_config: DLLMConfig): method _get_scores (line 18) | def _get_scores(self, logits: torch.Tensor, token_ids: torch.Tensor): method _get_denoise_num (line 24) | def _get_denoise_num(self): method low_confidence_static (line 34) | def low_confidence_static(self, logits: torch.Tensor, token_ids: torch... method low_confidence_dynamic (line 51) | def low_confidence_dynamic(self, logits: torch.Tensor, token_ids: torc... method sequential (line 69) | def sequential(self, dllm_mask: torch.Tensor): method __call__ (line 89) | def __call__(self, logits: torch.Tensor, input_ids: torch.Tensor, toke... FILE: lmdeploy/pytorch/third_party/deep_gemm/__init__.py function _log_jit_build (line 21) | def _log_jit_build(M: int, N: int, K: int): function fp8_gemm_nt (line 40) | def fp8_gemm_nt(a, b, d, c, recipe=None, compiled_dim='nk', disable_ue8m... function m_grouped_fp8_gemm_nt_contiguous (line 52) | def m_grouped_fp8_gemm_nt_contiguous(a, b, d, m_indices, recipe=None, co... function m_grouped_fp8_gemm_nt_masked (line 64) | def m_grouped_fp8_gemm_nt_masked(a, function get_mn_major_tma_aligned_tensor (line 83) | def get_mn_major_tma_aligned_tensor(x): FILE: lmdeploy/pytorch/third_party/flash_attn_interface.py function flash_attn_varlen_func (line 9) | def flash_attn_varlen_func(*args, **kwargs): function flash_attn_with_kvcache (line 18) | def flash_attn_with_kvcache(*args, **kwargs): FILE: lmdeploy/pytorch/tools/utils.py class Timer (line 6) | class Timer: method __init__ (line 9) | def __init__(self): method tic_cpu (line 13) | def tic_cpu(self): method toc_cpu (line 18) | def toc_cpu(self): method tic_cuda (line 25) | def tic_cuda(self): method toc_cuda (line 32) | def toc_cuda(self): method tic (line 41) | def tic(cls, is_cuda: bool = False) -> 'Timer': method toc (line 49) | def toc(self): method timing (line 59) | def timing(cls, is_cuda: bool = False) -> 'Timer': method format_duration (line 65) | def format_duration(duration: float, acc: int = 3): method format_flops (line 78) | def format_flops(flops: float, acc: int = 3): method formatted_print (line 96) | def formatted_print(out_info: dict, title: str = None): method print (line 108) | def print(self, flop: int = None, title: str = None): method toc_print (line 126) | def toc_print(self, flop: int = None, title: str = None): function visualize_pipe_out (line 130) | def visualize_pipe_out(outputs, enable_meta: bool = True): function visualize_chat_completions (line 209) | def visualize_chat_completions(outputs, enable_meta: bool = True): function dump_tilelang_source (line 233) | def dump_tilelang_source(kernel, path: str = 'sources/tvm_kernels.cu'): FILE: lmdeploy/pytorch/transformers/__init__.py function register_config (line 10) | def register_config(model_type: str): function config_from_pretrained (line 21) | def config_from_pretrained(pretrained_model_name_or_path: str, **kwargs): FILE: lmdeploy/pytorch/transformers/configuration_deepseek_v32.py class DeepseekV32Config (line 6) | class DeepseekV32Config(DeepseekV3Config): method __init__ (line 9) | def __init__(self, index_head_dim=128, index_n_heads=64, index_topk=20... FILE: lmdeploy/pytorch/utils.py function get_gpu_memory (line 16) | def get_gpu_memory(device_id: int = None) -> int: function get_cpu_memory (line 24) | def get_cpu_memory() -> int: function bind_sigature (line 29) | def bind_sigature(input_names: str, args: Sequence, kwargs: Dict): function singleton (line 38) | def singleton(cls): class CtxMgrBase (line 59) | class CtxMgrBase(Generic[T]): method __init__ (line 62) | def __init__(self, default: Optional[T] = None): method current_context (line 65) | def current_context(self) -> Optional[T]: method set_context (line 69) | def set_context(self, context: Optional[T]): method context (line 74) | def context(self, context: T): function maybe_register_config_serialize_by_value (line 85) | def maybe_register_config_serialize_by_value(trust_remote_code: bool) ->... function monkey_patch_hf_modules_cache (line 148) | def monkey_patch_hf_modules_cache(): function wait_for_async_tasks (line 185) | async def wait_for_async_tasks(tasks: Sequence[asyncio.Task], function cancel_async_tasks (line 222) | async def cancel_async_tasks(tasks: Sequence[asyncio.Task]): FILE: lmdeploy/pytorch/weight_loader/model_weight_loader.py function load_weight (line 19) | def load_weight(param: torch.nn.Parameter, loaded_weight: torch.Tensor, ... function default_weight_loader (line 28) | def default_weight_loader(param: torch.nn.Parameter, loaded_weight: torc... function _get_weight_type (line 38) | def _get_weight_type(model_path: str, use_safetensors: bool = None): function _get_weight_map (line 62) | def _get_weight_map(model_path: str, weight_type: str): function _get_weight_path (line 78) | def _get_weight_path(model_path: str, weight_type: str): function _get_safetensors_weights_iterator (line 91) | def _get_safetensors_weights_iterator(file: str, prefix: str): function _get_pt_weights_iterator (line 101) | def _get_pt_weights_iterator(file: str, prefix: str): class ModelWeightLoader (line 115) | class ModelWeightLoader: method __init__ (line 118) | def __init__(self, model_path: str, prefix: str = None): method _get_shard_paths (line 128) | def _get_shard_paths(model_path: str, is_sharded: bool, weight_type: s... method _get_weights_iterator (line 139) | def _get_weights_iterator(self, path: str): method _skip_dummy_iterator (line 148) | def _skip_dummy_iterator(iterator, dummy_prefix: list): method _rename_weights_iterator (line 155) | def _rename_weights_iterator(iterator, model: torch.nn.Module): method load_model_weights (line 162) | def load_model_weights( function load_model_weights (line 193) | def load_model_weights(model: torch.nn.Module, checkpoint_path: str, pre... FILE: lmdeploy/serve/core/async_engine.py class GenOut (line 33) | class GenOut: method to_response (line 47) | def to_response(self, index: int = 0) -> Response: class AsyncEngine (line 66) | class AsyncEngine: method __init__ (line 96) | def __init__(self, method close (line 152) | def close(self): method __enter__ (line 156) | def __enter__(self): method __exit__ (line 159) | def __exit__(self, exc_type, exc_value, traceback): method _build_turbomind (line 162) | def _build_turbomind(self, model_path: str, backend_config: TurbomindE... method _build_pytorch (line 167) | def _build_pytorch(self, method _build_stat_loggers (line 176) | def _build_stat_loggers(self): method get_schedule_metrics (line 194) | def get_schedule_metrics(self): method do_log_stats (line 197) | async def do_log_stats(self): method stop_all_session (line 203) | async def stop_all_session(self): method sleep (line 209) | def sleep(self, level: int = 1): method wakeup (line 221) | def wakeup(self, tags: List[str] | None = None): method _determine_gen_config (line 242) | def _determine_gen_config(self, session, input_ids, gen_config: Genera... method safe_run (line 265) | async def safe_run(self, handle, session, **kwargs): method generate (line 280) | async def generate( method start_loop (line 545) | def start_loop(self, loop, use_async_api=False): method free_cache (line 573) | def free_cache(self, session_id: int): method p2p_initialize (line 579) | def p2p_initialize(self, init_request: DistServeInitRequest): method p2p_connect (line 582) | def p2p_connect(self, conn_request: List[DistServeConnectionRequest]): method p2p_drop_connect (line 585) | def p2p_drop_connect(self, drop_conn_request: List[DistServeDropConnec... method async_get_reward_score (line 590) | async def async_get_reward_score(self, input_ids: List) -> List[float]: method async_get_logits (line 606) | async def async_get_logits(self, FILE: lmdeploy/serve/core/exceptions.py class SafeRunException (line 5) | class SafeRunException(Exception): FILE: lmdeploy/serve/core/vl_async_engine.py class VLAsyncEngine (line 12) | class VLAsyncEngine(AsyncEngine): method __init__ (line 15) | def __init__(self, method close (line 42) | def close(self): FILE: lmdeploy/serve/managers/session_manager.py class Session (line 17) | class Session: method __init__ (line 20) | def __init__(self, session_id: int, session_mgr: SessionManager, **kwa... method update (line 33) | def update(self, **kwargs): method __repr__ (line 39) | def __repr__(self) -> str: method __str__ (line 46) | def __str__(self) -> str: method reset (line 57) | def reset(self): method request_handle (line 73) | async def request_handle(self): method async_abort (line 102) | async def async_abort(self): method async_close (line 108) | async def async_close(self): method abort (line 122) | def abort(self): method close (line 127) | def close(self): method _run (line 132) | def _run(self, coro): class RequestHandlePool (line 137) | class RequestHandlePool: method __init__ (line 161) | def __init__(self, engine, size: int): method get (line 167) | async def get(self): method put (line 177) | def put(self, handle): method clear (line 182) | def clear(self): class SessionManager (line 188) | class SessionManager: method __init__ (line 191) | def __init__(self): method get (line 199) | def get(self, session_id: int | None = None, **kwargs) -> Session: method async_abort_all (line 213) | async def async_abort_all(self): method has (line 223) | def has(self, session_id): method remove (line 226) | def remove(self, session: Session): method clear (line 229) | def clear(self): method attach_event_loop (line 234) | def attach_event_loop(self, loop): method build_request_handle_pool (line 237) | def build_request_handle_pool(self, engine, size): FILE: lmdeploy/serve/openai/api_client.py function get_model_list (line 10) | def get_model_list(api_url: str, headers: dict = None): function json_loads (line 27) | def json_loads(content): class APIClient (line 38) | class APIClient: method __init__ (line 48) | def __init__(self, api_server_url: str, api_key: Optional[str] = None,... method available_models (line 61) | def available_models(self): method encode (line 68) | def encode(self, method chat_completions_v1 (line 90) | def chat_completions_v1( method completions_v1 (line 175) | def completions_v1( FILE: lmdeploy/serve/openai/api_server.py class VariableInterface (line 55) | class VariableInterface: method get_session (line 70) | def get_session(session_id: int) -> int: method get_session_manager (line 78) | def get_session_manager(): method get_engine_config (line 82) | def get_engine_config(): function get_model_list (line 90) | def get_model_list(): function available_models (line 102) | def available_models(): function create_error_response (line 110) | def create_error_response(status: HTTPStatus, message: str, error_type='... function check_request (line 122) | def check_request(request) -> JSONResponse | None: function _create_completion_logprobs (line 150) | def _create_completion_logprobs(tokenizer: Tokenizer, function _create_chat_completion_logprobs (line 209) | def _create_chat_completion_logprobs(tokenizer: Tokenizer, function health (line 246) | async def health() -> Response: function terminate (line 252) | async def terminate(): function logit_bias_logits_processor (line 265) | def logit_bias_logits_processor(logit_bias: dict[int, float] | dict[str,... function chat_completions_v1 (line 296) | async def chat_completions_v1(request: ChatCompletionRequest, raw_reques... function completions_v1 (line 683) | async def completions_v1(request: CompletionRequest, raw_request: Reques... function generate (line 927) | async def generate(request: GenerateReqInput, raw_request: Request = None): function create_embeddings (line 1044) | async def create_embeddings(request: EmbeddingsRequest, raw_request: Req... function encode (line 1050) | async def encode(request: EncodeRequest, raw_request: Request = None): function pooling (line 1080) | async def pooling(request: PoolingRequest, raw_request: Request = None): function update_params (line 1134) | def update_params(request: UpdateParamsRequest, raw_request: Request = N... function sleep (line 1141) | async def sleep(raw_request: Request = None): function wakeup (line 1148) | async def wakeup(raw_request: Request = None): function is_sleeping (line 1156) | async def is_sleeping(): function engine_info (line 1165) | async def engine_info(): function p2p_initialize (line 1181) | async def p2p_initialize(init_request: DistServeInitRequest): function p2p_connect (line 1186) | async def p2p_connect(conn_request: DistServeConnectionRequest): function p2p_drop_connect (line 1191) | async def p2p_drop_connect(drop_conn_request: DistServeDropConnectionReq... function free_cache (line 1196) | async def free_cache(cache_free_request: DistServeCacheFreeRequest) -> J... function abort_request (line 1206) | async def abort_request(request: AbortRequest, raw_request: Request = No... function chat_interactive_v1 (line 1222) | async def chat_interactive_v1(request, raw_request: Request = None): function handle_torchrun (line 1228) | def handle_torchrun(): function startup_event (line 1242) | async def startup_event(): function shutdown_event (line 1267) | async def shutdown_event(): function validation_exception_handler (line 1273) | async def validation_exception_handler(request: Request, exc: RequestVal... class ConcurrencyLimitMiddleware (line 1284) | class ConcurrencyLimitMiddleware(BaseHTTPMiddleware): method __init__ (line 1286) | def __init__(self, app: FastAPI, max_concurrent_requests: int): method dispatch (line 1290) | async def dispatch(self, request: Request, call_next): function set_parsers (line 1296) | def set_parsers(reasoning_parser: str | None = None, tool_parser: str | ... function mount_metrics (line 1318) | def mount_metrics(app: FastAPI, backend_config: PytorchEngineConfig | Tu... function create_lifespan_handler (line 1333) | def create_lifespan_handler(backend_config: PytorchEngineConfig | Turbom... function serve (line 1365) | def serve(model_path: str, FILE: lmdeploy/serve/openai/harmony_utils.py function get_encoding (line 14) | def get_encoding(): function get_streamable_parser_for_assistant (line 21) | def get_streamable_parser_for_assistant() -> 'StreamableParser': class GptOssChatParser (line 25) | class GptOssChatParser: method __init__ (line 27) | def __init__(self): method parse_streaming (line 30) | def parse_streaming(self, tokens: List[int]) -> DeltaMessage: method parse_full (line 79) | def parse_full(self, tokens: List[int]) -> ChatMessage: FILE: lmdeploy/serve/openai/launch_server.py function find_available_ports (line 19) | def find_available_ports(num: int) -> List[int]: function get_host_ip (line 42) | def get_host_ip(): function _run_server (line 50) | def _run_server(gpu_ids: List[int], model_path: str, **kwargs): function cleanup_processes (line 59) | def cleanup_processes(processes: List[mp.Process]): function launch_server (line 83) | def launch_server(num_nodes: int, FILE: lmdeploy/serve/openai/protocol.py class ErrorResponse (line 11) | class ErrorResponse(BaseModel): class ModelPermission (line 20) | class ModelPermission(BaseModel): class ModelCard (line 36) | class ModelCard(BaseModel): class ModelList (line 47) | class ModelList(BaseModel): class UsageInfo (line 53) | class UsageInfo(BaseModel): class Function (line 60) | class Function(BaseModel): class Tool (line 67) | class Tool(BaseModel): class ToolChoiceFuncName (line 73) | class ToolChoiceFuncName(BaseModel): class ToolChoice (line 78) | class ToolChoice(BaseModel): class StreamOptions (line 84) | class StreamOptions(BaseModel): class JsonSchema (line 89) | class JsonSchema(BaseModel): class ResponseFormat (line 101) | class ResponseFormat(BaseModel): class ChatCompletionRequest (line 108) | class ChatCompletionRequest(BaseModel): class FunctionCall (line 173) | class FunctionCall(BaseModel): class ToolCall (line 179) | class ToolCall(BaseModel): class ExtractedToolCallInformation (line 186) | class ExtractedToolCallInformation(BaseModel): class ChatMessage (line 197) | class ChatMessage(BaseModel): class LogProbs (line 206) | class LogProbs(BaseModel): class TopLogprob (line 213) | class TopLogprob(BaseModel): class ChatCompletionTokenLogprob (line 219) | class ChatCompletionTokenLogprob(BaseModel): class ChoiceLogprobs (line 226) | class ChoiceLogprobs(BaseModel): class ChatCompletionResponseChoice (line 230) | class ChatCompletionResponseChoice(BaseModel): class ChatCompletionResponse (line 238) | class ChatCompletionResponse(BaseModel): class DeltaFunctionCall (line 248) | class DeltaFunctionCall(BaseModel): class DeltaToolCall (line 254) | class DeltaToolCall(BaseModel): class DeltaMessage (line 261) | class DeltaMessage(BaseModel): class ChatCompletionResponseStreamChoice (line 270) | class ChatCompletionResponseStreamChoice(BaseModel): class ChatCompletionStreamResponse (line 278) | class ChatCompletionStreamResponse(BaseModel): class CompletionRequest (line 288) | class CompletionRequest(BaseModel): class CompletionResponseChoice (line 327) | class CompletionResponseChoice(BaseModel): class CompletionResponse (line 336) | class CompletionResponse(BaseModel): class CompletionResponseStreamChoice (line 346) | class CompletionResponseStreamChoice(BaseModel): class CompletionStreamResponse (line 355) | class CompletionStreamResponse(BaseModel): class EmbeddingsRequest (line 365) | class EmbeddingsRequest(BaseModel): class EmbeddingsResponse (line 372) | class EmbeddingsResponse(BaseModel): class PoolingRequest (line 380) | class PoolingRequest(BaseModel): class PoolingResponse (line 397) | class PoolingResponse(BaseModel): class EncodeRequest (line 407) | class EncodeRequest(BaseModel): class EncodeResponse (line 414) | class EncodeResponse(BaseModel): class GenerateResponse (line 420) | class GenerateResponse(BaseModel): class UpdateParamsRequest (line 429) | class UpdateParamsRequest(BaseModel): class GenerateReqInput (line 442) | class GenerateReqInput(BaseModel): class GenerateReqMetaOutput (line 476) | class GenerateReqMetaOutput(BaseModel): class GenerateReqOutput (line 485) | class GenerateReqOutput(BaseModel): class AbortRequest (line 491) | class AbortRequest(BaseModel): FILE: lmdeploy/serve/openai/reasoning_parser/deepseek_r1_reasoning_parser.py class DeepSeekR1ReasoningParser (line 12) | class DeepSeekR1ReasoningParser(ReasoningParser): method __init__ (line 19) | def __init__(self, tokenizer: object): method extract_reasoning_content_streaming (line 36) | def extract_reasoning_content_streaming( method extract_reasoning_content (line 107) | def extract_reasoning_content(self, model_output: str, request: ChatCo... FILE: lmdeploy/serve/openai/reasoning_parser/qwen_qwq_reasoning_parser.py class QwenQwQReasoningParser (line 11) | class QwenQwQReasoningParser(ReasoningParser): method __init__ (line 18) | def __init__(self, tokenizer: object): method extract_reasoning_content_streaming (line 29) | def extract_reasoning_content_streaming( method extract_reasoning_content (line 97) | def extract_reasoning_content(self, model_output: str, request: ChatCo... FILE: lmdeploy/serve/openai/reasoning_parser/reasoning_parser.py class ReasoningParser (line 13) | class ReasoningParser: method __init__ (line 15) | def __init__(self, tokenizer: object): method vocab (line 19) | def vocab(self) -> Dict[str, int]: method extract_reasoning_content_streaming (line 24) | def extract_reasoning_content_streaming( method extract_reasoning_content (line 44) | def extract_reasoning_content(self, model_output: str, request: ChatCo... FILE: lmdeploy/serve/openai/serving_chat_completion.py function check_request (line 10) | def check_request(request: ChatCompletionRequest, server_context: 'Varia... FILE: lmdeploy/serve/openai/serving_completion.py function check_request (line 10) | def check_request(request: CompletionRequest, server_context: 'VariableI... FILE: lmdeploy/serve/openai/serving_generate.py function check_request (line 10) | def check_request(request: GenerateReqInput, server_context: 'VariableIn... FILE: lmdeploy/serve/openai/tool_parser/internlm2_parser.py class Internlm2ToolParser (line 21) | class Internlm2ToolParser(ToolParser): method __init__ (line 23) | def __init__(self, tokenizer: object): method adjust_request (line 27) | def adjust_request(self, request: ChatCompletionRequest) -> ChatComple... method get_argments (line 35) | def get_argments(self, obj): method extract_tool_calls_streaming (line 42) | def extract_tool_calls_streaming( method extract_tool_calls (line 158) | def extract_tool_calls( FILE: lmdeploy/serve/openai/tool_parser/llama3_parser.py class Llama3JsonToolParser (line 21) | class Llama3JsonToolParser(ToolParser): method __init__ (line 28) | def __init__(self, tokenizer: object): method extract_tool_calls (line 41) | def extract_tool_calls(self, model_output: str, request: ChatCompletio... method extract_tool_calls_streaming (line 65) | def extract_tool_calls_streaming( FILE: lmdeploy/serve/openai/tool_parser/qwen2d5_parser.py class Qwen2d5ToolParser (line 21) | class Qwen2d5ToolParser(ToolParser): method __init__ (line 23) | def __init__(self, tokenizer: object): method get_argments (line 30) | def get_argments(self, obj): method extract_tool_calls_streaming (line 37) | def extract_tool_calls_streaming( method extract_tool_calls (line 153) | def extract_tool_calls( FILE: lmdeploy/serve/openai/tool_parser/qwen3_parser.py class ParserState (line 19) | class ParserState(object): method reset_tool_call (line 27) | def reset_tool_call(self): class Qwen3ToolParser (line 33) | class Qwen3ToolParser(ToolParser): method __init__ (line 40) | def __init__(self, tokenizer: object): method get_argments (line 46) | def get_argments(self, obj): method _split (line 57) | def _split(self, parser_state: ParserState, parsing_content: str): method _parse_delta_tool_call (line 80) | def _parse_delta_tool_call(self, parser_state: ParserState, tool_conte... method extract_tool_calls_streaming (line 113) | def extract_tool_calls_streaming( method extract_tool_calls (line 150) | def extract_tool_calls( FILE: lmdeploy/serve/openai/tool_parser/qwen3coder_parser.py class ParserState (line 19) | class ParserState(object): method reset_tool_call (line 26) | def reset_tool_call(self): class Qwen3CoderToolParser (line 32) | class Qwen3CoderToolParser(ToolParser): method __init__ (line 40) | def __init__(self, tokenizer: object): method _split (line 51) | def _split(self, parser_state: ParserState, parsing_content: str) -> T... method _extract_params (line 69) | def _extract_params(self, content: str) -> Tuple[Optional[str], Dict[s... method extract_tool_calls_streaming (line 120) | def extract_tool_calls_streaming( method extract_tool_calls (line 204) | def extract_tool_calls( FILE: lmdeploy/serve/openai/tool_parser/tool_parser.py class ToolParser (line 15) | class ToolParser: method __init__ (line 21) | def __init__(self, tokenizer: object): method vocab (line 31) | def vocab(self) -> Dict[str, int]: method adjust_request (line 36) | def adjust_request(self, request: ChatCompletionRequest) -> ChatComple... method extract_tool_calls (line 40) | def extract_tool_calls(self, model_output: str, request: ChatCompletio... method extract_tool_calls_streaming (line 49) | def extract_tool_calls_streaming( FILE: lmdeploy/serve/openai/tool_parser/utils.py function find_common_prefix (line 12) | def find_common_prefix(s1: str, s2: str) -> str: function find_common_suffix (line 32) | def find_common_suffix(s1: str, s2: str) -> str: function extract_intermediate_diff (line 49) | def extract_intermediate_diff(curr: str, old: str) -> str: function find_all_indices (line 80) | def find_all_indices(string: str, substring: str) -> List[int]: function partial_json_loads (line 97) | def partial_json_loads(input_str: str, flags: Allow) -> Tuple[Any, int]: function is_complete_json (line 107) | def is_complete_json(input_str: str) -> bool: function consume_space (line 115) | def consume_space(i: int, s: str) -> int: FILE: lmdeploy/serve/processors/multimodal.py class MultimodalProcessor (line 19) | class MultimodalProcessor: method __init__ (line 23) | def __init__(self, method merge_message_content (line 42) | def merge_message_content(msg: Dict) -> Dict: method _parse_multimodal_item (line 93) | def _parse_multimodal_item(i: int, in_messages: List[Dict], out_messag... method async_parse_multimodal_item (line 139) | async def async_parse_multimodal_item(messages: List[Dict], method get_prompt_input (line 156) | async def get_prompt_input(self, method format_prompts (line 229) | def format_prompts(prompts: Any) -> List[Dict]: method _is_openai_message (line 247) | def _is_openai_message(message) -> bool: method _is_str_images_pair (line 252) | def _is_str_images_pair(message) -> bool: method _is_image (line 263) | def _is_image(obj) -> bool: method _is_image_list (line 269) | def _is_image_list(obj) -> bool: method _re_format_prompt_images_pair (line 273) | def _re_format_prompt_images_pair(prompt: Tuple) -> Dict: method _has_multimodal_input (line 305) | def _has_multimodal_input(self, messages: List[Dict]) -> bool: method _get_text_prompt_input (line 312) | async def _get_text_prompt_input(self, method _get_multimodal_prompt_input (line 345) | async def _get_multimodal_prompt_input(self, FILE: lmdeploy/serve/proxy/proxy.py class Status (line 41) | class Status(BaseModel): class Node (line 50) | class Node(BaseModel): function heart_beat_controller (line 59) | def heart_beat_controller(proxy_controller): class NodeManager (line 66) | class NodeManager: method __init__ (line 81) | def __init__(self, method get_nodes (line 117) | def get_nodes(self, role: EngineRole) -> dict[str, Status]: method hybrid_nodes (line 122) | def hybrid_nodes(self): method prefill_nodes (line 126) | def prefill_nodes(self): method decode_nodes (line 130) | def decode_nodes(self): method update_config_file (line 133) | def update_config_file(self): method add (line 147) | def add(self, node_url: str, status: Status | None = None): method remove (line 175) | def remove(self, node_url: str): method terminate_node (line 182) | def terminate_node(self, node_url: str): method terminate_all_nodes (line 205) | def terminate_all_nodes(self): method remove_stale_nodes_by_expiration (line 214) | def remove_stale_nodes_by_expiration(self): method model_list (line 233) | def model_list(self): method status (line 242) | def status(self): method get_node_url (line 246) | def get_node_url(self, model_name: str, role: EngineRole = EngineRole.... method check_request_model (line 315) | async def check_request_model(self, model_name) -> JSONResponse | None: method handle_unavailable_model (line 322) | def handle_unavailable_model(self, model_name): method handle_api_timeout (line 335) | def handle_api_timeout(self, node_url): method stream_generate (line 344) | async def stream_generate(self, request: dict, node_url: str, endpoint... method generate (line 363) | async def generate(self, request: dict, node_url: str, endpoint: str): method forward_raw_request_stream_generate (line 379) | async def forward_raw_request_stream_generate(self, raw_request: Reque... method forward_raw_request_generate (line 401) | async def forward_raw_request_generate(self, raw_request: Request, nod... method pre_call (line 414) | def pre_call(self, node_url): method post_call (line 423) | def post_call(self, node_url: str, start: int): method create_background_tasks (line 434) | def create_background_tasks(self, url: str, start: int): method _prepare_headers (line 445) | def _prepare_headers(self, raw_request: Request) -> dict[str, str]: function available_models (line 470) | def available_models(): function node_status (line 479) | def node_status(): function add_node (line 488) | def add_node(node: Node, raw_request: Request = None): function remove_node (line 509) | def remove_node(node: Node): function terminate_node (line 522) | def terminate_node(node: Node): function terminate_node_all (line 536) | def terminate_node_all(): function connection_warmup (line 549) | async def connection_warmup(): function cache_block_gc_to_be_migrated (line 563) | async def cache_block_gc_to_be_migrated(): function chat_completions_v1 (line 569) | async def chat_completions_v1(request: ChatCompletionRequest, raw_reques... function completions_v1 (line 737) | async def completions_v1(request: CompletionRequest, raw_request: Reques... function proxy (line 879) | def proxy(server_name: str = '0.0.0.0', FILE: lmdeploy/serve/proxy/streaming_response.py class ProxyStreamingResponse (line 10) | class ProxyStreamingResponse(StreamingResponse): method __init__ (line 13) | def __init__(self, content, **kwargs): method stream_response (line 16) | async def stream_response(self, send) -> None: method _convert_headers_to_asgi (line 69) | def _convert_headers_to_asgi(self, headers: dict) -> list[tuple[bytes,... FILE: lmdeploy/serve/proxy/utils.py class RoutingStrategy (line 18) | class RoutingStrategy(enum.Enum): method from_str (line 25) | def from_str(cls, name): class ErrorCodes (line 38) | class ErrorCodes(enum.Enum): class APIServerException (line 52) | class APIServerException(Exception): method __init__ (line 54) | def __init__(self, status_code: int, body: bytes, headers: dict | None... FILE: lmdeploy/serve/utils/server_utils.py function validate_json_request (line 14) | def validate_json_request(raw_request: Request): class AuthenticationMiddleware (line 21) | class AuthenticationMiddleware: method __init__ (line 32) | def __init__(self, app: ASGIApp, tokens: list[str]) -> None: method verify_token (line 43) | def verify_token(self, headers: Headers) -> bool: method __call__ (line 60) | def __call__(self, scope: Scope, receive: Receive, send: Send) -> Awai... FILE: lmdeploy/tokenizer.py class DetokenizeState (line 16) | class DetokenizeState: method as_tuple (line 34) | def as_tuple(self) -> Tuple: class HuggingFaceTokenizer (line 39) | class HuggingFaceTokenizer: method __init__ (line 46) | def __init__(self, model_dir: str): method _check_transformers_version (line 70) | def _check_transformers_version(self, model_dir: str): method get_vocab (line 94) | def get_vocab(self): method vocab_size (line 99) | def vocab_size(self): method vocab_size_with_added (line 104) | def vocab_size_with_added(self): method bos_token_id (line 112) | def bos_token_id(self): method eos_token_id (line 117) | def eos_token_id(self): method prefix_space_tokens (line 122) | def prefix_space_tokens(self): method _maybe_add_prefix_space (line 132) | def _maybe_add_prefix_space(self, tokens: List[int], decoded: str): method maybe_decode_bytes (line 141) | def maybe_decode_bytes(self): method indexes_containing_token (line 152) | def indexes_containing_token(self, token: str): method encode (line 192) | def encode(self, s: str, add_bos: bool = True, add_special_tokens: boo... method decode (line 211) | def decode(self, t: Sequence[int], offset: Optional[int] = None, skip_... method _convert_tokens_to_string_with_added_encoders (line 233) | def _convert_tokens_to_string_with_added_encoders( method detokenize_incrementally (line 267) | def detokenize_incrementally(self, method __call__ (line 338) | def __call__(self, s: Union[str, Sequence[str]]): class ChatGLM4Tokenizer (line 350) | class ChatGLM4Tokenizer(HuggingFaceTokenizer): method __init__ (line 353) | def __init__(self, model_path): method encode (line 365) | def encode(self, s: str, add_bos: bool = True, add_special_tokens: boo... class ChatGLMTokenizer (line 372) | class ChatGLMTokenizer(HuggingFaceTokenizer): method __init__ (line 375) | def __init__(self, model_path): class GptOssTokenizer (line 388) | class GptOssTokenizer(HuggingFaceTokenizer): method __init__ (line 391) | def __init__(self, model_dir: str): method detokenize_incrementally (line 398) | def detokenize_incrementally(self, class Tokenizer (line 417) | class Tokenizer: method __init__ (line 424) | def __init__(self, model_path: str): method vocab_size (line 445) | def vocab_size(self): method bos_token_id (line 450) | def bos_token_id(self): method eos_token_id (line 455) | def eos_token_id(self): method get_vocab (line 459) | def get_vocab(self): method encode (line 463) | def encode(self, s: str, add_bos: bool = True, add_special_tokens: boo... method decode (line 483) | def decode( method detokenize_incrementally (line 502) | def detokenize_incrementally(self, method __call__ (line 528) | def __call__(self, s: Union[str, Sequence[str]]): method indexes_containing_token (line 538) | def indexes_containing_token(self, token): FILE: lmdeploy/turbomind/__init__.py function bootstrap (line 4) | def bootstrap(): FILE: lmdeploy/turbomind/deploy/config.py function config_from_dict (line 16) | def config_from_dict(cls, env): function config_to_dict (line 31) | def config_to_dict(config): class ModelConfig (line 42) | class ModelConfig: method verify (line 109) | def verify(self): class RopeParam (line 118) | class RopeParam: class AttentionConfig (line 134) | class AttentionConfig: class LoraConfig (line 143) | class LoraConfig: class TurbomindModelConfig (line 153) | class TurbomindModelConfig: method update_from_engine_config (line 159) | def update_from_engine_config(self, config: TurbomindEngineConfig): method from_dict (line 210) | def from_dict(cls, config: dict = {}): method to_dict (line 218) | def to_dict(self): method session_len (line 225) | def session_len(self): method weight_type (line 229) | def weight_type(self): method group_size (line 233) | def group_size(self): method vocab_size (line 237) | def vocab_size(self): method __str__ (line 240) | def __str__(self): FILE: lmdeploy/turbomind/deploy/converter.py function get_input_model_registered_name (line 20) | def get_input_model_registered_name(model_path: str, model_format: str): function get_output_model_registered_name_and_config (line 34) | def get_output_model_registered_name_and_config(model_path: str, model_f... function get_tm_model (line 151) | def get_tm_model(model_path, FILE: lmdeploy/turbomind/deploy/loader.py class BaseLoader (line 24) | class BaseLoader(ABC): method __init__ (line 26) | def __init__(self, model_path: str, pattern, mappings: list): method get_index (line 32) | def get_index(self, index_name: str, file_pattern: str) -> Tuple[dict,... method map_key (line 48) | def map_key(self, key: str): method items (line 58) | def items(self) -> Iterator[Tuple[int, dict]]: class SafetensorsLoader (line 62) | class SafetensorsLoader(BaseLoader): method __init__ (line 64) | def __init__(self, model_path: str, pattern: str, mappings: list, inde... method items (line 82) | def items(self): class PytorchLoader (line 108) | class PytorchLoader(BaseLoader): method __init__ (line 110) | def __init__(self, model_path: str, pattern: str, mappings: list, inde... method items (line 118) | def items(self): class StateDictLoader (line 148) | class StateDictLoader: method __init__ (line 155) | def __init__(self, queue: Queue, pattern: str, mappings: list): method items (line 159) | def items(self): function create_loader (line 177) | def create_loader(model_path: Union[str, Queue], pattern: str, mappings:... FILE: lmdeploy/turbomind/deploy/module.py function permute_v2 (line 12) | def permute_v2(x: torch.Tensor, size_per_head: int = 128): function permute_v2_partial (line 25) | def permute_v2_partial(x: torch.Tensor, size_per_head: int, rotary_dim: ... function merge_qkv_v2 (line 51) | def merge_qkv_v2(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, tp: ... function merge_qkvg_v2 (line 68) | def merge_qkvg_v2(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, gat... function transpose (line 86) | def transpose(x): function pad_out_dims (line 90) | def pad_out_dims(x: torch.Tensor, dims: int): function pad_in_dims (line 96) | def pad_in_dims(x: torch.Tensor, dims: int): function get_lora_flags (line 107) | def get_lora_flags(kind: str): class Module (line 111) | class Module(ABC): method __init__ (line 113) | def __init__(self, model: BaseOutputModel): method __call__ (line 116) | def __call__(self, *args, **kwargs): method apply (line 120) | def apply(self, idx: int, r: BaseReader): class LayerNorm (line 124) | class LayerNorm(Module): method apply (line 126) | def apply(self, i: int, r: BaseReader): class Ffn (line 133) | class Ffn(Module): method __init__ (line 141) | def __init__(self, model: BaseOutputModel): method _export (line 149) | def _export(self, inter_size: int, fmt: str, idx: int, w123, kind: str... method apply (line 167) | def apply(self, i: int, r: BaseReader): class MoeFfn (line 176) | class MoeFfn(Ffn): method __init__ (line 188) | def __init__(self, model: BaseOutputModel): method apply (line 194) | def apply(self, i: int, r: BaseReader): class Attn (line 224) | class Attn(Module): method __init__ (line 232) | def __init__(self, model: BaseOutputModel): method _split_q_gate (line 245) | def _split_q_gate(self, q): method _reorder_and_merge (line 267) | def _reorder_and_merge(self, qkvo, gs: int): method _repeat_kv (line 295) | def _repeat_kv(self, qkvo, gs: int, kind: str): method _export (line 320) | def _export(self, idx: int, qkvo, kind: str, pack_fn, apply_gs=[], **k... method apply (line 346) | def apply(self, i: int, r: BaseReader): class MLA (line 366) | class MLA(Module): method __init__ (line 375) | def __init__(self, model: BaseOutputModel): method _export (line 378) | def _export(self, idx: int, xs, kind: str, pack_fn, **kwargs): method apply (line 473) | def apply(self, i: int, r: BaseReader): class LinearAttn (line 484) | class LinearAttn(Module): method __init__ (line 487) | def __init__(self, model: BaseOutputModel): method _tp_interleave_qkv (line 494) | def _tp_interleave_qkv(self, tensor, dim): method apply (line 522) | def apply(self, i: int, r: BaseReader): class Misc (line 576) | class Misc(Module): method apply (line 584) | def apply(self, i: int, r: BaseReader): class Transformer (line 611) | class Transformer: method __init__ (line 613) | def __init__(self, model: BaseOutputModel): method __call__ (line 629) | def __call__(self, i: int, r: BaseReader): FILE: lmdeploy/turbomind/deploy/parameter.py function identity (line 7) | def identity(x): function to_half (line 11) | def to_half(x: torch.Tensor): function to_float (line 15) | def to_float(x: torch.Tensor): function to_fp8 (line 19) | def to_fp8(x: torch.Tensor): function pack_u4_row (line 24) | def pack_u4_row(x: torch.Tensor) -> torch.Tensor: function generate_zero_point (line 33) | def generate_zero_point(g): class Parameter (line 43) | class Parameter: method take (line 47) | def take(cls, keys: list[str]): method __call__ (line 59) | def __call__(cls, f, g, i): class QuantWeightOnly (line 63) | class QuantWeightOnly(Parameter): method __call__ (line 66) | def __call__(self, f, g, i): class WeightScaleInv (line 72) | class WeightScaleInv(Parameter): method __call__ (line 76) | def __call__(self, f, g, i): class CompressedWeight (line 81) | class CompressedWeight(Parameter): method __init__ (line 84) | def __init__(self, xs): method __call__ (line 89) | def __call__(self, f, g, i): class Mxfp4Weight (line 98) | class Mxfp4Weight(Parameter): method __call__ (line 101) | def __call__(self, f, g, i): class Weight (line 106) | class Weight(Parameter): method __call__ (line 109) | def __call__(self, f, g, i): class Bias (line 113) | class Bias(Parameter): method __call__ (line 116) | def __call__(self, f, g, i): class PLora (line 120) | class PLora(Parameter): method __call__ (line 123) | def __call__(self, f, g, i): function get_params (line 128) | def get_params(keys: list[str], bias=0): FILE: lmdeploy/turbomind/deploy/policy.py function to_cuda (line 7) | def to_cuda(x: torch.Tensor, *args): function get_u4_slices (line 11) | def get_u4_slices(x: torch.Tensor, dtype: torch.dtype) -> List[torch.Ten... function unpack_awq_gemm (line 20) | def unpack_awq_gemm(x: torch.Tensor) -> torch.Tensor: function process_awq_gemm (line 27) | def process_awq_gemm(x: torch.Tensor, kind: str): function process_gptq (line 36) | def process_gptq(x: torch.Tensor, kind: str): function process_mxfp4 (line 49) | def process_mxfp4(x: torch.Tensor, kind: str): function process_fp8 (line 60) | def process_fp8(x: torch.Tensor, kind: str): function process_compressed_tensor (line 71) | def process_compressed_tensor(x: torch.Tensor, kind: str): function get_input_policy (line 82) | def get_input_policy(model_format): FILE: lmdeploy/turbomind/deploy/source_model/baichuan.py class BaichuanReader (line 9) | class BaichuanReader(LlamaReader): method _attn (line 12) | def _attn(self, i: int, kind: str): class BaichuanModel (line 25) | class BaichuanModel(LlamaModel): class Baichuan2Reader (line 31) | class Baichuan2Reader(BaichuanReader): method output_weight (line 34) | def output_weight(self): class Baichuan2Model (line 45) | class Baichuan2Model(LlamaModel): FILE: lmdeploy/turbomind/deploy/source_model/base.py class BaseReader (line 11) | class BaseReader(ABC): method __init__ (line 14) | def __init__(self): method transform (line 17) | def transform(self, x: Union[torch.Tensor, None], kind: str) -> Union[... method _transform (line 21) | def _transform(self, x: torch.Tensor, kind: str): class BaseInputModel (line 26) | class BaseInputModel(ABC): method __init__ (line 29) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs): method model_info (line 40) | def model_info(self) -> Dict: method readers (line 45) | def readers(self) -> Iterator[BaseReader]: FILE: lmdeploy/turbomind/deploy/source_model/deepseek2.py class DeepSeek2Reader (line 10) | class DeepSeek2Reader(LlamaReader): method moe_ffn_gate (line 12) | def moe_ffn_gate(self, i, kind): method moe_ffn_expert (line 15) | def moe_ffn_expert(self, e=None, i=None, kind=None): method _ffn (line 26) | def _ffn(self, i: int, kind: str): method ffn (line 45) | def ffn(self, i: int, kind: str): method mla (line 48) | def mla(self, i: int, kind: str): method mla_norm (line 58) | def mla_norm(self, i: int): function get_yarn_params (line 66) | def get_yarn_params(rope_scaling: dict): class DeepSeek2Model (line 88) | class DeepSeek2Model(LlamaModel): method model_info (line 92) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/deepseek_vl.py class DeepSeekVLReader (line 10) | class DeepSeekVLReader(LlamaReader): method __init__ (line 19) | def __init__(self, new_params: dict, unused_params: dict, last_bin: bo... method attn_norm (line 23) | def attn_norm(self, i: int): method ffn_norm (line 27) | def ffn_norm(self, i: int): class DeepSeekVLModel (line 33) | class DeepSeekVLModel(LlamaModel): method model_info (line 38) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/glm4.py class Glm4Reader (line 12) | class Glm4Reader(LlamaReader): method _attn (line 22) | def _attn(self, i: int, kind: str): method attn_norm (line 39) | def attn_norm(self, i: int): method _ffn (line 43) | def _ffn(self, i: int, kind: str): method ffn_norm (line 52) | def ffn_norm(self, i: int): class Glm4Model (line 58) | class Glm4Model(LlamaModel): method __init__ (line 63) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs): method model_info (line 69) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/glm4_moe_lite.py class Glm4MoeLiteReader (line 12) | class Glm4MoeLiteReader(DeepSeek2Reader): method moe_ffn_gate_correction_bias (line 25) | def moe_ffn_gate_correction_bias(self, i: int): class Glm4MoeLiteModel (line 31) | class Glm4MoeLiteModel(DeepSeek2Model): method model_info (line 39) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/gpt_oss.py function map_experts (line 9) | def map_experts(str): class GptOssReader (line 17) | class GptOssReader(LlamaReader): method moe_ffn_expert (line 21) | def moe_ffn_expert(self, e=None, i=None, kind=None): method moe_ffn_gate (line 38) | def moe_ffn_gate(self, i, kind): method attn_sinks (line 41) | def attn_sinks(self, i): class GptOssModel (line 46) | class GptOssModel(LlamaModel): method model_info (line 50) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/internlm2.py class InternLM2Reader (line 11) | class InternLM2Reader(LlamaReader): method filter (line 25) | def filter(self, pattern: str, i: int | None): method _attn (line 63) | def _attn(self, i: int, kind: str): method attn_norm (line 88) | def attn_norm(self, i: int): method _ffn (line 92) | def _ffn(self, i: int, kind: str): method ffn_norm (line 103) | def ffn_norm(self, i: int): class InternLM2Model (line 109) | class InternLM2Model(LlamaModel): FILE: lmdeploy/turbomind/deploy/source_model/internvl.py class InternVLReader (line 9) | class InternVLReader(LlamaReader): class InternVL2Reader (line 20) | class InternVL2Reader(InternLM2Reader): class InternVL3d5Reader (line 30) | class InternVL3d5Reader(Qwen3Reader): class InternVL3d5Qwen3MoEReader (line 38) | class InternVL3d5Qwen3MoEReader(Qwen3MoeReader): class InternVL3d5GptOSSReader (line 46) | class InternVL3d5GptOSSReader(GptOssReader): class InternS1Reader (line 54) | class InternS1Reader(Qwen3MoeReader): class InternS1MiniReader (line 64) | class InternS1MiniReader(Qwen3Reader): class InternVLModel (line 74) | class InternVLModel(LlamaModel): method __init__ (line 77) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs): method model_info (line 100) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/llama.py class LlamaReader (line 14) | class LlamaReader(BaseReader): method __init__ (line 29) | def __init__(self, new_params: dict, unused_params: dict, last_bin: bo... method quant_weight_fp8 (line 44) | def quant_weight_fp8(self): method filter (line 65) | def filter(self, pattern: str, i: int | None): method tok_embeddings (line 72) | def tok_embeddings(self): method norm_weight (line 76) | def norm_weight(self): method output_weight (line 80) | def output_weight(self): method _transform (line 84) | def _transform(self, x: torch.Tensor, kind: str): method _attn (line 87) | def _attn(self, i: int, kind: str): method attn (line 96) | def attn(self, i: int, kind: str): method attn_norm (line 101) | def attn_norm(self, i: int): method _ffn (line 105) | def _ffn(self, i: int, kind: str): method ffn (line 116) | def ffn(self, i: int, kind: str): method ffn_norm (line 121) | def ffn_norm(self, i: int): class LlamaModel (line 127) | class LlamaModel(BaseInputModel): method __init__ (line 132) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs: dict): method readers (line 146) | def readers(self): method model_info (line 154) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/llava.py class LlavaReader (line 10) | class LlavaReader(LlamaReader): method __init__ (line 19) | def __init__(self, new_params: dict, unused_params: dict, last_bin: bo... class LlavaModel (line 25) | class LlavaModel(LlamaModel): method __init__ (line 28) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs): method model_info (line 38) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/minicpmv.py class MiniCPMVReader (line 10) | class MiniCPMVReader(LlamaReader): class MiniCPMVModel (line 21) | class MiniCPMVModel(LlamaModel): method model_info (line 25) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/mixtral.py class MixtralReader (line 7) | class MixtralReader(LlamaReader): method moe_ffn_expert (line 9) | def moe_ffn_expert(self, e=None, i=None, kind=None): method moe_ffn_gate (line 20) | def moe_ffn_gate(self, i, kind): class MixtralModel (line 25) | class MixtralModel(LlamaModel): method model_info (line 29) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/molmo.py class MolmoReader (line 12) | class MolmoReader(LlamaReader): method tok_embeddings (line 25) | def tok_embeddings(self): method attn_norm (line 34) | def attn_norm(self, i: int): method _attn (line 38) | def _attn(self, i: int, kind: str): method _ffn (line 62) | def _ffn(self, i: int, kind: str): method ffn_norm (line 71) | def ffn_norm(self, i: int): class MolmoModel (line 77) | class MolmoModel(LlamaModel): method __init__ (line 81) | def __init__(self, model_path: str, tokenizer_path: str, **kwargs): method model_info (line 87) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/qwen.py class QwenReader (line 14) | class QwenReader(LlamaReader): method _attn (line 25) | def _attn(self, i: int, kind: str): method attn_norm (line 38) | def attn_norm(self, i: int): method _ffn (line 42) | def _ffn(self, i: int, kind: str): method ffn_norm (line 51) | def ffn_norm(self, i: int): class QwenModel (line 57) | class QwenModel(LlamaModel): method model_info (line 62) | def model_info(self): class Qwen2Model (line 106) | class Qwen2Model(LlamaModel): method model_info (line 114) | def model_info(self): class Qwen2MoeReader (line 120) | class Qwen2MoeReader(LlamaReader): method moe_ffn_expert (line 122) | def moe_ffn_expert(self, e=None, i=None, kind=None): method moe_ffn_gate (line 133) | def moe_ffn_gate(self, i, kind): method _ffn (line 136) | def _ffn(self, i: int, kind: str): method ffn (line 147) | def ffn(self, i: int, kind: str): method moe_ffn_shared_gate (line 152) | def moe_ffn_shared_gate(self, i): class Qwen2MoeModel (line 157) | class Qwen2MoeModel(LlamaModel): method model_info (line 161) | def model_info(self): class Qwen3Reader (line 174) | class Qwen3Reader(LlamaReader): method qk_norm (line 176) | def qk_norm(self, i: int): class Qwen3Model (line 185) | class Qwen3Model(LlamaModel): method model_info (line 188) | def model_info(self): class Qwen3MoeReader (line 195) | class Qwen3MoeReader(Qwen2MoeReader): method qk_norm (line 197) | def qk_norm(self, i: int): class Qwen3MoeModel (line 206) | class Qwen3MoeModel(LlamaModel): method model_info (line 209) | def model_info(self): class Qwen3_5ReaderMixin (line 223) | class Qwen3_5ReaderMixin: method __init__ (line 237) | def __init__(self, *args, **kwargs): method attn_norm (line 248) | def attn_norm(self, i: int): method ffn_norm (line 254) | def ffn_norm(self, i: int): method norm_weight (line 260) | def norm_weight(self): method qk_norm (line 266) | def qk_norm(self, i: int): method _attn (line 272) | def _attn(self, i: int, kind: str): method _awq_dequant (line 299) | def _awq_dequant(self, prefix: str): method linear_attn (line 315) | def linear_attn(self, i: int, kind: str): method linear_norm (line 339) | def linear_norm(self, i: int, kind: str = 'weight'): class Qwen3_5Reader (line 346) | class Qwen3_5Reader(Qwen3_5ReaderMixin, Qwen3Reader): class Qwen3_5Model (line 351) | class Qwen3_5Model(Qwen3Model): method model_info (line 354) | def model_info(self): class Qwen3_5MoeReader (line 389) | class Qwen3_5MoeReader(Qwen3_5ReaderMixin, Qwen3MoeReader): method _unpacked_moe_expert (line 391) | def _unpacked_moe_expert(self, e: int, i: int, kind: str): method moe_ffn_expert (line 406) | def moe_ffn_expert(self, e=None, i=None, kind=None): class Qwen3_5MoeModel (line 417) | class Qwen3_5MoeModel(Qwen3MoeModel): method map_packed_qwen35_experts (line 421) | def map_packed_qwen35_experts(name: str): method readers (line 428) | def readers(self): method model_info (line 442) | def model_info(self): FILE: lmdeploy/turbomind/deploy/source_model/xcomposer2.py class Xcomposer2Reader (line 7) | class Xcomposer2Reader(InternLM2Reader): method _attn (line 14) | def _attn(self, i, kind): class Xcomposer2Model (line 23) | class Xcomposer2Model(InternLM2Model): method _lora_cfg_7b (line 28) | def _lora_cfg_7b(self): method _lora_cfg_4khd_7b (line 32) | def _lora_cfg_4khd_7b(self, model_info: dict): method model_info (line 45) | def model_info(self): FILE: lmdeploy/turbomind/deploy/target_model/base.py function tprint (line 18) | def tprint(*args, **kwargs): function _weight_dtype_map (line 28) | def _weight_dtype_map(weight_type: str, default=None): function _pad_inter_size (line 36) | def _pad_inter_size(inter_size: int, group_size: int, tp: int): class BaseOutputModel (line 44) | class BaseOutputModel(ABC): method __init__ (line 47) | def __init__(self, input_model: BaseInputModel, cfg: TurbomindModelCon... method single_to_list (line 90) | def single_to_list(self, config: dict, keys): method update_model_config (line 98) | def update_model_config(self): method update_attention_config (line 108) | def update_attention_config(self): method update_lora_config (line 114) | def update_lora_config(self): method export_config (line 120) | def export_config(self) -> None: method export_weight (line 127) | def export_weight(self, param: torch.Tensor, name: str) -> None: method save_split (line 184) | def save_split(self, tensor: torch.Tensor, name: str, split_dim=None, ... method export (line 218) | def export(self) -> None: method export_iter (line 229) | def export_iter(self): method tm_config (line 236) | def tm_config(self): FILE: lmdeploy/turbomind/deploy/target_model/fp.py class TurbomindModel (line 7) | class TurbomindModel(BaseOutputModel): FILE: lmdeploy/turbomind/supported_models.py function is_supported (line 67) | def is_supported(model_path: str): FILE: lmdeploy/turbomind/tokenizer_info.py class VocabType (line 27) | class VocabType(Enum): class TokenizerInfo (line 59) | class TokenizerInfo(_xgr.TokenizerInfo): method __init__ (line 72) | def __init__( method _is_tiktoken_tokenizer (line 107) | def _is_tiktoken_tokenizer(tokenizer: PreTrainedTokenizerBase) -> bool: method _is_sentencepiece_tokenizer (line 120) | def _is_sentencepiece_tokenizer(tokenizer: PreTrainedTokenizerBase) ->... method from_huggingface (line 134) | def from_huggingface( FILE: lmdeploy/turbomind/turbomind.py function _construct_stop_or_bad_words (line 44) | def _construct_stop_or_bad_words(words: List[int] = None): function _np_dict_to_tm_dict (line 52) | def _np_dict_to_tm_dict(np_dict: dict): function _tm_dict_to_torch_dict (line 61) | def _tm_dict_to_torch_dict(tm_dict: _tm.TensorMap): function complete_parallel_config (line 72) | def complete_parallel_config(cfg: TurbomindEngineConfig): function update_parallel_config (line 87) | def update_parallel_config(cfg: TurbomindEngineConfig): class TurboMind (line 115) | class TurboMind: method __init__ (line 130) | def __init__(self, method _check_unloaded_tm_params (line 177) | def _check_unloaded_tm_params(self): method _load_weights (line 184) | def _load_weights(self): method _process_weights (line 193) | def _process_weights(self): method _create_engine (line 199) | def _create_engine(self): method _create_weight (line 206) | def _create_weight(self, model_comm): method _get_model_params (line 220) | def _get_model_params(self): method _postprocess_config (line 248) | def _postprocess_config(self, tm_config: TurbomindModelConfig, engine_... method _from_hf (line 267) | def _from_hf(self, model_path: str, engine_config: TurbomindEngineConf... method sleep (line 288) | def sleep(self, level: int = 1): method wakeup (line 294) | def wakeup(self, tags: Optional[list[str]] = None): method update_params (line 302) | def update_params(self, request: UpdateParamsRequest): method from_pretrained (line 345) | def from_pretrained(cls, method close (line 376) | def close(self): method create_instance (line 388) | def create_instance(self, cuda_stream_id=0): method get_schedule_metrics (line 398) | def get_schedule_metrics(self): function _get_logits (line 408) | def _get_logits(outputs, offset: int): function _get_last_hidden_state (line 417) | def _get_last_hidden_state(outputs, offset: int): function _get_logprobs_impl (line 426) | def _get_logprobs_impl(logprob_vals: torch.Tensor, logprob_idxs: torch.T... function _get_logprobs (line 460) | def _get_logprobs(outputs, output_logprobs: int): function _get_metrics (line 475) | def _get_metrics(metrics): class StreamingSemaphore (line 497) | class StreamingSemaphore: method __init__ (line 499) | def __init__(self): method acquire (line 504) | async def acquire(self): method release (line 513) | def release(self): class TurboMindInstance (line 520) | class TurboMindInstance: method __init__ (line 528) | def __init__(self, tm_model: TurboMind, config: TurbomindModelConfig, ... method model_inst (line 556) | def model_inst(self): method _create_model_instance (line 561) | def _create_model_instance(self): method _get_extra_output_processors (line 565) | def _get_extra_output_processors(self, outputs: Dict[str, torch.Tensor... method prepare_embeddings (line 584) | def prepare_embeddings(self, input_embeddings=None, input_embedding_ra... method prepare_mrope (line 608) | def prepare_mrope(self, input_meta: Dict[str, Any], input_len: int): method prepare_inputs (line 615) | def prepare_inputs(self, method async_cancel (line 642) | async def async_cancel(self, session_id: int = None): method async_end_cb (line 645) | def async_end_cb(self, fut: asyncio.Future, status: int): method async_end (line 650) | async def async_end(self, session_id): method async_signal_cb (line 655) | def async_signal_cb(self, s: StreamingSemaphore): method async_stream_infer (line 659) | async def async_stream_infer(self, method _get_error_output (line 799) | def _get_error_output(self, status): method _get_generation_config (line 802) | def _get_generation_config(self, cfg: GenerationConfig): FILE: lmdeploy/utils.py class _ASNI_COLOR (line 18) | class _ASNI_COLOR: function can_colorize (line 28) | def can_colorize(*, no_color: bool | None = None, force_color: bool | No... class ColorFormatter (line 56) | class ColorFormatter(logging.Formatter): method format (line 67) | def format(self, record: LogRecord): class FilterDuplicateWarning (line 79) | class FilterDuplicateWarning(logging.Filter): method __init__ (line 86) | def __init__(self, name: str = 'lmdeploy'): method filter (line 90) | def filter(self, record: LogRecord) -> bool: function get_logger (line 112) | def get_logger(name: str | None = None, function filter_suffix (line 180) | def filter_suffix(response: str, suffixes: list[str] | None = None) -> str: function _stop_words (line 199) | def _stop_words(stop_words: list[int | str], tokenizer: object): function get_hf_gen_cfg (line 222) | def get_hf_gen_cfg(path: str): function get_model (line 231) | def get_model(pretrained_model_name_or_path: str, download_dir: str = No... function logging_timer (line 253) | def logging_timer(op_name: str, logger: Logger, level: int = logging.DEB... function _get_and_verify_max_len (line 297) | def _get_and_verify_max_len( function get_max_batch_size (line 366) | def get_max_batch_size(device_type: str): function is_bf16_supported (line 392) | def is_bf16_supported(device_type: str = 'cuda'): function try_import_deeplink (line 433) | def try_import_deeplink(device_type: str): function serialize_state_dict (line 449) | def serialize_state_dict(state_dict: dict) -> str: function is_dlblas_installed (line 481) | def is_dlblas_installed(): class FlattenedTensorMetadata (line 494) | class FlattenedTensorMetadata: class FlattenedTensorBucket (line 504) | class FlattenedTensorBucket: method __init__ (line 507) | def __init__( method get_flattened_tensor (line 550) | def get_flattened_tensor(self) -> torch.Tensor: method get_metadata (line 554) | def get_metadata(self) -> list[FlattenedTensorMetadata]: method reconstruct_tensors (line 558) | def reconstruct_tensors(self) -> list[tuple[str, torch.Tensor]]: FILE: lmdeploy/version.py function parse_version_info (line 8) | def parse_version_info(version_str: str) -> Tuple: FILE: lmdeploy/vl/constants.py class Modality (line 7) | class Modality(str, Enum): FILE: lmdeploy/vl/engine.py function _raise_exception_on_finish (line 17) | def _raise_exception_on_finish(task: asyncio.Task) -> None: function _accepts_arg (line 27) | def _accepts_arg(func, arg_name: str) -> bool: class ImageEncoder (line 32) | class ImageEncoder: method __init__ (line 35) | def __init__( method preprocess (line 50) | async def preprocess(self, method async_infer (line 63) | async def async_infer(self, messages: List[Dict]) -> List[Dict]: method wrap_for_pytorch (line 76) | async def wrap_for_pytorch( method wrap_for_turbomind (line 117) | async def wrap_for_turbomind( FILE: lmdeploy/vl/media/base.py class MediaIO (line 11) | class MediaIO(ABC, Generic[_T]): method load_bytes (line 14) | def load_bytes(self, data: bytes) -> _T: method load_base64 (line 18) | def load_base64(self, media_type: str, data: str) -> _T: method load_file (line 22) | def load_file(self, filepath: Path) -> _T: FILE: lmdeploy/vl/media/connection.py function _load_http_url (line 23) | def _load_http_url(url_spec: ParseResult, media_io: MediaIO[_M]) -> _M: function _load_data_url (line 40) | def _load_data_url(url_spec: ParseResult, media_io: MediaIO[_M]) -> _M: function _load_file_url (line 54) | def _load_file_url(url_spec: ParseResult, media_io: MediaIO[_M]) -> _M: function load_from_url (line 61) | def load_from_url(url: str, media_io: MediaIO[_M]) -> _M: FILE: lmdeploy/vl/media/image.py class ImageMediaIO (line 15) | class ImageMediaIO(MediaIO[Image.Image]): method __init__ (line 17) | def __init__(self, image_mode: str = 'RGB', **kwargs) -> None: method load_bytes (line 24) | def load_bytes(self, data: bytes) -> Image.Image: method load_base64 (line 28) | def load_base64(self, media_type: str, data: str) -> Image.Image: method load_file (line 31) | def load_file(self, file_path: Path) -> Image.Image: method encode_base64 (line 37) | def encode_base64(self, image: Image.Image, image_format: str = 'PNG')... FILE: lmdeploy/vl/media/time_series.py class TimeSeriesMediaIO (line 16) | class TimeSeriesMediaIO(MediaIO[npt.NDArray]): method __init__ (line 18) | def __init__(self, **kwargs): method load_bytes (line 24) | def load_bytes(self, data: bytes) -> npt.NDArray: method load_base64 (line 28) | def load_base64(self, media_type: str, data: str) -> npt.NDArray: method load_file (line 31) | def load_file(self, filepath: Path) -> npt.NDArray: method encode_base64 (line 56) | def encode_base64(self, data: npt.NDArray) -> str: FILE: lmdeploy/vl/media/video.py class VideoMediaIO (line 23) | class VideoMediaIO(MediaIO[tuple[npt.NDArray, dict[str, Any]]]): method __init__ (line 25) | def __init__( method _get_video_loader_backend (line 40) | def _get_video_loader_backend(self) -> VideoLoader: method load_bytes (line 63) | def load_bytes(self, data: bytes) -> tuple[npt.NDArray, dict[str, Any]]: method load_base64 (line 66) | def load_base64(self, media_type: str, data: str) -> tuple[npt.NDArray... method load_file (line 97) | def load_file(self, filepath: Path) -> tuple[npt.NDArray, dict[str, An... method encode_base64 (line 100) | def encode_base64( FILE: lmdeploy/vl/media/video_loader.py class VideoLoader (line 21) | class VideoLoader: method load_bytes (line 25) | def load_bytes(self, data: bytes, num_frames: int = -1, **kwargs) -> t... method smart_nframes (line 29) | def smart_nframes(self, total_frames_num: int, num_frames: int, fps: i... class OpenCVVideoLoader (line 47) | class OpenCVVideoLoader(VideoLoader): method get_cv2_video_api (line 49) | def get_cv2_video_api(self): method _read_frames (line 65) | def _read_frames( method load_file (line 117) | def load_file( method load_bytes (line 130) | def load_bytes( class DecordVideoLoader (line 183) | class DecordVideoLoader(VideoLoader): method load_file (line 186) | def load_file(self, method load_bytes (line 211) | def load_bytes(self, class TorchCodecVideoLoader (line 234) | class TorchCodecVideoLoader(VideoLoader): method load_file (line 237) | def load_file(self, method load_bytes (line 266) | def load_bytes(self, class TorchVisionVideoLoader (line 289) | class TorchVisionVideoLoader(VideoLoader): method load_file (line 292) | def load_file(self, method load_bytes (line 322) | def load_bytes(self, FILE: lmdeploy/vl/model/base.py class VisionModel (line 15) | class VisionModel(ABC): method __init__ (line 19) | def __init__(self, method get_pad_token_id (line 35) | def get_pad_token_id(self, model_path, hf_config): method build_preprocessor (line 48) | def build_preprocessor(self, ): method build_model (line 56) | def build_model(self, ): method preprocess (line 65) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method has_input_ids (line 108) | def has_input_ids(self, messages: List[Dict]) -> bool: method forward (line 120) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method to_pytorch (line 135) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 151) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... method collect_multimodal_items (line 168) | def collect_multimodal_items(messages): method IMAGE_TOKEN_included (line 198) | def IMAGE_TOKEN_included(messages): method to_pytorch_with_input_ids (line 218) | def to_pytorch_with_input_ids(self, messages): method to_pytorch_aux (line 255) | def to_pytorch_aux(self, messages, prompt, IMAGE_TOKEN, tokenizer, seq... method to_turbomind_aux (line 290) | def to_turbomind_aux(self, messages, prompt, IMAGE_TOKEN, tokenizer, s... method match (line 327) | def match(cls, config: AutoConfig): FILE: lmdeploy/vl/model/builder.py function load_vl_model (line 40) | def load_vl_model(model_path: str, FILE: lmdeploy/vl/model/cogvlm.py class CogVLMVisionModel (line 11) | class CogVLMVisionModel(VisionModel): method build_preprocessor (line 16) | def build_preprocessor(self): method build_model (line 33) | def build_model(self): method preprocess (line 42) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method proc_messages (line 58) | def proc_messages(messages, chat_template, sequence_start): method to_pytorch (line 88) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... FILE: lmdeploy/vl/model/deepseek.py function check_deepseek_vl_install (line 15) | def check_deepseek_vl_install(): class DeepSeekVisionModel (line 26) | class DeepSeekVisionModel(VisionModel): method build_preprocessor (line 31) | def build_preprocessor(self): method build_model (line 39) | def build_model(self): method preprocess (line 89) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 108) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method proc_messages (line 135) | def proc_messages(messages, chat_template, sequence_start): method to_pytorch (line 167) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 171) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... FILE: lmdeploy/vl/model/deepseek_vl2.py function check_deepseek_vl2_install (line 15) | def check_deepseek_vl2_install(): function check_trans_version (line 25) | def check_trans_version(): class DeepSeek2VisionModel (line 39) | class DeepSeek2VisionModel(VisionModel): method match (line 45) | def match(cls, config: AutoConfig): method build_preprocessor (line 52) | def build_preprocessor(self): method build_model (line 64) | def build_model(self): method preprocess (line 70) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 106) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method proc_single_message (line 121) | def proc_single_message(message): method proc_messages (line 150) | def proc_messages(messages, chat_template, sequence_start): method to_pytorch (line 162) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 166) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... FILE: lmdeploy/vl/model/gemma3_vl.py class Gemma3ImagesKwargs (line 14) | class Gemma3ImagesKwargs(ImagesKwargs): class Gemma3ProcessorKwargs (line 22) | class Gemma3ProcessorKwargs(ProcessingKwargs, total=False): class Gemma3VisionModel (line 38) | class Gemma3VisionModel(VisionModel): method __init__ (line 43) | def __init__(self, method build_preprocessor (line 51) | def build_preprocessor(self): method build_model (line 58) | def build_model(self): method preprocess (line 64) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 94) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method proc_messages (line 109) | def proc_messages(messages, chat_template, sequence_start): method to_pytorch (line 126) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 130) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... FILE: lmdeploy/vl/model/glm4_1v.py class GLM4_1_VisionModel (line 13) | class GLM4_1_VisionModel(VisionModel): method match (line 19) | def match(cls, config: AutoConfig): method build_preprocessor (line 26) | def build_preprocessor(self): method build_model (line 33) | def build_model(self): method preprocess (line 36) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method proc_messages (line 55) | def proc_messages(messages, chat_template, sequence_start): method to_pytorch (line 77) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... FILE: lmdeploy/vl/model/glm4_v.py class GLM4VisionModel (line 13) | class GLM4VisionModel(VisionModel): method match (line 19) | def match(cls, config: AutoConfig): method build_preprocessor (line 26) | def build_preprocessor(self): method build_model (line 38) | def build_model(self): method preprocess (line 47) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method proc_messages (line 71) | def proc_messages(messages, chat_template, sequence_start): method to_pytorch (line 89) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... FILE: lmdeploy/vl/model/interns1_pro.py function check_transformers (line 15) | def check_transformers(): class InternS1ProVisionModel (line 24) | class InternS1ProVisionModel(VisionModel): method build_preprocessor (line 32) | def build_preprocessor(self): method get_processor_args (line 52) | def get_processor_args(self, mm_processor_kwargs: Optional[Dict[str, A... method check_time_series_input (line 88) | def check_time_series_input(self, messages): method _preprocess_image (line 94) | def _preprocess_image(self, method _preprocess_video (line 113) | def _preprocess_video(self, method _preprocess_time_series (line 144) | def _preprocess_time_series(self, method preprocess (line 185) | def preprocess(self, messages: List[Dict], mm_processor_kwargs: Dict[s... method proc_messages (line 209) | def proc_messages(self, method to_pytorch_aux_video (line 242) | def to_pytorch_aux_video(self, messages, prompt, VIDEO_TOKEN, tokenize... method to_pytorch_aux_ts (line 286) | def to_pytorch_aux_ts(self, messages, prompt, TS_TOKEN, tokenizer, seq... method to_pytorch (line 320) | def to_pytorch(self, method build_model (line 342) | def build_model(self): method forward (line 347) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method to_turbomind (line 351) | def to_turbomind(self, FILE: lmdeploy/vl/model/internvl.py function find_closest_aspect_ratio (line 14) | def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height... function dynamic_preprocess (line 31) | def dynamic_preprocess(image, min_num=1, max_num=6, image_size=448, use_... class InternVLVisionModel (line 67) | class InternVLVisionModel(VisionModel): method __init__ (line 72) | def __init__(self, method build_preprocessor (line 83) | def build_preprocessor(self): method build_model (line 117) | def build_model(self): method _preprocess_v1_5 (line 143) | def _preprocess_v1_5(self, image, params=None): method _forward_v1_5 (line 159) | def _forward_v1_5(self, inputs, max_batch_size): method _preprocess (line 174) | def _preprocess(self, image, params=None): method _forward (line 179) | def _forward(self, inputs, max_batch_size): method preprocess (line 193) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 210) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method proc_messages (line 227) | def proc_messages( method to_pytorch (line 270) | def to_pytorch(self, method to_turbomind (line 285) | def to_turbomind(self, FILE: lmdeploy/vl/model/internvl3_hf.py class InternVLImagesKwargs (line 15) | class InternVLImagesKwargs(ImagesKwargs, total=False): class InternVLProcessorKwargs (line 21) | class InternVLProcessorKwargs(ProcessingKwargs, total=False): class InternVL3VisionModel (line 35) | class InternVL3VisionModel(InternVLVisionModel): method __init__ (line 40) | def __init__(self, method build_preprocessor (line 49) | def build_preprocessor(self): method build_model (line 57) | def build_model(self): method preprocess (line 86) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 119) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... FILE: lmdeploy/vl/model/internvl_llava.py function check_llava_install (line 19) | def check_llava_install(): function _intern_vision_model__from_pretrained (line 28) | def _intern_vision_model__from_pretrained(vision_tower_name: str): function _intern_vl_model__from_pretrained (line 37) | def _intern_vl_model__from_pretrained(vision_tower_name: str): function init_empty_vit (line 49) | def init_empty_vit(): class InternVLLlavaVisionModel (line 61) | class InternVLLlavaVisionModel(LlavaVisionModel): method match (line 65) | def match(cls, config: AutoConfig): method build_preprocessor (line 74) | def build_preprocessor(self): method build_model (line 77) | def build_model(self): method preprocess (line 128) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 133) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... FILE: lmdeploy/vl/model/llama4.py function check_trans_version (line 13) | def check_trans_version(): class LLama4VisionModel (line 27) | class LLama4VisionModel(VisionModel): method match (line 33) | def match(cls, config: AutoConfig): method build_preprocessor (line 38) | def build_preprocessor(self): method build_model (line 55) | def build_model(self): method preprocess (line 61) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 87) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method proc_messages (line 102) | def proc_messages(messages, chat_template, sequence_start): method to_pytorch_aux (line 121) | def to_pytorch_aux(self, messages, prompt, IMAGE_TOKEN, tokenizer, seq... method to_pytorch (line 156) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 160) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... FILE: lmdeploy/vl/model/llava.py function check_llava_install (line 21) | def check_llava_install(): function _clip_vision_tower_load_model (line 31) | def _clip_vision_tower_load_model(self, **kwargs): function init_llava_vision_tower (line 42) | def init_llava_vision_tower(config): function select_best_resolution (line 55) | def select_best_resolution(original_size, possible_resolutions): function resize_and_pad_image (line 86) | def resize_and_pad_image(image, target_resolution): function divide_to_patches (line 121) | def divide_to_patches(image, patch_size): function process_anyres_image (line 142) | def process_anyres_image(image, processor, grid_pinpoints): function expand2square (line 171) | def expand2square(pil_img, background_color): function process_images (line 185) | def process_images(images, image_processor, model_cfg): class LlavaVisionModel (line 205) | class LlavaVisionModel(LlavaHfVisionModel): method match (line 209) | def match(cls, config: AutoConfig): method build_preprocessor (line 224) | def build_preprocessor(self): method build_model (line 234) | def build_model(self): method encode_images (line 292) | def encode_images(self, images: torch.Tensor) -> torch.Tensor: method preprocess (line 298) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 314) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... FILE: lmdeploy/vl/model/llava_hf.py class LlavaHfVisionModel (line 16) | class LlavaHfVisionModel(VisionModel): method build_preprocessor (line 21) | def build_preprocessor(self): method build_model (line 33) | def build_model(self): method preprocess (line 58) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 74) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method proc_messages (line 109) | def proc_messages(messages, chat_template, sequence_start): method to_pytorch (line 126) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 130) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... FILE: lmdeploy/vl/model/llava_next.py class LlavaNextVisionModel (line 16) | class LlavaNextVisionModel(LlavaHfVisionModel): method build_preprocessor (line 21) | def build_preprocessor(self): method build_model (line 34) | def build_model(self): method preprocess (line 66) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 102) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... FILE: lmdeploy/vl/model/minicpmv.py class MiniCPMVModel (line 18) | class MiniCPMVModel(VisionModel): method __init__ (line 23) | def __init__(self, method build_preprocessor (line 38) | def build_preprocessor(self): method build_model (line 44) | def build_model(self): method _get_slice_image (line 72) | def _get_slice_image(self, image: Image): method _reshape_by_patch (line 82) | def _reshape_by_patch(self, slice_images): method _preprocess_v2_5 (line 97) | def _preprocess_v2_5(self, image: Image, params: Dict = None) -> Dict: method _preprocess_v2_6 (line 111) | def _preprocess_v2_6(self, image: Image, params: Dict = None) -> Dict: method preprocess (line 133) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 149) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method proc_messages (line 202) | def proc_messages(self, messages, chat_template, sequence_start): method to_pytorch (line 237) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 241) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... FILE: lmdeploy/vl/model/mllama.py function check_transformers (line 8) | def check_transformers(): class MllamaVLModel (line 17) | class MllamaVLModel(VisionModel): method build_preprocessor (line 22) | def build_preprocessor(self): method preprocess (line 27) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method build_model (line 39) | def build_model(self): method proc_messages (line 49) | def proc_messages(messages, chat_template, sequence_start): method to_pytorch (line 66) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... FILE: lmdeploy/vl/model/molmo.py class MolmoVisionModel (line 16) | class MolmoVisionModel(VisionModel): method build_preprocessor (line 21) | def build_preprocessor(self): method build_model (line 27) | def build_model(self): method preprocess (line 53) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 78) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method proc_messages (line 129) | def proc_messages(messages): method to_pytorch (line 148) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 151) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... FILE: lmdeploy/vl/model/phi3_vision.py class Phi3VisionModel (line 11) | class Phi3VisionModel(LlavaHfVisionModel): method build_preprocessor (line 16) | def build_preprocessor(self): method build_model (line 23) | def build_model(self): method preprocess (line 32) | def preprocess(self, messages: List[Dict]) -> List[Dict]: FILE: lmdeploy/vl/model/qwen.py class QwenVisionModel (line 16) | class QwenVisionModel(VisionModel): method build_preprocessor (line 21) | def build_preprocessor(self): method build_model (line 33) | def build_model(self): method preprocess (line 72) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 88) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method proc_messages (line 113) | def proc_messages(messages, chat_template, sequence_start): method to_pytorch (line 134) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 138) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... FILE: lmdeploy/vl/model/qwen2.py function check_qwen_vl_deps_install (line 8) | def check_qwen_vl_deps_install(): class Qwen2VLModel (line 23) | class Qwen2VLModel(VisionModel): method build_preprocessor (line 28) | def build_preprocessor(self): method preprocess (line 36) | def preprocess(self, messages: list[dict]) -> list[dict]: method build_model (line 57) | def build_model(self): method forward (line 94) | def forward(self, messages: list[dict], max_batch_size: int = 1) -> li... method proc_messages (line 126) | def proc_messages(self, messages, chat_template, sequence_start, chat_... method get_mrope_info (line 163) | def get_mrope_info(seq_len: int, method to_pytorch (line 187) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 192) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... FILE: lmdeploy/vl/model/qwen3.py function check_transformers (line 14) | def check_transformers(): class Qwen3VLModel (line 23) | class Qwen3VLModel(VisionModel): method build_preprocessor (line 28) | def build_preprocessor(self): method get_processor_args (line 44) | def get_processor_args(self, mm_processor_kwargs: Dict[str, Any] | Non... method _preprocess_image (line 80) | def _preprocess_image(self, method _preprocess_video (line 99) | def _preprocess_video(self, method preprocess (line 130) | def preprocess(self, messages: List[Dict], mm_processor_kwargs: Dict[s... method proc_messages (line 150) | def proc_messages(self, messages, chat_template, sequence_start, chat_... method to_pytorch_aux_video (line 172) | def to_pytorch_aux_video(self, messages, prompt, VIDEO_TOKEN, tokenize... method to_pytorch (line 216) | def to_pytorch(self, method build_model (line 231) | def build_model(self): method forward (line 236) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method to_turbomind (line 240) | def to_turbomind(self, FILE: lmdeploy/vl/model/qwen3_5.py function check_transformers (line 12) | def check_transformers(): class Qwen3_5Model (line 21) | class Qwen3_5Model(Qwen3VLModel): method build_preprocessor (line 26) | def build_preprocessor(self): FILE: lmdeploy/vl/model/utils.py function disable_transformers_logging (line 11) | def disable_transformers_logging(): function disable_logging (line 21) | def disable_logging(): function _set_func (line 29) | def _set_func(origin_func_path: str | None, rewrite_func: Callable, orig... function rewrite_ctx (line 75) | def rewrite_ctx(origin_func_path: list[str | Callable], rewrite_func: li... function add_device_hook (line 93) | def add_device_hook(module: torch.nn.Module, device: torch.device, fn: C... FILE: lmdeploy/vl/model/xcomposer2.py function check_xcomposer_install (line 21) | def check_xcomposer_install(): class ModelType (line 31) | class ModelType(enum.Enum): function get_xcomposer_type (line 38) | def get_xcomposer_type(model_path: str) -> Tuple[ModelType, Any]: function _CLIPVisionModel_from_pretrained (line 54) | def _CLIPVisionModel_from_pretrained(vision_tower_name): function init_empty_vit (line 62) | def init_empty_vit(model_path): class Xcomposer2VisionModel (line 87) | class Xcomposer2VisionModel(VisionModel): method __init__ (line 90) | def __init__(self, method match (line 103) | def match(cls, config: AutoConfig): method build_preprocessor (line 114) | def build_preprocessor(self): method build_model (line 136) | def build_model(self): method _preprocess_2d5 (line 183) | def _preprocess_2d5(self, image: Image, params: Dict) -> Dict: method _preprocess_7b (line 193) | def _preprocess_7b(self, image: Image, params: Dict) -> Dict: method _preprocess_4khd_7b (line 198) | def _preprocess_4khd_7b(self, image: Image, params: Dict) -> Dict: method preprocess (line 207) | def preprocess(self, messages: List[Dict]) -> List[Dict]: method forward (line 223) | def forward(self, messages: List[Dict], max_batch_size: int = 1) -> Li... method proc_messages (line 257) | def proc_messages(messages, chat_template, sequence_start, model_type): method to_pytorch (line 284) | def to_pytorch(self, messages, chat_template, tokenizer, sequence_star... method to_turbomind (line 288) | def to_turbomind(self, messages, chat_template, tokenizer, sequence_st... FILE: lmdeploy/vl/model/yi.py function _build_vision_projector (line 19) | def _build_vision_projector(config, delay_load=False, **kwargs): function _build_vision_tower (line 57) | def _build_vision_tower(vision_tower_cfg, **kwargs): function init_yi_model (line 74) | def init_yi_model(): class YiVisionModel (line 85) | class YiVisionModel(LlavaVisionModel): method match (line 89) | def match(cls, config: AutoConfig): method build_preprocessor (line 98) | def build_preprocessor(self): method build_model (line 109) | def build_model(self): method preprocess (line 120) | def preprocess(self, messages: List[Dict]) -> List[Dict]: FILE: lmdeploy/vl/tools/merge_xcomposer2d5_task.py function main (line 11) | def main(src_path: str, dst_path: str, task: str): FILE: lmdeploy/vl/utils.py function load_image (line 13) | def load_image(image_url: str, **kwargs) -> Image.Image: function load_video (line 19) | def load_video(video_url: str, **kwargs) -> Tuple[npt.NDArray, Dict[str,... function load_time_series (line 26) | def load_time_series(ts_url: str, **kwargs) -> npt.NDArray: function encode_image_base64 (line 32) | def encode_image_base64(image: str | Image.Image, format: str = 'PNG', *... function encode_video_base64 (line 40) | def encode_video_base64(video: str | npt.NDArray, format: str = 'JPEG', ... function encode_time_series_base64 (line 49) | def encode_time_series_base64(data: str | npt.NDArray, **kwargs) -> str: FILE: setup.py function get_target_device (line 13) | def get_target_device(): function readme (line 17) | def readme(): function get_version (line 23) | def get_version(): function get_turbomind_deps (line 35) | def get_turbomind_deps(): function parse_requirements (line 58) | def parse_requirements(fname='requirements.txt', with_version=True): FILE: src/turbomind/comm/barrier.h function class (line 13) | class Barrier { function namespace (line 47) | namespace turbomind::comm { FILE: src/turbomind/comm/cuda_ipc/bootstrap.h function namespace (line 12) | namespace turbomind::comm { function getNranks (line 53) | int getNranks() function getNranksPerNode (line 58) | int getNranksPerNode() function send (line 63) | void send(void* data, int size, int peer, int tag) FILE: src/turbomind/comm/cuda_ipc/common.h function namespace (line 5) | namespace turbomind::comm { FILE: src/turbomind/comm/cuda_ipc/cuda_ipc_comm.h function namespace (line 17) | namespace turbomind::comm { function multicast_capability_ (line 211) | int multicast_capability_{false}; FILE: src/turbomind/comm/cuda_ipc/group_sum.h function namespace (line 7) | namespace turbomind::comm { FILE: src/turbomind/comm/cuda_ipc/mscclpp.h function namespace (line 9) | namespace mscclpp { FILE: src/turbomind/comm/cuda_ipc/semaphore.h function namespace (line 8) | namespace turbomind::comm { function SystemSemaphoreInfo (line 57) | SystemSemaphoreInfo* handle() FILE: src/turbomind/comm/device_comm.cc type turbomind::comm (line 6) | namespace turbomind::comm { function DeviceComm (line 14) | DeviceComm CreateDeviceCommunicator(const std::string& backend, int n_... FILE: src/turbomind/comm/device_comm.h function namespace (line 13) | namespace turbomind::comm { FILE: src/turbomind/comm/env.h function is_set (line 17) | static auto value = [] { FILE: src/turbomind/comm/gloo/gloo_comm.cc type turbomind::comm (line 28) | namespace turbomind::comm { function createGlooDevice (line 33) | std::shared_ptr<::gloo::transport::Device> createGlooDevice() class Store (line 55) | class Store: public ::gloo::rendezvous::PrefixStore { method Store (line 57) | explicit Store(const std::string& host, int port, const std::string&... method New (line 65) | std::shared_ptr New(const std::string& prefix) class GlobalStoreFactory (line 79) | class GlobalStoreFactory { method GlobalStoreFactory (line 81) | static GlobalStoreFactory& Instance() method New (line 87) | std::string New() method Load (line 101) | std::shared_ptr Load(const std::string& info) method GlobalStoreFactory (line 119) | GlobalStoreFactory() {} type GlooCommImpl (line 127) | struct GlooCommImpl: public HostCommImpl { type SplitInfo (line 129) | struct SplitInfo { method GlooCommImpl (line 144) | GlooCommImpl(std::shared_ptr store, int n_ranks, int rank): method rank (line 155) | int rank() const override method n_ranks (line 160) | int n_ranks() const override method is_same_process (line 165) | bool is_same_process() const override method Split (line 170) | std::shared_ptr Split(int color, int key) override method Sync (line 188) | void Sync(bool blocking) override method Broadcast (line 194) | void Broadcast(void* data, int count, DataType dtype, int root, copy... method AllGather (line 224) | void AllGather(void* data, int count, DataType dtype, copy_fn copy, ... method Broadcast (line 254) | void Broadcast(void* data, int count, DataType dtype, int root) method AllGather (line 262) | void AllGather(void* data, int count, DataType dtype) method ReduceFunc (line 269) | static ReduceFunc getReduceFunc(DataType dtype, RedOp red_op) method AllReduce (line 310) | void AllReduce(void* data, int count, DataType dtype, RedOp red_op) ... class GlooGroupId (line 344) | class GlooGroupId: public HostGroupId { method Initialize (line 346) | void Initialize() override method Export (line 352) | void Export(std::ostream& os) override method Import (line 357) | void Import(std::istream& is) override method HostComm (line 364) | HostComm CreateCommunicator(int n_ranks, int rank, int node_rank = 0... function CreateGlooGroupId (line 376) | std::unique_ptr CreateGlooGroupId() FILE: src/turbomind/comm/gloo/hybrid_comm.cc type turbomind::comm (line 6) | namespace turbomind::comm { type HybridCommImpl (line 11) | struct HybridCommImpl: public HostCommImpl { method HybridCommImpl (line 13) | HybridCommImpl(int n_ranks, int rank, int node_rank, HostGroupId* gl... method HybridCommImpl (line 30) | HybridCommImpl(std::shared_ptr gloo_comm, std::shared_... method init_inter_comm (line 45) | void init_inter_comm() method Split (line 68) | std::shared_ptr Split(int color, int key) override method rank (line 80) | int rank() const override method n_ranks (line 85) | int n_ranks() const override method is_same_process (line 90) | bool is_same_process() const override method Sync (line 95) | void Sync(bool blocking) override method Broadcast (line 103) | void Broadcast(void* data, int count, DataType dtype, int root, copy... method Broadcast (line 115) | void Broadcast(void* data, int count, DataType dtype, int root, copy... method AllGather (line 127) | void AllGather(void* data, int count, DataType dtype, copy_fn copy, ... method AllGather (line 136) | void AllGather(void* data, int count, DataType dtype, copy_fn copy) method AllReduce (line 146) | void AllReduce(void* data, int count, DataType dtype, RedOp red_op) ... class HybridGroupId (line 175) | class HybridGroupId: public HostGroupId { method HybridGroupId (line 177) | HybridGroupId() method Initialize (line 183) | void Initialize() override method Export (line 189) | void Export(std::ostream& os) override method Import (line 195) | void Import(std::istream& is) override method HostComm (line 201) | HostComm CreateCommunicator(int n_ranks, int rank, int node_rank) function CreateHybridGroupId (line 215) | std::unique_ptr CreateHybridGroupId() FILE: src/turbomind/comm/gloo/tcp_store.cc type turbomind::comm (line 14) | namespace turbomind::comm { type CheckResponseType (line 22) | enum class CheckResponseType : uint8_t type QueryType (line 28) | enum class QueryType : uint8_t type Buffer (line 51) | struct Buffer { method append (line 55) | void append(T val) method append (line 61) | void append(const std::vector& vec) method append (line 67) | void append(const std::string& str) method count (line 78) | size_t count() const function validate (line 84) | void validate(std::shared_ptr<::gloo::transport::tcp::Socket>& socket) function ping (line 92) | void ping(std::shared_ptr<::gloo::transport::tcp::Socket>& socket) FILE: src/turbomind/comm/gloo/tcp_store.h function namespace (line 11) | namespace turbomind::comm { FILE: src/turbomind/comm/gloo/test_ipc_comm.cc type Store (line 22) | struct Store { method Store (line 32) | Store(const std::string& hostname, const std::string& port, int nnodes... method start (line 75) | void start() method stop (line 81) | void stop() type TestGlooComm (line 88) | struct TestGlooComm { method TestGlooComm (line 97) | TestGlooComm(const std::string& host, const std::string& port, int nno... method init (line 103) | void init() method test_broadcast (line 132) | void test_broadcast() method test_allgather (line 180) | void test_allgather() method test_allreduce (line 241) | void test_allreduce() method test_perf (line 272) | void test_perf() function main (line 367) | int main(int argc, char* argv[]) FILE: src/turbomind/comm/host_comm.cc type turbomind::comm (line 5) | namespace turbomind::comm { function CreateHostGroupId (line 15) | std::unique_ptr CreateHostGroupId(const std::string& back... FILE: src/turbomind/comm/host_comm.h type class (line 19) | enum class function noexcept (line 72) | const noexcept function namespace (line 86) | namespace detail { function class (line 190) | class HostGroupId { FILE: src/turbomind/comm/test_host_comm.cc function main (line 11) | int main(int argc, char* argv[]) FILE: src/turbomind/comm/thread_comm.cc type turbomind::comm (line 15) | namespace turbomind::comm { type ThreadCommImpl (line 17) | struct ThreadCommImpl: public HostCommImpl { class State (line 19) | class State { method State (line 21) | explicit State(int n): n_{n}, channels_(n * n), barrier_{n} {} method sync (line 28) | void sync() method ThreadCommImpl (line 44) | ThreadCommImpl(int n_ranks, std::shared_ptr state, int rank): method rank (line 49) | int rank() const override method n_ranks (line 54) | int n_ranks() const override method is_same_process (line 59) | bool is_same_process() const override method Split (line 69) | std::shared_ptr Split(int color, int key) override method Sync (line 104) | void Sync(bool blocking) override method Broadcast (line 135) | void Broadcast(void* data, int count, DataType dtype, int root, copy... method AllGather (line 168) | void AllGather(void* data, int count, DataType dtype, copy_fn copy, ... method reduce (line 201) | static void reduce(void* src, int n, void* dst, int offset) method reduce_fn (line 221) | static reduce_fn get_reduce(DataType dtype, RedOp red_op) method AllReduce (line 259) | void AllReduce(void* data, int count, DataType dtype, RedOp red_op) ... class ThreadGroupId (line 295) | class ThreadGroupId: public HostGroupId { method Initialize (line 297) | void Initialize() override method Export (line 302) | void Export(std::ostream& os) override method Import (line 310) | void Import(std::istream& is) override method HostComm (line 319) | HostComm CreateCommunicator(int n_ranks, int rank, int node_rank = 0... type Internal (line 338) | struct Internal { function CreateThreadGroupId (line 347) | std::unique_ptr CreateThreadGroupId() function save (line 353) | void save(Archive& ar, const std::shared_ptr& p) function load (line 359) | void load(Archive& ar, std::shared_ptr& p) FILE: src/turbomind/core/allocator.cc type turbomind::core (line 8) | namespace turbomind::core { function Stream (line 12) | Stream AllocatorImpl::stream() const noexcept class CudaMemPoolAllocator (line 17) | class CudaMemPoolAllocator: public AllocatorImpl { method CudaMemPoolAllocator (line 19) | CudaMemPoolAllocator(Stream stream, bool use_default_pool): method deallocate (line 53) | void deallocate(void* p, ssize_t) override method Device (line 58) | Device device() const noexcept override method Stream (line 63) | Stream stream() const noexcept override method trim (line 68) | void trim(size_t bytes_to_keep) class CudaAllocator (line 80) | class CudaAllocator: public AllocatorImpl { method deallocate (line 89) | void deallocate(void* p, ssize_t) override method Device (line 94) | Device device() const noexcept override class CudaHostAllocator (line 100) | class CudaHostAllocator: public AllocatorImpl { method deallocate (line 109) | void deallocate(void* p, ssize_t) override method Device (line 114) | Device device() const noexcept override class HostAllocator (line 120) | class HostAllocator: public AllocatorImpl { method deallocate (line 127) | void deallocate(void* p, ssize_t) override method Device (line 132) | Device device() const noexcept override FILE: src/turbomind/core/allocator.h function DeviceType (line 14) | enum class DeviceType : int function namespace (line 45) | namespace turbomind::core { function class (line 63) | class AllocatorImpl { function explicit (line 124) | explicit StackAllocatorImpl(shared_ptr underlying_impl): ... function deallocate (line 231) | void deallocate(void* p, ssize_t size) override FILE: src/turbomind/core/buffer.cc type turbomind::core (line 7) | namespace turbomind::core { function Buffer (line 9) | Buffer Buffer::view(DataType dtype) const function Buffer (line 23) | Buffer Buffer::slice(ssize_t base, ssize_t size) const function Copy (line 46) | void Copy(const Buffer& a, ssize_t n, Ref b_, const Stream& st... function Copy (line 57) | void Copy(const Buffer& a, ssize_t n, Ref b_) function Copy (line 62) | void Copy(const Buffer& a, Ref b_, const Stream& stream) function Copy (line 68) | void Copy(const Buffer& a, Ref b_) type detail (line 73) | namespace detail { function Clear (line 85) | void Clear(Ref b_, const Stream& stream) function Clear (line 93) | void Clear(Ref b_) FILE: src/turbomind/core/buffer.h function namespace (line 15) | namespace turbomind::core { function explicit (line 22) | explicit Buffer(DataType dtype): Buffer() function Buffer (line 130) | Buffer borrow() const function Buffer (line 169) | inline Buffer empty_like(const Buffer& buffer, Device device) FILE: src/turbomind/core/check.cc type turbomind::core (line 10) | namespace turbomind::core { function StripSrcPrefix (line 14) | std::string StripSrcPrefix(const char* file) function ReportNullError (line 83) | void ReportNullError(const char* file, int line, const char* expr) FILE: src/turbomind/core/check.h function class (line 25) | class CheckErrorStream { function class (line 59) | class CheckOpStringBuilder { function string (line 74) | string* MakeCheckOpString(const T1& v1, const T2& v2) FILE: src/turbomind/core/common.h function namespace (line 11) | namespace turbomind::core { FILE: src/turbomind/core/context.cc type turbomind::core (line 7) | namespace turbomind::core { type ContextStorage (line 11) | struct ContextStorage { method ContextStorage (line 26) | ContextStorage() method push (line 31) | void push(const Stream& stream) method push (line 41) | void push(const Allocator& alloc) method pop (line 62) | void pop() method ContextStorage (line 79) | static ContextStorage& instance() function Stream (line 103) | Stream& Context::stream() function Allocator (line 110) | Allocator& Context::host_alloc() function Allocator (line 117) | Allocator& Context::device_alloc() function Allocator (line 124) | Allocator& Context::pinned_alloc() function Allocator (line 131) | Allocator& Context::alloc(Device device) FILE: src/turbomind/core/context.h function namespace (line 7) | namespace turbomind::core { FILE: src/turbomind/core/copy.cc type turbomind::core (line 12) | namespace turbomind::core { type CUmemcpyFlags_enum (line 16) | enum CUmemcpyFlags_enum type CUmemcpySrcAccessOrder_enum (line 22) | enum CUmemcpySrcAccessOrder_enum type CUmemcpyAttributes_st (line 31) | struct CUmemcpyAttributes_st { FILE: src/turbomind/core/copy.h function namespace (line 8) | namespace turbomind::core { FILE: src/turbomind/core/core.h function namespace (line 14) | namespace turbomind { FILE: src/turbomind/core/cuda_data_type.h function namespace (line 14) | namespace turbomind { function DataType (line 40) | constexpr DataType from_cuda_dtype(cudaDataType type) { FILE: src/turbomind/core/data_type.h type __half (line 12) | struct __half type __nv_bfloat16 (line 13) | struct __nv_bfloat16 type __nv_fp8_e4m3 (line 14) | struct __nv_fp8_e4m3 type __nv_fp8_e5m2 (line 15) | struct __nv_fp8_e5m2 function namespace (line 17) | namespace turbomind { function numel (line 233) | ptrdiff_t numel(std::ptrdiff_t size) { return numel(data_type_v, size... FILE: src/turbomind/core/interval.h function class (line 11) | class Interval { function explicit (line 27) | explicit Interval(int first): first_{first}, last_{INT_MAX} {}; function Size (line 43) | Size size() const noexcept FILE: src/turbomind/core/layout.cc type turbomind::core (line 7) | namespace turbomind::core { function Layout (line 42) | Layout Layout::coalesce() const noexcept function Layout (line 68) | Layout Layout::view(vector shape) const FILE: src/turbomind/core/layout.h function namespace (line 9) | namespace turbomind::core { function Layout (line 82) | Layout permute(const vector& dims) const function Layout (line 93) | Layout transpose(int a, int b) const function offset (line 103) | ssize_t offset(const vector& idxs) const function offset (line 114) | ssize_t offset(ssize_t idx0) const function Layout (line 127) | Layout squeeze(int dim) const function std (line 169) | inline std::string to_string(const Layout& x) FILE: src/turbomind/core/module.cc type turbomind::core (line 6) | namespace turbomind::core { FILE: src/turbomind/core/module.h function namespace (line 7) | namespace turbomind::core { FILE: src/turbomind/core/ranges.h function namespace (line 3) | namespace turbomind::core { FILE: src/turbomind/core/serdes.h function namespace (line 9) | namespace turbomind::core { function T (line 45) | T* data() const function size_ (line 104) | size_t size_{} function size (line 106) | size_t size() FILE: src/turbomind/core/state.h function namespace (line 10) | namespace turbomind { function Swap (line 38) | void Swap() FILE: src/turbomind/core/stream.cc type turbomind::core (line 5) | namespace turbomind::core { function Stream (line 7) | Stream Stream::create(int priority) FILE: src/turbomind/core/stream.h function namespace (line 8) | namespace turbomind::core { FILE: src/turbomind/core/tensor.cc type turbomind::core (line 7) | namespace turbomind::core { function Tensor (line 15) | Tensor& TensorMap::at(const std::string& key) function Tensor (line 34) | Tensor* TensorMap::try_(const std::string& key) function Copy (line 43) | void Copy(const Tensor& src, Ref dst_, const Stream& stream) function Copy (line 55) | void Copy(const Tensor& src, Ref dst_) function Clear (line 60) | void Clear(Ref a_, const Stream& stream) function Clear (line 69) | void Clear(Ref a_) function Copy (line 76) | void Copy(const Tensor& src, Tensor& dst, Stream& stream) function Copy (line 167) | void Copy(const Tensor& src, Tensor&& dst, Stream& stream) FILE: src/turbomind/core/tensor.h function namespace (line 12) | namespace turbomind::core { function byte_size (line 69) | ssize_t byte_size() const noexcept function Tensor (line 113) | Tensor view(std::vector shape) const function Tensor (line 160) | Tensor slice(std::vector base, std::vector shape) const function Tensor (line 177) | Tensor borrow() const FILE: src/turbomind/engine/batch.h function namespace (line 9) | namespace turbomind { FILE: src/turbomind/engine/engine.cc type turbomind (line 28) | namespace turbomind { type RequestData (line 34) | struct RequestData { function serdes (line 43) | void serdes(Archive& ar, RequestData& r) type Engine::Impl (line 51) | struct Engine::Impl { method Run (line 90) | void Run(BatchOp op, int phase, Ref env) method Start (line 95) | void Start() type State (line 138) | struct State { method size (line 147) | int size() const noexcept type Data (line 155) | struct Data { FILE: src/turbomind/engine/engine.h function namespace (line 12) | namespace turbomind { FILE: src/turbomind/engine/gateway.cc type turbomind (line 9) | namespace turbomind { FILE: src/turbomind/engine/gateway.h function bind (line 31) | void bind(const std::vector& seq_ids, int rank) function unbind (line 41) | void unbind(const std::vector& seq_ids, int rank) FILE: src/turbomind/engine/model_executor.cc type turbomind (line 16) | namespace turbomind { type ModelExecutor::Impl (line 21) | struct ModelExecutor::Impl { method InternalThreadEntry (line 33) | void InternalThreadEntry() method Run (line 56) | void Run(BatchData& d) method Impl (line 78) | Impl(LanguageModel& model, method Start (line 94) | void Start() FILE: src/turbomind/engine/model_executor.h function namespace (line 13) | namespace turbomind { FILE: src/turbomind/engine/model_request.cc type turbomind (line 17) | namespace turbomind { FILE: src/turbomind/engine/model_request.h function namespace (line 10) | namespace xgrammar { function namespace (line 14) | namespace turbomind { FILE: src/turbomind/engine/queue.h function close (line 36) | void close() FILE: src/turbomind/engine/request.cc type turbomind (line 7) | namespace turbomind { function UpdateState (line 46) | void UpdateState(Request& r, int status, int seq_len) FILE: src/turbomind/engine/request.h function namespace (line 16) | namespace xgrammar { function namespace (line 21) | namespace turbomind { type Request (line 87) | struct Request { type RequestCache (line 139) | struct RequestCache { FILE: src/turbomind/engine/request_queue.cc type turbomind (line 8) | namespace turbomind { FILE: src/turbomind/engine/request_queue.h function push (line 18) | void push(std::shared_ptr r) FILE: src/turbomind/engine/signal_buffer.h function aborted_ (line 58) | bool aborted_{false}; FILE: src/turbomind/generation/base_param.h function namespace (line 5) | namespace turbomind { FILE: src/turbomind/generation/generation.cc type turbomind (line 25) | namespace turbomind { type GenerationData (line 31) | struct GenerationData { type Generation::Impl (line 43) | struct Generation::Impl { method Impl (line 77) | Impl(DataType dtype, method Setup (line 124) | void Setup(int phase, TensorMap& env) method Prepare (line 203) | void Prepare(int phase, TensorMap& env) method Unprep (line 216) | void Unprep(int phase, TensorMap& env) method Fetch (line 227) | void Fetch(int phase, TensorMap& env) method Update (line 241) | void Update(int phase, TensorMap& env) method Forward (line 246) | void Forward(int phase, TensorMap& env) FILE: src/turbomind/generation/generation.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/generation/guided_decoding.cc type turbomind (line 10) | namespace turbomind { type GuidedDecoding::Data (line 12) | struct GuidedDecoding::Data { FILE: src/turbomind/generation/guided_decoding.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/generation/logits_processor.cc type turbomind (line 29) | namespace turbomind { type LogitsProcessor::Data (line 31) | struct LogitsProcessor::Data { method Data (line 33) | Data(int max_batch_size, DeviceType device) FILE: src/turbomind/generation/logits_processor.h function namespace (line 25) | namespace turbomind { FILE: src/turbomind/generation/sampling.cc type turbomind (line 29) | namespace turbomind { type SamplingData (line 31) | struct SamplingData { method SamplingData (line 33) | explicit SamplingData(int max_batch_size, DeviceType device) FILE: src/turbomind/generation/sampling.h function namespace (line 7) | namespace turbomind { FILE: src/turbomind/generation/stop_criteria.cc type turbomind (line 11) | namespace turbomind { type StopCriteriaData (line 13) | struct StopCriteriaData { method StopCriteriaData (line 14) | explicit StopCriteriaData(int batch_size) FILE: src/turbomind/generation/stop_criteria.h function namespace (line 23) | namespace turbomind { FILE: src/turbomind/generation/utils.h function namespace (line 7) | namespace turbomind { FILE: src/turbomind/kernels/activation.h function namespace (line 5) | namespace turbomind { FILE: src/turbomind/kernels/activation_kernels.h function namespace (line 23) | namespace turbomind { FILE: src/turbomind/kernels/apply_token_bitmask_inplace_cuda.h function namespace (line 3) | namespace turbomind { FILE: src/turbomind/kernels/attention/arch.h function namespace (line 5) | namespace turbomind::arch { FILE: src/turbomind/kernels/attention/attention.h function namespace (line 7) | namespace turbomind { FILE: src/turbomind/kernels/attention/attention_params.h type LinearIteratorParams (line 14) | struct LinearIteratorParams { type BlockIteratorParams (line 20) | struct BlockIteratorParams { function cp_rank (line 85) | int cp_rank{0} function offset_q (line 87) | int offset_q{0} function CacheIterFactory (line 102) | CacheIterFactory apply(const Param& param) FILE: src/turbomind/kernels/attention/attention_template.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/kernels/attention/attention_universal.h function namespace (line 19) | namespace attention { function hi_end_ (line 65) | int hi_end_{1} function __device__ (line 67) | __device__ bool check_h(int hi) function Vec (line 194) | Vec vec_Q[ITER_S][ITER_C]{} function __device__ (line 340) | __device__ AttentionUniversal(int q_group_size, int q_head_per_cta, int ... function attention_kernel (line 606) | void attention_kernel(typename Kernel::ParamType params, FILE: src/turbomind/kernels/attention/block.h function namespace (line 11) | namespace turbomind { function TM_HOST_DEVICE (line 66) | TM_HOST_DEVICE auto k_data(char* block, int ti) const FILE: src/turbomind/kernels/attention/block_iterator.h function namespace (line 8) | namespace turbomind { FILE: src/turbomind/kernels/attention/cp_utils.h function namespace (line 6) | namespace turbomind { FILE: src/turbomind/kernels/attention/cta_map.h function namespace (line 5) | namespace turbomind::attention { type ReduceCtaMap (line 130) | struct ReduceCtaMap { function query_idx (line 135) | int query_idx() function __device__ (line 139) | static __device__ int head_idx() function __device__ (line 143) | static __device__ int split_idx() FILE: src/turbomind/kernels/attention/decoding.h function namespace (line 7) | namespace turbomind { FILE: src/turbomind/kernels/attention/decoding_template.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/kernels/attention/desc.h function namespace (line 10) | namespace turbomind::attention { FILE: src/turbomind/kernels/attention/impl.h function namespace (line 5) | namespace turbomind { FILE: src/turbomind/kernels/attention/impl_16816.h function namespace (line 13) | namespace turbomind::attention { type StateQK (line 150) | struct StateQK { function __device__ (line 173) | __device__ void Load(int k, int pipe_iter) function ComputeQK (line 203) | void function Transform (line 231) | struct StatePV { function ComputePV (line 277) | void FILE: src/turbomind/kernels/attention/impl_1688.h function namespace (line 12) | namespace turbomind::attention { function Load (line 130) | struct StateQK { function __device__ (line 159) | __device__ void Transform(int k) {} type StatePV (line 185) | struct StatePV { function Load (line 196) | void Load(int k, int pipe_iter) function __device__ (line 207) | __device__ void Transform(int k) {} FILE: src/turbomind/kernels/attention/impl_81616.h function __device__ (line 149) | __device__ static void Sync() function __device__ (line 173) | static __device__ int2 get_warp_ids() function Load (line 262) | struct StateQK { function __device__ (line 402) | __device__ void Load(int m, int pipe_iter) function tmp_L (line 571) | float tmp_L{} FILE: src/turbomind/kernels/attention/impl_884.h function namespace (line 14) | namespace turbomind::attention { function Transform (line 95) | struct SharedStorage { function Load (line 230) | struct StatePV { function __device__ (line 260) | __device__ void Transform(int k) {} function tmp_L (line 344) | float tmp_L{} FILE: src/turbomind/kernels/attention/impl_m16n8.h function namespace (line 7) | namespace turbomind::attention { FILE: src/turbomind/kernels/attention/impl_simt.h function namespace (line 17) | namespace turbomind::attention { function __device__ (line 289) | __device__ void Load(int n, int pipe_iter) function __device__ (line 314) | __device__ void Transform(int n) function ComputeQK (line 325) | void type StatePV (line 374) | struct StatePV { function Load (line 388) | void Load(int k, int pipe_iter) function __device__ (line 413) | __device__ void Transform(int k) function ComputePV (line 424) | void function tmp_L (line 492) | float tmp_L{} FILE: src/turbomind/kernels/attention/iterator.h function namespace (line 11) | namespace turbomind { function __device__ (line 78) | __device__ explicit BaseSmemIterator(T* smem): smem_{smem} type Fragment (line 86) | struct Fragment { function Prefetch (line 93) | void Prefetch(Args... args) function Load (line 101) | void Load(const CacheIter& cache_iter, Fragment& frag, int max_s) function __device__ (line 107) | __device__ void Save(const Fragment& frag) FILE: src/turbomind/kernels/attention/iterator_sm70.h function namespace (line 8) | namespace turbomind { FILE: src/turbomind/kernels/attention/iterator_sm80.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/kernels/attention/kernel.h function namespace (line 7) | namespace turbomind::attention { FILE: src/turbomind/kernels/attention/kernel_impl.h function namespace (line 14) | namespace turbomind::attention { FILE: src/turbomind/kernels/attention/kv_cache_utils_v2.h function namespace (line 8) | namespace turbomind { FILE: src/turbomind/kernels/attention/linear_iterator.h function namespace (line 5) | namespace turbomind { FILE: src/turbomind/kernels/attention/mainloop.h function namespace (line 5) | namespace turbomind::attention { FILE: src/turbomind/kernels/attention/mainloop_sm70.h function namespace (line 9) | namespace turbomind::attention { FILE: src/turbomind/kernels/attention/mainloop_sm80.h function namespace (line 11) | namespace turbomind::attention { FILE: src/turbomind/kernels/attention/quantization.h function T (line 21) | T Infinity() function T (line 41) | constexpr T Max(T a, T b) function T (line 65) | constexpr T Min(T a, T b) function y (line 216) | uint32_t y{} function y (line 238) | uint32_t y{} function y (line 264) | uint32_t y{} function y (line 285) | uint32_t y{} function __device__ (line 420) | __device__ auto operator()(const Array& v) const -> Array function __device__ (line 497) | __device__ ConvertKvCache(half scale, half zero) function __device__ (line 668) | __device__ ConvertKvCache(T scale, T zero): scale_{scale}, zero_{zero} {} FILE: src/turbomind/kernels/attention/reduce.h function namespace (line 12) | namespace turbomind::attention { FILE: src/turbomind/kernels/attention/reference.h function namespace (line 12) | namespace turbomind { FILE: src/turbomind/kernels/attention/registrar.h function namespace (line 11) | namespace turbomind::attention { function std (line 33) | inline std::vector& gKernelFactories() type Registrar (line 39) | struct Registrar { FILE: src/turbomind/kernels/attention/registry.h function namespace (line 10) | namespace turbomind::attention { FILE: src/turbomind/kernels/attention/rotary_embedding.h function namespace (line 8) | namespace turbomind { FILE: src/turbomind/kernels/attention/test_utils.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/kernels/attention/utils.cc type turbomind (line 9) | namespace turbomind { function GetSplitCount (line 11) | int GetSplitCount( FILE: src/turbomind/kernels/attention/utils.h function namespace (line 5) | namespace turbomind { FILE: src/turbomind/kernels/ban_bad_words.h function namespace (line 23) | namespace turbomind { FILE: src/turbomind/kernels/core/array.h function namespace (line 11) | namespace turbomind { FILE: src/turbomind/kernels/core/array_ops.h function namespace (line 10) | namespace turbomind { function copy (line 160) | void copy(const Array& src, Array& dst) function __device__ (line 166) | inline __device__ void copy(const Array (&src)[M], Array (&d... function Store (line 175) | void Store(T* dst, const Array& src) function __device__ (line 206) | inline __device__ void Stcs(T* __restrict__ dst, const Array& src) function Stcg (line 231) | void Stcg(T* __restrict__ dst, const Array& src) function else (line 266) | else if constexpr (sizeof(Array) == sizeof(uint)) { function else (line 269) | else if constexpr (sizeof(Array) == sizeof(uint16_t)) { function else (line 272) | else if constexpr (sizeof(Array) == sizeof(uint8_t)) { function else (line 291) | else if constexpr (sizeof(Array) == sizeof(uint)) { function else (line 294) | else if constexpr (sizeof(Array) == sizeof(uint16_t)) { function else (line 297) | else if constexpr (sizeof(Array) == sizeof(uint8_t)) { function else (line 316) | else if constexpr (sizeof(Array) == sizeof(uint)) { function else (line 319) | else if constexpr (sizeof(Array) == sizeof(uint16_t)) { function else (line 322) | else if constexpr (sizeof(Array) == sizeof(uint8_t)) { function else (line 339) | else if constexpr (sizeof(Array) == sizeof(uint)) { function else (line 342) | else if constexpr (sizeof(Array) == sizeof(uint16_t)) { function else (line 345) | else if constexpr (sizeof(Array) == sizeof(uint8_t)) { function else (line 381) | else if constexpr (sizeof(Array) == sizeof(uint)) { function __device__ (line 391) | inline __device__ void StShared(uint32_t uintptr, Array& src) function __device__ (line 454) | __inline__ __device__ uint transpose_m8n8_b16_warp_shuffle(uint value) function __device__ (line 474) | __inline__ __device__ uint transpose_m8n8_b16_movmatrix(uint a) function __device__ (line 487) | __inline__ __device__ uint32_t transpose_m8n8_b16(uint32_t a) FILE: src/turbomind/kernels/core/data_type.h function namespace (line 14) | namespace turbomind { FILE: src/turbomind/kernels/core/floating_point.h function namespace (line 7) | namespace turbomind { FILE: src/turbomind/kernels/core/layout.h function namespace (line 6) | namespace turbomind { type Offset (line 96) | struct Offset { function __device__ (line 115) | __device__ SmemAccessor(Pointer ptr): ptr_{ptr} function __device__ (line 117) | __device__ T& operator()(int s, int c) function __device__ (line 122) | __device__ T& operator()(int s, int c, int offset) function __device__ (line 127) | __device__ T& operator()(int idx) function __host__ (line 139) | __host__ __device__ Stride(T0 v0, T1 v1): v0{v0}, v1{v1} {} FILE: src/turbomind/kernels/core/math.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/kernels/core/meta.h function value_type (line 22) | constexpr value_type operator()() const noexcept FILE: src/turbomind/kernels/core/mma.h function namespace (line 9) | namespace turbomind { FILE: src/turbomind/kernels/core/pipe_iter.h function namespace (line 5) | namespace turbomind { FILE: src/turbomind/kernels/core/smem.h function namespace (line 9) | namespace turbomind { FILE: src/turbomind/kernels/core/sub_byte_ptr.h function __host__ (line 16) | constexpr __host__ __device__ SubBytePtr(char* ptr): ptr_(ptr) {} FILE: src/turbomind/kernels/core/sync.h function namespace (line 5) | namespace turbomind { FILE: src/turbomind/kernels/core/thread_map.h function namespace (line 10) | namespace turbomind { function Print (line 117) | Print(TMap) FILE: src/turbomind/kernels/decoding_kernels.h function namespace (line 23) | namespace turbomind { FILE: src/turbomind/kernels/gemm/arch.h function namespace (line 5) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/arch/config_simt.h function namespace (line 16) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/arch/config_sm70_s884.h function namespace (line 20) | namespace turbomind::gemm::sm70_s884 { FILE: src/turbomind/kernels/gemm/arch/config_sm75_s16816.h function namespace (line 18) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/arch/config_sm80_s16816.h function namespace (line 20) | namespace turbomind::gemm::sm80_s16816 { FILE: src/turbomind/kernels/gemm/arch/mma_simt.h function namespace (line 10) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/arch/mma_sm70.h function namespace (line 10) | namespace turbomind::gemm { function __device__ (line 38) | __device__ static constexpr OffsetC static_offset_C() FILE: src/turbomind/kernels/gemm/arch/mma_sm80.h function namespace (line 10) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/arch/operand_simt.h function namespace (line 14) | namespace turbomind::gemm { type GetSmemLayout (line 93) | struct GetSmemLayout { // m-major type GetSmemLayout_Pack (line 104) | struct GetSmemLayout_Pack { type GetSmemLayout (line 137) | struct GetSmemLayout { // m-major FILE: src/turbomind/kernels/gemm/arch/operand_sm70_s884.h function namespace (line 13) | namespace turbomind::gemm { type GetSmemLayout (line 131) | struct GetSmemLayout { // m-major FILE: src/turbomind/kernels/gemm/arch/operand_sm80_s16816.h function namespace (line 15) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/arch/smem_copy_simt.h function namespace (line 11) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/arch/smem_copy_sm70.h function __device__ (line 21) | __device__ static int2 unique(int thread_idx, int pack_idx) function __device__ (line 33) | __device__ static int2 get_offset(int thread_idx) function __device__ (line 56) | __device__ static int2 unique(int thread_idx, int pack_idx) function __device__ (line 68) | __device__ static int2 get_offset(int thread_idx) function __device__ (line 90) | __device__ static int2 unique(int thread_idx, int pack_idx) function __device__ (line 101) | __device__ static int2 get_offset(int thread_idx) FILE: src/turbomind/kernels/gemm/arch/smem_copy_sm80.h function apply (line 15) | static void apply(S src_ptr, D dst_ptr) function apply (line 30) | static void apply(S src_ptr, D dst_ptr) function apply (line 45) | static void apply(S src_ptr, D dst_ptr) function __device__ (line 66) | __device__ static int2 get_offset(int thread_idx) // -> (m, k) function __device__ (line 97) | __device__ static int2 get_offset(int thread_idx) function __device__ (line 131) | __device__ static int2 get_offset(int thread_idx) function copy (line 152) | static void copy(S&& src_ptr, D&& dst_ptr, bool) function __device__ (line 169) | __device__ static int2 unique(int thread_idx, int pack_idx) function __device__ (line 184) | __device__ static int2 get_offset(int thread_idx) function copy (line 192) | static void copy(S&& src_ptr, D&& dst_ptr, bool mask) function __device__ (line 200) | __device__ static int2 unique(int thread_idx, int pack_idx) FILE: src/turbomind/kernels/gemm/cast.h function namespace (line 8) | namespace turbomind { FILE: src/turbomind/kernels/gemm/context.h function namespace (line 8) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/convert.h function namespace (line 8) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/cp_async.h function namespace (line 13) | namespace turbomind { function __device__ (line 178) | __device__ static void apply(int smem_ptr, const void* __restrict__ src,... function __device__ (line 184) | __device__ static void apply(int smem_ptr, const void* __restrict__ src,... function __device__ (line 196) | __device__ static void apply(int smem_ptr, const void* __restrict__ src,... function __device__ (line 202) | __device__ static void apply(int smem_ptr, const void* __restrict__ src,... FILE: src/turbomind/kernels/gemm/cta_map.h function namespace (line 9) | namespace turbomind::gemm { function TM_HOST_DEVICE (line 38) | TM_HOST_DEVICE static int get_log_tile(int2 tiled_mn, int N) function TM_HOST_DEVICE (line 43) | TM_HOST_DEVICE static dim3 get_grid_shape(int3 tiled_shape, int log_tile) function TM_DEVICE (line 51) | TM_DEVICE static int3 get_tile_offset(int log_tile) function TM_HOST_DEVICE (line 90) | TM_HOST_DEVICE static int get_log_tile(int2 tiled_mn, int tile_size) function TM_HOST_DEVICE (line 111) | TM_HOST_DEVICE std::true_type init(int block_idx_x, int block_idx_y, int... function TM_HOST_DEVICE (line 194) | TM_HOST_DEVICE static int get_log_tile(int2 tiled_mn, int tile_size) function TM_HOST_DEVICE (line 199) | TM_HOST_DEVICE dim3 get_grid_shape() FILE: src/turbomind/kernels/gemm/desc.h function namespace (line 12) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/dispatch_cache.h function namespace (line 9) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/epilogue.h function namespace (line 16) | namespace turbomind::gemm { type MatrixCombination_v3 (line 108) | struct MatrixCombination_v3 { function apply (line 161) | static void apply(Array& x) type Silu (line 171) | struct Silu { type EpilogueParam (line 178) | struct EpilogueParam { FILE: src/turbomind/kernels/gemm/format.h function namespace (line 7) | namespace turbomind::gemm { type Converter (line 23) | struct Converter FILE: src/turbomind/kernels/gemm/gemm.h function namespace (line 12) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/gemm_universal.h type GemmParam (line 22) | struct GemmParam { function __device__ (line 30) | __device__ MatrixData resolve_op(const MatrixParam& param, int gemm_id) type SharedStorage (line 77) | struct SharedStorage { function __device__ (line 95) | __device__ void operator()(const Param& param, const EpilogueParam& epi_... function typename (line 134) | typename OperandB::GmemIter gmem_B{mat_B, {offset_n, offset_k}, {extent_... function typename (line 140) | typename OperandV::GmemIter gmem_V{mat_V, offset_V, extent_V}; function gemm_kernel (line 172) | void gemm_kernel(Param param, EpilogueParam epi_param, Scheduler sched) FILE: src/turbomind/kernels/gemm/gemm_universal_sm90.h function namespace (line 28) | namespace turbomind::gemm { function wgmma_impl (line 84) | void function wgmma (line 91) | void wgmma(uint64_t desc_a, uint64_t desc_b, float (&frag_C)[N], bool cl... function typename (line 121) | static constexpr typename cute::MMA_Traits::Shape_MNK MMA_Shape{} function __device__ (line 193) | __device__ void operator()(const CUtensorMap& tm_a, function scale_U (line 395) | float scale_U{} FILE: src/turbomind/kernels/gemm/gemm_universal_sm90_v2.h function namespace (line 35) | namespace turbomind::gemm { function wgmma_impl (line 96) | void function wgmma (line 103) | void wgmma(uint64_t desc_a, uint64_t desc_b, float (&frag_C)[N], bool cl... function namespace (line 146) | namespace arch { function __device__ (line 294) | __device__ void operator()(const CUtensorMap& tm_a, function pred_V (line 500) | uint32_t pred_V{} function iter_V (line 501) | int iter_V{} function scale_accum (line 541) | auto scale_accum = [&](int m) { // cta_n = mma_iter_n * wg_n * mma_atom_n function gmma (line 567) | auto gmma = [&](int m) { FILE: src/turbomind/kernels/gemm/gemm_universal_sm90_v3.h function namespace (line 39) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/gemm_universal_sm90_v4.h function namespace (line 37) | namespace turbomind::gemm { function wgmma_impl (line 98) | void function wgmma (line 105) | void wgmma(uint64_t desc_a, uint64_t desc_b, float (&frag_C)[N], bool cl... function namespace (line 148) | namespace arch { function __device__ (line 305) | __device__ void operator()(const CUtensorMap& tm_a, function epi_barrier (line 445) | auto epi_barrier = [&](int phase) { // 0, 1 function pred_V (line 518) | uint32_t pred_V{} function iter_V (line 519) | int iter_V{} function scale_accum (line 568) | auto scale_accum = [&](int m) { // cta_n = mma_iter_n * wg_n * mma_atom_n function gmma (line 594) | auto gmma = [&](int m) { type EmptyBarrier (line 744) | struct EmptyBarrier { FILE: src/turbomind/kernels/gemm/gemm_universal_sm90_v5.h function namespace (line 41) | namespace turbomind::gemm { function __device__ (line 139) | __device__ void operator()(const CUtensorMap& tm_a, function __device__ (line 665) | __device__ auto Fetch_V(const MatrixParam& param_V, FILE: src/turbomind/kernels/gemm/gpu_metric.h function namespace (line 7) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/iterator.h function namespace (line 13) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/iterator_sm70.h function namespace (line 17) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/iterator_sm80.h function namespace (line 18) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/iterator_sm90.h function namespace (line 6) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/kernel.h function namespace (line 16) | namespace turbomind::gemm { type ClusteringParam (line 113) | struct ClusteringParam { FILE: src/turbomind/kernels/gemm/kernel_impl.h function transpose (line 138) | auto transpose = [](MatrixLayout x) { function GemmParam (line 198) | GemmParam param{ function GetMaxSplits (line 233) | int GetMaxSplits(const int4& shape, int swizzle, size_t bsize, size_t ps... FILE: src/turbomind/kernels/gemm/kernel_impl_sm90.h function __launch_bounds__ (line 38) | void __launch_bounds__(Kernel::CTA_SIZE, 1) gemm_kernel_name(const __gri... function Launch (line 148) | int Launch(const Operation& operation, FILE: src/turbomind/kernels/gemm/mainloop_sm70.h function namespace (line 17) | namespace turbomind::gemm { function Binding (line 238) | Binding gmem_iters{gmem_A, gmem_B, gmem_U, gmem_V}; FILE: src/turbomind/kernels/gemm/mainloop_sm80_v2.h function __device__ (line 26) | __device__ void Advance() function operator (line 31) | operator bool() type GroupIter (line 38) | struct GroupIter function __device__ (line 39) | __device__ void Advance() {} function operator (line 40) | operator bool() function __device__ (line 52) | __device__ SmemIter(Pointer base): base_{base}, pointer{base}, pipe_iter... function Binding (line 258) | Binding gmem_iters{gmem_A, gmem_B, gmem_U, gmem_V}; function SmemCopyA (line 309) | SmemCopyA smem_copy_A{{offset_m, offset_k}} function SmemCopyU (line 310) | SmemCopyU smem_copy_U{{offset_m, offset_k}} function SmemCopyB (line 311) | SmemCopyB smem_copy_B{{offset_n, offset_k}} function SmemCopyV (line 312) | SmemCopyV smem_copy_V{{offset_n, offset_k}} function preload (line 314) | auto preload = [&](int k) { FILE: src/turbomind/kernels/gemm/matrix_ptr.h type __align__ (line 9) | struct __align__ type MatrixParam (line 15) | struct MatrixParam { type MatrixData (line 22) | struct MatrixData { function MatrixParam (line 27) | inline MatrixParam to_param(void* ptr, MatrixLayout layout) function StridedPtr (line 40) | StridedPtr ptr{param.ptr, param.stride}; function else (line 46) | else if constexpr (mode == Striding::kIndexed) { function __device__ (line 62) | __device__ MatrixData resolve(const MatrixParam& param, int g) FILE: src/turbomind/kernels/gemm/moe_utils_v2.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/kernels/gemm/operand.h type GetSmemLayout (line 20) | struct GetSmemLayout { type GetGmemIter (line 29) | struct GetGmemIter { FILE: src/turbomind/kernels/gemm/predicate.h function namespace (line 8) | namespace turbomind::gemm { function __device__ (line 47) | __device__ void set(int, int) {} function __device__ (line 49) | __device__ void clear() FILE: src/turbomind/kernels/gemm/registry.h function namespace (line 8) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/scaled_gmma_fp8_sm90.h function namespace (line 13) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/simt.h function namespace (line 5) | namespace turbomind::gemm::simt { FILE: src/turbomind/kernels/gemm/sm90_utils.h function namespace (line 15) | namespace turbomind::gemm { function wgmma_impl (line 76) | void function wgmma (line 83) | void wgmma(uint64_t desc_a, uint64_t desc_b, float (&frag_C)[N], bool cl... function namespace (line 126) | namespace arch { FILE: src/turbomind/kernels/gemm/smem_copy.h function namespace (line 14) | namespace turbomind::gemm { function __device__ (line 54) | __device__ SmemAccessorV2(get_pointer_type ptr): base_{ptr} function __device__ (line 55) | __device__ T& operator()(int m, int k) function __device__ (line 70) | __device__ static int2 get_offset(int thread_idx) // -> (m, k) type SmemCopyAtom_Pack_v3 (line 90) | struct SmemCopyAtom_Pack_v3 { type SmemCopy (line 116) | struct SmemCopy { function Accessor (line 172) | Accessor smem{src_ptr}; FILE: src/turbomind/kernels/gemm/test/quantization.h function namespace (line 9) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/test/quantization_impl.h function namespace (line 15) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/test/reference.h function namespace (line 9) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/test/test_gemm_v2.cc type TestParameter (line 10) | struct TestParameter: Testbed_v3::Parameter { method TestParameter (line 11) | TestParameter(DataType dtype, DataType wtype, DataType itype, int grou... function main (line 21) | int main() FILE: src/turbomind/kernels/gemm/test/test_utils.h function namespace (line 14) | namespace turbomind { FILE: src/turbomind/kernels/gemm/test/testbed_v3.h type Parameter (line 29) | struct Parameter { function invoke (line 57) | auto invoke = [&](auto t) { type Testbed_v3 (line 80) | struct Testbed_v3 function Route (line 156) | void Route() function GenerateWeight (line 226) | void GenerateWeight() function GenerateWeight (line 240) | void GenerateWeight(DenseWeight& original, DenseWeight& quant, DenseWeig... function GetReference (line 290) | void GetReference() function MatrixLayout (line 310) | const MatrixLayout desc_D{d.dtype(), kRowMajor, (int)d.shape(0), (int)d.... function Run (line 346) | void Run() FILE: src/turbomind/kernels/gemm/thread_group_map.h function namespace (line 12) | namespace turbomind::gemm { function __device__ (line 87) | __device__ static int3 get_offset(int group_id) function Print_ (line 101) | Print_(TMap) FILE: src/turbomind/kernels/gemm/thread_map.h function namespace (line 13) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/tiled_mma.h function namespace (line 17) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/tma.h function namespace (line 9) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/transform.h function namespace (line 13) | namespace turbomind::gemm { type Transform_HMMA_SIMT_B (line 108) | struct Transform_HMMA_SIMT_B { FILE: src/turbomind/kernels/gemm/tuner/cache_utils.h function namespace (line 7) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/tuner/measurer.h function namespace (line 10) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/tuner/params.cc type turbomind::gemm (line 9) | namespace turbomind::gemm { function ParseTuningParams (line 11) | void ParseTuningParams(TuningParams& params, const std::string& str) function ParseTuningSequence (line 37) | std::vector ParseTuningSequence(const std::string& str) function GenerateTuningSequence (line 66) | std::vector GenerateTuningSequence(const std::vector> GetDefaultTuningGenerators() FILE: src/turbomind/kernels/gemm/tuner/sampler.h function namespace (line 10) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/tuner/stats.h function namespace (line 5) | namespace turbomind::gemm { function add_sample (line 31) | void add_sample(float x) noexcept FILE: src/turbomind/kernels/gemm/tuner/stopping_criterion.cc type turbomind::gemm (line 6) | namespace turbomind::gemm { type stopping_criterions (line 8) | namespace stopping_criterions { class Optimistic (line 10) | class Optimistic: public StoppingCriterion { method Optimistic (line 12) | Optimistic(int min_iter, int max_iter, float max_ms) method should_stop (line 18) | bool should_stop(const Stats& stats) override function CreateStoppingCriterion (line 31) | std::unique_ptr CreateStoppingCriterion(int min_ite... FILE: src/turbomind/kernels/gemm/tuner/stopping_criterion.h function namespace (line 6) | namespace turbomind::gemm { FILE: src/turbomind/kernels/gemm/types.h function Order (line 15) | enum class Order : int type MMA_Tag (line 42) | typedef enum MMA_Tag type Op_Tag (line 50) | typedef enum Op_Tag function MMA_Tag (line 60) | constexpr MMA_Tag get_mma_tag(Pack pack) function Op_Tag (line 65) | constexpr Op_Tag get_operand_tag(Pack pack) function get_pack_num (line 70) | constexpr int get_pack_num(Pack pack) type class (line 75) | enum class type class (line 99) | enum class type class (line 124) | enum class type QuantDesc (line 131) | struct QuantDesc { function std (line 141) | inline std::string to_string(QuantDesc desc) function DispatchPolicy (line 151) | enum class DispatchPolicy : int FILE: src/turbomind/kernels/gemm/utils.h function namespace (line 8) | namespace turbomind::gemm { function int2 (line 115) | static constexpr int2 apply(int2 mk) FILE: src/turbomind/kernels/gpt_kernels.h function namespace (line 26) | namespace turbomind { FILE: src/turbomind/kernels/logprob_kernels.h function namespace (line 19) | namespace turbomind { FILE: src/turbomind/kernels/norm/rms_norm.h function namespace (line 7) | namespace turbomind { FILE: src/turbomind/kernels/penalty_types.h function namespace (line 24) | namespace turbomind { FILE: src/turbomind/kernels/quantization.h function namespace (line 3) | namespace turbomind { FILE: src/turbomind/kernels/sampling_kernels.h function namespace (line 24) | namespace turbomind { FILE: src/turbomind/kernels/sampling_penalty_kernels.h function namespace (line 24) | namespace turbomind { FILE: src/turbomind/kernels/sampling_topk_kernels.h function namespace (line 21) | namespace turbomind { FILE: src/turbomind/kernels/sampling_topp_kernels.h function namespace (line 20) | namespace turbomind { FILE: src/turbomind/kernels/stop_criteria_kernels.h function namespace (line 22) | namespace turbomind { FILE: src/turbomind/kernels/test_quantization.cc function main (line 13) | int main() FILE: src/turbomind/kernels/unfused_attention_kernels.h function namespace (line 18) | namespace turbomind { FILE: src/turbomind/macro.h type uint (line 9) | typedef unsigned int uint; FILE: src/turbomind/models/input_processor.cc type turbomind (line 11) | namespace turbomind { type InputProcessor::Impl (line 15) | struct InputProcessor::Impl { method Impl (line 17) | Impl(const EngineParam& engine, const ModelParam& model, int phases): method Add (line 38) | int Add(RequestCache& c) method Add (line 106) | void Add(int phase, TensorMap& env) method Setup (line 117) | void Setup(int phase, TensorMap& env) method Prepare (line 179) | void Prepare(int phase, TensorMap& env) method PatchEmbedding (line 204) | void PatchEmbedding(int phase, Tensor& embeds, BatchCopy& copy) type Data (line 217) | struct Data { FILE: src/turbomind/models/input_processor.h function namespace (line 6) | namespace turbomind { FILE: src/turbomind/models/language_model.cc type turbomind (line 29) | namespace turbomind { type LanguageModel::Impl (line 35) | struct LanguageModel::Impl { type Data (line 67) | struct Data { method Run (line 84) | void Run(BatchOp op, int phase, TensorMap& env) function Tensor (line 201) | Tensor LanguageModel::Impl::LookupEmbedding(const Buffer_& input_... function Tensor (line 270) | Tensor LanguageModel::Impl::PostEmbedding(const Tensor& features, Buff... function ModelParam (line 511) | const ModelParam& LanguageModel::model_param() const noexcept function AttentionParam (line 516) | const AttentionParam& LanguageModel::attn_param() const noexcept FILE: src/turbomind/models/language_model.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/models/llama/Barrier.h function namespace (line 11) | namespace turbomind { FILE: src/turbomind/models/llama/BlockManager.cc type turbomind (line 10) | namespace turbomind { function Snapshot (line 251) | Snapshot BlockManager::TakeSnapshot() FILE: src/turbomind/models/llama/BlockManager.h function namespace (line 21) | namespace turbomind { FILE: src/turbomind/models/llama/BlockTrie.cc type turbomind (line 6) | namespace turbomind { function hash (line 8) | size_t hash(const std::vector& vec) FILE: src/turbomind/models/llama/BlockTrie.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/models/llama/GatedDeltaNetLayer.cc type turbomind (line 10) | namespace turbomind { function linear_layer_index (line 136) | static int linear_layer_index(int layer_id, const std::vector& la... FILE: src/turbomind/models/llama/GatedDeltaNetLayer.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/models/llama/GatedDeltaNetWeight.cc type turbomind (line 5) | namespace turbomind { function concat_weights_4 (line 73) | static void FILE: src/turbomind/models/llama/GatedDeltaNetWeight.h function namespace (line 7) | namespace turbomind { FILE: src/turbomind/models/llama/LlamaDecoderLayerWeight.cc type turbomind (line 34) | namespace turbomind { function is_fuse_silu_act (line 36) | static bool is_fuse_silu_act() FILE: src/turbomind/models/llama/LlamaDecoderLayerWeight.h function namespace (line 29) | namespace turbomind { FILE: src/turbomind/models/llama/LlamaDenseWeight.cc type turbomind (line 19) | namespace turbomind { function Convert (line 89) | static void Convert(LlamaDenseWeight& dense, bool is_grouped, cudaStre... function ConvertBlockscaleFP8Native (line 221) | static void ConvertBlockscaleFP8Native(LlamaDenseWeight& dense, cudaSt... function Interleave (line 381) | static void Interleave(const Tensor& a, const Tensor& b, Tensor& c, cu... function interleave (line 420) | void interleave(LlamaDenseWeight& c, LlamaDenseWeight& a, LlamaDenseWe... function Chunk (line 446) | static void Chunk(const Tensor& a, const Tensor& b, Tensor& c, cudaStr... function chunk (line 481) | void chunk(LlamaDenseWeight& c, LlamaDenseWeight& a, LlamaDenseWeight&... function LinkExperts (line 630) | void LinkExperts(std::function experts, int n,... FILE: src/turbomind/models/llama/LlamaDenseWeight.h function namespace (line 30) | namespace turbomind { type LlamaAttentionWeight (line 85) | struct LlamaAttentionWeight function window_size (line 121) | int window_size{} function Module (line 124) | struct LlamaFfnWeight: core::Module { function Module (line 156) | struct MoeFfnWeight: core::Module { FILE: src/turbomind/models/llama/LlamaFfnLayer.cc type turbomind (line 25) | namespace turbomind { FILE: src/turbomind/models/llama/LlamaFfnLayer.h function namespace (line 28) | namespace turbomind { FILE: src/turbomind/models/llama/LlamaLinear.h function namespace (line 11) | namespace turbomind { FILE: src/turbomind/models/llama/LlamaWeight.cc type turbomind (line 30) | namespace turbomind { FILE: src/turbomind/models/llama/LlamaWeight.h function namespace (line 30) | namespace turbomind { FILE: src/turbomind/models/llama/SequenceManager.cc type turbomind (line 15) | namespace turbomind { function vector2string (line 18) | std::string vector2string(const std::vector& data) function Sequence (line 142) | const Sequence* SequenceManager::Create(uint64_t id) function Sequence (line 159) | const Sequence* SequenceManager::Get(uint64_t id) type Schedule (line 366) | struct Schedule { method Schedule (line 384) | Schedule(Snapshot snapshot, int size, int max_fwd_tokens, int max_tm... method Unlock (line 396) | int Unlock(const Sequences& seqs, int vidx) type Transaction (line 434) | struct Transaction { method Transaction (line 449) | explicit Transaction( method Process (line 460) | void Process() method Commit (line 496) | void Commit() function SortByKey (line 533) | static void SortByKey(const std::vector& keys, std::vector&..... FILE: src/turbomind/models/llama/SequenceManager.h function namespace (line 15) | namespace turbomind { type Outcome (line 124) | struct Outcome { FILE: src/turbomind/models/llama/bench_conv1d_silu.cc type Args (line 18) | struct Args { method DataType (line 28) | static DataType ParseDtype(const char* s) method Args (line 38) | static Args Parse(int argc, char** argv) method Print (line 70) | void Print() const function benchmark_kernel (line 85) | static float function cpu_conv1d_silu (line 126) | static void cpu_conv1d_silu(T* h_out, function main (line 170) | int main(int argc, char** argv) FILE: src/turbomind/models/llama/bench_gated_delta_net.cc type Args (line 15) | struct Args { method DataType (line 25) | static DataType ParseDtype(const char* s) method Args (line 37) | static Args Parse(int argc, char** argv) method Print (line 69) | void Print() const function benchmark_kernel (line 84) | static float function main (line 112) | int main(int argc, char** argv) FILE: src/turbomind/models/llama/context.h function namespace (line 17) | namespace turbomind { FILE: src/turbomind/models/llama/gated_delta_net_kernels.h function namespace (line 9) | namespace turbomind { FILE: src/turbomind/models/llama/llama_kernels.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/models/llama/llama_params.h function namespace (line 15) | namespace turbomind { function HasLinearAttention (line 81) | inline bool HasLinearAttention(const ModelParam& model_param) type MoeParam (line 92) | struct MoeParam { type AttentionParam (line 116) | struct AttentionParam { type EngineParam (line 126) | struct EngineParam { FILE: src/turbomind/models/llama/llama_rope.h function namespace (line 11) | namespace turbomind { type YarnRopeParam (line 35) | struct YarnRopeParam { type Llama3RopeParam (line 41) | struct Llama3RopeParam { type MropeRopeParam (line 47) | struct MropeRopeParam { type RopeParam (line 51) | struct RopeParam { type YarnRopeKernelParam (line 66) | struct YarnRopeKernelParam { type Llama3RopeKernelParam (line 73) | struct Llama3RopeKernelParam { type MropeRopeKernelParam (line 79) | struct MropeRopeKernelParam { type RopeKernelParam (line 88) | struct RopeKernelParam { function init_rope_kernel_param (line 101) | inline void init_rope_kernel_param(const RopeParam& rope, RopeKernelPara... function else (line 140) | else if (rope.type == RopeType::kLlama3) { function else (line 150) | else if (rope.type == RopeType::kMrope) { FILE: src/turbomind/models/llama/llama_utils.h function namespace (line 10) | namespace turbomind { FILE: src/turbomind/models/llama/mla_utils.h function namespace (line 8) | namespace turbomind { FILE: src/turbomind/models/llama/moe_ffn_layer.cc type turbomind (line 20) | namespace turbomind { FILE: src/turbomind/models/llama/moe_ffn_layer.h function namespace (line 11) | namespace turbomind { FILE: src/turbomind/models/llama/unified_attention_layer.cc type turbomind (line 54) | namespace turbomind { type AttentionData (line 56) | struct AttentionData { type Stat (line 57) | struct Stat { function init_dynamic_ntk (line 183) | static void init_dynamic_ntk(RequestCache& cache, const RopeParam& rope) function Tensor (line 377) | Tensor UnifiedAttentionLayer::core_attention(Tensor& qkv, const Forwar... function Tensor (line 592) | Tensor UnifiedAttentionLayer::forward_mla(const Tensor& hidden_state, ... FILE: src/turbomind/models/llama/unified_attention_layer.h function namespace (line 35) | namespace turbomind { FILE: src/turbomind/models/llama/unified_decoder.cc type turbomind (line 23) | namespace turbomind { FILE: src/turbomind/models/llama/unified_decoder.h function namespace (line 12) | namespace turbomind { FILE: src/turbomind/models/output_processor.cc type turbomind (line 10) | namespace turbomind { type OutputProcessor::Impl (line 15) | struct OutputProcessor::Impl { method Impl (line 25) | Impl(const ModelParam& model, type Data (line 40) | struct Data { type Matching (line 50) | struct Matching { method Add (line 69) | void Add(int phase, TensorMap& env) method Setup (line 90) | void Setup(int phase, TensorMap& env) method Prepare (line 159) | void Prepare(int phase, TensorMap& env) method OutputHiddenStates (line 168) | void OutputHiddenStates(const Ranges& ranges, const Tensor& h, int t... method ComputeAndOutputLogits (line 181) | void ComputeAndOutputLogits(const Data& data, const Tensor& h, const... method OutputLogits (line 210) | void OutputLogits(Ranges& ranges_, const Tensor& l, int type, const ... method OutputLogitsImpl (line 220) | bool OutputLogitsImpl( method OutputHiddenStatesAndLogits (line 260) | void OutputHiddenStatesAndLogits(int phase, TensorMap& env, int type) FILE: src/turbomind/models/output_processor.h function namespace (line 6) | namespace turbomind { FILE: src/turbomind/python/bind.cpp function DLDevice (line 36) | DLDevice getDLDevice(const Tensor& tensor) function DLManagedTensor (line 64) | DLManagedTensor* TritonTensorToDLManagedTensor(Tensor& tensor) function getMemoryType (line 142) | ft::DeviceType getMemoryType(DLDevice device) function getDataType (line 155) | ft::DataType getDataType(DLDataType data_type) function DLManagedTensorToTritonTensor (line 214) | std::shared_ptr DLManagedTensorToTritonTensor(DLManagedTensor* t... function safe_memcpy (line 230) | static void safe_memcpy(void* dst, const void* src, size_t size) type ScopedGIL (line 277) | struct ScopedGIL { method ScopedGIL (line 278) | ScopedGIL(const ScopedGIL&) = delete; method ScopedGIL (line 279) | ScopedGIL& operator=(const ScopedGIL&) = delete; method ScopedGIL (line 280) | ScopedGIL(ScopedGIL&&) = delete; method ScopedGIL (line 281) | ScopedGIL& operator=(ScopedGIL&&) = delete; method ScopedGIL (line 282) | ScopedGIL() function PYBIND11_MODULE (line 295) | PYBIND11_MODULE(_turbomind, m) FILE: src/turbomind/python/dlpack.h type DLPackVersion (line 61) | typedef struct { type DLDeviceType (line 75) | typedef enum type DLDevice (line 126) | typedef struct { type DLDataTypeCode (line 139) | typedef enum type DLDataType (line 176) | typedef struct { type DLTensor (line 194) | typedef struct { type DLManagedTensor (line 253) | typedef struct DLManagedTensor { type DLManagedTensorVersioned (line 284) | struct DLManagedTensorVersioned { FILE: src/turbomind/python/xgrammar_bind.cpp function CommonEncodedVocabType (line 27) | static const std::vector function TokenizerInfo (line 46) | TokenizerInfo TokenizerInfo_Init(const std::vector& enc... function TokenizerInfo_GetVocabType (line 57) | int TokenizerInfo_GetVocabType(const TokenizerInfo& tokenizer) function TokenizerInfo_GetDecodedVocab (line 62) | std::vector TokenizerInfo_GetDecodedVocab(const TokenizerInfo... function PYBIND11_MODULE (line 75) | PYBIND11_MODULE(_xgrammar, m) FILE: src/turbomind/turbomind.cc type turbomind (line 35) | namespace turbomind { function get_moe_method (line 42) | static std::optional get_moe_method() function parse_default_rope_param (line 67) | static void parse_default_rope_param(const YAML::Node& node, RopeParam... function parse_linear_rope_param (line 77) | static void parse_linear_rope_param(const YAML::Node& node, RopeParam&... function parse_dynamic_rope_param (line 83) | static void parse_dynamic_rope_param(const YAML::Node& node, RopeParam... function parse_yarn_rope_param (line 89) | static void parse_yarn_rope_param(const YAML::Node& node, RopeParam& p... function parse_llama3_rope_param (line 97) | static void parse_llama3_rope_param(const YAML::Node& node, RopeParam&... function parse_mrope_rope_param (line 105) | static void parse_mrope_rope_param(const YAML::Node& node, RopeParam& ... function parse_rope_param (line 113) | static void parse_rope_param(const YAML::Node& node, RopeParam& rope) function DataType (line 142) | static DataType data_type_from_string(std::string str) type TurboMind::Impl (line 169) | struct TurboMind::Impl { method CreateRequest (line 207) | unique_ptr CreateRequest() method CreateWeights (line 216) | void CreateWeights(int index) method TensorMap (line 228) | TensorMap GetWeights(int index) method ProcessWeights (line 238) | void ProcessWeights(int index) method Sleep (line 256) | void Sleep(int index, int level) method WakeUp (line 282) | void WakeUp(int index, const std::vector& tags) method HandleMissingParams (line 308) | void HandleMissingParams() function Join (line 644) | static std::string Join(Iter first, Iter last, const std::string& delim) type Channel (line 728) | struct Channel { function TensorMap (line 791) | TensorMap TurboMind::GetWeights(int index) FILE: src/turbomind/turbomind.h function namespace (line 13) | namespace turbomind { FILE: src/turbomind/utils/anomaly_handler.h function namespace (line 15) | namespace turbomind { FILE: src/turbomind/utils/constant.h function namespace (line 5) | namespace turbomind { FILE: src/turbomind/utils/cuda_utils.cc type turbomind (line 22) | namespace turbomind { function syncAndCheck (line 24) | void syncAndCheck(const char* const file, int const line) function printMatrix (line 47) | void printMatrix(T* ptr, int m, int k, int stride, bool is_device_ptr) function printMatrix (line 89) | void printMatrix(unsigned long long* ptr, int m, int k, int stride, bo... function printMatrix (line 126) | void printMatrix(int* ptr, int m, int k, int stride, bool is_device_ptr) function printMatrix (line 165) | void printMatrix(size_t* ptr, int m, int k, int stride, bool is_device... function check_max_val (line 204) | void check_max_val(const T* result, const int size) function check_abs_mean_val (line 226) | void check_abs_mean_val(const T* result, const int size) function getSMVersion (line 246) | int getSMVersion() function getSMCount (line 257) | int getSMCount() function getDeviceName (line 266) | std::string getDeviceName() function getDevice (line 275) | int getDevice() function getDeviceCount (line 282) | int getDeviceCount() function trim_default_mempool (line 289) | void trim_default_mempool(int device_id) FILE: src/turbomind/utils/cuda_utils.h function namespace (line 39) | namespace turbomind { FILE: src/turbomind/utils/logger.cc type turbomind (line 20) | namespace turbomind { function Logger (line 22) | Logger& Logger::getLogger() FILE: src/turbomind/utils/logger.h function namespace (line 25) | namespace turbomind { function setLevel (line 71) | void setLevel(const Level level) function std (line 96) | inline const std::string getLevelName(const Level level) function std (line 101) | inline const std::string getPrefix(const Level level) function std (line 106) | inline const std::string getPrefix(const Level level, const int rank) FILE: src/turbomind/utils/memory_utils.h function namespace (line 21) | namespace turbomind { FILE: src/turbomind/utils/metrics.h function namespace (line 8) | namespace turbomind { FILE: src/turbomind/utils/monotonic.h function namespace (line 7) | namespace turbomind { FILE: src/turbomind/utils/nvtx_utils.cc type ft_nvtx (line 24) | namespace ft_nvtx { function getScope (line 25) | std::string getScope() function addScope (line 29) | void addScope(std::string name) function setScope (line 34) | void setScope(std::string name) function resetScope (line 39) | void resetScope() function setDeviceDomain (line 44) | void setDeviceDomain(int deviceId) function resetDeviceDomain (line 49) | void resetDeviceDomain() function getDeviceDomain (line 54) | int getDeviceDomain() function isEnableNvtx (line 59) | bool isEnableNvtx() function ftNvtxRangePush (line 69) | void ftNvtxRangePush(std::string name) function ftNvtxRangePop (line 82) | void ftNvtxRangePop() FILE: src/turbomind/utils/nvtx_utils.h function namespace (line 19) | namespace ft_nvtx { FILE: src/turbomind/utils/parser.cc type turbomind (line 8) | namespace turbomind { function ParseArgsList (line 10) | std::vector> ParseArgsList(const s... function ParseListOrTuple (line 26) | std::vector ParseListOrTuple(const std::string& str) FILE: src/turbomind/utils/parser.h function namespace (line 4) | namespace turbomind { FILE: src/turbomind/utils/string_utils.h function namespace (line 24) | namespace turbomind { FILE: src/turbomind/utils/test_utils.h function namespace (line 23) | namespace turbomind { FILE: tests/csrc/unittests/gtest_utils.h function initRandomInt (line 116) | void initRandomInt(int* ptr, size_t size, int minval, int maxval) { function namespace (line 143) | namespace math { type testing (line 165) | typedef testing::Types SamplingTypes; type testing (line 167) | typedef testing::Types SamplingTypes; type testing (line 171) | typedef testing::Types SamplingTypes; type testing (line 173) | typedef testing::Types SamplingTypes; type testing (line 177) | typedef testing::Types FloatType; type testing (line 178) | typedef testing::Types FloatAndHalfTypes; type FloatAndHalfTypes (line 180) | typedef FloatAndHalfTypes SupportTypes; type testing (line 182) | typedef testing::Types FloatHalfBf16Types; type FloatHalfBf16Types (line 183) | typedef FloatHalfBf16Types SupportTypes; function class (line 186) | class FtTestBase: public testing::Test { FILE: tests/csrc/unittests/unittest_utils.h function class (line 39) | class TestFailureError: public std::exception { function initRandomInt (line 167) | void initRandomInt(int* ptr, size_t size, int minval, int maxval) function printMatrixWithLimit (line 205) | void printMatrixWithLimit(T* ptr, int m, int k, int stride, bool is_devi... FILE: tests/pytorch/config/test_hf_overrides.py class TestHFOverrides (line 4) | class TestHFOverrides: method hf_config (line 7) | def hf_config(self): method test_hf_overrides (line 11) | def test_hf_overrides(self, hf_config): FILE: tests/pytorch/engine/test_logits_process.py function test_process_temperature (line 9) | def test_process_temperature(): function test_process_bad_words (line 27) | def test_process_bad_words(): function test_processrepetition_penalty (line 52) | def test_processrepetition_penalty(): function test_filter_topk_sorted (line 75) | def test_filter_topk_sorted(): function test_filter_topp_sorted (line 93) | def test_filter_topp_sorted(): function test_filter_minp_sorted (line 111) | def test_filter_minp_sorted(): function test_filter_ngram (line 129) | def test_filter_ngram(): FILE: tests/pytorch/engine/test_request.py class TestRequestHander (line 11) | class TestRequestHander: method event_loop (line 14) | def event_loop(self): method manager (line 25) | def manager(self): method test_bind (line 28) | def test_bind(self, manager, event_loop): FILE: tests/pytorch/engine/test_zmq_rpc.py class TestZMQRPC (line 5) | class TestZMQRPC: method sub_proc (line 7) | def sub_proc(self, shared_dict=None, condition=None): method async_main (line 38) | async def async_main(self, port): method test_zmq_rpc (line 58) | def test_zmq_rpc(self): FILE: tests/pytorch/kernel/test_activation.py class TestSiluAndMul (line 5) | class TestSiluAndMul: method seqlen (line 8) | def seqlen(self, request): method feat_size (line 12) | def feat_size(self, request): method x (line 16) | def x(self, seqlen, feat_size): method gt (line 20) | def gt(self, x): method test_silu_and_mul (line 27) | def test_silu_and_mul(self, x, gt): class TestSiluAndMulMoEEP (line 34) | class TestSiluAndMulMoEEP: method num_experts (line 37) | def num_experts(self, request): method seqlen (line 41) | def seqlen(self, request): method feat_size (line 45) | def feat_size(self, request): method dtype (line 49) | def dtype(self): method x (line 53) | def x(self, num_experts, seqlen, feat_size, dtype): method mask_m (line 57) | def mask_m(self, num_experts, seqlen): method elem_mask (line 62) | def elem_mask(self, mask_m, seqlen): method gt (line 67) | def gt(self, x): method test_silu_and_mul (line 75) | def test_silu_and_mul(self, x, mask_m, elem_mask, gt): FILE: tests/pytorch/kernel/test_apply_rotary.py function _rotate_half (line 7) | def _rotate_half(x): function _bf16_mark (line 14) | def _bf16_mark(): class TestApplyRotary (line 18) | class TestApplyRotary: method dtype (line 21) | def dtype(self, request): method batch_size (line 25) | def batch_size(self): method num_heads_q (line 29) | def num_heads_q(self, request): method num_heads_k (line 33) | def num_heads_k(self, request): method feature_dim (line 37) | def feature_dim(self): method seq_length (line 41) | def seq_length(self, batch_size): method max_seqlen (line 45) | def max_seqlen(self, seq_length): method q_states (line 49) | def q_states(self, seq_length, num_heads_q, feature_dim, dtype): method k_states (line 53) | def k_states(self, seq_length, num_heads_k, feature_dim, dtype): method position_ids_1d (line 57) | def position_ids_1d(self, seq_length, max_seqlen): method cached_cos (line 61) | def cached_cos(self, max_seqlen, feature_dim, dtype): method cached_sin (line 65) | def cached_sin(self, max_seqlen, feature_dim, dtype): method cos (line 69) | def cos(self, cached_cos, position_ids_1d): method sin (line 73) | def sin(self, cached_sin, position_ids_1d): method gt (line 77) | def gt(self, q_states, k_states, cos, sin, position_ids_1d): method test_apply_rotary (line 87) | def test_apply_rotary(self, q_states, k_states, cos, sin, gt): FILE: tests/pytorch/kernel/test_bitonic_topk.py class TestBitonicTopk (line 5) | class TestBitonicTopk: method device (line 8) | def device(self): method k (line 12) | def k(self): method q_seqlens (line 16) | def q_seqlens(self, device): method kv_seqlens (line 22) | def kv_seqlens(self, device): method batch_size (line 28) | def batch_size(self, kv_seqlens): method max_kv_len (line 32) | def max_kv_len(self, kv_seqlens): method scores (line 36) | def scores(self, q_seqlens, max_kv_len, device): method gt (line 41) | def gt(self, scores, q_seqlens, kv_seqlens, k): method test_bitonic_topk (line 60) | def test_bitonic_topk(self, scores, q_seqlens, kv_seqlens, k, gt): FILE: tests/pytorch/kernel/test_causal_conv1d.py function do_test (line 5) | def do_test(): class TestCausalConv1dUpdate (line 17) | class TestCausalConv1dUpdate: method device (line 20) | def device(self): method batch (line 24) | def batch(self): method hidden_size (line 28) | def hidden_size(self): method width (line 32) | def width(self): method x (line 36) | def x(self, batch, hidden_size, device): method weight (line 40) | def weight(self, hidden_size, width, device): method conv_state (line 44) | def conv_state(self, batch, hidden_size, width, device): method bias (line 50) | def bias(self, hidden_size, device): method conv_state_indices (line 54) | def conv_state_indices(self, batch, device): method activation (line 59) | def activation(self, request): method test_causal_conv1d_update (line 62) | def test_causal_conv1d_update(self, x, conv_state, weight, bias, activ... class TestCausalConv1dFn (line 85) | class TestCausalConv1dFn: method device (line 88) | def device(self): method hidden_size (line 92) | def hidden_size(self): method seqlen (line 96) | def seqlen(self): method seq_idx (line 100) | def seq_idx(self, seqlen, device): method x (line 107) | def x(self, hidden_size, seqlen, device): method weight (line 111) | def weight(self, hidden_size, device): method bias (line 115) | def bias(self, hidden_size, device): method activation (line 119) | def activation(self, request): method test_causal_conv1d_fn (line 122) | def test_causal_conv1d_fn(self, x, weight, bias, activation, seq_idx): FILE: tests/pytorch/kernel/test_ds_index.py function _make_A (line 5) | def _make_A(M, K, group_size, out_dtype, device): class TestDSIndex (line 28) | class TestDSIndex: method num_heads (line 31) | def num_heads(self): method head_dim (line 35) | def head_dim(self): method block_size (line 39) | def block_size(self): method device (line 43) | def device(self): method q_seqlens (line 47) | def q_seqlens(self, request): method kv_seqlens (line 51) | def kv_seqlens(self, request): method k_seqlens (line 55) | def k_seqlens(self, kv_seqlens, device): method cu_seqlen_q (line 59) | def cu_seqlen_q(self, q_seqlens, device): method cu_seqlen_kv (line 63) | def cu_seqlen_kv(self, kv_seqlens, device): method query (line 67) | def query(self, q_seqlens, num_heads, head_dim, device): method q (line 76) | def q(self, query): method q_s (line 80) | def q_s(self, query): method key (line 84) | def key(self, kv_seqlens, head_dim): method k (line 93) | def k(self, key): method k_s (line 97) | def k_s(self, key): method cache_key (line 101) | def cache_key(self, k, k_s, kv_seqlens, block_size, head_dim): method k_cache (line 130) | def k_cache(self, cache_key): method k_s_cache (line 134) | def k_s_cache(self, cache_key): method block_offset (line 138) | def block_offset(self, cache_key): method test_fp8_index (line 143) | def test_fp8_index(self, q, q_s, k_cache, k_s_cache, cu_seqlen_q, k_se... FILE: tests/pytorch/kernel/test_fill_kv_cache.py function _div_up (line 5) | def _div_up(a, b): function quant (line 9) | def quant(kv: torch.Tensor, nbits: int = 8): class TestFillKVCache (line 22) | class TestFillKVCache: method num_heads (line 25) | def num_heads(self): method head_dim (line 29) | def head_dim(self): method block_size (line 33) | def block_size(self): method seq_lens (line 37) | def seq_lens(self, request): method history_lens (line 41) | def history_lens(self, request): method batch_size (line 45) | def batch_size(self, seq_lens): method kv_lens (line 49) | def kv_lens(self, seq_lens, history_lens): method max_q_seq_length (line 53) | def max_q_seq_length(self, seq_lens): method num_tokens (line 57) | def num_tokens(self, seq_lens): method num_blocks_per_input (line 61) | def num_blocks_per_input(self, kv_lens, block_size): method max_num_blocks (line 65) | def max_num_blocks(self, num_blocks_per_input): method q_seq_length (line 69) | def q_seq_length(self, seq_lens): method q_start_loc (line 73) | def q_start_loc(self, q_seq_length): method kv_seq_length (line 78) | def kv_seq_length(self, kv_lens): method k_states (line 82) | def k_states(self, num_tokens, num_heads, head_dim): method v_states (line 86) | def v_states(self, k_states): method k_caches (line 90) | def k_caches(self, batch_size, max_num_blocks, block_size, num_heads, ... method v_caches (line 95) | def v_caches(self, k_caches): method block_offsets (line 99) | def block_offsets(self, num_blocks_per_input): method gt (line 108) | def gt(self, k_states, v_states, k_caches, v_caches, seq_lens, history... method test_fill_kv_cache (line 144) | def test_fill_kv_cache(self, k_states, v_states, k_caches, v_caches, b... class TestFillKVCacheInt8 (line 154) | class TestFillKVCacheInt8(TestFillKVCache): method head_dim (line 157) | def head_dim(self, request): method k_caches (line 161) | def k_caches(self, batch_size, max_num_blocks, block_size, num_heads, ... method v_caches (line 166) | def v_caches(self, k_caches): method k_scales_zeros (line 170) | def k_scales_zeros(self, batch_size, max_num_blocks, block_size, num_h... method v_scales_zeros (line 175) | def v_scales_zeros(self, k_scales_zeros): method nbits (line 179) | def nbits(self): method gt (line 183) | def gt(self, k_states, v_states, k_caches, v_caches, seq_lens, history... method test_fill_kv_cache (line 233) | def test_fill_kv_cache(self, k_states, v_states, k_caches, v_caches, k... class TestFillKVCacheInt4 (line 245) | class TestFillKVCacheInt4(TestFillKVCacheInt8): method k_caches (line 248) | def k_caches(self, batch_size, max_num_blocks, block_size, num_heads, ... method nbits (line 253) | def nbits(self): method test_fill_kv_cache (line 262) | def test_fill_kv_cache(self, k_states, v_states, k_caches, v_caches, k... class TestFillKVCacheBlockedFP8 (line 277) | class TestFillKVCacheBlockedFP8(TestFillKVCache): method initialize (line 280) | def initialize(self): method scale_fmt (line 287) | def scale_fmt(self, request): method quant_dtype (line 291) | def quant_dtype(self): method num_heads (line 295) | def num_heads(self): method head_dim (line 299) | def head_dim(self): method block_size (line 303) | def block_size(self): method group_size (line 307) | def group_size(self): method cu_seqlen_q (line 311) | def cu_seqlen_q(self, q_start_loc, q_seq_length): method k_caches (line 318) | def k_caches(self, batch_size, max_num_blocks, block_size, num_heads, ... method v_caches (line 323) | def v_caches(self, k_caches): method ks_caches (line 327) | def ks_caches(self, batch_size, max_num_blocks, block_size, num_heads,... method vs_caches (line 332) | def vs_caches(self, ks_caches): method gt (line 336) | def gt(self, k_states, v_states, group_size, quant_dtype, scale_fmt): method uncache (line 354) | def uncache(self, k_caches, ks_caches, v_caches, vs_caches, cu_seqlen_... method test_fill_kv_cache (line 386) | def test_fill_kv_cache(self, k_states, v_states, k_caches, v_caches, k... FILE: tests/pytorch/kernel/test_flash_attention.py function _conti_input (line 7) | def _conti_input(data, q_seqlens): function _make_bias (line 13) | def _make_bias(q_seqlens, history_lens, neg_val, causal): function _make_bias_alibi (line 36) | def _make_bias_alibi(q_seqlens, history_lens, neg_val, causal, alibi_slo... function _make_block_sparse_bias (line 59) | def _make_block_sparse_bias(q_seqlens: torch.Tensor, history_lens: torch... function _naive_attention (line 80) | def _naive_attention(batched_q, batched_kv, bias, sinks=None): function _naive_window_attention (line 117) | def _naive_window_attention(q, k, v, seqlens_q, seqlens_k, window_size): class TestFlashAttention (line 149) | class TestFlashAttention: method dtype (line 152) | def dtype(self): method head_dim_k (line 156) | def head_dim_k(self, request): method head_dim_v (line 160) | def head_dim_v(self, request): method num_heads_q (line 164) | def num_heads_q(self, request): method num_heads_k (line 168) | def num_heads_k(self, request): method causal (line 172) | def causal(self, request): method q_seqlens (line 176) | def q_seqlens(self, request): method cu_seqlens_q (line 180) | def cu_seqlens_q(self, q_seqlens): method history_lens (line 186) | def history_lens(self, request): method kv_seqlens (line 190) | def kv_seqlens(self, q_seqlens, history_lens): method cu_seqlens_k (line 194) | def cu_seqlens_k(self, kv_seqlens): method batched_q (line 200) | def batched_q(self, q_seqlens, num_heads_q, head_dim_k, dtype): method batched_kv (line 208) | def batched_kv(self, q_seqlens, history_lens, num_heads_k, head_dim_k,... method conti_q (line 218) | def conti_q(self, q_seqlens, batched_q): method conti_kv (line 222) | def conti_kv(self, kv_seqlens, batched_kv): method mask (line 230) | def mask(self, q_seqlens, history_lens, causal): method gt (line 235) | def gt(self, batched_q, batched_kv, mask): method conti_gt (line 239) | def conti_gt(self, gt, q_seqlens): method test_flash_attention (line 248) | def test_flash_attention(self, conti_q, conti_kv, q_seqlens, cu_seqlen... method win_size (line 263) | def win_size(self, request): method window_gt (line 267) | def window_gt(self, conti_q, conti_kv, q_seqlens, kv_seqlens, win_size): method test_window_attention (line 283) | def test_window_attention(self, conti_q, conti_kv, q_seqlens, cu_seqle... method sinks (line 299) | def sinks(self, num_heads_q, dtype): method sink_gt (line 303) | def sink_gt(self, batched_q, batched_kv, mask, sinks): method conti_sink_gt (line 307) | def conti_sink_gt(self, sink_gt, q_seqlens): method test_sinks (line 316) | def test_sinks(self, conti_q, conti_kv, q_seqlens, cu_seqlens_q, cu_se... method block_sparse_size (line 333) | def block_sparse_size(self): method block_sparse_mask (line 337) | def block_sparse_mask(self, q_seqlens, history_lens, block_sparse_size): method block_sparse_gt (line 342) | def block_sparse_gt(self, batched_q, batched_kv, block_sparse_mask): method test_block_sparse_attention (line 350) | def test_block_sparse_attention(self, conti_q, conti_kv, q_seqlens, cu... method alibi_slopes (line 368) | def alibi_slopes(self, num_heads_q): method alibi_bias (line 372) | def alibi_bias(self, q_seqlens, history_lens, causal, alibi_slopes): method alibi_gt (line 377) | def alibi_gt(self, batched_q, batched_kv, alibi_bias): method conti_alibi_gt (line 381) | def conti_alibi_gt(self, alibi_gt, q_seqlens): method test_alibi (line 392) | def test_alibi(self, conti_q, conti_kv, q_seqlens, cu_seqlens_q, cu_se... FILE: tests/pytorch/kernel/test_flatten_kv_cache.py function _div_up (line 5) | def _div_up(a, b): class TestFlattenKVCache (line 9) | class TestFlattenKVCache: method out_dtype (line 12) | def out_dtype(self): method num_heads (line 16) | def num_heads(self): method head_dim (line 20) | def head_dim(self): method block_size (line 24) | def block_size(self): method kv_lens (line 28) | def kv_lens(self): method batch_size (line 32) | def batch_size(self, kv_lens): method num_blocks_per_input (line 36) | def num_blocks_per_input(self, kv_lens, block_size): method max_num_blocks (line 40) | def max_num_blocks(self, num_blocks_per_input): method out_size (line 44) | def out_size(self, kv_lens): method kv_seqlens (line 48) | def kv_seqlens(self, kv_lens): method k_caches (line 52) | def k_caches(self, batch_size, max_num_blocks, block_size, num_heads, ... method v_caches (line 57) | def v_caches(self, k_caches): method block_offsets (line 61) | def block_offsets(self, num_blocks_per_input): method gt (line 70) | def gt(self, k_caches, v_caches, kv_lens, block_offsets, block_size, n... method test_flatten_kv_cache (line 89) | def test_flatten_kv_cache(self, k_caches, v_caches, kv_seqlens, block_... function precise_round (line 97) | def precise_round(x: torch.Tensor): function quant (line 101) | def quant(kv: torch.Tensor, nbits: int = 8): class TestFlattenKVCacheQuant8 (line 114) | class TestFlattenKVCacheQuant8(TestFlattenKVCache): method nbits (line 117) | def nbits(self): method atol (line 121) | def atol(self): method rtol (line 125) | def rtol(self): method k_quant (line 129) | def k_quant(self, k_caches, nbits): method v_quant (line 133) | def v_quant(self, v_caches, nbits): method test_flatten_kv_cache (line 136) | def test_flatten_kv_cache(self, k_quant, v_quant, kv_seqlens, block_of... class TestFlattenKVCacheQuant4 (line 160) | class TestFlattenKVCacheQuant4(TestFlattenKVCacheQuant8): method nbits (line 163) | def nbits(self): method atol (line 167) | def atol(self): method rtol (line 171) | def rtol(self): class TestFlattenKVCacheMLAFP8 (line 176) | class TestFlattenKVCacheMLAFP8(TestFlattenKVCache): method out_dtype (line 179) | def out_dtype(self): method num_heads (line 183) | def num_heads(self): method head_dim (line 187) | def head_dim(self): method block_size (line 191) | def block_size(self): method k_cache_mla (line 195) | def k_cache_mla(self, k_caches): method _dequant (line 207) | def _dequant(self, k_cache_mla): method gt (line 219) | def gt(self, k_cache_mla, kv_lens, block_offsets, block_size, num_head... method test_flatten_kv_cache (line 236) | def test_flatten_kv_cache(self, k_cache_mla, kv_seqlens, block_offsets... FILE: tests/pytorch/kernel/test_fuse_moe_blocked_fp8.py function _make_A (line 5) | def _make_A(M, K, group_size, out_dtype, device='cuda'): function _make_B (line 26) | def _make_B(E, K, N, group_size, out_dtype, device='cuda'): function _get_sorted_idx (line 55) | def _get_sorted_idx(topk_idx: torch.Tensor, num_experts: int): class TestFusedMoEFP8KernelLauncher (line 64) | class TestFusedMoEFP8KernelLauncher: method dtype (line 67) | def dtype(self): method quant_dtype (line 71) | def quant_dtype(self): method device (line 75) | def device(self): method N (line 79) | def N(self): method K (line 83) | def K(self): method M (line 87) | def M(self): method num_experts (line 91) | def num_experts(self): method top_k (line 95) | def top_k(self): method group_size (line 99) | def group_size(self): method build_A (line 103) | def build_A(self, M, K, group_size, quant_dtype, device): method A (line 107) | def A(self, build_A, dtype): method A_quant (line 111) | def A_quant(self, build_A): method A_scale (line 115) | def A_scale(self, build_A): method build_B (line 119) | def build_B(self, num_experts, N, K, group_size, quant_dtype, device): method B (line 123) | def B(self, build_B, dtype): method B_quant (line 127) | def B_quant(self, build_B): method B_scale (line 131) | def B_scale(self, build_B): method bias (line 135) | def bias(self, build_B, dtype): method router_weights (line 140) | def router_weights(self, M, num_experts, device, dtype): method topk_weights (line 144) | def topk_weights(self, router_weights, top_k): method topk_idx (line 148) | def topk_idx(self, topk_weights): method sort_and_cnt (line 152) | def sort_and_cnt(self, topk_idx, num_experts): method sorted_idx (line 156) | def sorted_idx(self, sort_and_cnt): method exp_tok_cnt (line 160) | def exp_tok_cnt(self, sort_and_cnt): method exp_end (line 164) | def exp_end(self, exp_tok_cnt): method exp_start (line 168) | def exp_start(self, exp_end, exp_tok_cnt): method gt (line 172) | def gt(self, A, B, bias, top_k, sorted_idx, exp_start, exp_end, M): method test_launcher (line 191) | def test_launcher(self, A_quant, A_scale, B, B_quant, B_scale, bias, s... class TestFusedMoeBlockedFP8 (line 216) | class TestFusedMoeBlockedFP8: method dtype (line 219) | def dtype(self): method quant_dtype (line 223) | def quant_dtype(self): method device (line 227) | def device(self): method in_size (line 231) | def in_size(self): method seq_len (line 235) | def seq_len(seq_len): method hidden_size (line 239) | def hidden_size(self): method out_size (line 243) | def out_size(self): method num_experts (line 247) | def num_experts(self): method top_k (line 251) | def top_k(self): method group_size (line 255) | def group_size(self): method renormalize (line 259) | def renormalize(self): method build_hidden_states (line 263) | def build_hidden_states(self, seq_len, in_size, group_size, quant_dtyp... method hidden_states (line 267) | def hidden_states(self, build_hidden_states, dtype): method states_quanted (line 271) | def states_quanted(self, build_hidden_states): method states_scale (line 275) | def states_scale(self, build_hidden_states): method build_w1 (line 279) | def build_w1(self, num_experts, hidden_size, in_size, group_size, quan... method w1 (line 283) | def w1(self, build_w1, dtype): method w1_quant (line 287) | def w1_quant(self, build_w1): method w1_scale (line 291) | def w1_scale(self, build_w1): method build_w2 (line 295) | def build_w2(self, num_experts, out_size, hidden_size, group_size, qua... method w2 (line 304) | def w2(self, build_w2, dtype): method w2_quant (line 308) | def w2_quant(self, build_w2): method w2_scale (line 312) | def w2_scale(self, build_w2): method router_logits (line 316) | def router_logits(self, seq_len, num_experts, dtype, device): method topk_logits (line 320) | def topk_logits(self, router_logits, top_k): method topk_weights (line 325) | def topk_weights(self, topk_logits): method topk_idx (line 329) | def topk_idx(self, topk_logits): method gt (line 333) | def gt(self, hidden_states, w1, w2, topk_weights, topk_idx, top_k, ren... method test_fused_moe (line 339) | def test_fused_moe(self, states_quanted, states_scale, w1_quant, w1_sc... FILE: tests/pytorch/kernel/test_fused_lora.py class TestFusedLoRA (line 7) | class TestFusedLoRA: method dtype (line 10) | def dtype(self): method head_size (line 14) | def head_size(self): method out_head_size (line 18) | def out_head_size(self): method seq_lens (line 22) | def seq_lens(self, request): method ranks (line 26) | def ranks(self): method start_loc (line 30) | def start_loc(self, seq_lens): method input (line 34) | def input(self, seq_lens, head_size, dtype): method adapter_ids (line 39) | def adapter_ids(self, seq_lens, ranks): method scaling (line 47) | def scaling(self, ranks): method lora_a (line 51) | def lora_a(self, ranks, head_size, dtype): method lora_b (line 59) | def lora_b(self, ranks, out_head_size, dtype): method fused_lora_a (line 67) | def fused_lora_a(self, lora_a): method fused_lora_b (line 71) | def fused_lora_b(self, lora_b): method gt (line 75) | def gt(self, input, start_loc, seq_lens, adapter_ids, lora_a, lora_b, ... method test_fused_lora (line 90) | def test_fused_lora(self, input, fused_lora_a, fused_lora_b, start_loc... FILE: tests/pytorch/kernel/test_fused_moe.py function _get_sorted_idx (line 6) | def _get_sorted_idx(topk_idx: torch.Tensor, num_experts: int): class TestFusedMoEKernelLauncher (line 14) | class TestFusedMoEKernelLauncher: method dtype (line 17) | def dtype(self): method device (line 21) | def device(self): method N (line 25) | def N(self): method K (line 29) | def K(self): method M (line 33) | def M(self): method num_experts (line 37) | def num_experts(self): method top_k (line 41) | def top_k(self): method A (line 45) | def A(self, M, K, device, dtype): method B (line 50) | def B(self, num_experts, N, K, device, dtype): method bias (line 55) | def bias(self, num_experts, N, device, dtype): method router_weights (line 59) | def router_weights(self, M, num_experts, device, dtype): method topk_weights (line 63) | def topk_weights(self, router_weights, top_k): method topk_idx (line 67) | def topk_idx(self, topk_weights): method sort_and_cnt (line 71) | def sort_and_cnt(self, topk_idx, num_experts): method sorted_idx (line 75) | def sorted_idx(self, sort_and_cnt): method exp_tok_cnt (line 79) | def exp_tok_cnt(self, sort_and_cnt): method exp_end (line 83) | def exp_end(self, exp_tok_cnt): method exp_start (line 87) | def exp_start(self, exp_end, exp_tok_cnt): method gt (line 91) | def gt(self, A, B, bias, top_k, topk_idx): method test_launcher (line 107) | def test_launcher(self, A, B, bias, sorted_idx, exp_start, exp_end, to... function _mlp_forward (line 126) | def _mlp_forward(hidden_states, gate_proj, up_proj, down_proj): class TestFusedMoe (line 132) | class TestFusedMoe: method dtype (line 135) | def dtype(self): method device (line 139) | def device(self): method in_size (line 143) | def in_size(self): method seq_len (line 147) | def seq_len(seq_len): method hidden_size (line 151) | def hidden_size(self): method out_size (line 155) | def out_size(self): method num_experts (line 159) | def num_experts(self): method top_k (line 163) | def top_k(self): method renormalize (line 167) | def renormalize(self): method hidden_states (line 171) | def hidden_states(self, seq_len, in_size, dtype, device): method w1 (line 176) | def w1(self, num_experts, hidden_size, in_size, dtype, device): method w2 (line 181) | def w2(self, num_experts, out_size, hidden_size, dtype, device): method router_logits (line 186) | def router_logits(self, seq_len, num_experts, dtype, device): method topk_logits (line 190) | def topk_logits(self, router_logits, top_k): method topk_weights (line 195) | def topk_weights(self, topk_logits): method topk_idx (line 199) | def topk_idx(self, topk_logits): method gt (line 203) | def gt(self, hidden_states, w1, w2, topk_weights, topk_idx, renormalize): method test_fused_moe (line 221) | def test_fused_moe(self, hidden_states, w1, w2, topk_weights, topk_idx... class TestFusedMoeW8A8 (line 227) | class TestFusedMoeW8A8(TestFusedMoe): method quant_states (line 230) | def quant_states(self, hidden_states): method quant_weight (line 235) | def quant_weight(self, w): method quant_w1 (line 245) | def quant_w1(self, w1): method quant_w2 (line 250) | def quant_w2(self, w2): method test_fused_moe (line 255) | def test_fused_moe(self, quant_states, quant_w1, quant_w2, topk_weight... FILE: tests/pytorch/kernel/test_gated_delta_rule.py function do_test (line 5) | def do_test(): function naive_recurrent_gdr (line 13) | def naive_recurrent_gdr( class TestRecurrentGatedDeltaRule (line 59) | class TestRecurrentGatedDeltaRule: method auto_context (line 62) | def auto_context(self): method batch (line 75) | def batch(self): method num_heads (line 79) | def num_heads(self): method seqlen (line 83) | def seqlen(self): method head_dim (line 87) | def head_dim(self): method use_qk_l2norm_in_kernel (line 91) | def use_qk_l2norm_in_kernel(self, request): method q (line 95) | def q(self, batch, seqlen, num_heads, head_dim): method k (line 99) | def k(self, batch, seqlen, num_heads, head_dim): method v (line 103) | def v(self, batch, seqlen, num_heads, head_dim): method g (line 107) | def g(self, batch, seqlen, num_heads): method beta (line 111) | def beta(self, batch, seqlen, num_heads): method initial_state (line 115) | def initial_state(self, batch, num_heads, head_dim): method gt (line 119) | def gt(self, q, k, v, g, beta, initial_state, use_qk_l2norm_in_kernel): method test_fused_gated_delta_rule (line 130) | def test_fused_gated_delta_rule(self, q, k, v, g, beta, initial_state,... FILE: tests/pytorch/kernel/test_gemm_fp8.py function _make_quant_val (line 5) | def _make_quant_val(shape, out_dtype): function fast_log2_ceil_torch (line 17) | def fast_log2_ceil_torch(x: torch.Tensor) -> torch.Tensor: function fast_pow2_torch (line 27) | def fast_pow2_torch(x: torch.Tensor) -> torch.Tensor: function fast_round_scale_torch (line 32) | def fast_round_scale_torch(amax: torch.Tensor, fp8_max_inv: torch.Tensor... function _make_quant_scale_ue8m0 (line 36) | def _make_quant_scale_ue8m0(shape, out_dtype): function _make_quant_scale (line 44) | def _make_quant_scale(shape, out_dtype, scale_fmt: str = None): function _make_A (line 56) | def _make_A(M, K, group_size, out_dtype, scale_fmt: str = None): function _aligned_size (line 69) | def _aligned_size(a, b): function _make_B (line 73) | def _make_B(K, N, group_size, out_dtype, scale_fmt: str = None): class TestQuantFP8 (line 91) | class TestQuantFP8: method M (line 94) | def M(self, request): method K (line 98) | def K(self): method group_size (line 102) | def group_size(self): method out_dtype (line 106) | def out_dtype(self): method scale_fmt (line 110) | def scale_fmt(self, request): method build_A (line 114) | def build_A(self, M, K, group_size, out_dtype, scale_fmt): method A (line 118) | def A(self, build_A): method quant_A (line 122) | def quant_A(self, build_A): method scale (line 126) | def scale(self, build_A): method gt (line 130) | def gt(self, quant_A, scale): method test_quant_fp8 (line 135) | def test_quant_fp8(self, A, group_size, out_dtype, scale_fmt, gt): class TestGemmFP8 (line 147) | class TestGemmFP8: method M (line 150) | def M(self): method N (line 154) | def N(self): method K (line 159) | def K(self): method group_size (line 163) | def group_size(self): method quant_dtype (line 167) | def quant_dtype(self): method out_dtype (line 171) | def out_dtype(self): method build_A (line 175) | def build_A(self, M, K, group_size, quant_dtype): method A (line 179) | def A(self, build_A, out_dtype): method quant_A (line 183) | def quant_A(self, build_A): method scale_A (line 187) | def scale_A(self, build_A): method build_B (line 191) | def build_B(self, K, N, group_size, quant_dtype): method B (line 195) | def B(self, build_B, out_dtype): method quant_B (line 199) | def quant_B(self, build_B): method scale_B (line 203) | def scale_B(self, build_B): method gt (line 207) | def gt(self, A, B): method test_gemm_fp8 (line 210) | def test_gemm_fp8(self, quant_A, scale_A, quant_B, scale_B, out_dtype,... FILE: tests/pytorch/kernel/test_moe_route.py function reference_noaux_tc_routing (line 5) | def reference_noaux_tc_routing( class TestNoauxTC (line 40) | class TestNoauxTC: method auto_context (line 43) | def auto_context(self): method batch_size (line 56) | def batch_size(self): method num_experts (line 60) | def num_experts(self): method logits (line 64) | def logits(self, batch_size, num_experts): method bias (line 68) | def bias(self, num_experts): method kwargs (line 72) | def kwargs(self): method gt (line 83) | def gt(self, logits, bias, kwargs): method test_noaux_tc_router (line 86) | def test_noaux_tc_router(self, logits, bias, kwargs, gt): FILE: tests/pytorch/kernel/test_multinomial_sampling.py function _bf16_mark (line 7) | def _bf16_mark(): class TestMultinomialSampling (line 11) | class TestMultinomialSampling: method num_tokens (line 14) | def num_tokens(self, request): method select_ids (line 18) | def select_ids(self, request): method batch_size (line 22) | def batch_size(self, select_ids): method dtype (line 26) | def dtype(self, request): method scores (line 30) | def scores(self, num_tokens, batch_size, select_ids, dtype): method seeds (line 38) | def seeds(self, batch_size): method offsets (line 42) | def offsets(self, batch_size): method indices (line 46) | def indices(self, scores): method gt (line 53) | def gt(self, batch_size, select_ids, indices): method test_multinomial_sampling (line 62) | def test_multinomial_sampling(self, scores, seeds, offsets, indices, gt): FILE: tests/pytorch/kernel/test_paged_attention.py function _conti_input (line 7) | def _conti_input(data, seq_lens): function _make_bias (line 13) | def _make_bias(q_seqlens, history_lens, neg_val): function _make_alibi_bias (line 28) | def _make_alibi_bias(q_seqlens, history_lens, neg_val, alibi_slopes): function _make_block_sparse_bias (line 49) | def _make_block_sparse_bias(q_seqlens: torch.Tensor, history_lens: torch... function _make_blocked_cache (line 70) | def _make_blocked_cache(batched_k, function _naive_attention (line 104) | def _naive_attention(batched_q, batched_kv, bias, sinks=None): function _naive_window_attention (line 141) | def _naive_window_attention(q, k, v, seqlens_q, seqlens_k, window_size): class TestPagedAttentionBase (line 173) | class TestPagedAttentionBase: method dtype (line 176) | def dtype(self): method feat_dim (line 180) | def feat_dim(self, request): method feat_dim_v (line 184) | def feat_dim_v(self, request): method num_heads_q (line 188) | def num_heads_q(self, request): method num_heads_k (line 192) | def num_heads_k(self, request): method block_size (line 196) | def block_size(self, request): method layout (line 200) | def layout(self, request): method history_lens (line 204) | def history_lens(self, request): method seq_len (line 208) | def seq_len(self): method seq_lens (line 212) | def seq_lens(self, seq_len, history_lens): method kv_seqlens (line 216) | def kv_seqlens(self, seq_lens, history_lens): method batched_q (line 220) | def batched_q(self, seq_len, kv_seqlens, num_heads_q, feat_dim, dtype): method batched_kv (line 227) | def batched_kv(self, kv_seqlens, num_heads_k, feat_dim, feat_dim_v, dt... method conti_q (line 236) | def conti_q(self, seq_lens, batched_q): method block_offsets (line 240) | def block_offsets(self, kv_seqlens, block_size): method conti_kv (line 254) | def conti_kv(self, batched_kv, history_lens): method blocked_kv (line 261) | def blocked_kv(self, batched_kv, kv_seqlens, history_lens, block_offse... method mask (line 269) | def mask(self, history_lens): method gt (line 275) | def gt(self, batched_q, batched_kv, mask): method conti_gt (line 279) | def conti_gt(self, gt, seq_lens): class TestPagedAttention (line 283) | class TestPagedAttention(TestPagedAttentionBase): method test_paged_attention (line 291) | def test_paged_attention(self, conti_q, blocked_kv, block_offsets, kv_... method win_size (line 304) | def win_size(self, request): method window_gt (line 308) | def window_gt(self, conti_q, conti_kv, seq_lens, history_lens, win_size): method test_window_attention (line 326) | def test_window_attention(self, conti_q, blocked_kv, block_offsets, kv... class TestPagedAttentionSink (line 340) | class TestPagedAttentionSink(TestPagedAttentionBase): method sinks (line 343) | def sinks(self, num_heads_q, dtype): method sink_gt (line 347) | def sink_gt(self, batched_q, batched_kv, mask, sinks): method conti_sink_gt (line 351) | def conti_sink_gt(self, sink_gt, seq_lens): method test_paged_attention (line 360) | def test_paged_attention(self, conti_q, blocked_kv, block_offsets, kv_... function quant (line 375) | def quant(kv: torch.Tensor, nbits: int = 8): function _make_blocked_cache_quant (line 388) | def _make_blocked_cache_quant(batched_k, batched_v, seq_lens, history_le... class TestPagedAttentionInt8 (line 423) | class TestPagedAttentionInt8(TestPagedAttention): method nbits (line 426) | def nbits(self): method blocked_kv (line 430) | def blocked_kv(self, batched_kv, seq_lens, history_lens, block_offsets... method test_paged_attention (line 441) | def test_paged_attention(self, conti_q, blocked_kv, block_offsets, kv_... method test_window_attention (line 467) | def test_window_attention(self, conti_q, blocked_kv, block_offsets, kv... class TestPagedAttentionInt4 (line 486) | class TestPagedAttentionInt4(TestPagedAttentionInt8): method nbits (line 489) | def nbits(self): class TestPagedAttentionBlockDecoding (line 493) | class TestPagedAttentionBlockDecoding(TestPagedAttentionBase): method seq_len (line 496) | def seq_len(self): method mask (line 500) | def mask(self, seq_lens, history_lens, seq_len): method gt (line 505) | def gt(self, batched_q, batched_kv, mask): method conti_gt (line 509) | def conti_gt(self, gt, seq_lens): method test_paged_attention (line 518) | def test_paged_attention(self, conti_q, blocked_kv, block_offsets, kv_... class TestPagedAttentionAlibi (line 532) | class TestPagedAttentionAlibi(TestPagedAttentionBase): method alibi_slopes (line 535) | def alibi_slopes(self, num_heads_q): method mask (line 539) | def mask(self, seq_lens, history_lens, alibi_slopes): method test_paged_attention (line 549) | def test_paged_attention(self, conti_q, blocked_kv, block_offsets, kv_... FILE: tests/pytorch/kernel/test_rms_norm.py function _bf16_mark (line 7) | def _bf16_mark(): class TestRMSNorm (line 11) | class TestRMSNorm: method initialize (line 14) | def initialize(self): method dtype (line 21) | def dtype(self, request): method input_shape (line 25) | def input_shape(self, request): method hidden_size (line 29) | def hidden_size(self, input_shape): method input (line 33) | def input(self, dtype, input_shape): method weight (line 37) | def weight(self, dtype, hidden_size): method eps (line 41) | def eps(self): method gt (line 45) | def gt(self, input, weight, eps): method test_rms_norm (line 54) | def test_rms_norm(self, input, weight, eps, gt): method residual (line 61) | def residual(self, dtype, input_shape): method gt_residual (line 65) | def gt_residual(self, input, residual, weight, eps): method test_rms_norm_residual (line 77) | def test_rms_norm_residual(self, input, residual, weight, eps, gt_resi... FILE: tests/pytorch/nn/test_embedding.py function parallel_emb (line 14) | def parallel_emb(rank: int, world_size: int, vocab_size: int, feat_size:... class TestEmbedding (line 45) | class TestEmbedding: method vocab_size (line 48) | def vocab_size(self, request): method feat_size (line 52) | def feat_size(self, request): method padding_idx (line 56) | def padding_idx(self, request): method dtype (line 60) | def dtype(self, request): method tp (line 64) | def tp(self, request): method seqlen (line 68) | def seqlen(self, request): method weight (line 72) | def weight(self, vocab_size, feat_size, dtype): method x (line 76) | def x(self, seqlen, vocab_size): method gt (line 80) | def gt(self, x, vocab_size, feat_size, padding_idx, dtype, weight): method test_embedding (line 97) | def test_embedding(self, vocab_size, feat_size, padding_idx, seqlen, t... FILE: tests/pytorch/paging/test_block_manager.py class TestAllocator (line 13) | class TestAllocator: method num_gpu_blocks (line 16) | def num_gpu_blocks(self): method num_cpu_blocks (line 20) | def num_cpu_blocks(self): method allocator (line 24) | def allocator(self, num_cpu_blocks, num_gpu_blocks): method test_alloc (line 27) | def test_alloc(self, allocator, num_cpu_blocks, num_gpu_blocks): method test_full (line 53) | def test_full(self, allocator, num_cpu_blocks, num_gpu_blocks): class TestDefaultBlockManager (line 75) | class TestDefaultBlockManager: method block_size (line 78) | def block_size(self): method num_cpu_blocks (line 82) | def num_cpu_blocks(self): method num_gpu_blocks (line 86) | def num_gpu_blocks(self): method max_batch_size (line 90) | def max_batch_size(self): method cache_config (line 94) | def cache_config(self, block_size, num_cpu_blocks, num_gpu_blocks, max... method scheduler_config (line 101) | def scheduler_config(self, max_batch_size): method seq_meta (line 108) | def seq_meta(self, block_size): method scheduler (line 114) | def scheduler(self, cache_config, scheduler_config, seq_meta): method block_mgr (line 118) | def block_mgr(self, scheduler): method test_alloc (line 121) | def test_alloc(self, scheduler, block_mgr, num_gpu_blocks): method test_num_required_blocks (line 146) | def test_num_required_blocks(self, scheduler, block_mgr): method test_append_slot (line 167) | def test_append_slot(self, scheduler, block_mgr, num_gpu_blocks): method test_swap (line 193) | def test_swap(self, scheduler, block_mgr, num_gpu_blocks): class TestWindowBlockManager (line 233) | class TestWindowBlockManager: method window_size (line 236) | def window_size(self): method block_size (line 240) | def block_size(self): method num_cpu_blocks (line 244) | def num_cpu_blocks(self): method num_gpu_blocks (line 248) | def num_gpu_blocks(self): method max_batch_size (line 252) | def max_batch_size(self): method cache_config (line 256) | def cache_config(self, block_size, num_cpu_blocks, num_gpu_blocks, max... method scheduler_config (line 264) | def scheduler_config(self, max_batch_size): method seq_meta (line 271) | def seq_meta(self, block_size): method scheduler (line 277) | def scheduler(self, cache_config, scheduler_config, seq_meta): method block_mgr (line 281) | def block_mgr(self, scheduler): method test_alloc (line 284) | def test_alloc(self, scheduler, block_mgr, num_gpu_blocks): method test_win_alloc (line 309) | def test_win_alloc(self, scheduler, block_mgr, num_gpu_blocks, window_... FILE: tests/pytorch/paging/test_block_trie.py class TestBlockTire (line 9) | class TestBlockTire: method block_size (line 12) | def block_size(self): method num_cpu_blocks (line 16) | def num_cpu_blocks(self): method num_gpu_blocks (line 20) | def num_gpu_blocks(self): method max_batch_size (line 24) | def max_batch_size(self): method cache_config (line 28) | def cache_config(self, block_size, num_cpu_blocks, num_gpu_blocks, max... method scheduler_config (line 36) | def scheduler_config(self, max_batch_size): method seq_meta (line 43) | def seq_meta(self, block_size): method scheduler (line 49) | def scheduler(self, cache_config, scheduler_config, seq_meta): method block_mgr (line 53) | def block_mgr(self, scheduler): method block_trie (line 57) | def block_trie(self, scheduler): method test_allocate (line 60) | def test_allocate(self, block_trie, block_mgr, scheduler): method test_match (line 99) | def test_match(self, block_trie, block_mgr, scheduler): method test_evict (line 137) | def test_evict(self, block_trie, scheduler, num_gpu_blocks): FILE: tests/pytorch/paging/test_scheduler.py class TestScheduler (line 9) | class TestScheduler: method block_size (line 12) | def block_size(self): method num_cpu_blocks (line 16) | def num_cpu_blocks(self): method num_gpu_blocks (line 20) | def num_gpu_blocks(self): method max_batch_size (line 24) | def max_batch_size(self): method cache_config (line 28) | def cache_config(self, block_size, num_cpu_blocks, num_gpu_blocks, max... method scheduler_config (line 35) | def scheduler_config(self, max_batch_size): method seq_meta (line 42) | def seq_meta(self, block_size): method scheduler (line 48) | def scheduler(self, cache_config, scheduler_config, seq_meta): method test_schedule_base (line 51) | def test_schedule_base(self, scheduler, block_size, num_gpu_blocks): method test_update (line 76) | def test_update(self, scheduler, block_size, num_gpu_blocks): method test_evict (line 119) | def test_evict(self, scheduler, block_size, num_gpu_blocks, num_cpu_bl... FILE: tests/test_lmdeploy/test_auto_backend.py class TestAutoBackend (line 8) | class TestAutoBackend: method turbomind_workspace (line 11) | def turbomind_workspace(self): method models (line 17) | def models(self): method test_turbomind_is_supported (line 41) | def test_turbomind_is_supported(self, turbomind_workspace, models): method test_autoget_backend (line 47) | def test_autoget_backend(self, turbomind_workspace, models): FILE: tests/test_lmdeploy/test_content_merge.py class TestMergeMessageContent (line 6) | class TestMergeMessageContent: method test_missing_content_field (line 9) | def test_missing_content_field(self): method test_explicit_none_content (line 33) | def test_explicit_none_content(self): method test_string_content_unchanged (line 57) | def test_string_content_unchanged(self): method test_single_text_block (line 65) | def test_single_text_block(self): method test_multiple_text_blocks_newline_join (line 72) | def test_multiple_text_blocks_newline_join(self): method test_mixed_content_types (line 95) | def test_mixed_content_types(self): method test_empty_list_content (line 120) | def test_empty_list_content(self): method test_list_with_non_text_blocks_only (line 127) | def test_list_with_non_text_blocks_only(self): method test_preserve_all_message_fields (line 148) | def test_preserve_all_message_fields(self): method test_text_block_with_missing_text_field (line 171) | def test_text_block_with_missing_text_field(self): method test_gpt_oss_tool_call_scenario (line 195) | def test_gpt_oss_tool_call_scenario(self): function test_merge_message_content_parametrized (line 269) | def test_merge_message_content_parametrized(msg, expected_content): function test_batch_message_processing (line 275) | def test_batch_message_processing(): FILE: tests/test_lmdeploy/test_grammar.py function test_guided_matrix (line 62) | def test_guided_matrix(model_id, backend_name, backend_factory, schema_t... function test_mix_guided_matrix (line 102) | def test_mix_guided_matrix(model_id, backend_name, backend_factory): FILE: tests/test_lmdeploy/test_harmony_gpt_oss_parser.py function _install_openai_harmony_stub (line 18) | def _install_openai_harmony_stub(): class DummyParser (line 53) | class DummyParser: class _Msg (line 66) | class _Msg: method __init__ (line 68) | def __init__(self, channel, recipient): method __init__ (line 72) | def __init__(self): method process (line 78) | def process(self, token): function _chat_completion_v1 (line 117) | def _chat_completion_v1(request, token_chunks: List[List[int]]): function _stream_parse (line 163) | def _stream_parse(request, token_chunks: List[List[int]]): function _t (line 193) | def _t(s: str) -> List[int]: function test_parser_stream_basic (line 226) | def test_parser_stream_basic(token_chunks: List[List[int]], expects: Lis... function test_parser_stream_multiple_calls_indices (line 242) | def test_parser_stream_multiple_calls_indices(): function test_parser_stream_interleaved_channels (line 261) | def test_parser_stream_interleaved_channels(): function test_parser_stream_two_calls_same_func (line 277) | def test_parser_stream_two_calls_same_func(token_chunks: List[List[int]]... function test_open_tool_call_no_args (line 291) | def test_open_tool_call_no_args(): function test_parser_nonstream (line 310) | def test_parser_nonstream(token_chunks: List[List[int]], expects: List[T... FILE: tests/test_lmdeploy/test_lite/test_quantization/test_utils/test_cal_qparams.py function test_cal_qparams (line 10) | def test_cal_qparams(): FILE: tests/test_lmdeploy/test_messages.py function test_engine_generation_config (line 9) | def test_engine_generation_config(): function test_update_from_hf_gen_cfg (line 24) | def test_update_from_hf_gen_cfg(model_path): FILE: tests/test_lmdeploy/test_model.py function test_HFChatTemplate_get_prompt_sequence_start_True (line 61) | def test_HFChatTemplate_get_prompt_sequence_start_True(model_path): function test_HFChatTemplate_message2prompt_sequence_start_True (line 73) | def test_HFChatTemplate_message2prompt_sequence_start_True(model_path): function test_base_model (line 85) | def test_base_model(): function test_vicuna (line 92) | def test_vicuna(): function test_prefix_response (line 110) | def test_prefix_response(): function test_internlm_chat (line 117) | def test_internlm_chat(): function test_baichuan (line 137) | def test_baichuan(): function test_llama2 (line 149) | def test_llama2(): function test_codellama_completion (line 169) | def test_codellama_completion(): function test_codellama_infilling (line 180) | def test_codellama_infilling(): function test_codellama_chat (line 195) | def test_codellama_chat(): function test_codellama_python_specialist (line 206) | def test_codellama_python_specialist(): function test_codellama_others (line 216) | def test_codellama_others(): function test_deepseek_vl2 (line 226) | def test_deepseek_vl2(model_path_or_name): function test_qwen3 (line 251) | def test_qwen3(model_path, enable_thinking): function test_HFChatTemplate_get_prompt_sequence_start_False_Qwen (line 322) | def test_HFChatTemplate_get_prompt_sequence_start_False_Qwen(model_path): function test_HFChatTemplate_get_prompt_sequence_start_False_Qwen3_5 (line 332) | def test_HFChatTemplate_get_prompt_sequence_start_False_Qwen3_5(model_pa... function test_HFChatTemplate_DeepSeek_V3 (line 342) | def test_HFChatTemplate_DeepSeek_V3(model_path): function test_HFChatTemplate_DeepSeek_thinking (line 351) | def test_HFChatTemplate_DeepSeek_thinking(model_path): function test_HFChatTemplate_Qwen3_VL_with_vision_id (line 360) | def test_HFChatTemplate_Qwen3_VL_with_vision_id(model_path): function test_gemma_chat_template (line 423) | def test_gemma_chat_template(model_path): FILE: tests/test_lmdeploy/test_pipeline.py class TestBackendInference (line 13) | class TestBackendInference: method backend_config (line 17) | def backend_config(self, backend): method pipe (line 28) | def pipe(self, backend_config): method test_infer_single_string (line 39) | def test_infer_single_string(self, pipe): method test_infer_batch_strings (line 50) | def test_infer_batch_strings(self, pipe): method test_infer_openai_format (line 61) | def test_infer_openai_format(self, pipe): method test_infer_with_generation_config (line 76) | def test_infer_with_generation_config(self, pipe): method test_call_method (line 85) | def test_call_method(self, pipe): method test_stream_infer_single (line 93) | def test_stream_infer_single(self, pipe): method test_stream_infer_batch (line 107) | def test_stream_infer_batch(self, pipe): method test_stream_infer_with_session (line 123) | def test_stream_infer_with_session(self, pipe): method test_chat_streaming (line 166) | def test_chat_streaming(self, pipe): method test_chat_non_streaming (line 185) | def test_chat_non_streaming(self, pipe): method test_chat_multi_turn (line 199) | def test_chat_multi_turn(self, pipe): method test_session_creation (line 217) | def test_session_creation(self, pipe): method test_get_ppl_single (line 226) | def test_get_ppl_single(self, pipe): method test_get_ppl_batch (line 241) | def test_get_ppl_batch(self, pipe): method test_stream_infer_stream_response_parameter (line 257) | def test_stream_infer_stream_response_parameter(self, pipe): method test_infer_different_max_tokens (line 267) | def test_infer_different_max_tokens(self, pipe, max_new_tokens): method test_batch_infer_different_gen_configs (line 275) | def test_batch_infer_different_gen_configs(self, pipe): method test_infer_zero_tokens (line 285) | def test_infer_zero_tokens(self, pipe): FILE: tests/test_lmdeploy/test_qwen3_parser.py class DummyTokenizer (line 19) | class DummyTokenizer: method decode (line 21) | def decode(self, token_ids: List[int]) -> str: method encode (line 24) | def encode(self, text: str) -> List[int]: function _chat_completion_v1 (line 175) | def _chat_completion_v1( function _stream_parse (line 266) | def _stream_parse(request: ChatCompletionRequest, text_sequence: List[st... function test_parser_stream (line 300) | def test_parser_stream(text_sequence: List[str], expects: List[TestExpec... function test_parser_nonstream (line 320) | def test_parser_nonstream(text_sequence: List[str], expects: List[TestEx... function test_no_think_nonstream (line 338) | def test_no_think_nonstream(): FILE: tests/test_lmdeploy/test_qwen3coder_parser.py class DummyTokenizer (line 18) | class DummyTokenizer: method decode (line 20) | def decode(self, token_ids: List[int]) -> str: method encode (line 23) | def encode(self, text: str) -> List[int]: function _chat_completion_v1 (line 56) | def _chat_completion_v1( function _stream_parse (line 149) | def _stream_parse(request: ChatCompletionRequest, text_sequence: List[st... function test_parser_stream (line 188) | def test_parser_stream(text_sequence: List[str], expects: List[TestExpec... function test_parser_nonstream (line 215) | def test_parser_nonstream(text_sequence: List[str], expects: List[TestEx... function test_no_think_nonstream (line 233) | def test_no_think_nonstream(): FILE: tests/test_lmdeploy/test_tokenizer.py function test_tokenizer (line 17) | def test_tokenizer(model_path, input, interval, add_special_tokens, skip... function test_tokenizer_with_stop_words (line 39) | def test_tokenizer_with_stop_words(model_path, stop_words): function test_qwen_vl_decode_special (line 45) | def test_qwen_vl_decode_special(): function test_glm4_special_token (line 55) | def test_glm4_special_token(): function test_check_transformers_version (line 73) | def test_check_transformers_version(model_path): FILE: tests/test_lmdeploy/test_turbomind/test_converter.py function test_torch_dtype_fallback (line 11) | def test_torch_dtype_fallback(): function test_ffn_reader_kind_none (line 27) | def test_ffn_reader_kind_none(): function test_registered_models (line 70) | def test_registered_models(): function test_update_from_engine_config (line 109) | def test_update_from_engine_config(): function test_dtype (line 148) | def test_dtype(): FILE: tests/test_lmdeploy/test_utils.py function test_get_and_verify_max_len (line 6) | def test_get_and_verify_max_len(): FILE: tests/test_lmdeploy/test_vl/test_hf_chat_template.py function get_model_and_chat_template (line 9) | def get_model_and_chat_template(model_path): function mock_messages (line 23) | def mock_messages(): function mock_pure_img_messages (line 36) | def mock_pure_img_messages(): function mock_pure_text_messages (line 46) | def mock_pure_text_messages(): class TestInternVLHFChatTemplate (line 56) | class TestInternVLHFChatTemplate: method models (line 59) | def models(self): method test_proc_messages (line 73) | def test_proc_messages(self, models, mock_messages): method test_proc_pure_img_messages (line 85) | def test_proc_pure_img_messages(self, models, mock_pure_img_messages): method test_proc_pure_text_messages (line 97) | def test_proc_pure_text_messages(self, models, mock_pure_text_messages): class TestQwenVLChatTemplate (line 108) | class TestQwenVLChatTemplate: method models (line 111) | def models(self): method test_proc_messages (line 136) | def test_proc_messages(self, models, mock_messages): method test_pure_img_messages (line 146) | def test_pure_img_messages(self, models, mock_pure_img_messages): method test_pure_text_messages (line 156) | def test_pure_text_messages(self, models, mock_pure_text_messages): FILE: tests/test_lmdeploy/test_vl/test_nonhf_chat_template.py function get_model_and_chat_template (line 9) | def get_model_and_chat_template(model_path): class TestInternVLChatTemplate (line 22) | class TestInternVLChatTemplate: method internvl3_5 (line 25) | def internvl3_5(self): method internvl3 (line 40) | def internvl3(self): method internvl2_5 (line 54) | def internvl2_5(self): method internvl2 (line 68) | def internvl2(self): method mock_messages (line 82) | def mock_messages(self): method mock_IMAGE_TOKEN_messages (line 94) | def mock_IMAGE_TOKEN_messages(self): method test_internvl3_5 (line 104) | def test_internvl3_5(self, internvl3_5, mock_messages): method test_internvl3_5_backward_compatibility (line 116) | def test_internvl3_5_backward_compatibility(self, internvl3_5, mock_IM... method test_internvl3 (line 128) | def test_internvl3(self, internvl3, mock_messages): method test_internvl3_backward_compatibility (line 141) | def test_internvl3_backward_compatibility(self, internvl3, mock_IMAGE_... method test_internvl2_5 (line 153) | def test_internvl2_5(self, internvl2_5, mock_messages): method test_internvl2_5_backward_compatibility (line 166) | def test_internvl2_5_backward_compatibility(self, internvl2_5, mock_IM... method test_internvl2 (line 178) | def test_internvl2(self, internvl2, mock_messages): method test_internvl2_backward_compatibility (line 190) | def test_internvl2_backward_compatibility(self, internvl2, mock_IMAGE_... FILE: tests/test_lmdeploy/test_vl/test_qwen3vl_processor.py function qwen3vl_model (line 17) | def qwen3vl_model(request): function sample_messages (line 25) | def sample_messages(): function test_qwen3vl_preprocess_with_custom_pixels (line 44) | def test_qwen3vl_preprocess_with_custom_pixels(qwen3vl_model, sample_mes... FILE: tests/test_lmdeploy/test_vl/test_vl_encode.py function test_image_encode_decode (line 9) | def test_image_encode_decode(): function test_video_encode_decode (line 22) | def test_video_encode_decode(): function test_time_series_encode_decode (line 45) | def test_time_series_encode_decode(): function test_image_modes (line 57) | def test_image_modes(): function test_truncated_image (line 68) | def test_truncated_image(): function test_single_frame_video (line 75) | def test_single_frame_video(): function test_video_sampling_params (line 85) | def test_video_sampling_params(): function test_invalid_inputs (line 115) | def test_invalid_inputs():