SYMBOL INDEX (7608 symbols across 621 files) FILE: archive/csrc/custom_marlin/binding.cpp function PYBIND11_MODULE (line 20) | PYBIND11_MODULE(vLLMMarlin, m) { FILE: archive/csrc/custom_marlin/test_cuda_graph.py function setup_seed (line 14) | def setup_seed(seed): function get_usable_mem (line 33) | def get_usable_mem(): function exp_range (line 42) | def exp_range(start, stop, step = 2): function timing (line 48) | def timing(func, iters, epochs=100): class LinearMarlin (line 88) | class LinearMarlin(nn.Linear): method __init__ (line 94) | def __init__( method forward (line 168) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor) -> torch.... function benchLinearMarlin (line 208) | def benchLinearMarlin(input_dim, output_dim):#, out_file function printMinMax (line 314) | def printMinMax(tensor): FILE: archive/csrc/custom_marlin/utils/format24.py function _calculate_meta_reordering_scatter_offsets (line 21) | def _calculate_meta_reordering_scatter_offsets(m, meta_ncols, meta_dtype, function sparse_semi_structured_from_dense_cutlass (line 52) | def sparse_semi_structured_from_dense_cutlass(dense): function sparse_semi_structured_to_dense_cutlass (line 184) | def sparse_semi_structured_to_dense_cutlass(sparse, meta_reordered): function mask_creator (line 279) | def mask_creator(tensor): FILE: archive/csrc/custom_marlin/utils/marlin_24_perms.py function get_perms_24 (line 21) | def get_perms_24(num_bits: int): FILE: archive/csrc/custom_marlin/utils/marlin_perms.py function get_perms (line 21) | def get_perms(num_bits: int): FILE: archive/csrc/custom_marlin/utils/marlin_utils.py function is_marlin_supported (line 31) | def is_marlin_supported(): function marlin_permute_weights (line 35) | def marlin_permute_weights(q_w, size_k, size_n, perm, tile=MARLIN_TILE): function marlin_weights (line 50) | def marlin_weights(q_w, size_k, size_n, num_bits, perm): function marlin_permute_scales (line 70) | def marlin_permute_scales(s, size_k, size_n, group_size, scale_perm, function marlin_quantize (line 81) | def marlin_quantize( function inject_24 (line 119) | def inject_24(w, size_k, size_n): function check_24 (line 127) | def check_24(w, num_rows_to_sample=50, _verbose=False): function compress_quantized_24_weight (line 154) | def compress_quantized_24_weight(q_24, size_k, size_n, num_bits): function marlin_24_quantize (line 177) | def marlin_24_quantize( function compute_max_diff (line 218) | def compute_max_diff(output, output_ref): class MarlinWorkspace (line 223) | class MarlinWorkspace: method __init__ (line 225) | def __init__(self, out_features, min_thread_n, max_parallel, device): FILE: archive/csrc/custom_marlin/utils/quant_utils.py function get_pack_factor (line 9) | def get_pack_factor(num_bits): function permute_rows (line 14) | def permute_rows(q_w: torch.Tensor, w_ref: torch.Tensor, group_size: int): function dequantize_weights (line 40) | def dequantize_weights(qweight, qzeros, scales, g_idx, bits=4, group_siz... function quantize_weights (line 67) | def quantize_weights(w: torch.Tensor, num_bits: int, group_size: int, function sort_weights (line 137) | def sort_weights(q_w: torch.Tensor, g_idx: torch.Tensor): function gptq_pack (line 153) | def gptq_pack( function gptq_unpack (line 176) | def gptq_unpack( FILE: archive/csrc/ktransformers_ext/bench/bench_attention.py function bench_linear (line 41) | def bench_linear(cache_seqlen: int): FILE: archive/csrc/ktransformers_ext/bench/bench_attention_torch.py function bench_linear (line 29) | def bench_linear(cache_seqlen: int, device): FILE: archive/csrc/ktransformers_ext/bench/bench_linear.py function bench_linear (line 28) | def bench_linear(quant_mode: str): FILE: archive/csrc/ktransformers_ext/bench/bench_linear_torch.py function bench_linear (line 26) | def bench_linear(quant_mode: str): FILE: archive/csrc/ktransformers_ext/bench/bench_mlp.py function bench_mlp (line 28) | def bench_mlp(quant_mode: str): FILE: archive/csrc/ktransformers_ext/bench/bench_mlp_torch.py function act_fn (line 26) | def act_fn(x): function mlp_torch (line 29) | def mlp_torch(input, gate_proj, up_proj, down_proj): function bench_mlp (line 47) | def bench_mlp(quant_mode: str): FILE: archive/csrc/ktransformers_ext/bench/bench_moe.py function bench_moe (line 31) | def bench_moe(quant_mode: str): FILE: archive/csrc/ktransformers_ext/bench/bench_moe_amx.py function bench_moe (line 29) | def bench_moe(quant_mode: str): FILE: archive/csrc/ktransformers_ext/bench/bench_moe_torch.py function act_fn (line 28) | def act_fn(x): function mlp_torch (line 31) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 49) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function bench_moe (line 80) | def bench_moe(quant_mode: str): FILE: archive/csrc/ktransformers_ext/cpu_backend/backend.cpp type bitmask (line 93) | struct bitmask FILE: archive/csrc/ktransformers_ext/cpu_backend/backend.h type ThreadStatus (line 21) | enum ThreadStatus { type ThreadState (line 27) | struct ThreadState { function class (line 33) | class Backend { FILE: archive/csrc/ktransformers_ext/cpu_backend/cpuinfer.h function class (line 36) | class CPUInfer { function submit (line 58) | void submit(std::pair params) { function sync (line 65) | void sync() { function submit_with_cuda_stream (line 69) | void submit_with_cuda_stream(intptr_t user_cuda_stream, std::pair class GetAllKVCacheOneLayerBindings (line 93) | class GetAllKVCacheOneLayerBindings { type Args (line 95) | struct Args { method inner (line 102) | static void inner(void *args) { method cpuinfer_interface (line 108) | static std::pair class GetAndUpdateKVCacheFp16Bindings (line 117) | class GetAndUpdateKVCacheFp16Bindings { type Args (line 119) | struct Args { method inner (line 131) | static void inner(void *args) { method cpuinfer_interface (line 139) | static std::pair class GetKVCacheFp16Bindings (line 157) | class GetKVCacheFp16Bindings { type Args (line 159) | struct Args { method inner (line 170) | static void inner(void *args) { method cpuinfer_interface (line 177) | static std::pair class UpdateKVCacheFp16Bindings (line 194) | class UpdateKVCacheFp16Bindings { type Args (line 196) | struct Args { method inner (line 208) | static void inner(void *args) { method cpuinfer_interface (line 216) | static std::pair class UpdateImportanceBindings (line 235) | class UpdateImportanceBindings { type Args (line 237) | struct Args { method inner (line 248) | static void inner(void *args) { method cpuinfer_interface (line 255) | static std::pair class AttnWithKVCacheBindings (line 272) | class AttnWithKVCacheBindings { type Args (line 274) | struct Args { method inner (line 292) | static void inner(void *args) { method cpuinfer_interface (line 301) | static std::pair class ClearImportanceAllLayersBindings (line 328) | class ClearImportanceAllLayersBindings { type Args (line 330) | struct Args { method inner (line 338) | static void inner(void *args) { method cpuinfer_interface (line 345) | static std::pair class CalcAnchorAllLayersBindinds (line 359) | class CalcAnchorAllLayersBindinds { type Args (line 361) | struct Args { method inner (line 369) | static void inner(void *args) { method cpuinfer_interface (line 376) | static std::pair class LoadKVCacheBindings (line 390) | class LoadKVCacheBindings { type Args (line 392) | struct Args { method inner (line 397) | static void inner(void *args) { method cpuinfer_interface (line 402) | static std::pair class DumpKVCacheBindings (line 409) | class DumpKVCacheBindings { type Args (line 411) | struct Args { method inner (line 418) | static void inner(void *args) { method cpuinfer_interface (line 424) | static std::pair class LinearBindings (line 435) | class LinearBindings { class WarmUpBindinds (line 437) | class WarmUpBindinds { type Args (line 439) | struct Args { method inner (line 443) | static void inner(void *args) { method cpuinfer_interface (line 447) | static std::pair class ForwardBindings (line 453) | class ForwardBindings { type Args (line 455) | struct Args { method inner (line 462) | static void inner(void *args) { method cpuinfer_interface (line 467) | static std::pair class MLPBindings (line 477) | class MLPBindings { class WarmUpBindinds (line 479) | class WarmUpBindinds { type Args (line 481) | struct Args { method inner (line 485) | static void inner(void *args) { method cpuinfer_interface (line 489) | static std::pair cpuinfer_interface(MLP &mlp) { class ForwardBindings (line 494) | class ForwardBindings { type Args (line 496) | struct Args { method inner (line 503) | static void inner(void *args) { method cpuinfer_interface (line 508) | static std::pair class MOEBindings (line 518) | class MOEBindings { class WarmUpBindinds (line 520) | class WarmUpBindinds { type Args (line 522) | struct Args { method inner (line 526) | static void inner(void *args) { method cpuinfer_interface (line 530) | static std::pair cpuinfer_interface(MOE &moe) { class ForwardBindings (line 535) | class ForwardBindings { type Args (line 537) | struct Args { method inner (line 548) | static void inner(void *args) { method cpuinfer_interface (line 554) | static std::pair class AMX_MOEBindings (line 574) | class AMX_MOEBindings { class WarmUpBindings (line 576) | class WarmUpBindings { type Args (line 578) | struct Args { method inner (line 582) | static void inner(void *args) { method cpuinfer_interface (line 586) | static std::pair cpuinfer_interface(AMX_MOE &... class LoadWeightsBindings (line 591) | class LoadWeightsBindings { type Args (line 593) | struct Args { method inner (line 597) | static void inner(void *args) { method cpuinfer_interface (line 601) | static std::pair cpuinfer_interface(AMX_MOE &... class ForwardBindings (line 606) | class ForwardBindings { type Args (line 608) | struct Args { method inner (line 619) | static void inner(void *args) { method cpuinfer_interface (line 625) | static std::pair function PYBIND11_MODULE (line 643) | PYBIND11_MODULE(cpuinfer_ext, m) { FILE: archive/csrc/ktransformers_ext/operators/amx/la/amx.hpp type amx (line 41) | namespace amx { function enable_amx (line 63) | inline bool enable_amx() { type TileConfig (line 80) | struct alignas(64) TileConfig { method TileConfig (line 89) | TileConfig() { method set_row_col (line 97) | void set_row_col(int i, uint8_t row, uint16_t col) { method set_config (line 102) | void set_config() { _tile_loadconfig(this); } method load_data (line 104) | static void load_data(int to, void *from, size_t stride) { method store_data (line 135) | static void store_data(int from, void *to, size_t stride) { function debug_tile (line 169) | inline void debug_tile(int t) { function debug_tiles (line 182) | inline void debug_tiles(int to = 8) { function debug_m512 (line 188) | inline void debug_m512(__m512 x) { function transpose_16x16_32bit (line 198) | inline void transpose_16x16_32bit(__m512i *v) { function transpose_16x16_32bit (line 273) | inline void transpose_16x16_32bit(__m512i *v, size_t stride) { type GemmKernel224BF (line 348) | struct GemmKernel224BF { method recommended_nth (line 363) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 365) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 371) | static void config() { method load_a (line 390) | static void load_a(dt *a, size_t lda) { method load_b (line 395) | static void load_b(dt *b, size_t ldb) { method clean_c (line 400) | static void clean_c() { method load_c (line 407) | static void load_c(output_t *c, size_t ldc) { method store_c (line 414) | static void store_c(output_t *c, size_t ldc) { method run_tile (line 421) | static void run_tile() { type BufferA (line 428) | struct BufferA { method required_size (line 432) | static size_t required_size(int max_m, int k) { return max_m * k *... method BufferA (line 434) | BufferA(int max_m, int k, void *ptr) : max_m(max_m), k(k) { method from_mat (line 441) | void from_mat(int m, ggml_bf16_t *src, int ith, int nth) { method ggml_bf16_t (line 460) | ggml_bf16_t *get_submat(int m, int k, int m_begin, int k_begin) { type BufferB (line 469) | struct BufferB { method required_size (line 473) | static size_t required_size(int n, int k) { return n * k * sizeof(... method BufferB (line 475) | BufferB(int n, int k, void *ptr) : n(n), k(k) { method from_mat (line 482) | void from_mat(ggml_bf16_t *src, int ith, int nth) { method ggml_bf16_t (line 505) | ggml_bf16_t *get_submat(int n, int k, int n_begin, int k_begin) { type BufferC (line 516) | struct BufferC { method required_size (line 520) | static size_t required_size(int max_m, int n) { return max_m * n *... method BufferC (line 522) | BufferC(int max_m, int n, void *ptr) : max_m(max_m), n(n) { method to_mat (line 529) | void to_mat(int m, ggml_bf16_t *dst, int ith, int nth) { type GemmKernel224Int8 (line 558) | struct GemmKernel224Int8 { method recommended_nth (line 573) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 575) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 581) | static void config() { method load_a (line 600) | static void load_a(dt *a, size_t lda) { method load_b (line 605) | static void load_b(dt *b, size_t ldb) { method clean_c (line 610) | static void clean_c() { method load_c (line 617) | static void load_c(output_t *c, size_t ldc) { method store_c (line 624) | static void store_c(output_t *c, size_t ldc) { method run_tile (line 631) | static void run_tile() { type BufferA (line 638) | struct BufferA { method required_size (line 643) | static size_t required_size(int max_m, int k) { return max_m * k *... method BufferA (line 645) | BufferA(int max_m, int k, void *ptr) : max_m(max_m), k(k) { method from_mat (line 653) | void from_mat(int m, ggml_bf16_t *src, int ith, int nth) { type BufferB (line 708) | struct BufferB { method required_size (line 713) | static size_t required_size(int n, int k) { return n * k * sizeof(... method BufferB (line 715) | BufferB(int n, int k, void *ptr) : n(n), k(k) { method from_mat (line 723) | void from_mat(ggml_bf16_t *src, int ith, int nth) { type BufferC (line 787) | struct BufferC { method required_size (line 791) | static size_t required_size(int max_m, int n) { return max_m * n *... method BufferC (line 793) | BufferC(int max_m, int n, void *ptr) : max_m(max_m), n(n) { method to_mat (line 800) | void to_mat(int m, ggml_bf16_t *dst, int ith, int nth) { function mat_mul (line 829) | inline void mat_mul(int m, int n, int k, std::shared_ptr Tuple[torch.Ten... function weight_dequant_kernel (line 57) | def weight_dequant_kernel(x_ptr, s_ptr, y_ptr, M, N, BLOCK_SIZE: tl.cons... function weight_dequant (line 85) | def weight_dequant(x: torch.Tensor, s: torch.Tensor, block_size: int = 1... function fp8_gemm_kernel (line 117) | def fp8_gemm_kernel(a_ptr, b_ptr, c_ptr, function fp8_gemm (line 172) | def fp8_gemm(a: torch.Tensor, a_s: torch.Tensor, b: torch.Tensor, b_s: t... FILE: archive/ktransformers/local_chat.py function local_chat (line 76) | def local_chat( FILE: archive/ktransformers/local_chat_test.py function local_chat (line 55) | def local_chat( FILE: archive/ktransformers/models/ascend/custom_ascend_modeling_deepseek_v3.py class KNPUDeepseekV3ForCausalLM (line 31) | class KNPUDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel): method __init__ (line 36) | def __init__( method init_wrapper (line 54) | def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pag... method batch_embeddings (line 57) | def batch_embeddings(self, batch: ForwardBatchInput, device="npu:0", i... method print_callback (line 111) | def print_callback(self, param): method forward (line 118) | def forward( method flash_infer_attn_plan (line 215) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: archive/ktransformers/models/ascend/custom_ascend_modeling_qwen3.py class KNPUQwen3MoeForCausalLM (line 39) | class KNPUQwen3MoeForCausalLM(Qwen3MoePreTrainedModel): method __init__ (line 44) | def __init__( method init_wrapper (line 84) | def init_wrapper(self): method batch_embeddings (line 90) | def batch_embeddings( method forward (line 158) | def forward( method flash_infer_attn_plan (line 275) | def flash_infer_attn_plan( FILE: archive/ktransformers/models/configuration_deepseek.py class DeepseekV2Config (line 11) | class DeepseekV2Config(PretrainedConfig): method __init__ (line 113) | def __init__( FILE: archive/ktransformers/models/configuration_deepseek_v3.py class DeepseekV3Config (line 7) | class DeepseekV3Config(PretrainedConfig): method __init__ (line 106) | def __init__( FILE: archive/ktransformers/models/configuration_glm4_moe.py class Glm4MoeConfig (line 26) | class Glm4MoeConfig(PretrainedConfig): method __init__ (line 170) | def __init__( FILE: archive/ktransformers/models/configuration_llama.py class LlamaConfig (line 26) | class LlamaConfig(PretrainedConfig): method __init__ (line 143) | def __init__( FILE: archive/ktransformers/models/configuration_qwen2_moe.py class Qwen2MoeConfig (line 24) | class Qwen2MoeConfig(PretrainedConfig): method __init__ (line 115) | def __init__( FILE: archive/ktransformers/models/configuration_qwen3_moe.py class Qwen3MoeConfig (line 25) | class Qwen3MoeConfig(PretrainedConfig): method __init__ (line 161) | def __init__( FILE: archive/ktransformers/models/configuration_qwen3_next.py class Qwen3NextConfig (line 25) | class Qwen3NextConfig(PretrainedConfig): method __init__ (line 180) | def __init__( FILE: archive/ktransformers/models/configuration_smallthinker.py class SmallthinkerConfig (line 4) | class SmallthinkerConfig(PretrainedConfig): method __init__ (line 65) | def __init__(self, FILE: archive/ktransformers/models/custom_cache.py class StaticCache (line 27) | class StaticCache(transformers.StaticCache): method __init__ (line 45) | def __init__(self, config: PretrainedConfig, max_batch_size: int, max_... method max_batch_size (line 140) | def max_batch_size(self): method max_cache_len (line 144) | def max_cache_len(self): method update (line 147) | def update( method get_seq_length (line 204) | def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: method change_seq_length (line 211) | def change_seq_length(self, bias: Optional[int] = 0) -> int: method get_max_length (line 219) | def get_max_length(self) -> Optional[int]: method get_usable_length (line 223) | def get_usable_length(self, kv_seq_len, layer_idx: Optional[int] = 0) ... method reset (line 226) | def reset(self): method remove_suffix (line 238) | def remove_suffix(self, start_pos): method get_max_cache_shape (line 249) | def get_max_cache_shape(self) -> Tuple[int, int, int, int]: class KVC2StaticCache (line 253) | class KVC2StaticCache: method __init__ (line 258) | def __init__(self, config: PretrainedConfig, max_batch_size, page_size... method load (line 275) | def load(self, inference_context): method update (line 289) | def update( method get_seq_length (line 328) | def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: method get_usable_length (line 332) | def get_usable_length(self, kv_seq_len, layer_idx: Optional[int] = 0) ... method change_seq_length (line 335) | def change_seq_length(self, bias: Optional[int] = 0) -> int: method get_max_length (line 339) | def get_max_length(self) -> Optional[int]: method reset (line 343) | def reset(self, inference_context): method get_page_table (line 354) | def get_page_table(self, mini_batch, bsz_tensors: torch.tensor = None,... class KDeepSeekV3Cache (line 387) | class KDeepSeekV3Cache(nn.Module): method __init__ (line 388) | def __init__( method load (line 406) | def load(self, inference_context: "sched_ext.InferenceContext"): method update (line 414) | def update( method get_page_table (line 450) | def get_page_table(self, cache_position: torch.Tensor, q_indptr: torch... class KGQACache (line 468) | class KGQACache(nn.Module): method __init__ (line 469) | def __init__( method load (line 486) | def load(self, inference_context: "sched_ext.InferenceContext"): method get_page_table (line 501) | def get_page_table(self, cache_position: torch.Tensor, q_indptr: torch... method get_k_cache (line 519) | def get_k_cache(self, layer_idx): method get_v_cache (line 522) | def get_v_cache(self, layer_idx): class KVC2Qwen3Cache (line 526) | class KVC2Qwen3Cache(nn.Module): method __init__ (line 528) | def __init__(self, config, max_batch_size, page_size=256, method load (line 547) | def load(self, inference_context): method update (line 575) | def update( method get_k_cache (line 635) | def get_k_cache(self, layer_idx): method get_v_cache (line 638) | def get_v_cache(self, layer_idx): method get_page_table (line 642) | def get_page_table( FILE: archive/ktransformers/models/custom_modeling_deepseek_v2.py class KDeepseekV2ForCausalLM (line 21) | class KDeepseekV2ForCausalLM(DeepseekV2PreTrainedModel): method __init__ (line 25) | def __init__( method init_wrapper (line 40) | def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pag... method batch_embeddings (line 57) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method forward (line 71) | def forward( method flash_infer_attn_plan (line 140) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: archive/ktransformers/models/custom_modeling_deepseek_v3.py class KDeepseekV3ForCausalLM (line 27) | class KDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel): method __init__ (line 31) | def __init__( method init_wrapper (line 43) | def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pag... method batch_embeddings (line 61) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method forward (line 75) | def forward( method flash_infer_attn_plan (line 136) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: archive/ktransformers/models/custom_modeling_glm4_moe.py class KGlm4MoeForCausalLM (line 27) | class KGlm4MoeForCausalLM(Glm4MoePreTrainedModel): method __init__ (line 31) | def __init__( method init_wrapper (line 45) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba... method batch_embeddings (line 49) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method forward (line 63) | def forward( method flash_infer_attn_plan (line 111) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: archive/ktransformers/models/custom_modeling_qwen2_moe.py class KQwen2MoeForCausalLM (line 27) | class KQwen2MoeForCausalLM(Qwen2MoePreTrainedModel): method __init__ (line 31) | def __init__( method init_wrapper (line 44) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba... method batch_embeddings (line 48) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method forward (line 62) | def forward( method flash_infer_attn_plan (line 120) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: archive/ktransformers/models/custom_modeling_qwen3_moe.py class KQwen3MoeForCausalLM (line 27) | class KQwen3MoeForCausalLM(Qwen3MoePreTrainedModel): method __init__ (line 31) | def __init__( method init_wrapper (line 44) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba... method batch_embeddings (line 48) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method forward (line 62) | def forward( method flash_infer_attn_plan (line 120) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: archive/ktransformers/models/custom_modeling_qwen3_next.py class KQwen3NextForCausalLM (line 27) | class KQwen3NextForCausalLM(Qwen3NextPreTrainedModel): method __init__ (line 31) | def __init__( method init_wrapper (line 46) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba... method batch_embeddings (line 50) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method reset_conv_states (line 63) | def reset_conv_states(self): method forward (line 69) | def forward( method flash_infer_attn_plan (line 127) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: archive/ktransformers/models/custom_modeling_smallthinker.py class KSmallThinkerForCausalLM (line 27) | class KSmallThinkerForCausalLM(SmallthinkerPreTrainedModel): method __init__ (line 31) | def __init__( method init_wrapper (line 45) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba... method batch_embeddings (line 49) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method forward (line 63) | def forward( method flash_infer_attn_plan (line 110) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: archive/ktransformers/models/modeling_deepseek.py function _get_unpad_data (line 88) | def _get_unpad_data(attention_mask): class DeepseekV2RMSNorm (line 102) | class DeepseekV2RMSNorm(nn.Module): method __init__ (line 103) | def __init__(self, hidden_size, eps=1e-6): method forward (line 112) | def forward(self, hidden_states): class DeepseekV2RotaryEmbedding (line 123) | class DeepseekV2RotaryEmbedding(nn.Module): method __init__ (line 124) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method forward (line 136) | def forward(self, x, position_ids): class DeepseekV2LinearScalingRotaryEmbedding (line 152) | class DeepseekV2LinearScalingRotaryEmbedding(DeepseekV2RotaryEmbedding): method __init__ (line 155) | def __init__( method _set_cos_sin_cache (line 167) | def _set_cos_sin_cache(self, seq_len, device, dtype): class DeepseekV2DynamicNTKScalingRotaryEmbedding (line 182) | class DeepseekV2DynamicNTKScalingRotaryEmbedding(DeepseekV2RotaryEmbeddi... method __init__ (line 185) | def __init__( method _set_cos_sin_cache (line 197) | def _set_cos_sin_cache(self, seq_len, device, dtype): function yarn_find_correction_dim (line 222) | def yarn_find_correction_dim( function yarn_find_correction_range (line 231) | def yarn_find_correction_range( function yarn_get_mscale (line 243) | def yarn_get_mscale(scale=1, mscale=1): function yarn_linear_ramp_mask (line 249) | def yarn_linear_ramp_mask(min, max, dim): class DeepseekV2YarnRotaryEmbedding (line 257) | class DeepseekV2YarnRotaryEmbedding(DeepseekV2RotaryEmbedding): method __init__ (line 258) | def __init__( method forward (line 313) | def forward(self, x, position_ids): function rotate_half (line 329) | def rotate_half(x): function apply_rotary_pos_emb (line 337) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... class DeepseekV2MLP (line 367) | class DeepseekV2MLP(nn.Module): method __init__ (line 368) | def __init__(self, config, hidden_size=None, intermediate_size=None): method forward (line 381) | def forward(self, x): class MoEGate (line 386) | class MoEGate(nn.Module): method __init__ (line 387) | def __init__(self, config): method reset_parameters (line 408) | def reset_parameters(self) -> None: method forward (line 413) | def forward(self, hidden_states): class AddAuxiliaryLoss (line 493) | class AddAuxiliaryLoss(torch.autograd.Function): method forward (line 500) | def forward(ctx, x, loss): method backward (line 507) | def backward(ctx, grad_output): class DeepseekV2MoE (line 513) | class DeepseekV2MoE(nn.Module): method __init__ (line 518) | def __init__(self, config): method forward (line 558) | def forward(self, hidden_states): method moe_infer (line 581) | def moe_infer(self, x, topk_ids, topk_weight): function repeat_kv (line 657) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class DeepseekV2Attention (line 671) | class DeepseekV2Attention(nn.Module): method __init__ (line 674) | def __init__(self, config: DeepseekV2Config, layer_idx: Optional[int] ... method _init_rope (line 741) | def _init_rope(self): method _shape (line 787) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 794) | def forward( class DeepseekV2FlashAttention2 (line 893) | class DeepseekV2FlashAttention2(DeepseekV2Attention): method __init__ (line 900) | def __init__(self, *args, **kwargs): method forward (line 908) | def forward( method _flash_attention_forward (line 1038) | def _flash_attention_forward( method _upad_input (line 1129) | def _upad_input( class DeepseekV2DecoderLayer (line 1180) | class DeepseekV2DecoderLayer(nn.Module): method __init__ (line 1181) | def __init__(self, config: DeepseekV2Config, layer_idx: int): method forward (line 1205) | def forward( class DeepseekV2PreTrainedModel (line 1291) | class DeepseekV2PreTrainedModel(PreTrainedModel): method _init_weights (line 1301) | def _init_weights(self, module): class DeepseekV2Model (line 1387) | class DeepseekV2Model(DeepseekV2PreTrainedModel): method __init__ (line 1395) | def __init__(self, config: DeepseekV2Config): method get_input_embeddings (line 1416) | def get_input_embeddings(self): method set_input_embeddings (line 1419) | def set_input_embeddings(self, value): method forward (line 1423) | def forward( method _update_causal_mask (line 1563) | def _update_causal_mask( class DeepseekV2ForCausalLM (line 1644) | class DeepseekV2ForCausalLM(DeepseekV2PreTrainedModel): method __init__ (line 1647) | def __init__(self, config): method get_input_embeddings (line 1656) | def get_input_embeddings(self): method set_input_embeddings (line 1659) | def set_input_embeddings(self, value): method get_output_embeddings (line 1662) | def get_output_embeddings(self): method set_output_embeddings (line 1665) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1668) | def set_decoder(self, decoder): method get_decoder (line 1671) | def get_decoder(self): method forward (line 1678) | def forward( method prepare_inputs_for_generation (line 1773) | def prepare_inputs_for_generation( method _reorder_cache (line 1851) | def _reorder_cache(past_key_values, beam_idx): class DeepseekV2ForSequenceClassification (line 1878) | class DeepseekV2ForSequenceClassification(DeepseekV2PreTrainedModel): method __init__ (line 1879) | def __init__(self, config): method get_input_embeddings (line 1888) | def get_input_embeddings(self): method set_input_embeddings (line 1891) | def set_input_embeddings(self, value): method forward (line 1895) | def forward( FILE: archive/ktransformers/models/modeling_deepseek_v3.py function _get_unpad_data (line 87) | def _get_unpad_data(attention_mask): class DeepseekV3RMSNorm (line 101) | class DeepseekV3RMSNorm(nn.Module): method __init__ (line 102) | def __init__(self, hidden_size, eps=1e-6): method forward (line 111) | def forward(self, hidden_states): class DeepseekV3RotaryEmbedding (line 122) | class DeepseekV3RotaryEmbedding(nn.Module): method __init__ (line 123) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 142) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 154) | def forward(self, x, seq_len=None): class DeepseekV3LinearScalingRotaryEmbedding (line 166) | class DeepseekV3LinearScalingRotaryEmbedding(DeepseekV3RotaryEmbedding): method __init__ (line 169) | def __init__( method _set_cos_sin_cache (line 180) | def _set_cos_sin_cache(self, seq_len, device, dtype): class DeepseekV3DynamicNTKScalingRotaryEmbedding (line 195) | class DeepseekV3DynamicNTKScalingRotaryEmbedding(DeepseekV3RotaryEmbeddi... method __init__ (line 198) | def __init__( method _set_cos_sin_cache (line 209) | def _set_cos_sin_cache(self, seq_len, device, dtype): function yarn_find_correction_dim (line 234) | def yarn_find_correction_dim( function yarn_find_correction_range (line 243) | def yarn_find_correction_range( function yarn_get_mscale (line 255) | def yarn_get_mscale(scale=1, mscale=1): function yarn_linear_ramp_mask (line 261) | def yarn_linear_ramp_mask(min, max, dim): class DeepseekV3YarnRotaryEmbedding (line 270) | class DeepseekV3YarnRotaryEmbedding(DeepseekV3RotaryEmbedding): method __init__ (line 272) | def __init__( method _set_cos_sin_cache (line 293) | def _set_cos_sin_cache(self, seq_len, device, dtype): function rotate_half (line 339) | def rotate_half(x): function apply_rotary_pos_emb (line 347) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): class DeepseekV3MLP (line 382) | class DeepseekV3MLP(nn.Module): method __init__ (line 383) | def __init__(self, config, hidden_size=None, intermediate_size=None): method forward (line 396) | def forward(self, x): class MoEGate (line 401) | class MoEGate(nn.Module): method __init__ (line 402) | def __init__(self, config): method reset_parameters (line 425) | def reset_parameters(self) -> None: method forward (line 430) | def forward(self, hidden_states): class DeepseekV3MoE (line 483) | class DeepseekV3MoE(nn.Module): method __init__ (line 488) | def __init__(self, config): method forward (line 530) | def forward(self, hidden_states): method moe_infer (line 543) | def moe_infer(self, x, topk_ids, topk_weight): function repeat_kv (line 620) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class DeepseekV3Attention (line 635) | class DeepseekV3Attention(nn.Module): method __init__ (line 638) | def __init__(self, config: DeepseekV3Config, layer_idx: Optional[int] ... method _init_rope (line 705) | def _init_rope(self): method _shape (line 751) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 758) | def forward( class DeepseekV3FlashAttention2 (line 869) | class DeepseekV3FlashAttention2(DeepseekV3Attention): method __init__ (line 876) | def __init__(self, *args, **kwargs): method forward (line 884) | def forward( method _flash_attention_forward (line 1020) | def _flash_attention_forward( method _upad_input (line 1100) | def _upad_input( class DeepseekV3DecoderLayer (line 1152) | class DeepseekV3DecoderLayer(nn.Module): method __init__ (line 1153) | def __init__(self, config: DeepseekV3Config, layer_idx: int): method forward (line 1177) | def forward( class DeepseekV3PreTrainedModel (line 1265) | class DeepseekV3PreTrainedModel(PreTrainedModel): method _init_weights (line 1274) | def _init_weights(self, module): class DeepseekV3Model (line 1360) | class DeepseekV3Model(DeepseekV3PreTrainedModel): method __init__ (line 1368) | def __init__(self, config: DeepseekV3Config): method get_input_embeddings (line 1389) | def get_input_embeddings(self): method set_input_embeddings (line 1392) | def set_input_embeddings(self, value): method forward (line 1396) | def forward( method _update_causal_mask (line 1530) | def _update_causal_mask( class DeepseekV3ForCausalLM (line 1610) | class DeepseekV3ForCausalLM(DeepseekV3PreTrainedModel, GenerationMixin): method __init__ (line 1613) | def __init__(self, config): method get_input_embeddings (line 1622) | def get_input_embeddings(self): method set_input_embeddings (line 1625) | def set_input_embeddings(self, value): method get_output_embeddings (line 1628) | def get_output_embeddings(self): method set_output_embeddings (line 1631) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1634) | def set_decoder(self, decoder): method get_decoder (line 1637) | def get_decoder(self): method forward (line 1644) | def forward( method prepare_inputs_for_generation (line 1749) | def prepare_inputs_for_generation( method _reorder_cache (line 1814) | def _reorder_cache(past_key_values, beam_idx): class DeepseekV3ForSequenceClassification (line 1841) | class DeepseekV3ForSequenceClassification(DeepseekV3PreTrainedModel): method __init__ (line 1842) | def __init__(self, config): method get_input_embeddings (line 1851) | def get_input_embeddings(self): method set_input_embeddings (line 1854) | def set_input_embeddings(self, value): method forward (line 1858) | def forward( FILE: archive/ktransformers/models/modeling_glm4_moe.py function repeat_kv (line 45) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: function eager_attention_forward (line 57) | def eager_attention_forward( function rotate_half (line 83) | def rotate_half(x): function apply_rotary_pos_emb (line 90) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... class Glm4MoeAttention (line 128) | class Glm4MoeAttention(nn.Module): method __init__ (line 131) | def __init__(self, config: Glm4MoeConfig, layer_idx: Optional[int] = N... method forward (line 156) | def forward( class Glm4MoeMLP (line 208) | class Glm4MoeMLP(nn.Module): method __init__ (line 209) | def __init__(self, config, hidden_size=None, intermediate_size=None): method forward (line 220) | def forward(self, x): class Glm4MoeTopkRouter (line 225) | class Glm4MoeTopkRouter(nn.Module): method __init__ (line 226) | def __init__(self, config: Glm4MoeConfig): method get_topk_indices (line 240) | def get_topk_indices(self, scores): method forward (line 259) | def forward(self, hidden_states): class Glm4MoeRMSNorm (line 273) | class Glm4MoeRMSNorm(nn.Module): method __init__ (line 274) | def __init__(self, hidden_size, eps=1e-6): method forward (line 283) | def forward(self, hidden_states): method extra_repr (line 290) | def extra_repr(self): class Glm4MoeMoE (line 294) | class Glm4MoeMoE(nn.Module): method __init__ (line 299) | def __init__(self, config): method moe (line 313) | def moe(self, hidden_states: torch.Tensor, topk_indices: torch.Tensor,... method forward (line 339) | def forward(self, hidden_states): class Glm4MoeDecoderLayer (line 349) | class Glm4MoeDecoderLayer(GradientCheckpointingLayer): method __init__ (line 350) | def __init__(self, config: Glm4MoeConfig, layer_idx: int): method forward (line 364) | def forward( class Glm4MoePreTrainedModel (line 398) | class Glm4MoePreTrainedModel(PreTrainedModel): method _init_weights (line 414) | def _init_weights(self, module): class Glm4MoeRotaryEmbedding (line 430) | class Glm4MoeRotaryEmbedding(nn.Module): method __init__ (line 431) | def __init__(self, config: Glm4MoeConfig, device=None): method forward (line 450) | def forward(self, x, position_ids): class Glm4MoeModel (line 465) | class Glm4MoeModel(Glm4MoePreTrainedModel): method __init__ (line 468) | def __init__(self, config: Glm4MoeConfig): method get_input_embeddings (line 484) | def get_input_embeddings(self): method set_input_embeddings (line 487) | def set_input_embeddings(self, value): method forward (line 492) | def forward( class Glm4MoeForCausalLM (line 551) | class Glm4MoeForCausalLM(Glm4MoePreTrainedModel, GenerationMixin): method __init__ (line 556) | def __init__(self, config): method get_input_embeddings (line 565) | def get_input_embeddings(self): method set_input_embeddings (line 568) | def set_input_embeddings(self, value): method get_output_embeddings (line 571) | def get_output_embeddings(self): method set_output_embeddings (line 574) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 577) | def set_decoder(self, decoder): method get_decoder (line 580) | def get_decoder(self): method forward (line 585) | def forward( FILE: archive/ktransformers/models/modeling_llama.py class LlamaRMSNorm (line 58) | class LlamaRMSNorm(nn.Module): method __init__ (line 59) | def __init__(self, hidden_size, eps=1e-6): method forward (line 67) | def forward(self, hidden_states): class LlamaRotaryEmbedding (line 78) | class LlamaRotaryEmbedding(nn.Module): method __init__ (line 79) | def __init__( method _dynamic_frequency_update (line 134) | def _dynamic_frequency_update(self, position_ids, device): method forward (line 159) | def forward(self, x, position_ids): class LlamaLinearScalingRotaryEmbedding (line 190) | class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding): method __init__ (line 193) | def __init__(self, *args, **kwargs): class LlamaDynamicNTKScalingRotaryEmbedding (line 202) | class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding): method __init__ (line 205) | def __init__(self, *args, **kwargs): function rotate_half (line 215) | def rotate_half(x): function apply_rotary_pos_emb (line 222) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... class LlamaMLP (line 249) | class LlamaMLP(nn.Module): method __init__ (line 250) | def __init__(self, config): method forward (line 266) | def forward(self, x): function repeat_kv (line 300) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class LlamaAttention (line 314) | class LlamaAttention(nn.Module): method __init__ (line 317) | def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None): method forward (line 364) | def forward( class LlamaFlashAttention2 (line 496) | class LlamaFlashAttention2(LlamaAttention): method __init__ (line 503) | def __init__(self, *args, **kwargs): method forward (line 511) | def forward( class LlamaSdpaAttention (line 627) | class LlamaSdpaAttention(LlamaAttention): method forward (line 635) | def forward( class LlamaDecoderLayer (line 745) | class LlamaDecoderLayer(nn.Module): method __init__ (line 746) | def __init__(self, config: LlamaConfig, layer_idx: int): method forward (line 760) | def forward( class LlamaPreTrainedModel (line 854) | class LlamaPreTrainedModel(PreTrainedModel): method _init_weights (line 866) | def _init_weights(self, module): class LlamaModel (line 956) | class LlamaModel(LlamaPreTrainedModel): method __init__ (line 964) | def __init__(self, config: LlamaConfig): method get_input_embeddings (line 985) | def get_input_embeddings(self): method set_input_embeddings (line 988) | def set_input_embeddings(self, value): method forward (line 992) | def forward( method _update_causal_mask (line 1133) | def _update_causal_mask( class LlamaForCausalLM (line 1236) | class LlamaForCausalLM(LlamaPreTrainedModel): method __init__ (line 1239) | def __init__(self, config): method get_input_embeddings (line 1248) | def get_input_embeddings(self): method set_input_embeddings (line 1251) | def set_input_embeddings(self, value): method get_output_embeddings (line 1254) | def get_output_embeddings(self): method set_output_embeddings (line 1257) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1260) | def set_decoder(self, decoder): method get_decoder (line 1263) | def get_decoder(self): method forward (line 1270) | def forward( method prepare_inputs_for_generation (line 1376) | def prepare_inputs_for_generation( class LlamaForSequenceClassification (line 1440) | class LlamaForSequenceClassification(LlamaPreTrainedModel): method __init__ (line 1441) | def __init__(self, config): method get_input_embeddings (line 1450) | def get_input_embeddings(self): method set_input_embeddings (line 1453) | def set_input_embeddings(self, value): method forward (line 1457) | def forward( class LlamaForQuestionAnswering (line 1567) | class LlamaForQuestionAnswering(LlamaPreTrainedModel): method __init__ (line 1571) | def __init__(self, config): method get_input_embeddings (line 1579) | def get_input_embeddings(self): method set_input_embeddings (line 1582) | def set_input_embeddings(self, value): method forward (line 1586) | def forward( class LlamaForTokenClassification (line 1668) | class LlamaForTokenClassification(LlamaPreTrainedModel): method __init__ (line 1669) | def __init__(self, config): method get_input_embeddings (line 1685) | def get_input_embeddings(self): method set_input_embeddings (line 1688) | def set_input_embeddings(self, value): method forward (line 1692) | def forward( FILE: archive/ktransformers/models/modeling_mixtral.py function load_balancing_loss_func (line 89) | def load_balancing_loss_func( function _get_unpad_data (line 166) | def _get_unpad_data(attention_mask): class MixtralRMSNorm (line 179) | class MixtralRMSNorm(nn.Module): method __init__ (line 180) | def __init__(self, hidden_size, eps=1e-6): method forward (line 188) | def forward(self, hidden_states): method extra_repr (line 195) | def extra_repr(self): class MixtralRotaryEmbedding (line 201) | class MixtralRotaryEmbedding(nn.Module): method __init__ (line 202) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method forward (line 215) | def forward(self, x, position_ids): function rotate_half (line 231) | def rotate_half(x): function apply_rotary_pos_emb (line 240) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): function repeat_kv (line 270) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class MixtralAttention (line 284) | class MixtralAttention(nn.Module): method __init__ (line 290) | def __init__(self, config: MixtralConfig, layer_idx: Optional[int] = N... method _shape (line 327) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 330) | def forward( class MixtralFlashAttention2 (line 406) | class MixtralFlashAttention2(MixtralAttention): method forward (line 413) | def forward( method _flash_attention_forward (line 549) | def _flash_attention_forward( method _upad_input (line 660) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class MixtralSdpaAttention (line 706) | class MixtralSdpaAttention(MixtralAttention): method forward (line 714) | def forward( class MixtralBlockSparseTop2MLP (line 803) | class MixtralBlockSparseTop2MLP(nn.Module): method __init__ (line 804) | def __init__(self, config: MixtralConfig): method forward (line 815) | def forward(self, hidden_states): class MixtralSparseMoeBlock (line 821) | class MixtralSparseMoeBlock(nn.Module): method __init__ (line 833) | def __init__(self, config): method forward (line 848) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class MixtralDecoderLayer (line 889) | class MixtralDecoderLayer(nn.Module): method __init__ (line 890) | def __init__(self, config: MixtralConfig, layer_idx: int): method forward (line 900) | def forward( class MixtralPreTrainedModel (line 992) | class MixtralPreTrainedModel(PreTrainedModel): method _init_weights (line 1002) | def _init_weights(self, module): class MixtralModel (line 1091) | class MixtralModel(MixtralPreTrainedModel): method __init__ (line 1099) | def __init__(self, config: MixtralConfig): method get_input_embeddings (line 1115) | def get_input_embeddings(self): method set_input_embeddings (line 1118) | def set_input_embeddings(self, value): method forward (line 1123) | def forward( method _update_causal_mask (line 1256) | def _update_causal_mask( class MixtralForCausalLM (line 1337) | class MixtralForCausalLM(MixtralPreTrainedModel): method __init__ (line 1340) | def __init__(self, config): method get_input_embeddings (line 1351) | def get_input_embeddings(self): method set_input_embeddings (line 1354) | def set_input_embeddings(self, value): method get_output_embeddings (line 1357) | def get_output_embeddings(self): method set_output_embeddings (line 1360) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1363) | def set_decoder(self, decoder): method get_decoder (line 1366) | def get_decoder(self): method forward (line 1372) | def forward( method prepare_inputs_for_generation (line 1482) | def prepare_inputs_for_generation( class MixtralForSequenceClassification (line 1545) | class MixtralForSequenceClassification(MixtralPreTrainedModel): method __init__ (line 1546) | def __init__(self, config): method get_input_embeddings (line 1555) | def get_input_embeddings(self): method set_input_embeddings (line 1558) | def set_input_embeddings(self, value): method forward (line 1562) | def forward( class MixtralForTokenClassification (line 1661) | class MixtralForTokenClassification(MixtralPreTrainedModel): method __init__ (line 1662) | def __init__(self, config): method get_input_embeddings (line 1678) | def get_input_embeddings(self): method set_input_embeddings (line 1681) | def set_input_embeddings(self, value): method forward (line 1685) | def forward( FILE: archive/ktransformers/models/modeling_qwen2_moe.py function load_balancing_loss_func (line 76) | def load_balancing_loss_func( function _get_unpad_data (line 153) | def _get_unpad_data(attention_mask): class Qwen2MoeRMSNorm (line 166) | class Qwen2MoeRMSNorm(nn.Module): method __init__ (line 167) | def __init__(self, hidden_size, eps=1e-6): method forward (line 175) | def forward(self, hidden_states): class Qwen2MoeRotaryEmbedding (line 183) | class Qwen2MoeRotaryEmbedding(nn.Module): method __init__ (line 184) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method forward (line 196) | def forward(self, x, position_ids): function rotate_half (line 213) | def rotate_half(x): function apply_rotary_pos_emb (line 221) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... class Qwen2MoeMLP (line 249) | class Qwen2MoeMLP(nn.Module): method __init__ (line 250) | def __init__(self, config, intermediate_size=None): method forward (line 260) | def forward(self, x): function repeat_kv (line 265) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class Qwen2MoeAttention (line 278) | class Qwen2MoeAttention(nn.Module): method __init__ (line 284) | def __init__(self, config: Qwen2MoeConfig, layer_idx: Optional[int] = ... method forward (line 321) | def forward( class Qwen2MoeFlashAttention2 (line 396) | class Qwen2MoeFlashAttention2(Qwen2MoeAttention): method __init__ (line 406) | def __init__(self, *args, **kwargs): method forward (line 414) | def forward( method _flash_attention_forward (line 546) | def _flash_attention_forward( method _upad_input (line 663) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class Qwen2MoeSdpaAttention (line 707) | class Qwen2MoeSdpaAttention(Qwen2MoeAttention): method forward (line 715) | def forward( class Qwen2MoeSparseMoeBlock (line 803) | class Qwen2MoeSparseMoeBlock(nn.Module): method __init__ (line 804) | def __init__(self, config): method forward (line 819) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Qwen2MoeDecoderLayer (line 865) | class Qwen2MoeDecoderLayer(nn.Module): method __init__ (line 866) | def __init__(self, config: Qwen2MoeConfig, layer_idx: int): method forward (line 882) | def forward( class Qwen2MoePreTrainedModel (line 979) | class Qwen2MoePreTrainedModel(PreTrainedModel): method _init_weights (line 990) | def _init_weights(self, module): class Qwen2MoeModel (line 1083) | class Qwen2MoeModel(Qwen2MoePreTrainedModel): method __init__ (line 1091) | def __init__(self, config: Qwen2MoeConfig): method get_input_embeddings (line 1107) | def get_input_embeddings(self): method set_input_embeddings (line 1110) | def set_input_embeddings(self, value): method forward (line 1114) | def forward( method _update_causal_mask (line 1247) | def _update_causal_mask( class Qwen2MoeForCausalLM (line 1328) | class Qwen2MoeForCausalLM(Qwen2MoePreTrainedModel): method __init__ (line 1331) | def __init__(self, config): method get_input_embeddings (line 1343) | def get_input_embeddings(self): method set_input_embeddings (line 1346) | def set_input_embeddings(self, value): method get_output_embeddings (line 1349) | def get_output_embeddings(self): method set_output_embeddings (line 1352) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1355) | def set_decoder(self, decoder): method get_decoder (line 1358) | def get_decoder(self): method forward (line 1363) | def forward( method prepare_inputs_for_generation (line 1472) | def prepare_inputs_for_generation( method _reorder_cache (line 1550) | def _reorder_cache(past_key_values, beam_idx): class Qwen2MoeForSequenceClassification (line 1575) | class Qwen2MoeForSequenceClassification(Qwen2MoePreTrainedModel): method __init__ (line 1576) | def __init__(self, config): method get_input_embeddings (line 1585) | def get_input_embeddings(self): method set_input_embeddings (line 1588) | def set_input_embeddings(self, value): method forward (line 1592) | def forward( class Qwen2MoeForTokenClassification (line 1691) | class Qwen2MoeForTokenClassification(Qwen2MoePreTrainedModel): method __init__ (line 1692) | def __init__(self, config): method get_input_embeddings (line 1708) | def get_input_embeddings(self): method set_input_embeddings (line 1711) | def set_input_embeddings(self, value): method forward (line 1715) | def forward( FILE: archive/ktransformers/models/modeling_qwen3_moe.py function rotate_half (line 65) | def rotate_half(x): function apply_rotary_pos_emb (line 72) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... function repeat_kv (line 99) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: function eager_attention_forward (line 111) | def eager_attention_forward( class Qwen3MoeAttention (line 137) | class Qwen3MoeAttention(nn.Module): method __init__ (line 140) | def __init__(self, config: Qwen3MoeConfig, layer_idx: int): method forward (line 183) | def forward( class Qwen3MoeMLP (line 234) | class Qwen3MoeMLP(nn.Module): method __init__ (line 235) | def __init__(self, config, intermediate_size=None): method forward (line 245) | def forward(self, x): class Qwen3MoeSparseMoeBlock (line 250) | class Qwen3MoeSparseMoeBlock(nn.Module): method __init__ (line 251) | def __init__(self, config): method forward (line 263) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Qwen3MoeRMSNorm (line 303) | class Qwen3MoeRMSNorm(nn.Module): method __init__ (line 304) | def __init__(self, hidden_size, eps=1e-6): method forward (line 313) | def forward(self, hidden_states): method extra_repr (line 320) | def extra_repr(self): class Qwen3MoeDecoderLayer (line 324) | class Qwen3MoeDecoderLayer(nn.Module): method __init__ (line 325) | def __init__(self, config: Qwen3MoeConfig, layer_idx: int): method forward (line 344) | def forward( function _compute_default_rope_parameters (line 421) | def _compute_default_rope_parameters( class Qwen3MoeRotaryEmbedding (line 461) | class Qwen3MoeRotaryEmbedding(nn.Module): method __init__ (line 462) | def __init__(self, config: Qwen3MoeConfig, device=None): method _dynamic_frequency_update (line 485) | def _dynamic_frequency_update(self, position_ids, device): method forward (line 505) | def forward(self, x, position_ids): class Qwen3MoePreTrainedModel (line 550) | class Qwen3MoePreTrainedModel(PreTrainedModel): method _init_weights (line 564) | def _init_weights(self, module): class Qwen3MoeModel (line 647) | class Qwen3MoeModel(Qwen3MoePreTrainedModel): method __init__ (line 655) | def __init__(self, config: Qwen3MoeConfig): method get_input_embeddings (line 671) | def get_input_embeddings(self): method set_input_embeddings (line 674) | def set_input_embeddings(self, value): method forward (line 678) | def forward( method _update_causal_mask (line 796) | def _update_causal_mask( method _prepare_4d_causal_attention_mask_with_cache_position (line 880) | def _prepare_4d_causal_attention_mask_with_cache_position( class KwargsForCausalLM (line 950) | class KwargsForCausalLM(): ... function load_balancing_loss_func (line 953) | def load_balancing_loss_func( class Qwen3MoeForCausalLM (line 1035) | class Qwen3MoeForCausalLM(Qwen3MoePreTrainedModel, GenerationMixin): method __init__ (line 1040) | def __init__(self, config): method get_input_embeddings (line 1052) | def get_input_embeddings(self): method set_input_embeddings (line 1055) | def set_input_embeddings(self, value): method get_output_embeddings (line 1058) | def get_output_embeddings(self): method set_output_embeddings (line 1061) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1064) | def set_decoder(self, decoder): method get_decoder (line 1067) | def get_decoder(self): method forward (line 1073) | def forward( class Qwen3MoeForSequenceClassification (line 1199) | class Qwen3MoeForSequenceClassification(Qwen3MoePreTrainedModel): method __init__ (line 1200) | def __init__(self, config): method get_input_embeddings (line 1209) | def get_input_embeddings(self): method set_input_embeddings (line 1212) | def set_input_embeddings(self, value): method forward (line 1216) | def forward( class Qwen3MoeForTokenClassification (line 1298) | class Qwen3MoeForTokenClassification(Qwen3MoePreTrainedModel): method __init__ (line 1299) | def __init__(self, config): method get_input_embeddings (line 1315) | def get_input_embeddings(self): method set_input_embeddings (line 1318) | def set_input_embeddings(self, value): method forward (line 1327) | def forward( class Qwen3MoeForQuestionAnswering (line 1386) | class Qwen3MoeForQuestionAnswering(Qwen3MoePreTrainedModel): method __init__ (line 1389) | def __init__(self, config): method get_input_embeddings (line 1397) | def get_input_embeddings(self): method set_input_embeddings (line 1400) | def set_input_embeddings(self, value): method forward (line 1404) | def forward( FILE: archive/ktransformers/models/modeling_qwen3_next.py class Qwen3NextRMSNormGated (line 82) | class Qwen3NextRMSNormGated(nn.Module): method __init__ (line 83) | def __init__(self, hidden_size, eps=1e-6, **kwargs): method forward (line 88) | def forward(self, hidden_states, gate=None): class Qwen3NextDynamicCache (line 100) | class Qwen3NextDynamicCache: method __init__ (line 116) | def __init__(self, config: Qwen3NextConfig): method __len__ (line 130) | def __len__(self): method __getitem__ (line 133) | def __getitem__(self, layer_idx: int) -> tuple[torch.Tensor, torch.Ten... method update (line 136) | def update( method reorder_cache (line 152) | def reorder_cache(self, beam_idx: torch.LongTensor): method get_seq_length (line 167) | def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: method get_mask_sizes (line 175) | def get_mask_sizes(self, cache_position: torch.Tensor, layer_idx: int)... method has_previous_state (line 188) | def has_previous_state(self): class Qwen3NextRotaryEmbedding (line 193) | class Qwen3NextRotaryEmbedding(nn.Module): method __init__ (line 196) | def __init__(self, config: Qwen3NextConfig, device=None): method forward (line 215) | def forward(self, x, position_ids): class Qwen3NextRMSNorm (line 229) | class Qwen3NextRMSNorm(nn.Module): method __init__ (line 230) | def __init__(self, dim: int, eps: float = 1e-6): method _norm (line 237) | def _norm(self, x): method forward (line 240) | def forward(self, x): method extra_repr (line 247) | def extra_repr(self): function rotate_half (line 251) | def rotate_half(x): function apply_rotary_pos_emb (line 259) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... function repeat_kv (line 299) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: function eager_attention_forward (line 311) | def eager_attention_forward( class Qwen3NextAttention (line 337) | class Qwen3NextAttention(nn.Module): method __init__ (line 340) | def __init__(self, config: Qwen3NextConfig, layer_idx: int): method forward (line 369) | def forward( function apply_mask_to_padding_states (line 420) | def apply_mask_to_padding_states(hidden_states, attention_mask): function torch_causal_conv1d_update (line 436) | def torch_causal_conv1d_update( function torch_chunk_gated_delta_rule (line 454) | def torch_chunk_gated_delta_rule( function torch_recurrent_gated_delta_rule (line 534) | def torch_recurrent_gated_delta_rule( class Qwen3NextGatedDeltaNet (line 576) | class Qwen3NextGatedDeltaNet(nn.Module): method __init__ (line 577) | def __init__(self, config: Qwen3NextConfig, layer_idx: int): method fix_query_key_value_ordering (line 645) | def fix_query_key_value_ordering(self, mixed_qkvz, mixed_ba): method forward (line 674) | def forward( class Qwen3NextMLP (line 792) | class Qwen3NextMLP(nn.Module): method __init__ (line 793) | def __init__(self, config, intermediate_size=None): method forward (line 803) | def forward(self, x): class Qwen3NextSparseMoeBlock (line 808) | class Qwen3NextSparseMoeBlock(nn.Module): method __init__ (line 809) | def __init__(self, config): method forward (line 824) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Qwen3NextDecoderLayer (line 871) | class Qwen3NextDecoderLayer(GradientCheckpointingLayer): method __init__ (line 872) | def __init__(self, config: Qwen3NextConfig, layer_idx: int): method forward (line 894) | def forward( class Qwen3NextPreTrainedModel (line 966) | class Qwen3NextPreTrainedModel(PreTrainedModel): method _init_weights (line 982) | def _init_weights(self, module): class Qwen3NextModel (line 989) | class Qwen3NextModel(Qwen3NextPreTrainedModel): method __init__ (line 990) | def __init__(self, config: Qwen3NextConfig): method forward (line 1004) | def forward( method _update_linear_attn_mask (line 1068) | def _update_linear_attn_mask(self, attention_mask, cache_position): function load_balancing_loss_func (line 1081) | def load_balancing_loss_func( class Qwen3NextForCausalLM (line 1164) | class Qwen3NextForCausalLM(Qwen3NextPreTrainedModel, GenerationMixin): method __init__ (line 1169) | def __init__(self, config): method forward (line 1183) | def forward( class Qwen3NextForSequenceClassification (line 1268) | class Qwen3NextForSequenceClassification(GenericForSequenceClassificatio... class Qwen3NextForTokenClassification (line 1272) | class Qwen3NextForTokenClassification(GenericForTokenClassification, Qwe... class Qwen3NextForQuestionAnswering (line 1276) | class Qwen3NextForQuestionAnswering(GenericForQuestionAnswering, Qwen3Ne... FILE: archive/ktransformers/models/modeling_smallthinker.py class SmallthinkerHierarchicalMLP (line 33) | class SmallthinkerHierarchicalMLP(nn.Module): method __init__ (line 34) | def __init__(self, config: SmallthinkerConfig): method forward (line 49) | def forward(self, secondary_gate_input: torch.Tensor, hidden_states: t... class SmallthinkerMoeBlock (line 70) | class SmallthinkerMoeBlock(nn.Module): method __init__ (line 71) | def __init__(self, config: SmallthinkerConfig): method forward (line 81) | def forward(self, router_input: torch.Tensor, hidden_states: torch.Ten... class SmallthinkerDenseMlpBlock (line 130) | class SmallthinkerDenseMlpBlock(nn.Module): method __init__ (line 131) | def __init__(self, config: SmallthinkerConfig): method forward (line 140) | def forward(self, router_input: torch.Tensor, hidden_states: torch.Ten... class SmallthinkerRMSNorm (line 146) | class SmallthinkerRMSNorm(nn.Module): method __init__ (line 147) | def __init__(self, hidden_size, eps=1e-6): method forward (line 156) | def forward(self, hidden_states): method extra_repr (line 163) | def extra_repr(self): function rotate_half (line 167) | def rotate_half(x): function apply_rotary_pos_emb (line 174) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... function repeat_kv (line 201) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: function eager_attention_forward (line 213) | def eager_attention_forward( class SmallthinkerAttention (line 239) | class SmallthinkerAttention(nn.Module): method __init__ (line 240) | def __init__(self, config: SmallthinkerConfig, layer_idx: int): method forward (line 257) | def forward( class SmallthinkerDecoderLayer (line 317) | class SmallthinkerDecoderLayer(nn.Module): method __init__ (line 318) | def __init__(self, config: SmallthinkerConfig, layer_idx: int): method forward (line 327) | def forward( class SmallthinkerRotaryEmbedding (line 396) | class SmallthinkerRotaryEmbedding(nn.Module): method __init__ (line 397) | def __init__(self, config: SmallthinkerConfig, device=None): method forward (line 416) | def forward(self, x, position_ids): class SmallthinkerPreTrainedModel (line 430) | class SmallthinkerPreTrainedModel(PreTrainedModel): method _init_weights (line 444) | def _init_weights(self, module): class SmallthinkerModel (line 459) | class SmallthinkerModel(SmallthinkerPreTrainedModel): method __init__ (line 460) | def __init__(self, config: SmallthinkerConfig): method get_input_embeddings (line 477) | def get_input_embeddings(self): method set_input_embeddings (line 480) | def set_input_embeddings(self, value): method forward (line 485) | def forward( method _update_causal_mask (line 601) | def _update_causal_mask( method _prepare_4d_causal_attention_mask_with_cache_position (line 688) | def _prepare_4d_causal_attention_mask_with_cache_position( class KwargsForCausalLM (line 756) | class KwargsForCausalLM(FlashAttentionKwargs): ... function load_balancing_loss_func (line 759) | def load_balancing_loss_func( class SmallThinkerForCausalLM (line 842) | class SmallThinkerForCausalLM(SmallthinkerPreTrainedModel, GenerationMix... method __init__ (line 844) | def __init__(self, config): method get_input_embeddings (line 855) | def get_input_embeddings(self): method set_input_embeddings (line 858) | def set_input_embeddings(self, value): method get_output_embeddings (line 861) | def get_output_embeddings(self): method set_output_embeddings (line 864) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 867) | def set_decoder(self, decoder): method get_decoder (line 870) | def get_decoder(self): method forward (line 875) | def forward( FILE: archive/ktransformers/operators/RoPE.py class RotaryEmbedding (line 34) | class RotaryEmbedding(BaseInjectedModule, DeepseekV2RotaryEmbedding): method __init__ (line 35) | def __init__( method load (line 55) | def load(self): class RotaryEmbeddingV3 (line 64) | class RotaryEmbeddingV3(BaseInjectedModule): method __init__ (line 65) | def __init__( method forward (line 83) | def forward(self, x, position_ids): method load (line 98) | def load(self): method _init (line 105) | def _init(self, dim, max_position_embeddings, base, device, scaling_fa... class RotaryEmbeddingV2 (line 115) | class RotaryEmbeddingV2(BaseInjectedModule, LlamaRotaryEmbedding): method __init__ (line 116) | def __init__( method load (line 141) | def load(self): class YarnRotaryEmbedding (line 152) | class YarnRotaryEmbedding(BaseInjectedModule, DeepseekV2YarnRotaryEmbedd... method __init__ (line 153) | def __init__( method load (line 182) | def load(self): class YarnRotaryEmbeddingV3 (line 222) | class YarnRotaryEmbeddingV3(BaseInjectedModule): method __init__ (line 223) | def __init__( method load (line 240) | def load(self): method forward (line 262) | def forward(self, x, position_ids): method _init (line 277) | def _init( class DynamicNTKScalingRotaryEmbedding (line 328) | class DynamicNTKScalingRotaryEmbedding( method __init__ (line 331) | def __init__( method load (line 354) | def load(self): class RotaryEmbeddingV4 (line 367) | class RotaryEmbeddingV4(BaseInjectedModule): method __init__ (line 368) | def __init__( method forward (line 386) | def forward(self, x, position_ids): method load (line 401) | def load(self): method _init (line 408) | def _init(self, dim, max_position_embeddings, base, device, scaling_fa... class KQwen3MoeRotaryEmbedding (line 418) | class KQwen3MoeRotaryEmbedding(BaseInjectedModule, DeepseekV2RotaryEmbed... method __init__ (line 419) | def __init__( method load (line 439) | def load(self): class KSmallthinkerRotaryEmbedding (line 445) | class KSmallthinkerRotaryEmbedding(BaseInjectedModule, SmallthinkerRotar... method __init__ (line 446) | def __init__( method load (line 466) | def load(self): method forward (line 473) | def forward(self, x, position_ids): class KGlm4MoeRotaryEmbedding (line 486) | class KGlm4MoeRotaryEmbedding(BaseInjectedModule, Glm4MoeRotaryEmbedding): method __init__ (line 487) | def __init__( method load (line 507) | def load(self): method forward (line 514) | def forward(self, x, position_ids): FILE: archive/ktransformers/operators/ascend/ascend_attention.py function apply_rotary_pos_emb_fusion (line 39) | def apply_rotary_pos_emb_fusion(q, k, cos, sin, unsqueeze_dim=1): class MatMulOps (line 51) | class MatMulOps(object): method execute (line 52) | def execute(self, x_input): class DynamicQuantOps (line 64) | class DynamicQuantOps(object): method execute (line 69) | def execute(self, x_input): class KDeepseekV2AttentionW8A8A2 (line 74) | class KDeepseekV2AttentionW8A8A2(BaseInjectedModule, DeepseekV2Attention): class PageKVWrapper (line 78) | class PageKVWrapper(object): method __init__ (line 83) | def __init__(self, past_key_value: StaticCache): method update (line 91) | def update(self, compressed_kv, k_pe, layer_idx, cache_kwargs): method get_usable_length (line 94) | def get_usable_length(self, kv_seq_len, layer_idx): method get_seq_length (line 97) | def get_seq_length(self, layer_idx): method get_block_table (line 100) | def get_block_table(self, layer_idx): method init_page_kv_wrapper (line 103) | def init_page_kv_wrapper(self, past_key_value: StaticCache): method __init__ (line 106) | def __init__(self, method forward_chunck (line 140) | def forward_chunck( method forward_paged (line 329) | def forward_paged( method forward_windows (line 416) | def forward_windows( method forward (line 486) | def forward( class KDeepseekV2AttentionW8A8A2Serve (line 512) | class KDeepseekV2AttentionW8A8A2Serve(BaseInjectedModule, DeepseekV2Atte... method __init__ (line 516) | def __init__(self, method print_callback (line 541) | def print_callback(self, param): method forward (line 554) | def forward( method forward_paged (line 757) | def forward_paged( function rotate_half (line 851) | def rotate_half(x): function apply_rotary_pos_emb (line 856) | def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1): class KQwen3MoeAttentionW8A8A2Serve (line 864) | class KQwen3MoeAttentionW8A8A2Serve(BaseInjectedModule, Qwen3MoeAttention): method __init__ (line 868) | def __init__(self, method _linear_w8a8a2 (line 903) | def _linear_w8a8a2(self, x: torch.Tensor, proj: nn.Module, name: str) ... method forward (line 923) | def forward(self, method _forward_prefill (line 997) | def _forward_prefill( method forward_paged (line 1155) | def forward_paged( FILE: archive/ktransformers/operators/ascend/ascend_experts.py class KExpertsCPUW8A8 (line 38) | class KExpertsCPUW8A8(KExpertsCPU): method forward (line 40) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor=None, ... class KTransformersExpertsW8A8 (line 70) | class KTransformersExpertsW8A8(KTransformersExperts): method forward (line 71) | def forward(self, input_tensor, expert_ids, weights, cuda_graph_idx=No... class KDeepseekV3MoEW8A8 (line 82) | class KDeepseekV3MoEW8A8(KDeepseekV3MoE): method forward (line 83) | def forward(self, hidden_states, stream=None, para_stream=None): method cpu_moe_kexperts (line 176) | def cpu_moe_kexperts(self, moe_kexperts_param) -> torch.Tensor: method moe_kexperts (line 181) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w... class KQwen3MoeSparseMoeBlockW8A8 (line 185) | class KQwen3MoeSparseMoeBlockW8A8(BaseInjectedModule): method __init__ (line 186) | def __init__( method set_inference_mode (line 226) | def set_inference_mode(self, mode: InferenceState): method cpu_moe_kexperts (line 231) | def cpu_moe_kexperts(self, moe_kexperts_param): method moe_kexperts (line 242) | def moe_kexperts( method forward (line 260) | def forward( FILE: archive/ktransformers/operators/ascend/ascend_gate.py class KDeepseekV3GateA2 (line 8) | class KDeepseekV3GateA2(KMoEGate): method load (line 9) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method forward (line 25) | def forward(self, hidden_states) -> torch.Tensor: FILE: archive/ktransformers/operators/ascend/ascend_layernorm.py class KDeepseekV3RMSNormW8A8 (line 32) | class KDeepseekV3RMSNormW8A8(BaseInjectedModule): method __init__ (line 33) | def __init__(self, method forward (line 46) | def forward(self, hidden_states): method load (line 51) | def load(self): method unload (line 55) | def unload(self): class KQwen3MoeRMSNormW8A8 (line 61) | class KQwen3MoeRMSNormW8A8(BaseInjectedModule): method __init__ (line 62) | def __init__(self, method forward (line 78) | def forward(self, x: torch.Tensor): method load (line 91) | def load(self): method unload (line 104) | def unload(self): class KQwen3FinalRMSNormNPU (line 108) | class KQwen3FinalRMSNormNPU(nn.Module): method __init__ (line 109) | def __init__(self, orig_module: nn.Module): method forward (line 123) | def forward(self, x: torch.Tensor): FILE: archive/ktransformers/operators/ascend/ascend_linear.py class KLinearW8A8 (line 34) | class KLinearW8A8(KLinearBase): method __init__ (line 35) | def __init__( method load_weight (line 46) | def load_weight(self, override_key: str | None = None, device: str | N... method load (line 102) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 106) | def unload(self): class KLinearTorchW8A8A2 (line 110) | class KLinearTorchW8A8A2(KLinearW8A8): method __init__ (line 111) | def __init__( method forward (line 131) | def forward(self, x: torch.Tensor, bsz_tensor) -> torch.Tensor: method load (line 136) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 184) | def unload(self): class KTransformersLinearW8A8A2 (line 200) | class KTransformersLinearW8A8A2(BaseInjectedModule, KLinearW8A8): method __init__ (line 201) | def __init__( method forward (line 229) | def forward(self, x, bsz_tensor=None): method load (line 238) | def load(self, w: dict | nn.Parameter | tuple | None = None, mode: Inf... method unload (line 272) | def unload(self): method set_inference_mode (line 279) | def set_inference_mode(self, mode: InferenceState): FILE: archive/ktransformers/operators/ascend/ascend_mlp.py class KDeepseekV3MLPW8A8A2V1 (line 26) | class KDeepseekV3MLPW8A8A2V1(BaseInjectedModule, DeepseekV3MLP): method forward (line 28) | def forward(self, x, is_prefill=None, use_cuda_graph=False): class KDeepseekV3MLPW8A8A2V2 (line 63) | class KDeepseekV3MLPW8A8A2V2(BaseInjectedModule, DeepseekV3MLP): method forward (line 65) | def forward(self, x, is_prefill=None, use_cuda_graph=False): class KQwen3MoeMLPW8A8A2 (line 92) | class KQwen3MoeMLPW8A8A2(BaseInjectedModule, Qwen3MoeMLP): method forward (line 94) | def forward(self, x, is_prefill=None, use_cuda_graph=False): FILE: archive/ktransformers/operators/attention.py function rotate_half (line 41) | def rotate_half(x): class KDeepseekV2Attention (line 48) | class KDeepseekV2Attention(BaseInjectedModule, DeepseekV2Attention): method __init__ (line 52) | def __init__(self, method get_absorbed (line 69) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]: method forward_chunck (line 77) | def forward_chunck( method forward_linux_triton (line 196) | def forward_linux_triton( method forward_linux_flashinfer (line 349) | def forward_linux_flashinfer( method forward_windows (line 525) | def forward_windows( method forward_xpu (line 591) | def forward_xpu( method forward (line 685) | def forward( class KLlamaAttention (line 746) | class KLlamaAttention(BaseInjectedModule): method __init__ (line 749) | def __init__(self, method apply_rotary_pos_emb (line 760) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq... method forward (line 785) | def forward( class KQwen3MoeAttentionIPEXLLM (line 876) | class KQwen3MoeAttentionIPEXLLM(BaseInjectedModule, Qwen3MoeAttention): method __init__ (line 877) | def __init__(self, method forward (line 893) | def forward( FILE: archive/ktransformers/operators/balance_serve_attention.py function rotate_half (line 26) | def rotate_half(x): class flashinfer_attn (line 32) | class flashinfer_attn(BaseInjectedModule, DeepseekV2Attention): method __init__ (line 33) | def __init__(self, method get_absorbed (line 48) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]: method forward (line 65) | def forward(self, class KQwen2MoeAttention (line 120) | class KQwen2MoeAttention(BaseInjectedModule, Qwen2MoeAttention): method __init__ (line 121) | def __init__(self, method apply_rotary_pos_emb (line 137) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq... method forward (line 164) | def forward(self, class KQwen3MoeAttention (line 206) | class KQwen3MoeAttention(BaseInjectedModule, Qwen3MoeAttention): method __init__ (line 207) | def __init__(self, method apply_rotary_pos_emb (line 223) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq... method forward (line 250) | def forward(self, class deepseek_torch_attn (line 296) | class deepseek_torch_attn(BaseInjectedModule, DeepseekV2Attention): method __init__ (line 297) | def __init__(self, method get_absorbed (line 312) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]: method forward (line 330) | def forward(self, class KSmallthinkerAttention (line 462) | class KSmallthinkerAttention(BaseInjectedModule, SmallthinkerAttention): method __init__ (line 463) | def __init__(self, method apply_rotary_pos_emb (line 477) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq... method forward (line 503) | def forward(self, class KGlm4MoeAttention (line 555) | class KGlm4MoeAttention(BaseInjectedModule, Glm4MoeAttention): method __init__ (line 556) | def __init__(self, method apply_rotary_pos_emb (line 570) | def apply_rotary_pos_emb( method forward (line 598) | def forward(self, class KQwen3NextAttention (line 654) | class KQwen3NextAttention(BaseInjectedModule, Qwen3NextAttention): method __init__ (line 655) | def __init__(self, method apply_rotary_pos_emb (line 670) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq... method forward (line 709) | def forward(self, class KQwen3NextGatedDeltaNet (line 763) | class KQwen3NextGatedDeltaNet(BaseInjectedModule, Qwen3NextGatedDeltaNet): method __init__ (line 764) | def __init__(self, method fix_query_key_value_ordering (line 778) | def fix_query_key_value_ordering(self, mixed_qkvz, mixed_ba): method forward (line 807) | def forward( FILE: archive/ktransformers/operators/base_operator.py class BaseInjectedModule (line 12) | class BaseInjectedModule(nn.Module): method __init__ (line 14) | def __init__(self, method __getattr__ (line 31) | def __getattr__(self, name: str) -> Any: method __setattr__ (line 51) | def __setattr__(self, name: str, value: Tensor | nn.Module) -> None: method forward (line 58) | def forward(self, *args, **kwargs): method load (line 61) | def load(self): FILE: archive/ktransformers/operators/cpuinfer.py class CPUInferKVCache (line 29) | class CPUInferKVCache: method __init__ (line 30) | def __init__( method load_kvcache (line 100) | def load_kvcache(self, tensor_file_path: str): method dump_kvcache (line 105) | def dump_kvcache( method update_cache_total_len (line 135) | def update_cache_total_len(self, cache_total_len: int): method attn (line 143) | def attn( method update_kvcache_one_block_fp16 (line 256) | def update_kvcache_one_block_fp16( method get_kvcache_one_block_fp16 (line 292) | def get_kvcache_one_block_fp16( method update_importance_one_block (line 328) | def update_importance_one_block( method get_importance_one_block (line 354) | def get_importance_one_block( method get_anchor_one_block (line 380) | def get_anchor_one_block(self, anchor: torch.Tensor, layer_id: int, bl... method update_anchor_one_block (line 406) | def update_anchor_one_block( method calc_anchor_all_layers (line 434) | def calc_anchor_all_layers( method clear_importance_all_layers (line 473) | def clear_importance_all_layers( method get_cache_total_len (line 512) | def get_cache_total_len(self): method update_kvcache_q4 (line 515) | def update_kvcache_q4( method update_kvcache_fp16 (line 528) | def update_kvcache_fp16( method get_kvcache_q4 (line 550) | def get_kvcache_q4( method get_kvcache_fp16 (line 563) | def get_kvcache_fp16( method get_and_update_kvcache_fp16 (line 584) | def get_and_update_kvcache_fp16( method update_importance (line 606) | def update_importance( method get_attn_sparsity (line 627) | def get_attn_sparsity( method attn_with_kvcache (line 665) | def attn_with_kvcache( method get_all_kvcache_one_layer (line 704) | def get_all_kvcache_one_layer( method get_importance (line 713) | def get_importance( method get_anchor (line 720) | def get_anchor( class CPUInfer (line 728) | class CPUInfer: method __init__ (line 732) | def __init__(self, thread_num): method submit (line 738) | def submit(self, task): method submit_with_cuda_stream (line 741) | def submit_with_cuda_stream(self, current_cuda_stream, task): method sync (line 744) | def sync(self): method sync_with_cuda_stream (line 747) | def sync_with_cuda_stream(self, current_cuda_stream): FILE: archive/ktransformers/operators/dynamic_attention.py class DynamicScaledDotProductAttention (line 30) | class DynamicScaledDotProductAttention: method __init__ (line 34) | def __init__( method get_attn_score_one_block (line 233) | def get_attn_score_one_block( method get_preselect_block_table_and_attn_score (line 271) | def get_preselect_block_table_and_attn_score( method get_attn_score (line 374) | def get_attn_score( method swap_in_and_swap_out (line 467) | def swap_in_and_swap_out(self, layer_idx, past_len, q_len, key, value): method calc_anchor (line 518) | def calc_anchor(self, cache_seqlens: int): method clear_importance (line 533) | def clear_importance(self, cache_seqlens: int): method clear_kvcache (line 549) | def clear_kvcache(self, cache_seqlens: int): method get_attn_sparsity (line 564) | def get_attn_sparsity( method apply (line 605) | def apply( method save (line 762) | def save(self, path: str, length: int): method load (line 775) | def load(self, path: str, length: int): FILE: archive/ktransformers/operators/experts.py function deduplicate_and_sort (line 48) | def deduplicate_and_sort(lst): function generate_cuda_graphs (line 50) | def generate_cuda_graphs(chunk_size: int) -> list: class KExpertsBase (line 68) | class KExpertsBase(ABC): method __init__ (line 69) | def __init__(self, key: str, gguf_loader: GGUFLoader, config: Pretrain... method forward (line 77) | def forward(self, input_tensor, expert_ids, weights): method load (line 81) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 85) | def unload(): method load_weights (line 88) | def load_weights(self, override_key: str | None = None, device: str = ... method load_multi (line 136) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"): class KExpertsCPU (line 143) | class KExpertsCPU(KExpertsBase): method __init__ (line 152) | def __init__( method load (line 169) | def load(self, w: dict | nn.Parameter | tuple | None = None, device:st... method submit_for_one_decode (line 293) | def submit_for_one_decode(self, input_tensor, expert_ids, weights, bsz... method sync_for_one_decode (line 310) | def sync_for_one_decode(self, cuda_graph_idx=0): method forward (line 320) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor=None, ... method unload (line 364) | def unload(self): method load_weights (line 367) | def load_weights(self, override_key: str | None = None, device: str = ... class KExpertsMarlin (line 437) | class KExpertsMarlin(KExpertsBase): method __init__ (line 440) | def __init__( method load (line 466) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 499) | def unload(self): method load_weights (line 506) | def load_weights(self, override_key: str | None = None): method forward (line 525) | def forward(self, hidden_states_cpu: torch.Tensor, selected_experts_cp... class KExpertsTorch (line 562) | class KExpertsTorch(KExpertsBase): method __init__ (line 568) | def __init__( method load (line 589) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 617) | def unload(self): method load_weights (line 623) | def load_weights(self, override_key: str | None = None): method forward (line 642) | def forward(self, hidden_states_cpu: torch.Tensor, selected_experts_cp... class KTransformersExperts (line 686) | class KTransformersExperts(BaseInjectedModule, KExpertsBase): method __init__ (line 687) | def __init__(self, method load (line 712) | def load(self, w: dict = None, mode: InferenceState = None, warmup: b... method unload (line 732) | def unload(self): method forward (line 739) | def forward(self, input_tensor, expert_ids, weights): method set_inference_mode (line 749) | def set_inference_mode(self, mode: InferenceState): class KQwen2MoeSparseMoeBlock (line 770) | class KQwen2MoeSparseMoeBlock(BaseInjectedModule, Qwen2MoeSparseMoeBlock): method forward (line 771) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: method moe_kexperts (line 825) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w... method moe_infer_simple (line 831) | def moe_infer_simple(self, hidden_states_cpu: torch.Tensor, selected_e... method moe_infer (line 845) | def moe_infer(self, hidden_states_cpu: torch.Tensor, selected_experts_... class KDeepseekV2MoE (line 874) | class KDeepseekV2MoE(BaseInjectedModule, DeepseekV2MoE): method forward (line 875) | def forward(self, hidden_states): method moe_kexperts (line 915) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w... method moe_infer_simple (line 921) | def moe_infer_simple( method moe_infer (line 939) | def moe_infer(self, x, topk_ids, topk_weight): class KDeepseekV3MoE (line 972) | class KDeepseekV3MoE(BaseInjectedModule, DeepseekV3MoE): method forward (line 974) | def forward(self, hidden_states): method moe_kexperts (line 1017) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w... method moe_infer_simple (line 1023) | def moe_infer_simple( method moe_infer (line 1041) | def moe_infer(self, x, topk_ids, topk_weight): class KMistralSparseMoEBlock (line 1074) | class KMistralSparseMoEBlock(BaseInjectedModule, MixtralSparseMoeBlock): method forward (line 1076) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: method moe_kexperts (line 1123) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w... method moe_infer_simple (line 1129) | def moe_infer_simple(self, hidden_states_cpu: torch.Tensor, selected_e... method moe_infer (line 1143) | def moe_infer(self, hidden_states_cpu: torch.Tensor, selected_experts_... class KDeepseekV3MoEV2 (line 1172) | class KDeepseekV3MoEV2(BaseInjectedModule, DeepseekV3MoE): method forward (line 1173) | def forward(self, hidden_states, bsz_tensor, cuda_graph_idx=0): method moe_on_cpuinfer (line 1215) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top... method moe_infer_simple (line 1222) | def moe_infer_simple( method moe_infer (line 1240) | def moe_infer(self, x, topk_ids, topk_weight): class KTransformersExpertsV2 (line 1273) | class KTransformersExpertsV2(BaseInjectedModule, KExpertsBase): method __init__ (line 1274) | def __init__(self, method load (line 1305) | def load(self, w: dict = None, mode: InferenceState = None, warmup: b... method unload (line 1325) | def unload(self): method forward (line 1332) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor, cuda_... method set_inference_mode (line 1342) | def set_inference_mode(self, mode: InferenceState): class KSmallthinkerExperts (line 1353) | class KSmallthinkerExperts(BaseInjectedModule, KExpertsBase): method __init__ (line 1354) | def __init__(self, method load (line 1378) | def load(self, w: dict = None, mode: InferenceState = None, warmup: b... method unload (line 1398) | def unload(self): method forward (line 1405) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor, cuda_... method set_inference_mode (line 1415) | def set_inference_mode(self, mode: InferenceState): class KGlm4Experts (line 1425) | class KGlm4Experts(BaseInjectedModule, KExpertsBase): method __init__ (line 1426) | def __init__(self, method load (line 1450) | def load(self, w: dict = None, mode: InferenceState = None, warmup: b... method unload (line 1470) | def unload(self): method forward (line 1477) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor, cuda_... method set_inference_mode (line 1487) | def set_inference_mode(self, mode: InferenceState): class KQwen2MoeSparseMoeBlockV2 (line 1498) | class KQwen2MoeSparseMoeBlockV2(BaseInjectedModule, Qwen2MoeSparseMoeBlo... method forward (line 1499) | def forward(self, hidden_states, bsz_tensor, cuda_graph_idx=0): method moe_on_cpuinfer (line 1553) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top... method moe_infer_simple (line 1560) | def moe_infer_simple( method moe_infer (line 1578) | def moe_infer(self, x, topk_ids, topk_weight): class KQwen3MoeSparseMoeBlockV2 (line 1611) | class KQwen3MoeSparseMoeBlockV2(BaseInjectedModule, Qwen3MoeSparseMoeBlo... method forward (line 1612) | def forward(self, hidden_states, bsz_tensor=None, cuda_graph_idx=0): method moe_on_cpuinfer (line 1675) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top... method moe_infer_simple (line 1682) | def moe_infer_simple( method moe_infer (line 1700) | def moe_infer(self, x, topk_ids, topk_weight): class KSmallthinkerMoeBlock (line 1734) | class KSmallthinkerMoeBlock(BaseInjectedModule, SmallthinkerMoeBlock): method forward (line 1735) | def forward(self, router_input: torch.Tensor, hidden_states: torch.Ten... method moe_on_cpuinfer (line 1809) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top... method moe_infer_simple (line 1816) | def moe_infer_simple( method moe_infer (line 1834) | def moe_infer(self, x, topk_ids, topk_weight): class KGlm4MoeMoE (line 1868) | class KGlm4MoeMoE(BaseInjectedModule, Glm4MoeMoE): method forward (line 1869) | def forward(self, hidden_states, bsz_tensor=None, cuda_graph_idx=0): method moe_on_cpuinfer (line 1915) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top... method moe_infer_simple (line 1922) | def moe_infer_simple( method moe_infer (line 1940) | def moe_infer(self, x, topk_ids, topk_weight): class KQwen3NextSparseMoeBlockV2 (line 1974) | class KQwen3NextSparseMoeBlockV2(BaseInjectedModule, Qwen3NextSparseMoeB... method forward (line 1975) | def forward(self, hidden_states, bsz_tensor=None, cuda_graph_idx=0): method moe_on_cpuinfer (line 2041) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top... method moe_infer_simple (line 2048) | def moe_infer_simple( method moe_infer (line 2066) | def moe_infer(self, x, topk_ids, topk_weight): FILE: archive/ktransformers/operators/flashinfer_batch_prefill_wrapper.py function setup_seed (line 13) | def setup_seed(seed): class flashInferAttn (line 34) | class flashInferAttn(): method __init__ (line 37) | def __init__(self, method plan (line 72) | def plan(self, method calc_batch_indices (line 106) | def calc_batch_indices(self, ragged_size = None): method forward (line 114) | def forward(self, q, k_cache, v_cache, k, v): function testCudaGraph (line 123) | def testCudaGraph(): function testAttentionFlashInfer (line 267) | def testAttentionFlashInfer( FILE: archive/ktransformers/operators/flashinfer_wrapper.py function attention_ref_torch (line 30) | def attention_ref_torch( class MLAWrapper (line 78) | class MLAWrapper(): method __init__ (line 79) | def __init__(self, method plan (line 117) | def plan(self, method run (line 160) | def run(self, q_nope, q_pe, ckv, k_pe, return_lse = False): class MLAWrapperSingleton (line 163) | class MLAWrapperSingleton(): method get_instance (line 167) | def get_instance(cls, device, *args, **kwargs)->MLAWrapper: method make_instance (line 173) | def make_instance(cls, device, *args, **kwargs): method plan_all (line 177) | def plan_all(cls, qo_indptr, method need_plan_all (line 206) | def need_plan_all(cls): method reset_buffer (line 211) | def reset_buffer(cls): method update_buffer (line 216) | def update_buffer(cls, max_pages): function checksame (line 222) | def checksame(): FILE: archive/ktransformers/operators/gate.py class KMoEGateBase (line 15) | class KMoEGateBase(ABC): method __init__ (line 16) | def __init__(self, method forward (line 32) | def forward(self, input_tensor, expert_ids, weights): method load (line 36) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 40) | def unload(): method load_weights (line 43) | def load_weights(self, override_key: str | None = None, device: str = ... method load_multi (line 84) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"): class KMoEGate (line 91) | class KMoEGate(BaseInjectedModule, KMoEGateBase): method __init__ (line 92) | def __init__( method forward (line 107) | def forward(self, hidden_states) -> torch.Tensor: method load (line 110) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 122) | def unload(self): class KMoEGateQwen2Moe (line 129) | class KMoEGateQwen2Moe(BaseInjectedModule, KMoEGateBase): method __init__ (line 130) | def __init__( method forward (line 159) | def forward(self, hidden_states) -> torch.Tensor: method load (line 177) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 191) | def unload(self): class KMoEGateIPEXLLM (line 198) | class KMoEGateIPEXLLM(KMoEGate): method __init__ (line 199) | def __init__( method forward (line 214) | def forward(self, hidden_states) -> torch.Tensor: FILE: archive/ktransformers/operators/layernorm.py class RMSNorm (line 46) | class RMSNorm(DeepseekV3RMSNorm, BaseInjectedModule): method __init__ (line 47) | def __init__(self, method forward (line 59) | def forward( method forward_native (line 77) | def forward_native( class KQwen2MoeRMSNorm (line 87) | class KQwen2MoeRMSNorm(Qwen2MoeRMSNorm, BaseInjectedModule): method __init__ (line 88) | def __init__(self, method forward (line 100) | def forward( method forward_native (line 118) | def forward_native( class KQwen3MoeRMSNorm (line 128) | class KQwen3MoeRMSNorm(Qwen3MoeRMSNorm, BaseInjectedModule): method __init__ (line 129) | def __init__(self, method forward (line 141) | def forward( method forward_native (line 162) | def forward_native( class KQwen3NextRMSNorm (line 171) | class KQwen3NextRMSNorm(Qwen3NextRMSNorm, BaseInjectedModule): method __init__ (line 172) | def __init__(self, method _norm (line 184) | def _norm(self, x): method forward (line 187) | def forward(self, x, num_tokens_tensors, residual = None): method extra_repr (line 201) | def extra_repr(self): class KSmallthinkerRMSNorm (line 205) | class KSmallthinkerRMSNorm(SmallthinkerRMSNorm, BaseInjectedModule): method __init__ (line 206) | def __init__(self, method forward (line 218) | def forward( method forward_native (line 239) | def forward_native( class KGlm4MoeRMSNorm (line 248) | class KGlm4MoeRMSNorm(Glm4MoeRMSNorm, BaseInjectedModule): method __init__ (line 249) | def __init__(self, method forward (line 261) | def forward( method forward_native (line 282) | def forward_native( class DeepseekV3RMSNormTorch (line 293) | class DeepseekV3RMSNormTorch(DeepseekV3RMSNorm, BaseInjectedModule): method __init__ (line 294) | def __init__(self, method forward (line 306) | def forward( class KDeepseekRMSNormIPEXLLM (line 325) | class KDeepseekRMSNormIPEXLLM(DeepseekV3RMSNorm, BaseInjectedModule): method __init__ (line 326) | def __init__(self, method forward (line 339) | def forward(self, x: torch.Tensor) -> torch.Tensor: method load (line 347) | def load(self): FILE: archive/ktransformers/operators/linear.py class KLinearBase (line 57) | class KLinearBase(ABC): method __init__ (line 58) | def __init__( method forward (line 89) | def forward(self, x: torch.Tensor) -> torch.Tensor: method load_weight (line 92) | def load_weight(self, override_key: str | None = None, device: str | N... method load_multi (line 143) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"): method load (line 150) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 154) | def unload(self): class KLinearTorch (line 158) | class KLinearTorch(KLinearBase): method __init__ (line 159) | def __init__( method forward (line 174) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None, **kw... method load (line 185) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 212) | def unload(self): class KLinearQ8 (line 218) | class KLinearQ8(KLinearBase): method __init__ (line 219) | def __init__( method forward (line 237) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None) -> t... method _dequantize_weight (line 254) | def _dequantize_weight(self, q_matrix, scales, bits=8): method _quantize_weight (line 290) | def _quantize_weight(self, matrix, bits=8): method load (line 345) | def load(self, w: Union[Dict, nn.Parameter, Tuple, None] = None, devic... method unload (line 376) | def unload(self): class KLinearFP8 (line 388) | class KLinearFP8(KLinearBase): method __init__ (line 394) | def __init__( method forward (line 409) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor) -> torch.... method load (line 416) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 431) | def unload(self): class VLinearMarlin (line 439) | class VLinearMarlin(KLinearBase): method __init__ (line 445) | def __init__( method load (line 477) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method forward (line 525) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->... method unload (line 564) | def unload(self): method _pad_input (line 574) | def _pad_input(self, x): class KLinearMarlin (line 595) | class KLinearMarlin(KLinearBase): method __init__ (line 601) | def __init__( method load (line 633) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method forward (line 679) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None, **kw... method unload (line 713) | def unload(self): class KLinearCPUInfer (line 723) | class KLinearCPUInfer(KLinearBase): method __init__ (line 725) | def __init__( method forward (line 748) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->... method load (line 787) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method load_weights (line 808) | def load_weights(self, w: dict | nn.Parameter | tuple | None = None, d... method unload (line 821) | def unload(self): class KLinearIPEXLLM (line 827) | class KLinearIPEXLLM(KLinearBase): method __init__ (line 828) | def __init__( method forward (line 846) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->... method load (line 857) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 890) | def unload(self): class KTransformersLinear (line 906) | class KTransformersLinear(BaseInjectedModule, KLinearBase): method __init__ (line 907) | def __init__( method forward (line 935) | def forward(self, x, bsz_tensor=None): method load (line 944) | def load(self, w: dict | nn.Parameter | tuple | None = None, mode: Inf... method unload (line 966) | def unload(self): method set_inference_mode (line 973) | def set_inference_mode(self, mode: InferenceState): FILE: archive/ktransformers/operators/mlp.py class kDeepseekV3MLP (line 10) | class kDeepseekV3MLP(DeepseekV3MLP, BaseInjectedModule): method __init__ (line 11) | def __init__(self, method forward (line 22) | def forward(self, x, bsz_tensor): class KQwen2MoeMLP (line 25) | class KQwen2MoeMLP(Qwen2MoeMLP, BaseInjectedModule): method __init__ (line 26) | def __init__(self, method forward (line 37) | def forward(self, x, bsz_tensor): class KSmallthinkerDenseMlpBlock (line 42) | class KSmallthinkerDenseMlpBlock(SmallthinkerDenseMlpBlock, BaseInjected... method __init__ (line 43) | def __init__(self, method forward (line 53) | def forward(self, x, bsz_tensor): class KGlm4MoeMLP (line 57) | class KGlm4MoeMLP(Glm4MoeMLP, BaseInjectedModule): method __init__ (line 58) | def __init__(self, method forward (line 68) | def forward(self, x, bsz_tensor): FILE: archive/ktransformers/operators/models.py class KQwen2MoeModel (line 185) | class KQwen2MoeModel(BaseInjectedModule): method __init__ (line 193) | def __init__( method forward (line 212) | def forward( method load_layer_to (line 443) | def load_layer_to(self, layer: Qwen2MoeDecoderLayer, target: Inference... class KDeepseekV2Model (line 547) | class KDeepseekV2Model(BaseInjectedModule): method __init__ (line 555) | def __init__( method forward (line 574) | def forward( method load_layer_to (line 843) | def load_layer_to(self, layer: DeepseekV2DecoderLayer, target: Inferen... class LlamaPreTrainedModel (line 969) | class LlamaPreTrainedModel(PreTrainedModel): method _init_weights (line 981) | def _init_weights(self, module): class KLlamaModel (line 993) | class KLlamaModel(BaseInjectedModule): method __init__ (line 1003) | def __init__( method get_input_embeddings (line 1050) | def get_input_embeddings(self): method set_input_embeddings (line 1053) | def set_input_embeddings(self, value): method forward (line 1057) | def forward( method forward_chunk (line 1194) | def forward_chunk( method _update_causal_mask (line 1295) | def _update_causal_mask( FILE: archive/ktransformers/operators/triton_attention.py function tanh (line 11) | def tanh(x): function _fwd_grouped_kernel_stage1 (line 16) | def _fwd_grouped_kernel_stage1( function _decode_grouped_att_m_fwd (line 165) | def _decode_grouped_att_m_fwd( function _fwd_kernel_stage2 (line 258) | def _fwd_kernel_stage2( function _decode_softmax_reducev_fwd (line 313) | def _decode_softmax_reducev_fwd( function decode_attention_fwd_grouped (line 358) | def decode_attention_fwd_grouped( FILE: archive/ktransformers/operators/triton_attention_prefill.py function _fwd_kernel (line 24) | def _fwd_kernel( function context_attention_fwd (line 159) | def context_attention_fwd( FILE: archive/ktransformers/optimize/optimize.py function inject (line 28) | def inject(module, local_optimization_dict, model_config:AutoConfig ,ggu... function del_meta (line 56) | def del_meta(module:nn.Module): function gen_optimize_config (line 67) | def gen_optimize_config(module: nn.Module, out_data: Mapping, rule_list:... function translate_model_config (line 121) | def translate_model_config(model_config: PretrainedConfig): function optimize_and_load_gguf (line 129) | def optimize_and_load_gguf(module: nn.Module, rule_file: str, gguf_path:... FILE: archive/ktransformers/server/api/ollama/completions.py class OllamaGenerateCompletionRequest (line 21) | class OllamaGenerateCompletionRequest(BaseModel): class OllamaGenerationStreamResponse (line 45) | class OllamaGenerationStreamResponse(BaseModel): class OllamaGenerationResponse (line 51) | class OllamaGenerationResponse(BaseModel): function generate (line 58) | async def generate(request: Request, input: OllamaGenerateCompletionRequ... class OllamaChatCompletionMessage (line 103) | class OllamaChatCompletionMessage(BaseModel): class OllamaChatCompletionRequest (line 107) | class OllamaChatCompletionRequest(BaseModel): class OllamaChatCompletionStreamResponse (line 113) | class OllamaChatCompletionStreamResponse(BaseModel): class OllamaChatCompletionResponse (line 126) | class OllamaChatCompletionResponse(BaseModel): function chat (line 140) | async def chat(request: Request, input: OllamaChatCompletionRequest): class OllamaModel (line 227) | class OllamaModel(BaseModel): function tags (line 235) | async def tags(): class OllamaModelInfo (line 240) | class OllamaModelInfo(BaseModel): class OllamaShowRequest (line 244) | class OllamaShowRequest(BaseModel): class OllamaShowDetial (line 249) | class OllamaShowDetial(BaseModel): class OllamaShowResponse (line 257) | class OllamaShowResponse(BaseModel): class Config (line 264) | class Config: function show (line 268) | async def show(request: Request, input: OllamaShowRequest): FILE: archive/ktransformers/server/api/openai/__init__.py function post_db_creation_operations (line 14) | def post_db_creation_operations(): FILE: archive/ktransformers/server/api/openai/assistants/assistants.py function create_assistant (line 19) | async def create_assistant( function list_assistants (line 26) | async def list_assistants( function list_assistants_with_status (line 38) | async def list_assistants_with_status( function retrieve_assistant (line 48) | async def retrieve_assistant( function modify_assistant (line 55) | async def modify_assistant( function delete_assistant (line 63) | async def delete_assistant(assistant_id: str): function get_related_thread (line 69) | async def get_related_thread(assistant_id: ObjectID): function create_default_assistant (line 74) | def create_default_assistant(): function test_create_assistant (line 90) | def test_create_assistant(): FILE: archive/ktransformers/server/api/openai/assistants/messages.py function create_message (line 16) | async def create_message(thread_id: str, msg: MessageCreate): function list_messages (line 26) | async def list_messages( function retrieve_message (line 38) | async def retrieve_message(thread_id: ObjectID, message_id: ObjectID): function modify_message (line 43) | async def modify_message(thread_id: ObjectID, message_id: ObjectID, msg:... function delete_message (line 49) | async def delete_message(thread_id: ObjectID, message_id: ObjectID): FILE: archive/ktransformers/server/api/openai/assistants/runs.py function create_run (line 20) | async def create_run(request: Request, thread_id: str, run_create: RunCr... function create_thread_and_run (line 40) | async def create_thread_and_run(run_thread: RunThreadCreate): function list_runs (line 45) | async def list_runs( function retrieve_run (line 56) | async def retrieve_run( function modify_run (line 67) | async def modify_run( function submit_tool_outputs_to_run (line 76) | async def submit_tool_outputs_to_run(thread_id: str, run_id: str, submit... function cancel_run (line 81) | async def cancel_run(thread_id: str, run_id: str): FILE: archive/ktransformers/server/api/openai/assistants/threads.py function create_thread (line 14) | async def create_thread(thread: ThreadCreate): function list_threads (line 19) | async def list_threads(limit: Optional[int] = 20, order: Order = Order.D... function retrieve_thread (line 24) | async def retrieve_thread(thread_id: ObjectID): function modify_thread (line 29) | async def modify_thread(thread_id: ObjectID, thread: ThreadModify): function delete_thread (line 34) | async def delete_thread(thread_id: ObjectID): FILE: archive/ktransformers/server/api/openai/endpoints/chat.py class Choice (line 22) | class Choice(BaseModel): class ChatCompletion (line 30) | class ChatCompletion(BaseModel): class ChatCompletionMessageToolCallFunction (line 41) | class ChatCompletionMessageToolCallFunction(BaseModel): class ChatCompletionMessageToolCall (line 45) | class ChatCompletionMessageToolCall(BaseModel): class ChatCompletionMessage (line 50) | class ChatCompletionMessage(BaseModel): function list_models (line 58) | async def list_models(): function getTools (line 61) | def getTools(buffer): function get_tool_instructions (line 117) | def get_tool_instructions(): function chat_completion (line 136) | async def chat_completion(request: Request, create: ChatCompletionCreate): FILE: archive/ktransformers/server/api/openai/legacy/completions.py function create_completion (line 15) | async def create_completion(request:Request, create:CompletionCreate): FILE: archive/ktransformers/server/api/web/system.py function system_info (line 8) | def system_info(): FILE: archive/ktransformers/server/args.py class ArgumentParser (line 10) | class ArgumentParser: method __init__ (line 11) | def __init__(self, cfg): method parse_args (line 14) | def parse_args(self): FILE: archive/ktransformers/server/backend/args.py class ConfigArgs (line 6) | class ConfigArgs(BaseModel): class Config (line 15) | class Config: FILE: archive/ktransformers/server/backend/base.py class BackendInterfaceBase (line 27) | class BackendInterfaceBase: method __init__ (line 36) | def __init__(self, args:ConfigArgs = default_args): method inference (line 40) | async def inference(self,local_messages,request_unique_id:Optional[str... method report_last_time_performance (line 57) | def report_last_time_performance(self): class ThreadContext (line 70) | class ThreadContext: method __init__ (line 89) | def __init__(self, run: RunObject,interface:BackendInterfaceBase, args... method get_local_messages (line 102) | def get_local_messages(self): method update_by_run (line 109) | def update_by_run(self,run:RunObject,args:ConfigArgs = default_args): method put_user_message (line 113) | def put_user_message(self, message: MessageObject): method delete_user_message (line 119) | def delete_user_message(self,message_id: ObjectID): method work (line 122) | async def work(self)->AsyncIterator: FILE: archive/ktransformers/server/backend/context_manager.py class ThreadContextManager (line 17) | class ThreadContextManager: method __init__ (line 22) | def __init__(self,interface) -> None: method get_context_by_run_object (line 29) | async def get_context_by_run_object(self, run: RunObject) -> ThreadCon... method get_context_by_thread_id (line 57) | async def get_context_by_thread_id(self, thread_id: ObjectID) -> Optio... FILE: archive/ktransformers/server/backend/interfaces/balance_serve.py function chat_stream (line 102) | async def chat_stream(queue: asyncio.Queue, tokenizer: AutoTokenizer): function fill_generated_tokens (line 122) | def fill_generated_tokens(query_updates: list[sched_ext.QueryUpdate], ge... function report_last_time_performance (line 132) | def report_last_time_performance(profiler: Profiler): class Engine (line 144) | class Engine: method __init__ (line 152) | def __init__(self, args: ConfigArgs = default_args, generated_token_qu... method sampling (line 300) | def sampling(self, forward_output: ForwardBatchOutput): method loop (line 323) | def loop(self): class BalanceServeThreadContext (line 383) | class BalanceServeThreadContext(ThreadContext): method get_local_messages (line 384) | def get_local_messages(self): function init_distributed (line 392) | def init_distributed(rank: int, function run_engine (line 408) | def run_engine(args, token_queue, broadcast_endpoint, event, kvcache_eve... class BalanceServeInterface (line 427) | class BalanceServeInterface(BackendInterfaceBase): method __init__ (line 443) | def __init__(self, args: ConfigArgs = default_args, input_args=None): method get_params (line 529) | def get_params(self, temperature: Optional[float] = None, top_p: Optio... method run_queue_proxy (line 550) | def run_queue_proxy(self): method lifespan (line 556) | async def lifespan(self, app: FastAPI): method queue_proxy (line 560) | async def queue_proxy(self): method tokenize_prompt (line 577) | def tokenize_prompt(self, prompt: str): method format_and_tokenize_input_ids (line 581) | def format_and_tokenize_input_ids(self, thread_id: ObjectID, messages:... method inference (line 601) | async def inference(self, local_messages, thread_id: str, temperature:... FILE: archive/ktransformers/server/backend/interfaces/exllamav2.py class ExllamaThreadContext (line 14) | class ExllamaThreadContext(ThreadContext): method __init__ (line 15) | def __init__(self, run: RunObject, args: ConfigArgs = default_args) ->... method get_interface (line 18) | def get_interface(self): method get_local_messages (line 21) | def get_local_messages(self): class ExllamaInterface (line 27) | class ExllamaInterface(BackendInterfaceBase): method __init__ (line 29) | def __init__(self, args: ConfigArgs = ...): method tokenize_prompt (line 32) | def tokenize_prompt(self, prompt: str) -> torch.Tensor: method inference (line 35) | async def inference(self,local_messages,request_unique_id:Optional[str... FILE: archive/ktransformers/server/backend/interfaces/ktransformers.py class KTransformersThreadContext (line 52) | class KTransformersThreadContext(TransformersThreadContext): class KTransformersInterface (line 56) | class KTransformersInterface(TransformersInterface): method __init__ (line 57) | def __init__(self, args: ConfigArgs = default_args, input_args=None): method decode_one_tokens (line 130) | def decode_one_tokens(self): method prefill (line 206) | def prefill(self, input_ids: torch.Tensor, is_new: bool, temperature: ... method active_cache_position (line 353) | def active_cache_position(self): method sampling (line 357) | def sampling(self, logits, do_sample): method verify_by_tokenid (line 377) | def verify_by_tokenid(self, main_token: int, draft_token: int): method verify_speculative_decoding (line 380) | def verify_speculative_decoding(self, main_prob: torch.Tensor, draft_p... method logits_to_token (line 397) | def logits_to_token(self, logits: torch.Tensor): method inference (line 410) | async def inference(self, local_messages, thread_id: str, temperature:... method sync_inference (line 424) | def sync_inference(self, local_messages, thread_id: str, temperature: ... FILE: archive/ktransformers/server/backend/interfaces/transformers.py class TextStreamer (line 47) | class TextStreamer: method __init__ (line 49) | def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = Fal... method reset (line 59) | def reset(self): method put (line 63) | def put(self, value) -> Optional[str]: method end (line 93) | def end(self) -> Optional[str]: method _is_chinese_char (line 106) | def _is_chinese_char(self, cp): class TransformersThreadContext (line 131) | class TransformersThreadContext(ThreadContext): method get_local_messages (line 132) | def get_local_messages(self): class TransformersInterface (line 140) | class TransformersInterface(BackendInterfaceBase): method __init__ (line 156) | def __init__(self, args: ConfigArgs = default_args): method current_ids (line 175) | def current_ids(self): method active_cache_position (line 179) | def active_cache_position(self): method tokenize_prompt (line 182) | def tokenize_prompt(self, prompt: str): method format_and_tokenize_input_ids (line 186) | def format_and_tokenize_input_ids(self, thread_id: ObjectID, messages:... method append_new_tokens (line 224) | def append_new_tokens(self, new_tokens: int) -> Optional[str]: method tf_logits_warper (line 231) | def tf_logits_warper(generation_config): method prepare_logits_wrapper (line 282) | def prepare_logits_wrapper(self, inputs, device, temperature: Optional... method logits_to_token (line 301) | def logits_to_token(self, logits: torch.Tensor): method decode_one_tokens (line 316) | def decode_one_tokens(self): method prefill (line 332) | def prefill(self, input_ids: torch.Tensor, is_new: bool, temperature: ... method generate (line 409) | def generate(self): method check_is_new (line 445) | def check_is_new(self, thread_id: str): method inference (line 458) | async def inference(self, local_messages, thread_id: str, temperature:... FILE: archive/ktransformers/server/balance_serve/inference/config.py class ModelConfig (line 21) | class ModelConfig: method __init__ (line 58) | def __init__(self, config): method load_config (line 72) | def load_config(self): class ParallelConfig (line 90) | class ParallelConfig: method __init__ (line 91) | def __init__( class AttnConfig (line 100) | class AttnConfig: method __init__ (line 106) | def __init__(self, config): class SamplerConfig (line 113) | class SamplerConfig(): method __init__ (line 118) | def __init__(self, config): function load_yaml_config (line 123) | def load_yaml_config(file_path): class LLMConfig (line 130) | class LLMConfig: method __init__ (line 137) | def __init__(self, config_file): FILE: archive/ktransformers/server/balance_serve/inference/distributed/communication_op.py function tensor_model_parallel_all_reduce (line 15) | def tensor_model_parallel_all_reduce(input_: torch.Tensor, bsz_tensor: t... function tensor_model_parallel_all_gather (line 20) | def tensor_model_parallel_all_gather( function tensor_model_parallel_gather (line 27) | def tensor_model_parallel_gather( function broadcast_tensor_dict (line 34) | def broadcast_tensor_dict( FILE: archive/ktransformers/server/balance_serve/inference/distributed/cuda_wrapper.py class cudaIpcMemHandle_t (line 21) | class cudaIpcMemHandle_t(ctypes.Structure): class Function (line 26) | class Function: function find_loaded_library (line 32) | def find_loaded_library(lib_name) -> Optional[str]: class CudaRTLibrary (line 58) | class CudaRTLibrary: method __init__ (line 100) | def __init__(self, so_file: Optional[str] = None): method CUDART_CHECK (line 120) | def CUDART_CHECK(self, result: cudaError_t) -> None: method cudaGetErrorString (line 125) | def cudaGetErrorString(self, error: cudaError_t) -> str: method cudaSetDevice (line 128) | def cudaSetDevice(self, device: int) -> None: method cudaDeviceSynchronize (line 131) | def cudaDeviceSynchronize(self) -> None: method cudaDeviceReset (line 134) | def cudaDeviceReset(self) -> None: method cudaMalloc (line 137) | def cudaMalloc(self, size: int) -> ctypes.c_void_p: method cudaFree (line 142) | def cudaFree(self, devPtr: ctypes.c_void_p) -> None: method cudaMemset (line 145) | def cudaMemset(self, devPtr: ctypes.c_void_p, value: int, method cudaMemcpy (line 149) | def cudaMemcpy(self, dst: ctypes.c_void_p, src: ctypes.c_void_p, method cudaIpcGetMemHandle (line 155) | def cudaIpcGetMemHandle(self, method cudaIpcOpenMemHandle (line 162) | def cudaIpcOpenMemHandle(self, FILE: archive/ktransformers/server/balance_serve/inference/distributed/custom_all_reduce.py function _can_p2p (line 25) | def _can_p2p(rank: int, world_size: int) -> bool: function is_weak_contiguous (line 37) | def is_weak_contiguous(inp: torch.Tensor): class CustomAllreduce (line 44) | class CustomAllreduce: method __init__ (line 49) | def __init__( method create_shared_buffer (line 179) | def create_shared_buffer( method free_shared_buffer (line 204) | def free_shared_buffer( method capture (line 212) | def capture(self): method register_graph_buffers (line 226) | def register_graph_buffers(self): method should_custom_ar (line 244) | def should_custom_ar(self, inp: torch.Tensor): method all_reduce (line 259) | def all_reduce( method custom_all_reduce (line 284) | def custom_all_reduce(self, input: torch.Tensor, bsz_tensor: torch.Ten... method close (line 302) | def close(self): method __del__ (line 309) | def __del__(self): FILE: archive/ktransformers/server/balance_serve/inference/distributed/custom_all_reduce_utils.py function producer (line 19) | def producer( function consumer (line 53) | def consumer( function can_actually_p2p (line 94) | def can_actually_p2p( function gpu_p2p_access_check (line 194) | def gpu_p2p_access_check(src: int, tgt: int) -> bool: FILE: archive/ktransformers/server/balance_serve/inference/distributed/parallel_state.py class GraphCaptureContext (line 43) | class GraphCaptureContext: function _split_tensor_dict (line 50) | def _split_tensor_dict( function _get_unique_name (line 79) | def _get_unique_name(name: str) -> str: function _register_group (line 95) | def _register_group(group: "GroupCoordinator") -> None: function inplace_all_reduce (line 101) | def inplace_all_reduce(tensor: torch.Tensor, group_name: str) -> None: function inplace_all_reduce_fake (line 108) | def inplace_all_reduce_fake(tensor: torch.Tensor, group_name: str) -> None: function outplace_all_reduce (line 118) | def outplace_all_reduce(tensor: torch.Tensor, group_name: str, bsz_tenso... function outplace_all_reduce_fake (line 125) | def outplace_all_reduce_fake(tensor: torch.Tensor, group_name: str, bsz_... class GroupCoordinator (line 136) | class GroupCoordinator: method __init__ (line 169) | def __init__( method first_rank (line 271) | def first_rank(self): method last_rank (line 276) | def last_rank(self): method is_first_rank (line 281) | def is_first_rank(self): method is_last_rank (line 286) | def is_last_rank(self): method next_rank (line 291) | def next_rank(self): method prev_rank (line 298) | def prev_rank(self): method graph_capture (line 305) | def graph_capture( method all_reduce (line 352) | def all_reduce(self, input_: torch.Tensor, bsz_tensor: torch.Tensor, i... method _all_reduce_out_place (line 406) | def _all_reduce_out_place(self, input_: torch.Tensor, bsz_tensor: torc... method _all_reduce_in_place (line 414) | def _all_reduce_in_place(self, input_: torch.Tensor) -> None: method all_gather (line 421) | def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Ten... method gather (line 464) | def gather( method broadcast (line 499) | def broadcast(self, input_: torch.Tensor, src: int = 0): method broadcast_object (line 514) | def broadcast_object(self, obj: Optional[Any] = None, src: int = 0): method broadcast_object_list (line 538) | def broadcast_object_list( method send_object (line 555) | def send_object(self, obj: Any, dst: int) -> None: method recv_object (line 582) | def recv_object(self, src: int) -> Any: method broadcast_tensor_dict (line 618) | def broadcast_tensor_dict( method send_tensor_dict (line 700) | def send_tensor_dict( method recv_tensor_dict (line 753) | def recv_tensor_dict( method barrier (line 815) | def barrier(self): method send (line 824) | def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None: method recv (line 836) | def recv( method destroy (line 852) | def destroy(self): function get_world_group (line 870) | def get_world_group() -> GroupCoordinator: function init_world_group (line 875) | def init_world_group( function init_model_parallel_group (line 891) | def init_model_parallel_group( function get_tp_group (line 918) | def get_tp_group() -> GroupCoordinator: function get_pp_group (line 929) | def get_pp_group() -> GroupCoordinator: function graph_capture (line 939) | def graph_capture(): function set_custom_all_reduce (line 962) | def set_custom_all_reduce(enable: bool): function init_distributed_environment (line 967) | def init_distributed_environment( function initialize_model_parallel (line 1014) | def initialize_model_parallel( function ensure_model_parallel_initialized (line 1091) | def ensure_model_parallel_initialized( function model_parallel_is_initialized (line 1120) | def model_parallel_is_initialized(): function patch_tensor_parallel_group (line 1129) | def patch_tensor_parallel_group(tp_group: GroupCoordinator): function get_tensor_model_parallel_world_size (line 1153) | def get_tensor_model_parallel_world_size(): function get_tensor_model_parallel_rank (line 1158) | def get_tensor_model_parallel_rank(): function destroy_model_parallel (line 1163) | def destroy_model_parallel(): function destroy_distributed_environment (line 1176) | def destroy_distributed_environment(): function cleanup_dist_env_and_memory (line 1185) | def cleanup_dist_env_and_memory(shutdown_ray: bool = False): function in_the_same_node_as (line 1199) | def in_the_same_node_as(pg: ProcessGroup, source_rank: int = 0) -> List[... FILE: archive/ktransformers/server/balance_serve/inference/distributed/pynccl.py class PyNcclCommunicator (line 21) | class PyNcclCommunicator: method __init__ (line 23) | def __init__( method all_reduce (line 119) | def all_reduce( method send (line 143) | def send(self, tensor: torch.Tensor, dst: int, stream=None): method recv (line 161) | def recv(self, tensor: torch.Tensor, src: int, stream=None): method change_state (line 180) | def change_state( FILE: archive/ktransformers/server/balance_serve/inference/distributed/pynccl_wrapper.py class ncclUniqueId (line 41) | class ncclUniqueId(ctypes.Structure): class ncclDataTypeEnum (line 51) | class ncclDataTypeEnum: method from_torch (line 70) | def from_torch(cls, dtype: torch.dtype) -> int: class ncclRedOpTypeEnum (line 93) | class ncclRedOpTypeEnum: method from_torch (line 102) | def from_torch(cls, op: ReduceOp) -> int: class Function (line 117) | class Function: class NCCLLibrary (line 123) | class NCCLLibrary: method __init__ (line 184) | def __init__(self, so_file: Optional[str] = None): method ncclGetErrorString (line 215) | def ncclGetErrorString(self, result: ncclResult_t) -> str: method NCCL_CHECK (line 218) | def NCCL_CHECK(self, result: ncclResult_t) -> None: method ncclGetVersion (line 223) | def ncclGetVersion(self) -> str: method ncclGetUniqueId (line 233) | def ncclGetUniqueId(self) -> ncclUniqueId: method ncclCommInitRank (line 239) | def ncclCommInitRank(self, world_size: int, unique_id: ncclUniqueId, method ncclAllReduce (line 247) | def ncclAllReduce(self, sendbuff: buffer_type, recvbuff: buffer_type, method ncclSend (line 259) | def ncclSend(self, sendbuff: buffer_type, count: int, datatype: int, method ncclRecv (line 264) | def ncclRecv(self, recvbuff: buffer_type, count: int, datatype: int, method ncclCommDestroy (line 269) | def ncclCommDestroy(self, comm: ncclComm_t) -> None: FILE: archive/ktransformers/server/balance_serve/inference/distributed/utils.py function ensure_divisibility (line 17) | def ensure_divisibility(numerator, denominator): function divide (line 24) | def divide(numerator, denominator): function split_tensor_along_last_dim (line 31) | def split_tensor_along_last_dim( function get_pp_indices (line 59) | def get_pp_indices( class StatelessProcessGroup (line 92) | class StatelessProcessGroup: method __post_init__ (line 113) | def __post_init__(self): method send_obj (line 119) | def send_obj(self, obj: Any, dst: int): method expire_data (line 127) | def expire_data(self): method recv_obj (line 138) | def recv_obj(self, src: int) -> Any: method broadcast_obj (line 146) | def broadcast_obj(self, obj: Optional[Any], src: int) -> Any: method all_gather_obj (line 164) | def all_gather_obj(self, obj: Any) -> list[Any]: method barrier (line 176) | def barrier(self): method create (line 185) | def create( FILE: archive/ktransformers/server/balance_serve/inference/forward_batch.py class ForwardMiniBatchCombine (line 18) | class ForwardMiniBatchCombine: method __init__ (line 36) | def __init__(self, prefill_querys_info: list[QueryInfo], decode_querys... method fill (line 99) | def fill(self, prefill_querys_info: list[QueryInfo], decode_querys_inf... method __str__ (line 168) | def __str__(self): class ForwardMiniBatchSplit (line 177) | class ForwardMiniBatchSplit: method __init__ (line 202) | def __init__( method fill (line 466) | def fill( method __str__ (line 719) | def __str__(self): class ForwardBatchInput (line 732) | class ForwardBatchInput: method __init__ (line 739) | def __init__(self, batch : sched_ext.BatchQueryTodo = None, query_mana... method gen_max_forward_batch (line 769) | def gen_max_forward_batch( method fill (line 821) | def fill(self, batch : sched_ext.BatchQueryTodo = None, query_manager:... class ForwardBatchOutput (line 845) | class ForwardBatchOutput: method __init__ (line 856) | def __init__(self): method merge (line 867) | def merge(self, new_output): method __str__ (line 877) | def __str__(self): FILE: archive/ktransformers/server/balance_serve/inference/model_runner.py function pad_num_tokens (line 53) | def pad_num_tokens(num_tokens): function deduplicate_and_sort (line 56) | def deduplicate_and_sort(lst): function generate_cuda_graphs (line 58) | def generate_cuda_graphs(chunk_size: int) -> list: class ModelRunner (line 69) | class ModelRunner: method __init__ (line 80) | def __init__(self, model = None, cache = None, device = None, use_cuda... method model_attn_plan (line 135) | def model_attn_plan(self, batch, cuda_graph_idx=0): method warmup (line 151) | def warmup(self): method warmup_npu (line 206) | def warmup_npu(self): method run (line 267) | def run(self, batch: sched_ext.BatchQueryTodo = None, query_manager: Q... method run_split (line 349) | def run_split(self, batch: sched_ext.BatchQueryTodo = None, query_mana... method replay (line 465) | def replay(self, cuda_graph_idx=-1): method sync (line 478) | def sync(self, calc_time = True): function get_or_create_model_runner (line 484) | def get_or_create_model_runner(model=None, cache=None, device=None, use_... FILE: archive/ktransformers/server/balance_serve/inference/query_manager.py class QueryInfo (line 13) | class QueryInfo: method __init__ (line 32) | def __init__(self, id, query_length: int, max_length: int, page_size: ... method check_stop (line 58) | def check_stop(self): method print (line 93) | def print(self): class QueryManager (line 101) | class QueryManager: method __init__ (line 108) | def __init__(self, max_length = 65536, page_size = 256, device = torch... method print (line 114) | def print(self, hint: str = ""): method add_query (line 122) | def add_query(self, batch: sched_ext.BatchQueryTodo): method update (line 148) | def update(self, batch: sched_ext.BatchQueryTodo) -> list[sched_ext.Qu... FILE: archive/ktransformers/server/balance_serve/inference/sampling/penaltylib/orchestrator.py class _ReqLike (line 9) | class _ReqLike: class _BatchLike (line 14) | class _BatchLike: method batch_size (line 17) | def batch_size(self): class BatchedPenalizerOrchestrator (line 21) | class BatchedPenalizerOrchestrator: method __init__ (line 27) | def __init__( method reqs (line 51) | def reqs(self): method batch_size (line 54) | def batch_size(self): method cumulate_input_tokens (line 57) | def cumulate_input_tokens( method cumulate_output_tokens (line 74) | def cumulate_output_tokens( method apply (line 94) | def apply(self, logits: torch.Tensor) -> torch.Tensor: method filter (line 113) | def filter( method merge (line 149) | def merge(self, their: "BatchedPenalizerOrchestrator"): class _TokenIDs (line 171) | class _TokenIDs: method __init__ (line 185) | def __init__( method occurrence_count (line 204) | def occurrence_count(self) -> torch.Tensor: class _BatchedPenalizer (line 244) | class _BatchedPenalizer(abc.ABC): method __init__ (line 252) | def __init__(self, orchestrator: BatchedPenalizerOrchestrator): method is_prepared (line 255) | def is_prepared(self) -> bool: method is_required (line 258) | def is_required(self) -> bool: method prepare (line 261) | def prepare(self): method prepare_if_required (line 266) | def prepare_if_required(self): method teardown (line 273) | def teardown(self): method cumulate_input_tokens (line 278) | def cumulate_input_tokens(self, input_ids: _TokenIDs): method cumulate_output_tokens (line 284) | def cumulate_output_tokens(self, output_ids: _TokenIDs): method apply (line 290) | def apply(self, logits: torch.Tensor) -> torch.Tensor: method filter (line 296) | def filter( method merge (line 307) | def merge(self, their: "_BatchedPenalizer"): method _is_required (line 316) | def _is_required(self) -> bool: method _prepare (line 323) | def _prepare(self): method _teardown (line 331) | def _teardown(self): method _cumulate_input_tokens (line 339) | def _cumulate_input_tokens(self, input_ids: _TokenIDs): method _cumulate_output_tokens (line 347) | def _cumulate_output_tokens(self, output_ids: _TokenIDs): method _apply (line 355) | def _apply(self, logits: torch.Tensor) -> torch.Tensor: method _filter (line 363) | def _filter( method _merge (line 372) | def _merge(self, their: "_BatchedPenalizer"): FILE: archive/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/frequency_penalty.py class BatchedFrequencyPenalizer (line 8) | class BatchedFrequencyPenalizer(_BatchedPenalizer): method _is_required (line 16) | def _is_required(self) -> bool: method _prepare (line 22) | def _prepare(self): method _teardown (line 46) | def _teardown(self): method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs): method _cumulate_output_tokens (line 56) | def _cumulate_output_tokens(self, output_ids: _TokenIDs): method _apply (line 61) | def _apply(self, logits: torch.Tensor) -> torch.Tensor: method _filter (line 65) | def _filter( method _merge (line 73) | def _merge(self, their: "BatchedFrequencyPenalizer"): FILE: archive/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/min_new_tokens.py class BatchedMinNewTokensPenalizer (line 8) | class BatchedMinNewTokensPenalizer(_BatchedPenalizer): method _is_required (line 17) | def _is_required(self) -> bool: method _prepare (line 22) | def _prepare(self): method _teardown (line 72) | def _teardown(self): method _cumulate_input_tokens (line 81) | def _cumulate_input_tokens(self, input_ids: _TokenIDs): method _cumulate_output_tokens (line 84) | def _cumulate_output_tokens(self, output_ids: _TokenIDs): method _apply (line 87) | def _apply(self, logits: torch.Tensor) -> torch.Tensor: method _filter (line 92) | def _filter( method _merge (line 99) | def _merge(self, their: "BatchedMinNewTokensPenalizer"): FILE: archive/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/presence_penalty.py class BatchedPresencePenalizer (line 8) | class BatchedPresencePenalizer(_BatchedPenalizer): method _is_required (line 16) | def _is_required(self) -> bool: method _prepare (line 22) | def _prepare(self): method _teardown (line 46) | def _teardown(self): method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs): method _cumulate_output_tokens (line 56) | def _cumulate_output_tokens(self, output_ids: _TokenIDs): method _apply (line 60) | def _apply(self, logits: torch.Tensor) -> torch.Tensor: method _filter (line 64) | def _filter( method _merge (line 72) | def _merge(self, their: "BatchedPresencePenalizer"): FILE: archive/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/repetition_penalty.py class BatchedRepetitionPenalizer (line 8) | class BatchedRepetitionPenalizer(_BatchedPenalizer): method _is_required (line 16) | def _is_required(self) -> bool: method _prepare (line 22) | def _prepare(self): method _teardown (line 46) | def _teardown(self): method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs): method _cumulate_output_tokens (line 57) | def _cumulate_output_tokens(self, output_ids: _TokenIDs): method _apply (line 61) | def _apply(self, logits: torch.Tensor) -> torch.Tensor: method _filter (line 68) | def _filter( method _merge (line 76) | def _merge(self, their: "BatchedRepetitionPenalizer"): FILE: archive/ktransformers/server/balance_serve/inference/sampling/sampler.py class SamplingOptions (line 25) | class SamplingOptions(): method __init__ (line 38) | def __init__(self, bsz = 1, device = torch.device('cuda'), pretrained_... class Sampler (line 59) | class Sampler(nn.Module): method __init__ (line 60) | def __init__(self): method forward (line 63) | def forward( FILE: archive/ktransformers/server/balance_serve/sched_rpc.py class SchedulerServer (line 31) | class SchedulerServer: method __init__ (line 32) | def __init__(self, settings, main_args): method run_scheduler (line 50) | def run_scheduler(self): method stop_scheduler (line 54) | def stop_scheduler(self): method start_proxy (line 58) | def start_proxy(self): method worker_routine (line 63) | def worker_routine(self): method start_rpc_service (line 129) | def start_rpc_service(self): method stop_rpc_service (line 148) | def stop_rpc_service(self): function start_server (line 154) | def start_server(settings, main_args): class SchedulerClient (line 160) | class SchedulerClient: method __init__ (line 161) | def __init__(self, sched_port): method __del__ (line 169) | def __del__(self): method send_request (line 173) | def send_request(self, method, params=None): method add_query (line 190) | def add_query(self, query): method cancel_query (line 194) | def cancel_query(self, query_id): method update_last_batch (line 197) | def update_last_batch(self, updates): method rebuild_inferece_context (line 202) | def rebuild_inferece_context(self,response): method get_inference_context_raw (line 210) | def get_inference_context_raw(self): FILE: archive/ktransformers/server/balance_serve/settings.py function create_sched_settings (line 19) | def create_sched_settings(args): function create_sched_settings_qwen2moe (line 71) | def create_sched_settings_qwen2moe(args): function create_sched_settings_qwen3moe (line 125) | def create_sched_settings_qwen3moe(args): function create_sched_settings_glm4moe (line 177) | def create_sched_settings_glm4moe(args): function create_sched_settings_smallthinker (line 229) | def create_sched_settings_smallthinker(args): function create_sched_settings_qwen3next (line 281) | def create_sched_settings_qwen3next(args): FILE: archive/ktransformers/server/config/config.py class Config (line 20) | class Config(metaclass=Singleton): method load (line 26) | def load() -> dict: method to_path (line 53) | def to_path(path: str) -> str: method __init__ (line 61) | def __init__(self): FILE: archive/ktransformers/server/config/log.py class DailyRotatingFileHandler (line 25) | class DailyRotatingFileHandler(BaseRotatingHandler): method __init__ (line 32) | def __init__(self, filename, backupCount=0, encoding=None, delay=False... method shouldRollover (line 46) | def shouldRollover(self, record): method doRollover (line 59) | def doRollover(self): method _compute_fn (line 78) | def _compute_fn(self): method _open (line 84) | def _open(self): method delete_expired_files (line 106) | def delete_expired_files(self): class Logger (line 132) | class Logger(object): method __init__ (line 144) | def __init__(self, level: str = 'info'): FILE: archive/ktransformers/server/config/singleton.py class Singleton (line 13) | class Singleton(abc.ABCMeta, type): method __call__ (line 24) | def __call__(cls, *args, **kwds): class AbstractSingleton (line 29) | class AbstractSingleton(abc.ABC, metaclass=Singleton): FILE: archive/ktransformers/server/crud/assistants/assistants.py class AssistantDatabaseManager (line 12) | class AssistantDatabaseManager: method __init__ (line 13) | def __init__(self) -> None: method create_assistant_object (line 16) | def create_assistant_object(self, assistant: AssistantCreate) -> Assis... method db_count_assistants (line 25) | def db_count_assistants(self) -> int: method db_create_assistant (line 29) | def db_create_assistant(self, assistant: AssistantCreate): method db_list_assistants (line 34) | def db_list_assistants(self, limit: Optional[int], order: Order) -> Li... method db_get_assistant_by_id (line 44) | def db_get_assistant_by_id(self, assistant_id: str) -> Optional[Assist... method db_update_assistant_by_id (line 53) | def db_update_assistant_by_id(self, assistant_id: str, assistant: Assi... method db_delete_assistant_by_id (line 60) | def db_delete_assistant_by_id(self, assistant_id: str): FILE: archive/ktransformers/server/crud/assistants/messages.py class MessageDatabaseManager (line 10) | class MessageDatabaseManager: method __init__ (line 11) | def __init__(self) -> None: method create_db_message_by_core (line 15) | def create_db_message_by_core(message: MessageCore): method create_db_message (line 19) | def create_db_message(self, message: MessageCreate): method db_add_message (line 22) | def db_add_message(self, message: Message): method db_create_message (line 27) | def db_create_message(self, thread_id: str, message: MessageCreate, st... method create_message_object (line 35) | def create_message_object(thread_id: ObjectID, run_id: ObjectID, messa... method db_sync_message (line 47) | def db_sync_message(self, message: MessageObject): method db_list_messages_of_thread (line 54) | def db_list_messages_of_thread( method db_get_message_by_id (line 72) | def db_get_message_by_id(self, thread_id: ObjectID, message_id: Object... method db_delete_message_by_id (line 80) | def db_delete_message_by_id(self, thread_id: ObjectID, message_id: Obj... FILE: archive/ktransformers/server/crud/assistants/runs.py class RunsDatabaseManager (line 10) | class RunsDatabaseManager: method __init__ (line 11) | def __init__(self) -> None: method create_run_object (line 14) | def create_run_object(self, thread_id: ObjectID, run: RunCreate) -> Ru... method db_create_run (line 26) | def db_create_run(self, thread_id: str, run: RunCreate): method db_sync_run (line 40) | def db_sync_run(self, run: RunObject) -> None: method db_get_run (line 47) | def db_get_run(self, run_id: ObjectID) -> RunObject: FILE: archive/ktransformers/server/crud/assistants/threads.py class ThreadsDatabaseManager (line 15) | class ThreadsDatabaseManager: method __init__ (line 16) | def __init__(self) -> None: method db_create_thread (line 21) | def db_create_thread(self, thread: ThreadCreate): method db_get_thread_by_id (line 54) | def db_get_thread_by_id(self, thread_id: ObjectID): method db_list_threads (line 59) | def db_list_threads(self, limit: Optional[int], order: Order) -> List[... method db_list_threads_preview (line 71) | def db_list_threads_preview(self, limit: Optional[int], order: Order) ... method db_delete_thread_by_id (line 88) | def db_delete_thread_by_id(self, thread_id: ObjectID): FILE: archive/ktransformers/server/exceptions.py function db_exception (line 4) | def db_exception(): function not_implemented (line 11) | def not_implemented(what): function internal_server_error (line 18) | def internal_server_error(what): function request_error (line 22) | def request_error(what): FILE: archive/ktransformers/server/main.py function mount_app_routes (line 29) | def mount_app_routes(mount_app: FastAPI): function create_app (line 37) | def create_app(): function update_web_port (line 57) | def update_web_port(config_file: str): function mount_index_routes (line 69) | def mount_index_routes(app: FastAPI): function run_api (line 83) | def run_api(app, host, port, **kwargs): function custom_openapi (line 96) | def custom_openapi(app): function verify_arg (line 111) | def verify_arg(args): function main (line 127) | def main(): FILE: archive/ktransformers/server/models/assistants/assistants.py class Assistant (line 7) | class Assistant(Base): FILE: archive/ktransformers/server/models/assistants/messages.py class Message (line 7) | class Message(Base): FILE: archive/ktransformers/server/models/assistants/run_steps.py class RunStep (line 7) | class RunStep(Base): FILE: archive/ktransformers/server/models/assistants/runs.py class Run (line 7) | class Run(Base): FILE: archive/ktransformers/server/models/assistants/threads.py class Thread (line 7) | class Thread(Base): FILE: archive/ktransformers/server/schemas/assistants/assistants.py class AssistantBase (line 21) | class AssistantBase(BaseModel): method validate_tools (line 28) | def validate_tools(cls, value): method validate_tool_resources (line 51) | def validate_tool_resources(cls, value): method convert_meta_data (line 70) | def convert_meta_data(cls, values): class AssistantCreate (line 79) | class AssistantCreate(AssistantBase): class AssistantBuildStatus (line 83) | class AssistantBuildStatus(BaseModel): class Status (line 84) | class Status(Enum): method to_stream_reply (line 112) | def to_stream_reply(self) -> str: class AssistantObject (line 116) | class AssistantObject(AssistantBase, ObjectWithCreatedTime): method as_api_response (line 123) | def as_api_response(self): method get_related_threads_ids (line 126) | def get_related_threads_ids(self) -> List[ObjectID]: method get_related_threads_objects (line 133) | def get_related_threads_objects(self) -> List: method append_related_threads (line 145) | def append_related_threads(self, thread_ids: List[ObjectID]): method update_build_status (line 156) | async def update_build_status(self, events: AsyncIterable) -> AsyncIte... method get_build_status (line 178) | def get_build_status(self) -> AssistantBuildStatus: method sync_db (line 182) | def sync_db(self)->None: method get_encoded_instruction (line 191) | def get_encoded_instruction(self,encode_fn:Callable)->torch.Tensor: class AssistantModify (line 198) | class AssistantModify(AssistantBase): FILE: archive/ktransformers/server/schemas/assistants/messages.py class IncompleteDetails (line 15) | class IncompleteDetails(BaseModel): class ContentType (line 19) | class ContentType(Enum): class ContentObject (line 25) | class ContentObject(BaseModel): class ImageFile (line 29) | class ImageFile(BaseModel): class ImageFileObject (line 34) | class ImageFileObject(ContentObject): class ImageUrl (line 38) | class ImageUrl(BaseModel): class ImageUrlObject (line 43) | class ImageUrlObject(ContentObject): class Annotation (line 47) | class Annotation(BaseModel): class Text (line 51) | class Text(BaseModel): class TextObject (line 56) | class TextObject(ContentObject): method filter_append (line 62) | def filter_append(self,text:str): class Attachment (line 72) | class Attachment(BaseModel): class Role (line 77) | class Role(Enum): method is_user (line 81) | def is_user(self)->bool: class MessageCore (line 85) | class MessageCore(BaseModel): method convert_meta_data (line 92) | def convert_meta_data(cls,values): class MessageBase (line 98) | class MessageBase(MessageCore): class Status (line 99) | class Status(Enum): class MessageObject (line 116) | class MessageObject(MessageBase, ObjectWithCreatedTime): method get_text_content (line 120) | def get_text_content(self) -> str: method get_encoded_content (line 129) | async def get_encoded_content(self,encode_fn:Callable): method get_attached_files (line 142) | def get_attached_files(self): method append_message_delta (line 147) | def append_message_delta(self,text:str): method sync_db (line 150) | def sync_db(self): method stream_response_with_event (line 160) | def stream_response_with_event(self, event: MessageBase.Status) -> Mes... class MessageStreamResponse (line 169) | class MessageStreamResponse(BaseModel): method to_stream_reply (line 173) | def to_stream_reply(self): class MessageCreate (line 177) | class MessageCreate(BaseModel): method convert_meta_data (line 184) | def convert_meta_data(cls,values): method to_core (line 189) | def to_core(self) -> MessageCore: class MessageModify (line 206) | class MessageModify(BaseModel): method convert_meta_data (line 210) | def convert_meta_data(cls,values): FILE: archive/ktransformers/server/schemas/assistants/runs.py class ToolCall (line 13) | class ToolCall(BaseModel): class SubmitToolOutputs (line 19) | class SubmitToolOutputs(BaseModel): class RequiredAction (line 23) | class RequiredAction(BaseModel): class LastError (line 28) | class LastError(BaseModel): class IncompleteDetails (line 33) | class IncompleteDetails(BaseModel): class Usage (line 37) | class Usage(BaseModel): class TruncationStrategy (line 43) | class TruncationStrategy(BaseModel): class ToolChoiceType (line 48) | class ToolChoiceType(Enum): class RunBase (line 54) | class RunBase(BaseModel): class Status (line 55) | class Status(Enum): method convert_meta_data (line 84) | def convert_meta_data(cls,values): method set_compute_save (line 89) | def set_compute_save(self,save:int): class RunObject (line 104) | class RunObject(RunBase, ObjectWithCreatedTime): method stream_response_with_event (line 105) | def stream_response_with_event(self,event:RunBase.Status)->RunStreamRe... method sync_db (line 114) | def sync_db(self): method create_message_creation_step (line 123) | def create_message_creation_step(self): class RunStreamResponse (line 127) | class RunStreamResponse(BaseModel): method to_stream_reply (line 130) | def to_stream_reply(self): class RunCreate (line 133) | class RunCreate(BaseModel): method convert_meta_data (line 144) | def convert_meta_data(cls,values): class RunThreadCreate (line 159) | class RunThreadCreate(BaseModel): method convert_meta_data (line 169) | def convert_meta_data(cls,values): class RunModify (line 184) | class RunModify(BaseModel): method convert_meta_data (line 188) | def convert_meta_data(cls,values): class ToolOutput (line 194) | class ToolOutput(BaseModel): class RunSubmit (line 199) | class RunSubmit(BaseModel): FILE: archive/ktransformers/server/schemas/assistants/streaming.py class TextObjectWithIndex (line 15) | class TextObjectWithIndex(TextObject): class ImageFileObjectWithIndex (line 19) | class ImageFileObjectWithIndex(ImageFileObject): class ImageUrlObjectWithIndex (line 23) | class ImageUrlObjectWithIndex(ImageUrlObject): class MessageDeltaImpl (line 31) | class MessageDeltaImpl(BaseModel): class MessageDelta (line 36) | class MessageDelta(Object): method to_stream_reply (line 39) | def to_stream_reply(self): function text_delta (line 43) | def text_delta(index: int, text: str): function append_message_delta (line 47) | def append_message_delta(self: MessageObject, text: str): class RunStepDeltaImpl (line 63) | class RunStepDeltaImpl(BaseModel): class RunStepDelta (line 67) | class RunStepDelta(Object): method to_stream_reply (line 70) | def to_stream_reply(self): class Done (line 74) | class Done(): method to_stream_reply (line 75) | def to_stream_reply(self): function check_client_link (line 79) | async def check_client_link(request: Request, async_events: AsyncIterable): function add_done (line 86) | async def add_done(async_events: AsyncIterable): function to_stream_reply (line 92) | async def to_stream_reply(async_events: AsyncIterable): function filter_api_event (line 100) | async def filter_api_event(async_events: AsyncIterable): function filter_chat_chunk (line 106) | async def filter_chat_chunk(async_events: AsyncIterable): function filter_by_types (line 112) | async def filter_by_types(async_events: AsyncIterable, types: List): function api_stream_response (line 120) | def api_stream_response(request: Request, async_events: AsyncIterable): function chat_stream_response (line 124) | def chat_stream_response(request: Request, async_events: AsyncIterable): function stream_response (line 128) | def stream_response(request: Request, async_events: AsyncIterable): function check_link_response (line 132) | def check_link_response(request: Request, async_events: AsyncIterable): function wrap_async_generator_into_queue (line 136) | def wrap_async_generator_into_queue(async_events: AsyncIterable) -> asyn... function unwrap_async_queue (line 151) | async def unwrap_async_queue(queue: asyncio.Queue) -> AsyncIterable: function unwrap_async_queue_slow (line 163) | async def unwrap_async_queue_slow(queue: asyncio.Queue) -> AsyncIterable: FILE: archive/ktransformers/server/schemas/assistants/threads.py class ThreadBase (line 12) | class ThreadBase(BaseModel): method convert_meta_data (line 16) | def convert_meta_data(cls,values): class ThreadObject (line 24) | class ThreadObject(ThreadBase, ObjectWithCreatedTime): method check_is_related_threads (line 28) | def check_is_related_threads(self)->Self: class StreamEvent (line 34) | class StreamEvent(Enum): method to_stream_reply (line 37) | def to_stream_reply(self,event:StreamEvent): class ThreadCreate (line 41) | class ThreadCreate(ThreadBase): class ThreadModify (line 45) | class ThreadModify(ThreadBase): FILE: archive/ktransformers/server/schemas/assistants/tool.py class ToolType (line 9) | class ToolType(str, Enum): class ToolBase (line 16) | class ToolBase(BaseModel): class CodeInterpreter (line 20) | class CodeInterpreter(ToolBase): class FileSearch (line 24) | class FileSearch(ToolBase): class RelatedThreads (line 28) | class RelatedThreads(ToolBase): class FuntionTool (line 32) | class FuntionTool(ToolBase): class CodeInterpreterResource (line 41) | class CodeInterpreterResource(BaseModel): class FileSearchResource (line 45) | class FileSearchResource(BaseModel): class RelatedThreadsResource (line 50) | class RelatedThreadsResource(BaseModel): FILE: archive/ktransformers/server/schemas/base.py class Object (line 12) | class Object(BaseModel): class ObjectWithCreatedTime (line 20) | class ObjectWithCreatedTime(Object): class Order (line 25) | class Order(str, Enum): method to_sqlalchemy_order (line 29) | def to_sqlalchemy_order(self): class DeleteResponse (line 41) | class DeleteResponse(Object): class OperationResponse (line 44) | class OperationResponse(BaseModel): FILE: archive/ktransformers/server/schemas/conversation.py class ThreadPreview (line 9) | class ThreadPreview(BaseModel): FILE: archive/ktransformers/server/schemas/endpoints/chat.py class CompletionUsage (line 13) | class CompletionUsage(BaseModel): class Role (line 22) | class Role(Enum): class Message (line 29) | class Message(BaseModel): method to_tokenizer_message (line 36) | def to_tokenizer_message(self): class FunctionParameters (line 48) | class FunctionParameters(BaseModel): class FunctionDefinition (line 53) | class FunctionDefinition(BaseModel): class ToolFunction (line 58) | class ToolFunction(BaseModel): class Tool (line 61) | class Tool(BaseModel): class ChatCompletionCreate (line 65) | class ChatCompletionCreate(BaseModel): method get_tokenizer_messages (line 79) | def get_tokenizer_messages(self): class ChatCompletionChunk (line 82) | class ChatCompletionChunk(BaseModel): method to_stream_reply (line 92) | def to_stream_reply(self): class RawUsage (line 95) | class RawUsage(BaseModel): FILE: archive/ktransformers/server/schemas/legacy/completions.py class CompletionCreate (line 7) | class CompletionCreate(BaseModel): method get_tokenizer_messages (line 16) | def get_tokenizer_messages(self): class FinishReason (line 22) | class FinishReason(Enum): class Choice (line 26) | class Choice(BaseModel): class CompletionObject (line 33) | class CompletionObject(Object): method set_token (line 40) | def set_token(self,token:str): method append_token (line 45) | def append_token(self,token:str): method to_stream_reply (line 50) | def to_stream_reply(self): FILE: archive/ktransformers/server/utils/create_interface.py function create_interface (line 19) | def create_interface(config: Config, default_args: ConfigArgs, input_arg... class GlobalContextManager (line 38) | class GlobalContextManager: class GlobalInterface (line 40) | class GlobalInterface: function get_thread_context_manager (line 43) | def get_thread_context_manager() -> GlobalContextManager: function get_interface (line 45) | def get_interface() -> GlobalInterface: FILE: archive/ktransformers/server/utils/multi_timer.py function format_time (line 4) | def format_time(seconds): class Profiler (line 20) | class Profiler: method __init__ (line 21) | def __init__(self): method create_timer (line 25) | def create_timer(self, name): method start_timer (line 32) | def start_timer(self, name): method pause_timer (line 40) | def pause_timer(self, name): method get_timer_sec (line 48) | def get_timer_sec(self, name): method get_all_timers (line 57) | def get_all_timers(self): method report_timer_string (line 63) | def report_timer_string(self, name): method create_and_start_timer (line 66) | def create_and_start_timer(self, name): method inc (line 72) | def inc(self,key:str,delta:int=1): method set_counter (line 75) | def set_counter(self,key:str,to=0): method get_counter (line 78) | def get_counter(self,key:str): FILE: archive/ktransformers/server/utils/serve_profiling.py class ProfStatKey (line 8) | class ProfStatKey(StrEnum): class ProfTimeStat (line 15) | class ProfTimeStat: method __init__ (line 16) | def __init__(self): method record_start_time (line 30) | def record_start_time(self): method add_time_stat (line 34) | def add_time_stat(self, key: ProfStatKey, time_ns, is_prefill): method print_all (line 45) | def print_all(self): method reset_all (line 58) | def reset_all(self): class ProfStatItem (line 65) | class ProfStatItem: method __init__ (line 66) | def __init__(self): method add_item (line 75) | def add_item(self, cost_time_ns): method reset (line 88) | def reset(self): method get_stat (line 94) | def get_stat(self): FILE: archive/ktransformers/server/utils/sql_utils.py class SQLUtil (line 27) | class SQLUtil(metaclass=Singleton): method __init__ (line 34) | def __init__(self) -> None: method get_db (line 40) | def get_db(self): method init_engine (line 53) | def init_engine(cfg: Config): method create_sqllite_url (line 70) | def create_sqllite_url(cfg): method db_add_commit_refresh (line 89) | def db_add_commit_refresh(self, session: Session, what): method db_merge_commit (line 104) | def db_merge_commit(self, session: Session, what): method db_update_commit_refresh (line 115) | def db_update_commit_refresh(self, session: Session, existing, what): FILE: archive/ktransformers/tests/AIME_2024/eval_api.py function generate_text (line 16) | def generate_text(api_url,question , model_name, stream=False, auth_toke... function load_data (line 40) | def load_data(file_path): function get_score (line 54) | def get_score(pred, answer): function run_eval_api (line 74) | def run_eval_api( function main (line 120) | def main(output_path, api_url, model_name, auth_token, format_tabs,probl... FILE: archive/ktransformers/tests/AIME_2024/evaluation.py function filter_answer (line 2) | def filter_answer(completion: str) -> str: FILE: archive/ktransformers/tests/AIME_2024/prompts.py function instruct_prompt (line 1) | def instruct_prompt(prompt: str) -> str: FILE: archive/ktransformers/tests/UT/test_kdeepseek_attention_w8a8a2serve_npu.py class DummyConfig (line 15) | class DummyConfig: method __init__ (line 16) | def __init__(self, hidden_size=4, num_attention_heads=1): class DummyOrigAttn (line 21) | class DummyOrigAttn(nn.Module): method __init__ (line 22) | def __init__(self, config=None, layer_idx=0): class DummyDynamicQuantOps (line 35) | class DummyDynamicQuantOps: method execute (line 36) | def execute(self, inputs): class DummyMatMulOps (line 41) | class DummyMatMulOps: method execute (line 42) | def execute(self, inputs): class DummyQuantProj (line 47) | class DummyQuantProj(nn.Module): method __init__ (line 48) | def __init__(self, dim): class DummyStaticCache (line 57) | class DummyStaticCache: method __init__ (line 58) | def __init__(self, page_size=16): method get_usable_length (line 61) | def get_usable_length(self, kv_seq_len, layer_idx): method update (line 64) | def update(self, combined, layer_idx, cache_kwargs): class DummyNpuFusedAttention (line 68) | class DummyNpuFusedAttention: method __call__ (line 69) | def __call__(self, q, k, v, **kwargs): method out (line 77) | def out(self, q, k, v, workspace=None, class DummyOpsNpu (line 92) | class DummyOpsNpu: method npu_fused_infer_attention_score (line 93) | def npu_fused_infer_attention_score(self, q, k, v, **kwargs): function fake_apply_rotary_pos_emb_fusion (line 101) | def fake_apply_rotary_pos_emb_fusion(q_pe, k_pe, cos, sin): function build_attention_module (line 104) | def build_attention_module(q_lora_rank=None): function _patch_env (line 175) | def _patch_env(monkeypatch): function test_print_callback_smoke (line 225) | def test_print_callback_smoke(): function _common_inputs_prefill (line 241) | def _common_inputs_prefill(): function test_forward_prefill_with_mask (line 261) | def test_forward_prefill_with_mask(): function test_forward_prefill_without_mask_and_q_lora (line 298) | def test_forward_prefill_without_mask_and_q_lora(): function test_forward_decode_paged_path (line 335) | def test_forward_decode_paged_path(): function test_forward_prefill_layer_idx_none_raises (line 378) | def test_forward_prefill_layer_idx_none_raises(): function test_forward_prefill_attn_output_shape_mismatch_raises (line 408) | def test_forward_prefill_attn_output_shape_mismatch_raises(monkeypatch): function test_forward_paged_use_npu_graph (line 452) | def test_forward_paged_use_npu_graph(monkeypatch): FILE: archive/ktransformers/tests/UT/test_kdeepseek_ln_npu.py class DummyOrigModule (line 16) | class DummyOrigModule(nn.Module): method __init__ (line 17) | def __init__(self, hidden_size=4, variance_epsilon=1e-5): class DummySafeTensorLoader (line 23) | class DummySafeTensorLoader: method __init__ (line 24) | def __init__(self): method load_tensor (line 28) | def load_tensor(self, name: str): class DummyGGUFLoader (line 33) | class DummyGGUFLoader: method __init__ (line 34) | def __init__(self, safetensor_loader: DummySafeTensorLoader): class DummyConfig (line 38) | class DummyConfig: class FakeRMSNorm (line 42) | class FakeRMSNorm: method __init__ (line 43) | def __init__(self): method __call__ (line 46) | def __call__(self, hidden_states, weight, eps): function build_rms_module (line 53) | def build_rms_module(hidden_size=4, eps=1e-5, safetensor_loader=None): function patch_utils_and_npu (line 70) | def patch_utils_and_npu(monkeypatch): function get_fake_rms (line 81) | def get_fake_rms(): function test_forward_preserves_shape_and_dtype (line 85) | def test_forward_preserves_shape_and_dtype(): function test_forward_with_bfloat16_dtype (line 103) | def test_forward_with_bfloat16_dtype(): function test_forward_uses_bias (line 114) | def test_forward_uses_bias(): function test_load_from_safetensor_loader (line 132) | def test_load_from_safetensor_loader(): function test_unload_sets_weight_and_bias_to_none_idempotent (line 150) | def test_unload_sets_weight_and_bias_to_none_idempotent(): FILE: archive/ktransformers/tests/function_call_test.py function send_messages (line 3) | def send_messages(messages): FILE: archive/ktransformers/tests/humaneval/eval_api.py function generate_text (line 11) | def generate_text(api_url,question , model_name, stream=False, auth_toke... function run_eval_api (line 35) | def run_eval_api( function main (line 81) | def main(output_path, api_url, model_name, auth_token, format_tabs,probl... FILE: archive/ktransformers/tests/humaneval/evaluation.py function filter_code (line 2) | def filter_code(completion: str) -> str: function fix_indents (line 14) | def fix_indents(text: str) -> str: FILE: archive/ktransformers/tests/humaneval/prompts.py function instruct_prompt (line 1) | def instruct_prompt(prompt: str) -> str: function standard_prompt (line 5) | def standard_prompt(prompt: str) -> str: function write_prompt (line 9) | def write_prompt(prompt: str) -> str: function replit_glaive_prompt (line 13) | def replit_glaive_prompt(prompt: str) -> str: FILE: archive/ktransformers/tests/mmlu_pro_test.py class DataEvaluator (line 16) | class DataEvaluator: method __init__ (line 17) | def __init__(self): method load_data (line 21) | def load_data(self, file_path): method get_prompt (line 45) | def get_prompt(self, record): method post_processing (line 56) | def post_processing(self, text): method score (line 65) | def score(self, pred, answers): function generate_text (line 80) | def generate_text(api_url, question, model_name, stream=False): function main (line 105) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file... FILE: archive/ktransformers/tests/mmlu_test.py class DataEvaluator (line 16) | class DataEvaluator: method __init__ (line 17) | def __init__(self): method load_data (line 21) | def load_data(self, file_path): method get_prompt (line 36) | def get_prompt(self, record): method post_processing (line 47) | def post_processing(self, text): method score (line 56) | def score(self, pred, answers): function generate_text (line 71) | def generate_text(api_url, question, model_name, stream=False): function main (line 96) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file... FILE: archive/ktransformers/tests/mmlu_test_multi.py function extract_final_answer (line 19) | def extract_final_answer(text): class DataEvaluator (line 62) | class DataEvaluator: method __init__ (line 63) | def __init__(self): method load_data (line 66) | def load_data(self, file_path): method get_prompt (line 77) | def get_prompt(self, record): method post_processing (line 85) | def post_processing(self, text): method score (line 92) | def score(self, pred, answer): function generate_text (line 100) | def generate_text(api_url, question, model_name, stream=False): function main (line 120) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file... FILE: archive/ktransformers/tests/parse_cover_info.py function main (line 7) | def main(): FILE: archive/ktransformers/tests/score.py function wait_for_server (line 7) | def wait_for_server(base_url: str, timeout: int = None) -> None: function enqueue_output (line 63) | def enqueue_output(out, queue): FILE: archive/ktransformers/tests/test_client.py function fetch_event_stream (line 15) | async def fetch_event_stream(session, payload, request_id, stream): function main (line 77) | async def main(prompt_id, model, stream, max_tokens, temperature, top_p): FILE: archive/ktransformers/tests/test_prefix.py function fetch_message_once (line 18) | async def fetch_message_once(session, request_id, messages, max_tokens, ... function multi_turn_conversation (line 79) | async def multi_turn_conversation(session, request_id, rounds, max_token... function main (line 104) | async def main(concurrent_requests, rounds, max_tokens, model): FILE: archive/ktransformers/tests/test_pytorch_q8.py class LinearModel (line 4) | class LinearModel(torch.nn.Module): method __init__ (line 5) | def __init__(self, in_features, out_features): method forward (line 9) | def forward(self, x): FILE: archive/ktransformers/tests/test_speed.py function fetch_event_stream (line 48) | async def fetch_event_stream(session, request_id, prompt, max_tokens, mo... function main (line 137) | async def main(concurrent_requests , prompt, max_tokens, model): FILE: archive/ktransformers/tests/triton_fp8gemm_test.py function test_fp8_gemm_vs_torch_matmul (line 21) | def test_fp8_gemm_vs_torch_matmul(): function test_fp8_gemm_vs_torch_matmul_load (line 48) | def test_fp8_gemm_vs_torch_matmul_load(): function test_fp8_gemm_tplops (line 71) | def test_fp8_gemm_tplops(): FILE: archive/ktransformers/util/ascend/ascend_utils.py function setup_model_parallel (line 33) | def setup_model_parallel(distributed_timeout_minutes: int = 30, tp: int ... function get_tensor_parallel_size (line 90) | def get_tensor_parallel_size(): function get_tensor_parallel_group (line 95) | def get_tensor_parallel_group(): function get_tensor_parallel_rank (line 100) | def get_tensor_parallel_rank(): function get_data_parallel_size (line 105) | def get_data_parallel_size(): function get_data_parallel_gloo (line 110) | def get_data_parallel_gloo(): function get_data_parallel_group (line 115) | def get_data_parallel_group(): function get_data_parallel_rank (line 120) | def get_data_parallel_rank(): function get_nccl_options (line 126) | def get_nccl_options(pg_name, nccl_comm_cfgs): function get_safetensors_cut_weight (line 137) | def get_safetensors_cut_weight(name: str, weights: torch.Tensor): function get_absort_weight (line 166) | def get_absort_weight(model, config): function allredeuce_warpper (line 198) | def allredeuce_warpper(func): FILE: archive/ktransformers/util/cuda_graph_runner.py class CUDAGraphRunner (line 10) | class CUDAGraphRunner: method __init__ (line 12) | def __init__(self): method capture (line 17) | def capture( method forward (line 63) | def forward( method __call__ (line 83) | def __call__(self, *args, **kwargs): FILE: archive/ktransformers/util/custom_gguf.py class GGMLQuantizationType (line 40) | class GGMLQuantizationType(IntEnum): function quant_shape_to_byte_shape (line 105) | def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuan... function read_value (line 177) | def read_value(f, data_type): function dequantize_q2_k (line 225) | def dequantize_q2_k(data): function dequantize_q2_k_gpu (line 262) | def dequantize_q2_k_gpu(data, device:str ="cuda", target_dtype = torch.g... function dequantize_q3_k (line 272) | def dequantize_q3_k(data): function dequantize_q3_k_gpu (line 314) | def dequantize_q3_k_gpu(data, device:str ="cuda", target_dtype = torch.g... function dequantize_q4_k (line 324) | def dequantize_q4_k(data): function dequantize_q4_k_gpu (line 346) | def dequantize_q4_k_gpu(data, device:str ="cuda", target_dtype = torch.g... function dequantize_q5_k (line 356) | def dequantize_q5_k(data): function dequantize_q5_k_gpu (line 412) | def dequantize_q5_k_gpu(data, device:str ="cuda", target_dtype = torch.g... function dequantize_q6_k (line 422) | def dequantize_q6_k(data): function dequantize_q6_k_gpu (line 471) | def dequantize_q6_k_gpu(data: np.ndarray, device:str = "cuda", target_dt... function dequantize_iq4_xs (line 482) | def dequantize_iq4_xs(data): function dequantize_iq4_xs_gpu (line 512) | def dequantize_iq4_xs_gpu(data: np.ndarray, device:str = "cuda", target_... function dequantize_q4_0 (line 521) | def dequantize_q4_0(data): function dequantize_q4_0_gpu (line 536) | def dequantize_q4_0_gpu(data, device:str = "cuda", target_dtype = torch.... function dequantize_q5_0 (line 539) | def dequantize_q5_0(data): function dequantize_q5_0_gpu (line 560) | def dequantize_q5_0_gpu(data, device:str = "cuda", target_dtype = torch.... function dequantize_q8_0 (line 563) | def dequantize_q8_0(data): function dequantize_q8_0_gpu (line 572) | def dequantize_q8_0_gpu(data, device:str = "cuda", target_dtype = torch.... function dequantize_f32 (line 584) | def dequantize_f32(data): function dequantize_f32_gpu (line 587) | def dequantize_f32_gpu(data, device, target_dtype = torch.get_default_dt... function dequantize_f16 (line 594) | def dequantize_f16(data): function dequantize_f16_gpu (line 597) | def dequantize_f16_gpu(data, device, target_dtype = torch.get_default_dt... function dequantize_bf16_gpu (line 604) | def dequantize_bf16_gpu(data, device, target_dtype = torch.get_default_d... function translate_name_to_gguf_mixtral (line 642) | def translate_name_to_gguf_mixtral(name): function translate_name_to_gguf (line 665) | def translate_name_to_gguf(name): FILE: archive/ktransformers/util/custom_loader.py class ModelLoader (line 28) | class ModelLoader(ABC): method has_tensor (line 35) | def has_tensor(cls, name: str): class SafeTensorLoader (line 47) | class SafeTensorLoader(ModelLoader): method __init__ (line 53) | def __init__(self, file_path: str): method __load_tensor_file_map (line 56) | def __load_tensor_file_map(self, file_path: str): method load_tensor (line 96) | def load_tensor(self, key: str, device: str = "cpu"): method load_experts (line 114) | def load_experts(self, key: str, device: str="cpu"): method load_gate (line 225) | def load_gate(self, key: str, device: str="cpu"): method close_all_handles (line 252) | def close_all_handles(self): method load_dequantized_tensor (line 257) | def load_dequantized_tensor(self, key: str, device: str = "cpu"): method has_tensor (line 275) | def has_tensor(self, name: str): class GGUFLoader (line 278) | class GGUFLoader(ModelLoader): method __init__ (line 284) | def __init__(self, gguf_path: str, quantize: str = None): method load_gguf (line 323) | def load_gguf(self, f): method get_mmap_tensor (line 405) | def get_mmap_tensor(self, name): method get_undequanted_tensor_and_ggml_type (line 416) | def get_undequanted_tensor_and_ggml_type(self, name): method load_expert_tensor (line 424) | def load_expert_tensor(self, name, data, expert_id, elements_per_exper... method load_gguf_tensor (line 453) | def load_gguf_tensor(self, name: str, device:str = "cpu", target_dtype... method has_tensor (line 518) | def has_tensor(self, name: str): method get_ggml_type (line 522) | def get_ggml_type(self, name: str): class ModelLoaderFactory (line 528) | class ModelLoaderFactory: method create_loader (line 535) | def create_loader(path: str): class W8A8SafeTensorLoader (line 600) | class W8A8SafeTensorLoader(SafeTensorLoader): method load_tensor (line 601) | def load_tensor(self, key: str, device: str = "cpu"): method load_dequantized_tensor (line 625) | def load_dequantized_tensor(self, key: str, device: str = "cpu"): FILE: archive/ktransformers/util/modeling_rope_utils.py function _compute_default_rope_parameters (line 29) | def _compute_default_rope_parameters( function _compute_linear_scaling_rope_parameters (line 71) | def _compute_linear_scaling_rope_parameters( function _compute_dynamic_ntk_parameters (line 112) | def _compute_dynamic_ntk_parameters( function _compute_yarn_parameters (line 163) | def _compute_yarn_parameters( function _compute_longrope_parameters (line 259) | def _compute_longrope_parameters( function _compute_llama3_parameters (line 322) | def _compute_llama3_parameters( function _check_received_keys (line 378) | def _check_received_keys( function _validate_default_rope_parameters (line 407) | def _validate_default_rope_parameters(config: PretrainedConfig, ignore_k... function _validate_linear_scaling_rope_parameters (line 415) | def _validate_linear_scaling_rope_parameters(config: PretrainedConfig, i... function _validate_dynamic_scaling_rope_parameters (line 427) | def _validate_dynamic_scaling_rope_parameters(config: PretrainedConfig, ... function _validate_yarn_parameters (line 441) | def _validate_yarn_parameters(config: PretrainedConfig, ignore_keys: Opt... function _validate_longrope_parameters (line 479) | def _validate_longrope_parameters(config: PretrainedConfig, ignore_keys:... function _validate_llama3_parameters (line 529) | def _validate_llama3_parameters(config: PretrainedConfig, ignore_keys: O... function rope_config_validation (line 576) | def rope_config_validation(config: PretrainedConfig, ignore_keys: Option... FILE: archive/ktransformers/util/npu_graph_runner.py class NPUGraphRunner (line 14) | class NPUGraphRunner: method __init__ (line 16) | def __init__(self, deviceId): method init (line 23) | def init(self, batch_size, seq_length): method destroy (line 32) | def destroy(self): method capture (line 37) | def capture( method forward (line 65) | def forward( method launch_callback (line 86) | def launch_callback(self, func, data, block, stream): method __call__ (line 89) | def __call__(self, *args, **kwargs): function check_runner (line 94) | def check_runner(deviceId: int): function destory_runner (line 101) | def destory_runner(deviceId: int): function get_or_create_runner (line 107) | def get_or_create_runner(deviceId: int): FILE: archive/ktransformers/util/textstream.py class TextStreamer (line 2) | class TextStreamer: method __init__ (line 4) | def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = Fal... method reset (line 14) | def reset(self): method put (line 18) | def put(self, value)->Optional[str]: method end (line 49) | def end(self)->Optional[str]: method _is_chinese_char (line 62) | def _is_chinese_char(self, cp): FILE: archive/ktransformers/util/utils.py function get_use_npu_graph (line 56) | def get_use_npu_graph(): class StatKey (line 62) | class StatKey(StrEnum): class TimeStat (line 74) | class TimeStat: method __init__ (line 75) | def __init__(self): method record_start_time (line 89) | def record_start_time(self): method add_time_stat (line 93) | def add_time_stat(self, key: StatKey, time_ns, is_prefill): method print_all (line 104) | def print_all(self): method reset_all (line 117) | def reset_all(self): class StatItem (line 124) | class StatItem: method __init__ (line 125) | def __init__(self): method add_item (line 131) | def add_item(self, cost_time_ns): method reset (line 137) | def reset(self): method get_stat (line 143) | def get_stat(self): function get_free_ports (line 157) | def get_free_ports(n: int, continue_prot: list): function get_current_device (line 173) | def get_current_device(): function get_compute_capability (line 179) | def get_compute_capability(device:torch.device = None): function set_module (line 193) | def set_module(model, submodule_key, module): function set_param (line 207) | def set_param(module: nn.Module, name: str, weights: torch.Tensor): function get_device (line 214) | def get_device(gguf_module_key:str, device_map:dict): function get_all_used_cuda_device (line 220) | def get_all_used_cuda_device(device_map:dict): function load_cur_state_dict_npu (line 232) | def load_cur_state_dict_npu(module: nn.Module, gguf_loader: ModelLoader,... function load_cur_state_dict (line 263) | def load_cur_state_dict(module: nn.Module, gguf_loader: ModelLoader, pre... function sync_all_device (line 310) | def sync_all_device(all_device_list): function xpu_fp16_model (line 323) | def xpu_fp16_model(config): function load_weights (line 335) | def load_weights(module:nn.Module, gguf_loader:ModelLoader, prefix='', d... function tf_logits_warper (line 344) | def tf_logits_warper(generation_config): function prefill_and_generate (line 394) | def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000,... class InferenceState (line 809) | class InferenceState(enum.Enum): FILE: archive/ktransformers/util/vendors.py class GPUVendor (line 7) | class GPUVendor(IntEnum): class DeviceManager (line 15) | class DeviceManager: method __init__ (line 19) | def __init__(self): method _detect_gpu_vendor (line 23) | def _detect_gpu_vendor(self) -> GPUVendor: method _get_available_devices (line 60) | def _get_available_devices(self) -> List[int]: method get_device_str (line 75) | def get_device_str(self, device_id: Union[int, str]) -> str: method to_torch_device (line 102) | def to_torch_device(self, device_id: Union[int, str] = 0) -> torch.dev... method move_tensor_to_device (line 126) | def move_tensor_to_device(self, tensor: torch.Tensor, device_id: Union... method is_available (line 140) | def is_available(self, index: int = 0) -> bool: method get_all_devices (line 155) | def get_all_devices(self) -> List[int]: function get_device (line 168) | def get_device(device_id: Union[int, str] = 0) -> torch.device: function to_device (line 180) | def to_device(tensor: torch.Tensor, device_id: Union[int, str] = 0) -> t... FILE: archive/ktransformers/util/weight_loader.py class ModelLoader (line 8) | class ModelLoader(ABC): method load_tensor (line 15) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor: method supports_format (line 30) | def supports_format(cls, path: str) -> bool: class SafeTensorLoader (line 43) | class SafeTensorLoader(ModelLoader): method __init__ (line 48) | def __init__(self, path: str): method _load_tensor_file_map (line 59) | def _load_tensor_file_map(self, path: str) -> None: method load_tensor (line 102) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor: method load_dequantized_tensor (line 122) | def load_dequantized_tensor(self, name: str, device: str = "cpu") -> t... method close_all_handles (line 148) | def close_all_handles(self) -> None: method supports_format (line 157) | def supports_format(cls, path: str) -> bool: class GGUFLoader (line 185) | class GGUFLoader(ModelLoader): method __init__ (line 190) | def __init__(self, path: str): method _load_gguf (line 228) | def _load_gguf(self, f) -> None: method _read_value (line 287) | def _read_value(self, f, data_type) -> Any: method load_tensor (line 310) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor: method load_gguf_tensor (line 324) | def load_gguf_tensor(self, name: str, device: str = "cpu", target_dtyp... method supports_format (line 346) | def supports_format(cls, path: str) -> bool: FILE: archive/ktransformers/website/src/api/assistant.ts function filterAndConvert (line 3) | function filterAndConvert( type IAssistantData (line 12) | interface IAssistantData { FILE: archive/ktransformers/website/src/api/run.ts type IRunData (line 4) | interface IRunData { function cancelRun (line 87) | async function cancelRun(threadId: string, runId: string){ FILE: archive/ktransformers/website/src/assets/iconfont/iconfont.js function s (line 1) | function s(){h||(h=!0,e())} function d (line 1) | function d(){try{a.documentElement.doScroll("left")}catch(t){return void... FILE: archive/ktransformers/website/src/conf/config.ts type Window (line 2) | interface Window { FILE: archive/ktransformers/website/src/utils/copy.ts function showCopySuccessMessage (line 75) | function showCopySuccessMessage() { function showCopyErrorMessage (line 93) | function showCopyErrorMessage() { FILE: archive/ktransformers/website/src/utils/types.ts type IAssistant (line 1) | interface IAssistant { type IAssistantWithStatus (line 17) | interface IAssistantWithStatus { type IMessage (line 34) | interface IMessage { type IThread (line 51) | interface IThread { type IRun (line 59) | interface IRun { type IFile (line 88) | interface IFile { type IMessageData (line 97) | interface IMessageData { type IThreadAndMessageAndAssistant (line 104) | interface IThreadAndMessageAndAssistant { type IDeleteResult (line 110) | interface IDeleteResult { type IBuildData (line 115) | interface IBuildData { FILE: archive/merge_tensors/merge_safetensor_gguf.py function read_safetensor_keys_from_folder (line 15) | def read_safetensor_keys_from_folder(folder_path)->dict: function translate_name (line 58) | def translate_name(name:str)->str: function combine_tensor_sources (line 71) | def combine_tensor_sources(safetensor_path:str, gguf_path:str): function write_combined_tensor (line 97) | def write_combined_tensor(target_tensor_map: dict, output_path: str, ggu... function main (line 190) | def main(): FILE: archive/merge_tensors/merge_safetensor_gguf_for_qwen3.py function read_safetensor_keys_from_folder (line 27) | def read_safetensor_keys_from_folder(folder_path) -> dict: function translate_name (line 60) | def translate_name(name: str) -> str: function combine_tensor_sources (line 69) | def combine_tensor_sources(safetensor_path: str, gguf_path: str): function write_combined_tensor (line 103) | def write_combined_tensor(target_tensor_map: dict, output_path: str, ggu... function main (line 198) | def main(): FILE: archive/setup.py class CpuInstructInfo (line 62) | class CpuInstructInfo: class VersionInfo (line 72) | class VersionInfo: method get_musa_bare_metal_version (line 80) | def get_musa_bare_metal_version(self, musa_dir): method get_rocm_bare_metal_version (line 90) | def get_rocm_bare_metal_version(self, rocm_dir): method get_cuda_bare_metal_version (line 154) | def get_cuda_bare_metal_version(self, cuda_dir): method get_cuda_version_of_torch (line 163) | def get_cuda_version_of_torch(self): method get_platform (line 170) | def get_platform(self,): method get_cpu_instruct (line 181) | def get_cpu_instruct(self,): method get_torch_version (line 224) | def get_torch_version(self,): method get_flash_version (line 229) | def get_flash_version(self,): method get_package_version (line 238) | def get_package_version(self, full_version=False): class BuildWheelsCommand (line 263) | class BuildWheelsCommand(_bdist_wheel): method get_wheel_name (line 264) | def get_wheel_name(self,): method run (line 274) | def run(self): function colored (line 304) | def colored(text, color=None, bold=False): function split_line (line 316) | def split_line(text: str) -> List[str]: function colored (line 337) | def colored(text, color=None, bold=False): function split_line (line 349) | def split_line(text: str) -> List[str]: function run_command_with_live_tail (line 365) | def run_command_with_live_tail(ext: str, command: List[str], output_line... class CMakeExtension (line 475) | class CMakeExtension(Extension): method __init__ (line 476) | def __init__(self, name: str, sourcedir: str) -> None: function get_cmake_abi_args (line 481) | def get_cmake_abi_args(cmake_args): class CMakeBuild (line 488) | class CMakeBuild(BuildExtension): method build_extension (line 490) | def build_extension(self, ext) -> None: FILE: archive/third_party/llamafile/micros.h function GetQueryPerformanceFrequency (line 19) | static long long GetQueryPerformanceFrequency() { function GetQueryPerformanceCounter (line 24) | static long long GetQueryPerformanceCounter() { function micros (line 31) | static long long micros(void) { FILE: archive/third_party/llamafile/numba.h function rand32 (line 8) | inline int rand32(void) { function popcount (line 15) | inline int popcount(unsigned x) { function hamming (line 23) | inline int hamming(int x, int y) { function float01 (line 27) | inline float float01(unsigned x) { // (0,1) function numba (line 31) | inline float numba(void) { // (-10,10) FILE: archive/third_party/llamafile/sgemm.h type ggml_tensor (line 13) | struct ggml_tensor type ggml_compute_params (line 14) | struct ggml_compute_params type ggml_compute_params (line 31) | struct ggml_compute_params type ggml_tensor (line 31) | struct ggml_tensor type ggml_tensor (line 31) | struct ggml_tensor type ggml_tensor (line 31) | struct ggml_tensor type ggml_tensor (line 31) | struct ggml_tensor type ggml_tensor (line 32) | struct ggml_tensor type ggml_tensor (line 32) | struct ggml_tensor type ggml_tensor (line 32) | struct ggml_tensor type ggml_compute_params (line 44) | struct ggml_compute_params type ggml_tensor (line 44) | struct ggml_tensor type ggml_tensor (line 44) | struct ggml_tensor type ggml_tensor (line 44) | struct ggml_tensor type ggml_tensor (line 44) | struct ggml_tensor type ggml_compute_params (line 45) | struct ggml_compute_params type ggml_tensor (line 45) | struct ggml_tensor type ggml_tensor (line 45) | struct ggml_tensor type ggml_tensor (line 45) | struct ggml_tensor type ggml_tensor (line 45) | struct ggml_tensor type ggml_compute_params (line 46) | struct ggml_compute_params type ggml_tensor (line 46) | struct ggml_tensor type ggml_tensor (line 46) | struct ggml_tensor type ggml_tensor (line 46) | struct ggml_tensor type ggml_tensor (line 46) | struct ggml_tensor type ggml_compute_params (line 47) | struct ggml_compute_params type ggml_tensor (line 47) | struct ggml_tensor type ggml_tensor (line 47) | struct ggml_tensor type ggml_tensor (line 47) | struct ggml_tensor type ggml_tensor (line 47) | struct ggml_tensor type ggml_compute_params (line 48) | struct ggml_compute_params type ggml_tensor (line 48) | struct ggml_tensor type ggml_tensor (line 48) | struct ggml_tensor type ggml_tensor (line 48) | struct ggml_tensor type ggml_tensor (line 48) | struct ggml_tensor type ggml_compute_params (line 49) | struct ggml_compute_params type ggml_tensor (line 49) | struct ggml_tensor type ggml_tensor (line 49) | struct ggml_tensor type ggml_tensor (line 49) | struct ggml_tensor type ggml_tensor (line 49) | struct ggml_tensor type ggml_compute_params (line 50) | struct ggml_compute_params type ggml_tensor (line 50) | struct ggml_tensor type ggml_tensor (line 50) | struct ggml_tensor type ggml_tensor (line 50) | struct ggml_tensor type ggml_tensor (line 50) | struct ggml_tensor type ggml_compute_params (line 51) | struct ggml_compute_params type ggml_tensor (line 51) | struct ggml_tensor type ggml_tensor (line 51) | struct ggml_tensor type ggml_tensor (line 51) | struct ggml_tensor type ggml_tensor (line 51) | struct ggml_tensor type ggml_compute_params (line 52) | struct ggml_compute_params type ggml_tensor (line 52) | struct ggml_tensor type ggml_tensor (line 52) | struct ggml_tensor type ggml_tensor (line 52) | struct ggml_tensor type ggml_tensor (line 52) | struct ggml_tensor FILE: archive/third_party/llamafile/sgemm_arm.cpp type GemmFuncs (line 32) | struct GemmFuncs { type ggml_compute_params (line 34) | struct ggml_compute_params type ggml_tensor (line 34) | struct ggml_tensor type ggml_tensor (line 34) | struct ggml_tensor type ggml_tensor (line 34) | struct ggml_tensor type ggml_tensor (line 34) | struct ggml_tensor method GemmFuncs (line 39) | GemmFuncs() { function llamafile_sgemm (line 190) | bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, co... function llamafile_mixmul (line 198) | bool llamafile_mixmul(const ggml_compute_params* params, const ggml_tens... function llamafile_mixmul_iqk (line 202) | bool llamafile_mixmul_iqk(long Nx, long Ny, long ne00, int ne11, int typ... FILE: archive/third_party/llamafile/sgemm_x86.cpp type GemmFuncs (line 32) | struct GemmFuncs { type ggml_compute_params (line 34) | struct ggml_compute_params type ggml_tensor (line 34) | struct ggml_tensor type ggml_tensor (line 34) | struct ggml_tensor type ggml_tensor (line 34) | struct ggml_tensor type ggml_tensor (line 34) | struct ggml_tensor method GemmFuncs (line 39) | GemmFuncs() { function llamafile_sgemm (line 190) | bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, co... function llamafile_mixmul (line 198) | bool llamafile_mixmul(const ggml_compute_params* params, const ggml_tens... function llamafile_mixmul_iqk (line 202) | bool llamafile_mixmul_iqk(long Nx, long Ny, long ne00, int ne11, int typ... FILE: archive/third_party/llamafile/tinyblas_cpu.h function tinyBLAS_not_supported (line 85) | bool tinyBLAS_not_supported(const char* file, int line) { function unhalf (line 90) | inline float unhalf(ggml_fp16_t d) { function unhalf (line 93) | inline float unhalf(ggml_bf16_t d) { function float (line 112) | struct ggml_type_trait { function ggml_bf16_t (line 116) | struct ggml_type_trait { function ggml_fp16_t (line 120) | struct ggml_type_trait { function block_q8_0 (line 124) | struct ggml_type_trait { function __m128 (line 132) | inline __m128 add(__m128 x, __m128 y) { function __m128 (line 135) | inline __m128 sub(__m128 x, __m128 y) { function __m128 (line 138) | inline __m128 mul(__m128 x, __m128 y) { function __m256 (line 144) | inline __m256 add(__m256 x, __m256 y) { function __m256 (line 147) | inline __m256 sub(__m256 x, __m256 y) { function __m256 (line 150) | inline __m256 mul(__m256 x, __m256 y) { function __m512 (line 156) | inline __m512 add(__m512 x, __m512 y) { function __m512 (line 159) | inline __m512 sub(__m512 x, __m512 y) { function __m512 (line 162) | inline __m512 mul(__m512 x, __m512 y) { function float32x4_t (line 168) | inline float32x4_t add(float32x4_t x, float32x4_t y) { function float32x4_t (line 171) | inline float32x4_t sub(float32x4_t x, float32x4_t y) { function float32x4_t (line 174) | inline float32x4_t mul(float32x4_t x, float32x4_t y) { function float16x8_t (line 180) | inline float16x8_t add(float16x8_t x, float16x8_t y) { function float16x8_t (line 183) | inline float16x8_t sub(float16x8_t x, float16x8_t y) { function float16x8_t (line 186) | inline float16x8_t mul(float16x8_t x, float16x8_t y) { function U (line 198) | U madd(T a, T b, U c) { function U (line 210) | U madder(T a, T b, U c, U* e) { function float32x4_t (line 218) | inline float32x4_t badder(float32x4_t a, float b, float32x4_t c, float32... function __m256 (line 229) | inline __m256 madd(__m256 a, __m256 b, __m256 c) { function __m512 (line 235) | inline __m512 madd(__m512 a, __m512 b, __m512 c) { function float32x4_t (line 243) | inline float32x4_t madd(float32x4_t a, float32x4_t b, float32x4_t c) { function float16x8_t (line 249) | inline float16x8_t madd(float16x8_t a, float16x8_t b, float16x8_t c) { function __m512 (line 258) | inline __m512 madd(__m512bh x, __m512bh y, __m512 z) { function __m512 (line 262) | inline __m512 madder(__m512bh x, __m512bh y, __m512 z, __m512* _) { function hsum (line 271) | inline float hsum(float32x4_t x) { function hsum (line 277) | inline float hsum(float16x8_t x) { function hsum (line 284) | inline float hsum(__m128 x) { function hsum (line 300) | inline float hsum(__m256 x) { function hsum (line 306) | inline float hsum(__m512 x) { function load (line 318) | inline float load(const float* p) { function load (line 322) | inline float load(const ggml_fp16_t* p) { function load (line 326) | inline float load(const ggml_bf16_t* p) { function float32x4_t (line 332) | inline float32x4_t load(const float* p) { function float32x4_t (line 336) | inline float32x4_t load(const ggml_bf16_t* p) { function float16x8_t (line 341) | inline float16x8_t load(const ggml_fp16_t* p) { function float32x4_t (line 345) | inline float32x4_t load(const ggml_fp16_t* p) { function __m128 (line 353) | inline __m128 load(const float* p) { function __m256 (line 360) | inline __m256 load(const float* p) { function __m256 (line 367) | inline __m256 load(const ggml_bf16_t* p) { function __m256 (line 375) | inline __m256 load(const ggml_fp16_t* p) { function __m512 (line 382) | inline __m512 load(const float* p) { function __m512 (line 386) | inline __m512 load(const ggml_fp16_t* p) { function __m512 (line 390) | inline __m512 load(const ggml_bf16_t* p) { function __m512bh (line 398) | inline __m512bh load(const ggml_bf16_t* p) { function __m512bh (line 402) | inline __m512bh load(const float* p) { function store (line 410) | inline void store(float* p, float f) { function store (line 414) | inline void store(ggml_fp16_t* p, float f) { function store (line 418) | inline void store(ggml_bf16_t* p, float f) { function gemm (line 616) | void gemm(long m0, long m, long n0, long n) { function gemm (line 759) | void gemm(long m0, long m, long n0, long n) { function int8x16_t (line 797) | inline int8x16_t load_lo(const block_q8_0* b) { function int8x16_t (line 801) | inline int8x16_t load_hi(const block_q8_0* b) { function int8x16_t (line 805) | inline int8x16_t load_lo(const block_q4_0* b) { function int8x16_t (line 810) | inline int8x16_t load_hi(const block_q4_0* b) { function gemm (line 982) | void gemm(long m0, long m, long n0, long n) { function __m256i (line 1020) | inline __m256i load(const block_q8_0* b) { function __m256i (line 1024) | inline __m256i load(const block_q4_0* b) { function __m256 (line 1032) | inline __m256 updot(__m256i u, __m256i s) { FILE: archive/third_party/llamafile/tinyblas_cpu_mixmul_amd_avx.cpp function llamafile_mixmul_needs (line 13) | size_t llamafile_mixmul_needs(const ggml_tensor* weights, const ggml_ten... FILE: archive/third_party/llamafile/tinyblas_cpu_mixmul_arm80.cpp function llamafile_mixmul_needs (line 13) | size_t llamafile_mixmul_needs(const ggml_tensor* weights, const ggml_ten... FILE: archive/third_party/llamafile/tinyblas_cpu_unsupported.cpp function llamafile_sgemm_unsupported (line 25) | bool llamafile_sgemm_unsupported(long m, long n, long k, const void* A, ... function llamafile_mixmul_unsupported (line 29) | bool llamafile_mixmul_unsupported(const struct ggml_compute_params* params, function iqk_mul_mat_moe_unsupported (line 37) | bool iqk_mul_mat_moe_unsupported(long, long, long, int, int, const void*... FILE: archive/third_party/nlohmann/json.hpp function NLOHMANN_JSON_NAMESPACE_BEGIN (line 239) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_END (line 250) | NLOHMANN_JSON_NAMESPACE_END type would_call_std_ (line 2833) | struct would_call_std_ type value_t (line 2891) | enum class value_t : std::uint8_t function NLOHMANN_JSON_NAMESPACE_END (line 2956) | NLOHMANN_JSON_NAMESPACE_END function NLOHMANN_JSON_NAMESPACE_BEGIN (line 3047) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 3092) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 3281) | NLOHMANN_JSON_NAMESPACE_BEGIN class json_pointer (line 3428) | class json_pointer type ordered_map (line 3439) | struct ordered_map function NLOHMANN_JSON_NAMESPACE_BEGIN (line 3450) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 4241) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_END (line 4369) | NLOHMANN_JSON_NAMESPACE_END function NLOHMANN_JSON_NAMESPACE_END (line 4601) | NLOHMANN_JSON_NAMESPACE_END function NLOHMANN_JSON_NAMESPACE_BEGIN (line 4645) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 4653) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 4668) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 5181) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_END (line 5363) | NLOHMANN_JSON_NAMESPACE_END function NLOHMANN_JSON_NAMESPACE_BEGIN (line 5411) | NLOHMANN_JSON_NAMESPACE_BEGIN type adl_serializer (line 5840) | struct adl_serializer method from_json (line 5845) | static auto from_json(BasicJsonType && j, TargetType& val) noexcept( method from_json (line 5855) | static auto from_json(BasicJsonType && j) noexcept( method to_json (line 5865) | static auto to_json(BasicJsonType& j, TargetType && val) noexcept( function set_subtype (line 5945) | void set_subtype(subtype_type subtype_) noexcept function subtype_type (line 5953) | constexpr subtype_type subtype() const noexcept function has_subtype (line 5960) | constexpr bool has_subtype() const noexcept function clear_subtype (line 5967) | void clear_subtype() noexcept function NLOHMANN_JSON_NAMESPACE_BEGIN (line 6005) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 6175) | NLOHMANN_JSON_NAMESPACE_BEGIN function json_sax_dom_parser (line 6816) | explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptio... function json_sax_dom_parser (line 6821) | json_sax_dom_parser(const json_sax_dom_parser&) = delete; function json_sax_dom_parser (line 6822) | json_sax_dom_parser(json_sax_dom_parser&&) = default; function null (line 6827) | bool null() function boolean (line 6833) | bool boolean(bool val) function number_integer (line 6839) | bool number_integer(number_integer_t val) function number_unsigned (line 6845) | bool number_unsigned(number_unsigned_t val) function number_float (line 6851) | bool number_float(number_float_t val, const string_t& /*unused*/) function string (line 6857) | bool string(string_t& val) function binary (line 6863) | bool binary(binary_t& val) function start_object (line 6869) | bool start_object(std::size_t len) function key (line 6881) | bool key(string_t& val) function end_object (line 6891) | bool end_object() function start_array (line 6901) | bool start_array(std::size_t len) function end_array (line 6913) | bool end_array() function parse_error (line 6924) | bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, function is_errored (line 6936) | constexpr bool is_errored() const class json_sax_dom_callback_parser (line 6985) | class json_sax_dom_callback_parser method json_sax_dom_callback_parser (line 6996) | json_sax_dom_callback_parser(BasicJsonType& r, method json_sax_dom_callback_parser (line 7005) | json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = de... method json_sax_dom_callback_parser (line 7006) | json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; method json_sax_dom_callback_parser (line 7007) | json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_pa... method json_sax_dom_callback_parser (line 7008) | json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&... method null (line 7011) | bool null() method boolean (line 7017) | bool boolean(bool val) method number_integer (line 7023) | bool number_integer(number_integer_t val) method number_unsigned (line 7029) | bool number_unsigned(number_unsigned_t val) method number_float (line 7035) | bool number_float(number_float_t val, const string_t& /*unused*/) method string (line 7041) | bool string(string_t& val) method binary (line 7047) | bool binary(binary_t& val) method start_object (line 7053) | bool start_object(std::size_t len) method key (line 7071) | bool key(string_t& val) method end_object (line 7088) | bool end_object() method start_array (line 7124) | bool start_array(std::size_t len) method end_array (line 7141) | bool end_array() method parse_error (line 7174) | bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, method is_errored (line 7186) | constexpr bool is_errored() const method handle_value (line 7208) | std::pair handle_value(Value&& v, const bool ski... class json_sax_acceptor (line 7292) | class json_sax_acceptor method null (line 7301) | bool null() method boolean (line 7306) | bool boolean(bool /*unused*/) method number_integer (line 7311) | bool number_integer(number_integer_t /*unused*/) method number_unsigned (line 7316) | bool number_unsigned(number_unsigned_t /*unused*/) method number_float (line 7321) | bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) method string (line 7326) | bool string(string_t& /*unused*/) method binary (line 7331) | bool binary(binary_t& /*unused*/) method start_object (line 7336) | bool start_object(std::size_t /*unused*/ = static_cast(-1)) method key (line 7341) | bool key(string_t& /*unused*/) method end_object (line 7346) | bool end_object() method start_array (line 7351) | bool start_array(std::size_t /*unused*/ = static_cast(-1)) method end_array (line 7356) | bool end_array() method parse_error (line 7361) | bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/... function NLOHMANN_JSON_NAMESPACE_BEGIN (line 7399) | NLOHMANN_JSON_NAMESPACE_BEGIN function reset (line 8694) | void reset() noexcept function char_int_type (line 8711) | char_int_type get() function unget (line 8748) | void unget() function add (line 8775) | void add(char_int_type c) function number_unsigned_t (line 8792) | constexpr number_unsigned_t get_number_unsigned() const noexcept function number_float_t (line 8798) | constexpr number_float_t get_number_float() const noexcept function string_t (line 8804) | string_t& get_string() function position_t (line 8814) | constexpr position_t get_position() const noexcept function get_token_string (line 8822) | std::string get_token_string() const function JSON_HEDLEY_RETURNS_NON_NULL (line 8846) | JSON_HEDLEY_RETURNS_NON_NULL function skip_bom (line 8860) | bool skip_bom() function skip_whitespace (line 8874) | void skip_whitespace() function token_type (line 8883) | token_type scan() function NLOHMANN_JSON_NAMESPACE_BEGIN (line 9031) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_END (line 9171) | NLOHMANN_JSON_NAMESPACE_END function NLOHMANN_JSON_NAMESPACE_BEGIN (line 12195) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 12717) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_END (line 12833) | NLOHMANN_JSON_NAMESPACE_END function NLOHMANN_JSON_NAMESPACE_BEGIN (line 12887) | NLOHMANN_JSON_NAMESPACE_BEGIN function pointer (line 13189) | pointer operator->() const function iter_impl (line 13231) | iter_impl operator++(int)& // NOLINT(cert-dcl21-cpp) function iter_impl (line 13242) | iter_impl& operator++() function iter_impl (line 13282) | iter_impl operator--(int)& // NOLINT(cert-dcl21-cpp) function iter_impl (line 13293) | iter_impl& operator--() function iter_impl (line 13441) | iter_impl& operator+=(difference_type i) function iter_impl (line 13478) | iter_impl& operator-=(difference_type i) function iter_impl (line 13487) | iter_impl operator+(difference_type i) const function friend (line 13498) | friend iter_impl operator+(difference_type i, const iter_impl& it) function iter_impl (line 13509) | iter_impl operator-(difference_type i) const function difference_type (line 13520) | difference_type operator-(const iter_impl& other) const function reference (line 13549) | reference operator[](difference_type n) const function reference (line 13603) | reference value() const function NLOHMANN_JSON_NAMESPACE_BEGIN (line 13637) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 13770) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 13830) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_BASIC_JSON_TPL_DECLARATION (line 13850) | NLOHMANN_BASIC_JSON_TPL_DECLARATION function json_pointer (line 13862) | explicit json_pointer(const string_t& s = "") function string_t (line 13868) | string_t to_string() const function friend (line 13889) | friend std::ostream& operator<<(std::ostream& o, const json_pointer& ptr) function json_pointer (line 13898) | json_pointer& operator/=(const json_pointer& ptr) function json_pointer (line 13908) | json_pointer& operator/=(string_t token) function json_pointer (line 13916) | json_pointer& operator/=(std::size_t array_idx) function friend (line 13923) | friend json_pointer operator/(const json_pointer& lhs, function friend (line 13931) | friend json_pointer operator/(const json_pointer& lhs, string_t token) /... function friend (line 13938) | friend json_pointer operator/(const json_pointer& lhs, std::size_t array... function json_pointer (line 13945) | json_pointer parent_pointer() const function pop_back (line 13959) | void pop_back() function string_t (line 13971) | const string_t& back() const function push_back (line 13983) | void push_back(const string_t& token) function push_back (line 13990) | void push_back(string_t&& token) function empty (line 13997) | bool empty() const noexcept function BasicJsonType (line 14074) | BasicJsonType& get_and_create(BasicJsonType& j) const function BasicJsonType (line 14154) | BasicJsonType& get_unchecked(BasicJsonType* ptr) const function BasicJsonType (line 14222) | BasicJsonType& get_checked(BasicJsonType* ptr) const function BasicJsonType (line 14280) | const BasicJsonType& get_unchecked(const BasicJsonType* ptr) const function BasicJsonType (line 14329) | const BasicJsonType& get_checked(const BasicJsonType* ptr) const function contains (line 14378) | bool contains(const BasicJsonType* ptr) const function split (line 14466) | static std::vector split(const string_t& reference_string) function BasicJsonType (line 14606) | static BasicJsonType function convert (line 14635) | json_pointer convert() const& function convert (line 14642) | json_pointer convert()&& function NLOHMANN_JSON_NAMESPACE_BEGIN (line 14808) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_BEGIN (line 14931) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_END (line 15053) | NLOHMANN_JSON_NAMESPACE_END function NLOHMANN_JSON_NAMESPACE_BEGIN (line 16918) | NLOHMANN_JSON_NAMESPACE_BEGIN function NLOHMANN_JSON_NAMESPACE_END (line 18015) | NLOHMANN_JSON_NAMESPACE_END function hex_bytes (line 18675) | static std::string hex_bytes(std::uint8_t byte) function is_negative_number (line 18686) | bool is_negative_number(NumberType x) function is_negative_number (line 18692) | bool is_negative_number(NumberType /*unused*/) function dump_integer (line 18712) | void dump_integer(NumberType x) function dump_float (line 18797) | void dump_float(number_float_t x) function dump_float (line 18818) | void dump_float(number_float_t x, std::true_type /*is_ieee_single_or_dou... function dump_float (line 18826) | void dump_float(number_float_t x, std::false_type /*is_ieee_single_or_do... function decode (line 18898) | static std::uint8_t decode(std::uint8_t& state, std::uint32_t& codep, co... function number_unsigned_t (line 18938) | number_unsigned_t remove_sign(number_unsigned_t x) function number_unsigned_t (line 18953) | inline number_unsigned_t remove_sign(number_integer_t x) noexcept function ordered_map (line 19039) | ordered_map() noexcept(noexcept(Container())) : Container{} {} function ordered_map (line 19040) | explicit ordered_map(const Allocator& alloc) noexcept(noexcept(Container... function ordered_map (line 19042) | ordered_map(It first, It last, const Allocator& alloc = Allocator()) function ordered_map (line 19044) | ordered_map(std::initializer_list init, const Allocator& all... function emplace (line 19047) | std::pair emplace(const key_type& key, T&& t) function emplace (line 19062) | std::pair emplace(KeyType && key, T && t) function T (line 19075) | T& operator[](const key_type& key) function T (line 19082) | T & operator[](KeyType && key) function T (line 19087) | const T& operator[](const key_type& key) const function T (line 19094) | const T & operator[](KeyType && key) const function T (line 19099) | T& at(const key_type& key) function T (line 19114) | T & at(KeyType && key) // NOLINT(cppcoreguidelines-missing-std-forward) function T (line 19127) | const T& at(const key_type& key) const function T (line 19142) | const T & at(KeyType && key) const // NOLINT(cppcoreguidelines-missing-s... function size_type (line 19155) | size_type erase(const key_type& key) function size_type (line 19176) | size_type erase(KeyType && key) // NOLINT(cppcoreguidelines-missing-std-... function iterator (line 19195) | iterator erase(iterator pos) function iterator (line 19200) | iterator erase(iterator first, iterator last) function size_type (line 19253) | size_type count(const key_type& key) const function size_type (line 19267) | size_type count(KeyType && key) const // NOLINT(cppcoreguidelines-missin... function iterator (line 19279) | iterator find(const key_type& key) function iterator (line 19293) | iterator find(KeyType && key) // NOLINT(cppcoreguidelines-missing-std-fo... function const_iterator (line 19305) | const_iterator find(const key_type& key) const function insert (line 19317) | std::pair insert( value_type&& value ) function insert (line 19322) | std::pair insert( const value_type& value ) function insert (line 19340) | void insert(InputIt first, InputIt last) function NLOHMANN_JSON_NAMESPACE_BEGIN (line 19367) | NLOHMANN_JSON_NAMESPACE_BEGIN function set_parents (line 19994) | void set_parents() function iterator (line 20031) | iterator set_parents(iterator it, typename iterator::difference_type cou... function reference (line 20044) | reference set_parent(reference j, std::size_t old_capacity = static_cast... function basic_json (line 20106) | basic_json(const value_t v) function basic_json (line 20114) | basic_json(std::nullptr_t = nullptr) noexcept // NOLINT(bugprone-excepti... function basic_json (line 20126) | basic_json(CompatibleType && val) noexcept(noexcept( // NOLINT(bugprone-... function basic_json (line 20140) | basic_json(const BasicJsonType& val) function basic_json (line 20193) | basic_json(initializer_list_t init, function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20251) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20262) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20273) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20284) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20295) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20303) | JSON_HEDLEY_WARN_UNUSED_RESULT function basic_json (line 20311) | basic_json(size_type cnt, const basic_json& val): function basic_json (line 20323) | basic_json(InputIT first, InputIT last) function basic_json (line 20432) | basic_json(const JsonRef& ref) : basic_json(ref.moved_or_copied()) {} function basic_json (line 20436) | basic_json(const basic_json& other) function basic_json (line 20505) | basic_json(basic_json&& other) noexcept function basic_json (line 20522) | basic_json& operator=(basic_json other) noexcept ( function value_t (line 20585) | constexpr value_t type() const noexcept function is_primitive (line 20592) | constexpr bool is_primitive() const noexcept function is_structured (line 20599) | constexpr bool is_structured() const noexcept function is_null (line 20606) | constexpr bool is_null() const noexcept function is_boolean (line 20613) | constexpr bool is_boolean() const noexcept function is_number (line 20620) | constexpr bool is_number() const noexcept function is_number_integer (line 20627) | constexpr bool is_number_integer() const noexcept function is_number_unsigned (line 20634) | constexpr bool is_number_unsigned() const noexcept function is_number_float (line 20641) | constexpr bool is_number_float() const noexcept function is_object (line 20648) | constexpr bool is_object() const noexcept function is_array (line 20655) | constexpr bool is_array() const noexcept function is_string (line 20662) | constexpr bool is_string() const noexcept function is_binary (line 20669) | constexpr bool is_binary() const noexcept function is_discarded (line 20676) | constexpr bool is_discarded() const noexcept function object_t (line 20707) | object_t* get_impl_ptr(object_t* /*unused*/) noexcept function object_t (line 20713) | constexpr const object_t* get_impl_ptr(const object_t* /*unused*/) const... function array_t (line 20719) | array_t* get_impl_ptr(array_t* /*unused*/) noexcept function array_t (line 20725) | constexpr const array_t* get_impl_ptr(const array_t* /*unused*/) const n... function string_t (line 20731) | string_t* get_impl_ptr(string_t* /*unused*/) noexcept function string_t (line 20737) | constexpr const string_t* get_impl_ptr(const string_t* /*unused*/) const... function boolean_t (line 20743) | boolean_t* get_impl_ptr(boolean_t* /*unused*/) noexcept function boolean_t (line 20749) | constexpr const boolean_t* get_impl_ptr(const boolean_t* /*unused*/) con... function number_integer_t (line 20755) | number_integer_t* get_impl_ptr(number_integer_t* /*unused*/) noexcept function number_integer_t (line 20761) | constexpr const number_integer_t* get_impl_ptr(const number_integer_t* /... function number_unsigned_t (line 20767) | number_unsigned_t* get_impl_ptr(number_unsigned_t* /*unused*/) noexcept function number_unsigned_t (line 20773) | constexpr const number_unsigned_t* get_impl_ptr(const number_unsigned_t*... function number_float_t (line 20779) | number_float_t* get_impl_ptr(number_float_t* /*unused*/) noexcept function number_float_t (line 20785) | constexpr const number_float_t* get_impl_ptr(const number_float_t* /*unu... function binary_t (line 20791) | binary_t* get_impl_ptr(binary_t* /*unused*/) noexcept function binary_t (line 20797) | constexpr const binary_t* get_impl_ptr(const binary_t* /*unused*/) const... function ReferenceType (line 20814) | static ReferenceType get_ref_impl(ThisType& obj) function get_ptr (line 20847) | constexpr auto get_ptr() const noexcept -> decltype(std::declval /*unused*/) const noexcept(no... function BasicJsonType (line 20964) | BasicJsonType get_impl(detail::priority_tag<2> /*unused*/) const function basic_json (line 20987) | basic_json get_impl(detail::priority_tag<3> /*unused*/) const function get_impl (line 21000) | constexpr auto get_impl(detail::priority_tag<4> /*unused*/) const noexcept function get (line 21076) | auto get() noexcept -> decltype(std::declval().template g... function ValueType (line 21089) | ValueType & get_to(ValueType& v) const noexcept(noexcept( function ValueType (line 21102) | ValueType & get_to(ValueType& v) const function Array (line 21113) | Array get_to(T (&v)[N]) const // NOLINT(cppcoreguidelines-avoid-c-arrays... function ReferenceType (line 21125) | ReferenceType get_ref() function ReferenceType (line 21136) | ReferenceType get_ref() const function binary_t (line 21195) | binary_t& get_binary() function binary_t (line 21207) | const binary_t& get_binary() const function reference (line 21229) | reference at(size_type idx) function const_reference (line 21252) | const_reference at(size_type idx) const function reference (line 21275) | reference at(const typename object_t::key_type& key) function reference (line 21295) | reference at(KeyType && key) function const_reference (line 21313) | const_reference at(const typename object_t::key_type& key) const function const_reference (line 21333) | const_reference at(KeyType && key) const function reference (line 21351) | reference operator[](size_type idx) function const_reference (line 21397) | const_reference operator[](size_type idx) const function reference (line 21410) | reference operator[](typename object_t::key_type key) function const_reference (line 21432) | const_reference operator[](const typename object_t::key_type& key) const function reference (line 21448) | reference operator[](T* key) function const_reference (line 21454) | const_reference operator[](T* key) const function reference (line 21463) | reference operator[](KeyType && key) function const_reference (line 21487) | const_reference operator[](KeyType && key) const class ValueType (line 21513) | class ValueType function ReturnType (line 21542) | ReturnType value(const typename object_t::key_type& key, ValueType && de... function ValueType (line 21568) | ValueType value(KeyType && key, const ValueType& default_value) const function ReturnType (line 21595) | ReturnType value(KeyType && key, ValueType && default_value) const function ValueType (line 21618) | ValueType value(const json_pointer& ptr, const ValueType& default_value)... function ReturnType (line 21643) | ReturnType value(const json_pointer& ptr, ValueType && default_value) const function ValueType (line 21667) | ValueType value(const ::nlohmann::json_pointer& ptr, cons... function ReturnType (line 21678) | ReturnType value(const ::nlohmann::json_pointer& ptr, Val... function reference (line 21685) | reference front() function const_reference (line 21692) | const_reference front() const function reference (line 21699) | reference back() function const_reference (line 21708) | const_reference back() const function IteratorType (line 21720) | IteratorType erase(IteratorType pos) function IteratorType (line 21790) | IteratorType erase(IteratorType first, IteratorType last) function erase_internal (line 21858) | private: function size_type (line 21874) | size_type erase_internal(KeyType && key) function size_type (line 21906) | size_type erase(KeyType && key) function erase (line 21913) | void erase(const size_type idx) function iterator (line 21942) | iterator find(const typename object_t::key_type& key) function const_iterator (line 21956) | const_iterator find(const typename object_t::key_type& key) const function iterator (line 21972) | iterator find(KeyType && key) function const_iterator (line 21988) | const_iterator find(KeyType && key) const function size_type (line 22002) | size_type count(const typename object_t::key_type& key) const function size_type (line 22012) | size_type count(KeyType && key) const function contains (line 22020) | bool contains(const typename object_t::key_type& key) const function contains (line 22029) | bool contains(KeyType && key) const function contains (line 22036) | bool contains(const json_pointer& ptr) const function contains (line 22043) | bool contains(const typename ::nlohmann::json_pointer& pt... function iterator (line 22059) | iterator begin() noexcept function const_iterator (line 22068) | const_iterator begin() const noexcept function const_iterator (line 22075) | const_iterator cbegin() const noexcept function iterator (line 22084) | iterator end() noexcept function const_iterator (line 22093) | const_iterator end() const noexcept function const_iterator (line 22100) | const_iterator cend() const noexcept function reverse_iterator (line 22109) | reverse_iterator rbegin() noexcept function const_reverse_iterator (line 22116) | const_reverse_iterator rbegin() const noexcept function reverse_iterator (line 22123) | reverse_iterator rend() noexcept function const_reverse_iterator (line 22130) | const_reverse_iterator rend() const noexcept function const_reverse_iterator (line 22137) | const_reverse_iterator crbegin() const noexcept function const_reverse_iterator (line 22144) | const_reverse_iterator crend() const noexcept function iterator_wrapper (line 22156) | static iteration_proxy iterator_wrapper(reference ref) noexcept function iterator_wrapper (line 22167) | static iteration_proxy iterator_wrapper(const_reference ... function items (line 22174) | iteration_proxy items() noexcept function items (line 22181) | iteration_proxy items() const noexcept function empty (line 22197) | bool empty() const noexcept function size_type (line 22236) | size_type size() const noexcept function size_type (line 22275) | size_type max_size() const noexcept function clear (line 22318) | void clear() noexcept function push_back (line 22379) | void push_back(basic_json&& val) function reference (line 22404) | reference operator+=(basic_json&& val) function push_back (line 22412) | void push_back(const basic_json& val) function reference (line 22436) | reference operator+=(const basic_json& val) function push_back (line 22444) | void push_back(const typename object_t::value_type& val) function reference (line 22467) | reference operator+=(const typename object_t::value_type& val) function push_back (line 22475) | void push_back(initializer_list_t init) function reference (line 22491) | reference operator+=(initializer_list_t init) function reference (line 22500) | reference emplace_back(Args&& ... args) function emplace (line 22525) | std::pair emplace(Args&& ... args) function iterator (line 22557) | iterator insert_iterator(const_iterator pos, Args&& ... args) function iterator (line 22576) | iterator insert(const_iterator pos, const basic_json& val) function iterator (line 22596) | iterator insert(const_iterator pos, basic_json&& val) function iterator (line 22603) | iterator insert(const_iterator pos, size_type cnt, const basic_json& val) function iterator (line 22623) | iterator insert(const_iterator pos, const_iterator first, const_iterator... function iterator (line 22654) | iterator insert(const_iterator pos, initializer_list_t ilist) function insert (line 22674) | void insert(const_iterator first, const_iterator last) function update (line 22699) | void update(const_reference j, bool merge_objects = false) function update (line 22706) | void update(const_iterator first, const_iterator last, bool merge_object... function swap (line 22753) | void swap(reference other) noexcept ( function friend (line 22770) | friend void swap(reference left, reference right) noexcept ( function swap (line 22782) | void swap(array_t& other) // NOLINT(bugprone-exception-escape,cppcoregui... function swap (line 22798) | void swap(object_t& other) // NOLINT(bugprone-exception-escape,cppcoregu... function swap (line 22814) | void swap(string_t& other) // NOLINT(bugprone-exception-escape,cppcoregu... function swap (line 22830) | void swap(binary_t& other) // NOLINT(bugprone-exception-escape,cppcoregu... function swap (line 22846) | void swap(typename binary_t::container_type& other) // NOLINT(bugprone-e... function else (line 22935) | else if(compares_unordered(lhs, rhs))\ function compares_unordered (line 22964) | bool compares_unordered(const_reference rhs, bool inverse = false) const... function friend (line 23077) | friend bool operator==(const_reference lhs, const_reference rhs) noexcept function friend (line 23109) | friend bool operator!=(const_reference lhs, const_reference rhs) noexcept function friend (line 23166) | friend bool operator<=(const_reference lhs, const_reference rhs) noexcept function friend (line 23195) | friend bool operator>(const_reference lhs, const_reference rhs) noexcept function friend (line 23225) | friend bool operator>=(const_reference lhs, const_reference rhs) noexcept function friend (line 23266) | friend std::ostream& operator<<(std::ostream& o, const basic_json& j) function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23305) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23319) | JSON_HEDLEY_WARN_UNUSED_RESULT function basic_json (line 23333) | static basic_json parse(detail::span_input_adapter&& i, function accept (line 23346) | static bool accept(InputType&& i, function accept (line 23355) | static bool accept(IteratorType first, IteratorType last, function accept (line 23363) | static bool accept(detail::span_input_adapter&& i, function sax_parse (line 23373) | static bool sax_parse(InputType&& i, SAX* sax, function sax_parse (line 23388) | static bool sax_parse(IteratorType first, IteratorType last, SAX* sax, function sax_parse (line 23407) | static bool sax_parse(detail::span_input_adapter&& i, SAX* sax, function JSON_HEDLEY_RETURNS_NON_NULL (line 23448) | JSON_HEDLEY_RETURNS_NON_NULL type data (line 23480) | struct data method data (line 23488) | data(const value_t v) method data (line 23493) | data(size_type cnt, const basic_json& val) method data (line 23499) | data() noexcept = default; method data (line 23500) | data(data&&) noexcept = default; method data (line 23501) | data(const data&) noexcept = delete; method data (line 23502) | data& operator=(data&&) noexcept = delete; method data (line 23503) | data& operator=(const data&) noexcept = delete; function to_cbor (line 23537) | static void to_cbor(const basic_json& j, detail::output_adapter o) function to_msgpack (line 23551) | static std::vector to_msgpack(const basic_json& j) function to_msgpack (line 23560) | static void to_msgpack(const basic_json& j, detail::output_adapter o) function to_ubjson (line 23574) | static std::vector to_ubjson(const basic_json& j, function to_ubjson (line 23585) | static void to_ubjson(const basic_json& j, detail::output_adapter o, function to_bjdata (line 23601) | static std::vector to_bjdata(const basic_json& j, function to_bjdata (line 23612) | static void to_bjdata(const basic_json& j, detail::output_adapter o, function to_bson (line 23628) | static std::vector to_bson(const basic_json& j) function to_bson (line 23637) | static void to_bson(const basic_json& j, detail::output_adapter o) function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23652) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23668) | JSON_HEDLEY_WARN_UNUSED_RESULT function basic_json (line 23684) | static basic_json from_cbor(const T* ptr, std::size_t len, function basic_json (line 23694) | static basic_json from_cbor(detail::span_input_adapter&& i, function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23710) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23725) | JSON_HEDLEY_WARN_UNUSED_RESULT function basic_json (line 23740) | static basic_json from_msgpack(const T* ptr, std::size_t len, function basic_json (line 23749) | static basic_json from_msgpack(detail::span_input_adapter&& i, function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23764) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23779) | JSON_HEDLEY_WARN_UNUSED_RESULT function basic_json (line 23794) | static basic_json from_ubjson(const T* ptr, std::size_t len, function basic_json (line 23803) | static basic_json from_ubjson(detail::span_input_adapter&& i, function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23818) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23833) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23848) | JSON_HEDLEY_WARN_UNUSED_RESULT function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23863) | JSON_HEDLEY_WARN_UNUSED_RESULT function basic_json (line 23878) | static basic_json from_bson(const T* ptr, std::size_t len, function basic_json (line 23887) | static basic_json from_bson(detail::span_input_adapter&& i, function reference (line 23909) | reference operator[](const json_pointer& ptr) function reference (line 23916) | reference operator[](const ::nlohmann::json_pointer& ptr) function const_reference (line 23923) | const_reference operator[](const json_pointer& ptr) const function const_reference (line 23930) | const_reference operator[](const ::nlohmann::json_pointer... function reference (line 23937) | reference at(const json_pointer& ptr) function reference (line 23944) | reference at(const ::nlohmann::json_pointer& ptr) function const_reference (line 23951) | const_reference at(const json_pointer& ptr) const function const_reference (line 23958) | const_reference at(const ::nlohmann::json_pointer& ptr) c... function basic_json (line 23965) | basic_json flatten() const function basic_json (line 23974) | basic_json unflatten() const function patch_inplace (line 23990) | void patch_inplace(const basic_json& json_patch) function basic_json (line 24261) | basic_json patch(const basic_json& json_patch) const function JSON_HEDLEY_WARN_UNUSED_RESULT (line 24270) | JSON_HEDLEY_WARN_UNUSED_RESULT function merge_patch (line 24413) | void merge_patch(const basic_json& apply_patch) function NLOHMANN_BASIC_JSON_TPL_DECLARATION (line 24444) | NLOHMANN_BASIC_JSON_TPL_DECLARATION function NLOHMANN_JSON_NAMESPACE_END (line 24481) | NLOHMANN_JSON_NAMESPACE_END FILE: archive/third_party/nlohmann/json_fwd.hpp class json_pointer (line 156) | class json_pointer type ordered_map (line 167) | struct ordered_map FILE: kt-kernel/bench/bench_attention.py function bench_linear (line 41) | def bench_linear(cache_seqlen: int): FILE: kt-kernel/bench/bench_attention_torch.py function bench_linear (line 29) | def bench_linear(cache_seqlen: int, device): FILE: kt-kernel/bench/bench_bf16_moe.py function get_git_commit (line 44) | def get_git_commit(): function get_system_info (line 62) | def get_system_info(): function record_results (line 84) | def record_results(result, filename=json_path): function generate_bf16_weights (line 90) | def generate_bf16_weights(shape: tuple): function bench_bf16_moe (line 105) | def bench_bf16_moe(): FILE: kt-kernel/bench/bench_fp8_moe.py function get_git_commit (line 45) | def get_git_commit(): function get_system_info (line 63) | def get_system_info(): function record_results (line 85) | def record_results(result, filename=json_path): function generate_fp8_weights_direct (line 91) | def generate_fp8_weights_direct(shape: tuple, group_size: int = 128): function bench_fp8_moe (line 121) | def bench_fp8_moe(): FILE: kt-kernel/bench/bench_fp8_perchannel_moe.py function get_git_commit (line 44) | def get_git_commit(): function get_system_info (line 62) | def get_system_info(): function record_results (line 84) | def record_results(result, filename=json_path): function generate_fp8_perchannel_weights_direct (line 90) | def generate_fp8_perchannel_weights_direct(shape: tuple): function bench_fp8_perchannel_moe (line 115) | def bench_fp8_perchannel_moe(): FILE: kt-kernel/bench/bench_k2_moe_amx.py function get_git_commit (line 41) | def get_git_commit(): function get_system_info (line 63) | def get_system_info(): function record_results (line 116) | def record_results(result, filename=json_path): function pack_to_int32 (line 121) | def pack_to_int32(value: torch.Tensor, num_bits: int, packed_dim: int = ... function pack_tensor_per_row (line 154) | def pack_tensor_per_row(q: torch.Tensor, num_bits: int) -> torch.Tensor: function quantize_k2_tensor (line 161) | def quantize_k2_tensor(weights: torch.Tensor, group_size: int): function build_quantized_layer_weights (line 180) | def build_quantized_layer_weights(): function bench_k2_moe (line 211) | def bench_k2_moe(): FILE: kt-kernel/bench/bench_k2_write_buffer.py function get_git_commit (line 41) | def get_git_commit(): function get_system_info (line 63) | def get_system_info(): function record_results (line 116) | def record_results(result, filename=json_path): function allocate_weights (line 121) | def allocate_weights(): function build_moe (line 145) | def build_moe(layer_idx=0): function allocate_buffers (line 194) | def allocate_buffers(buffer_shapes): function bench_write_buffer (line 225) | def bench_write_buffer(): FILE: kt-kernel/bench/bench_linear.py function bench_linear (line 30) | def bench_linear(quant_mode: str): FILE: kt-kernel/bench/bench_linear_torch.py function bench_linear (line 26) | def bench_linear(quant_mode: str): FILE: kt-kernel/bench/bench_mla.py function get_git_commit (line 65) | def get_git_commit(): function get_system_info (line 92) | def get_system_info(): function record_results (line 145) | def record_results(result, filename=json_path): function bench_mla (line 153) | def bench_mla(quant_mode: str): FILE: kt-kernel/bench/bench_mlp.py function bench_mlp (line 30) | def bench_mlp(quant_mode: str): FILE: kt-kernel/bench/bench_mlp_torch.py function act_fn (line 26) | def act_fn(x): function mlp_torch (line 29) | def mlp_torch(input, gate_proj, up_proj, down_proj): function bench_mlp (line 47) | def bench_mlp(quant_mode: str): FILE: kt-kernel/bench/bench_moe.py function get_git_commit (line 41) | def get_git_commit(): function get_system_info (line 68) | def get_system_info(): function record_results (line 121) | def record_results(result, filename=json_path): function bench_moe (line 129) | def bench_moe(quant_mode: str): FILE: kt-kernel/bench/bench_moe_amx.py function get_git_commit (line 46) | def get_git_commit(): function get_system_info (line 73) | def get_system_info(): function record_results (line 135) | def record_results(result, filename=json_path): function bench_moe (line 143) | def bench_moe(quant_mode: str): FILE: kt-kernel/bench/bench_moe_amx_k.py function get_git_commit (line 47) | def get_git_commit(): function get_system_info (line 74) | def get_system_info(): function record_results (line 136) | def record_results(result, filename=json_path): function bench_moe (line 144) | def bench_moe(quant_mode: str): FILE: kt-kernel/bench/bench_moe_kernel.py function get_git_commit (line 53) | def get_git_commit(): function get_system_info (line 80) | def get_system_info(): function record_results (line 142) | def record_results(result, filename=json_path): function bench_moe (line 150) | def bench_moe(quant_mode: str): FILE: kt-kernel/bench/bench_moe_kernel_tiling.py function maybe_get_class (line 33) | def maybe_get_class(module, name): function main (line 37) | def main(): FILE: kt-kernel/bench/bench_moe_kml.py function get_git_commit (line 47) | def get_git_commit(): function get_system_info (line 74) | def get_system_info(): function record_results (line 136) | def record_results(result, filename=json_path): function bench_moe (line 144) | def bench_moe(quant_mode: str): FILE: kt-kernel/bench/bench_moe_torch.py function act_fn (line 28) | def act_fn(x): function mlp_torch (line 31) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 49) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function bench_moe (line 80) | def bench_moe(quant_mode: str): FILE: kt-kernel/bench/bench_write_buffer.py function get_git_commit (line 50) | def get_git_commit(): function get_system_info (line 66) | def get_system_info(): function record_results (line 93) | def record_results(result, filename=json_path): function div_up (line 98) | def div_up(a, b): function allocate_weights_fp8 (line 107) | def allocate_weights_fp8(): function allocate_weights_fp8_perchannel (line 152) | def allocate_weights_fp8_perchannel(): function build_moe_fp8 (line 195) | def build_moe_fp8(layer_idx=0): function build_moe_fp8_perchannel (line 226) | def build_moe_fp8_perchannel(layer_idx=0): function allocate_buffers_fp8 (line 258) | def allocate_buffers_fp8(buffer_shapes): function allocate_buffers_fp8_perchannel (line 292) | def allocate_buffers_fp8_perchannel(buffer_shapes): function allocate_weights_bf16 (line 331) | def allocate_weights_bf16(): function build_moe_bf16 (line 352) | def build_moe_bf16(layer_idx=0): function allocate_buffers_bf16 (line 379) | def allocate_buffers_bf16(buffer_shapes): function bench_write_buffer (line 413) | def bench_write_buffer(quant_mode: str): function main (line 525) | def main(quant_modes=None): FILE: kt-kernel/bench/compare_moe_performance.py class EnvironmentConfig (line 30) | class EnvironmentConfig: method apply (line 34) | def apply(self): function get_cpu_count (line 84) | def get_cpu_count() -> int: function get_physical_cpu_count (line 111) | def get_physical_cpu_count() -> int: class TestConfig (line 192) | class TestConfig: method __post_init__ (line 204) | def __post_init__(self): method total_configurations (line 213) | def total_configurations(self) -> int: function get_numa_count (line 216) | def get_numa_count() -> int: class SystemConfig (line 251) | class SystemConfig: method __post_init__ (line 255) | def __post_init__(self): class ThreadConfig (line 264) | class ThreadConfig: method from_thread_count (line 271) | def from_thread_count(cls, thread_count: int, numa_count: int, cpu_cor... function get_system_info (line 290) | def get_system_info() -> Dict[str, any]: class BenchmarkResult (line 343) | class BenchmarkResult: method to_dict (line 354) | def to_dict(self) -> Dict: class CheckpointState (line 358) | class CheckpointState: method to_dict (line 366) | def to_dict(self) -> Dict: method from_dict (line 376) | def from_dict(cls, data: Dict) -> 'CheckpointState': class CheckpointManager (line 387) | class CheckpointManager: method __init__ (line 389) | def __init__(self, checkpoint_dir: str = None): method _signal_handler (line 399) | def _signal_handler(self, signum, frame): method save_checkpoint (line 403) | def save_checkpoint(self, state: CheckpointState): method load_checkpoint (line 421) | def load_checkpoint(self) -> Optional[CheckpointState]: method clear_checkpoint (line 437) | def clear_checkpoint(self): function bench_ktransformers_moe (line 443) | def bench_ktransformers_moe(test_config: TestConfig, quant_mode: str, ql... function run_sgl_int4_with_numactl (line 619) | def run_sgl_int4_with_numactl(test_config: TestConfig, qlen: int, function run_sgl_with_numactl (line 874) | def run_sgl_with_numactl(test_config: TestConfig, qlen: int, function save_results (line 1063) | def save_results(results: List[BenchmarkResult], test_config: TestConfig... function print_summary_table (line 1089) | def print_summary_table(results: List[BenchmarkResult]): function main (line 1116) | def main(): FILE: kt-kernel/bench/multi_bench_moe.py function expand_param_dict (line 46) | def expand_param_dict(param_dict): function update_bench_parameters (line 77) | def update_bench_parameters(params): function main (line 96) | def main(): FILE: kt-kernel/bench/upload-bench-json.py function insert_jsonl_file (line 28) | def insert_jsonl_file(file_path): FILE: kt-kernel/cpu_backend/cpuinfer.h function class (line 34) | class CPUInfer { function submit (line 78) | void submit(std::pair params) { function submit_with_cuda_stream (line 85) | void submit_with_cuda_stream(intptr_t user_cuda_stream, std::pair None: method forward (line 423) | def forward(self, hidden_states): class DeepseekV3MoE (line 476) | class DeepseekV3MoE(nn.Module): method __init__ (line 481) | def __init__(self, config): method forward (line 523) | def forward(self, hidden_states): method moe_infer (line 536) | def moe_infer(self, x, topk_ids, topk_weight): function repeat_kv (line 613) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class DeepseekV3Attention (line 628) | class DeepseekV3Attention(nn.Module): method __init__ (line 631) | def __init__(self, config: DeepseekV3Config, layer_idx: Optional[int] ... method _init_rope (line 698) | def _init_rope(self): method _shape (line 744) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 751) | def forward( class DeepseekV3FlashAttention2 (line 862) | class DeepseekV3FlashAttention2(DeepseekV3Attention): method __init__ (line 869) | def __init__(self, *args, **kwargs): method forward (line 877) | def forward( method _flash_attention_forward (line 1013) | def _flash_attention_forward( method _upad_input (line 1093) | def _upad_input( class DeepseekV3DecoderLayer (line 1145) | class DeepseekV3DecoderLayer(nn.Module): method __init__ (line 1146) | def __init__(self, config: DeepseekV3Config, layer_idx: int): method forward (line 1170) | def forward( class DeepseekV3PreTrainedModel (line 1256) | class DeepseekV3PreTrainedModel(PreTrainedModel): method _init_weights (line 1265) | def _init_weights(self, module): class DeepseekV3Model (line 1351) | class DeepseekV3Model(DeepseekV3PreTrainedModel): method __init__ (line 1359) | def __init__(self, config: DeepseekV3Config): method get_input_embeddings (line 1380) | def get_input_embeddings(self): method set_input_embeddings (line 1383) | def set_input_embeddings(self, value): method forward (line 1387) | def forward( method _update_causal_mask (line 1521) | def _update_causal_mask( class DeepseekV3ForCausalLM (line 1601) | class DeepseekV3ForCausalLM(DeepseekV3PreTrainedModel): method __init__ (line 1604) | def __init__(self, config): method get_input_embeddings (line 1613) | def get_input_embeddings(self): method set_input_embeddings (line 1616) | def set_input_embeddings(self, value): method get_output_embeddings (line 1619) | def get_output_embeddings(self): method set_output_embeddings (line 1622) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1625) | def set_decoder(self, decoder): method get_decoder (line 1628) | def get_decoder(self): method forward (line 1635) | def forward( method prepare_inputs_for_generation (line 1731) | def prepare_inputs_for_generation( method _reorder_cache (line 1796) | def _reorder_cache(past_key_values, beam_idx): class DeepseekV3ForSequenceClassification (line 1823) | class DeepseekV3ForSequenceClassification(DeepseekV3PreTrainedModel): method __init__ (line 1824) | def __init__(self, config): method get_input_embeddings (line 1833) | def get_input_embeddings(self): method set_input_embeddings (line 1836) | def set_input_embeddings(self, value): method forward (line 1840) | def forward( FILE: kt-kernel/examples/repro_llamafile_re.py function getenv_int (line 43) | def getenv_int(name: str, default: int) -> int: function get_stream_for (line 50) | def get_stream_for(device: torch.device | str): function main (line 57) | def main() -> int: FILE: kt-kernel/examples/test_apply_rope.py function rotate_half (line 4) | def rotate_half(x): function apply_rotary_pos_emb (line 10) | def apply_rotary_pos_emb(q, cos, sin, position_ids=None, unsqueeze_dim=1): function my_apply (line 37) | def my_apply(q,cos,sin): FILE: kt-kernel/examples/test_awq_moe_amx.py function pack (line 17) | def pack(imatrix: torch.Tensor, direction: str = "row"): function act_fn (line 60) | def act_fn(x): function generate_original_weights (line 64) | def generate_original_weights(): function generate_awq_quantized_weights (line 94) | def generate_awq_quantized_weights(): function mlp_torch (line 203) | def mlp_torch(input, gate_proj, up_proj, down_proj, debug_expert_id=None... function moe_torch (line 225) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj,... function test_online_int4_kgroup_moe (line 290) | def test_online_int4_kgroup_moe(): function test_awq_moe (line 413) | def test_awq_moe(): function compare_quantization_methods (line 552) | def compare_quantization_methods(): FILE: kt-kernel/examples/test_bf16_moe.py function act_fn (line 41) | def act_fn(x): function mlp_torch (line 46) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 55) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function build_bf16_weights (line 89) | def build_bf16_weights(): function build_moes_from_bf16_data (line 131) | def build_moes_from_bf16_data(bf16_data: dict): function run_bf16_moe_test (line 159) | def run_bf16_moe_test(): FILE: kt-kernel/examples/test_deepseekv3.py function read_gguf_file (line 34) | def read_gguf_file(gguf_file_path): function read_gguf_directory (line 67) | def read_gguf_directory(directory): function find_weights (line 94) | def find_weights(name, weights): function get_torch_tensor_from_gguf (line 111) | def get_torch_tensor_from_gguf(gguf_weights, name): function get_torch_tensor_and_type_from_gguf (line 115) | def get_torch_tensor_and_type_from_gguf(gguf_weights, name): function type_to_ggml_type (line 119) | def type_to_ggml_type(type): function build_mla (line 130) | def build_mla(layer_idx, json_config, gguf_weights): function build_ffn (line 210) | def build_ffn(layer_idx, json_config, gguf_weights): function build_moegate (line 280) | def build_moegate(layer_idx, json_config, gguf_weights): function build_llm (line 306) | def build_llm(json_config, gguf_weights): function start_chat (line 395) | def start_chat(content=None): FILE: kt-kernel/examples/test_deepseekv3_prefill.py function read_gguf_file (line 32) | def read_gguf_file(gguf_file_path): function read_gguf_directory (line 65) | def read_gguf_directory(directory): function find_weights (line 92) | def find_weights(name, weights): function get_torch_tensor_from_gguf (line 109) | def get_torch_tensor_from_gguf(gguf_weights, name): function get_torch_tensor_and_type_from_gguf (line 113) | def get_torch_tensor_and_type_from_gguf(gguf_weights, name): function type_to_ggml_type (line 117) | def type_to_ggml_type(type): function build_mla (line 128) | def build_mla(layer_idx, json_config, gguf_weights): function build_ffn (line 208) | def build_ffn(layer_idx, json_config, gguf_weights): function build_moegate (line 278) | def build_moegate(layer_idx, json_config, gguf_weights): function build_llm (line 304) | def build_llm(json_config, gguf_weights): function start_chat (line 388) | def start_chat(): FILE: kt-kernel/examples/test_deepseekv3_prefill_speed.py function read_gguf_file (line 39) | def read_gguf_file(gguf_file_path): function read_gguf_directory (line 72) | def read_gguf_directory(directory): function find_weights (line 99) | def find_weights(name, weights): function get_torch_tensor_from_gguf (line 116) | def get_torch_tensor_from_gguf(gguf_weights, name): function get_torch_tensor_and_type_from_gguf (line 120) | def get_torch_tensor_and_type_from_gguf(gguf_weights, name): function type_to_ggml_type (line 124) | def type_to_ggml_type(type): function build_mla (line 135) | def build_mla(layer_idx, json_config, gguf_weights): function build_ffn (line 215) | def build_ffn(layer_idx, json_config, gguf_weights): function build_moegate (line 285) | def build_moegate(layer_idx, json_config, gguf_weights): function build_llm (line 311) | def build_llm(json_config, gguf_weights): function start_chat (line 401) | def start_chat(content=None): FILE: kt-kernel/examples/test_fp8_moe.py function act_fn (line 44) | def act_fn(x): function mlp_torch (line 49) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 58) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function fp8_e4m3_to_float (line 96) | def fp8_e4m3_to_float(fp8_val: int) -> float: function float_to_fp8_e4m3 (line 119) | def float_to_fp8_e4m3(val: float) -> int: function quantize_to_fp8_blockwise (line 162) | def quantize_to_fp8_blockwise(weights: torch.Tensor, group_size: int = 1... function dequantize_fp8_blockwise (line 235) | def dequantize_fp8_blockwise(fp8_weights: torch.Tensor, scales: torch.Te... function build_random_fp8_weights (line 272) | def build_random_fp8_weights(): function build_moes_from_fp8_data (line 341) | def build_moes_from_fp8_data(fp8_data: dict): function run_fp8_moe_test (line 372) | def run_fp8_moe_test(): FILE: kt-kernel/examples/test_fp8_perchannel_moe.py function act_fn (line 42) | def act_fn(x): function mlp_torch (line 47) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 56) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function fp8_e4m3_to_float (line 94) | def fp8_e4m3_to_float(fp8_val: int) -> float: function float_to_fp8_e4m3 (line 117) | def float_to_fp8_e4m3(val: float) -> int: function quantize_to_fp8_perchannel (line 160) | def quantize_to_fp8_perchannel(weights: torch.Tensor): function dequantize_fp8_perchannel (line 211) | def dequantize_fp8_perchannel(fp8_weights: torch.Tensor, scales: torch.T... function build_random_fp8_perchannel_weights (line 235) | def build_random_fp8_perchannel_weights(): function build_moes_from_fp8_perchannel_data (line 290) | def build_moes_from_fp8_perchannel_data(fp8_data: dict): function run_fp8_perchannel_moe_test (line 322) | def run_fp8_perchannel_moe_test(): FILE: kt-kernel/examples/test_gate.py function load_fp32_tensor (line 38) | def load_fp32_tensor(file_path, shape): class MoEGate (line 47) | class MoEGate(nn.Module): method __init__ (line 48) | def __init__(self, config): method reset_parameters (line 67) | def reset_parameters(self) -> None: method forward (line 72) | def forward(self, hidden_states): function torch_gate (line 159) | def torch_gate(hidden_states): function cpuinfer_gate (line 169) | def cpuinfer_gate(hidden_states): FILE: kt-kernel/examples/test_k2_moe_amx.py function _pattern_uniform (line 30) | def _pattern_uniform(groups: int) -> torch.Tensor: function _pattern_alternating (line 34) | def _pattern_alternating(groups: int) -> torch.Tensor: function _pattern_ramp (line 40) | def _pattern_ramp(groups: int) -> torch.Tensor: function act_fn (line 52) | def act_fn(x): function mlp_torch (line 56) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 68) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function pack_to_int32 (line 101) | def pack_to_int32(value: torch.Tensor, num_bits: int, packed_dim: Litera... function pack_tensor_per_row (line 136) | def pack_tensor_per_row(q: torch.Tensor, num_bits: int) -> torch.Tensor: function quantize_k2_tensor (line 143) | def quantize_k2_tensor(weights: torch.Tensor, group_size: int): function build_structured_tensor (line 171) | def build_structured_tensor(shape: torch.Size, pattern: str) -> torch.Te... function prepare_k2_quantized_weights (line 191) | def prepare_k2_quantized_weights(pattern: str) -> Dict[str, torch.Tensor]: function build_moes_from_quantized_data (line 218) | def build_moes_from_quantized_data(quant_data: Dict[str, torch.Tensor]): function run_case (line 246) | def run_case(pattern: str) -> Dict[str, float]: function run_k2_moe_test (line 306) | def run_k2_moe_test(): FILE: kt-kernel/examples/test_k2_write_buffer.py function make_cpu_infer (line 13) | def make_cpu_infer(thread_num=80): function build_config (line 17) | def build_config(cpuinfer, expert_num, num_experts_per_tok, hidden_size,... function allocate_weights (line 27) | def allocate_weights(expert_num, hidden_size, intermediate_size, group_s... function test_with_tp (line 52) | def test_with_tp(gpu_tp_count): function main (line 312) | def main(): FILE: kt-kernel/examples/test_mla.py function read_gguf_file (line 20) | def read_gguf_file(gguf_file_path): function get_torch_tensor_from_gguf (line 53) | def get_torch_tensor_from_gguf(gguf_weights, name): function get_torch_tensor_and_type_from_gguf (line 57) | def get_torch_tensor_and_type_from_gguf(gguf_weights, name): function type_to_ggml_type (line 61) | def type_to_ggml_type(type): function read_gguf_directory (line 141) | def read_gguf_directory(directory): function test_cpu_mla (line 214) | def test_cpu_mla(): function load_fp16_tensor (line 295) | def load_fp16_tensor(file_path, shape): function load_fp32_tensor (line 305) | def load_fp32_tensor(file_path, shape): function test_torch (line 314) | def test_torch(): FILE: kt-kernel/examples/test_mla_qlen.py function read_gguf_file (line 20) | def read_gguf_file(gguf_file_path): function get_torch_tensor_from_gguf (line 53) | def get_torch_tensor_from_gguf(gguf_weights, name): function get_torch_tensor_and_type_from_gguf (line 57) | def get_torch_tensor_and_type_from_gguf(gguf_weights, name): function type_to_ggml_type (line 61) | def type_to_ggml_type(type): function read_gguf_directory (line 141) | def read_gguf_directory(directory): function build_mla (line 214) | def build_mla(): function load_fp32_tensor (line 289) | def load_fp32_tensor(file_path, shape): FILE: kt-kernel/examples/test_mla_quant.py function load_fp32_tensor_raw (line 20) | def load_fp32_tensor_raw(file_path): function load_fp16_tensor (line 28) | def load_fp16_tensor(file_path, shape=None): function load_fp32_tensor (line 39) | def load_fp32_tensor(file_path, shape): function test_torch (line 48) | def test_torch(): FILE: kt-kernel/examples/test_mla_simple.py function torch_attn (line 136) | def torch_attn(hidden_states: torch.Tensor, function torch_attn_for_test (line 284) | def torch_attn_for_test(hidden_states,kv_cache,): function test_mla_simple (line 287) | def test_mla_simple(): FILE: kt-kernel/examples/test_mla_torch.py function torch_attn (line 178) | def torch_attn( FILE: kt-kernel/examples/test_mlp.py function act_fn (line 33) | def act_fn(x): function mlp_torch (line 37) | def mlp_torch(input, gate_proj, up_proj, down_proj): FILE: kt-kernel/examples/test_moe.py function act_fn (line 44) | def act_fn(x): function mlp_torch (line 48) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 56) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function to_cpuinfer_tensor (line 88) | def to_cpuinfer_tensor(tensor, type): function from_cpuinfer_tensor (line 93) | def from_cpuinfer_tensor(tensor, size, type): FILE: kt-kernel/examples/test_moe_amx.py function act_fn (line 25) | def act_fn(x): function mlp_torch (line 29) | def mlp_torch(input, gate_proj, up_proj, down_proj, debug_expert_id=None... function moe_torch (line 51) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj,... function test_moe (line 101) | def test_moe(quant_mode: str): FILE: kt-kernel/examples/test_moe_kernel.py function act_fn (line 39) | def act_fn(x): function mlp_torch (line 43) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 51) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function test_moe (line 83) | def test_moe(quant_mode: str): FILE: kt-kernel/examples/test_moe_kml.py function act_fn (line 33) | def act_fn(x): function mlp_torch (line 37) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 45) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function test_moe (line 77) | def test_moe(quant_mode: str): FILE: kt-kernel/examples/test_rope.cpp function create_random_vector (line 8) | std::vector create_random_vector(size_t total_size, std::vector& vec, const char* fil... function cpp_torch_rope_with_apply_single (line 38) | std::pair, std::vector> cpp_torch_rope_with_ap... function main (line 69) | int main() { FILE: kt-kernel/examples/test_rope.py function load_fp16_tensor (line 35) | def load_fp16_tensor(file_path, shape): function load_fp32_tensor (line 42) | def load_fp32_tensor(file_path, shape): function torch_rope (line 62) | def torch_rope(q, k): FILE: kt-kernel/examples/test_softmax.py function load_fp16_tensor (line 6) | def load_fp16_tensor(file_path, shape): FILE: kt-kernel/examples/test_write_buffer.py function make_cpu_infer (line 28) | def make_cpu_infer(thread_num=80): function div_up (line 32) | def div_up(a, b): function build_config_fp8 (line 36) | def build_config_fp8(cpuinfer, expert_num, num_experts_per_tok, hidden_s... function build_config_fp8_perchannel (line 46) | def build_config_fp8_perchannel(cpuinfer, expert_num, num_experts_per_to... function build_config_bf16 (line 57) | def build_config_bf16(cpuinfer, expert_num, num_experts_per_tok, hidden_... function allocate_weights_fp8 (line 64) | def allocate_weights_fp8(expert_num, hidden_size, intermediate_size, gro... function allocate_weights_fp8_perchannel (line 99) | def allocate_weights_fp8_perchannel(expert_num, hidden_size, intermediat... function allocate_weights_bf16 (line 126) | def allocate_weights_bf16(expert_num, hidden_size, intermediate_size): function test_fp8_write_buffer (line 145) | def test_fp8_write_buffer(gpu_tp_count): function test_fp8_perchannel_write_buffer (line 355) | def test_fp8_perchannel_write_buffer(gpu_tp_count): function test_bf16_write_buffer (line 544) | def test_bf16_write_buffer(gpu_tp_count): function test_with_tp (line 706) | def test_with_tp(quant_mode: str, gpu_tp_count: int): function main (line 718) | def main(quant_modes=None): FILE: kt-kernel/examples/torch_attention.py class KDeepSeekV3Cache (line 13) | class KDeepSeekV3Cache(nn.Module): method __init__ (line 14) | def __init__( method update (line 33) | def update( method get_page_table (line 69) | def get_page_table(self, cache_position: torch.Tensor, q_indptr: torch... function rotate_half (line 90) | def rotate_half(x): function apply_rotary_pos_emb (line 96) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... class DeepseekV2RMSNorm (line 127) | class DeepseekV2RMSNorm(nn.Module): method __init__ (line 128) | def __init__(self, hidden_size, eps=1e-6): method forward (line 136) | def forward(self, hidden_states): class DeepseekV2RotaryEmbedding (line 144) | class DeepseekV2RotaryEmbedding(nn.Module): method __init__ (line 145) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method forward (line 157) | def forward(self, x, position_ids): class DeepseekV3RotaryEmbedding (line 172) | class DeepseekV3RotaryEmbedding(nn.Module): method __init__ (line 173) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 192) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 205) | def forward(self, x, seq_len=None): function yarn_find_correction_dim (line 216) | def yarn_find_correction_dim( function yarn_find_correction_range (line 225) | def yarn_find_correction_range( function yarn_linear_ramp_mask (line 236) | def yarn_linear_ramp_mask(min, max, dim): function yarn_get_mscale (line 244) | def yarn_get_mscale(scale=1, mscale=1): class DeepseekV3YarnRotaryEmbedding (line 249) | class DeepseekV3YarnRotaryEmbedding(DeepseekV3RotaryEmbedding): method __init__ (line 251) | def __init__( method _set_cos_sin_cache (line 272) | def _set_cos_sin_cache(self, seq_len, device, dtype): FILE: kt-kernel/ext_bindings.cpp function to_float_ptr (line 64) | py::object to_float_ptr(uintptr_t input_ptr, int size, ggml_type type) { function from_float_ptr (line 88) | py::object from_float_ptr(uintptr_t input_ptr, int size, ggml_type type) { function void_ptr_nested_to_uint (line 118) | std::vector> void_ptr_nested_to_uint(const std::v... function uint_to_void_ptr_nested (line 131) | std::vector> uint_to_void_ptr_nested(const std::vector cpuinfer_interface(std::shared_... class LoadWeightsBindings (line 173) | class LoadWeightsBindings { type Args (line 175) | struct Args { method inner (line 179) | static void inner(void* args) { method cpuinfer_interface (line 183) | static std::pair cpuinfer_interface(std::shared_... method cpuinfer_interface (line 194) | static std::pair cpuinfer_interface(std::shared_... class ForwardBindings (line 198) | class ForwardBindings { type Args (line 200) | struct Args { method inner (line 211) | static void inner(void* args) { method cpuinfer_interface (line 216) | static std::pair cpuinfer_interface(std::shared_... method cpuinfer_interface (line 222) | static std::pair cpuinfer_interface(std::shared_... function bind_moe_module (line 231) | void bind_moe_module(py::module_& moe_module, const char* name) { function PYBIND11_MODULE (line 302) | PYBIND11_MODULE(kt_kernel_ext, m) { FILE: kt-kernel/operators/amx/awq-moe.hpp class AMX_AWQ_MOE_TP (line 30) | class AMX_AWQ_MOE_TP : public AMX_MOE_BASE> { method write_weights (line 51) | inline void write_weights(std::filesystem::path prefix, std::string ma... method write_weights (line 73) | inline void write_weights(std::filesystem::path prefix, std::string ma... method read_weights (line 127) | inline void read_weights(std::filesystem::path prefix, std::string mat... method read_weights (line 151) | inline bool read_weights(std::filesystem::path prefix, std::string mat... method read_awq_weights (line 210) | inline void read_awq_weights(std::filesystem::path prefix, std::string... method load_check (line 253) | inline void load_check() { method verify_load_right (line 258) | void verify_load_right() { method dump_buffer_b (line 290) | inline void dump_buffer_b(const std::string& quantization_type, int ex... method convert_zeros_to_mins_avx (line 373) | inline void convert_zeros_to_mins_avx(const uint32_t* zeros_int4_packe... method AMX_AWQ_MOE_TP (line 392) | AMX_AWQ_MOE_TP() = default; method AMX_AWQ_MOE_TP (line 394) | AMX_AWQ_MOE_TP(GeneralMOEConfig config, int tp_part_idx_ = 0) : Base(c... method derived_init (line 396) | void derived_init() { method buffer_a_required_size_impl (line 428) | size_t buffer_a_required_size_impl(size_t m, size_t k) const { method buffer_b_required_size_impl (line 431) | size_t buffer_b_required_size_impl(size_t n, size_t k) const { method buffer_c_required_size_impl (line 434) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ... method make_buffer_a_impl (line 436) | std::shared_ptr make_buffer_a_impl(size_t m, size... method make_buffer_b_impl (line 439) | std::shared_ptr make_buffer_b_impl(size_t n, size... method make_buffer_c_impl (line 442) | std::shared_ptr make_buffer_c_impl(size_t m, size... method do_gate_up_gemm (line 450) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int... method do_down_gemm (line 465) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) { method load_weights (line 486) | void load_weights() { class TP_MOE> (line 649) | class TP_MOE> : public TP_MOE> { method AMX_BF16_MOE_TP (line 49) | AMX_BF16_MOE_TP() = default; method AMX_BF16_MOE_TP (line 51) | AMX_BF16_MOE_TP(GeneralMOEConfig config, int tp_part_idx_ = 0) : Base(... method derived_init (line 55) | void derived_init() { method buffer_a_required_size_impl (line 66) | size_t buffer_a_required_size_impl(size_t m, size_t k) const { return ... method buffer_b_required_size_impl (line 68) | size_t buffer_b_required_size_impl(size_t n, size_t k) const { method buffer_c_required_size_impl (line 72) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ... method make_buffer_a_impl (line 74) | std::shared_ptr make_buffer_a_impl(size_t m, size... method make_buffer_b_impl (line 78) | std::shared_ptr make_buffer_b_impl(size_t n, size... method make_buffer_c_impl (line 82) | std::shared_ptr make_buffer_c_impl(size_t m, size... method do_gate_up_gemm (line 90) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int... method do_down_gemm (line 104) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) { method dump_buffer_b (line 118) | inline void dump_buffer_b(int expert_idx, const std::string& matrix_ty... method load_weights (line 159) | void load_weights() { method fast_memcpy_64 (line 211) | static inline void fast_memcpy_64(void* __restrict dst, const void* __... method fast_stream_64 (line 217) | static inline void fast_stream_64(void* __restrict dst, const void* __... method fast_memcpy (line 223) | static inline void fast_memcpy(void* __restrict dst, const void* __res... method unpack_nk_block_bf16 (line 248) | static inline void unpack_nk_block_bf16(const ggml_bf16_t* src, ggml_b... method write_weights_to_buffer (line 301) | void write_weights_to_buffer(int gpu_tp_count, [[maybe_unused]] int cp... class TP_MOE> (line 432) | class TP_MOE> : public TP_MOE> { method AMX_FP8_MOE_TP (line 47) | AMX_FP8_MOE_TP() = default; method AMX_FP8_MOE_TP (line 49) | AMX_FP8_MOE_TP(GeneralMOEConfig config, int tp_part_idx_ = 0) : Base(c... method derived_init (line 53) | void derived_init() { method buffer_a_required_size_impl (line 66) | size_t buffer_a_required_size_impl(size_t m, size_t k) const { return ... method buffer_b_required_size_impl (line 67) | size_t buffer_b_required_size_impl(size_t n, size_t k) const { method buffer_c_required_size_impl (line 70) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ... method make_buffer_a_impl (line 72) | std::shared_ptr make_buffer_a_impl(size_t m, size... method make_buffer_b_impl (line 75) | std::shared_ptr make_buffer_b_impl(size_t n, size... method make_buffer_c_impl (line 78) | std::shared_ptr make_buffer_c_impl(size_t m, size... method do_gate_up_gemm (line 86) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int... method do_down_gemm (line 95) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) { method dump_buffer_b (line 105) | inline void dump_buffer_b(const std::string& quantization_type, int ex... method load_weights (line 173) | void load_weights() { method fast_memcpy_64 (line 228) | static inline void fast_memcpy_64(void* __restrict dst, const void* __... method fast_memcpy (line 234) | static inline void fast_memcpy(void* __restrict dst, const void* __res... method unpack_nk_block (line 259) | static inline void unpack_nk_block(const uint8_t* src, uint8_t* dst, s... method unpack_4nk_blocks (line 325) | static inline void unpack_4nk_blocks(const uint8_t* src[4], uint8_t* d... method write_weights_to_buffer (line 385) | void write_weights_to_buffer(int gpu_tp_count, [[maybe_unused]] int cp... class TP_MOE> (line 629) | class TP_MOE> : public TP_MOE make_buffer_a_impl(size_t m, size... method make_buffer_b_impl (line 76) | std::shared_ptr make_buffer_b_impl(size_t n, size... method make_buffer_c_impl (line 80) | std::shared_ptr make_buffer_c_impl(size_t m, size... method do_gate_up_gemm (line 88) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int... method do_down_gemm (line 99) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) { method fast_memcpy_64 (line 107) | static inline void fast_memcpy_64(void* __restrict dst, const void* __... method fast_memcpy (line 113) | static inline void fast_memcpy(void* __restrict dst, const void* __res... method unpack_nk_block (line 138) | static inline void unpack_nk_block(const uint8_t* src, uint8_t* dst, s... method unpack_4nk_blocks (line 204) | static inline void unpack_4nk_blocks(const uint8_t* src[4], uint8_t* d... method write_weights_to_buffer (line 259) | void write_weights_to_buffer(int gpu_tp_count, [[maybe_unused]] int cp... method load_weights (line 504) | void load_weights() { class TP_MOE> (line 559) | class TP_MOE> : public TP_MOE> { method AMX_K2_MOE_TP (line 46) | AMX_K2_MOE_TP() = default; method AMX_K2_MOE_TP (line 48) | AMX_K2_MOE_TP(GeneralMOEConfig config, int tp_part_idx_ = 0) : Base(co... method derived_init (line 50) | void derived_init() { method buffer_a_required_size_impl (line 64) | size_t buffer_a_required_size_impl(size_t m, size_t k) const { method buffer_b_required_size_impl (line 67) | size_t buffer_b_required_size_impl(size_t n, size_t k) const { method buffer_c_required_size_impl (line 70) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ... method make_buffer_a_impl (line 72) | std::shared_ptr make_buffer_a_impl(size_t m, size... method make_buffer_b_impl (line 75) | std::shared_ptr make_buffer_b_impl(size_t n, size... method make_buffer_c_impl (line 78) | std::shared_ptr make_buffer_c_impl(size_t m, size... method do_gate_up_gemm (line 86) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int... method do_down_gemm (line 101) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) { method load_weights (line 120) | void load_weights() { method fast_memcpy (line 189) | static inline void fast_memcpy(void* __restrict dst, const void* __res... method fast_fp32_to_bf16 (line 210) | static inline void fast_fp32_to_bf16(ggml_bf16_t* __restrict dst, cons... method write_weights_to_buffer (line 244) | void write_weights_to_buffer(int gpu_tp_count, int cpu_tp_count, int e... class TP_MOE> (line 455) | class TP_MOE> : public TP_MOE { method BufferASmallKGroupImpl (line 459) | BufferASmallKGroupImpl(int max_m, int k, int k_group_size, void* ptr... method from_mat (line 462) | void from_mat(int m, ggml_bf16_t* src, int ith, int nth) { type BufferBInt4Impl (line 514) | struct BufferBInt4Impl { method required_size (line 529) | static size_t required_size(int n, int k) { return sizeof(int8_t) * ... method BufferBInt4Impl (line 531) | BufferBInt4Impl(int n, int k, void* ptr) : n(n), k(k) { method __m128i (line 543) | static __m128i round_4bit_s8(__m128i x) { method from_mat (line 557) | void from_mat(ggml_bf16_t* src, int ith, int nth) { method dt (line 647) | dt* get_submat(int n, int k, int n_begin, int k_begin) { type BufferBKGroupImpl (line 662) | struct BufferBKGroupImpl { method required_size (line 677) | static size_t required_size(int n, int k, int k_group_size) { method BufferBKGroupImpl (line 682) | BufferBKGroupImpl(int n, int k, int k_group_size, void* ptr) : n(n),... method __m128i (line 697) | static __m128i round_4bit_s8(__m128i x) { method from_mat (line 711) | void from_mat(ggml_bf16_t* src, int ith, int nth) { method dt (line 817) | dt* get_submat(int n, int k, int n_begin, int k_begin) { type BufferBInt4WithZeroImpl (line 836) | struct BufferBInt4WithZeroImpl { method required_size (line 851) | static size_t required_size(int n, int k) { return sizeof(int8_t) * ... method BufferBInt4WithZeroImpl (line 853) | BufferBInt4WithZeroImpl(int n, int k, void* ptr) : n(n), k(k) { method __m128i (line 867) | static __m128i round_4bit_u8(__m128i x) { method from_mat (line 875) | void from_mat(ggml_bf16_t* src, int ith, int nth) { method dt (line 970) | dt* get_submat(int n, int k, int n_begin, int k_begin) { type BufferBInt4KGroupImpl (line 988) | struct BufferBInt4KGroupImpl { method required_size (line 999) | static size_t required_size(int n, int k, int k_group_size) { method BufferBInt4KGroupImpl (line 1003) | BufferBInt4KGroupImpl(int n, int k, int k_group_size, void* ptr) : n... method from_raw_mat (line 1020) | void from_raw_mat(uint8_t* proj, int ith, int nth) { method dt (line 1033) | dt* get_submat(int n, int k, int n_begin, int k_begin) { method split_range_n (line 1047) | static std::pair split_range_n(int n, int ith, int nth) { type BufferBInt4WithZeroKGroupImpl (line 1057) | struct BufferBInt4WithZeroKGroupImpl { method required_size (line 1072) | static size_t required_size(int n, int k, int k_group_size) { method BufferBInt4WithZeroKGroupImpl (line 1076) | BufferBInt4WithZeroKGroupImpl(int n, int k, int k_group_size, void* ... method __m128i (line 1091) | static __m128i round_4bit_u8(__m128i x) { method from_raw_mat (line 1099) | void from_raw_mat(uint8_t* proj, int ith, int nth) { method from_mat (line 1135) | void from_mat(ggml_bf16_t* src, int ith, int nth) { method dt (line 1243) | dt* get_submat(int n, int k, int n_begin, int k_begin) { type BufferBInt4WithZeroLowKGroupImpl (line 1265) | struct BufferBInt4WithZeroLowKGroupImpl { method required_size (line 1280) | static size_t required_size(int n, int k, int k_group_size) { method BufferBInt4WithZeroLowKGroupImpl (line 1284) | BufferBInt4WithZeroLowKGroupImpl(int n, int k, int k_group_size, voi... method __m128i (line 1299) | static __m128i round_4bit_u8(__m128i x) { method from_raw_mat (line 1307) | void from_raw_mat(uint8_t* proj, int ith, int nth) { method from_mat (line 1343) | void from_mat(ggml_bf16_t* src, int ith, int nth) { method dt (line 1460) | dt* get_submat(int n, int k, int n_begin, int k_begin) { type BufferCImpl (line 1482) | struct BufferCImpl { method required_size (line 1490) | static size_t required_size(int max_m, int n) { return sizeof(float)... method BufferCImpl (line 1492) | BufferCImpl(int max_m, int n, void* ptr) : max_m(max_m), n(n) { method set_data (line 1502) | void set_data(void* ptr) { method to_mat (line 1507) | void to_mat(int m, ggml_bf16_t* dst, int ith, int nth) { type BufferCReduceImpl (line 1536) | struct BufferCReduceImpl { method required_size (line 1545) | static size_t required_size(int max_m, int n) { method BufferCReduceImpl (line 1550) | BufferCReduceImpl(int max_m, int n, void* ptr) : max_m(max_m), n(n) { method set_data (line 1560) | void set_data(void* ptr) { method to_mat (line 1567) | void to_mat(int m, ggml_bf16_t* dst, int ith, int nth) { method clear_int_buffer (line 1603) | void clear_int_buffer() { std::memset(int_c, 0, sizeof(int32_t) * ma... method convert_int_to_float (line 1606) | void convert_int_to_float(int m) { FILE: kt-kernel/operators/amx/la/amx_config.hpp type amx (line 33) | namespace amx { function enable_amx (line 59) | inline bool enable_amx() { type TileConfig (line 99) | struct alignas(64) TileConfig { method TileConfig (line 108) | TileConfig() { method set_row_col (line 116) | void set_row_col(int i, uint8_t row, uint16_t col) { method set_config (line 121) | void set_config() { _tile_loadconfig(this); } method load_data (line 123) | static void load_data(int to, void* from, size_t stride) { method store_data (line 154) | static void store_data(int from, void* to, size_t stride) { FILE: kt-kernel/operators/amx/la/amx_kernels.hpp type amx (line 18) | namespace amx { type dpb133 (line 55) | struct dpb133 { type GemmKernel133 (line 88) | struct GemmKernel133 { method recommended_nth (line 98) | static int recommended_nth(int m) { return (m + M_STEP - 1) / M_STEP; } method config (line 100) | static void config() { method run_full_tile (line 115) | static void run_full_tile(const TA* a, size_t lda, const TB* b, size... method run_full_tile_zero (line 136) | static void run_full_tile_zero(const TA* a, size_t lda, const TB* b,... method convert_full_tile_b_to_vnni_inplace (line 158) | static void convert_full_tile_b_to_vnni_inplace(void* b) { transpose... type ATile (line 161) | struct ATile { method partial_load (line 163) | void partial_load(TA* a, int m, int k, size_t lda) { method partial_load_quant (line 172) | void partial_load_quant(block_q4_0* a, int m, int k, size_t lda) { method partial_load_quant (line 182) | void partial_load_quant(block_q8_0* a, int m, int k, size_t lda) { method partial_load_quant (line 192) | void partial_load_quant(TA* a, int m, size_t lda) { method partial_load_quant (line 206) | void partial_load_quant(block_q4_K* a, int m, int inner_block_idx,... method partial_load_quant (line 221) | void partial_load_quant(blocks_aligned_q8_0_ref a, int m, int k, i... type BTile (line 231) | struct alignas(64) BTile { method partial_load (line 235) | void partial_load(TB* b, int n, int k, size_t ldb) { method partial_load_quant (line 244) | void partial_load_quant(block_q8_0* b, int n, int k, size_t ldb) { method partial_load_quant (line 258) | void partial_load_quant(blocks_aligned_q8_0_ref b, int n, int k, i... method load_from (line 272) | void load_from(TB* b, size_t ldb) { method run_full_ac (line 282) | void run_full_ac(TA* a, size_t lda, TC* c, size_t ldc) { type BTileSum (line 288) | struct alignas(64) BTileSum { method partial_load_quant (line 292) | void partial_load_quant(block_q8_K* b, int n, int inner_block_idx,... type CTile (line 310) | struct alignas(64) CTile { method partial_load (line 314) | void partial_load(TC* c, int m, int n, size_t ldc) { method partial_store (line 322) | void partial_store(TC* c, int m, int n, size_t ldc) { method to_fp32 (line 330) | void to_fp32() { type PartialTiles (line 340) | struct PartialTiles { method partial_run (line 344) | void partial_run(int m, int n, int k, TA* a, size_t lda, TB* b, si... method partial_run_quant (line 353) | void partial_run_quant(int m, int n, int k, QA* a, size_t lda, blo... method partial_run_quant_ac (line 371) | void partial_run_quant_ac(int m, int n, int k, QA* a, size_t lda, ... method partial_run_quant_ac (line 388) | void partial_run_quant_ac(int m, int n, int k, AQA a, int a_blck_s... type PartialTilesSum (line 406) | struct PartialTilesSum { method partial_run_quant_ac (line 411) | void partial_run_quant_ac(int m, int n, int inner_block_idx, block... type GemmKernel133BF (line 429) | struct GemmKernel133BF { method recommended_nth (line 441) | static int recommended_nth(int m) { return (m + M_STEP - 1) / M_STEP; } method config (line 442) | static void config() { method run_full_tile (line 460) | static void run_full_tile(const dt* a, size_t lda, const dt* b, size... type ATile (line 484) | struct ATile { method partial_load (line 487) | void partial_load(dt* a, int m, int k, size_t lda) { type BTile (line 497) | struct alignas(64) BTile { method full_load (line 500) | void full_load(dt* b, size_t ldb) { partial_load(b, TILE_N, TILE_K... method partial_load (line 502) | void partial_load(dt* b, int n, int k, size_t ldb) { method run_full_ac (line 512) | void run_full_ac(TA* a, size_t lda, TC* c, size_t ldc) { type CTile (line 517) | struct alignas(64) CTile { method partial_load (line 520) | void partial_load(float* c, int m, int n, size_t ldc) { method partial_store (line 529) | void partial_store(float* c, int m, int n, size_t ldc) { type PartialTiles (line 539) | struct PartialTiles { method partial_run (line 543) | void partial_run(int m, int n, int k, dt* a, size_t lda, dt* b, si... function T2 (line 554) | constexpr T2 convert_to(const T1& value) { type GemmKernel224BF (line 564) | struct GemmKernel224BF { method name (line 579) | static std::string name() { return "BF16"; } method recommended_nth (line 581) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 583) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 589) | static void config() { method load_a (line 607) | static void load_a(dt* a, size_t lda) { method load_b (line 617) | static void load_b(dt* b, size_t ldb) { method clean_c (line 627) | static void clean_c() { method load_c (line 636) | static void load_c(output_t* c, size_t ldc) { method store_c (line 648) | static void store_c(output_t* c, size_t ldc) { method run_tile (line 660) | static void run_tile() { type BufferA (line 669) | struct BufferA { method required_size (line 673) | static size_t required_size(int max_m, int k) { return sizeof(ggml... method BufferA (line 675) | BufferA(int max_m, int k, void* ptr) : max_m(max_m), k(k) { method set_data (line 682) | void set_data(void* new_ptr) { a = reinterpret_cast(... method from_mat (line 684) | void from_mat(int m, ggml_bf16_t* src, int ith, int nth) { method ggml_bf16_t (line 703) | ggml_bf16_t* get_submat(int m, int k, int m_begin, int k_begin) { type BufferB (line 712) | struct BufferB { method required_size (line 717) | static size_t required_size(int n, int k) { return sizeof(ggml_bf1... method BufferB (line 719) | BufferB(int n, int k, void* ptr) : n(n), k(k) { method set_data (line 726) | void set_data(void* new_ptr) { b = reinterpret_cast(... method from_mat (line 728) | void from_mat(ggml_bf16_t* src, int ith, int nth) { method ggml_bf16_t (line 751) | ggml_bf16_t* get_submat(int n, int k, int n_begin, int k_begin) { type BufferC (line 762) | struct BufferC { method required_size (line 776) | static size_t required_size(int max_m, int n) { return sizeof(floa... method BufferC (line 778) | BufferC(int max_m, int n, void* ptr) : max_m(max_m), n(n) { method set_data (line 785) | void set_data(void* new_ptr) { c = reinterpret_cast(new_pt... method to_mat (line 787) | void to_mat(int m, ggml_bf16_t* dst, int ith, int nth) { type GemmKernel224Int8 (line 816) | struct GemmKernel224Int8 { method name (line 833) | static std::string name() { return "INT8"; } method recommended_nth (line 835) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 837) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 843) | static void config() { method load_a (line 861) | static void load_a(dt* a, size_t lda) { method load_b (line 871) | static void load_b(dt* b, size_t ldb) { method clean_c (line 881) | static void clean_c() { method load_c (line 890) | static void load_c(output_t* c, size_t ldc) { method store_c (line 902) | static void store_c(output_t* c, size_t ldc) { method run_tile (line 914) | static void run_tile() { type BufferB (line 926) | struct BufferB { method required_size (line 932) | static size_t required_size(int n, int k) { return sizeof(int8_t) ... method BufferB (line 934) | BufferB(int n, int k, void* ptr) : n(n), k(k) { method from_mat (line 946) | void from_mat(ggml_bf16_t* src, int ith, int nth) { // CHECK: nth... method amx_kernel (line 1010) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin... method avx_kernel (line 1025) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin... method apply_scale (line 1053) | static void apply_scale(int m, int n, int m_begin, int n_begin, floa... type GemmKernel224Int4 (line 1073) | struct GemmKernel224Int4 { method name (line 1093) | static std::string name() { return "INT4"; } method recommended_nth (line 1095) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 1097) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 1103) | static void config() { method __m512i (line 1140) | static __m512i hi_mask() { return *((__m512i*)(&hi_mask_arr[0])); } method __m128i (line 1141) | static __m128i hi_mask_128() { return *((__m128i*)(&hi_mask_arr[0])); } method __m512i (line 1142) | static __m512i lo_mask() { return *((__m512i*)(&lo_mask_arr[0])); } method __m128i (line 1143) | static __m128i lo_mask_128() { return *((__m128i*)(&lo_mask_arr[0])); } method __m128i (line 1144) | static __m128i si_mask_128() { return *((__m128i*)(&sign_mask_arr[0]... method load_b_hi (line 1146) | static void load_b_hi(dt* b, size_t ldb) { method load_b_lo (line 1169) | static void load_b_lo(dt* b, size_t ldb) { method load_a (line 1193) | static void load_a(dt* a, size_t lda) { method clean_c (line 1203) | static void clean_c() { method load_c (line 1212) | static void load_c(output_t* c, size_t ldc) { method store_c (line 1224) | static void store_c(output_t* c, size_t ldc) { method run_tile (line 1236) | static void run_tile() { method avx_kernel (line 1249) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin... method amx_kernel (line 1279) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin... method apply_scale (line 1322) | static void apply_scale(int m, int n, int m_begin, int n_begin, floa... type GemmKernel224Int4_1 (line 1364) | struct GemmKernel224Int4_1 { method name (line 1381) | static std::string name() { return "INT4_1"; } method recommended_nth (line 1383) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 1385) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 1391) | static void config() { method __m512i (line 1428) | static __m512i hi_mask() { return *((__m512i*)(&hi_mask_arr[0])); } method __m128i (line 1429) | static __m128i hi_mask_128() { return *((__m128i*)(&hi_mask_arr[0])); } method __m512i (line 1430) | static __m512i lo_mask() { return *((__m512i*)(&lo_mask_arr[0])); } method __m128i (line 1431) | static __m128i lo_mask_128() { return *((__m128i*)(&lo_mask_arr[0])); } method __m128i (line 1432) | static __m128i si_mask_128() { return *((__m128i*)(&sign_mask_arr[0]... method load_b_hi (line 1434) | static void load_b_hi(dt* b, size_t ldb) { method load_b_lo (line 1457) | static void load_b_lo(dt* b, size_t ldb) { method load_a (line 1481) | static void load_a(dt* a, size_t lda) { method clean_c (line 1496) | static void clean_c() { method load_c (line 1505) | static void load_c(output_t* c, size_t ldc) { method store_c (line 1517) | static void store_c(output_t* c, size_t ldc) { method run_tile (line 1529) | static void run_tile() { method avx_kernel (line 1544) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin... method amx_kernel (line 1573) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin... method apply_scale (line 1617) | static void apply_scale(int m, int n, int m_begin, int n_begin, floa... function mat_mul_single (line 1647) | inline void mat_mul_single(int m, int n, int k, int8_t* a, size_t lda,... function mat_mul_single (line 1680) | inline void mat_mul_single(int m, int n, int k, ggml_bf16_t* a, size_t... function mat_mul_single (line 1724) | void mat_mul_single(int m, int n, int k, QA* a, size_t lda, block_q8_0... function mat_mul_single (line 1758) | inline void mat_mul_single(int m, int n, int k, block_q4_K* a, size_t ... function mat_mul_single (line 1800) | inline void mat_mul_single(int m, int n, int k, blocks_aligned_q8_0_re... function merge_mat (line 1830) | inline void merge_mat(int d0, int d1, float* a, float* b, size_t ld) { function merge_mats (line 1845) | inline void merge_mats(int d0, int d1, int cnt, float** data, size_t l... type GemmKernel (line 1865) | struct GemmKernel { type GemmKernel (line 1871) | struct GemmKernel { type GemmKernel (line 1876) | struct GemmKernel { type GemmKernel (line 1881) | struct GemmKernel { type GemmKernel (line 1886) | struct GemmKernel { type GemmKernel (line 1891) | struct GemmKernel { type GemmKernel (line 1896) | struct GemmKernel { function mat_mul (line 1919) | void mat_mul(int m, int n, int k, TA* a, size_t lda, TB* b, size_t ldb... function mat_mul (line 1933) | inline void mat_mul(int m, int n, int k, std::shared_ptr split_range_n(int n, int ith, int nth) { method config (line 2097) | static void config() { method __m512i (line 2123) | static __m512i hi_mask() { return *((__m512i*)(&hi_mask_arr[0])); } method __m512i (line 2124) | static __m512i lo_mask() { return *((__m512i*)(&lo_mask_arr[0])); } method clean_c (line 2126) | static void clean_c() { method load_c (line 2133) | static void load_c(output_t* c, size_t ldc) { method store_c (line 2140) | static void store_c(output_t* c, size_t ldc) { method load_a (line 2147) | static void load_a(dt* a, size_t lda) { method load_b_lo (line 2152) | static void load_b_lo(dt* b, size_t ldb) { method load_b_hi (line 2174) | static void load_b_hi(dt* b, size_t ldb) { method run_tile (line 2191) | static void run_tile() { method avx_kernel (line 2205) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin... method amx_kernel (line 2247) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin... method apply_scale_kgroup (line 2275) | static void apply_scale_kgroup(int m, int n, int m_begin, int n_begi... type GemmKernel224Int4_1KGroup (line 2304) | struct GemmKernel224Int4_1KGroup { method name (line 2321) | static std::string name() { return "INT4_1K"; } method recommended_nth (line 2323) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 2325) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 2331) | static void config() { method __m512i (line 2368) | static __m512i hi_mask() { return *((__m512i*)(&hi_mask_arr[0])); } method __m128i (line 2369) | static __m128i hi_mask_128() { return *((__m128i*)(&hi_mask_arr[0])); } method __m512i (line 2370) | static __m512i lo_mask() { return *((__m512i*)(&lo_mask_arr[0])); } method __m128i (line 2371) | static __m128i lo_mask_128() { return *((__m128i*)(&lo_mask_arr[0])); } method __m128i (line 2372) | static __m128i si_mask_128() { return *((__m128i*)(&sign_mask_arr[0]... method load_b_hi (line 2374) | static void load_b_hi(dt* b, size_t ldb) { method load_b_lo (line 2397) | static void load_b_lo(dt* b, size_t ldb) { method load_a (line 2421) | static void load_a(dt* a, size_t lda) { method clean_c (line 2436) | static void clean_c() { method load_c (line 2445) | static void load_c(output_t* c, size_t ldc) { method store_c (line 2457) | static void store_c(output_t* c, size_t ldc) { method run_tile (line 2469) | static void run_tile() { method avx_kernel (line 2484) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin... method amx_kernel (line 2522) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin... method apply_scale_kgroup (line 2548) | static void apply_scale_kgroup(int m, int n, int m_begin, int n_begi... type GemmKernel224Int4_1_LowKGroup (line 2580) | struct GemmKernel224Int4_1_LowKGroup { method name (line 2597) | static std::string name() { return "INT4_1K"; } method recommended_nth (line 2599) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 2601) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 2607) | static void config() { method __m512i (line 2644) | static __m512i hi_mask() { return *((__m512i*)(&hi_mask_arr[0])); } method __m128i (line 2645) | static __m128i hi_mask_128() { return *((__m128i*)(&hi_mask_arr[0])); } method __m512i (line 2646) | static __m512i lo_mask() { return *((__m512i*)(&lo_mask_arr[0])); } method __m128i (line 2647) | static __m128i lo_mask_128() { return *((__m128i*)(&lo_mask_arr[0])); } method __m128i (line 2648) | static __m128i si_mask_128() { return *((__m128i*)(&sign_mask_arr[0]... method load_b_hi (line 2650) | static void load_b_hi(dt* b, size_t ldb) { method load_b_lo (line 2674) | static void load_b_lo(dt* b, size_t ldb) { method load_a (line 2697) | static void load_a(dt* a, size_t lda) { method clean_c (line 2712) | static void clean_c() { method load_c (line 2721) | static void load_c(output_t* c, size_t ldc) { method store_c (line 2733) | static void store_c(output_t* c, size_t ldc) { method run_tile (line 2745) | static void run_tile() { method avx_kernel (line 2760) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin... method amx_kernel (line 2798) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin... method apply_scale_kgroup (line 2824) | static void apply_scale_kgroup(int m, int n, int m_begin, int n_begi... type GemmKernel224Int4SmallKGroup (line 2858) | struct GemmKernel224Int4SmallKGroup { method name (line 2872) | static std::string name() { return "K2_INT4_KGROUP"; } method recommended_nth (line 2873) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 2874) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 2879) | static void config() {} method __m256i (line 2892) | static __m256i hi_mask() { return *((__m256i*)(&hi_mask_arr[0])); } method __m256i (line 2893) | static __m256i lo_mask() { return *((__m256i*)(&lo_mask_arr[0])); } method __m256i (line 2894) | static __m256i sign_xor_mask() { return *((__m256i*)(&sign_xor_arr[0... method __m512i (line 2901) | static inline __m512i compressed_int4_to_int8_avx512(__m256i b256) { method integer_mat_vec_kgroup (line 2912) | static inline void integer_mat_vec_kgroup(int m, int n, int k, int k... function vec_mul_kgroup (line 2946) | inline void vec_mul_kgroup(int m, int n, int k, int k_group_size, function mat_mul_kgroup (line 2953) | inline void mat_mul_kgroup(int m, int n, int k, int k_group_size, function integer_mat_mul_kgroup (line 2962) | void integer_mat_mul_kgroup(int m, int n, int k, int k_group_size, typ... function vec_mul_kgroup (line 3001) | inline void vec_mul_kgroup(int m, int n, int k, int k_group_size, std:... function mat_mul_kgroup (line 3007) | inline void mat_mul_kgroup(int m, int n, int k, int k_group_size, std:... function vec_mul_kgroup (line 3014) | inline void vec_mul_kgroup(int m, int n, int k, int k_group_size, function mat_mul_kgroup (line 3022) | inline void mat_mul_kgroup(int m, int n, int k, int k_group_size, function vec_mul_kgroup (line 3031) | inline void vec_mul_kgroup(int m, int n, int k, int k_group_size, function mat_mul_kgroup (line 3039) | inline void mat_mul_kgroup(int m, int n, int k, int k_group_size, FILE: kt-kernel/operators/amx/la/amx_quantization.hpp type amx (line 11) | namespace amx { type blocks_aligned_q4_0_ref (line 13) | struct blocks_aligned_q4_0_ref { method blocks_aligned_q4_0_ref (line 20) | blocks_aligned_q4_0_ref offset(size_t blck_cnt) const { method expected_data_size (line 27) | static size_t expected_data_size(int64_t k) { method blocks_aligned_q4_0_ref (line 34) | static blocks_aligned_q4_0_ref quantize(const float* RESTRICT x, voi... method dequantize (line 78) | void dequantize(float* y, int64_t k) { type blocks_aligned_q8_0_ref (line 98) | struct blocks_aligned_q8_0_ref { method blocks_aligned_q8_0_ref (line 105) | blocks_aligned_q8_0_ref offset(size_t blck_cnt) const { method expected_data_size (line 112) | static size_t expected_data_size(int64_t k) { method blocks_aligned_q8_0_ref (line 118) | static blocks_aligned_q8_0_ref quantize(const float* RESTRICT x, voi... method dequantize (line 148) | void dequantize(float* y, int64_t k) { type Dequantizer (line 168) | struct Dequantizer {} function __m256i (line 178) | inline __m256i dequant4x32(const uint8_t* qs) { function __m256i (line 183) | inline __m256i unaligned_copy8x32(const int8_t* qs) { return _mm256_lo... function __m512i (line 185) | inline __m512i copy8x64(const int8_t* qs) { return _mm512_load_si512((... function __m256i (line 187) | inline __m256i lo4bit(const uint8_t* qs) { function __m256i (line 190) | inline __m256i hi4bit(const uint8_t* qs) { function __m128i (line 194) | inline __m128i make_q4K_scale_and_min(const uint8_t* scales8) { function __m256i (line 210) | inline __m256i merge_q8K_bsum(block_q8_K* b) { function __m512i (line 214) | inline __m512i _mm512_dpbusd_epi32_compat(__m512i src, __m512i a, __m5... function __m512i (line 237) | inline __m512i _mm512_dpbssd_epi32(__m512i src, __m512i a, __m512i b) { FILE: kt-kernel/operators/amx/la/amx_raw_buffers.hpp type amx (line 34) | namespace amx { type BufferABF16Impl (line 50) | struct BufferABF16Impl { method required_size (line 57) | static size_t required_size(int max_m, int k) { return sizeof(ggml_b... method BufferABF16Impl (line 59) | BufferABF16Impl(int max_m, int k, void* ptr) : max_m(max_m), k(k) { method set_data (line 66) | void set_data(void* new_ptr) { a = reinterpret_cast(ne... method from_mat (line 68) | void from_mat(int m, ggml_bf16_t* src, int ith, int nth) { method ggml_bf16_t (line 87) | ggml_bf16_t* get_submat(int m, int k, int m_begin, int k_begin) { type BufferBBF16Impl (line 117) | struct BufferBBF16Impl { method required_size (line 126) | static size_t required_size(int n, int k) { return sizeof(ggml_bf16_... method BufferBBF16Impl (line 128) | BufferBBF16Impl(int n, int k, void* ptr) : n(n), k(k) { method set_data (line 134) | void set_data(void* new_ptr) { b = reinterpret_cast(ne... method from_mat (line 136) | void from_mat(ggml_bf16_t* src, int ith, int nth) { method ggml_bf16_t (line 158) | ggml_bf16_t* get_submat(int n, int k, int n_begin, int k_begin) { type BufferBFP8Impl (line 178) | struct BufferBFP8Impl { method required_size (line 192) | static size_t required_size(int n, int k, int k_group_size) { method BufferBFP8Impl (line 201) | BufferBFP8Impl(int n, int k, int k_group_size, void* ptr) : n(n), k(... method set_data (line 203) | void set_data(void* ptr) { method from_mat (line 216) | void from_mat(const uint8_t* b_src, const float* d_src, int ith, int... method to_mat (line 299) | void to_mat(uint8_t* b_dst, float* d_dst, int ith, int nth) const { type BufferCFP32Impl (line 369) | struct BufferCFP32Impl { method required_size (line 386) | static size_t required_size(int max_m, int n) { return sizeof(float)... method BufferCFP32Impl (line 388) | BufferCFP32Impl(int max_m, int n, void* ptr) : max_m(max_m), n(n) { method set_data (line 395) | void set_data(void* new_ptr) { c = reinterpret_cast(new_ptr); } method to_mat (line 397) | void to_mat(int m, ggml_bf16_t* dst, int ith, int nth) { type BufferCFP32ReduceImpl (line 426) | struct BufferCFP32ReduceImpl { method required_size (line 435) | static size_t required_size(int max_m, int n) { return sizeof(float)... method BufferCFP32ReduceImpl (line 437) | BufferCFP32ReduceImpl(int max_m, int n, void* ptr) : max_m(max_m), n... method set_data (line 443) | void set_data(void* ptr) { method to_mat (line 449) | void to_mat(int m, ggml_bf16_t* dst, int ith, int nth) { type BufferBFP8PerChannelImpl (line 503) | struct BufferBFP8PerChannelImpl { method required_size (line 520) | static size_t required_size(int n, int k) { return sizeof(uint8_t) *... method BufferBFP8PerChannelImpl (line 525) | BufferBFP8PerChannelImpl(int n, int k, void* ptr) : n(n), k(k) { set... method set_data (line 527) | void set_data(void* ptr) { method from_mat (line 541) | void from_mat(const uint8_t* b_src, const float* d_src, int ith, int... FILE: kt-kernel/operators/amx/la/amx_raw_kernels.hpp type amx (line 15) | namespace amx { type GemmKernel224BF16 (line 17) | struct GemmKernel224BF16 { method name (line 32) | static std::string name() { return "BF16"; } method recommended_nth (line 34) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 36) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 42) | static void config() { method load_a (line 60) | static void load_a(dt* a, size_t lda) { method load_b (line 70) | static void load_b(dt* b, size_t ldb) { method clean_c (line 80) | static void clean_c() { method load_c (line 89) | static void load_c(output_t* c, size_t ldc) { method store_c (line 101) | static void store_c(output_t* c, size_t ldc) { method run_tile (line 113) | static void run_tile() { method avx_kernel (line 126) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin... method avx_kernel_4 (line 155) | static void avx_kernel_4(int m, int n, int k, int m_begin, int n_beg... method amx_kernel (line 239) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin... type GemmKernel224FP8 (line 258) | struct GemmKernel224FP8 { method name (line 276) | static std::string name() { return "FP8"; } method recommended_nth (line 278) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 280) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 286) | static void config() {} method __m512i (line 315) | static inline __m512i bf16_hi_0_mask() { return _mm512_load_si512((_... method __m512i (line 316) | static inline __m512i bf16_hi_1_mask() { return _mm512_load_si512((_... method __m512i (line 317) | static inline __m512i bf16_lo_0_mask() { return _mm512_load_si512((_... method __m512i (line 318) | static inline __m512i bf16_lo_1_mask() { return _mm512_load_si512((_... method __m512i (line 319) | static inline __m512i sign_mask() { return _mm512_set1_epi8(0x80); } method fp8x64_to_bf16x64 (line 324) | static inline std::pair<__m512i, __m512i> fp8x64_to_bf16x64(__m512i ... method avx_kernel (line 336) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin... method avx_kernel_4 (line 395) | static void avx_kernel_4(int m, int n, int k, int m_begin, int n_beg... method apply_scale_kgroup (line 504) | static void apply_scale_kgroup(int m, int n, int m_begin, int n_begi... function float_mat_vec_kgroup (line 529) | void float_mat_vec_kgroup(int m, int n, int k, int k_group_size, typen... function float_mat_vec (line 573) | void float_mat_vec(int m, int n, int k, typename K::BufferA* ba, typen... function mat_mul (line 595) | inline void mat_mul(int m, int n, int k, std::shared_ptr split_range_n(int n, int ith, int nth) { method config (line 658) | static void config() {} method fp8x64_to_bf16x64 (line 665) | static inline std::pair<__m512i, __m512i> fp8x64_to_bf16x64(__m512i ... method apply_scale_perchannel (line 682) | static void apply_scale_perchannel(int m, [[maybe_unused]] int n, in... method avx_kernel_4 (line 699) | static void avx_kernel_4(int m, int n, int k, int m_begin, int n_beg... function float_mat_vec_perchannel (line 816) | void float_mat_vec_perchannel(int m, int n, int k, typename K::BufferA... function vec_mul_perchannel (line 838) | inline void vec_mul_perchannel(int m, int n, int k, std::shared_ptr dims) : dims_(std::move(dims)) { method index_t (line 53) | index_t dims() const { return static_cast(dims_.size()); } method index_t (line 54) | index_t rows() const { return rows_; } method index_t (line 55) | index_t cols() const { return cols_; } method index_t (line 56) | index_t numel() const { return numel_; } method hd_to_rc (line 62) | std::pair hd_to_rc(const std::vector& hd_id... method rc_to_hd (line 77) | std::vector rc_to_hd(index_t row, index_t col) const { method index_t (line 96) | index_t rc_to_offset(index_t row, index_t col, index_t ld = 0) const { method offset_to_rc (line 102) | std::pair offset_to_rc(index_t offset, index_t ld = ... method index_t (line 111) | index_t hd_to_offset(const std::vector& hd_idx, index_t ld = ... method offset_to_hd (line 116) | std::vector offset_to_hd(index_t offset, index_t ld = 0) const { method decompose_row (line 123) | std::vector decompose_row(index_t row) const { method decompose_col (line 135) | std::vector decompose_col(index_t col) const { method index_t (line 147) | index_t compose_row(const std::vector& digits) const { method index_t (line 158) | index_t compose_col(const std::vector& digits) const { method check_hd_index (line 170) | void check_hd_index(const std::vector& hd_idx) const { method err_dim (line 175) | static std::string err_dim(index_t i, index_t v, index_t sz) { function main (line 189) | int main() { FILE: kt-kernel/operators/amx/la/utils.hpp function avx512_copy_32xbf16 (line 8) | static inline void avx512_copy_32xbf16(__m512i* src, __m512i* dst) { function avx512_32xfp32_to_32xbf16 (line 14) | static inline void avx512_32xfp32_to_32xbf16(__m512* src0, __m512* src1,... function avx512_32xbf16_to_32xfp32 (line 48) | static inline void avx512_32xbf16_to_32xfp32(__m512i* src, __m512* dst0,... function __m512 (line 55) | static inline __m512 vector_abs_max(__m512 a, __m512 b) { FILE: kt-kernel/operators/amx/moe.hpp class AMX_MOE_TP (line 20) | class AMX_MOE_TP : public AMX_MOE_BASE> { method write_weights (line 41) | inline void write_weights(std::filesystem::path prefix, std::string ma... method read_weights (line 65) | inline void read_weights(std::filesystem::path prefix, std::string mat... method load_check (line 92) | inline void load_check() { method verify_load_right (line 97) | void verify_load_right() { method AMX_MOE_TP (line 131) | AMX_MOE_TP() = default; method AMX_MOE_TP (line 133) | AMX_MOE_TP(GeneralMOEConfig config, int tp_part_idx = 0) : Base(config... method derived_init (line 137) | void derived_init() { method buffer_a_required_size_impl (line 163) | size_t buffer_a_required_size_impl(size_t m, size_t k) const { return ... method buffer_b_required_size_impl (line 164) | size_t buffer_b_required_size_impl(size_t n, size_t k) const { return ... method buffer_c_required_size_impl (line 165) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ... method make_buffer_a_impl (line 167) | std::shared_ptr make_buffer_a_impl(size_t m, size... method make_buffer_b_impl (line 170) | std::shared_ptr make_buffer_b_impl(size_t n, size... method make_buffer_c_impl (line 173) | std::shared_ptr make_buffer_c_impl(size_t m, size... method do_gate_up_gemm (line 181) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int... method do_down_gemm (line 194) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) { method load_weights (line 206) | void load_weights() { class TP_MOE> (line 358) | class TP_MOE> : public TP_MOE(this); } method Derived (line 644) | const Derived* derived_const() const { return static_cast make_buffer_a(size_t m, size_t k,... method make_buffer_b (line 668) | std::shared_ptr make_buffer_b(size_t n, size_t k,... method make_buffer_c (line 671) | std::shared_ptr make_buffer_c(size_t m, size_t n,... method apply_activation (line 675) | void apply_activation(int activated_expert, int nth, int qlen) { class TP_MOE> (line 714) | class TP_MOE> : public TP_MOE_Common split_range_n(int n, int ith, int nth) { function test_buffer_bkgroup_basic (line 29) | void test_buffer_bkgroup_basic() { function test_buffer_bkgroup_correctness (line 109) | void test_buffer_bkgroup_correctness() { function test_buffer_bkgroup_comparison (line 180) | void test_buffer_bkgroup_comparison() { function main (line 237) | int main(int argc, char** argv) { FILE: kt-kernel/operators/amx/test/amx-c-reduce-test.cpp type TestKernelC (line 13) | struct TestKernelC { method split_range_n (line 22) | static std::pair split_range_n(int n, int ith, int nth) { function test_buffer_c_reduce_basic (line 30) | void test_buffer_c_reduce_basic() { function test_buffer_c_reduce_comparison (line 215) | void test_buffer_c_reduce_comparison() { function test_buffer_c_reduce_performance (line 280) | void test_buffer_c_reduce_performance() { function main (line 324) | int main(int argc, char** argv) { FILE: kt-kernel/operators/amx/test/amx-kgroup-test.cpp type TestKernelKGroup (line 12) | struct TestKernelKGroup { method split_range_n (line 21) | static std::pair split_range_n(int n, int ith, int nth) { function test_buffer_kgroup_basic (line 29) | void test_buffer_kgroup_basic() { function test_buffer_kgroup_correctness (line 109) | void test_buffer_kgroup_correctness() { function test_buffer_kgroup_comparison (line 231) | void test_buffer_kgroup_comparison() { function main (line 288) | int main(int argc, char** argv) { FILE: kt-kernel/operators/amx/test/amx-test.cpp function q_latency_test_bf16 (line 13) | void q_latency_test_bf16(int m, int n, int k, ggml_bf16_t* qa, ggml_bf16... function group_q_latency_test_bf16 (line 65) | void group_q_latency_test_bf16(int n_max, int k_max) { function q_latency_test_int8 (line 98) | void q_latency_test_int8(int m, int n, int k, ggml_bf16_t* qa, ggml_bf16... function group_q_latency_test_int8 (line 150) | void group_q_latency_test_int8(int n_max, int k_max) { function correction_test_int4 (line 182) | void correction_test_int4(int m, int n, int k) { function correction_test_int4_1 (line 288) | void correction_test_int4_1(int m, int n, int k) { function q_latency_test_int4 (line 394) | void q_latency_test_int4(int m, int n, int k, ggml_bf16_t* qa, ggml_bf16... function group_q_latency_test_int4 (line 446) | void group_q_latency_test_int4(int n_max, int k_max) { function q_latency_test_int4_1 (line 479) | void q_latency_test_int4_1(int m, int n, int k, ggml_bf16_t* qa, ggml_bf... function group_q_latency_test_int4_1 (line 534) | void group_q_latency_test_int4_1(int n_max, int k_max) { function main (line 567) | int main() { FILE: kt-kernel/operators/amx/test/analyze-error.cpp function analyze_error_patterns (line 9) | void analyze_error_patterns() { function main (line 244) | int main() { FILE: kt-kernel/operators/amx/test/avx-test.cpp function generate_data (line 15) | void generate_data(uint8_t* data, size_t size) { function dpbusd_test (line 32) | void dpbusd_test(const uint8_t* data_a, const uint8_t* data_b, int32_t* ... function main (line 49) | int main() { FILE: kt-kernel/operators/amx/test/debug-kgroup-details.cpp function debug_kgroup_details (line 8) | void debug_kgroup_details() { function main (line 195) | int main() { FILE: kt-kernel/operators/amx/test/debug-kgroup.cpp function debug_simple_multiplication (line 12) | void debug_simple_multiplication() { function debug_pattern_multiplication (line 100) | void debug_pattern_multiplication() { function compare_with_regular_int4 (line 179) | void compare_with_regular_int4() { function main (line 278) | int main() { FILE: kt-kernel/operators/amx/test/debug-specific-dims.cpp function debug_specific_dimensions (line 8) | void debug_specific_dimensions() { function main (line 201) | int main() { FILE: kt-kernel/operators/amx/test/mat-test.hpp type DotProductImpl (line 16) | struct DotProductImpl { type DotProductImpl (line 25) | struct DotProductImpl { type DotProductImpl (line 29) | struct DotProductImpl { type DotProductImpl (line 33) | struct DotProductImpl { type DotProductImpl (line 37) | struct DotProductImpl { type DotProductImpl (line 42) | struct DotProductImpl { type Layout (line 46) | enum class Layout { type Mat (line 53) | struct Mat { method size (line 55) | size_t size() { return rows * cols; } method Mat (line 65) | Mat() {} method Mat (line 67) | Mat(int rows, int cols, Layout layout) : rows(rows), cols(cols), layou... method sub_mat (line 86) | Mat sub_mat(int r, int c) { method dealloc (line 98) | void dealloc() { method row_major_increase (line 105) | void row_major_increase() { method dis_to_00 (line 114) | void dis_to_00() { method random (line 122) | void random(std::mt19937& gen) { method stride (line 143) | size_t stride() { return stride_in_bytes; } method line_element_count (line 145) | int line_element_count() { method T (line 157) | T& at(int r, int c) { method print (line 173) | void print() { method print_all (line 206) | void print_all() { method mul_check (line 223) | Mat> mul_check(Mat& b) { method cmp (line 237) | bool cmp(Mat& b) { method quant (line 292) | void quant(ggml_type to) { method Block (line 390) | Block* quant_data() { method dequant (line 394) | void dequant() { function init (line 478) | inline void init() { FILE: kt-kernel/operators/amx/test/mmq-test.cpp function balance211 (line 65) | void balance211(T n, T nth, T ith, T& n_start, T& n_end) { function parallel_for (line 89) | inline void parallel_for(int nth, int ith, int n, const func_t& f) { type Unroll (line 99) | struct Unroll { method ALWAYS_INLINE (line 101) | ALWAYS_INLINE void operator()(const Func& f, Args... args) const { type Unroll<1> (line 108) | struct Unroll<1> { method ALWAYS_INLINE (line 110) | ALWAYS_INLINE void operator()(const Func& f, Args... args) const { type PackedTypes (line 117) | struct PackedTypes {} type PackedTypes (line 119) | struct PackedTypes { type PackedTypes (line 123) | struct PackedTypes { type PackedTypes (line 127) | struct PackedTypes { type do_compensate (line 134) | struct do_compensate : std::integral_constant (line 1144) | struct acc_C { method apply (line 1145) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1167) | struct acc_C { method apply (line 1168) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1194) | struct acc_C { method apply (line 1195) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1217) | struct acc_C { method apply (line 1218) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1260) | struct acc_C { method apply (line 1261) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1304) | struct acc_C { method apply (line 1305) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1332) | struct acc_C { method apply (line 1333) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... function scale_C (line 1379) | inline void scale_C(const int32_t* RESTRICT tile, int32_t* RESTRICT sumi... type tinygemm_kernel_avx (line 1397) | struct tinygemm_kernel_avx { method apply (line 1398) | static void apply(int K, const TA* RESTRICT A, const TB* RESTRICT B, T... type tinygemm_kernel_avx (line 1408) | struct tinygemm_kernel_avx (line 1478) | struct tinygemm_kernel_vnni (line 1545) | struct tinygemm_kernel_vnni (line 1606) | struct tinygemm_kernel_vnni (line 1672) | struct tinygemm_kernel_vnni (line 1768) | struct tinygemm_kernel_vnni (line 1870) | struct tinygemm_kernel_vnni (line 1976) | struct tinygemm_kernel_vnni (line 108) | struct Unroll<1> { method ALWAYS_INLINE (line 110) | ALWAYS_INLINE void operator()(const Func& f, Args... args) const { type PackedTypes (line 117) | struct PackedTypes {} type PackedTypes (line 119) | struct PackedTypes { type PackedTypes (line 123) | struct PackedTypes { type PackedTypes (line 127) | struct PackedTypes { type do_compensate (line 134) | struct do_compensate : std::integral_constant (line 1140) | struct acc_C { method apply (line 1141) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1163) | struct acc_C { method apply (line 1164) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1190) | struct acc_C { method apply (line 1191) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1213) | struct acc_C { method apply (line 1214) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1256) | struct acc_C { method apply (line 1257) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1300) | struct acc_C { method apply (line 1301) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... type acc_C (line 1328) | struct acc_C { method apply (line 1329) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ... function scale_C (line 1375) | inline void scale_C(const int32_t* RESTRICT tile, int32_t* RESTRICT sumi... type tinygemm_kernel_avx (line 1393) | struct tinygemm_kernel_avx { method apply (line 1394) | static void apply(int K, const TA* RESTRICT A, const TB* RESTRICT B, T... type tinygemm_kernel_avx (line 1404) | struct tinygemm_kernel_avx (line 1474) | struct tinygemm_kernel_vnni (line 1541) | struct tinygemm_kernel_vnni (line 1602) | struct tinygemm_kernel_vnni (line 1668) | struct tinygemm_kernel_vnni (line 1764) | struct tinygemm_kernel_vnni (line 1866) | struct tinygemm_kernel_vnni (line 1972) | struct tinygemm_kernel_vnni i) { method push_back (line 127) | void push_back(size_t i) { method push_back (line 134) | void push_back(std::vector i) { method count (line 139) | size_t count() { return card[0]; } method at (line 140) | size_t at(size_t id, size_t which) { return id % card.at(which) / card... type GeneralConfig (line 143) | struct GeneralConfig { method GeneralConfig (line 159) | GeneralConfig() {} type GeneralMLAConfig (line 162) | struct GeneralMLAConfig { method GeneralMLAConfig (line 211) | GeneralMLAConfig() {} method GeneralMLAConfig (line 212) | GeneralMLAConfig(size_t hidden_size, size_t q_lora_rank, size_t kv_lor... type QuantConfig (line 222) | struct QuantConfig { type GeneralMOEConfig (line 230) | struct GeneralMOEConfig { method compute_num_gpu_experts (line 246) | void compute_num_gpu_experts() { method should_skip_expert (line 256) | inline bool should_skip_expert(int64_t expert_id) const { method GeneralMOEConfig (line 299) | GeneralMOEConfig() {} method GeneralMOEConfig (line 301) | GeneralMOEConfig(int expert_num, int routed_expert_num, int hidden_siz... method max_possible_qlen (line 307) | int max_possible_qlen() { return std::max(max_len, group_max_len); } type GeneralGateConfig (line 310) | struct GeneralGateConfig { method GeneralGateConfig (line 333) | GeneralGateConfig() = default; method GeneralGateConfig (line 335) | GeneralGateConfig(int hidden_size, int num_experts_per_tok, int n_rout... class MLA_Interface (line 343) | class MLA_Interface { class MoE_Interface (line 349) | class MoE_Interface { function init_ggml (line 354) | inline void init_ggml() { function convert_or_copy (line 374) | void convert_or_copy(A* dst, const B* src, size_t count) { function convert_or_copy (line 404) | void convert_or_copy(A* dst, void* src, ggml_type type, size_t count) { function check_numerics (line 431) | void check_numerics(A* data, size_t count) { function debug_bf16 (line 441) | inline void debug_bf16(ggml_bf16_t* x) { function debug_f32 (line 447) | inline void debug_f32(float* x) { function debug_f32 (line 454) | inline void debug_f32(float* x, size_t count) { FILE: kt-kernel/operators/kvcache/kvcache.h type AnchorType (line 52) | enum AnchorType { type RetrievalType (line 83) | enum RetrievalType { type KVCacheConfig (line 111) | struct KVCacheConfig { function class (line 178) | class KVCache { FILE: kt-kernel/operators/kvcache/kvcache_utils.cpp function ggml_type_to_string (line 16) | std::string ggml_type_to_string(ggml_type type) { function AnchorTypeToString (line 29) | std::string AnchorTypeToString(AnchorType type) { function RetrievalTypeToString (line 44) | std::string RetrievalTypeToString(RetrievalType type) { function ggml_vec_scale_f32 (line 786) | void ggml_vec_scale_f32(const int n, float* y, const float v) { FILE: kt-kernel/operators/llamafile/conversion.h function to_float (line 18) | inline void to_float(const void* input, float* output, int size, ggml_ty... function from_float (line 30) | inline void from_float(const float* input, void* output, int size, ggml_... FILE: kt-kernel/operators/llamafile/linear.h type LinearConfig (line 27) | struct LinearConfig { FILE: kt-kernel/operators/llamafile/mlp.cpp function act_fn (line 54) | static float act_fn(float x) { return x / (1.0f + expf(-x)); } FILE: kt-kernel/operators/llamafile/mlp.h type MLPConfig (line 27) | struct MLPConfig { FILE: kt-kernel/operators/llamafile/moe.hpp function debug_quant (line 25) | inline void debug_quant(void* input, ggml_type type) { class LLAMA_MOE_TP (line 34) | class LLAMA_MOE_TP { method LLAMA_MOE_TP (line 93) | LLAMA_MOE_TP(GeneralMOEConfig config, int tp_part_idx) : config_(confi... method load_weights (line 192) | void load_weights(int complete_intermediate_size, int offset) { method warm_up (line 251) | void warm_up() { method act_fn (line 267) | static float act_fn(float x) { return x / (1.0f + expf(-x)); } method forward_one (line 269) | void forward_one(int k, const int64_t* expert_ids, const float* weight... method forward_many (line 459) | void forward_many(int qlen, int k, const int64_t* expert_ids, const fl... method forward (line 746) | void forward(int qlen, int k, const int64_t* expert_ids, const float* ... class TP_MOE (line 767) | class TP_MOE : public TP_MOE_Common { method load_weights (line 771) | void load_weights() { method merge_results (line 787) | void merge_results(int qlen, void* output) { merge_results(qlen, outpu... method merge_results (line 789) | void merge_results(int qlen, void* output, bool incremental) { FILE: kt-kernel/operators/mla-tp.hpp class TP_MLA_Common (line 29) | class TP_MLA_Common : public MLA_Interface { method TP_MLA_Common (line 46) | TP_MLA_Common(GeneralMLAConfig config) : config(config) { method forward (line 82) | void forward(std::vector qlens, std::vector> pag... method set_pages (line 125) | void set_pages(std::vector> kv_lora_pages, std::vec... method set_local_pages (line 131) | void set_local_pages(int page_count) { class TP_MLA (line 141) | class TP_MLA : public TP_MLA_Common { method load_weights (line 144) | void load_weights() { throw std::runtime_error("Not Implemented"); } method merge_results (line 145) | void merge_results(int qlen, void* output) { throw std::runtime_error(... FILE: kt-kernel/operators/moe-tp.hpp class LLAMA_MOE_TP (line 14) | class LLAMA_MOE_TP class TP_MOE_Common (line 26) | class TP_MOE_Common : public MoE_Interface { method TP_MOE_Common (line 45) | TP_MOE_Common(GeneralMOEConfig config) : config(config) { method warm_up (line 139) | void warm_up() { method forward (line 152) | void forward(int qlen, int k, const int64_t* expert_ids, const float* ... method forward (line 158) | void forward(int* qlen_ptr, int k, const int64_t* expert_ids, const fl... method forward_binding (line 162) | void forward_binding(intptr_t qlen_ptr, int k, intptr_t expert_ids, in... method forward (line 168) | void forward(int* qlen_ptr, int k, const int64_t* expert_ids, const fl... method merge_results (line 219) | virtual void merge_results(int qlen, void* output, bool incremental) { class TP_MOE (line 229) | class TP_MOE : public TP_MOE_Common { method load_weights (line 232) | void load_weights(const uint64_t* physical_to_logical_map) { throw std... FILE: kt-kernel/operators/moe_kernel/api/common.h type BLASINT8 (line 40) | typedef int8_t BLASINT8; type KERNEL_CBLAS_TRANSPOSE (line 43) | typedef enum KERNEL_CBLAS_TRANSPOSE { type KERNEL_CBLAS_ORDER (line 50) | typedef enum KERNEL_CBLAS_ORDER { KernelCblasRowMajor = 101, KernelCblas... type KERNEL_CBLAS_SIDE (line 52) | typedef enum KERNEL_CBLAS_SIDE { KernelCblasLeft = 141, KernelCblasRight... type KERNEL_CBLAS_ORDER (line 53) | typedef KERNEL_CBLAS_ORDER KERNEL_CBLAS_LAYOUT; type KERNEL_CBLAS_OFFSET (line 54) | typedef enum KERNEL_CBLAS_OFFSET { function MatKernelVariant (line 60) | enum class MatKernelVariant { FILE: kt-kernel/operators/moe_kernel/api/mat_kernel.h type MatKernelSelection (line 15) | struct MatKernelSelection { FILE: kt-kernel/operators/moe_kernel/la/kernel.hpp type moe_kernel (line 19) | namespace moe_kernel { function T (line 21) | T *offset_pointer(T *ptr, size_t byte_offset) { function bf16_to_fp32 (line 25) | inline float bf16_to_fp32(ggml_bf16_t src) { function fp16_to_fp32 (line 40) | inline float fp16_to_fp32(ggml_fp16_t src) { return ggml_fp16_to_fp32(... type BufferAImpl (line 43) | struct BufferAImpl { method K_BLOCK (line 52) | static inline int K_BLOCK() { return K::K_BLOCK; } method required_size (line 56) | static size_t required_size(int max_m, int k) { return sizeof(int8_t... method BufferAImpl (line 58) | BufferAImpl(int max_m, int k, void *ptr, bool if_pack = false) : max... method BufferAImpl (line 62) | BufferAImpl(int max_m, int k, bool if_pack = false) : max_m(max_m), ... method set_data (line 68) | void set_data(void *ptr) { method required_size (line 73) | size_t required_size() const { return sizeof(int8_t) * max_m * k + s... method offset_row (line 75) | BufferAImpl offset_row(size_t row_begin, size_t row_block) { method from_mat (line 83) | void from_mat(int m, ggml_bf16_t *src, int ith, int mth) { method from_mat (line 128) | void from_mat(int m, ggml_fp16_t *src, int ith, int mth) { method from_mat (line 175) | void from_mat(int m, float *src, int ith, int mth) { method from_mat (line 218) | void from_mat(int m, float *src) { method to_mat (line 245) | void to_mat(int m, float *dst, int ith, int mth) { type BufferCImpl (line 264) | struct BufferCImpl { method N_BLOCK (line 272) | static inline int N_BLOCK() { return K::N_BLOCK; } method required_size (line 274) | static size_t required_size(int max_m, int n) { return sizeof(int32_... method BufferCImpl (line 276) | BufferCImpl(int max_m, int n, void *ptr, bool if_row_major = false) ... method BufferCImpl (line 283) | BufferCImpl(int max_m, int n, bool if_row_major = false) : max_m(max... method set_data (line 285) | void set_data(void *ptr) { method required_size (line 289) | size_t required_size() const { return sizeof(int32_t) * max_m * n; } type GemmKernelInt8 (line 296) | struct GemmKernelInt8 { method set_tiling (line 324) | static void set_tiling(int n_block_up_gate, int n_block_down, int n_... method get_tiling (line 334) | static std::tuple get_tiling() { method name (line 343) | static std::string name() { return "MOE_INT8"; } method recommended_nth (line 344) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method recommended_nth_down (line 346) | static int recommended_nth_down(int n, char type_ = 'd') { method recommended_nth_up_gate (line 360) | static int recommended_nth_up_gate(int n, char type_ = 'd') { method recommended_mth (line 374) | static int recommended_mth(int m) { return (m + M_BLOCK - 1) / M_BLO... method split_range_n (line 376) | static std::pair split_range_n(int n, int ith, int nth, in... method split_range_m (line 382) | static std::pair split_range_m(int m, int ith, int mth = 0) { method split_range_n_block (line 388) | static std::pair split_range_n_block(int n, int ith, int n... type BufferB (line 397) | struct BufferB { method required_size (line 408) | static size_t required_size(int n, int k, bool if_pack = false, ch... method BufferB (line 433) | BufferB(int n, int k, bool if_pack = false, char mat_type = 'n', b... method BufferB (line 461) | BufferB(int n, int k, void *ptr, bool if_pack = false, char mat_ty... method set_data (line 466) | void set_data(void *ptr, bool plain = true) { method required_size (line 477) | size_t required_size() const { return sizeof(int8_t) * n * k + siz... method BufferB (line 478) | BufferB offset_col(size_t col_begin, size_t col_block) { method from_mat (line 484) | void from_mat(ggml_bf16_t *src, int ith, int nth, int n_new = -1, ... method from_mat (line 541) | void from_mat(float *src, int ith, int nth, int n_new = -1, bool i... method from_mat_row_major (line 591) | void from_mat_row_major(float *src, int ld, int ith, int nth, int ... method to_mat (line 621) | void to_mat(float *dst, int ith, int nth, int n_new = -1) { method convert_buffer_a_to_buffer_b (line 647) | static void convert_buffer_a_to_buffer_b(BufferA *ba, BufferB *bb) { method convert_buffer_b_to_buffer_a (line 656) | static void convert_buffer_b_to_buffer_a(BufferB *bb, BufferA *ba) { method change_view (line 665) | static void change_view(BufferC *c_src, BufferC *c_dst) { method apply_scale (line 676) | static void apply_scale(int m, int n, float *c, BufferA *ba, BufferB... method apply_scale (line 692) | static void apply_scale(int m, int n, float *c, BufferA *ba, BufferB... method apply_scale (line 718) | static void apply_scale(float *c, int ldc, BufferA *ba, BufferB *bb,... method apply_scale (line 754) | static void apply_scale(float *c, int ldc, BufferA *ba, BufferB *bb,... type GemmKernelInt4 (line 789) | struct GemmKernelInt4 { method set_tiling (line 817) | static void set_tiling(int n_block_up_gate, int n_block_down, int n_... method get_tiling (line 827) | static std::tuple get_tiling() { method name (line 836) | static std::string name() { return "MOE_INT4"; } method recommended_nth (line 837) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method recommended_nth_down (line 839) | static int recommended_nth_down(int n, char type_ = 'd') { method recommended_mth (line 852) | static int recommended_mth(int m) { return (m + M_BLOCK - 1) / M_BLO... method recommended_nth_up_gate (line 854) | static int recommended_nth_up_gate(int n, char type_ = 'd') { method split_range_n (line 868) | static std::pair split_range_n(int n, int ith, int nth) { method split_range_m (line 873) | static std::pair split_range_m(int m, int ith, int mth) { method split_range_n_block (line 879) | static std::pair split_range_n_block(int n, int ith, int n... type BufferB (line 888) | struct BufferB { method required_size (line 897) | static size_t required_size(int n, int k, bool if_pack = false, ch... method BufferB (line 927) | BufferB(int n, int k, bool if_pack = false, char mat_type = 'n', b... method BufferB (line 932) | BufferB(int n, int k, void *ptr, bool if_pack = false, char mat_ty... method set_data (line 936) | void set_data(void *ptr, bool plain = true) { method required_size (line 940) | size_t required_size() const { return sizeof(int8_t) * n * k / 2 +... method BufferB (line 941) | BufferB offset_col(size_t col_begin, size_t col_block) { method from_mat (line 947) | void from_mat(ggml_bf16_t *src, int ith, int nth, int n_new = -1, ... method from_mat (line 996) | void from_mat(float *src, int ith, int nth, int n_new = -1, bool i... method convert_buffer_a_to_buffer_b (line 1060) | static void convert_buffer_a_to_buffer_b(BufferA *ba, BufferB *bb) { method convert_buffer_b_to_buffer_a (line 1070) | static void convert_buffer_b_to_buffer_a(BufferB *bb, BufferA *ba) { method change_view (line 1081) | static void change_view(BufferC *c_src, BufferC *c_dst) { method apply_scale (line 1094) | static void apply_scale(int m, int n, float *c, BufferA *ba, BufferB... method apply_scale (line 1109) | static void apply_scale(int m, int n, float *c, BufferA *ba, BufferB... method apply_scale (line 1127) | static void apply_scale(float *c, int ldc, BufferA *ba, BufferB *bb,... method apply_scale (line 1163) | static void apply_scale(float *c, int ldc, BufferA *ba, BufferB *bb,... FILE: kt-kernel/operators/moe_kernel/la/mat_kernel.cpp function MatKernelSelection (line 36) | MatKernelSelection select_kernel_for_int4(MatKernelVariant variant) { function MatKernelSelection (line 46) | MatKernelSelection select_kernel_for_int8(MatKernelVariant variant) { FILE: kt-kernel/operators/moe_kernel/la/utils.hpp function float_to_bf16_trunc (line 7) | static inline uint16_t float_to_bf16_trunc(float f) { function convert_32fp32_to_32bf16_pure_c (line 14) | static inline void convert_32fp32_to_32bf16_pure_c(const float* src, uin... function convert_32bf16_to_32fp32_pure_c (line 24) | static inline void convert_32bf16_to_32fp32_pure_c(const uint16_t* src, ... FILE: kt-kernel/operators/moe_kernel/mat_kernel/aocl_kernel/kernel.cpp function ToAoclOrder (line 8) | char ToAoclOrder(KERNEL_CBLAS_LAYOUT layout) { function ToAoclTranspose (line 18) | char ToAoclTranspose(KERNEL_CBLAS_TRANSPOSE transpose) { function decode_cblas_gemm_s8s8s32 (line 37) | void decode_cblas_gemm_s8s8s32(const KERNEL_CBLAS_LAYOUT layout, const K... function prefill_cblas_gemm_s8s8s32 (line 52) | void prefill_cblas_gemm_s8s8s32(const KERNEL_CBLAS_LAYOUT layout, const ... function prefill_int4_cblas_gemm_s8s8s32 (line 67) | void prefill_int4_cblas_gemm_s8s8s32(const KERNEL_CBLAS_LAYOUT layout, c... function decode_int4_cblas_gemm_s8s8s32 (line 76) | void decode_int4_cblas_gemm_s8s8s32(const KERNEL_CBLAS_LAYOUT layout, co... function reorder_B_gemm (line 85) | void reorder_B_gemm(const KERNEL_CBLAS_LAYOUT layout, const KERNEL_CBLAS... function get_reorder_B_size (line 93) | size_t get_reorder_B_size(const KERNEL_CBLAS_LAYOUT layout, const KERNEL... FILE: kt-kernel/operators/moe_kernel/moe.hpp function TimePerf (line 22) | class MOE_KERNEL_TP class TP_MOE> (line 673) | class TP_MOE> : public TP_MOE_Common yarn_find_correction_range(double low_r... function yarn_linear_ramp_mask (line 167) | inline std::vector yarn_linear_ramp_mask(double min_val, double ... function yarn_get_mscale (line 179) | inline double yarn_get_mscale(double scale = 1.0, double mscale = 1.0) { class DeepseekV3YarnRotaryEmbedding (line 186) | class DeepseekV3YarnRotaryEmbedding : public DeepseekV3RotaryEmbedding { method DeepseekV3YarnRotaryEmbedding (line 188) | DeepseekV3YarnRotaryEmbedding(size_t dim, size_t max_position_embeddin... method calculate_inv_freq (line 211) | void calculate_inv_freq() override { method set_cos_sin_cache (line 242) | void set_cos_sin_cache(size_t seq_len) override { FILE: kt-kernel/operators/softmax.hpp class Softmax (line 20) | class Softmax { method apply_single (line 23) | static void apply_single(A* v, size_t size) { method apply_multiple (line 48) | static void apply_multiple(size_t count, A* v, size_t size, size_t ld) { FILE: kt-kernel/python/_cpu_detect.py function detect_cpu_features (line 26) | def detect_cpu_features(): function load_extension (line 165) | def load_extension(variant): function initialize (line 266) | def initialize(): FILE: kt-kernel/python/cli/commands/bench.py class BenchType (line 25) | class BenchType(str, Enum): function bench (line 36) | def bench( function microbench (line 88) | def microbench( function _find_kt_kernel_path (line 196) | def _find_kt_kernel_path() -> Optional[Path]: function _run_all_benchmarks (line 219) | def _run_all_benchmarks(model: Optional[str], output: Optional[Path], it... function _run_inference_benchmark (line 228) | def _run_inference_benchmark(model: Optional[str], output: Optional[Path... function _run_component_benchmark (line 243) | def _run_component_benchmark(component: str, output: Optional[Path], ite... FILE: kt-kernel/python/cli/commands/chat.py function chat (line 39) | def chat( function _stream_response (line 281) | def _stream_response( function _count_tokens_with_tokenizer (line 373) | def _count_tokens_with_tokenizer(messages: list, tokenizer) -> int: function _estimate_tokens (line 398) | def _estimate_tokens(messages: list) -> int: function _generate_response (line 412) | def _generate_response( function _handle_command (line 470) | def _handle_command(command: str, messages: list, temperature: float, ma... function _format_history (line 543) | def _format_history(messages: list) -> str: function _save_history (line 559) | def _save_history(file_path: Path, messages: list, model: str) -> None: FILE: kt-kernel/python/cli/commands/config.py function init (line 21) | def init() -> None: function show (line 31) | def show( function set_config (line 57) | def set_config( function get_config (line 72) | def get_config( function reset (line 90) | def reset( function path (line 104) | def path() -> None: function model_path_list (line 111) | def model_path_list() -> None: function model_path_add (line 119) | def model_path_add( function model_path_remove (line 129) | def model_path_remove( function _parse_value (line 138) | def _parse_value(value: str): FILE: kt-kernel/python/cli/commands/doctor.py function _get_kt_kernel_info (line 34) | def _get_kt_kernel_info() -> dict: function doctor (line 95) | def doctor( function _check_python_version (line 479) | def _check_python_version(version: str) -> bool: function _display_results (line 489) | def _display_results(checks: list[dict], verbose: bool) -> None: FILE: kt-kernel/python/cli/commands/model.py function is_amx_weights (line 46) | def is_amx_weights(model_path) -> tuple[bool, int]: function callback (line 93) | def callback(ctx: typer.Context) -> None: function download (line 105) | def download( function list_models (line 466) | def list_models( function clear_cache (line 1019) | def clear_cache() -> None: function path_list (line 1061) | def path_list() -> None: function link_cpu (line 1077) | def link_cpu( function unlink_cpu (line 1136) | def unlink_cpu( function path_add (line 1166) | def path_add( function path_remove (line 1194) | def path_remove( function scan (line 1210) | def scan( function add_model (line 1263) | def add_model( function edit_model (line 1339) | def edit_model( function info_model (line 1800) | def info_model( function remove_model (line 1920) | def remove_model( function refresh_models (line 1991) | def refresh_models() -> None: function verify_model (line 2090) | def verify_model( function verify_all_models (line 2651) | def verify_all_models() -> None: function auto_detect_repo (line 2685) | def auto_detect_repo( FILE: kt-kernel/python/cli/commands/quant.py class QuantMethod (line 30) | class QuantMethod(str, Enum): function quant (line 37) | def quant( function _resolve_input_path (line 478) | def _resolve_input_path(model: str, settings) -> Optional[Path]: function _find_kt_kernel_path (line 510) | def _find_kt_kernel_path() -> Optional[Path]: FILE: kt-kernel/python/cli/commands/run.py function run (line 78) | def run( function _run_impl (line 163) | def _run_impl( function _build_sglang_command (line 561) | def _build_sglang_command( function _interactive_model_selection (line 721) | def _interactive_model_selection(user_registry, settings) -> Optional[str]: FILE: kt-kernel/python/cli/commands/sft.py function callback (line 16) | def callback(ctx: typer.Context) -> None: function train (line 29) | def train() -> None: function chat (line 38) | def chat() -> None: function export (line 47) | def export() -> None: FILE: kt-kernel/python/cli/commands/version.py function _get_sglang_info (line 18) | def _get_sglang_info() -> str: function version (line 52) | def version( FILE: kt-kernel/python/cli/config/settings.py class Settings (line 68) | class Settings: method __init__ (line 71) | def __init__(self, config_path: Optional[Path] = None): method _ensure_dirs (line 82) | def _ensure_dirs(self) -> None: method _load (line 93) | def _load(self) -> None: method _save (line 108) | def _save(self) -> None: method _deep_copy (line 117) | def _deep_copy(self, obj: Any) -> Any: method _deep_merge (line 125) | def _deep_merge(self, base: dict, override: dict) -> None: method get (line 133) | def get(self, key: str, default: Any = None) -> Any: method set (line 154) | def set(self, key: str, value: Any) -> None: method delete (line 174) | def delete(self, key: str) -> bool: method reset (line 199) | def reset(self) -> None: method get_all (line 204) | def get_all(self) -> dict[str, Any]: method get_env_vars (line 208) | def get_env_vars(self) -> dict[str, str]: method models_dir (line 220) | def models_dir(self) -> Path: method get_model_paths (line 225) | def get_model_paths(self) -> list[Path]: method add_model_path (line 242) | def add_model_path(self, path: str) -> None: method remove_model_path (line 259) | def remove_model_path(self, path: str) -> bool: method cache_dir (line 285) | def cache_dir(self) -> Path: method weights_dir (line 290) | def weights_dir(self) -> Optional[Path]: function get_settings (line 300) | def get_settings() -> Settings: function reset_settings (line 308) | def reset_settings() -> None: FILE: kt-kernel/python/cli/i18n.py function get_lang (line 1260) | def get_lang() -> str: function t (line 1306) | def t(msg_key: str, **kwargs: Any) -> str: function set_lang (line 1336) | def set_lang(lang: str) -> None: FILE: kt-kernel/python/cli/main.py function _get_app_help (line 20) | def _get_app_help() -> str: function _get_help (line 28) | def _get_help(key: str) -> str: function _update_help_texts (line 57) | def _update_help_texts() -> None: function check_first_run (line 77) | def check_first_run() -> None: function _show_first_run_setup (line 103) | def _show_first_run_setup(settings) -> None: function _prompt_custom_path (line 359) | def _prompt_custom_path(console, settings) -> str: function _install_shell_completion (line 391) | def _install_shell_completion() -> None: function _apply_saved_language (line 447) | def _apply_saved_language() -> None: function main (line 484) | def main(): FILE: kt-kernel/python/cli/utils/analyze_moe_model.py function _get_sglang_moe_architectures (line 12) | def _get_sglang_moe_architectures(): function _get_cache_file (line 63) | def _get_cache_file(): function _load_all_cache (line 70) | def _load_all_cache(): function _save_all_cache (line 83) | def _save_all_cache(cache_data): function _compute_config_fingerprint (line 95) | def _compute_config_fingerprint(config_path: Path) -> Optional[str]: function _load_cache (line 109) | def _load_cache(model_path: Path) -> Optional[Dict[str, Any]]: function _save_cache (line 136) | def _save_cache(model_path: Path, result: Dict[str, Any]): function _load_config_json (line 160) | def _load_config_json(model_path: Path) -> Optional[Dict[str, Any]]: function _is_moe_model (line 178) | def _is_moe_model(config: Dict[str, Any]) -> bool: function _extract_moe_params (line 205) | def _extract_moe_params(config: Dict[str, Any]) -> Dict[str, Any]: function _estimate_model_size (line 271) | def _estimate_model_size(model_path: Path) -> float: function analyze_moe_model (line 285) | def analyze_moe_model(model_path, use_cache=True): function print_analysis (line 366) | def print_analysis(model_path): function main (line 400) | def main(): FILE: kt-kernel/python/cli/utils/console.py function print_info (line 44) | def print_info(message: str, **kwargs) -> None: function print_success (line 49) | def print_success(message: str, **kwargs) -> None: function print_warning (line 54) | def print_warning(message: str, **kwargs) -> None: function print_error (line 59) | def print_error(message: str, **kwargs) -> None: function print_step (line 64) | def print_step(message: str, **kwargs) -> None: function print_header (line 69) | def print_header(title: str, subtitle: Optional[str] = None) -> None: function print_version_table (line 77) | def print_version_table(versions: dict[str, Optional[str]]) -> None: function print_dependency_table (line 92) | def print_dependency_table(deps: list[dict]) -> None: function confirm (line 119) | def confirm(message: str, default: bool = True) -> bool: function prompt_choice (line 124) | def prompt_choice(message: str, choices: list[str], default: Optional[st... function prompt_text (line 149) | def prompt_text(message: str, default: Optional[str] = None) -> str: function create_progress (line 154) | def create_progress() -> Progress: function create_download_progress (line 166) | def create_download_progress() -> Progress: function print_model_table (line 179) | def print_model_table(models: list[dict]) -> None: function print_hardware_info (line 204) | def print_hardware_info(gpu_info: str, cpu_info: str, ram_info: str) -> ... function print_server_info (line 217) | def print_server_info( function print_api_info (line 234) | def print_api_info(host: str, port: int) -> None: FILE: kt-kernel/python/cli/utils/debug_configs.py function main (line 16) | def main(): FILE: kt-kernel/python/cli/utils/download_helper.py function list_remote_files_hf (line 8) | def list_remote_files_hf(repo_id: str, use_mirror: bool = False) -> List... function list_remote_files_ms (line 48) | def list_remote_files_ms(repo_id: str) -> List[Dict[str, any]]: function filter_files_by_pattern (line 70) | def filter_files_by_pattern(files: List[Dict[str, any]], pattern: str) -... function calculate_total_size (line 87) | def calculate_total_size(files: List[Dict[str, any]]) -> int: function format_file_list_table (line 92) | def format_file_list_table(files: List[Dict[str, any]], max_display: int... function verify_repo_exists (line 111) | def verify_repo_exists(repo_id: str, repo_type: str, use_mirror: bool = ... FILE: kt-kernel/python/cli/utils/environment.py class EnvManager (line 21) | class EnvManager: class GPUInfo (line 30) | class GPUInfo: class CPUInfo (line 40) | class CPUInfo: class MemoryInfo (line 52) | class MemoryInfo: class SystemInfo (line 63) | class SystemInfo: function run_command (line 75) | def run_command(cmd: list[str], timeout: int = 10) -> Optional[str]: function detect_env_managers (line 86) | def detect_env_managers() -> list[EnvManager]: function check_docker (line 128) | def check_docker() -> Optional[EnvManager]: function check_kt_env_exists (line 141) | def check_kt_env_exists(manager: str, env_name: str = "kt") -> bool: function get_kt_env_path (line 170) | def get_kt_env_path(manager: str, env_name: str = "kt") -> Optional[Path]: function detect_cuda_version (line 192) | def detect_cuda_version() -> Optional[str]: function detect_gpus (line 222) | def detect_gpus() -> list[GPUInfo]: function _parse_cuda_visible_devices (line 265) | def _parse_cuda_visible_devices(cuda_visible: str) -> list[int]: function detect_cpu_info (line 289) | def detect_cpu_info() -> CPUInfo: function _parse_cpu_flags (line 378) | def _parse_cpu_flags(flags: list[str]) -> list[str]: function _parse_cpu_list (line 446) | def _parse_cpu_list(cpulist: str) -> list[int]: function detect_memory_info (line 458) | def detect_memory_info() -> MemoryInfo: function _parse_dmidecode_memory (line 491) | def _parse_dmidecode_memory(output: str) -> tuple[Optional[int], Optiona... function _detect_memory_frequency_sysfs (line 518) | def _detect_memory_frequency_sysfs() -> Optional[int]: function _parse_macos_memory (line 538) | def _parse_macos_memory(output: str) -> tuple[Optional[int], Optional[st... function detect_ram_gb (line 557) | def detect_ram_gb() -> float: function detect_available_ram_gb (line 583) | def detect_available_ram_gb() -> float: function detect_disk_space_gb (line 604) | def detect_disk_space_gb(path: str = "/") -> tuple[float, float]: function get_installed_package_version (line 615) | def get_installed_package_version(package_name: str) -> Optional[str]: function get_system_info (line 625) | def get_system_info() -> SystemInfo: function is_in_virtual_env (line 638) | def is_in_virtual_env() -> bool: function get_current_env_name (line 648) | def get_current_env_name() -> Optional[str]: class StorageLocation (line 662) | class StorageLocation: function scan_storage_locations (line 672) | def scan_storage_locations(min_size_gb: float = 50.0) -> list[StorageLoc... function _get_mount_points (line 765) | def _get_mount_points() -> list[str]: function _get_potential_model_paths (line 827) | def _get_potential_model_paths(mount_point: str) -> list[str]: function format_size_gb (line 855) | def format_size_gb(size_gb: float) -> str: class LocalModel (line 863) | class LocalModel: function scan_local_models (line 874) | def scan_local_models(search_paths: list[str], max_depth: int = 3) -> li... function _scan_directory_for_models (line 902) | def _scan_directory_for_models( function _detect_model_in_directory (line 926) | def _detect_model_in_directory(directory: str, entries: list) -> Optiona... function _get_directory_size (line 984) | def _get_directory_size(directory: str) -> int: function scan_models_in_location (line 1001) | def scan_models_in_location(location: StorageLocation, max_depth: int = ... class CPUBuildFeatures (line 1015) | class CPUBuildFeatures: function detect_cpu_build_features (line 1027) | def detect_cpu_build_features() -> CPUBuildFeatures: FILE: kt-kernel/python/cli/utils/input_validators.py function prompt_int_with_retry (line 14) | def prompt_int_with_retry( function prompt_float_with_retry (line 76) | def prompt_float_with_retry( function prompt_choice_with_retry (line 126) | def prompt_choice_with_retry( function prompt_int_list_with_retry (line 154) | def prompt_int_list_with_retry( FILE: kt-kernel/python/cli/utils/kv_cache_calculator.py function get_dtype_bytes (line 21) | def get_dtype_bytes(dtype_str: str) -> int: function get_kv_size_gb (line 34) | def get_kv_size_gb( function main (line 182) | def main(): FILE: kt-kernel/python/cli/utils/model_discovery.py function discover_and_register_global (line 22) | def discover_and_register_global( function discover_and_register_path (line 64) | def discover_and_register_path( function _create_and_register_model (line 124) | def _create_and_register_model(registry: UserModelRegistry, scanned_mode... function format_discovery_summary (line 182) | def format_discovery_summary( FILE: kt-kernel/python/cli/utils/model_registry.py class ModelInfo (line 18) | class ModelInfo: class ModelRegistry (line 155) | class ModelRegistry: method __init__ (line 158) | def __init__(self): method _load_builtin_models (line 165) | def _load_builtin_models(self) -> None: method _load_user_models (line 170) | def _load_user_models(self) -> None: method _register (line 197) | def _register(self, model: ModelInfo) -> None: method get (line 205) | def get(self, name: str) -> Optional[ModelInfo]: method search (line 219) | def search(self, query: str, limit: int = 10) -> list[ModelInfo]: method _match_score (line 242) | def _match_score(self, query: str, model: ModelInfo) -> float: method list_all (line 279) | def list_all(self) -> list[ModelInfo]: method find_local_models (line 283) | def find_local_models(self, max_depth: int = 3) -> list[tuple[ModelInf... function get_registry (line 349) | def get_registry() -> ModelRegistry: function compute_deepseek_v3_gpu_experts (line 362) | def compute_deepseek_v3_gpu_experts(tensor_parallel_size: int, vram_per_... function compute_kimi_k2_thinking_gpu_experts (line 371) | def compute_kimi_k2_thinking_gpu_experts(tensor_parallel_size: int, vram... function compute_minimax_m2_gpu_experts (line 381) | def compute_minimax_m2_gpu_experts(tensor_parallel_size: int, vram_per_g... FILE: kt-kernel/python/cli/utils/model_scanner.py class ScannedModel (line 17) | class ScannedModel: method size_gb (line 27) | def size_gb(self) -> float: method folder_name (line 32) | def folder_name(self) -> str: class ModelScanner (line 37) | class ModelScanner: method __init__ (line 40) | def __init__(self, min_size_gb: float = 10.0): method scan_directory (line 49) | def scan_directory( method scan_single_path (line 128) | def scan_single_path(self, path: Path) -> Optional[ScannedModel]: method _calculate_total_size (line 170) | def _calculate_total_size(self, directory: Path, filenames: List[str])... function scan_directory (line 196) | def scan_directory( function scan_single_path (line 214) | def scan_single_path(path: Path) -> Optional[ScannedModel]: function format_size (line 228) | def format_size(size_bytes: int) -> str: function find_files_fast (line 248) | def find_files_fast(mount_point: str, pattern: str, max_depth: int = 6, ... function is_valid_model_directory (line 281) | def is_valid_model_directory(directory: Path, min_size_gb: float = 10.0)... function scan_all_models_fast (line 330) | def scan_all_models_fast(mount_points: List[str], min_size_gb: float = 1... function get_root_subdirs (line 367) | def get_root_subdirs() -> List[str]: function scan_directory_for_models (line 414) | def scan_directory_for_models(directory: str, min_file_size_gb: float = ... function scan_all_models_with_info (line 484) | def scan_all_models_with_info( function find_model_roots_from_paths (line 533) | def find_model_roots_from_paths(model_paths: List[str]) -> Tuple[List[st... class ModelRootInfo (line 655) | class ModelRootInfo: function discover_models (line 663) | def discover_models( function _get_mount_points (line 701) | def _get_mount_points() -> List[str]: FILE: kt-kernel/python/cli/utils/model_table_builder.py function format_model_size (line 15) | def format_model_size(model_path: Path, format_type: str) -> str: function format_repo_info (line 33) | def format_repo_info(model) -> str: function format_sha256_status (line 41) | def format_sha256_status(model, status_map: dict) -> str: function build_moe_gpu_table (line 46) | def build_moe_gpu_table( function build_amx_table (line 101) | def build_amx_table( function build_gguf_table (line 208) | def build_gguf_table( FILE: kt-kernel/python/cli/utils/model_verifier.py function _compute_file_sha256 (line 15) | def _compute_file_sha256(file_path: Path) -> Tuple[str, str, float]: function check_huggingface_connectivity (line 36) | def check_huggingface_connectivity(timeout: int = 5) -> Tuple[bool, str]: function verify_model_integrity (line 62) | def verify_model_integrity( function calculate_local_sha256 (line 226) | def calculate_local_sha256( function fetch_model_sha256 (line 286) | def fetch_model_sha256( function _fetch_from_huggingface (line 330) | def _fetch_from_huggingface( function _fetch_from_modelscope (line 392) | def _fetch_from_modelscope(repo_id: str, revision: str, timeout: int | N... function verify_model_integrity_with_progress (line 426) | def verify_model_integrity_with_progress( function pre_operation_verification (line 652) | def pre_operation_verification(user_model, user_registry, operation_name... FILE: kt-kernel/python/cli/utils/port_checker.py function is_port_available (line 9) | def is_port_available(host: str, port: int) -> bool: function find_available_port (line 40) | def find_available_port(host: str, start_port: int, max_attempts: int = ... FILE: kt-kernel/python/cli/utils/quant_interactive.py function select_model_to_quantize (line 19) | def select_model_to_quantize() -> Optional[Any]: function configure_quantization_method (line 72) | def configure_quantization_method() -> Dict[str, str]: function configure_cpu_params (line 101) | def configure_cpu_params(max_cores: int, max_numa: int) -> Dict[str, Any]: function configure_output_path (line 130) | def configure_output_path(model: Any, method: str, numa_nodes: int) -> P... function calculate_quantized_size (line 170) | def calculate_quantized_size(source_path: Path, input_type: str, quant_m... function check_disk_space (line 203) | def check_disk_space(output_path: Path, required_size_gb: float) -> tupl... function interactive_quant_config (line 234) | def interactive_quant_config() -> Optional[Dict[str, Any]]: FILE: kt-kernel/python/cli/utils/repo_detector.py function parse_readme_frontmatter (line 13) | def parse_readme_frontmatter(readme_path: Path) -> Optional[Dict]: function extract_repo_from_frontmatter (line 48) | def extract_repo_from_frontmatter(frontmatter: Dict) -> Optional[Tuple[s... function _extract_repo_from_url (line 120) | def _extract_repo_from_url(url: str) -> Optional[Tuple[str, str]]: function extract_repo_from_global_search (line 153) | def extract_repo_from_global_search(readme_path: Path) -> Optional[Tuple... function detect_repo_for_model (line 220) | def detect_repo_for_model(model_path: str) -> Optional[Tuple[str, str]]: function scan_models_for_repo (line 252) | def scan_models_for_repo(model_list) -> Dict: function format_detection_report (line 292) | def format_detection_report(results: Dict) -> str: function apply_detection_results (line 345) | def apply_detection_results(results: Dict, registry) -> int: FILE: kt-kernel/python/cli/utils/run_configs.py class RunConfigManager (line 16) | class RunConfigManager: method __init__ (line 19) | def __init__(self): method _ensure_config_file (line 23) | def _ensure_config_file(self): method _load_data (line 29) | def _load_data(self) -> Dict: method _save_data (line 37) | def _save_data(self, data: Dict): method list_configs (line 42) | def list_configs(self, model_id: str) -> List[Dict[str, Any]]: method save_config (line 52) | def save_config(self, model_id: str, config: Dict[str, Any]): method delete_config (line 75) | def delete_config(self, model_id: str, config_index: int) -> bool: method get_config (line 98) | def get_config(self, model_id: str, config_index: int) -> Optional[Dic... FILE: kt-kernel/python/cli/utils/run_interactive.py function get_gpu_info (line 28) | def get_gpu_info() -> List[Dict[str, Any]]: function select_model (line 60) | def select_model() -> Optional[Any]: function select_inference_method (line 162) | def select_inference_method(model: Any) -> Optional[Dict[str, Any]]: function _select_saved_config (line 238) | def _select_saved_config(model: Any, saved_configs: List[Dict]) -> Optio... function _build_command_preview (line 307) | def _build_command_preview(model: Any, cfg: Dict[str, Any]) -> List[str]: function _configure_raw_inference (line 378) | def _configure_raw_inference(model: Any) -> Dict[str, Any]: function _configure_amx_inference (line 409) | def _configure_amx_inference(model: Any) -> Optional[Dict[str, Any]]: function _configure_gguf_inference (line 468) | def _configure_gguf_inference(model: Any) -> Optional[Dict[str, Any]]: function configure_numa_and_cpu (line 509) | def configure_numa_and_cpu(method_config: Dict[str, Any]) -> Dict[str, i... function configure_gpu_experts (line 559) | def configure_gpu_experts(model: Any) -> int: function configure_kv_cache (line 597) | def configure_kv_cache(is_raw_inference: bool) -> Optional[Dict[str, int]]: function select_gpus_and_tp (line 627) | def select_gpus_and_tp( function configure_parsers (line 737) | def configure_parsers() -> Dict[str, Optional[str]]: function configure_host_and_port (line 771) | def configure_host_and_port() -> Dict[str, Any]: function save_config_prompt (line 823) | def save_config_prompt(model: Any, full_config: Dict[str, Any]) -> bool: function interactive_run_config (line 886) | def interactive_run_config() -> Optional[Dict[str, Any]]: function _display_config_summary (line 1052) | def _display_config_summary(config: Dict[str, Any]): FILE: kt-kernel/python/cli/utils/sglang_checker.py function check_sglang_installation (line 18) | def check_sglang_installation() -> dict: function get_sglang_install_instructions (line 155) | def get_sglang_install_instructions(lang: Optional[str] = None) -> str: function print_sglang_install_instructions (line 211) | def print_sglang_install_instructions() -> None: function check_sglang_and_warn (line 217) | def check_sglang_and_warn() -> bool: function _get_sglang_kt_kernel_cache_path (line 241) | def _get_sglang_kt_kernel_cache_path() -> Path: function _is_sglang_kt_kernel_cache_valid (line 248) | def _is_sglang_kt_kernel_cache_valid() -> bool: function _save_sglang_kt_kernel_cache (line 268) | def _save_sglang_kt_kernel_cache(supported: bool) -> None: function clear_sglang_kt_kernel_cache (line 277) | def clear_sglang_kt_kernel_cache() -> None: function check_sglang_kt_kernel_support (line 287) | def check_sglang_kt_kernel_support(use_cache: bool = True, silent: bool ... function print_sglang_kt_kernel_instructions (line 369) | def print_sglang_kt_kernel_instructions() -> None: FILE: kt-kernel/python/cli/utils/tuna_engine.py function get_num_experts (line 20) | def get_num_experts(model_path: Path) -> int: function detect_oom (line 58) | def detect_oom(log_line: Optional[str]) -> bool: function test_config (line 87) | def test_config( function test_inference (line 284) | def test_inference(port: int, verbose: bool = False) -> bool: function find_max_gpu_experts (line 338) | def find_max_gpu_experts( function run_tuna (line 389) | def run_tuna( FILE: kt-kernel/python/cli/utils/user_model_registry.py class UserModel (line 20) | class UserModel: method __post_init__ (line 42) | def __post_init__(self): method to_dict (line 49) | def to_dict(self) -> Dict[str, Any]: method from_dict (line 54) | def from_dict(cls, data: Dict[str, Any]) -> "UserModel": method path_exists (line 58) | def path_exists(self) -> bool: class UserModelRegistry (line 63) | class UserModelRegistry: method __init__ (line 66) | def __init__(self, registry_file: Optional[Path] = None): method load (line 83) | def load(self) -> None: method save (line 121) | def save(self) -> None: method add_model (line 131) | def add_model(self, model: UserModel) -> None: method remove_model (line 147) | def remove_model(self, name: str) -> bool: method update_model (line 165) | def update_model(self, name: str, updates: Dict[str, Any]) -> bool: method get_model (line 188) | def get_model(self, name: str) -> Optional[UserModel]: method get_model_by_id (line 203) | def get_model_by_id(self, model_id: str) -> Optional[UserModel]: method list_models (line 218) | def list_models(self) -> List[UserModel]: method find_by_path (line 227) | def find_by_path(self, path: str) -> Optional[UserModel]: method check_name_conflict (line 246) | def check_name_conflict(self, name: str, exclude_name: Optional[str] =... method refresh_status (line 262) | def refresh_status(self) -> Dict[str, List[str]]: method get_model_count (line 280) | def get_model_count(self) -> int: method suggest_name (line 284) | def suggest_name(self, base_name: str) -> str: FILE: kt-kernel/python/experts.py class KTMoEWrapper (line 26) | class KTMoEWrapper: method __new__ (line 53) | def __new__( method set_capture_batch_sizes (line 124) | def set_capture_batch_sizes(capture_bs: List[int]): method get_capture_batch_sizes (line 137) | def get_capture_batch_sizes() -> List[int]: method clear_buffer_cache (line 147) | def clear_buffer_cache(): FILE: kt-kernel/python/experts_base.py function generate_gpu_experts_masks (line 21) | def generate_gpu_experts_masks( class KExpertsCPUBuffer (line 75) | class KExpertsCPUBuffer: method get_buffer (line 89) | def get_buffer(cls, hidden_states: torch.Tensor, num_experts_per_tok): class BaseMoEWrapper (line 143) | class BaseMoEWrapper(ABC): method __init__ (line 152) | def __init__( method load_weights_from_tensors (line 241) | def load_weights_from_tensors( method load_weights (line 260) | def load_weights(self, physical_to_logical_map_cpu: torch.Tensor): method select_deferred_experts (line 269) | def select_deferred_experts( method submit_forward (line 299) | def submit_forward( method sync_forward (line 379) | def sync_forward(self, hidden_states: torch.Tensor, cuda_stream) -> to... method forward (line 407) | def forward( method set_capture_batch_sizes (line 430) | def set_capture_batch_sizes(capture_bs: List[int]): method get_capture_batch_sizes (line 446) | def get_capture_batch_sizes() -> List[int]: method clear_buffer_cache (line 456) | def clear_buffer_cache(): FILE: kt-kernel/python/utils/amx.py class AMXMoEWrapper (line 27) | class AMXMoEWrapper(BaseMoEWrapper): method __init__ (line 35) | def __init__( method load_weights_from_tensors (line 123) | def load_weights_from_tensors( method load_weights (line 180) | def load_weights(self, physical_to_logical_map_cpu: torch.Tensor): class NativeMoEWrapper (line 322) | class NativeMoEWrapper(BaseMoEWrapper): method __init__ (line 327) | def __init__( method load_weights_from_tensors (line 405) | def load_weights_from_tensors( method load_weights (line 414) | def load_weights(self, physical_to_logical_map_cpu: torch.Tensor): method submit_write_weight_scale_to_buffer (line 544) | def submit_write_weight_scale_to_buffer( method sync_write_weight_scale_to_buffer (line 579) | def sync_write_weight_scale_to_buffer(self): FILE: kt-kernel/python/utils/llamafile.py class LlamafileMoEWrapper (line 21) | class LlamafileMoEWrapper(BaseMoEWrapper): method __init__ (line 29) | def __init__( method load_weights_from_tensors (line 140) | def load_weights_from_tensors( method load_weights (line 156) | def load_weights(self, physical_to_logical_map_cpu: Optional[torch.Ten... FILE: kt-kernel/python/utils/loader.py class GGMLQuantizationType (line 19) | class GGMLQuantizationType(IntEnum): function translate_name_to_gguf (line 53) | def translate_name_to_gguf(name): class SafeTensorLoader (line 102) | class SafeTensorLoader: method __init__ (line 114) | def __init__(self, file_path: str): method __load_tensor_file_map (line 117) | def __load_tensor_file_map(self, file_path: str): method load_tensor (line 156) | def load_tensor(self, key: str, device: str = "cpu"): method close_all_handles (line 166) | def close_all_handles(self): method load_experts (line 171) | def load_experts(self, base_key: str, device: str = "cpu"): method has_tensor (line 236) | def has_tensor(self, name: str): class FP8SafeTensorLoader (line 240) | class FP8SafeTensorLoader(SafeTensorLoader): method __init__ (line 262) | def __init__(self, file_path: str, scale_suffix: str = None): method _detect_format (line 283) | def _detect_format(self): method _get_experts_prefix_candidates (line 355) | def _get_experts_prefix_candidates(self, base_key: str) -> list[str]: method _get_proj_names (line 370) | def _get_proj_names(self): method load_tensor (line 375) | def load_tensor(self, key: str, device: str = "cpu"): method load_experts (line 387) | def load_experts(self, base_key: str, device: str = "cpu"): method is_per_channel (line 454) | def is_per_channel(self) -> bool: class BF16SafeTensorLoader (line 459) | class BF16SafeTensorLoader(SafeTensorLoader): method __init__ (line 476) | def __init__(self, file_path: str): method _detect_format (line 481) | def _detect_format(self): method _get_experts_prefix_candidates (line 511) | def _get_experts_prefix_candidates(self, base_key: str) -> list[str]: method _get_proj_names (line 522) | def _get_proj_names(self): method load_tensor (line 527) | def load_tensor(self, key: str, device: str = "cpu"): method load_experts (line 539) | def load_experts(self, base_key: str, device: str = "cpu"): method _resolve_packed_experts_prefix (line 579) | def _resolve_packed_experts_prefix(self, base_key: str) -> str: method _load_experts_packed (line 596) | def _load_experts_packed(self, base_key: str, device: str = "cpu"): class CompressedSafeTensorLoader (line 623) | class CompressedSafeTensorLoader(SafeTensorLoader): method load_experts (line 626) | def load_experts(self, base_key: str, device: str = "cpu"): class GGUFLoader (line 678) | class GGUFLoader: method __init__ (line 685) | def __init__(self, gguf_path: str): method _load_single_file (line 719) | def _load_single_file(self, file_path: str): method _load_directory (line 745) | def _load_directory(self, dir_path: str): method get_model_config (line 782) | def get_model_config(self, layer_idx: int = 0): method print_metadata (line 870) | def print_metadata(self, filter_keywords=None): method has_tensor (line 890) | def has_tensor(self, name: str): method get_ggml_type (line 895) | def get_ggml_type(self, name: str): method get_undequanted_tensor_and_ggml_type (line 902) | def get_undequanted_tensor_and_ggml_type(self, name: str): FILE: kt-kernel/python/utils/moe_kernel.py class GeneralMoEWrapper (line 29) | class GeneralMoEWrapper(BaseMoEWrapper): method __init__ (line 37) | def __init__( method load_weights_from_tensors (line 123) | def load_weights_from_tensors( method load_weights (line 180) | def load_weights(self, physical_to_logical_map_cpu: torch.Tensor): FILE: kt-kernel/scripts/check.py function safe_open_binary_to_tensor (line 21) | def safe_open_binary_to_tensor(file_path): function read_safetensor_keys_from_folder (line 42) | def read_safetensor_keys_from_folder(folder_path) -> dict: function read_amx_tensor_from_folder (line 84) | def read_amx_tensor_from_folder(folder_path, keys) -> dict: function _clean_keys (line 137) | def _clean_keys(keys): function combine_tensor_sources (line 145) | def combine_tensor_sources(safetensor_path, amx_path): function write_combined_tensor (line 164) | def write_combined_tensor(target_tensor_map: dict, output_path: str): function main (line 238) | def main(): FILE: kt-kernel/scripts/check_cpu_features.py function check_cpuinfo (line 19) | def check_cpuinfo(): function main (line 29) | def main(): FILE: kt-kernel/scripts/compare_weights.py function unpack_awq_int32_to_int8 (line 25) | def unpack_awq_int32_to_int8(packed: np.ndarray, bits: int = 4) -> np.nd... function normalize_tensor_dtype (line 54) | def normalize_tensor_dtype(tensor: np.ndarray, tensor_name: str, is_awq:... function load_kt_binary (line 121) | def load_kt_binary(file_path: str) -> np.ndarray: function detect_weight_format (line 145) | def detect_weight_format(path: str) -> str: function detect_awq_format (line 170) | def detect_awq_format(weights_sample: Dict[str, np.ndarray]) -> bool: function load_safetensor_weights (line 196) | def load_safetensor_weights(path: str) -> Dict[str, np.ndarray]: function load_kt_weights (line 240) | def load_kt_weights(path: str) -> Dict[str, np.ndarray]: function normalize_key (line 312) | def normalize_key(key: str) -> Tuple[int, str, int, str]: function compare_weights (line 336) | def compare_weights( function main (line 474) | def main(): FILE: kt-kernel/scripts/convert_cpu_weights.py function weight_dequant_kernel (line 35) | def weight_dequant_kernel(x_ptr, s_ptr, y_ptr, M, N, BLOCK_SIZE: tl.cons... function weight_dequant (line 49) | def weight_dequant(x: torch.Tensor, s: torch.Tensor, block_size: int = 1... function load_model_config (line 59) | def load_model_config(input_path: str, input_type: str = None) -> Dict: function pack (line 123) | def pack(imatrix: torch.Tensor): function unpack (line 145) | def unpack(qmatrix: torch.Tensor): function reverse_awq_interleaving (line 167) | def reverse_awq_interleaving(imatrix: torch.Tensor): function unpack_reverse_awq_interleaving (line 179) | def unpack_reverse_awq_interleaving(qweight: torch.Tensor, qzeros: torch... function pack_column_major_1d (line 209) | def pack_column_major_1d(iweights: torch.Tensor, izeros: torch.Tensor = ... class ConverterBase (line 235) | class ConverterBase: method __init__ (line 242) | def __init__( method _load_input_files (line 273) | def _load_input_files(self): method _load_tensor (line 308) | def _load_tensor(self, key: str) -> torch.Tensor: method _find_expert_layers (line 318) | def _find_expert_layers(self) -> Dict[int, List[int]]: method _convert_layer_experts (line 364) | def _convert_layer_experts(self, layer_idx: int, expert_ids: List[int]... method convert (line 371) | def convert(self, resume_layer: int = 0): method _copy_config_files (line 458) | def _copy_config_files(self): method close (line 471) | def close(self): class AWQToColumnMajorConverter (line 476) | class AWQToColumnMajorConverter(ConverterBase): method _convert_layer_experts (line 480) | def _convert_layer_experts(self, layer_idx: int, expert_ids: List[int]... class OnlineQuantConverter (line 549) | class OnlineQuantConverter(ConverterBase): method __init__ (line 556) | def __init__( method _dequantize_fp8_blockwise (line 578) | def _dequantize_fp8_blockwise(self, fp8_weight: torch.Tensor, scale_in... method _load_binary_tensor (line 609) | def _load_binary_tensor(self, file_path: str) -> torch.Tensor: method _load_layer_tensors_from_disk (line 635) | def _load_layer_tensors_from_disk(self, layer_idx: int) -> Dict[str, t... method _remove_layer_folder (line 700) | def _remove_layer_folder(self, layer_idx: int): method _convert_layer_experts (line 713) | def _convert_layer_experts(self, layer_idx: int, expert_ids: List[int]... function main (line 923) | def main(): FILE: kt-kernel/scripts/convert_gpu_weights.py function parse_args (line 52) | def parse_args(): function setup_environment (line 149) | def setup_environment(force_cpu=False): function get_torch_dtype (line 171) | def get_torch_dtype(dtype_str): function check_dense_layers_and_update_ignore (line 185) | def check_dense_layers_and_update_ignore(model_id, ignore_patterns, trus... function load_and_prepare_dataset (line 238) | def load_and_prepare_dataset(dataset_name, dataset_split, num_samples, m... function main (line 280) | def main(): FILE: kt-kernel/scripts/convert_kimi_k2_fp8_to_bf16_cpu.py function weight_dequant_cpu (line 13) | def weight_dequant_cpu(x: torch.Tensor, s: torch.Tensor, block_size: int... function main (line 32) | def main(fp8_path, bf16_path): FILE: kt-kernel/scripts/convert_moe_to_bf16.py function _load_config (line 13) | def _load_config(model_dir: str, config_path: Optional[str]) -> Tuple[in... function _dequantize_tensor (line 29) | def _dequantize_tensor( function _is_quantized_weight_key (line 56) | def _is_quantized_weight_key(key: str) -> bool: function convert_file (line 67) | def convert_file( function parse_args (line 132) | def parse_args() -> argparse.Namespace: function main (line 159) | def main(): FILE: kt-kernel/setup.py function _env_get_bool (line 63) | def _env_get_bool(name: str, default: bool | None = None) -> bool | None: function _cmake_onoff (line 75) | def _cmake_onoff(flag: bool) -> str: function _forward_bool_env (line 79) | def _forward_bool_env(cmake_args: list[str], env_name: str, cmake_flag: ... function _forward_str_env (line 89) | def _forward_str_env(cmake_args: list[str], env_name: str, cmake_flag: s... function default_build_type (line 112) | def default_build_type() -> str: function detect_parallel_jobs (line 116) | def detect_parallel_jobs() -> str: function cpu_feature_flags (line 127) | def cpu_feature_flags() -> list[str]: class CMakeExtension (line 137) | class CMakeExtension(Extension): method __init__ (line 138) | def __init__(self, name: str, sourcedir: str = ""): class CMakeBuild (line 143) | class CMakeBuild(build_ext): method run (line 144) | def run(self): method detect_cpu_info (line 152) | def detect_cpu_info(self) -> dict: method build_extension (line 244) | def build_extension(self, ext: CMakeExtension): method build_multi_variants (line 258) | def build_multi_variants(self, ext: CMakeExtension): method _build_single_variant (line 432) | def _build_single_variant(self, ext: CMakeExtension): method _build_single_variant_impl (line 441) | def _build_single_variant_impl(self, ext: CMakeExtension, extdir: Path... FILE: kt-kernel/test/ci/ci_register.py class HWBackend (line 8) | class HWBackend(Enum): class CIRegistry (line 15) | class CIRegistry: function register_cpu_ci (line 22) | def register_cpu_ci(est_time: float, suite: str): function register_cuda_ci (line 26) | def register_cuda_ci(est_time: float, suite: str): function register_amd_ci (line 30) | def register_amd_ci(est_time: float, suite: str): class RegistryVisitor (line 41) | class RegistryVisitor(ast.NodeVisitor): method __init__ (line 42) | def __init__(self, filename: str): method _collect_ci_registry (line 46) | def _collect_ci_registry(self, func_call: ast.Call): method visit_Module (line 77) | def visit_Module(self, node): function ut_parse_one_file (line 89) | def ut_parse_one_file(filename: str) -> List[CIRegistry]: function collect_tests (line 98) | def collect_tests(files: list[str], sanity_check: bool = True) -> List[C... FILE: kt-kernel/test/ci/ci_utils.py function kill_process_tree (line 9) | def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid:... class TestFile (line 49) | class TestFile: function run_with_timeout (line 54) | def run_with_timeout( function run_unittest_files (line 78) | def run_unittest_files( FILE: kt-kernel/test/per_commit/test_amd_placeholder.py function test_amd_placeholder (line 24) | def test_amd_placeholder(): FILE: kt-kernel/test/per_commit/test_basic_cpu.py function test_kt_kernel_import (line 29) | def test_kt_kernel_import(): function test_cpu_infer_initialization (line 38) | def test_cpu_infer_initialization(): function test_basic_module_attributes (line 49) | def test_basic_module_attributes(): function run_all_tests (line 58) | def run_all_tests(): FILE: kt-kernel/test/per_commit/test_cuda_placeholder.py function test_cuda_placeholder (line 24) | def test_cuda_placeholder(): FILE: kt-kernel/test/per_commit/test_moe_amx_accuracy_int4.py function act_fn (line 42) | def act_fn(x): function mlp_torch (line 47) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 56) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function test_moe_amx_int4_accuracy (line 92) | def test_moe_amx_int4_accuracy(): function run_all_tests (line 184) | def run_all_tests(): FILE: kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1.py function act_fn (line 42) | def act_fn(x): function mlp_torch (line 47) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 56) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function test_moe_amx_int4_1_accuracy (line 92) | def test_moe_amx_int4_1_accuracy(): function run_all_tests (line 184) | def run_all_tests(): FILE: kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1k.py function act_fn (line 43) | def act_fn(x): function mlp_torch (line 48) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 57) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function test_moe_amx_int4_1k_accuracy (line 93) | def test_moe_amx_int4_1k_accuracy(): function run_all_tests (line 188) | def run_all_tests(): FILE: kt-kernel/test/per_commit/test_moe_amx_accuracy_int8.py function act_fn (line 42) | def act_fn(x): function mlp_torch (line 47) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 56) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function test_moe_amx_int8_accuracy (line 92) | def test_moe_amx_int8_accuracy(): function run_all_tests (line 182) | def run_all_tests(): FILE: kt-kernel/test/per_commit/test_moe_amx_bench_int4.py function get_git_commit (line 56) | def get_git_commit(): function get_system_info (line 79) | def get_system_info(): function record_results (line 131) | def record_results(result, filename): function test_moe_amx_int4_benchmark (line 138) | def test_moe_amx_int4_benchmark(): function run_all_tests (line 296) | def run_all_tests(): FILE: kt-kernel/test/per_commit/test_moe_amx_bench_int4_1.py function get_git_commit (line 56) | def get_git_commit(): function get_system_info (line 79) | def get_system_info(): function record_results (line 131) | def record_results(result, filename): function test_moe_amx_int4_1_benchmark (line 138) | def test_moe_amx_int4_1_benchmark(): function run_all_tests (line 296) | def run_all_tests(): FILE: kt-kernel/test/per_commit/test_moe_amx_bench_int4_1k.py function get_git_commit (line 58) | def get_git_commit(): function get_system_info (line 81) | def get_system_info(): function record_results (line 133) | def record_results(result, filename): function test_moe_amx_int4_1k_benchmark (line 140) | def test_moe_amx_int4_1k_benchmark(): function run_all_tests (line 308) | def run_all_tests(): FILE: kt-kernel/test/per_commit/test_moe_amx_bench_int8.py function get_git_commit (line 56) | def get_git_commit(): function get_system_info (line 79) | def get_system_info(): function record_results (line 131) | def record_results(result, filename): function test_moe_amx_int8_benchmark (line 138) | def test_moe_amx_int8_benchmark(): function run_all_tests (line 296) | def run_all_tests(): FILE: kt-kernel/test/run_suite.py function _filter_tests (line 22) | def _filter_tests( function run_per_commit (line 34) | def run_per_commit(hw: HWBackend, suite: str): function main (line 48) | def main(): FILE: kt-kernel/test/test_generate_gpu_experts_masks.py function test_basic (line 14) | def test_basic(): function test_edge_cases (line 46) | def test_edge_cases(): function test_performance (line 80) | def test_performance(): function test_output_properties (line 117) | def test_output_properties(): function test_determinism (line 138) | def test_determinism(): FILE: kt-sft/csrc/custom_marlin/binding.cpp function PYBIND11_MODULE (line 20) | PYBIND11_MODULE(vLLMMarlin, m) { FILE: kt-sft/csrc/custom_marlin/test_cuda_graph.py function setup_seed (line 14) | def setup_seed(seed): function get_usable_mem (line 33) | def get_usable_mem(): function exp_range (line 42) | def exp_range(start, stop, step = 2): function timing (line 48) | def timing(func, iters, epochs=100): class LinearMarlin (line 88) | class LinearMarlin(nn.Linear): method __init__ (line 94) | def __init__( method forward (line 168) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor) -> torch.... function benchLinearMarlin (line 208) | def benchLinearMarlin(input_dim, output_dim):#, out_file function printMinMax (line 314) | def printMinMax(tensor): FILE: kt-sft/csrc/custom_marlin/utils/format24.py function _calculate_meta_reordering_scatter_offsets (line 21) | def _calculate_meta_reordering_scatter_offsets(m, meta_ncols, meta_dtype, function sparse_semi_structured_from_dense_cutlass (line 52) | def sparse_semi_structured_from_dense_cutlass(dense): function sparse_semi_structured_to_dense_cutlass (line 184) | def sparse_semi_structured_to_dense_cutlass(sparse, meta_reordered): function mask_creator (line 279) | def mask_creator(tensor): FILE: kt-sft/csrc/custom_marlin/utils/marlin_24_perms.py function get_perms_24 (line 21) | def get_perms_24(num_bits: int): FILE: kt-sft/csrc/custom_marlin/utils/marlin_perms.py function get_perms (line 21) | def get_perms(num_bits: int): FILE: kt-sft/csrc/custom_marlin/utils/marlin_utils.py function is_marlin_supported (line 31) | def is_marlin_supported(): function marlin_permute_weights (line 35) | def marlin_permute_weights(q_w, size_k, size_n, perm, tile=MARLIN_TILE): function marlin_weights (line 50) | def marlin_weights(q_w, size_k, size_n, num_bits, perm): function marlin_permute_scales (line 70) | def marlin_permute_scales(s, size_k, size_n, group_size, scale_perm, function marlin_quantize (line 81) | def marlin_quantize( function inject_24 (line 119) | def inject_24(w, size_k, size_n): function check_24 (line 127) | def check_24(w, num_rows_to_sample=50, _verbose=False): function compress_quantized_24_weight (line 154) | def compress_quantized_24_weight(q_24, size_k, size_n, num_bits): function marlin_24_quantize (line 177) | def marlin_24_quantize( function compute_max_diff (line 218) | def compute_max_diff(output, output_ref): class MarlinWorkspace (line 223) | class MarlinWorkspace: method __init__ (line 225) | def __init__(self, out_features, min_thread_n, max_parallel, device): FILE: kt-sft/csrc/custom_marlin/utils/quant_utils.py function get_pack_factor (line 9) | def get_pack_factor(num_bits): function permute_rows (line 14) | def permute_rows(q_w: torch.Tensor, w_ref: torch.Tensor, group_size: int): function dequantize_weights (line 40) | def dequantize_weights(qweight, qzeros, scales, g_idx, bits=4, group_siz... function quantize_weights (line 67) | def quantize_weights(w: torch.Tensor, num_bits: int, group_size: int, function sort_weights (line 137) | def sort_weights(q_w: torch.Tensor, g_idx: torch.Tensor): function gptq_pack (line 153) | def gptq_pack( function gptq_unpack (line 176) | def gptq_unpack( FILE: kt-sft/csrc/ktransformers_ext/bench/bench_attention.py function bench_linear (line 41) | def bench_linear(cache_seqlen: int): FILE: kt-sft/csrc/ktransformers_ext/bench/bench_attention_torch.py function bench_linear (line 29) | def bench_linear(cache_seqlen: int, device): FILE: kt-sft/csrc/ktransformers_ext/bench/bench_linear.py function bench_linear (line 28) | def bench_linear(quant_mode: str): FILE: kt-sft/csrc/ktransformers_ext/bench/bench_linear_torch.py function bench_linear (line 26) | def bench_linear(quant_mode: str): FILE: kt-sft/csrc/ktransformers_ext/bench/bench_mlp.py function bench_mlp (line 28) | def bench_mlp(quant_mode: str): FILE: kt-sft/csrc/ktransformers_ext/bench/bench_mlp_torch.py function act_fn (line 26) | def act_fn(x): function mlp_torch (line 29) | def mlp_torch(input, gate_proj, up_proj, down_proj): function bench_mlp (line 47) | def bench_mlp(quant_mode: str): FILE: kt-sft/csrc/ktransformers_ext/bench/bench_moe.py function bench_moe (line 31) | def bench_moe(quant_mode: str): FILE: kt-sft/csrc/ktransformers_ext/bench/bench_moe_amx.py function bench_moe (line 29) | def bench_moe(quant_mode: str): FILE: kt-sft/csrc/ktransformers_ext/bench/bench_moe_torch.py function act_fn (line 28) | def act_fn(x): function mlp_torch (line 31) | def mlp_torch(input, gate_proj, up_proj, down_proj): function moe_torch (line 49) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj): function bench_moe (line 80) | def bench_moe(quant_mode: str): FILE: kt-sft/csrc/ktransformers_ext/cpu_backend/backend.cpp type bitmask (line 93) | struct bitmask FILE: kt-sft/csrc/ktransformers_ext/cpu_backend/backend.h type ThreadStatus (line 21) | enum ThreadStatus { type ThreadState (line 27) | struct ThreadState { function class (line 33) | class Backend { FILE: kt-sft/csrc/ktransformers_ext/cpu_backend/cpuinfer.h function class (line 36) | class CPUInfer { function submit (line 58) | void submit(std::pair params) { function sync (line 65) | void sync() { function submit_with_cuda_stream (line 69) | void submit_with_cuda_stream(intptr_t user_cuda_stream, std::pair torch.Tensor: function silu_grad (line 49) | def silu_grad(x: torch.Tensor) -> torch.Tensor: class SiLU (line 54) | class SiLU(torch.autograd.Function): method forward (line 56) | def forward(ctx, inp): method backward (line 61) | def backward(ctx, grad_out): function mlp_torch (line 69) | def mlp_torch(x, gate, up, down, req_grad=False): function moe_torch (line 78) | def moe_torch(x, eid, w, gate, up, down, req_grad=False): function moe_backward_python (line 109) | def moe_backward_python(x, eid, w, gate, up, down, grad_output, gate_u_c... function test_amx_moe_two_round (line 316) | def test_amx_moe_two_round(): function load_bf16 (line 459) | def load_bf16(stub, shape): function load_f16 (line 462) | def load_f16(stub, shape): function load_f32 (line 465) | def load_f32(stub, shape): function load_uint8 (line 468) | def load_uint8(stub, shape): function load_int8 (line 471) | def load_int8(stub, shape): function load_dump_tensor (line 476) | def load_dump_tensor(experts_idx: int, name: str, shape: tuple, Ename: s... function load_bin (line 494) | def load_bin(path, n, k): function check_nan (line 501) | def check_nan(name, shape): function get_tensor (line 536) | def get_tensor(name, shape) -> torch.Tensor: function check_py_cpp (line 551) | def check_py_cpp(name1, name2, shape): function manual_check (line 619) | def manual_check(experts_ids): FILE: kt-sft/csrc/ktransformers_ext/examples/test_sft_moe.py function act_fn (line 37) | def act_fn(x): class SiLU (line 41) | class SiLU(torch.autograd.Function): method forward (line 43) | def forward(ctx, input): method backward (line 48) | def backward(ctx, grad_output): function mlp_torch (line 55) | def mlp_torch(input, gate_proj, up_proj, down_proj, requires_grad=False): function moe_torch (line 68) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj,... function test_forward (line 100) | def test_forward(): function test_backward (line 150) | def test_backward(): function test_backward_2round_with_tflops (line 284) | def test_backward_2round_with_tflops(): function test_backward_10round_5layer (line 481) | def test_backward_10round_5layer(): function test_backward_one_vs_many_comparison (line 604) | def test_backward_one_vs_many_comparison(): FILE: kt-sft/csrc/ktransformers_ext/ext_bindings.cpp class KVCacheBindings (line 39) | class KVCacheBindings { class AttnBindings (line 41) | class AttnBindings { type Args (line 43) | struct Args { method inner (line 60) | static void inner(void *args) { method cpuinfer_interface (line 69) | static std::pair class GetAllKVCacheOneLayerBindings (line 95) | class GetAllKVCacheOneLayerBindings { type Args (line 97) | struct Args { method inner (line 104) | static void inner(void *args) { method cpuinfer_interface (line 110) | static std::pair class GetAndUpdateKVCacheFp16Bindings (line 119) | class GetAndUpdateKVCacheFp16Bindings { type Args (line 121) | struct Args { method inner (line 133) | static void inner(void *args) { method cpuinfer_interface (line 141) | static std::pair class GetKVCacheFp16Bindings (line 159) | class GetKVCacheFp16Bindings { type Args (line 161) | struct Args { method inner (line 172) | static void inner(void *args) { method cpuinfer_interface (line 179) | static std::pair class UpdateKVCacheFp16Bindings (line 196) | class UpdateKVCacheFp16Bindings { type Args (line 198) | struct Args { method inner (line 210) | static void inner(void *args) { method cpuinfer_interface (line 218) | static std::pair class UpdateImportanceBindings (line 237) | class UpdateImportanceBindings { type Args (line 239) | struct Args { method inner (line 250) | static void inner(void *args) { method cpuinfer_interface (line 257) | static std::pair class AttnWithKVCacheBindings (line 274) | class AttnWithKVCacheBindings { type Args (line 276) | struct Args { method inner (line 294) | static void inner(void *args) { method cpuinfer_interface (line 303) | static std::pair class ClearImportanceAllLayersBindings (line 330) | class ClearImportanceAllLayersBindings { type Args (line 332) | struct Args { method inner (line 340) | static void inner(void *args) { method cpuinfer_interface (line 347) | static std::pair class CalcAnchorAllLayersBindinds (line 361) | class CalcAnchorAllLayersBindinds { type Args (line 363) | struct Args { method inner (line 371) | static void inner(void *args) { method cpuinfer_interface (line 378) | static std::pair class LoadKVCacheBindings (line 392) | class LoadKVCacheBindings { type Args (line 394) | struct Args { method inner (line 399) | static void inner(void *args) { method cpuinfer_interface (line 404) | static std::pair class DumpKVCacheBindings (line 411) | class DumpKVCacheBindings { type Args (line 413) | struct Args { method inner (line 420) | static void inner(void *args) { method cpuinfer_interface (line 426) | static std::pair class LinearBindings (line 437) | class LinearBindings { class WarmUpBindinds (line 439) | class WarmUpBindinds { type Args (line 441) | struct Args { method inner (line 445) | static void inner(void *args) { method cpuinfer_interface (line 449) | static std::pair class ForwardBindings (line 455) | class ForwardBindings { type Args (line 457) | struct Args { method inner (line 464) | static void inner(void *args) { method cpuinfer_interface (line 469) | static std::pair class MLPBindings (line 479) | class MLPBindings { class WarmUpBindinds (line 481) | class WarmUpBindinds { type Args (line 483) | struct Args { method inner (line 487) | static void inner(void *args) { method cpuinfer_interface (line 491) | static std::pair cpuinfer_interface(MLP &mlp) { class ForwardBindings (line 496) | class ForwardBindings { type Args (line 498) | struct Args { method inner (line 505) | static void inner(void *args) { method cpuinfer_interface (line 510) | static std::pair class MOEBindings (line 520) | class MOEBindings { class WarmUpBindinds (line 522) | class WarmUpBindinds { type Args (line 524) | struct Args { method inner (line 528) | static void inner(void *args) { method cpuinfer_interface (line 532) | static std::pair cpuinfer_interface(MOE &moe) { class ForwardBindings (line 537) | class ForwardBindings { type Args (line 539) | struct Args { method inner (line 550) | static void inner(void *args) { method cpuinfer_interface (line 556) | static std::pair function sft_moe_forward_wrapper (line 574) | inline void sft_moe_forward_wrapper( function sft_moe_backward_wrapper (line 590) | inline void sft_moe_backward_wrapper( class SFT_MOEBindings (line 608) | class SFT_MOEBindings { class WarmUpBindinds (line 610) | class WarmUpBindinds { type Args (line 612) | struct Args { method inner (line 616) | static void inner(void *args) { method cpuinfer_interface (line 620) | static std::pair cpuinfer_interface(SFT_MOE &moe) { class ForwardBindings (line 625) | class ForwardBindings { type Args (line 627) | struct Args { method inner (line 643) | static void inner(void *args) { method cpuinfer_interface (line 654) | static std::pair class BackwardBindings (line 669) | class BackwardBindings { type Args (line 671) | struct Args { method inner (line 694) | static void inner(void *args) { method cpuinfer_interface (line 708) | static std::pair cpuinfer_interface( class AMX_MOEBindings (line 731) | class AMX_MOEBindings { class WarmUpBindings (line 733) | class WarmUpBindings { type Args (line 735) | struct Args { method inner (line 739) | static void inner(void *args) { method cpuinfer_interface (line 743) | static std::pair cpuinfer_interface(AMX_MOE &... class LoadWeightsBindings (line 748) | class LoadWeightsBindings { type Args (line 750) | struct Args { method inner (line 754) | static void inner(void *args) { method cpuinfer_interface (line 758) | static std::pair cpuinfer_interface(AMX_MOE &... class ForwardBindings (line 763) | class ForwardBindings { type Args (line 765) | struct Args { method inner (line 776) | static void inner(void *args) { method cpuinfer_interface (line 782) | static std::pair class SFT_AMX_MOEBindings (line 802) | class SFT_AMX_MOEBindings { class WarmUpBindings (line 804) | class WarmUpBindings { type Args (line 806) | struct Args { method inner (line 810) | static void inner(void *args) { method cpuinfer_interface (line 814) | static std::pair cpuinfer_interface(SFT_AMX_MOE<... class LoadWeightsBindings (line 819) | class LoadWeightsBindings { type Args (line 821) | struct Args { method inner (line 825) | static void inner(void *args) { method cpuinfer_interface (line 829) | static std::pair cpuinfer_interface(SFT_AMX_MOE<... class ForwardBindings (line 834) | class ForwardBindings { type Args (line 836) | struct Args { method inner (line 846) | static void inner(void *args) { method cpuinfer_interface (line 852) | static std::pair class BackwardBindings (line 867) | class BackwardBindings { type Args (line 869) | struct Args { method inner (line 881) | static void inner(void *args) { method cpuinfer_interface (line 894) | static std::pair cpuinfer_interface( function PYBIND11_MODULE (line 916) | PYBIND11_MODULE(cpuinfer_ext, m) { FILE: kt-sft/csrc/ktransformers_ext/operators/amx/debug_sft_moe.hpp function __m512 (line 76) | static inline __m512 sigmoid(__m512 x) { function __m512 (line 83) | static inline __m512 act_fn_1(__m512 x) { function __m512 (line 88) | static inline __m512 act_fn_grad(__m512 x) { function int8_row_to_string (line 107) | std::string int8_row_to_string(const int8_t* row, int len) { type SFT_AMX_MOEConfig (line 116) | struct SFT_AMX_MOEConfig { method SFT_AMX_MOEConfig (line 126) | SFT_AMX_MOEConfig() {} method SFT_AMX_MOEConfig (line 128) | SFT_AMX_MOEConfig(int expert_num, int routed_expert_num, int hidden_si... class SFT_AMX_MOE (line 135) | class SFT_AMX_MOE { method SFT_AMX_MOE (line 207) | SFT_AMX_MOE(SFT_AMX_MOEConfig config) { method transpose_expert (line 384) | void transpose_expert(const void* src, void* dst, int R, int C, Backen... method load_weights (line 400) | void load_weights(Backend *backend) { method warm_up (line 536) | void warm_up(Backend *backend) {} method forward (line 538) | void forward(int qlen, int k, const uint64_t *expert_ids, const float ... method backward (line 696) | void backward(int qlen, int k, const uint64_t *expert_ids, const float... FILE: kt-sft/csrc/ktransformers_ext/operators/amx/debug_tools_sft_moe.hpp function get_env_or_default (line 10) | inline std::string get_env_or_default(const char *var_name, const std::s... function dump_grad_bin (line 21) | inline void dump_grad_bin(const std::string &file_name, function dump_bin (line 55) | inline void dump_bin(std::string file_name, float *data, size_t count) { function dump_bin (line 62) | inline void dump_bin(std::string file_name, int64_t *data, size_t count) { function dump_bin (line 69) | inline void dump_bin(std::string file_name, uint8_t *data, size_t count) { FILE: kt-sft/csrc/ktransformers_ext/operators/amx/la/amx.hpp type amx (line 41) | namespace amx { function enable_amx (line 63) | inline bool enable_amx() { type TileConfig (line 80) | struct alignas(64) TileConfig { method TileConfig (line 89) | TileConfig() { method set_row_col (line 97) | void set_row_col(int i, uint8_t row, uint16_t col) { method set_config (line 102) | void set_config() { _tile_loadconfig(this); } method load_data (line 104) | static void load_data(int to, void *from, size_t stride) { method store_data (line 135) | static void store_data(int from, void *to, size_t stride) { function debug_tile (line 169) | inline void debug_tile(int t) { function debug_tiles (line 182) | inline void debug_tiles(int to = 8) { function debug_m512 (line 188) | inline void debug_m512(__m512 x) { function transpose_16x16_32bit (line 198) | inline void transpose_16x16_32bit(__m512i *v) { function transpose_16x16_32bit (line 273) | inline void transpose_16x16_32bit(__m512i *v, size_t stride) { type GemmKernel224BF (line 348) | struct GemmKernel224BF { method recommended_nth (line 363) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 365) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 371) | static void config() { method load_a (line 390) | static void load_a(dt *a, size_t lda) { method load_b (line 395) | static void load_b(dt *b, size_t ldb) { method clean_c (line 400) | static void clean_c() { method load_c (line 407) | static void load_c(output_t *c, size_t ldc) { method store_c (line 414) | static void store_c(output_t *c, size_t ldc) { method run_tile (line 421) | static void run_tile() { type BufferA (line 428) | struct BufferA { method required_size (line 432) | static size_t required_size(int max_m, int k) { return max_m * k *... method BufferA (line 434) | BufferA(int max_m, int k, void *ptr) : max_m(max_m), k(k) { method from_mat (line 441) | void from_mat(int m, ggml_bf16_t *src, int ith, int nth) { method ggml_bf16_t (line 460) | ggml_bf16_t *get_submat(int m, int k, int m_begin, int k_begin) { type BufferB (line 469) | struct BufferB { method required_size (line 473) | static size_t required_size(int n, int k) { return n * k * sizeof(... method BufferB (line 475) | BufferB(int n, int k, void *ptr) : n(n), k(k) { method from_mat (line 482) | void from_mat(ggml_bf16_t *src, int ith, int nth) { method ggml_bf16_t (line 505) | ggml_bf16_t *get_submat(int n, int k, int n_begin, int k_begin) { type BufferC (line 516) | struct BufferC { method required_size (line 520) | static size_t required_size(int max_m, int n) { return max_m * n *... method BufferC (line 522) | BufferC(int max_m, int n, void *ptr) : max_m(max_m), n(n) { method to_mat (line 529) | void to_mat(int m, ggml_bf16_t *dst, int ith, int nth) { type GemmKernel224Int8 (line 558) | struct GemmKernel224Int8 { method recommended_nth (line 573) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO... method split_range_n (line 575) | static std::pair split_range_n(int n, int ith, int nth) { method config (line 581) | static void config() { method load_a (line 600) | static void load_a(dt *a, size_t lda) { method load_b (line 605) | static void load_b(dt *b, size_t ldb) { method clean_c (line 610) | static void clean_c() { method load_c (line 617) | static void load_c(output_t *c, size_t ldc) { method store_c (line 624) | static void store_c(output_t *c, size_t ldc) { method run_tile (line 631) | static void run_tile() { type BufferA (line 638) | struct BufferA { method required_size (line 643) | static size_t required_size(int max_m, int k) { return max_m * k *... method BufferA (line 645) | BufferA(int max_m, int k, void *ptr) : max_m(max_m), k(k) { method from_mat (line 653) | void from_mat(int m, ggml_bf16_t *src, int ith, int nth) { type BufferB (line 708) | struct BufferB { method required_size (line 713) | static size_t required_size(int n, int k) { return n * k * sizeof(... method BufferB (line 715) | BufferB(int n, int k, void *ptr) : n(n), k(k) { method from_mat (line 723) | void from_mat(ggml_bf16_t *src, int ith, int nth) { type BufferC (line 787) | struct BufferC { method required_size (line 791) | static size_t required_size(int max_m, int n) { return max_m * n *... method BufferC (line 793) | BufferC(int max_m, int n, void *ptr) : max_m(max_m), n(n) { method to_mat (line 800) | void to_mat(int m, ggml_bf16_t *dst, int ith, int nth) { function mat_mul (line 829) | inline void mat_mul(int m, int n, int k, std::shared_ptr Tuple[torch.Ten... function weight_dequant_kernel (line 57) | def weight_dequant_kernel(x_ptr, s_ptr, y_ptr, M, N, BLOCK_SIZE: tl.cons... function weight_dequant (line 85) | def weight_dequant(x: torch.Tensor, s: torch.Tensor, block_size: int = 1... function fp8_gemm_kernel (line 117) | def fp8_gemm_kernel(a_ptr, b_ptr, c_ptr, function fp8_gemm (line 172) | def fp8_gemm(a: torch.Tensor, a_s: torch.Tensor, b: torch.Tensor, b_s: t... FILE: kt-sft/ktransformers/local_chat.py function print_module_tree (line 49) | def print_module_tree(module, indent=0): function write_to_file (line 56) | def write_to_file(content, file_path: str = 'ktransformers/mark_content.... function local_chat (line 87) | def local_chat( FILE: kt-sft/ktransformers/lora_test_module.py class TestModelLora (line 30) | class TestModelLora(nn.Module): method __init__ (line 31) | def __init__(self): method forward (line 51) | def forward(self, x): class TestModelBase (line 54) | class TestModelBase(nn.Module): method __init__ (line 55) | def __init__(self): method forward (line 73) | def forward(self, x): class TestModelTorch (line 80) | class TestModelTorch(nn.Module): method __init__ (line 81) | def __init__(self): method forward (line 98) | def forward(self, x): class BaseModel (line 130) | class BaseModel(nn.Module): method __init__ (line 131) | def __init__(self): method forward (line 135) | def forward(self, x): FILE: kt-sft/ktransformers/models/configuration_deepseek.py class DeepseekV2Config (line 11) | class DeepseekV2Config(PretrainedConfig): method __init__ (line 113) | def __init__( FILE: kt-sft/ktransformers/models/configuration_deepseek_v3.py class DeepseekV3Config (line 7) | class DeepseekV3Config(PretrainedConfig): method __init__ (line 106) | def __init__( FILE: kt-sft/ktransformers/models/configuration_llama.py class LlamaConfig (line 26) | class LlamaConfig(PretrainedConfig): method __init__ (line 143) | def __init__( FILE: kt-sft/ktransformers/models/configuration_qwen2_moe.py class Qwen2MoeConfig (line 24) | class Qwen2MoeConfig(PretrainedConfig): method __init__ (line 115) | def __init__( FILE: kt-sft/ktransformers/models/configuration_qwen3_moe.py class Qwen3MoeConfig (line 25) | class Qwen3MoeConfig(PretrainedConfig): method __init__ (line 161) | def __init__( FILE: kt-sft/ktransformers/models/custom_cache.py class StaticCache (line 19) | class StaticCache(transformers.StaticCache): method __init__ (line 37) | def __init__(self, config: PretrainedConfig, max_batch_size: int, max_... method update (line 112) | def update( method get_seq_length (line 154) | def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: method change_seq_length (line 161) | def change_seq_length(self, bias: Optional[int] = 0) -> int: method get_max_length (line 169) | def get_max_length(self) -> Optional[int]: method reset (line 173) | def reset(self): method remove_suffix (line 182) | def remove_suffix(self, start_pos): method get_max_cache_shape (line 193) | def get_max_cache_shape(self) -> Tuple[int, int, int, int]: class KDeepSeekV3Cache (line 197) | class KDeepSeekV3Cache(nn.Module): method __init__ (line 198) | def __init__( method load (line 216) | def load(self, inference_context: "sched_ext.InferenceContext"): method update (line 224) | def update( method get_page_table (line 260) | def get_page_table(self, cache_position: torch.Tensor, q_indptr: torch... class KGQACache (line 278) | class KGQACache(nn.Module): method __init__ (line 279) | def __init__( method load (line 296) | def load(self, inference_context: "sched_ext.InferenceContext"): method get_page_table (line 311) | def get_page_table(self, cache_position: torch.Tensor, q_indptr: torch... method get_k_cache (line 329) | def get_k_cache(self, layer_idx): method get_v_cache (line 332) | def get_v_cache(self, layer_idx): FILE: kt-sft/ktransformers/models/custom_modeling_deepseek_v2.py class KDeepseekV2ForCausalLM (line 21) | class KDeepseekV2ForCausalLM(DeepseekV2PreTrainedModel): method __init__ (line 25) | def __init__( method init_wrapper (line 40) | def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pag... method batch_embeddings (line 57) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method forward (line 71) | def forward( method flash_infer_attn_plan (line 140) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: kt-sft/ktransformers/models/custom_modeling_deepseek_v3.py class KDeepseekV3ForCausalLM (line 27) | class KDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel): method __init__ (line 31) | def __init__( method init_wrapper (line 43) | def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pag... method batch_embeddings (line 61) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method forward (line 75) | def forward( method flash_infer_attn_plan (line 136) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: kt-sft/ktransformers/models/custom_modeling_qwen2_moe.py class KQwen2MoeForCausalLM (line 27) | class KQwen2MoeForCausalLM(Qwen2MoePreTrainedModel): method __init__ (line 31) | def __init__( method init_wrapper (line 44) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba... method batch_embeddings (line 48) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method forward (line 62) | def forward( method flash_infer_attn_plan (line 120) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: kt-sft/ktransformers/models/custom_modeling_qwen3_moe.py class KQwen3MoeForCausalLM (line 27) | class KQwen3MoeForCausalLM(Qwen3MoePreTrainedModel): method __init__ (line 31) | def __init__( method init_wrapper (line 44) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba... method batch_embeddings (line 48) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"): method forward (line 62) | def forward( method flash_infer_attn_plan (line 120) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,... FILE: kt-sft/ktransformers/models/modeling_deepseek.py function _get_unpad_data (line 89) | def _get_unpad_data(attention_mask): class DeepseekV2RMSNorm (line 103) | class DeepseekV2RMSNorm(nn.Module): method __init__ (line 104) | def __init__(self, hidden_size, eps=1e-6): method forward (line 113) | def forward(self, hidden_states): class DeepseekV2RotaryEmbedding (line 124) | class DeepseekV2RotaryEmbedding(nn.Module): method __init__ (line 125) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method forward (line 137) | def forward(self, x, position_ids): class DeepseekV2LinearScalingRotaryEmbedding (line 153) | class DeepseekV2LinearScalingRotaryEmbedding(DeepseekV2RotaryEmbedding): method __init__ (line 156) | def __init__( method _set_cos_sin_cache (line 168) | def _set_cos_sin_cache(self, seq_len, device, dtype): class DeepseekV2DynamicNTKScalingRotaryEmbedding (line 183) | class DeepseekV2DynamicNTKScalingRotaryEmbedding(DeepseekV2RotaryEmbeddi... method __init__ (line 186) | def __init__( method _set_cos_sin_cache (line 198) | def _set_cos_sin_cache(self, seq_len, device, dtype): function yarn_find_correction_dim (line 223) | def yarn_find_correction_dim( function yarn_find_correction_range (line 232) | def yarn_find_correction_range( function yarn_get_mscale (line 244) | def yarn_get_mscale(scale=1, mscale=1): function yarn_linear_ramp_mask (line 250) | def yarn_linear_ramp_mask(min, max, dim): class DeepseekV2YarnRotaryEmbedding (line 258) | class DeepseekV2YarnRotaryEmbedding(DeepseekV2RotaryEmbedding): method __init__ (line 259) | def __init__( method forward (line 314) | def forward(self, x, position_ids): function rotate_half (line 330) | def rotate_half(x): function apply_rotary_pos_emb (line 338) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... class DeepseekV2MLP (line 368) | class DeepseekV2MLP(nn.Module): method __init__ (line 369) | def __init__(self, config, hidden_size=None, intermediate_size=None): method forward (line 382) | def forward(self, x): class MoEGate (line 387) | class MoEGate(nn.Module): method __init__ (line 388) | def __init__(self, config): method reset_parameters (line 409) | def reset_parameters(self) -> None: method forward (line 414) | def forward(self, hidden_states): class AddAuxiliaryLoss (line 494) | class AddAuxiliaryLoss(torch.autograd.Function): method forward (line 501) | def forward(ctx, x, loss): method backward (line 508) | def backward(ctx, grad_output): class DeepseekV2MoE (line 514) | class DeepseekV2MoE(nn.Module): method __init__ (line 519) | def __init__(self, config): method forward (line 559) | def forward(self, hidden_states): method moe_infer (line 582) | def moe_infer(self, x, topk_ids, topk_weight): function repeat_kv (line 658) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class DeepseekV2Attention (line 672) | class DeepseekV2Attention(nn.Module): method __init__ (line 675) | def __init__(self, config: DeepseekV2Config, layer_idx: Optional[int] ... method _init_rope (line 742) | def _init_rope(self): method _shape (line 788) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 795) | def forward( class DeepseekV2FlashAttention2 (line 894) | class DeepseekV2FlashAttention2(DeepseekV2Attention): method __init__ (line 901) | def __init__(self, *args, **kwargs): method forward (line 909) | def forward( method _flash_attention_forward (line 1039) | def _flash_attention_forward( method _upad_input (line 1130) | def _upad_input( class DeepseekV2DecoderLayer (line 1181) | class DeepseekV2DecoderLayer(nn.Module): method __init__ (line 1182) | def __init__(self, config: DeepseekV2Config, layer_idx: int): method forward (line 1206) | def forward( class DeepseekV2PreTrainedModel (line 1292) | class DeepseekV2PreTrainedModel(PreTrainedModel): method _init_weights (line 1302) | def _init_weights(self, module): class DeepseekV2Model (line 1388) | class DeepseekV2Model(DeepseekV2PreTrainedModel): method __init__ (line 1396) | def __init__(self, config: DeepseekV2Config): method get_input_embeddings (line 1417) | def get_input_embeddings(self): method set_input_embeddings (line 1420) | def set_input_embeddings(self, value): method forward (line 1424) | def forward( method _update_causal_mask (line 1564) | def _update_causal_mask( class DeepseekV2ForCausalLM (line 1645) | class DeepseekV2ForCausalLM(DeepseekV2PreTrainedModel): method __init__ (line 1648) | def __init__(self, config): method get_input_embeddings (line 1657) | def get_input_embeddings(self): method set_input_embeddings (line 1660) | def set_input_embeddings(self, value): method get_output_embeddings (line 1663) | def get_output_embeddings(self): method set_output_embeddings (line 1666) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1669) | def set_decoder(self, decoder): method get_decoder (line 1672) | def get_decoder(self): method forward (line 1679) | def forward( method prepare_inputs_for_generation (line 1777) | def prepare_inputs_for_generation( method _reorder_cache (line 1855) | def _reorder_cache(past_key_values, beam_idx): class DeepseekV2ForSequenceClassification (line 1882) | class DeepseekV2ForSequenceClassification(DeepseekV2PreTrainedModel): method __init__ (line 1883) | def __init__(self, config): method get_input_embeddings (line 1892) | def get_input_embeddings(self): method set_input_embeddings (line 1895) | def set_input_embeddings(self, value): method forward (line 1899) | def forward( FILE: kt-sft/ktransformers/models/modeling_deepseek_v3.py function _get_unpad_data (line 83) | def _get_unpad_data(attention_mask): class DeepseekV3RMSNorm (line 97) | class DeepseekV3RMSNorm(nn.Module): method __init__ (line 98) | def __init__(self, hidden_size, eps=1e-6): method forward (line 107) | def forward(self, hidden_states): class DeepseekV3RotaryEmbedding (line 118) | class DeepseekV3RotaryEmbedding(nn.Module): method __init__ (line 119) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 138) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 150) | def forward(self, x, seq_len=None): class DeepseekV3LinearScalingRotaryEmbedding (line 162) | class DeepseekV3LinearScalingRotaryEmbedding(DeepseekV3RotaryEmbedding): method __init__ (line 165) | def __init__( method _set_cos_sin_cache (line 176) | def _set_cos_sin_cache(self, seq_len, device, dtype): class DeepseekV3DynamicNTKScalingRotaryEmbedding (line 191) | class DeepseekV3DynamicNTKScalingRotaryEmbedding(DeepseekV3RotaryEmbeddi... method __init__ (line 194) | def __init__( method _set_cos_sin_cache (line 205) | def _set_cos_sin_cache(self, seq_len, device, dtype): function yarn_find_correction_dim (line 230) | def yarn_find_correction_dim( function yarn_find_correction_range (line 239) | def yarn_find_correction_range( function yarn_get_mscale (line 251) | def yarn_get_mscale(scale=1, mscale=1): function yarn_linear_ramp_mask (line 257) | def yarn_linear_ramp_mask(min, max, dim): class DeepseekV3YarnRotaryEmbedding (line 266) | class DeepseekV3YarnRotaryEmbedding(DeepseekV3RotaryEmbedding): method __init__ (line 268) | def __init__( method _set_cos_sin_cache (line 289) | def _set_cos_sin_cache(self, seq_len, device, dtype): function rotate_half (line 335) | def rotate_half(x): function apply_rotary_pos_emb (line 343) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): class DeepseekV3MLP (line 378) | class DeepseekV3MLP(nn.Module): method __init__ (line 379) | def __init__(self, config, hidden_size=None, intermediate_size=None): method forward (line 392) | def forward(self, x): class MoEGate (line 397) | class MoEGate(nn.Module): method __init__ (line 398) | def __init__(self, config): method reset_parameters (line 421) | def reset_parameters(self) -> None: method forward (line 426) | def forward(self, hidden_states): class DeepseekV3MoE (line 479) | class DeepseekV3MoE(nn.Module): method __init__ (line 484) | def __init__(self, config): method forward (line 526) | def forward(self, hidden_states): method moe_infer (line 539) | def moe_infer(self, x, topk_ids, topk_weight): function repeat_kv (line 616) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class DeepseekV3Attention (line 631) | class DeepseekV3Attention(nn.Module): method __init__ (line 634) | def __init__(self, config: DeepseekV3Config, layer_idx: Optional[int] ... method _init_rope (line 701) | def _init_rope(self): method _shape (line 747) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 754) | def forward( class DeepseekV3FlashAttention2 (line 865) | class DeepseekV3FlashAttention2(DeepseekV3Attention): method __init__ (line 872) | def __init__(self, *args, **kwargs): method forward (line 880) | def forward( method _flash_attention_forward (line 1016) | def _flash_attention_forward( method _upad_input (line 1096) | def _upad_input( class DeepseekV3DecoderLayer (line 1148) | class DeepseekV3DecoderLayer(nn.Module): method __init__ (line 1149) | def __init__(self, config: DeepseekV3Config, layer_idx: int): method forward (line 1173) | def forward( class DeepseekV3PreTrainedModel (line 1259) | class DeepseekV3PreTrainedModel(PreTrainedModel): method _init_weights (line 1268) | def _init_weights(self, module): class DeepseekV3Model (line 1354) | class DeepseekV3Model(DeepseekV3PreTrainedModel): method __init__ (line 1362) | def __init__(self, config: DeepseekV3Config): method get_input_embeddings (line 1383) | def get_input_embeddings(self): method set_input_embeddings (line 1386) | def set_input_embeddings(self, value): method forward (line 1390) | def forward( method _update_causal_mask (line 1524) | def _update_causal_mask( class DeepseekV3ForCausalLM (line 1604) | class DeepseekV3ForCausalLM(DeepseekV3PreTrainedModel, GenerationMixin): method __init__ (line 1607) | def __init__(self, config): method get_input_embeddings (line 1616) | def get_input_embeddings(self): method set_input_embeddings (line 1619) | def set_input_embeddings(self, value): method get_output_embeddings (line 1622) | def get_output_embeddings(self): method set_output_embeddings (line 1625) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1628) | def set_decoder(self, decoder): method get_decoder (line 1631) | def get_decoder(self): method forward (line 1638) | def forward( method prepare_inputs_for_generation (line 1735) | def prepare_inputs_for_generation( method _reorder_cache (line 1800) | def _reorder_cache(past_key_values, beam_idx): class DeepseekV3ForSequenceClassification (line 1827) | class DeepseekV3ForSequenceClassification(DeepseekV3PreTrainedModel): method __init__ (line 1828) | def __init__(self, config): method get_input_embeddings (line 1837) | def get_input_embeddings(self): method set_input_embeddings (line 1840) | def set_input_embeddings(self, value): method forward (line 1844) | def forward( FILE: kt-sft/ktransformers/models/modeling_llama.py class LlamaRMSNorm (line 59) | class LlamaRMSNorm(nn.Module): method __init__ (line 60) | def __init__(self, hidden_size, eps=1e-6): method forward (line 68) | def forward(self, hidden_states): class LlamaRotaryEmbedding (line 79) | class LlamaRotaryEmbedding(nn.Module): method __init__ (line 80) | def __init__( method _dynamic_frequency_update (line 135) | def _dynamic_frequency_update(self, position_ids, device): method forward (line 160) | def forward(self, x, position_ids): class LlamaLinearScalingRotaryEmbedding (line 191) | class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding): method __init__ (line 194) | def __init__(self, *args, **kwargs): class LlamaDynamicNTKScalingRotaryEmbedding (line 203) | class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding): method __init__ (line 206) | def __init__(self, *args, **kwargs): function rotate_half (line 216) | def rotate_half(x): function apply_rotary_pos_emb (line 223) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... class LlamaMLP (line 250) | class LlamaMLP(nn.Module): method __init__ (line 251) | def __init__(self, config): method forward (line 267) | def forward(self, x): function repeat_kv (line 301) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class LlamaAttention (line 315) | class LlamaAttention(nn.Module): method __init__ (line 318) | def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None): method forward (line 365) | def forward( class LlamaFlashAttention2 (line 497) | class LlamaFlashAttention2(LlamaAttention): method __init__ (line 504) | def __init__(self, *args, **kwargs): method forward (line 512) | def forward( class LlamaSdpaAttention (line 628) | class LlamaSdpaAttention(LlamaAttention): method forward (line 636) | def forward( class LlamaDecoderLayer (line 746) | class LlamaDecoderLayer(nn.Module): method __init__ (line 747) | def __init__(self, config: LlamaConfig, layer_idx: int): method forward (line 761) | def forward( class LlamaPreTrainedModel (line 855) | class LlamaPreTrainedModel(PreTrainedModel): method _init_weights (line 867) | def _init_weights(self, module): class LlamaModel (line 957) | class LlamaModel(LlamaPreTrainedModel): method __init__ (line 965) | def __init__(self, config: LlamaConfig): method get_input_embeddings (line 986) | def get_input_embeddings(self): method set_input_embeddings (line 989) | def set_input_embeddings(self, value): method forward (line 993) | def forward( method _update_causal_mask (line 1134) | def _update_causal_mask( class LlamaForCausalLM (line 1237) | class LlamaForCausalLM(LlamaPreTrainedModel): method __init__ (line 1240) | def __init__(self, config): method get_input_embeddings (line 1249) | def get_input_embeddings(self): method set_input_embeddings (line 1252) | def set_input_embeddings(self, value): method get_output_embeddings (line 1255) | def get_output_embeddings(self): method set_output_embeddings (line 1258) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1261) | def set_decoder(self, decoder): method get_decoder (line 1264) | def get_decoder(self): method forward (line 1271) | def forward( method prepare_inputs_for_generation (line 1377) | def prepare_inputs_for_generation( class LlamaForSequenceClassification (line 1441) | class LlamaForSequenceClassification(LlamaPreTrainedModel): method __init__ (line 1442) | def __init__(self, config): method get_input_embeddings (line 1451) | def get_input_embeddings(self): method set_input_embeddings (line 1454) | def set_input_embeddings(self, value): method forward (line 1458) | def forward( class LlamaForQuestionAnswering (line 1568) | class LlamaForQuestionAnswering(LlamaPreTrainedModel): method __init__ (line 1572) | def __init__(self, config): method get_input_embeddings (line 1580) | def get_input_embeddings(self): method set_input_embeddings (line 1583) | def set_input_embeddings(self, value): method forward (line 1587) | def forward( class LlamaForTokenClassification (line 1669) | class LlamaForTokenClassification(LlamaPreTrainedModel): method __init__ (line 1670) | def __init__(self, config): method get_input_embeddings (line 1686) | def get_input_embeddings(self): method set_input_embeddings (line 1689) | def set_input_embeddings(self, value): method forward (line 1693) | def forward( FILE: kt-sft/ktransformers/models/modeling_mixtral.py function load_balancing_loss_func (line 90) | def load_balancing_loss_func( function _get_unpad_data (line 167) | def _get_unpad_data(attention_mask): class MixtralRMSNorm (line 180) | class MixtralRMSNorm(nn.Module): method __init__ (line 181) | def __init__(self, hidden_size, eps=1e-6): method forward (line 189) | def forward(self, hidden_states): method extra_repr (line 196) | def extra_repr(self): class MixtralRotaryEmbedding (line 202) | class MixtralRotaryEmbedding(nn.Module): method __init__ (line 203) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method forward (line 216) | def forward(self, x, position_ids): function rotate_half (line 232) | def rotate_half(x): function apply_rotary_pos_emb (line 241) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): function repeat_kv (line 271) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class MixtralAttention (line 285) | class MixtralAttention(nn.Module): method __init__ (line 291) | def __init__(self, config: MixtralConfig, layer_idx: Optional[int] = N... method _shape (line 328) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 331) | def forward( class MixtralFlashAttention2 (line 407) | class MixtralFlashAttention2(MixtralAttention): method forward (line 414) | def forward( method _flash_attention_forward (line 550) | def _flash_attention_forward( method _upad_input (line 661) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class MixtralSdpaAttention (line 707) | class MixtralSdpaAttention(MixtralAttention): method forward (line 715) | def forward( class MixtralBlockSparseTop2MLP (line 804) | class MixtralBlockSparseTop2MLP(nn.Module): method __init__ (line 805) | def __init__(self, config: MixtralConfig): method forward (line 816) | def forward(self, hidden_states): class MixtralSparseMoeBlock (line 822) | class MixtralSparseMoeBlock(nn.Module): method __init__ (line 834) | def __init__(self, config): method forward (line 849) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class MixtralDecoderLayer (line 890) | class MixtralDecoderLayer(nn.Module): method __init__ (line 891) | def __init__(self, config: MixtralConfig, layer_idx: int): method forward (line 901) | def forward( class MixtralPreTrainedModel (line 993) | class MixtralPreTrainedModel(PreTrainedModel): method _init_weights (line 1003) | def _init_weights(self, module): class MixtralModel (line 1092) | class MixtralModel(MixtralPreTrainedModel): method __init__ (line 1100) | def __init__(self, config: MixtralConfig): method get_input_embeddings (line 1116) | def get_input_embeddings(self): method set_input_embeddings (line 1119) | def set_input_embeddings(self, value): method forward (line 1124) | def forward( method _update_causal_mask (line 1257) | def _update_causal_mask( class MixtralForCausalLM (line 1338) | class MixtralForCausalLM(MixtralPreTrainedModel): method __init__ (line 1341) | def __init__(self, config): method get_input_embeddings (line 1352) | def get_input_embeddings(self): method set_input_embeddings (line 1355) | def set_input_embeddings(self, value): method get_output_embeddings (line 1358) | def get_output_embeddings(self): method set_output_embeddings (line 1361) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1364) | def set_decoder(self, decoder): method get_decoder (line 1367) | def get_decoder(self): method forward (line 1373) | def forward( method prepare_inputs_for_generation (line 1483) | def prepare_inputs_for_generation( class MixtralForSequenceClassification (line 1546) | class MixtralForSequenceClassification(MixtralPreTrainedModel): method __init__ (line 1547) | def __init__(self, config): method get_input_embeddings (line 1556) | def get_input_embeddings(self): method set_input_embeddings (line 1559) | def set_input_embeddings(self, value): method forward (line 1563) | def forward( class MixtralForTokenClassification (line 1662) | class MixtralForTokenClassification(MixtralPreTrainedModel): method __init__ (line 1663) | def __init__(self, config): method get_input_embeddings (line 1679) | def get_input_embeddings(self): method set_input_embeddings (line 1682) | def set_input_embeddings(self, value): method forward (line 1686) | def forward( FILE: kt-sft/ktransformers/models/modeling_qwen2_moe.py function load_balancing_loss_func (line 77) | def load_balancing_loss_func( function _get_unpad_data (line 154) | def _get_unpad_data(attention_mask): class Qwen2MoeRMSNorm (line 167) | class Qwen2MoeRMSNorm(nn.Module): method __init__ (line 168) | def __init__(self, hidden_size, eps=1e-6): method forward (line 176) | def forward(self, hidden_states): class Qwen2MoeRotaryEmbedding (line 184) | class Qwen2MoeRotaryEmbedding(nn.Module): method __init__ (line 185) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method forward (line 197) | def forward(self, x, position_ids): function rotate_half (line 214) | def rotate_half(x): function apply_rotary_pos_emb (line 222) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... class Qwen2MoeMLP (line 250) | class Qwen2MoeMLP(nn.Module): method __init__ (line 251) | def __init__(self, config, intermediate_size=None): method forward (line 261) | def forward(self, x): function repeat_kv (line 266) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class Qwen2MoeAttention (line 279) | class Qwen2MoeAttention(nn.Module): method __init__ (line 285) | def __init__(self, config: Qwen2MoeConfig, layer_idx: Optional[int] = ... method forward (line 322) | def forward( class Qwen2MoeFlashAttention2 (line 397) | class Qwen2MoeFlashAttention2(Qwen2MoeAttention): method __init__ (line 407) | def __init__(self, *args, **kwargs): method forward (line 415) | def forward( method _flash_attention_forward (line 547) | def _flash_attention_forward( method _upad_input (line 664) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class Qwen2MoeSdpaAttention (line 708) | class Qwen2MoeSdpaAttention(Qwen2MoeAttention): method forward (line 716) | def forward( class Qwen2MoeSparseMoeBlock (line 804) | class Qwen2MoeSparseMoeBlock(nn.Module): method __init__ (line 805) | def __init__(self, config): method forward (line 820) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Qwen2MoeDecoderLayer (line 866) | class Qwen2MoeDecoderLayer(nn.Module): method __init__ (line 867) | def __init__(self, config: Qwen2MoeConfig, layer_idx: int): method forward (line 883) | def forward( class Qwen2MoePreTrainedModel (line 980) | class Qwen2MoePreTrainedModel(PreTrainedModel): method _init_weights (line 991) | def _init_weights(self, module): class Qwen2MoeModel (line 1084) | class Qwen2MoeModel(Qwen2MoePreTrainedModel): method __init__ (line 1092) | def __init__(self, config: Qwen2MoeConfig): method get_input_embeddings (line 1108) | def get_input_embeddings(self): method set_input_embeddings (line 1111) | def set_input_embeddings(self, value): method forward (line 1115) | def forward( method _update_causal_mask (line 1248) | def _update_causal_mask( class Qwen2MoeForCausalLM (line 1329) | class Qwen2MoeForCausalLM(Qwen2MoePreTrainedModel): method __init__ (line 1332) | def __init__(self, config): method get_input_embeddings (line 1344) | def get_input_embeddings(self): method set_input_embeddings (line 1347) | def set_input_embeddings(self, value): method get_output_embeddings (line 1350) | def get_output_embeddings(self): method set_output_embeddings (line 1353) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1356) | def set_decoder(self, decoder): method get_decoder (line 1359) | def get_decoder(self): method forward (line 1364) | def forward( method prepare_inputs_for_generation (line 1473) | def prepare_inputs_for_generation( method _reorder_cache (line 1551) | def _reorder_cache(past_key_values, beam_idx): class Qwen2MoeForSequenceClassification (line 1576) | class Qwen2MoeForSequenceClassification(Qwen2MoePreTrainedModel): method __init__ (line 1577) | def __init__(self, config): method get_input_embeddings (line 1586) | def get_input_embeddings(self): method set_input_embeddings (line 1589) | def set_input_embeddings(self, value): method forward (line 1593) | def forward( class Qwen2MoeForTokenClassification (line 1692) | class Qwen2MoeForTokenClassification(Qwen2MoePreTrainedModel): method __init__ (line 1693) | def __init__(self, config): method get_input_embeddings (line 1709) | def get_input_embeddings(self): method set_input_embeddings (line 1712) | def set_input_embeddings(self, value): method forward (line 1716) | def forward( FILE: kt-sft/ktransformers/models/modeling_qwen3_moe.py function rotate_half (line 66) | def rotate_half(x): function apply_rotary_pos_emb (line 73) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... function repeat_kv (line 100) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: function eager_attention_forward (line 112) | def eager_attention_forward( class Qwen3MoeAttention (line 138) | class Qwen3MoeAttention(nn.Module): method __init__ (line 141) | def __init__(self, config: Qwen3MoeConfig, layer_idx: int): method forward (line 184) | def forward( class Qwen3MoeMLP (line 235) | class Qwen3MoeMLP(nn.Module): method __init__ (line 236) | def __init__(self, config, intermediate_size=None): method forward (line 246) | def forward(self, x): class Qwen3MoeSparseMoeBlock (line 251) | class Qwen3MoeSparseMoeBlock(nn.Module): method __init__ (line 252) | def __init__(self, config): method forward (line 264) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class Qwen3MoeRMSNorm (line 304) | class Qwen3MoeRMSNorm(nn.Module): method __init__ (line 305) | def __init__(self, hidden_size, eps=1e-6): method forward (line 314) | def forward(self, hidden_states): method extra_repr (line 321) | def extra_repr(self): class Qwen3MoeDecoderLayer (line 325) | class Qwen3MoeDecoderLayer(nn.Module): method __init__ (line 326) | def __init__(self, config: Qwen3MoeConfig, layer_idx: int): method forward (line 345) | def forward( function _compute_default_rope_parameters (line 422) | def _compute_default_rope_parameters( class Qwen3MoeRotaryEmbedding (line 462) | class Qwen3MoeRotaryEmbedding(nn.Module): method __init__ (line 463) | def __init__(self, config: Qwen3MoeConfig, device=None): method _dynamic_frequency_update (line 486) | def _dynamic_frequency_update(self, position_ids, device): method forward (line 506) | def forward(self, x, position_ids): class Qwen3MoePreTrainedModel (line 551) | class Qwen3MoePreTrainedModel(PreTrainedModel): method _init_weights (line 565) | def _init_weights(self, module): class Qwen3MoeModel (line 648) | class Qwen3MoeModel(Qwen3MoePreTrainedModel): method __init__ (line 656) | def __init__(self, config: Qwen3MoeConfig): method get_input_embeddings (line 672) | def get_input_embeddings(self): method set_input_embeddings (line 675) | def set_input_embeddings(self, value): method forward (line 679) | def forward( method _update_causal_mask (line 797) | def _update_causal_mask( method _prepare_4d_causal_attention_mask_with_cache_position (line 881) | def _prepare_4d_causal_attention_mask_with_cache_position( class KwargsForCausalLM (line 951) | class KwargsForCausalLM(): ... function load_balancing_loss_func (line 954) | def load_balancing_loss_func( class Qwen3MoeForCausalLM (line 1036) | class Qwen3MoeForCausalLM(Qwen3MoePreTrainedModel, GenerationMixin): method __init__ (line 1041) | def __init__(self, config): method get_input_embeddings (line 1053) | def get_input_embeddings(self): method set_input_embeddings (line 1056) | def set_input_embeddings(self, value): method get_output_embeddings (line 1059) | def get_output_embeddings(self): method set_output_embeddings (line 1062) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 1065) | def set_decoder(self, decoder): method get_decoder (line 1068) | def get_decoder(self): method forward (line 1074) | def forward( class Qwen3MoeForSequenceClassification (line 1200) | class Qwen3MoeForSequenceClassification(Qwen3MoePreTrainedModel): method __init__ (line 1201) | def __init__(self, config): method get_input_embeddings (line 1210) | def get_input_embeddings(self): method set_input_embeddings (line 1213) | def set_input_embeddings(self, value): method forward (line 1217) | def forward( class Qwen3MoeForTokenClassification (line 1299) | class Qwen3MoeForTokenClassification(Qwen3MoePreTrainedModel): method __init__ (line 1300) | def __init__(self, config): method get_input_embeddings (line 1316) | def get_input_embeddings(self): method set_input_embeddings (line 1319) | def set_input_embeddings(self, value): method forward (line 1328) | def forward( class Qwen3MoeForQuestionAnswering (line 1387) | class Qwen3MoeForQuestionAnswering(Qwen3MoePreTrainedModel): method __init__ (line 1390) | def __init__(self, config): method get_input_embeddings (line 1398) | def get_input_embeddings(self): method set_input_embeddings (line 1401) | def set_input_embeddings(self, value): method forward (line 1405) | def forward( FILE: kt-sft/ktransformers/moe_test_module.py class TestKExpertsTorch (line 31) | class TestKExpertsTorch(unittest.TestCase): method setUp (line 32) | def setUp(self): method _create_fixed_data (line 41) | def _create_fixed_data(self, device, batch_size=2): method _run_single_device_test (line 60) | def _run_single_device_test(self, device, seed=42): method test_forward_gradient (line 92) | def test_forward_gradient(self): FILE: kt-sft/ktransformers/moe_test_module_old.py class TestKExpertsTorch (line 31) | class TestKExpertsTorch(unittest.TestCase): method setUp (line 32) | def setUp(self): method _run_single_device_test (line 46) | def _run_single_device_test(self, device, seed=42): method test_forward_gradient (line 101) | def test_forward_gradient(self): FILE: kt-sft/ktransformers/operators/RoPE.py class RotaryEmbedding (line 33) | class RotaryEmbedding(BaseInjectedModule, DeepseekV2RotaryEmbedding): method __init__ (line 34) | def __init__( method load (line 54) | def load(self): class RotaryEmbeddingV3 (line 63) | class RotaryEmbeddingV3(BaseInjectedModule): method __init__ (line 64) | def __init__( method forward (line 82) | def forward(self, x, position_ids): method load (line 97) | def load(self): method _init (line 104) | def _init(self, dim, max_position_embeddings, base, device, scaling_fa... class RotaryEmbeddingV2 (line 114) | class RotaryEmbeddingV2(BaseInjectedModule, LlamaRotaryEmbedding): method __init__ (line 115) | def __init__( method load (line 140) | def load(self): class YarnRotaryEmbedding (line 151) | class YarnRotaryEmbedding(BaseInjectedModule, DeepseekV2YarnRotaryEmbedd... method __init__ (line 152) | def __init__( method load (line 181) | def load(self): class YarnRotaryEmbeddingV3 (line 221) | class YarnRotaryEmbeddingV3(BaseInjectedModule): method __init__ (line 222) | def __init__( method load (line 239) | def load(self): method forward (line 261) | def forward(self, x, position_ids): method _init (line 276) | def _init( class DynamicNTKScalingRotaryEmbedding (line 327) | class DynamicNTKScalingRotaryEmbedding( method __init__ (line 330) | def __init__( method load (line 353) | def load(self): class RotaryEmbeddingV4 (line 366) | class RotaryEmbeddingV4(BaseInjectedModule): method __init__ (line 367) | def __init__( method forward (line 385) | def forward(self, x, position_ids): method load (line 400) | def load(self): method _init (line 407) | def _init(self, dim, max_position_embeddings, base, device, scaling_fa... class KQwen3MoeRotaryEmbedding (line 417) | class KQwen3MoeRotaryEmbedding(BaseInjectedModule, DeepseekV2RotaryEmbed... method __init__ (line 418) | def __init__( method load (line 438) | def load(self): FILE: kt-sft/ktransformers/operators/attention.py function rotate_half (line 42) | def rotate_half(x): class KDeepseekV2Attention (line 49) | class KDeepseekV2Attention(BaseInjectedModule, DeepseekV2Attention): method __init__ (line 53) | def __init__(self, method get_absorbed (line 70) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]: method forward_chunck (line 78) | def forward_chunck( method forward_linux_triton (line 197) | def forward_linux_triton( method forward_linux_flashinfer (line 350) | def forward_linux_flashinfer( method forward_windows (line 526) | def forward_windows( method forward_xpu (line 592) | def forward_xpu( method forward (line 686) | def forward( class KLlamaAttention (line 747) | class KLlamaAttention(BaseInjectedModule): method __init__ (line 750) | def __init__(self, method apply_rotary_pos_emb (line 761) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq... method forward (line 786) | def forward( class KQwen3MoeAttentionIPEXLLM (line 877) | class KQwen3MoeAttentionIPEXLLM(BaseInjectedModule, Qwen3MoeAttention): method __init__ (line 878) | def __init__(self, method forward (line 894) | def forward( function repeat_kv (line 949) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: function eager_attention_forward (line 961) | def eager_attention_forward( class KQwen3MoeAttention (line 987) | class KQwen3MoeAttention(BaseInjectedModule, Qwen3MoeAttention ): method __init__ (line 988) | def __init__(self, method apply_rotary_pos_emb (line 1004) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq... method forward (line 1030) | def forward(self, FILE: kt-sft/ktransformers/operators/balance_serve_attention.py function rotate_half (line 23) | def rotate_half(x): class flashinfer_attn (line 29) | class flashinfer_attn(BaseInjectedModule, DeepseekV2Attention): method __init__ (line 30) | def __init__(self, method get_absorbed (line 45) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]: method forward (line 62) | def forward(self, class KQwen2MoeAttention (line 117) | class KQwen2MoeAttention(BaseInjectedModule, Qwen2MoeAttention): method __init__ (line 118) | def __init__(self, method apply_rotary_pos_emb (line 134) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq... method forward (line 161) | def forward(self, class KQwen3MoeAttention (line 203) | class KQwen3MoeAttention(BaseInjectedModule, Qwen3MoeAttention): method __init__ (line 204) | def __init__(self, method apply_rotary_pos_emb (line 220) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq... method forward (line 247) | def forward(self, class deepseek_torch_attn (line 293) | class deepseek_torch_attn(BaseInjectedModule, DeepseekV2Attention): method __init__ (line 294) | def __init__(self, method get_absorbed (line 309) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]: method forward (line 327) | def forward(self, FILE: kt-sft/ktransformers/operators/base_operator.py class BaseInjectedModule (line 12) | class BaseInjectedModule(nn.Module): method __init__ (line 14) | def __init__(self, method __getattr__ (line 31) | def __getattr__(self, name: str) -> Any: method __setattr__ (line 51) | def __setattr__(self, name: str, value: Tensor | nn.Module) -> None: method forward (line 60) | def forward(self, *args, **kwargs): method load (line 63) | def load(self, gguf_loader=None, adapter_gguf : bool = False): FILE: kt-sft/ktransformers/operators/cpuinfer.py class CPUInferKVCache (line 29) | class CPUInferKVCache: method __init__ (line 30) | def __init__( method load_kvcache (line 100) | def load_kvcache(self, tensor_file_path: str): method dump_kvcache (line 105) | def dump_kvcache( method update_cache_total_len (line 135) | def update_cache_total_len(self, cache_total_len: int): method attn (line 143) | def attn( method update_kvcache_one_block_fp16 (line 256) | def update_kvcache_one_block_fp16( method get_kvcache_one_block_fp16 (line 292) | def get_kvcache_one_block_fp16( method update_importance_one_block (line 328) | def update_importance_one_block( method get_importance_one_block (line 354) | def get_importance_one_block( method get_anchor_one_block (line 380) | def get_anchor_one_block(self, anchor: torch.Tensor, layer_id: int, bl... method update_anchor_one_block (line 406) | def update_anchor_one_block( method calc_anchor_all_layers (line 434) | def calc_anchor_all_layers( method clear_importance_all_layers (line 473) | def clear_importance_all_layers( method get_cache_total_len (line 512) | def get_cache_total_len(self): method update_kvcache_q4 (line 515) | def update_kvcache_q4( method update_kvcache_fp16 (line 528) | def update_kvcache_fp16( method get_kvcache_q4 (line 550) | def get_kvcache_q4( method get_kvcache_fp16 (line 563) | def get_kvcache_fp16( method get_and_update_kvcache_fp16 (line 584) | def get_and_update_kvcache_fp16( method update_importance (line 606) | def update_importance( method get_attn_sparsity (line 627) | def get_attn_sparsity( method attn_with_kvcache (line 665) | def attn_with_kvcache( method get_all_kvcache_one_layer (line 704) | def get_all_kvcache_one_layer( method get_importance (line 713) | def get_importance( method get_anchor (line 720) | def get_anchor( class CPUInfer (line 728) | class CPUInfer: method __init__ (line 732) | def __init__(self, thread_num): method submit (line 738) | def submit(self, task): method submit_with_cuda_stream (line 741) | def submit_with_cuda_stream(self, current_cuda_stream, task): method sync (line 744) | def sync(self): method sync_with_cuda_stream (line 747) | def sync_with_cuda_stream(self, current_cuda_stream): FILE: kt-sft/ktransformers/operators/dynamic_attention.py class DynamicScaledDotProductAttention (line 30) | class DynamicScaledDotProductAttention: method __init__ (line 34) | def __init__( method get_attn_score_one_block (line 233) | def get_attn_score_one_block( method get_preselect_block_table_and_attn_score (line 271) | def get_preselect_block_table_and_attn_score( method get_attn_score (line 374) | def get_attn_score( method swap_in_and_swap_out (line 467) | def swap_in_and_swap_out(self, layer_idx, past_len, q_len, key, value): method calc_anchor (line 518) | def calc_anchor(self, cache_seqlens: int): method clear_importance (line 533) | def clear_importance(self, cache_seqlens: int): method clear_kvcache (line 549) | def clear_kvcache(self, cache_seqlens: int): method get_attn_sparsity (line 564) | def get_attn_sparsity( method apply (line 605) | def apply( method save (line 762) | def save(self, path: str, length: int): method load (line 775) | def load(self, path: str, length: int): FILE: kt-sft/ktransformers/operators/experts.py function deduplicate_and_sort (line 50) | def deduplicate_and_sort(lst): function generate_cuda_graphs (line 52) | def generate_cuda_graphs(chunk_size: int) -> list: class KExpertsBase (line 68) | class KExpertsBase(ABC): method __init__ (line 69) | def __init__(self, key: str, gguf_loader: GGUFLoader, config: Pretrain... method forward (line 77) | def forward(self, input_tensor, expert_ids, weights): method load (line 81) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 85) | def unload(): method load_weights (line 88) | def load_weights(self, override_key: str | None = None, device: str = ... method load_multi (line 138) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"): class KExpertsCPU (line 143) | class KExpertsCPU(KExpertsBase): method __init__ (line 152) | def __init__( method load (line 169) | def load(self, w: dict | nn.Parameter | tuple | None = None, device:st... method submit_for_one_decode (line 279) | def submit_for_one_decode(self, input_tensor, expert_ids, weights, bsz... method sync_for_one_decode (line 296) | def sync_for_one_decode(self, cuda_graph_idx=0): method forward (line 306) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor=None, ... method unload (line 350) | def unload(self): method load_weights (line 353) | def load_weights(self, override_key: str | None = None, device: str = ... class KSFTExpertsCPU (line 412) | class KSFTExpertsCPU(torch.autograd.Function): method __init__ (line 421) | def __init__( method load (line 452) | def load(self, w: dict | nn.Parameter | tuple | None = None, device:st... method submit_for_one_decode (line 556) | def submit_for_one_decode(self, input_tensor, expert_ids, weights): method sync_for_one_decode (line 562) | def sync_for_one_decode(self): method forward (line 568) | def forward(ctx, input_tensor, expert_ids, weights, cpu_infer, moe, ou... method backward (line 633) | def backward(ctx, output_grad): method unload (line 680) | def unload(self): method load_weights (line 683) | def load_weights(self, override_key: str | None = None, device: str = ... class KExpertsMarlin (line 743) | class KExpertsMarlin(KExpertsBase): method __init__ (line 746) | def __init__( method load (line 772) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 805) | def unload(self): method load_weights (line 812) | def load_weights(self, override_key: str | None = None): method forward (line 831) | def forward(self, hidden_states_cpu: torch.Tensor, selected_experts_cp... class KExpertsTorch (line 868) | class KExpertsTorch(KExpertsBase): method __init__ (line 874) | def __init__( method load (line 906) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 947) | def unload(self): method load_weights (line 953) | def load_weights(self, override_key: str | None = None): method forward (line 986) | def forward(self, hidden_states_cpu: torch.Tensor, selected_experts_cp... class KTransformersExperts (line 1181) | class KTransformersExperts(BaseInjectedModule, KExpertsBase): method __init__ (line 1182) | def __init__(self, method load (line 1207) | def load(self, w: dict = None, mode: InferenceState = None, warmup: b... method unload (line 1227) | def unload(self): method forward (line 1234) | def forward(self, input_tensor, expert_ids, weights): method set_inference_mode (line 1248) | def set_inference_mode(self, mode: InferenceState): class KQwen2MoeSparseMoeBlock (line 1266) | class KQwen2MoeSparseMoeBlock(BaseInjectedModule, Qwen2MoeSparseMoeBlock): method forward (line 1267) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: method moe_kexperts (line 1321) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w... method moe_infer_simple (line 1327) | def moe_infer_simple(self, hidden_states_cpu: torch.Tensor, selected_e... method moe_infer (line 1341) | def moe_infer(self, hidden_states_cpu: torch.Tensor, selected_experts_... class KDeepseekV2MoE (line 1370) | class KDeepseekV2MoE(BaseInjectedModule, DeepseekV2MoE): method forward (line 1371) | def forward(self, hidden_states): method moe_kexperts (line 1411) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w... method moe_infer_simple (line 1417) | def moe_infer_simple( method moe_infer (line 1435) | def moe_infer(self, x, topk_ids, topk_weight): class KDeepseekV3MoE (line 1468) | class KDeepseekV3MoE(BaseInjectedModule, DeepseekV3MoE): method forward (line 1470) | def forward(self, hidden_states): method moe_kexperts (line 1511) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w... method moe_infer_simple (line 1517) | def moe_infer_simple( method moe_infer (line 1535) | def moe_infer(self, x, topk_ids, topk_weight): class KMistralSparseMoEBlock (line 1568) | class KMistralSparseMoEBlock(BaseInjectedModule, MixtralSparseMoeBlock): method forward (line 1570) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: method moe_kexperts (line 1617) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w... method moe_infer_simple (line 1623) | def moe_infer_simple(self, hidden_states_cpu: torch.Tensor, selected_e... method moe_infer (line 1637) | def moe_infer(self, hidden_states_cpu: torch.Tensor, selected_experts_... class KDeepseekV3MoEV2 (line 1666) | class KDeepseekV3MoEV2(BaseInjectedModule, DeepseekV3MoE): method forward (line 1667) | def forward(self, hidden_states, bsz_tensor, cuda_graph_idx=0): method moe_on_cpuinfer (line 1709) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top... method moe_infer_simple (line 1716) | def moe_infer_simple( method moe_infer (line 1734) | def moe_infer(self, x, topk_ids, topk_weight): class KTransformersExpertsV2 (line 1767) | class KTransformersExpertsV2(BaseInjectedModule, KExpertsBase): method __init__ (line 1768) | def __init__(self, method load (line 1793) | def load(self, w: dict = None, mode: InferenceState = None, warmup: b... method unload (line 1813) | def unload(self): method forward (line 1820) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor, cuda_... method set_inference_mode (line 1830) | def set_inference_mode(self, mode: InferenceState): class KQwen2MoeSparseMoeBlockV2 (line 1840) | class KQwen2MoeSparseMoeBlockV2(BaseInjectedModule, Qwen2MoeSparseMoeBlo... method forward (line 1841) | def forward(self, hidden_states, bsz_tensor, cuda_graph_idx=0): method moe_on_cpuinfer (line 1895) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top... method moe_infer_simple (line 1902) | def moe_infer_simple( method moe_infer (line 1920) | def moe_infer(self, x, topk_ids, topk_weight): class KQwen3MoeSparseMoeBlockV2 (line 1953) | class KQwen3MoeSparseMoeBlockV2(BaseInjectedModule, Qwen3MoeSparseMoeBlo... method forward (line 1954) | def forward(self, hidden_states, bsz_tensor=None, cuda_graph_idx=0): method moe_on_cpuinfer (line 2017) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top... method moe_infer_simple (line 2024) | def moe_infer_simple( method moe_infer (line 2042) | def moe_infer(self, x, topk_ids, topk_weight): class KQwen3MoeSparseMoeBlock (line 2076) | class KQwen3MoeSparseMoeBlock(BaseInjectedModule, Qwen3MoeSparseMoeBlock): method forward (line 2077) | def forward(self, hidden_states): method moe_kexperts (line 2139) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w... method moe_infer_simple (line 2145) | def moe_infer_simple( method moe_infer (line 2163) | def moe_infer(self, x, topk_ids, topk_weight): FILE: kt-sft/ktransformers/operators/flashinfer_batch_prefill_wrapper.py function setup_seed (line 13) | def setup_seed(seed): class flashInferAttn (line 27) | class flashInferAttn(): method __init__ (line 30) | def __init__(self, method plan (line 65) | def plan(self, method calc_batch_indices (line 99) | def calc_batch_indices(self, ragged_size = None): method forward (line 107) | def forward(self, q, k_cache, v_cache, k, v): function testCudaGraph (line 116) | def testCudaGraph(): function testAttentionFlashInfer (line 260) | def testAttentionFlashInfer( FILE: kt-sft/ktransformers/operators/flashinfer_wrapper.py function attention_ref_torch (line 22) | def attention_ref_torch( class MLAWrapper (line 70) | class MLAWrapper(): method __init__ (line 71) | def __init__(self, method plan (line 109) | def plan(self, method run (line 152) | def run(self, q_nope, q_pe, ckv, k_pe, return_lse = False): class MLAWrapperSingleton (line 155) | class MLAWrapperSingleton(): method get_instance (line 159) | def get_instance(cls, device, *args, **kwargs)->MLAWrapper: method make_instance (line 165) | def make_instance(cls, device, *args, **kwargs): method plan_all (line 169) | def plan_all(cls, qo_indptr, method need_plan_all (line 198) | def need_plan_all(cls): method reset_buffer (line 203) | def reset_buffer(cls): method update_buffer (line 208) | def update_buffer(cls, max_pages): function checksame (line 214) | def checksame(): FILE: kt-sft/ktransformers/operators/gate.py class KMoEGateBase (line 15) | class KMoEGateBase(ABC): method __init__ (line 16) | def __init__(self, method forward (line 32) | def forward(self, input_tensor, expert_ids, weights): method load (line 36) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 40) | def unload(): method load_weights (line 43) | def load_weights(self, override_key: str | None = None, device: str = ... method load_multi (line 74) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"): class KMoEGate (line 81) | class KMoEGate(BaseInjectedModule, KMoEGateBase): method __init__ (line 82) | def __init__( method forward (line 97) | def forward(self, hidden_states) -> torch.Tensor: method load (line 100) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 112) | def unload(self): class KMoEGateQwen2Moe (line 119) | class KMoEGateQwen2Moe(BaseInjectedModule, KMoEGateBase): method __init__ (line 120) | def __init__( method forward (line 149) | def forward(self, hidden_states) -> torch.Tensor: method load (line 167) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 181) | def unload(self): class KMoEGateIPEXLLM (line 188) | class KMoEGateIPEXLLM(KMoEGate): method __init__ (line 189) | def __init__( method forward (line 204) | def forward(self, hidden_states) -> torch.Tensor: FILE: kt-sft/ktransformers/operators/layernorm.py class RMSNorm (line 43) | class RMSNorm(DeepseekV3RMSNorm, BaseInjectedModule): method __init__ (line 44) | def __init__(self, method forward (line 56) | def forward( method forward_native (line 74) | def forward_native( class KQwen2MoeRMSNorm (line 84) | class KQwen2MoeRMSNorm(Qwen2MoeRMSNorm, BaseInjectedModule): method __init__ (line 85) | def __init__(self, method forward (line 97) | def forward( method forward_native (line 115) | def forward_native( class KQwen3MoeRMSNorm (line 125) | class KQwen3MoeRMSNorm(Qwen3MoeRMSNorm, BaseInjectedModule): method __init__ (line 126) | def __init__(self, method forward (line 138) | def forward( method forward_native (line 159) | def forward_native( class DeepseekV3RMSNormTorch (line 168) | class DeepseekV3RMSNormTorch(DeepseekV3RMSNorm, BaseInjectedModule): method __init__ (line 169) | def __init__(self, method forward (line 181) | def forward( class KDeepseekRMSNormIPEXLLM (line 200) | class KDeepseekRMSNormIPEXLLM(DeepseekV3RMSNorm, BaseInjectedModule): method __init__ (line 201) | def __init__(self, method forward (line 214) | def forward(self, x: torch.Tensor) -> torch.Tensor: method load (line 222) | def load(self): FILE: kt-sft/ktransformers/operators/linear.py class KLinearBase (line 48) | class KLinearBase(nn.Module, ABC): method __init__ (line 49) | def __init__( method forward (line 80) | def forward(self, x: torch.Tensor) -> torch.Tensor: method load_weight (line 83) | def load_weight(self, override_key: str | None = None, device: str | N... method load_multi (line 127) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"): method load (line 134) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 138) | def unload(self): class KLinearTorch (line 142) | class KLinearTorch(KLinearBase): method __init__ (line 143) | def __init__( method forward (line 158) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None, **kw... method load (line 172) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 199) | def unload(self): class KLinearQ8 (line 205) | class KLinearQ8(KLinearBase): method __init__ (line 206) | def __init__( method forward (line 224) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None) -> t... method _dequantize_weight (line 239) | def _dequantize_weight(self, q_matrix, scales, bits=8): method _quantize_weight (line 275) | def _quantize_weight(self, matrix, bits=8): method load (line 330) | def load(self, w: Union[Dict, nn.Parameter, Tuple, None] = None, devic... method unload (line 361) | def unload(self): class KLinearFP8 (line 373) | class KLinearFP8(KLinearBase): method __init__ (line 379) | def __init__( method forward (line 394) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor) -> torch.... method load (line 401) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 416) | def unload(self): class VLinearMarlin (line 424) | class VLinearMarlin(KLinearBase): method __init__ (line 430) | def __init__( method load (line 462) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method forward (line 510) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->... method unload (line 549) | def unload(self): method _pad_input (line 559) | def _pad_input(self, x): class KLinearMarlin (line 580) | class KLinearMarlin(KLinearBase): method __init__ (line 586) | def __init__( method load (line 618) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method forward (line 664) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None, **kw... method unload (line 698) | def unload(self): class KLinearCPUInfer (line 708) | class KLinearCPUInfer(KLinearBase): method __init__ (line 710) | def __init__( method forward (line 733) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->... method load (line 772) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method load_weights (line 793) | def load_weights(self, w: dict | nn.Parameter | tuple | None = None, d... method unload (line 806) | def unload(self): class KLinearIPEXLLM (line 812) | class KLinearIPEXLLM(KLinearBase): method __init__ (line 813) | def __init__( method forward (line 831) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->... method load (line 842) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s... method unload (line 875) | def unload(self): class KTransformersLinear (line 891) | class KTransformersLinear(BaseInjectedModule, KLinearBase): method __init__ (line 892) | def __init__( method forward (line 920) | def forward(self, x, bsz_tensor=None): method load (line 940) | def load(self, w: dict | nn.Parameter | tuple | None = None, mode: Inf... method unload (line 962) | def unload(self): method set_inference_mode (line 969) | def set_inference_mode(self, mode: InferenceState): FILE: kt-sft/ktransformers/operators/mlp.py class kDeepseekV3MLP (line 8) | class kDeepseekV3MLP(DeepseekV3MLP, BaseInjectedModule): method __init__ (line 9) | def __init__(self, method forward (line 20) | def forward(self, x, bsz_tensor): class KQwen2MoeMLP (line 23) | class KQwen2MoeMLP(Qwen2MoeMLP, BaseInjectedModule): method __init__ (line 24) | def __init__(self, method forward (line 35) | def forward(self, x, bsz_tensor): FILE: kt-sft/ktransformers/operators/models.py class KQwen2MoeModel (line 188) | class KQwen2MoeModel(BaseInjectedModule): method __init__ (line 196) | def __init__( method forward (line 215) | def forward( method load_layer_to (line 446) | def load_layer_to(self, layer: Qwen2MoeDecoderLayer, target: Inference... class KDeepseekV2Model (line 550) | class KDeepseekV2Model(BaseInjectedModule): method __init__ (line 558) | def __init__( method forward (line 577) | def forward( method load_layer_to (line 834) | def load_layer_to(self, layer: DeepseekV2DecoderLayer, target: Inferen... class LlamaPreTrainedModel (line 960) | class LlamaPreTrainedModel(PreTrainedModel): method _init_weights (line 972) | def _init_weights(self, module): class KLlamaModel (line 984) | class KLlamaModel(BaseInjectedModule): method __init__ (line 994) | def __init__( method get_input_embeddings (line 1041) | def get_input_embeddings(self): method set_input_embeddings (line 1044) | def set_input_embeddings(self, value): method forward (line 1048) | def forward( method forward_chunk (line 1185) | def forward_chunk( method _update_causal_mask (line 1286) | def _update_causal_mask( class KQwen3MoeModel (line 1466) | class KQwen3MoeModel(BaseInjectedModule): method __init__ (line 1474) | def __init__( method forward (line 1501) | def forward( method load_layer_to (line 1725) | def load_layer_to(self, layer: Qwen3MoeDecoderLayer, target: Inference... FILE: kt-sft/ktransformers/operators/triton_attention.py function tanh (line 11) | def tanh(x): function _fwd_grouped_kernel_stage1 (line 16) | def _fwd_grouped_kernel_stage1( function _decode_grouped_att_m_fwd (line 165) | def _decode_grouped_att_m_fwd( function _fwd_kernel_stage2 (line 258) | def _fwd_kernel_stage2( function _decode_softmax_reducev_fwd (line 313) | def _decode_softmax_reducev_fwd( function decode_attention_fwd_grouped (line 358) | def decode_attention_fwd_grouped( FILE: kt-sft/ktransformers/operators/triton_attention_prefill.py function _fwd_kernel (line 24) | def _fwd_kernel( function context_attention_fwd (line 159) | def context_attention_fwd( FILE: kt-sft/ktransformers/optimize/optimize.py function inject (line 20) | def inject(module, local_optimization_dict, model_config:AutoConfig ,ggu... function del_meta (line 44) | def del_meta(module:nn.Module): function gen_optimize_config (line 55) | def gen_optimize_config(module: nn.Module, out_data: Mapping, rule_list:... function translate_model_config (line 109) | def translate_model_config(model_config: PretrainedConfig): function optimize_and_load_gguf (line 117) | def optimize_and_load_gguf(module: nn.Module, rule_file: str, gguf_path:... FILE: kt-sft/ktransformers/server/api/ollama/completions.py class OllamaGenerateCompletionRequest (line 21) | class OllamaGenerateCompletionRequest(BaseModel): class OllamaGenerationStreamResponse (line 45) | class OllamaGenerationStreamResponse(BaseModel): class OllamaGenerationResponse (line 51) | class OllamaGenerationResponse(BaseModel): function generate (line 58) | async def generate(request: Request, input: OllamaGenerateCompletionRequ... class OllamaChatCompletionMessage (line 103) | class OllamaChatCompletionMessage(BaseModel): class OllamaChatCompletionRequest (line 107) | class OllamaChatCompletionRequest(BaseModel): class OllamaChatCompletionStreamResponse (line 113) | class OllamaChatCompletionStreamResponse(BaseModel): class OllamaChatCompletionResponse (line 126) | class OllamaChatCompletionResponse(BaseModel): function chat (line 140) | async def chat(request: Request, input: OllamaChatCompletionRequest): class OllamaModel (line 226) | class OllamaModel(BaseModel): function tags (line 234) | async def tags(): class OllamaModelInfo (line 239) | class OllamaModelInfo(BaseModel): class OllamaShowRequest (line 243) | class OllamaShowRequest(BaseModel): class OllamaShowDetial (line 248) | class OllamaShowDetial(BaseModel): class OllamaShowResponse (line 256) | class OllamaShowResponse(BaseModel): class Config (line 263) | class Config: function show (line 267) | async def show(request: Request, input: OllamaShowRequest): FILE: kt-sft/ktransformers/server/api/openai/__init__.py function post_db_creation_operations (line 14) | def post_db_creation_operations(): FILE: kt-sft/ktransformers/server/api/openai/assistants/assistants.py function create_assistant (line 19) | async def create_assistant( function list_assistants (line 26) | async def list_assistants( function list_assistants_with_status (line 38) | async def list_assistants_with_status( function retrieve_assistant (line 48) | async def retrieve_assistant( function modify_assistant (line 55) | async def modify_assistant( function delete_assistant (line 63) | async def delete_assistant(assistant_id: str): function get_related_thread (line 69) | async def get_related_thread(assistant_id: ObjectID): function create_default_assistant (line 74) | def create_default_assistant(): function test_create_assistant (line 90) | def test_create_assistant(): FILE: kt-sft/ktransformers/server/api/openai/assistants/messages.py function create_message (line 16) | async def create_message(thread_id: str, msg: MessageCreate): function list_messages (line 26) | async def list_messages( function retrieve_message (line 38) | async def retrieve_message(thread_id: ObjectID, message_id: ObjectID): function modify_message (line 43) | async def modify_message(thread_id: ObjectID, message_id: ObjectID, msg:... function delete_message (line 49) | async def delete_message(thread_id: ObjectID, message_id: ObjectID): FILE: kt-sft/ktransformers/server/api/openai/assistants/runs.py function create_run (line 20) | async def create_run(request: Request, thread_id: str, run_create: RunCr... function create_thread_and_run (line 40) | async def create_thread_and_run(run_thread: RunThreadCreate): function list_runs (line 45) | async def list_runs( function retrieve_run (line 56) | async def retrieve_run( function modify_run (line 67) | async def modify_run( function submit_tool_outputs_to_run (line 76) | async def submit_tool_outputs_to_run(thread_id: str, run_id: str, submit... function cancel_run (line 81) | async def cancel_run(thread_id: str, run_id: str): FILE: kt-sft/ktransformers/server/api/openai/assistants/threads.py function create_thread (line 14) | async def create_thread(thread: ThreadCreate): function list_threads (line 19) | async def list_threads(limit: Optional[int] = 20, order: Order = Order.D... function retrieve_thread (line 24) | async def retrieve_thread(thread_id: ObjectID): function modify_thread (line 29) | async def modify_thread(thread_id: ObjectID, thread: ThreadModify): function delete_thread (line 34) | async def delete_thread(thread_id: ObjectID): FILE: kt-sft/ktransformers/server/api/openai/endpoints/chat.py class Choice (line 22) | class Choice(BaseModel): class ChatCompletion (line 30) | class ChatCompletion(BaseModel): class ChatCompletionMessageToolCallFunction (line 41) | class ChatCompletionMessageToolCallFunction(BaseModel): class ChatCompletionMessageToolCall (line 45) | class ChatCompletionMessageToolCall(BaseModel): class ChatCompletionMessage (line 50) | class ChatCompletionMessage(BaseModel): function list_models (line 58) | async def list_models(): function getTools (line 61) | def getTools(buffer): function get_tool_instructions (line 117) | def get_tool_instructions(): function chat_completion (line 136) | async def chat_completion(request: Request, create: ChatCompletionCreate): FILE: kt-sft/ktransformers/server/api/openai/legacy/completions.py function create_completion (line 15) | async def create_completion(request:Request, create:CompletionCreate): FILE: kt-sft/ktransformers/server/api/web/system.py function system_info (line 8) | def system_info(): FILE: kt-sft/ktransformers/server/args.py class ArgumentParser (line 6) | class ArgumentParser: method __init__ (line 7) | def __init__(self, cfg): method parse_args (line 10) | def parse_args(self): FILE: kt-sft/ktransformers/server/backend/args.py class ConfigArgs (line 6) | class ConfigArgs(BaseModel): class Config (line 12) | class Config: FILE: kt-sft/ktransformers/server/backend/base.py class BackendInterfaceBase (line 27) | class BackendInterfaceBase: method __init__ (line 36) | def __init__(self, args:ConfigArgs = default_args): method inference (line 40) | async def inference(self,local_messages,request_unique_id:Optional[str... method report_last_time_performance (line 57) | def report_last_time_performance(self): class ThreadContext (line 70) | class ThreadContext: method __init__ (line 89) | def __init__(self, run: RunObject,interface:BackendInterfaceBase, args... method get_local_messages (line 102) | def get_local_messages(self): method update_by_run (line 109) | def update_by_run(self,run:RunObject,args:ConfigArgs = default_args): method put_user_message (line 113) | def put_user_message(self, message: MessageObject): method delete_user_message (line 119) | def delete_user_message(self,message_id: ObjectID): method work (line 122) | async def work(self)->AsyncIterator: FILE: kt-sft/ktransformers/server/backend/context_manager.py class ThreadContextManager (line 17) | class ThreadContextManager: method __init__ (line 22) | def __init__(self,interface) -> None: method get_context_by_run_object (line 29) | async def get_context_by_run_object(self, run: RunObject) -> ThreadCon... method get_context_by_thread_id (line 57) | async def get_context_by_thread_id(self, thread_id: ObjectID) -> Optio... FILE: kt-sft/ktransformers/server/backend/interfaces/balance_serve.py function chat_stream (line 66) | async def chat_stream(queue: asyncio.Queue, tokenizer: AutoTokenizer): function fill_generated_tokens (line 84) | def fill_generated_tokens(query_updates: list[sched_ext.QueryUpdate], ge... function report_last_time_performance (line 94) | def report_last_time_performance(profiler: Profiler): class Engine (line 106) | class Engine: method __init__ (line 114) | def __init__(self, args: ConfigArgs = default_args, generated_token_qu... method sampling (line 208) | def sampling(self, forward_output: ForwardBatchOutput): method loop (line 226) | def loop(self): class BalanceServeThreadContext (line 266) | class BalanceServeThreadContext(ThreadContext): method get_local_messages (line 267) | def get_local_messages(self): function run_engine (line 275) | def run_engine(args, token_queue, broadcast_endpoint, event, kvcache_eve... class BalanceServeInterface (line 284) | class BalanceServeInterface(BackendInterfaceBase): method __init__ (line 300) | def __init__(self, args: ConfigArgs = default_args): method get_params (line 359) | def get_params(self, temperature: Optional[float] = None, top_p: Optio... method run_queue_proxy (line 380) | def run_queue_proxy(self): method lifespan (line 386) | async def lifespan(self, app: FastAPI): method queue_proxy (line 390) | async def queue_proxy(self): method tokenize_prompt (line 407) | def tokenize_prompt(self, prompt: str): method format_and_tokenize_input_ids (line 411) | def format_and_tokenize_input_ids(self, thread_id: ObjectID, messages:... method inference (line 420) | async def inference(self, local_messages, thread_id: str, temperature:... FILE: kt-sft/ktransformers/server/backend/interfaces/exllamav2.py class ExllamaThreadContext (line 14) | class ExllamaThreadContext(ThreadContext): method __init__ (line 15) | def __init__(self, run: RunObject, args: ConfigArgs = default_args) ->... method get_interface (line 18) | def get_interface(self): method get_local_messages (line 21) | def get_local_messages(self): class ExllamaInterface (line 27) | class ExllamaInterface(BackendInterfaceBase): method __init__ (line 29) | def __init__(self, args: ConfigArgs = ...): method tokenize_prompt (line 32) | def tokenize_prompt(self, prompt: str) -> torch.Tensor: method inference (line 35) | async def inference(self,local_messages,request_unique_id:Optional[str... FILE: kt-sft/ktransformers/server/backend/interfaces/ktransformers.py class KTransformersThreadContext (line 25) | class KTransformersThreadContext(TransformersThreadContext): class KTransformersInterface (line 29) | class KTransformersInterface(TransformersInterface): method __init__ (line 30) | def __init__(self, args: ConfigArgs = default_args): method decode_one_tokens (line 83) | def decode_one_tokens(self): method prefill (line 133) | def prefill(self, input_ids: torch.Tensor, is_new: bool, temperature: ... method active_cache_position (line 236) | def active_cache_position(self): method inference (line 240) | async def inference(self, local_messages, thread_id: str, temperature:... FILE: kt-sft/ktransformers/server/backend/interfaces/transformers.py class TextStreamer (line 37) | class TextStreamer: method __init__ (line 39) | def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = Fal... method reset (line 49) | def reset(self): method put (line 53) | def put(self, value) -> Optional[str]: method end (line 83) | def end(self) -> Optional[str]: method _is_chinese_char (line 96) | def _is_chinese_char(self, cp): class TransformersThreadContext (line 121) | class TransformersThreadContext(ThreadContext): method get_local_messages (line 122) | def get_local_messages(self): class TransformersInterface (line 130) | class TransformersInterface(BackendInterfaceBase): method __init__ (line 146) | def __init__(self, args: ConfigArgs = default_args): method current_ids (line 165) | def current_ids(self): method active_cache_position (line 169) | def active_cache_position(self): method tokenize_prompt (line 172) | def tokenize_prompt(self, prompt: str): method format_and_tokenize_input_ids (line 176) | def format_and_tokenize_input_ids(self, thread_id: ObjectID, messages:... method append_new_tokens (line 213) | def append_new_tokens(self, new_tokens: int) -> Optional[str]: method tf_logits_warper (line 219) | def tf_logits_warper(generation_config): method prepare_logits_wrapper (line 270) | def prepare_logits_wrapper(self, inputs, device, temperature: Optional... method logits_to_token (line 289) | def logits_to_token(self, logits: torch.Tensor): method decode_one_tokens (line 304) | def decode_one_tokens(self): method prefill (line 320) | def prefill(self, input_ids: torch.Tensor, is_new: bool, temperature: ... method generate (line 397) | def generate(self): method check_is_new (line 427) | def check_is_new(self, thread_id: str): method inference (line 440) | async def inference(self, local_messages, thread_id: str, temperature:... FILE: kt-sft/ktransformers/server/balance_serve/inference/config.py class ModelConfig (line 19) | class ModelConfig: method __init__ (line 56) | def __init__(self, config): method load_config (line 70) | def load_config(self): class ParallelConfig (line 88) | class ParallelConfig: method __init__ (line 89) | def __init__( class AttnConfig (line 98) | class AttnConfig: method __init__ (line 104) | def __init__(self, config): class SamplerConfig (line 111) | class SamplerConfig(): method __init__ (line 116) | def __init__(self, config): function load_yaml_config (line 121) | def load_yaml_config(file_path): class LLMConfig (line 128) | class LLMConfig: method __init__ (line 135) | def __init__(self, config_file): FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/communication_op.py function tensor_model_parallel_all_reduce (line 15) | def tensor_model_parallel_all_reduce(input_: torch.Tensor, bsz_tensor: t... function tensor_model_parallel_all_gather (line 20) | def tensor_model_parallel_all_gather( function tensor_model_parallel_gather (line 27) | def tensor_model_parallel_gather( function broadcast_tensor_dict (line 34) | def broadcast_tensor_dict( FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/cuda_wrapper.py class cudaIpcMemHandle_t (line 21) | class cudaIpcMemHandle_t(ctypes.Structure): class Function (line 26) | class Function: function find_loaded_library (line 32) | def find_loaded_library(lib_name) -> Optional[str]: class CudaRTLibrary (line 58) | class CudaRTLibrary: method __init__ (line 100) | def __init__(self, so_file: Optional[str] = None): method CUDART_CHECK (line 120) | def CUDART_CHECK(self, result: cudaError_t) -> None: method cudaGetErrorString (line 125) | def cudaGetErrorString(self, error: cudaError_t) -> str: method cudaSetDevice (line 128) | def cudaSetDevice(self, device: int) -> None: method cudaDeviceSynchronize (line 131) | def cudaDeviceSynchronize(self) -> None: method cudaDeviceReset (line 134) | def cudaDeviceReset(self) -> None: method cudaMalloc (line 137) | def cudaMalloc(self, size: int) -> ctypes.c_void_p: method cudaFree (line 142) | def cudaFree(self, devPtr: ctypes.c_void_p) -> None: method cudaMemset (line 145) | def cudaMemset(self, devPtr: ctypes.c_void_p, value: int, method cudaMemcpy (line 149) | def cudaMemcpy(self, dst: ctypes.c_void_p, src: ctypes.c_void_p, method cudaIpcGetMemHandle (line 155) | def cudaIpcGetMemHandle(self, method cudaIpcOpenMemHandle (line 162) | def cudaIpcOpenMemHandle(self, FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/custom_all_reduce.py function _can_p2p (line 25) | def _can_p2p(rank: int, world_size: int) -> bool: function is_weak_contiguous (line 37) | def is_weak_contiguous(inp: torch.Tensor): class CustomAllreduce (line 44) | class CustomAllreduce: method __init__ (line 49) | def __init__( method create_shared_buffer (line 179) | def create_shared_buffer( method free_shared_buffer (line 204) | def free_shared_buffer( method capture (line 212) | def capture(self): method register_graph_buffers (line 226) | def register_graph_buffers(self): method should_custom_ar (line 244) | def should_custom_ar(self, inp: torch.Tensor): method all_reduce (line 259) | def all_reduce( method custom_all_reduce (line 284) | def custom_all_reduce(self, input: torch.Tensor, bsz_tensor: torch.Ten... method close (line 302) | def close(self): method __del__ (line 309) | def __del__(self): FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/custom_all_reduce_utils.py function producer (line 19) | def producer( function consumer (line 53) | def consumer( function can_actually_p2p (line 94) | def can_actually_p2p( function gpu_p2p_access_check (line 194) | def gpu_p2p_access_check(src: int, tgt: int) -> bool: FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/parallel_state.py class GraphCaptureContext (line 43) | class GraphCaptureContext: function _split_tensor_dict (line 50) | def _split_tensor_dict( function _get_unique_name (line 79) | def _get_unique_name(name: str) -> str: function _register_group (line 95) | def _register_group(group: "GroupCoordinator") -> None: function inplace_all_reduce (line 101) | def inplace_all_reduce(tensor: torch.Tensor, group_name: str) -> None: function inplace_all_reduce_fake (line 108) | def inplace_all_reduce_fake(tensor: torch.Tensor, group_name: str) -> None: function outplace_all_reduce (line 118) | def outplace_all_reduce(tensor: torch.Tensor, group_name: str, bsz_tenso... function outplace_all_reduce_fake (line 125) | def outplace_all_reduce_fake(tensor: torch.Tensor, group_name: str, bsz_... class GroupCoordinator (line 136) | class GroupCoordinator: method __init__ (line 169) | def __init__( method first_rank (line 271) | def first_rank(self): method last_rank (line 276) | def last_rank(self): method is_first_rank (line 281) | def is_first_rank(self): method is_last_rank (line 286) | def is_last_rank(self): method next_rank (line 291) | def next_rank(self): method prev_rank (line 298) | def prev_rank(self): method graph_capture (line 305) | def graph_capture( method all_reduce (line 352) | def all_reduce(self, input_: torch.Tensor, bsz_tensor: torch.Tensor, i... method _all_reduce_out_place (line 406) | def _all_reduce_out_place(self, input_: torch.Tensor, bsz_tensor: torc... method _all_reduce_in_place (line 414) | def _all_reduce_in_place(self, input_: torch.Tensor) -> None: method all_gather (line 421) | def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Ten... method gather (line 464) | def gather( method broadcast (line 499) | def broadcast(self, input_: torch.Tensor, src: int = 0): method broadcast_object (line 514) | def broadcast_object(self, obj: Optional[Any] = None, src: int = 0): method broadcast_object_list (line 538) | def broadcast_object_list( method send_object (line 555) | def send_object(self, obj: Any, dst: int) -> None: method recv_object (line 582) | def recv_object(self, src: int) -> Any: method broadcast_tensor_dict (line 618) | def broadcast_tensor_dict( method send_tensor_dict (line 700) | def send_tensor_dict( method recv_tensor_dict (line 753) | def recv_tensor_dict( method barrier (line 815) | def barrier(self): method send (line 824) | def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None: method recv (line 836) | def recv( method destroy (line 852) | def destroy(self): function get_world_group (line 870) | def get_world_group() -> GroupCoordinator: function init_world_group (line 875) | def init_world_group( function init_model_parallel_group (line 891) | def init_model_parallel_group( function get_tp_group (line 918) | def get_tp_group() -> GroupCoordinator: function get_pp_group (line 929) | def get_pp_group() -> GroupCoordinator: function graph_capture (line 939) | def graph_capture(): function set_custom_all_reduce (line 962) | def set_custom_all_reduce(enable: bool): function init_distributed_environment (line 967) | def init_distributed_environment( function initialize_model_parallel (line 1014) | def initialize_model_parallel( function ensure_model_parallel_initialized (line 1091) | def ensure_model_parallel_initialized( function model_parallel_is_initialized (line 1120) | def model_parallel_is_initialized(): function patch_tensor_parallel_group (line 1129) | def patch_tensor_parallel_group(tp_group: GroupCoordinator): function get_tensor_model_parallel_world_size (line 1153) | def get_tensor_model_parallel_world_size(): function get_tensor_model_parallel_rank (line 1158) | def get_tensor_model_parallel_rank(): function destroy_model_parallel (line 1163) | def destroy_model_parallel(): function destroy_distributed_environment (line 1176) | def destroy_distributed_environment(): function cleanup_dist_env_and_memory (line 1185) | def cleanup_dist_env_and_memory(shutdown_ray: bool = False): function in_the_same_node_as (line 1199) | def in_the_same_node_as(pg: ProcessGroup, source_rank: int = 0) -> List[... FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/pynccl.py class PyNcclCommunicator (line 21) | class PyNcclCommunicator: method __init__ (line 23) | def __init__( method all_reduce (line 119) | def all_reduce( method send (line 143) | def send(self, tensor: torch.Tensor, dst: int, stream=None): method recv (line 161) | def recv(self, tensor: torch.Tensor, src: int, stream=None): method change_state (line 180) | def change_state( FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/pynccl_wrapper.py class ncclUniqueId (line 41) | class ncclUniqueId(ctypes.Structure): class ncclDataTypeEnum (line 51) | class ncclDataTypeEnum: method from_torch (line 70) | def from_torch(cls, dtype: torch.dtype) -> int: class ncclRedOpTypeEnum (line 93) | class ncclRedOpTypeEnum: method from_torch (line 102) | def from_torch(cls, op: ReduceOp) -> int: class Function (line 117) | class Function: class NCCLLibrary (line 123) | class NCCLLibrary: method __init__ (line 184) | def __init__(self, so_file: Optional[str] = None): method ncclGetErrorString (line 215) | def ncclGetErrorString(self, result: ncclResult_t) -> str: method NCCL_CHECK (line 218) | def NCCL_CHECK(self, result: ncclResult_t) -> None: method ncclGetVersion (line 223) | def ncclGetVersion(self) -> str: method ncclGetUniqueId (line 233) | def ncclGetUniqueId(self) -> ncclUniqueId: method ncclCommInitRank (line 239) | def ncclCommInitRank(self, world_size: int, unique_id: ncclUniqueId, method ncclAllReduce (line 247) | def ncclAllReduce(self, sendbuff: buffer_type, recvbuff: buffer_type, method ncclSend (line 259) | def ncclSend(self, sendbuff: buffer_type, count: int, datatype: int, method ncclRecv (line 264) | def ncclRecv(self, recvbuff: buffer_type, count: int, datatype: int, method ncclCommDestroy (line 269) | def ncclCommDestroy(self, comm: ncclComm_t) -> None: FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/utils.py function ensure_divisibility (line 17) | def ensure_divisibility(numerator, denominator): function divide (line 24) | def divide(numerator, denominator): function split_tensor_along_last_dim (line 31) | def split_tensor_along_last_dim( function get_pp_indices (line 59) | def get_pp_indices( class StatelessProcessGroup (line 92) | class StatelessProcessGroup: method __post_init__ (line 113) | def __post_init__(self): method send_obj (line 119) | def send_obj(self, obj: Any, dst: int): method expire_data (line 127) | def expire_data(self): method recv_obj (line 138) | def recv_obj(self, src: int) -> Any: method broadcast_obj (line 146) | def broadcast_obj(self, obj: Optional[Any], src: int) -> Any: method all_gather_obj (line 164) | def all_gather_obj(self, obj: Any) -> list[Any]: method barrier (line 176) | def barrier(self): method create (line 185) | def create( FILE: kt-sft/ktransformers/server/balance_serve/inference/forward_batch.py class ForwardBatchInput (line 11) | class ForwardBatchInput: class ForwardMiniBatch (line 13) | class ForwardMiniBatch: method __init__ (line 31) | def __init__(self, prefill_querys_info: list[QueryInfo], decode_quer... method fill (line 94) | def fill(self, prefill_querys_info: list[QueryInfo], decode_querys_i... method __init__ (line 170) | def __init__(self, batch : sched_ext.BatchQueryTodo = None, query_mana... method gen_max_forward_batch (line 198) | def gen_max_forward_batch( method fill (line 244) | def fill(self, batch : sched_ext.BatchQueryTodo = None, query_manager:... class ForwardBatchOutput (line 268) | class ForwardBatchOutput: method __init__ (line 278) | def __init__(self): FILE: kt-sft/ktransformers/server/balance_serve/inference/model_runner.py function pad_num_tokens (line 37) | def pad_num_tokens(num_tokens): function deduplicate_and_sort (line 40) | def deduplicate_and_sort(lst): function generate_cuda_graphs (line 42) | def generate_cuda_graphs(chunk_size: int) -> list: class ModelRunner (line 52) | class ModelRunner: method __init__ (line 59) | def __init__(self, model = None, device = None, use_cuda_graph = False... method model_attn_plan (line 88) | def model_attn_plan(self, batch, cuda_graph_idx=0): method warmup (line 104) | def warmup(self): method run (line 159) | def run(self, batch: sched_ext.BatchQueryTodo = None, query_manager: Q... method replay (line 225) | def replay(self, cuda_graph_idx=-1): method sync (line 233) | def sync(self, calc_time = True): FILE: kt-sft/ktransformers/server/balance_serve/inference/query_manager.py class QueryInfo (line 11) | class QueryInfo: method __init__ (line 25) | def __init__(self, id, query_length: int, max_length: int, page_size: ... method check_stop (line 41) | def check_stop(self): method print (line 64) | def print(self): class QueryManager (line 69) | class QueryManager: method __init__ (line 75) | def __init__(self, page_size = 256, device = torch.device('cuda')): method add_query (line 80) | def add_query(self, batch: sched_ext.BatchQueryTodo): method update (line 103) | def update(self, batch: sched_ext.BatchQueryTodo) -> list[sched_ext.Qu... FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/penaltylib/orchestrator.py class _ReqLike (line 9) | class _ReqLike: class _BatchLike (line 14) | class _BatchLike: method batch_size (line 17) | def batch_size(self): class BatchedPenalizerOrchestrator (line 21) | class BatchedPenalizerOrchestrator: method __init__ (line 27) | def __init__( method reqs (line 51) | def reqs(self): method batch_size (line 54) | def batch_size(self): method cumulate_input_tokens (line 57) | def cumulate_input_tokens( method cumulate_output_tokens (line 74) | def cumulate_output_tokens( method apply (line 94) | def apply(self, logits: torch.Tensor) -> torch.Tensor: method filter (line 113) | def filter( method merge (line 149) | def merge(self, their: "BatchedPenalizerOrchestrator"): class _TokenIDs (line 171) | class _TokenIDs: method __init__ (line 185) | def __init__( method occurrence_count (line 204) | def occurrence_count(self) -> torch.Tensor: class _BatchedPenalizer (line 244) | class _BatchedPenalizer(abc.ABC): method __init__ (line 252) | def __init__(self, orchestrator: BatchedPenalizerOrchestrator): method is_prepared (line 255) | def is_prepared(self) -> bool: method is_required (line 258) | def is_required(self) -> bool: method prepare (line 261) | def prepare(self): method prepare_if_required (line 266) | def prepare_if_required(self): method teardown (line 273) | def teardown(self): method cumulate_input_tokens (line 278) | def cumulate_input_tokens(self, input_ids: _TokenIDs): method cumulate_output_tokens (line 284) | def cumulate_output_tokens(self, output_ids: _TokenIDs): method apply (line 290) | def apply(self, logits: torch.Tensor) -> torch.Tensor: method filter (line 296) | def filter( method merge (line 307) | def merge(self, their: "_BatchedPenalizer"): method _is_required (line 316) | def _is_required(self) -> bool: method _prepare (line 323) | def _prepare(self): method _teardown (line 331) | def _teardown(self): method _cumulate_input_tokens (line 339) | def _cumulate_input_tokens(self, input_ids: _TokenIDs): method _cumulate_output_tokens (line 347) | def _cumulate_output_tokens(self, output_ids: _TokenIDs): method _apply (line 355) | def _apply(self, logits: torch.Tensor) -> torch.Tensor: method _filter (line 363) | def _filter( method _merge (line 372) | def _merge(self, their: "_BatchedPenalizer"): FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/frequency_penalty.py class BatchedFrequencyPenalizer (line 8) | class BatchedFrequencyPenalizer(_BatchedPenalizer): method _is_required (line 16) | def _is_required(self) -> bool: method _prepare (line 22) | def _prepare(self): method _teardown (line 46) | def _teardown(self): method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs): method _cumulate_output_tokens (line 56) | def _cumulate_output_tokens(self, output_ids: _TokenIDs): method _apply (line 61) | def _apply(self, logits: torch.Tensor) -> torch.Tensor: method _filter (line 65) | def _filter( method _merge (line 73) | def _merge(self, their: "BatchedFrequencyPenalizer"): FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/min_new_tokens.py class BatchedMinNewTokensPenalizer (line 8) | class BatchedMinNewTokensPenalizer(_BatchedPenalizer): method _is_required (line 17) | def _is_required(self) -> bool: method _prepare (line 22) | def _prepare(self): method _teardown (line 72) | def _teardown(self): method _cumulate_input_tokens (line 81) | def _cumulate_input_tokens(self, input_ids: _TokenIDs): method _cumulate_output_tokens (line 84) | def _cumulate_output_tokens(self, output_ids: _TokenIDs): method _apply (line 87) | def _apply(self, logits: torch.Tensor) -> torch.Tensor: method _filter (line 92) | def _filter( method _merge (line 99) | def _merge(self, their: "BatchedMinNewTokensPenalizer"): FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/presence_penalty.py class BatchedPresencePenalizer (line 8) | class BatchedPresencePenalizer(_BatchedPenalizer): method _is_required (line 16) | def _is_required(self) -> bool: method _prepare (line 22) | def _prepare(self): method _teardown (line 46) | def _teardown(self): method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs): method _cumulate_output_tokens (line 56) | def _cumulate_output_tokens(self, output_ids: _TokenIDs): method _apply (line 60) | def _apply(self, logits: torch.Tensor) -> torch.Tensor: method _filter (line 64) | def _filter( method _merge (line 72) | def _merge(self, their: "BatchedPresencePenalizer"): FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/repetition_penalty.py class BatchedRepetitionPenalizer (line 8) | class BatchedRepetitionPenalizer(_BatchedPenalizer): method _is_required (line 16) | def _is_required(self) -> bool: method _prepare (line 22) | def _prepare(self): method _teardown (line 46) | def _teardown(self): method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs): method _cumulate_output_tokens (line 57) | def _cumulate_output_tokens(self, output_ids: _TokenIDs): method _apply (line 61) | def _apply(self, logits: torch.Tensor) -> torch.Tensor: method _filter (line 68) | def _filter( method _merge (line 76) | def _merge(self, their: "BatchedRepetitionPenalizer"): FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/sampler.py class SamplingOptions (line 20) | class SamplingOptions(): method __init__ (line 33) | def __init__(self, bsz = 1, device = torch.device('cuda'), pretrained_... class Sampler (line 54) | class Sampler(nn.Module): method __init__ (line 55) | def __init__(self): method forward (line 58) | def forward( FILE: kt-sft/ktransformers/server/balance_serve/sched_rpc.py class SchedulerServer (line 24) | class SchedulerServer: method __init__ (line 25) | def __init__(self, settings, main_args): method run_scheduler (line 36) | def run_scheduler(self): method stop_scheduler (line 39) | def stop_scheduler(self): method start_proxy (line 42) | def start_proxy(self): method worker_routine (line 45) | def worker_routine(self): method start_rpc_service (line 102) | def start_rpc_service(self): method stop_rpc_service (line 117) | def stop_rpc_service(self): function start_server (line 123) | def start_server(settings, main_args): class SchedulerClient (line 129) | class SchedulerClient: method __init__ (line 130) | def __init__(self, sched_port): method __del__ (line 138) | def __del__(self): method send_request (line 142) | def send_request(self, method, params=None): method add_query (line 159) | def add_query(self, query): method cancel_query (line 163) | def cancel_query(self, query_id): method update_last_batch (line 166) | def update_last_batch(self, updates): method rebuild_inferece_context (line 171) | def rebuild_inferece_context(self,response): method get_inference_context_raw (line 179) | def get_inference_context_raw(self): FILE: kt-sft/ktransformers/server/balance_serve/settings.py function create_sched_settings (line 16) | def create_sched_settings(args): function create_sched_settings_qwen2moe (line 69) | def create_sched_settings_qwen2moe(args): function create_sched_settings_qwen3moe (line 123) | def create_sched_settings_qwen3moe(args): FILE: kt-sft/ktransformers/server/config/config.py class Config (line 20) | class Config(metaclass=Singleton): method load (line 26) | def load() -> dict: method to_path (line 53) | def to_path(path: str) -> str: method __init__ (line 61) | def __init__(self): FILE: kt-sft/ktransformers/server/config/log.py class DailyRotatingFileHandler (line 25) | class DailyRotatingFileHandler(BaseRotatingHandler): method __init__ (line 32) | def __init__(self, filename, backupCount=0, encoding=None, delay=False... method shouldRollover (line 46) | def shouldRollover(self, record): method doRollover (line 59) | def doRollover(self): method _compute_fn (line 78) | def _compute_fn(self): method _open (line 84) | def _open(self): method delete_expired_files (line 106) | def delete_expired_files(self): class Logger (line 132) | class Logger(object): method __init__ (line 144) | def __init__(self, level: str = 'info'): FILE: kt-sft/ktransformers/server/config/singleton.py class Singleton (line 13) | class Singleton(abc.ABCMeta, type): method __call__ (line 24) | def __call__(cls, *args, **kwds): class AbstractSingleton (line 29) | class AbstractSingleton(abc.ABC, metaclass=Singleton): FILE: kt-sft/ktransformers/server/crud/assistants/assistants.py class AssistantDatabaseManager (line 12) | class AssistantDatabaseManager: method __init__ (line 13) | def __init__(self) -> None: method create_assistant_object (line 16) | def create_assistant_object(self, assistant: AssistantCreate) -> Assis... method db_count_assistants (line 25) | def db_count_assistants(self) -> int: method db_create_assistant (line 29) | def db_create_assistant(self, assistant: AssistantCreate): method db_list_assistants (line 34) | def db_list_assistants(self, limit: Optional[int], order: Order) -> Li... method db_get_assistant_by_id (line 44) | def db_get_assistant_by_id(self, assistant_id: str) -> Optional[Assist... method db_update_assistant_by_id (line 53) | def db_update_assistant_by_id(self, assistant_id: str, assistant: Assi... method db_delete_assistant_by_id (line 60) | def db_delete_assistant_by_id(self, assistant_id: str): FILE: kt-sft/ktransformers/server/crud/assistants/messages.py class MessageDatabaseManager (line 10) | class MessageDatabaseManager: method __init__ (line 11) | def __init__(self) -> None: method create_db_message_by_core (line 15) | def create_db_message_by_core(message: MessageCore): method create_db_message (line 19) | def create_db_message(self, message: MessageCreate): method db_add_message (line 22) | def db_add_message(self, message: Message): method db_create_message (line 27) | def db_create_message(self, thread_id: str, message: MessageCreate, st... method create_message_object (line 35) | def create_message_object(thread_id: ObjectID, run_id: ObjectID, messa... method db_sync_message (line 47) | def db_sync_message(self, message: MessageObject): method db_list_messages_of_thread (line 54) | def db_list_messages_of_thread( method db_get_message_by_id (line 72) | def db_get_message_by_id(self, thread_id: ObjectID, message_id: Object... method db_delete_message_by_id (line 80) | def db_delete_message_by_id(self, thread_id: ObjectID, message_id: Obj... FILE: kt-sft/ktransformers/server/crud/assistants/runs.py class RunsDatabaseManager (line 10) | class RunsDatabaseManager: method __init__ (line 11) | def __init__(self) -> None: method create_run_object (line 14) | def create_run_object(self, thread_id: ObjectID, run: RunCreate) -> Ru... method db_create_run (line 26) | def db_create_run(self, thread_id: str, run: RunCreate): method db_sync_run (line 40) | def db_sync_run(self, run: RunObject) -> None: method db_get_run (line 47) | def db_get_run(self, run_id: ObjectID) -> RunObject: FILE: kt-sft/ktransformers/server/crud/assistants/threads.py class ThreadsDatabaseManager (line 15) | class ThreadsDatabaseManager: method __init__ (line 16) | def __init__(self) -> None: method db_create_thread (line 21) | def db_create_thread(self, thread: ThreadCreate): method db_get_thread_by_id (line 54) | def db_get_thread_by_id(self, thread_id: ObjectID): method db_list_threads (line 59) | def db_list_threads(self, limit: Optional[int], order: Order) -> List[... method db_list_threads_preview (line 71) | def db_list_threads_preview(self, limit: Optional[int], order: Order) ... method db_delete_thread_by_id (line 88) | def db_delete_thread_by_id(self, thread_id: ObjectID): FILE: kt-sft/ktransformers/server/exceptions.py function db_exception (line 4) | def db_exception(): function not_implemented (line 11) | def not_implemented(what): function internal_server_error (line 18) | def internal_server_error(what): function request_error (line 22) | def request_error(what): FILE: kt-sft/ktransformers/server/main.py function mount_app_routes (line 21) | def mount_app_routes(mount_app: FastAPI): function create_app (line 29) | def create_app(): function update_web_port (line 49) | def update_web_port(config_file: str): function mount_index_routes (line 61) | def mount_index_routes(app: FastAPI): function run_api (line 75) | def run_api(app, host, port, **kwargs): function custom_openapi (line 88) | def custom_openapi(app): function main (line 103) | def main(): FILE: kt-sft/ktransformers/server/models/assistants/assistants.py class Assistant (line 7) | class Assistant(Base): FILE: kt-sft/ktransformers/server/models/assistants/messages.py class Message (line 7) | class Message(Base): FILE: kt-sft/ktransformers/server/models/assistants/run_steps.py class RunStep (line 7) | class RunStep(Base): FILE: kt-sft/ktransformers/server/models/assistants/runs.py class Run (line 7) | class Run(Base): FILE: kt-sft/ktransformers/server/models/assistants/threads.py class Thread (line 7) | class Thread(Base): FILE: kt-sft/ktransformers/server/schemas/assistants/assistants.py class AssistantBase (line 21) | class AssistantBase(BaseModel): method validate_tools (line 28) | def validate_tools(cls, value): method validate_tool_resources (line 51) | def validate_tool_resources(cls, value): method convert_meta_data (line 70) | def convert_meta_data(cls, values): class AssistantCreate (line 79) | class AssistantCreate(AssistantBase): class AssistantBuildStatus (line 83) | class AssistantBuildStatus(BaseModel): class Status (line 84) | class Status(Enum): method to_stream_reply (line 112) | def to_stream_reply(self) -> str: class AssistantObject (line 116) | class AssistantObject(AssistantBase, ObjectWithCreatedTime): method as_api_response (line 123) | def as_api_response(self): method get_related_threads_ids (line 126) | def get_related_threads_ids(self) -> List[ObjectID]: method get_related_threads_objects (line 133) | def get_related_threads_objects(self) -> List: method append_related_threads (line 145) | def append_related_threads(self, thread_ids: List[ObjectID]): method update_build_status (line 156) | async def update_build_status(self, events: AsyncIterable) -> AsyncIte... method get_build_status (line 178) | def get_build_status(self) -> AssistantBuildStatus: method sync_db (line 182) | def sync_db(self)->None: method get_encoded_instruction (line 191) | def get_encoded_instruction(self,encode_fn:Callable)->torch.Tensor: class AssistantModify (line 198) | class AssistantModify(AssistantBase): FILE: kt-sft/ktransformers/server/schemas/assistants/messages.py class IncompleteDetails (line 15) | class IncompleteDetails(BaseModel): class ContentType (line 19) | class ContentType(Enum): class ContentObject (line 25) | class ContentObject(BaseModel): class ImageFile (line 29) | class ImageFile(BaseModel): class ImageFileObject (line 34) | class ImageFileObject(ContentObject): class ImageUrl (line 38) | class ImageUrl(BaseModel): class ImageUrlObject (line 43) | class ImageUrlObject(ContentObject): class Annotation (line 47) | class Annotation(BaseModel): class Text (line 51) | class Text(BaseModel): class TextObject (line 56) | class TextObject(ContentObject): method filter_append (line 62) | def filter_append(self,text:str): class Attachment (line 72) | class Attachment(BaseModel): class Role (line 77) | class Role(Enum): method is_user (line 81) | def is_user(self)->bool: class MessageCore (line 85) | class MessageCore(BaseModel): method convert_meta_data (line 92) | def convert_meta_data(cls,values): class MessageBase (line 98) | class MessageBase(MessageCore): class Status (line 99) | class Status(Enum): class MessageObject (line 116) | class MessageObject(MessageBase, ObjectWithCreatedTime): method get_text_content (line 120) | def get_text_content(self) -> str: method get_encoded_content (line 129) | async def get_encoded_content(self,encode_fn:Callable): method get_attached_files (line 142) | def get_attached_files(self): method append_message_delta (line 147) | def append_message_delta(self,text:str): method sync_db (line 150) | def sync_db(self): method stream_response_with_event (line 160) | def stream_response_with_event(self, event: MessageBase.Status) -> Mes... class MessageStreamResponse (line 169) | class MessageStreamResponse(BaseModel): method to_stream_reply (line 173) | def to_stream_reply(self): class MessageCreate (line 177) | class MessageCreate(BaseModel): method convert_meta_data (line 184) | def convert_meta_data(cls,values): method to_core (line 189) | def to_core(self) -> MessageCore: class MessageModify (line 206) | class MessageModify(BaseModel): method convert_meta_data (line 210) | def convert_meta_data(cls,values): FILE: kt-sft/ktransformers/server/schemas/assistants/runs.py class ToolCall (line 13) | class ToolCall(BaseModel): class SubmitToolOutputs (line 19) | class SubmitToolOutputs(BaseModel): class RequiredAction (line 23) | class RequiredAction(BaseModel): class LastError (line 28) | class LastError(BaseModel): class IncompleteDetails (line 33) | class IncompleteDetails(BaseModel): class Usage (line 37) | class Usage(BaseModel): class TruncationStrategy (line 43) | class TruncationStrategy(BaseModel): class ToolChoiceType (line 48) | class ToolChoiceType(Enum): class RunBase (line 54) | class RunBase(BaseModel): class Status (line 55) | class Status(Enum): method convert_meta_data (line 84) | def convert_meta_data(cls,values): method set_compute_save (line 89) | def set_compute_save(self,save:int): class RunObject (line 104) | class RunObject(RunBase, ObjectWithCreatedTime): method stream_response_with_event (line 105) | def stream_response_with_event(self,event:RunBase.Status)->RunStreamRe... method sync_db (line 114) | def sync_db(self): method create_message_creation_step (line 123) | def create_message_creation_step(self): class RunStreamResponse (line 127) | class RunStreamResponse(BaseModel): method to_stream_reply (line 130) | def to_stream_reply(self): class RunCreate (line 133) | class RunCreate(BaseModel): method convert_meta_data (line 144) | def convert_meta_data(cls,values): class RunThreadCreate (line 159) | class RunThreadCreate(BaseModel): method convert_meta_data (line 169) | def convert_meta_data(cls,values): class RunModify (line 184) | class RunModify(BaseModel): method convert_meta_data (line 188) | def convert_meta_data(cls,values): class ToolOutput (line 194) | class ToolOutput(BaseModel): class RunSubmit (line 199) | class RunSubmit(BaseModel): FILE: kt-sft/ktransformers/server/schemas/assistants/streaming.py class TextObjectWithIndex (line 15) | class TextObjectWithIndex(TextObject): class ImageFileObjectWithIndex (line 19) | class ImageFileObjectWithIndex(ImageFileObject): class ImageUrlObjectWithIndex (line 23) | class ImageUrlObjectWithIndex(ImageUrlObject): class MessageDeltaImpl (line 31) | class MessageDeltaImpl(BaseModel): class MessageDelta (line 36) | class MessageDelta(Object): method to_stream_reply (line 39) | def to_stream_reply(self): function text_delta (line 43) | def text_delta(index: int, text: str): function append_message_delta (line 47) | def append_message_delta(self: MessageObject, text: str): class RunStepDeltaImpl (line 63) | class RunStepDeltaImpl(BaseModel): class RunStepDelta (line 67) | class RunStepDelta(Object): method to_stream_reply (line 70) | def to_stream_reply(self): class Done (line 74) | class Done(): method to_stream_reply (line 75) | def to_stream_reply(self): function check_client_link (line 79) | async def check_client_link(request: Request, async_events: AsyncIterable): function add_done (line 86) | async def add_done(async_events: AsyncIterable): function to_stream_reply (line 92) | async def to_stream_reply(async_events: AsyncIterable): function filter_api_event (line 100) | async def filter_api_event(async_events: AsyncIterable): function filter_chat_chunk (line 106) | async def filter_chat_chunk(async_events: AsyncIterable): function filter_by_types (line 112) | async def filter_by_types(async_events: AsyncIterable, types: List): function api_stream_response (line 120) | def api_stream_response(request: Request, async_events: AsyncIterable): function chat_stream_response (line 124) | def chat_stream_response(request: Request, async_events: AsyncIterable): function stream_response (line 128) | def stream_response(request: Request, async_events: AsyncIterable): function check_link_response (line 132) | def check_link_response(request: Request, async_events: AsyncIterable): function wrap_async_generator_into_queue (line 136) | def wrap_async_generator_into_queue(async_events: AsyncIterable) -> asyn... function unwrap_async_queue (line 151) | async def unwrap_async_queue(queue: asyncio.Queue) -> AsyncIterable: function unwrap_async_queue_slow (line 163) | async def unwrap_async_queue_slow(queue: asyncio.Queue) -> AsyncIterable: FILE: kt-sft/ktransformers/server/schemas/assistants/threads.py class ThreadBase (line 12) | class ThreadBase(BaseModel): method convert_meta_data (line 16) | def convert_meta_data(cls,values): class ThreadObject (line 24) | class ThreadObject(ThreadBase, ObjectWithCreatedTime): method check_is_related_threads (line 28) | def check_is_related_threads(self)->Self: class StreamEvent (line 34) | class StreamEvent(Enum): method to_stream_reply (line 37) | def to_stream_reply(self,event:StreamEvent): class ThreadCreate (line 41) | class ThreadCreate(ThreadBase): class ThreadModify (line 45) | class ThreadModify(ThreadBase): FILE: kt-sft/ktransformers/server/schemas/assistants/tool.py class ToolType (line 9) | class ToolType(str, Enum): class ToolBase (line 16) | class ToolBase(BaseModel): class CodeInterpreter (line 20) | class CodeInterpreter(ToolBase): class FileSearch (line 24) | class FileSearch(ToolBase): class RelatedThreads (line 28) | class RelatedThreads(ToolBase): class FuntionTool (line 32) | class FuntionTool(ToolBase): class CodeInterpreterResource (line 41) | class CodeInterpreterResource(BaseModel): class FileSearchResource (line 45) | class FileSearchResource(BaseModel): class RelatedThreadsResource (line 50) | class RelatedThreadsResource(BaseModel): FILE: kt-sft/ktransformers/server/schemas/base.py class Object (line 12) | class Object(BaseModel): class ObjectWithCreatedTime (line 20) | class ObjectWithCreatedTime(Object): class Order (line 25) | class Order(str, Enum): method to_sqlalchemy_order (line 29) | def to_sqlalchemy_order(self): class DeleteResponse (line 41) | class DeleteResponse(Object): class OperationResponse (line 44) | class OperationResponse(BaseModel): FILE: kt-sft/ktransformers/server/schemas/conversation.py class ThreadPreview (line 9) | class ThreadPreview(BaseModel): FILE: kt-sft/ktransformers/server/schemas/endpoints/chat.py class CompletionUsage (line 13) | class CompletionUsage(BaseModel): class Role (line 22) | class Role(Enum): class Message (line 29) | class Message(BaseModel): method to_tokenizer_message (line 36) | def to_tokenizer_message(self): class FunctionParameters (line 48) | class FunctionParameters(BaseModel): class FunctionDefinition (line 53) | class FunctionDefinition(BaseModel): class ToolFunction (line 58) | class ToolFunction(BaseModel): class Tool (line 61) | class Tool(BaseModel): class ChatCompletionCreate (line 65) | class ChatCompletionCreate(BaseModel): method get_tokenizer_messages (line 79) | def get_tokenizer_messages(self): class ChatCompletionChunk (line 82) | class ChatCompletionChunk(BaseModel): method to_stream_reply (line 92) | def to_stream_reply(self): class RawUsage (line 95) | class RawUsage(BaseModel): FILE: kt-sft/ktransformers/server/schemas/legacy/completions.py class CompletionCreate (line 7) | class CompletionCreate(BaseModel): method get_tokenizer_messages (line 16) | def get_tokenizer_messages(self): class FinishReason (line 22) | class FinishReason(Enum): class Choice (line 26) | class Choice(BaseModel): class CompletionObject (line 33) | class CompletionObject(Object): method set_token (line 40) | def set_token(self,token:str): method append_token (line 45) | def append_token(self,token:str): method to_stream_reply (line 50) | def to_stream_reply(self): FILE: kt-sft/ktransformers/server/utils/create_interface.py function create_interface (line 19) | def create_interface(config: Config, default_args: ConfigArgs): class GlobalContextManager (line 33) | class GlobalContextManager: class GlobalInterface (line 35) | class GlobalInterface: function get_thread_context_manager (line 38) | def get_thread_context_manager() -> GlobalContextManager: function get_interface (line 40) | def get_interface() -> GlobalInterface: FILE: kt-sft/ktransformers/server/utils/multi_timer.py function format_time (line 4) | def format_time(seconds): class Profiler (line 20) | class Profiler: method __init__ (line 21) | def __init__(self): method create_timer (line 25) | def create_timer(self, name): method start_timer (line 32) | def start_timer(self, name): method pause_timer (line 40) | def pause_timer(self, name): method get_timer_sec (line 48) | def get_timer_sec(self, name): method get_all_timers (line 57) | def get_all_timers(self): method report_timer_string (line 63) | def report_timer_string(self, name): method create_and_start_timer (line 66) | def create_and_start_timer(self, name): method inc (line 72) | def inc(self,key:str,delta:int=1): method set_counter (line 75) | def set_counter(self,key:str,to=0): method get_counter (line 78) | def get_counter(self,key:str): FILE: kt-sft/ktransformers/server/utils/sql_utils.py class SQLUtil (line 27) | class SQLUtil(metaclass=Singleton): method __init__ (line 34) | def __init__(self) -> None: method get_db (line 40) | def get_db(self): method init_engine (line 53) | def init_engine(cfg: Config): method create_sqllite_url (line 70) | def create_sqllite_url(cfg): method db_add_commit_refresh (line 89) | def db_add_commit_refresh(self, session: Session, what): method db_merge_commit (line 104) | def db_merge_commit(self, session: Session, what): method db_update_commit_refresh (line 115) | def db_update_commit_refresh(self, session: Session, existing, what): FILE: kt-sft/ktransformers/sft/flops_utils/custom_profile.py function profile_origin (line 77) | def profile_origin(model, inputs, custom_ops=None, verbose=True, report_... function custom_profile (line 162) | def custom_profile( FILE: kt-sft/ktransformers/sft/flops_utils/lora_test_utils.py class ProfilerCallback (line 5) | class ProfilerCallback(TrainerCallback): method __init__ (line 6) | def __init__(self, profiler): method on_step_end (line 9) | def on_step_end(self, args, state, control, **kwargs): function _short (line 12) | def _short(t): function install_shape_probes (line 15) | def install_shape_probes(model): function inspect_device (line 110) | def inspect_device(model, write_file): function print_model_params (line 121) | def print_model_params(model): function print_lora_params (line 165) | def print_lora_params(model): function print_grad_fn (line 188) | def print_grad_fn(grad_fn, indent=0): function forward_hook (line 198) | def forward_hook(module, inputs, output): function check_moe_gradients (line 210) | def check_moe_gradients(model): function disable_all_dropout (line 219) | def disable_all_dropout(module): function verify_lora_layers (line 226) | def verify_lora_layers(model): function print_moe_stats (line 260) | def print_moe_stats(moe_layer: KExpertsTorch): function recursive_traverse (line 276) | def recursive_traverse(model, parent_name=''): function log_step_state (line 289) | def log_step_state( function collect_gradients (line 325) | def collect_gradients(model, input_ids): function report_meta_tensors (line 343) | def report_meta_tensors(model): FILE: kt-sft/ktransformers/sft/lora.py class KAccelerator (line 50) | class KAccelerator(Accelerator): method __init__ (line 51) | def __init__(self, *args, **kwargs): method prepare_model (line 55) | def prepare_model(self, model, *args, **kwargs): method prepare (line 58) | def prepare(self, *args, **kwargs): class KTrainer (line 67) | class KTrainer(Trainer): method save_model (line 68) | def save_model(self, output_dir=None, _internal_call=False): method _move_model_to_device (line 74) | def _move_model_to_device(self, model, device): method _wrap_model (line 78) | def _wrap_model(self, model, training=True, dataloader=None): method create_accelerator_and_postprocess (line 82) | def create_accelerator_and_postprocess(self): method get_train_dataloader (line 210) | def get_train_dataloader(self) -> DataLoader: method training_step (line 257) | def training_step( class SFTJsonListDataset (line 333) | class SFTJsonListDataset(TorchDataset): method __init__ (line 334) | def __init__(self, path: str, tokenizer: AutoTokenizer, max_len: int =... method build_example (line 342) | def build_example(ins: str, inp: str, out: str) -> Dict[str, str]: method __len__ (line 349) | def __len__(self): method __getitem__ (line 352) | def __getitem__(self, idx: int): function lora_and_load_adapter (line 387) | def lora_and_load_adapter(model, tokenizer, sft_data_path, save_adapter_... function inject_lora_layer (line 455) | def inject_lora_layer(model, use_adapter_path): FILE: kt-sft/ktransformers/sft/metrics.py function eval_logit_processor (line 47) | def eval_logit_processor(logits: "torch.Tensor", labels: "torch.Tensor")... class ComputeSimilarity (line 61) | class ComputeSimilarity: method _dump (line 69) | def _dump(self) -> Optional[dict[str, float]]: method __post_init__ (line 78) | def __post_init__(self): method __call__ (line 81) | def __call__(self, eval_preds: "EvalPrediction", compute_result: bool ... FILE: kt-sft/ktransformers/sft/metrics_utils/constants.py class AttentionFunction (line 99) | class AttentionFunction(str, Enum): class EngineName (line 106) | class EngineName(str, Enum): class DownloadSource (line 112) | class DownloadSource(str, Enum): class QuantizationMethod (line 119) | class QuantizationMethod(str, Enum): class RopeScaling (line 132) | class RopeScaling(str, Enum): function register_model_group (line 139) | def register_model_group( FILE: kt-sft/ktransformers/sft/metrics_utils/env.py function print_env (line 33) | def print_env() -> None: FILE: kt-sft/ktransformers/sft/metrics_utils/logging.py class LoggerHandler (line 34) | class LoggerHandler(logging.Handler): method __init__ (line 37) | def __init__(self, output_dir: str) -> None: method _write_log (line 51) | def _write_log(self, log_entry: str) -> None: method emit (line 55) | def emit(self, record) -> None: method close (line 62) | def close(self) -> None: class _Logger (line 67) | class _Logger(logging.Logger): method info_rank0 (line 70) | def info_rank0(self, *args, **kwargs) -> None: method warning_rank0 (line 73) | def warning_rank0(self, *args, **kwargs) -> None: method warning_rank0_once (line 76) | def warning_rank0_once(self, *args, **kwargs) -> None: function _get_default_logging_level (line 80) | def _get_default_logging_level() -> "logging._Level": function _get_library_name (line 92) | def _get_library_name() -> str: function _get_library_root_logger (line 96) | def _get_library_root_logger() -> "_Logger": function _configure_library_root_logger (line 100) | def _configure_library_root_logger() -> None: function get_logger (line 120) | def get_logger(name: Optional[str] = None) -> "_Logger": function add_handler (line 129) | def add_handler(handler: "logging.Handler") -> None: function remove_handler (line 135) | def remove_handler(handler: logging.Handler) -> None: function info_rank0 (line 141) | def info_rank0(self: "logging.Logger", *args, **kwargs) -> None: function warning_rank0 (line 146) | def warning_rank0(self: "logging.Logger", *args, **kwargs) -> None: function warning_rank0_once (line 152) | def warning_rank0_once(self: "logging.Logger", *args, **kwargs) -> None: FILE: kt-sft/ktransformers/sft/metrics_utils/misc.py class AverageMeter (line 57) | class AverageMeter: method __init__ (line 60) | def __init__(self): method reset (line 63) | def reset(self): method update (line 69) | def update(self, val, n=1): function check_version (line 76) | def check_version(requirement: str, mandatory: bool = False) -> None: function check_dependencies (line 95) | def check_dependencies() -> None: function calculate_tps (line 104) | def calculate_tps(dataset: list[dict[str, Any]], metrics: dict[str, floa... function count_parameters (line 117) | def count_parameters(model: "torch.nn.Module") -> tuple[int, int]: function get_current_device (line 144) | def get_current_device() -> "torch.device": function get_device_count (line 160) | def get_device_count() -> int: function get_logits_processor (line 174) | def get_logits_processor() -> "LogitsProcessorList": function get_current_memory (line 181) | def get_current_memory() -> tuple[int, int]: function get_peak_memory (line 195) | def get_peak_memory() -> tuple[int, int]: function has_tokenized_data (line 209) | def has_tokenized_data(path: "os.PathLike") -> bool: function infer_optim_dtype (line 214) | def infer_optim_dtype(model_dtype: Optional["torch.dtype"]) -> "torch.dt... function is_accelerator_available (line 224) | def is_accelerator_available() -> bool: function is_env_enabled (line 231) | def is_env_enabled(env_var: str, default: str = "0") -> bool: function numpify (line 236) | def numpify(inputs: Union["NDArray", "torch.Tensor"]) -> "NDArray": function skip_check_imports (line 248) | def skip_check_imports() -> None: function torch_gc (line 254) | def torch_gc() -> None: function try_download_model_from_other_hub (line 267) | def try_download_model_from_other_hub(model_args: "ModelArguments") -> str: function use_modelscope (line 304) | def use_modelscope() -> bool: function use_openmind (line 308) | def use_openmind() -> bool: function use_ray (line 312) | def use_ray() -> bool: function find_available_port (line 316) | def find_available_port() -> int: function fix_proxy (line 325) | def fix_proxy(ipv6_enabled: bool = False) -> None: FILE: kt-sft/ktransformers/sft/metrics_utils/packages.py function _is_package_available (line 30) | def _is_package_available(name: str) -> bool: function _get_package_version (line 34) | def _get_package_version(name: str) -> "Version": function is_pyav_available (line 41) | def is_pyav_available(): function is_librosa_available (line 45) | def is_librosa_available(): function is_fastapi_available (line 49) | def is_fastapi_available(): function is_galore_available (line 53) | def is_galore_available(): function is_apollo_available (line 57) | def is_apollo_available(): function is_gradio_available (line 61) | def is_gradio_available(): function is_matplotlib_available (line 65) | def is_matplotlib_available(): function is_pillow_available (line 69) | def is_pillow_available(): function is_ray_available (line 73) | def is_ray_available(): function is_requests_available (line 77) | def is_requests_available(): function is_rouge_available (line 81) | def is_rouge_available(): function is_starlette_available (line 85) | def is_starlette_available(): function is_transformers_version_greater_than (line 90) | def is_transformers_version_greater_than(content: str): function is_uvicorn_available (line 94) | def is_uvicorn_available(): function is_vllm_available (line 98) | def is_vllm_available(): function is_sglang_available (line 102) | def is_sglang_available(): FILE: kt-sft/ktransformers/sft/metrics_utils/ploting.py function smooth (line 34) | def smooth(scalars: list[float]) -> list[float]: function gen_loss_plot (line 49) | def gen_loss_plot(trainer_log: list[dict[str, Any]]) -> "matplotlib.figu... function plot_loss (line 69) | def plot_loss(save_dictionary: str, keys: list[str] = ["loss"]) -> None: FILE: kt-sft/ktransformers/sft/monkey_patch_torch_module.py function _patched_module_init (line 7) | def _patched_module_init(self, *args, **kwargs): function install_patch (line 42) | def install_patch(): function restore_patch (line 45) | def restore_patch(): FILE: kt-sft/ktransformers/sft/peft_utils/lora_layer.py function dispatch_default (line 20) | def dispatch_default( class BaseTunerLayer (line 55) | class BaseTunerLayer(ABC): method get_orig_module (line 80) | def get_orig_module(self) -> nn.Module: method weight (line 93) | def weight(self) -> torch.Tensor: method bias (line 109) | def bias(self) -> torch.Tensor: method merge (line 113) | def merge(self, safe_merge: bool = False, adapter_names: Optional[list... method unmerge (line 116) | def unmerge(self) -> None: method merged (line 120) | def merged(self) -> bool: method disable_adapters (line 124) | def disable_adapters(self) -> bool: method active_adapter (line 129) | def active_adapter(self) -> str | list[str]: method _get_available_adapters (line 133) | def _get_available_adapters(self) -> set[str]: method active_adapters (line 144) | def active_adapters(self): method enable_adapters (line 150) | def enable_adapters(self, enabled: bool) -> None: method set_adapter (line 168) | def set_adapter(self, adapter_names: str | list[str]) -> None: method _all_available_adapter_names (line 199) | def _all_available_adapter_names(self) -> list[str]: method delete_adapter (line 210) | def delete_adapter(self, adapter_name: str) -> None: method _move_adapter_to_device_of_orig_module (line 247) | def _move_adapter_to_device_of_orig_module(self, adapter_name: str, de... class LoraLayer (line 283) | class LoraLayer(BaseTunerLayer): method __init__ (line 289) | def __init__(self, orig_module: nn.Module, ephemeral_gpu_offload: bool... method update_layer (line 321) | def update_layer( method reset_lora_parameters (line 364) | def reset_lora_parameters(self, adapter_name, init_lora_weights): method olora_init (line 389) | def olora_init(self, adapter_name): method pissa_init (line 414) | def pissa_init(self, adapter_name, init_lora_weights): method loftq_init (line 449) | def loftq_init(self, adapter_name): method _cache_store (line 470) | def _cache_store(self, key: str, value: Any) -> None: method _cache_pop (line 473) | def _cache_pop(self, key: str) -> Any: method set_scale (line 477) | def set_scale(self, adapter, scale): method scale_layer (line 483) | def scale_layer(self, scale: float) -> None: method unscale_layer (line 493) | def unscale_layer(self, scale=None) -> None: method _check_forward_args (line 503) | def _check_forward_args(self, x, *args, **kwargs): method _mixed_batch_forward (line 530) | def _mixed_batch_forward( class Linear (line 562) | class Linear(nn.Module, LoraLayer): method __init__ (line 564) | def __init__( method merge (line 596) | def merge(self, safe_merge: bool = False, adapter_names: Optional[list... method unmerge (line 682) | def unmerge(self) -> None: method get_delta_weight (line 705) | def get_delta_weight(self, adapter) -> torch.Tensor: method forward (line 739) | def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch... method __repr__ (line 770) | def __repr__(self) -> str: class Embedding (line 775) | class Embedding(nn.Module, LoraLayer): method __init__ (line 777) | def __init__( method update_layer (line 809) | def update_layer( method merge (line 845) | def merge(self, safe_merge: bool = False, adapter_names: Optional[list... method unmerge (line 881) | def unmerge(self) -> None: method get_delta_weight (line 893) | def get_delta_weight(self, adapter) -> torch.Tensor: method _mixed_batch_forward (line 927) | def _mixed_batch_forward( method _embed (line 957) | def _embed(self, input: torch.Tensor, weight: torch.Tensor) -> torch.T... method forward (line 969) | def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch... method __repr__ (line 1009) | def __repr__(self) -> str: class KTransformersLinearLora (line 1013) | class KTransformersLinearLora(KTransformersLinear, LoraLayer): method __init__ (line 1014) | def __init__( method merge (line 1065) | def merge(self, safe_merge: bool = False, adapter_names: Optional[list... method unmerge (line 1117) | def unmerge(self) -> None: method get_delta_weight (line 1137) | def get_delta_weight(self, adapter: str) -> torch.Tensor: method forward (line 1143) | def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch... FILE: kt-sft/ktransformers/sft/peft_utils/lora_model.py class LoraModel (line 40) | class LoraModel(nn.Module, ABC): method __init__ (line 115) | def __init__( method inject_adapter (line 150) | def inject_adapter( method _create_and_replace (line 223) | def _create_and_replace( method _replace_module (line 256) | def _replace_module(self, parent, child_name, new_module, child): method _mark_only_adapters_as_trainable (line 302) | def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None: method _create_new_module (line 324) | def _create_new_module(lora_config, adapter_name, target, parent, **kw... method __getattr__ (line 351) | def __getattr__(self, name: str): method _pre_injection_hook (line 360) | def _pre_injection_hook(self, model: nn.Module, config: PeftConfig, ad... method _set_adapter_layers (line 375) | def _set_adapter_layers(self, enabled: bool = True) -> None: method disable_adapter_layers (line 380) | def disable_adapter_layers(self) -> None: method enable_adapter_layers (line 389) | def enable_adapter_layers(self) -> None: method active_adapters (line 429) | def active_adapters(self) -> list[str]: FILE: kt-sft/ktransformers/sft/peft_utils/mapping.py function get_peft_model (line 11) | def get_peft_model( function inject_adapter_in_model (line 55) | def inject_adapter_in_model( FILE: kt-sft/ktransformers/sft/peft_utils/peft_model.py class PeftModel (line 68) | class PeftModel(PushToHubMixin, torch.nn.Module): method __init__ (line 104) | def __init__( method peft_config (line 147) | def peft_config(self) -> dict[str, PeftConfig]: method active_adapters (line 153) | def active_adapters(self) -> list[str]: method peft_config (line 172) | def peft_config(self, value: dict[str, PeftConfig]): method save_pretrained (line 178) | def save_pretrained( method from_pretrained (line 375) | def from_pretrained( method _setup_prompt_encoder (line 556) | def _setup_prompt_encoder(self, adapter_name: str): method _prepare_model_for_gradient_checkpointing (line 626) | def _prepare_model_for_gradient_checkpointing(self, model: PreTrainedM... method get_prompt_embedding_to_save (line 645) | def get_prompt_embedding_to_save(self, adapter_name: str) -> torch.Ten... method get_prompt (line 664) | def get_prompt(self, batch_size: int, task_ids: Optional[torch.Tensor]... method get_nb_trainable_parameters (line 727) | def get_nb_trainable_parameters(self) -> tuple[int, int]: method print_trainable_parameters (line 757) | def print_trainable_parameters(self) -> None: method __getattr__ (line 774) | def __getattr__(self, name: str): method _enable_peft_forward_hooks (line 784) | def _enable_peft_forward_hooks(self, *args, **kwargs): method forward (line 796) | def forward(self, *args: Any, **kwargs: Any): method generate (line 804) | def generate(self, *args, **kwargs): method _get_base_model_class (line 809) | def _get_base_model_class(self, is_prompt_tuning=False): method disable_adapter (line 818) | def disable_adapter(self): method get_base_model (line 865) | def get_base_model(self) -> torch.nn.Module: method add_adapter (line 875) | def add_adapter(self, adapter_name: str, peft_config: PeftConfig, low_... method set_additional_trainable_modules (line 926) | def set_additional_trainable_modules(self, peft_config, adapter_name): method get_layer_status (line 934) | def get_layer_status(self) -> list[TunerLayerStatus]: method get_model_status (line 964) | def get_model_status(self) -> TunerModelStatus: method _split_kwargs (line 1005) | def _split_kwargs(cls, kwargs: dict[str, Any]): method _update_offload (line 1018) | def _update_offload(self, offload_index: dict[str, dict[str, str]], ad... method _check_new_adapter_config (line 1098) | def _check_new_adapter_config(self, peft_config: PeftConfig, is_traina... method load_adapter (line 1121) | def load_adapter( method set_adapter (line 1268) | def set_adapter(self, adapter_name: str) -> None: method base_model_torch_dtype (line 1295) | def base_model_torch_dtype(self): method active_peft_config (line 1299) | def active_peft_config(self): method create_or_update_model_card (line 1302) | def create_or_update_model_card(self, output_dir: str): class PeftModelForCausalLM (line 1356) | class PeftModelForCausalLM(PeftModel): method __init__ (line 1397) | def __init__( method forward (line 1403) | def forward( method _cpt_forward (line 1498) | def _cpt_forward( method generate (line 1555) | def generate(self, *args, **kwargs): method prepare_inputs_for_generation (line 1576) | def prepare_inputs_for_generation(self, *args, task_ids: Optional[torc... class TunerLayerStatus (line 1651) | class TunerLayerStatus: function get_layer_status (line 1662) | def get_layer_status(model: torch.nn.Module) -> list[TunerLayerStatus]: class TunerModelStatus (line 1765) | class TunerModelStatus: function get_model_status (line 1780) | def get_model_status(model: torch.nn.Module) -> TunerModelStatus: FILE: kt-sft/ktransformers/sft/torchviz_test.py class SimpleNet (line 5) | class SimpleNet(nn.Module): method __init__ (line 6) | def __init__(self): method forward (line 12) | def forward(self, x): FILE: kt-sft/ktransformers/tests/AIME_2024/eval_api.py function generate_text (line 16) | def generate_text(api_url,question , model_name, stream=False, auth_toke... function load_data (line 39) | def load_data(file_path): function get_score (line 52) | def get_score(pred, answer): function run_eval_api (line 72) | def run_eval_api( function main (line 118) | def main(output_path, api_url, model_name, auth_token, format_tabs,probl... FILE: kt-sft/ktransformers/tests/AIME_2024/evaluation.py function filter_answer (line 2) | def filter_answer(completion: str) -> str: FILE: kt-sft/ktransformers/tests/AIME_2024/prompts.py function instruct_prompt (line 1) | def instruct_prompt(prompt: str) -> str: FILE: kt-sft/ktransformers/tests/function_call_test.py function send_messages (line 3) | def send_messages(messages): FILE: kt-sft/ktransformers/tests/humaneval/eval_api.py function generate_text (line 11) | def generate_text(api_url,question , model_name, stream=False, auth_toke... function run_eval_api (line 34) | def run_eval_api( function main (line 80) | def main(output_path, api_url, model_name, auth_token, format_tabs,probl... FILE: kt-sft/ktransformers/tests/humaneval/evaluation.py function filter_code (line 2) | def filter_code(completion: str) -> str: function fix_indents (line 14) | def fix_indents(text: str) -> str: FILE: kt-sft/ktransformers/tests/humaneval/prompts.py function instruct_prompt (line 1) | def instruct_prompt(prompt: str) -> str: function standard_prompt (line 5) | def standard_prompt(prompt: str) -> str: function write_prompt (line 9) | def write_prompt(prompt: str) -> str: function replit_glaive_prompt (line 13) | def replit_glaive_prompt(prompt: str) -> str: FILE: kt-sft/ktransformers/tests/mmlu_pro_test.py class DataEvaluator (line 16) | class DataEvaluator: method __init__ (line 17) | def __init__(self): method load_data (line 21) | def load_data(self, file_path): method get_prompt (line 43) | def get_prompt(self, record): method post_processing (line 53) | def post_processing(self, text): method score (line 62) | def score(self, pred, answers): function generate_text (line 77) | def generate_text(api_url, question, model_name, stream=False): function main (line 101) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file... FILE: kt-sft/ktransformers/tests/mmlu_test.py class DataEvaluator (line 16) | class DataEvaluator: method __init__ (line 17) | def __init__(self): method load_data (line 21) | def load_data(self, file_path): method get_prompt (line 35) | def get_prompt(self, record): method post_processing (line 45) | def post_processing(self, text): method score (line 54) | def score(self, pred, answers): function generate_text (line 69) | def generate_text(api_url, question, model_name, stream=False): function main (line 93) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file... FILE: kt-sft/ktransformers/tests/mmlu_test_multi.py function extract_final_answer (line 19) | def extract_final_answer(text): class DataEvaluator (line 57) | class DataEvaluator: method __init__ (line 58) | def __init__(self): method load_data (line 61) | def load_data(self, file_path): method get_prompt (line 72) | def get_prompt(self, record): method post_processing (line 80) | def post_processing(self, text): method score (line 87) | def score(self, pred, answer): function generate_text (line 95) | def generate_text(api_url, question, model_name, stream=False): function main (line 115) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file... FILE: kt-sft/ktransformers/tests/score.py function wait_for_server (line 7) | def wait_for_server(base_url: str, timeout: int = None) -> None: function enqueue_output (line 63) | def enqueue_output(out, queue): FILE: kt-sft/ktransformers/tests/test_client.py function fetch_event_stream (line 15) | async def fetch_event_stream(session, payload, request_id, stream): function main (line 76) | async def main(prompt_id, model, stream, max_tokens, temperature, top_p): FILE: kt-sft/ktransformers/tests/test_pytorch_q8.py class LinearModel (line 3) | class LinearModel(torch.nn.Module): method __init__ (line 4) | def __init__(self, in_features, out_features): method forward (line 8) | def forward(self, x): FILE: kt-sft/ktransformers/tests/test_speed.py function fetch_event_stream (line 48) | async def fetch_event_stream(session, request_id, prompt, max_tokens, mo... function main (line 137) | async def main(concurrent_requests , prompt, max_tokens, model): FILE: kt-sft/ktransformers/tests/triton_fp8gemm_test.py function test_fp8_gemm_vs_torch_matmul (line 21) | def test_fp8_gemm_vs_torch_matmul(): function test_fp8_gemm_vs_torch_matmul_load (line 48) | def test_fp8_gemm_vs_torch_matmul_load(): function test_fp8_gemm_tplops (line 71) | def test_fp8_gemm_tplops(): FILE: kt-sft/ktransformers/util/cuda_graph_runner.py class CUDAGraphRunner (line 10) | class CUDAGraphRunner: method __init__ (line 12) | def __init__(self): method capture (line 17) | def capture( method forward (line 63) | def forward( method __call__ (line 83) | def __call__(self, *args, **kwargs): FILE: kt-sft/ktransformers/util/custom_gguf.py class GGMLQuantizationType (line 32) | class GGMLQuantizationType(IntEnum): function quant_shape_to_byte_shape (line 97) | def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuan... function read_value (line 170) | def read_value(f, data_type): function dequantize_q2_k (line 218) | def dequantize_q2_k(data): function dequantize_q2_k_gpu (line 255) | def dequantize_q2_k_gpu(data, device:str ="cuda", target_dtype = torch.g... function dequantize_q3_k (line 265) | def dequantize_q3_k(data): function dequantize_q3_k_gpu (line 307) | def dequantize_q3_k_gpu(data, device:str ="cuda", target_dtype = torch.g... function dequantize_q4_k (line 317) | def dequantize_q4_k(data): function dequantize_q4_k_gpu (line 339) | def dequantize_q4_k_gpu(data, device:str ="cuda", target_dtype = torch.g... function dequantize_q5_k (line 349) | def dequantize_q5_k(data): function dequantize_q5_k_gpu (line 405) | def dequantize_q5_k_gpu(data, device:str ="cuda", target_dtype = torch.g... function dequantize_q6_k (line 415) | def dequantize_q6_k(data): function dequantize_q6_k_gpu (line 464) | def dequantize_q6_k_gpu(data: np.ndarray, device:str = "cuda", target_dt... function dequantize_iq4_xs (line 475) | def dequantize_iq4_xs(data): function dequantize_iq4_xs_gpu (line 505) | def dequantize_iq4_xs_gpu(data: np.ndarray, device:str = "cuda", target_... function dequantize_q4_0 (line 514) | def dequantize_q4_0(data): function dequantize_q4_0_gpu (line 529) | def dequantize_q4_0_gpu(data, device:str = "cuda", target_dtype = torch.... function dequantize_q5_0 (line 532) | def dequantize_q5_0(data): function dequantize_q5_0_gpu (line 553) | def dequantize_q5_0_gpu(data, device:str = "cuda", target_dtype = torch.... function dequantize_q8_0 (line 556) | def dequantize_q8_0(data): function dequantize_q8_0_gpu (line 565) | def dequantize_q8_0_gpu(data, device:str = "cuda", target_dtype = torch.... function dequantize_f32 (line 577) | def dequantize_f32(data): function dequantize_f32_gpu (line 580) | def dequantize_f32_gpu(data, device, target_dtype = torch.get_default_dt... function dequantize_f16 (line 587) | def dequantize_f16(data): function dequantize_f16_gpu (line 590) | def dequantize_f16_gpu(data, device, target_dtype = torch.get_default_dt... function dequantize_bf16_gpu (line 597) | def dequantize_bf16_gpu(data, device, target_dtype = torch.get_default_d... function translate_name_to_gguf_mixtral (line 635) | def translate_name_to_gguf_mixtral(name): function translate_name_to_gguf (line 658) | def translate_name_to_gguf(name): function translate_adapter_name_to_gguf (line 704) | def translate_adapter_name_to_gguf(name): FILE: kt-sft/ktransformers/util/custom_loader.py class ModelLoader (line 19) | class ModelLoader(ABC): method has_tensor (line 26) | def has_tensor(cls, name: str): class SafeTensorLoader (line 38) | class SafeTensorLoader(ModelLoader): method __init__ (line 44) | def __init__(self, file_path: str): method __load_tensor_file_map (line 47) | def __load_tensor_file_map(self, file_path: str): method load_tensor (line 86) | def load_tensor(self, key: str, device: str="cpu"): method load_experts (line 100) | def load_experts(self, key: str, device: str="cpu"): method load_gate (line 201) | def load_gate(self, key: str, device: str="cpu"): method close_all_handles (line 228) | def close_all_handles(self): method load_dequantized_tensor (line 233) | def load_dequantized_tensor(self, key:str, device: str="cpu"): method has_tensor (line 251) | def has_tensor(self, name: str): class GGUFLoader (line 254) | class GGUFLoader(ModelLoader): method __init__ (line 260) | def __init__(self, gguf_path: str): method load_gguf (line 296) | def load_gguf(self, f): method get_mmap_tensor (line 378) | def get_mmap_tensor(self, name): method get_undequanted_tensor_and_ggml_type (line 389) | def get_undequanted_tensor_and_ggml_type(self, name): method load_expert_tensor (line 397) | def load_expert_tensor(self, name, data, expert_id, elements_per_exper... method load_gguf_tensor (line 426) | def load_gguf_tensor(self, name: str, device:str = "cpu", target_dtype... method has_tensor (line 491) | def has_tensor(self, name: str): method get_ggml_type (line 495) | def get_ggml_type(self, name: str): class ModelLoaderFactory (line 501) | class ModelLoaderFactory: method create_loader (line 508) | def create_loader(path: str): FILE: kt-sft/ktransformers/util/globals.py class _GlobalConfig (line 3) | class _GlobalConfig: method __init__ (line 4) | def __init__(self): method get (line 9) | def get(self, key, default=None): method set (line 12) | def set(self, key, value): method update (line 15) | def update(self, **kwargs): method all (line 18) | def all(self): method __getitem__ (line 21) | def __getitem__(self, key): method __setitem__ (line 24) | def __setitem__(self, key, value): FILE: kt-sft/ktransformers/util/grad_wrapper.py function maybe_no_grad (line 12) | def maybe_no_grad(_func=None): FILE: kt-sft/ktransformers/util/inference_state.py class InferenceState (line 5) | class InferenceState(enum.Enum): FILE: kt-sft/ktransformers/util/modeling_rope_utils.py function _compute_default_rope_parameters (line 29) | def _compute_default_rope_parameters( function _compute_linear_scaling_rope_parameters (line 71) | def _compute_linear_scaling_rope_parameters( function _compute_dynamic_ntk_parameters (line 112) | def _compute_dynamic_ntk_parameters( function _compute_yarn_parameters (line 163) | def _compute_yarn_parameters( function _compute_longrope_parameters (line 259) | def _compute_longrope_parameters( function _compute_llama3_parameters (line 322) | def _compute_llama3_parameters( function _check_received_keys (line 378) | def _check_received_keys( function _validate_default_rope_parameters (line 407) | def _validate_default_rope_parameters(config: PretrainedConfig, ignore_k... function _validate_linear_scaling_rope_parameters (line 415) | def _validate_linear_scaling_rope_parameters(config: PretrainedConfig, i... function _validate_dynamic_scaling_rope_parameters (line 427) | def _validate_dynamic_scaling_rope_parameters(config: PretrainedConfig, ... function _validate_yarn_parameters (line 441) | def _validate_yarn_parameters(config: PretrainedConfig, ignore_keys: Opt... function _validate_longrope_parameters (line 479) | def _validate_longrope_parameters(config: PretrainedConfig, ignore_keys:... function _validate_llama3_parameters (line 529) | def _validate_llama3_parameters(config: PretrainedConfig, ignore_keys: O... function rope_config_validation (line 576) | def rope_config_validation(config: PretrainedConfig, ignore_keys: Option... FILE: kt-sft/ktransformers/util/textstream.py class TextStreamer (line 2) | class TextStreamer: method __init__ (line 4) | def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = Fal... method reset (line 14) | def reset(self): method put (line 18) | def put(self, value)->Optional[str]: method end (line 49) | def end(self)->Optional[str]: method _is_chinese_char (line 62) | def _is_chinese_char(self, cp): FILE: kt-sft/ktransformers/util/utils.py class NoEosUntil (line 40) | class NoEosUntil(LogitsProcessor): method __init__ (line 41) | def __init__(self, prompt_len: int, min_gen_len: int, eos_ids): method __call__ (line 47) | def __call__(self, input_ids, scores): class SilentCaptureStreamer (line 52) | class SilentCaptureStreamer(TextStreamer): method __init__ (line 53) | def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = Fal... method _append_piece (line 57) | def _append_piece(self, piece: Optional[str]): method put (line 61) | def put(self, value) -> str: method end (line 84) | def end(self) -> str: method getvalue (line 89) | def getvalue(self) -> str: method clear (line 92) | def clear(self): function get_free_ports (line 97) | def get_free_ports(n: int, continue_prot: list): function get_compute_capability (line 113) | def get_compute_capability(device:torch.device = None): function set_module (line 127) | def set_module(model, submodule_key, module): function set_param (line 141) | def set_param(module: nn.Module, name: str, weights: torch.Tensor): function get_device (line 148) | def get_device(gguf_module_key:str, device_map:dict): function get_all_used_cuda_device (line 156) | def get_all_used_cuda_device(device_map:dict): function load_cur_state_dict (line 166) | def load_cur_state_dict(module: nn.Module, gguf_loader: ModelLoader, pre... function sync_all_device (line 254) | def sync_all_device(all_device_list): function xpu_fp16_model (line 265) | def xpu_fp16_model(config): function load_weights (line 277) | def load_weights(module:nn.Module, gguf_loader:ModelLoader, prefix='', d... function tf_logits_warper (line 293) | def tf_logits_warper(generation_config): function prefill_and_generate (line 344) | def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000,... function prefill_and_generate_capture (line 527) | def prefill_and_generate_capture( FILE: kt-sft/ktransformers/util/vendors.py class GPUVendor (line 7) | class GPUVendor(IntEnum): class DeviceManager (line 15) | class DeviceManager: method __init__ (line 19) | def __init__(self): method _detect_gpu_vendor (line 23) | def _detect_gpu_vendor(self) -> GPUVendor: method _get_available_devices (line 60) | def _get_available_devices(self) -> List[int]: method get_device_str (line 75) | def get_device_str(self, device_id: Union[int, str]) -> str: method to_torch_device (line 102) | def to_torch_device(self, device_id: Union[int, str] = 0) -> torch.dev... method move_tensor_to_device (line 126) | def move_tensor_to_device(self, tensor: torch.Tensor, device_id: Union... method is_available (line 140) | def is_available(self, index: int = 0) -> bool: method get_all_devices (line 155) | def get_all_devices(self) -> List[int]: function get_device (line 168) | def get_device(device_id: Union[int, str] = 0) -> torch.device: function to_device (line 180) | def to_device(tensor: torch.Tensor, device_id: Union[int, str] = 0) -> t... FILE: kt-sft/ktransformers/util/weight_loader.py class ModelLoader (line 8) | class ModelLoader(ABC): method load_tensor (line 15) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor: method supports_format (line 30) | def supports_format(cls, path: str) -> bool: class SafeTensorLoader (line 43) | class SafeTensorLoader(ModelLoader): method __init__ (line 48) | def __init__(self, path: str): method _load_tensor_file_map (line 59) | def _load_tensor_file_map(self, path: str) -> None: method load_tensor (line 102) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor: method load_dequantized_tensor (line 122) | def load_dequantized_tensor(self, name: str, device: str = "cpu") -> t... method close_all_handles (line 148) | def close_all_handles(self) -> None: method supports_format (line 157) | def supports_format(cls, path: str) -> bool: class GGUFLoader (line 185) | class GGUFLoader(ModelLoader): method __init__ (line 190) | def __init__(self, path: str): method _load_gguf (line 228) | def _load_gguf(self, f) -> None: method _read_value (line 287) | def _read_value(self, f, data_type) -> Any: method load_tensor (line 310) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor: method load_gguf_tensor (line 324) | def load_gguf_tensor(self, name: str, device: str = "cpu", target_dtyp... method supports_format (line 346) | def supports_format(cls, path: str) -> bool: FILE: kt-sft/ktransformers/website/src/api/assistant.ts function filterAndConvert (line 3) | function filterAndConvert( type IAssistantData (line 12) | interface IAssistantData { FILE: kt-sft/ktransformers/website/src/api/run.ts type IRunData (line 4) | interface IRunData { function cancelRun (line 87) | async function cancelRun(threadId: string, runId: string){ FILE: kt-sft/ktransformers/website/src/assets/iconfont/iconfont.js function s (line 1) | function s(){h||(h=!0,e())} function d (line 1) | function d(){try{a.documentElement.doScroll("left")}catch(t){return void... FILE: kt-sft/ktransformers/website/src/conf/config.ts type Window (line 2) | interface Window { FILE: kt-sft/ktransformers/website/src/utils/copy.ts function showCopySuccessMessage (line 75) | function showCopySuccessMessage() { function showCopyErrorMessage (line 93) | function showCopyErrorMessage() { FILE: kt-sft/ktransformers/website/src/utils/types.ts type IAssistant (line 1) | interface IAssistant { type IAssistantWithStatus (line 17) | interface IAssistantWithStatus { type IMessage (line 34) | interface IMessage { type IThread (line 51) | interface IThread { type IRun (line 59) | interface IRun { type IFile (line 88) | interface IFile { type IMessageData (line 97) | interface IMessageData { type IThreadAndMessageAndAssistant (line 104) | interface IThreadAndMessageAndAssistant { type IDeleteResult (line 110) | interface IDeleteResult { type IBuildData (line 115) | interface IBuildData { FILE: kt-sft/merge_tensors/merge_safetensor_gguf.py function read_safetensor_keys_from_folder (line 15) | def read_safetensor_keys_from_folder(folder_path)->dict: function translate_name (line 58) | def translate_name(name:str)->str: function combine_tensor_sources (line 71) | def combine_tensor_sources(safetensor_path:str, gguf_path:str): function write_combined_tensor (line 97) | def write_combined_tensor(target_tensor_map: dict, output_path: str, ggu... function main (line 190) | def main(): FILE: kt-sft/setup.py function _load_pyproject_deps (line 48) | def _load_pyproject_deps(): function _strip_req (line 68) | def _strip_req(reqs, name: str): class CpuInstructInfo (line 87) | class CpuInstructInfo: class VersionInfo (line 97) | class VersionInfo: method get_musa_bare_metal_version (line 105) | def get_musa_bare_metal_version(self, musa_dir): method get_rocm_bare_metal_version (line 115) | def get_rocm_bare_metal_version(self, rocm_dir): method get_cuda_bare_metal_version (line 179) | def get_cuda_bare_metal_version(self, cuda_dir): method get_cuda_version_of_torch (line 188) | def get_cuda_version_of_torch(self): method get_platform (line 193) | def get_platform(self,): method get_cpu_instruct (line 204) | def get_cpu_instruct(self,): method get_torch_version (line 245) | def get_torch_version(self,): method get_flash_version (line 250) | def get_flash_version(self,): method get_package_version (line 259) | def get_package_version(self, full_version=False): class BuildWheelsCommand (line 282) | class BuildWheelsCommand(_bdist_wheel): method get_wheel_name (line 283) | def get_wheel_name(self,): method run (line 293) | def run(self): function colored (line 323) | def colored(text, color=None, bold=False): function split_line (line 335) | def split_line(text: str) -> List[str]: function colored (line 356) | def colored(text, color=None, bold=False): function split_line (line 368) | def split_line(text: str) -> List[str]: function run_command_with_live_tail (line 384) | def run_command_with_live_tail(ext: str, command: List[str], output_line... class CMakeExtension (line 494) | class CMakeExtension(Extension): method __init__ (line 495) | def __init__(self, name: str, sourcedir: str) -> None: function get_cmake_abi_args (line 500) | def get_cmake_abi_args(cmake_args): class CMakeBuild (line 507) | class CMakeBuild(BuildExtension): method build_extension (line 509) | def build_extension(self, ext) -> None: FILE: kt-sft/test_adapter/inspect_adapter.py function load_json (line 20) | def load_json(p: Path): function human_readable (line 25) | def human_readable(num: int) -> str: function inspect_adapter_weights (line 33) | def inspect_adapter_weights(weight_path: Path): function maybe_print_optimizer (line 57) | def maybe_print_optimizer(optimizer_pt: Path, max_keys: int = 20): function maybe_print_scheduler (line 74) | def maybe_print_scheduler(scheduler_pt: Path, max_keys: int = 20): function maybe_print_rng (line 91) | def maybe_print_rng(rng_pth: Path): function dump_tensors (line 105) | def dump_tensors(state: dict, out_dir="tensor_dump"): function main (line 125) | def main(): FILE: kt-sft/test_adapter/pred2metrics.py function load_pred_ref (line 8) | def load_pred_ref(pred_file: Path): function main (line 16) | def main(): FILE: kt-sft/withoutKT_PEFT.py function preprocess_function (line 21) | def preprocess_function(examples): function print_model_with_params (line 31) | def print_model_with_params(model, prefix="", max_layers=3, max_params=5): class KTrainer (line 133) | class KTrainer(Trainer): method save_model (line 134) | def save_model(self, output_dir=None, _internal_call=False): FILE: third_party/llamafile/micros.h function GetQueryPerformanceFrequency (line 19) | static long long GetQueryPerformanceFrequency() { function GetQueryPerformanceCounter (line 24) | static long long GetQueryPerformanceCounter() { function micros (line 31) | static long long micros(void) { FILE: third_party/llamafile/numba.h function rand32 (line 8) | inline int rand32(void) { function popcount (line 15) | inline int popcount(unsigned x) { function hamming (line 23) | inline int hamming(int x, int y) { function float01 (line 27) | inline float float01(unsigned x) { // (0,1) function numba (line 31) | inline float numba(void) { // (-10,10) FILE: third_party/llamafile/sgemm.cpp type GemmFuncs (line 32) | struct GemmFuncs { type ggml_compute_params (line 34) | struct ggml_compute_params type ggml_tensor (line 34) | struct ggml_tensor type ggml_tensor (line 34) | struct ggml_tensor type ggml_tensor (line 34) | struct ggml_tensor type ggml_tensor (line 34) | struct ggml_tensor method GemmFuncs (line 39) | GemmFuncs() { function llamafile_sgemm (line 190) | bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, co... function llamafile_mixmul (line 198) | bool llamafile_mixmul(const ggml_compute_params* params, const ggml_tens... function llamafile_mixmul_iqk (line 202) | bool llamafile_mixmul_iqk(long Nx, long Ny, long ne00, int ne11, int typ... FILE: third_party/llamafile/sgemm.h type ggml_tensor (line 13) | struct ggml_tensor type ggml_compute_params (line 14) | struct ggml_compute_params type ggml_compute_params (line 27) | struct ggml_compute_params type ggml_tensor (line 27) | struct ggml_tensor type ggml_tensor (line 27) | struct ggml_tensor type ggml_tensor (line 27) | struct ggml_tensor type ggml_tensor (line 27) | struct ggml_tensor type ggml_tensor (line 28) | struct ggml_tensor type ggml_tensor (line 28) | struct ggml_tensor type ggml_tensor (line 28) | struct ggml_tensor type ggml_compute_params (line 40) | struct ggml_compute_params type ggml_tensor (line 40) | struct ggml_tensor type ggml_tensor (line 40) | struct ggml_tensor type ggml_tensor (line 40) | struct ggml_tensor type ggml_tensor (line 40) | struct ggml_tensor type ggml_compute_params (line 41) | struct ggml_compute_params type ggml_tensor (line 41) | struct ggml_tensor type ggml_tensor (line 41) | struct ggml_tensor type ggml_tensor (line 41) | struct ggml_tensor type ggml_tensor (line 41) | struct ggml_tensor type ggml_compute_params (line 42) | struct ggml_compute_params type ggml_tensor (line 42) | struct ggml_tensor type ggml_tensor (line 42) | struct ggml_tensor type ggml_tensor (line 42) | struct ggml_tensor type ggml_tensor (line 42) | struct ggml_tensor type ggml_compute_params (line 43) | struct ggml_compute_params type ggml_tensor (line 43) | struct ggml_tensor type ggml_tensor (line 43) | struct ggml_tensor type ggml_tensor (line 43) | struct ggml_tensor type ggml_tensor (line 43) | struct ggml_tensor type ggml_compute_params (line 44) | struct ggml_compute_params type ggml_tensor (line 44) | struct ggml_tensor type ggml_tensor (line 44) | struct ggml_tensor type ggml_tensor (line 44) | struct ggml_tensor type ggml_tensor (line 44) | struct ggml_tensor type ggml_compute_params (line 45) | struct ggml_compute_params type ggml_tensor (line 45) | struct ggml_tensor type ggml_tensor (line 45) | struct ggml_tensor type ggml_tensor (line 45) | struct ggml_tensor type ggml_tensor (line 45) | struct ggml_tensor type ggml_compute_params (line 46) | struct ggml_compute_params type ggml_tensor (line 46) | struct ggml_tensor type ggml_tensor (line 46) | struct ggml_tensor type ggml_tensor (line 46) | struct ggml_tensor type ggml_tensor (line 46) | struct ggml_tensor type ggml_compute_params (line 47) | struct ggml_compute_params type ggml_tensor (line 47) | struct ggml_tensor type ggml_tensor (line 47) | struct ggml_tensor type ggml_tensor (line 47) | struct ggml_tensor type ggml_tensor (line 47) | struct ggml_tensor type ggml_compute_params (line 48) | struct ggml_compute_params type ggml_tensor (line 48) | struct ggml_tensor type ggml_tensor (line 48) | struct ggml_tensor type ggml_tensor (line 48) | struct ggml_tensor type ggml_tensor (line 48) | struct ggml_tensor type ggml_compute_params (line 64) | struct ggml_compute_params type ggml_tensor (line 64) | struct ggml_tensor type ggml_tensor (line 64) | struct ggml_tensor type ggml_tensor (line 64) | struct ggml_tensor type ggml_tensor (line 64) | struct ggml_tensor type ggml_tensor (line 65) | struct ggml_tensor type ggml_tensor (line 65) | struct ggml_tensor type ggml_tensor (line 65) | struct ggml_tensor type ggml_compute_params (line 77) | struct ggml_compute_params type ggml_tensor (line 77) | struct ggml_tensor type ggml_tensor (line 77) | struct ggml_tensor type ggml_tensor (line 77) | struct ggml_tensor type ggml_tensor (line 77) | struct ggml_tensor type ggml_compute_params (line 78) | struct ggml_compute_params type ggml_tensor (line 78) | struct ggml_tensor type ggml_tensor (line 78) | struct ggml_tensor type ggml_tensor (line 78) | struct ggml_tensor type ggml_tensor (line 78) | struct ggml_tensor type ggml_compute_params (line 79) | struct ggml_compute_params type ggml_tensor (line 79) | struct ggml_tensor type ggml_tensor (line 79) | struct ggml_tensor type ggml_tensor (line 79) | struct ggml_tensor type ggml_tensor (line 79) | struct ggml_tensor type ggml_compute_params (line 80) | struct ggml_compute_params type ggml_tensor (line 80) | struct ggml_tensor type ggml_tensor (line 80) | struct ggml_tensor type ggml_tensor (line 80) | struct ggml_tensor type ggml_tensor (line 80) | struct ggml_tensor type ggml_compute_params (line 81) | struct ggml_compute_params type ggml_tensor (line 81) | struct ggml_tensor type ggml_tensor (line 81) | struct ggml_tensor type ggml_tensor (line 81) | struct ggml_tensor type ggml_tensor (line 81) | struct ggml_tensor type ggml_compute_params (line 82) | struct ggml_compute_params type ggml_tensor (line 82) | struct ggml_tensor type ggml_tensor (line 82) | struct ggml_tensor type ggml_tensor (line 82) | struct ggml_tensor type ggml_tensor (line 82) | struct ggml_tensor type ggml_compute_params (line 83) | struct ggml_compute_params type ggml_tensor (line 83) | struct ggml_tensor type ggml_tensor (line 83) | struct ggml_tensor type ggml_tensor (line 83) | struct ggml_tensor type ggml_tensor (line 83) | struct ggml_tensor type ggml_compute_params (line 84) | struct ggml_compute_params type ggml_tensor (line 84) | struct ggml_tensor type ggml_tensor (line 84) | struct ggml_tensor type ggml_tensor (line 84) | struct ggml_tensor type ggml_tensor (line 84) | struct ggml_tensor type ggml_compute_params (line 85) | struct ggml_compute_params type ggml_tensor (line 85) | struct ggml_tensor type ggml_tensor (line 85) | struct ggml_tensor type ggml_tensor (line 85) | struct ggml_tensor type ggml_tensor (line 85) | struct ggml_tensor FILE: third_party/llamafile/tinyblas_cpu.h function tinyBLAS_not_supported (line 85) | bool tinyBLAS_not_supported(const char* file, int line) { function unhalf (line 90) | inline float unhalf(ggml_fp16_t d) { function unhalf (line 93) | inline float unhalf(ggml_bf16_t d) { function float (line 112) | struct ggml_type_trait { function ggml_bf16_t (line 116) | struct ggml_type_trait { function ggml_fp16_t (line 120) | struct ggml_type_trait { function block_q8_0 (line 124) | struct ggml_type_trait { function __m128 (line 132) | inline __m128 add(__m128 x, __m128 y) { function __m128 (line 135) | inline __m128 sub(__m128 x, __m128 y) { function __m128 (line 138) | inline __m128 mul(__m128 x, __m128 y) { function __m256 (line 144) | inline __m256 add(__m256 x, __m256 y) { function __m256 (line 147) | inline __m256 sub(__m256 x, __m256 y) { function __m256 (line 150) | inline __m256 mul(__m256 x, __m256 y) { function __m512 (line 156) | inline __m512 add(__m512 x, __m512 y) { function __m512 (line 159) | inline __m512 sub(__m512 x, __m512 y) { function __m512 (line 162) | inline __m512 mul(__m512 x, __m512 y) { function float32x4_t (line 168) | inline float32x4_t add(float32x4_t x, float32x4_t y) { function float32x4_t (line 171) | inline float32x4_t sub(float32x4_t x, float32x4_t y) { function float32x4_t (line 174) | inline float32x4_t mul(float32x4_t x, float32x4_t y) { function float16x8_t (line 180) | inline float16x8_t add(float16x8_t x, float16x8_t y) { function float16x8_t (line 183) | inline float16x8_t sub(float16x8_t x, float16x8_t y) { function float16x8_t (line 186) | inline float16x8_t mul(float16x8_t x, float16x8_t y) { function U (line 198) | U madd(T a, T b, U c) { function U (line 210) | U madder(T a, T b, U c, U* e) { function float32x4_t (line 218) | inline float32x4_t badder(float32x4_t a, float b, float32x4_t c, float32... function __m256 (line 229) | inline __m256 madd(__m256 a, __m256 b, __m256 c) { function __m512 (line 235) | inline __m512 madd(__m512 a, __m512 b, __m512 c) { function float32x4_t (line 243) | inline float32x4_t madd(float32x4_t a, float32x4_t b, float32x4_t c) { function float16x8_t (line 249) | inline float16x8_t madd(float16x8_t a, float16x8_t b, float16x8_t c) { function __m512 (line 258) | inline __m512 madd(__m512bh x, __m512bh y, __m512 z) { function __m512 (line 262) | inline __m512 madder(__m512bh x, __m512bh y, __m512 z, __m512* _) { function hsum (line 271) | inline float hsum(float32x4_t x) { function hsum (line 277) | inline float hsum(float16x8_t x) { function hsum (line 284) | inline float hsum(__m128 x) { function hsum (line 300) | inline float hsum(__m256 x) { function hsum (line 306) | inline float hsum(__m512 x) { function load (line 318) | inline float load(const float* p) { function load (line 322) | inline float load(const ggml_fp16_t* p) { function load (line 326) | inline float load(const ggml_bf16_t* p) { function float32x4_t (line 332) | inline float32x4_t load(const float* p) { function float32x4_t (line 336) | inline float32x4_t load(const ggml_bf16_t* p) { function float16x8_t (line 341) | inline float16x8_t load(const ggml_fp16_t* p) { function float32x4_t (line 345) | inline float32x4_t load(const ggml_fp16_t* p) { function __m128 (line 353) | inline __m128 load(const float* p) { function __m256 (line 360) | inline __m256 load(const float* p) { function __m256 (line 367) | inline __m256 load(const ggml_bf16_t* p) { function __m256 (line 375) | inline __m256 load(const ggml_fp16_t* p) { function __m512 (line 382) | inline __m512 load(const float* p) { function __m512 (line 386) | inline __m512 load(const ggml_fp16_t* p) { function __m512 (line 390) | inline __m512 load(const ggml_bf16_t* p) { function __m512bh (line 398) | inline __m512bh load(const ggml_bf16_t* p) { function __m512bh (line 402) | inline __m512bh load(const float* p) { function store (line 410) | inline void store(float* p, float f) { function store (line 414) | inline void store(ggml_fp16_t* p, float f) { function store (line 418) | inline void store(ggml_bf16_t* p, float f) { function gemm (line 616) | void gemm(long m0, long m, long n0, long n) { function gemm (line 759) | void gemm(long m0, long m, long n0, long n) { function int8x16_t (line 797) | inline int8x16_t load_lo(const block_q8_0* b) { function int8x16_t (line 801) | inline int8x16_t load_hi(const block_q8_0* b) { function int8x16_t (line 805) | inline int8x16_t load_lo(const block_q4_0* b) { function int8x16_t (line 810) | inline int8x16_t load_hi(const block_q4_0* b) { function gemm (line 982) | void gemm(long m0, long m, long n0, long n) { function __m256i (line 1020) | inline __m256i load(const block_q8_0* b) { function __m256i (line 1024) | inline __m256i load(const block_q4_0* b) { function __m256 (line 1032) | inline __m256 updot(__m256i u, __m256i s) { FILE: third_party/llamafile/tinyblas_cpu_mixmul_amd_avx.cpp function llamafile_mixmul_needs (line 13) | size_t llamafile_mixmul_needs(const ggml_tensor* weights, const ggml_ten... FILE: third_party/llamafile/tinyblas_cpu_mixmul_arm80.cpp function llamafile_mixmul_needs (line 13) | size_t llamafile_mixmul_needs(const ggml_tensor* weights, const ggml_ten... FILE: third_party/llamafile/tinyblas_cpu_unsupported.cpp function llamafile_sgemm_unsupported (line 25) | bool llamafile_sgemm_unsupported(long m, long n, long k, const void* A, ... function llamafile_mixmul_unsupported (line 29) | bool llamafile_mixmul_unsupported(const struct ggml_compute_params* params, function iqk_mul_mat_moe_unsupported (line 37) | bool iqk_mul_mat_moe_unsupported(long, long, long, int, int, const void*...