SYMBOL INDEX (7608 symbols across 621 files)

FILE: archive/csrc/custom_marlin/binding.cpp
  function PYBIND11_MODULE (line 20) | PYBIND11_MODULE(vLLMMarlin, m) {

FILE: archive/csrc/custom_marlin/test_cuda_graph.py
  function setup_seed (line 14) | def setup_seed(seed):
  function get_usable_mem (line 33) | def get_usable_mem():
  function exp_range (line 42) | def exp_range(start, stop, step = 2):
  function timing (line 48) | def timing(func, iters, epochs=100):
  class LinearMarlin (line 88) | class LinearMarlin(nn.Linear):
    method __init__ (line 94) | def __init__(
    method forward (line 168) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor) -> torch....
  function benchLinearMarlin (line 208) | def benchLinearMarlin(input_dim, output_dim):#, out_file
  function printMinMax (line 314) | def printMinMax(tensor):

FILE: archive/csrc/custom_marlin/utils/format24.py
  function _calculate_meta_reordering_scatter_offsets (line 21) | def _calculate_meta_reordering_scatter_offsets(m, meta_ncols, meta_dtype,
  function sparse_semi_structured_from_dense_cutlass (line 52) | def sparse_semi_structured_from_dense_cutlass(dense):
  function sparse_semi_structured_to_dense_cutlass (line 184) | def sparse_semi_structured_to_dense_cutlass(sparse, meta_reordered):
  function mask_creator (line 279) | def mask_creator(tensor):

FILE: archive/csrc/custom_marlin/utils/marlin_24_perms.py
  function get_perms_24 (line 21) | def get_perms_24(num_bits: int):

FILE: archive/csrc/custom_marlin/utils/marlin_perms.py
  function get_perms (line 21) | def get_perms(num_bits: int):

FILE: archive/csrc/custom_marlin/utils/marlin_utils.py
  function is_marlin_supported (line 31) | def is_marlin_supported():
  function marlin_permute_weights (line 35) | def marlin_permute_weights(q_w, size_k, size_n, perm, tile=MARLIN_TILE):
  function marlin_weights (line 50) | def marlin_weights(q_w, size_k, size_n, num_bits, perm):
  function marlin_permute_scales (line 70) | def marlin_permute_scales(s, size_k, size_n, group_size, scale_perm,
  function marlin_quantize (line 81) | def marlin_quantize(
  function inject_24 (line 119) | def inject_24(w, size_k, size_n):
  function check_24 (line 127) | def check_24(w, num_rows_to_sample=50, _verbose=False):
  function compress_quantized_24_weight (line 154) | def compress_quantized_24_weight(q_24, size_k, size_n, num_bits):
  function marlin_24_quantize (line 177) | def marlin_24_quantize(
  function compute_max_diff (line 218) | def compute_max_diff(output, output_ref):
  class MarlinWorkspace (line 223) | class MarlinWorkspace:
    method __init__ (line 225) | def __init__(self, out_features, min_thread_n, max_parallel, device):

FILE: archive/csrc/custom_marlin/utils/quant_utils.py
  function get_pack_factor (line 9) | def get_pack_factor(num_bits):
  function permute_rows (line 14) | def permute_rows(q_w: torch.Tensor, w_ref: torch.Tensor, group_size: int):
  function dequantize_weights (line 40) | def dequantize_weights(qweight, qzeros, scales, g_idx, bits=4, group_siz...
  function quantize_weights (line 67) | def quantize_weights(w: torch.Tensor, num_bits: int, group_size: int,
  function sort_weights (line 137) | def sort_weights(q_w: torch.Tensor, g_idx: torch.Tensor):
  function gptq_pack (line 153) | def gptq_pack(
  function gptq_unpack (line 176) | def gptq_unpack(

FILE: archive/csrc/ktransformers_ext/bench/bench_attention.py
  function bench_linear (line 41) | def bench_linear(cache_seqlen: int):

FILE: archive/csrc/ktransformers_ext/bench/bench_attention_torch.py
  function bench_linear (line 29) | def bench_linear(cache_seqlen: int, device):

FILE: archive/csrc/ktransformers_ext/bench/bench_linear.py
  function bench_linear (line 28) | def bench_linear(quant_mode: str):

FILE: archive/csrc/ktransformers_ext/bench/bench_linear_torch.py
  function bench_linear (line 26) | def bench_linear(quant_mode: str):

FILE: archive/csrc/ktransformers_ext/bench/bench_mlp.py
  function bench_mlp (line 28) | def bench_mlp(quant_mode: str):

FILE: archive/csrc/ktransformers_ext/bench/bench_mlp_torch.py
  function act_fn (line 26) | def act_fn(x):
  function mlp_torch (line 29) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function bench_mlp (line 47) | def bench_mlp(quant_mode: str):

FILE: archive/csrc/ktransformers_ext/bench/bench_moe.py
  function bench_moe (line 31) | def bench_moe(quant_mode: str):

FILE: archive/csrc/ktransformers_ext/bench/bench_moe_amx.py
  function bench_moe (line 29) | def bench_moe(quant_mode: str):

FILE: archive/csrc/ktransformers_ext/bench/bench_moe_torch.py
  function act_fn (line 28) | def act_fn(x):
  function mlp_torch (line 31) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 49) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function bench_moe (line 80) | def bench_moe(quant_mode: str):

FILE: archive/csrc/ktransformers_ext/cpu_backend/backend.cpp
  type bitmask (line 93) | struct bitmask

FILE: archive/csrc/ktransformers_ext/cpu_backend/backend.h
  type ThreadStatus (line 21) | enum ThreadStatus {
  type ThreadState (line 27) | struct ThreadState {
  function class (line 33) | class Backend {

FILE: archive/csrc/ktransformers_ext/cpu_backend/cpuinfer.h
  function class (line 36) | class CPUInfer {
  function submit (line 58) | void submit(std::pair<intptr_t, intptr_t> params) {
  function sync (line 65) | void sync() {
  function submit_with_cuda_stream (line 69) | void submit_with_cuda_stream(intptr_t user_cuda_stream, std::pair<intptr...
  function sync_ (line 80) | static void sync_(void* cpu_infer_ptr) {
  function sync_with_cuda_stream (line 85) | void sync_with_cuda_stream(intptr_t user_cuda_stream) {

FILE: archive/csrc/ktransformers_ext/cpu_backend/shared_mem_buffer.h
  function class (line 19) | class SharedMemBuffer {

FILE: archive/csrc/ktransformers_ext/cpu_backend/task_queue.h
  function class (line 24) | class custom_mutex {
  function class (line 74) | class custom_condition_variable {
  function class (line 119) | class TaskQueue {

FILE: archive/csrc/ktransformers_ext/cpu_backend/vendors/hip.h
  type hip_bfloat16 (line 172) | typedef hip_bfloat16 nv_bfloat16;

FILE: archive/csrc/ktransformers_ext/cpu_backend/vendors/musa.h
  type mt_bfloat16 (line 137) | typedef mt_bfloat16 nv_bfloat16;

FILE: archive/csrc/ktransformers_ext/cuda/binding.cpp
  function PYBIND11_MODULE (line 21) | PYBIND11_MODULE(KTransformersOps, m) {

FILE: archive/csrc/ktransformers_ext/examples/test_mlp.py
  function act_fn (line 31) | def act_fn(x):
  function mlp_torch (line 34) | def mlp_torch(input, gate_proj, up_proj, down_proj):

FILE: archive/csrc/ktransformers_ext/examples/test_moe.py
  function act_fn (line 34) | def act_fn(x):
  function mlp_torch (line 37) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 44) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):

FILE: archive/csrc/ktransformers_ext/ext_bindings.cpp
  class KVCacheBindings (line 37) | class KVCacheBindings {
    class AttnBindings (line 39) | class AttnBindings {
      type Args (line 41) | struct Args {
      method inner (line 58) | static void inner(void *args) {
      method cpuinfer_interface (line 67) | static std::pair<intptr_t, intptr_t>
    class GetAllKVCacheOneLayerBindings (line 93) | class GetAllKVCacheOneLayerBindings {
      type Args (line 95) | struct Args {
      method inner (line 102) | static void inner(void *args) {
      method cpuinfer_interface (line 108) | static std::pair<intptr_t, intptr_t>
    class GetAndUpdateKVCacheFp16Bindings (line 117) | class GetAndUpdateKVCacheFp16Bindings {
      type Args (line 119) | struct Args {
      method inner (line 131) | static void inner(void *args) {
      method cpuinfer_interface (line 139) | static std::pair<intptr_t, intptr_t>
    class GetKVCacheFp16Bindings (line 157) | class GetKVCacheFp16Bindings {
      type Args (line 159) | struct Args {
      method inner (line 170) | static void inner(void *args) {
      method cpuinfer_interface (line 177) | static std::pair<intptr_t, intptr_t>
    class UpdateKVCacheFp16Bindings (line 194) | class UpdateKVCacheFp16Bindings {
      type Args (line 196) | struct Args {
      method inner (line 208) | static void inner(void *args) {
      method cpuinfer_interface (line 216) | static std::pair<intptr_t, intptr_t>
    class UpdateImportanceBindings (line 235) | class UpdateImportanceBindings {
      type Args (line 237) | struct Args {
      method inner (line 248) | static void inner(void *args) {
      method cpuinfer_interface (line 255) | static std::pair<intptr_t, intptr_t>
    class AttnWithKVCacheBindings (line 272) | class AttnWithKVCacheBindings {
      type Args (line 274) | struct Args {
      method inner (line 292) | static void inner(void *args) {
      method cpuinfer_interface (line 301) | static std::pair<intptr_t, intptr_t>
    class ClearImportanceAllLayersBindings (line 328) | class ClearImportanceAllLayersBindings {
      type Args (line 330) | struct Args {
      method inner (line 338) | static void inner(void *args) {
      method cpuinfer_interface (line 345) | static std::pair<intptr_t, intptr_t>
    class CalcAnchorAllLayersBindinds (line 359) | class CalcAnchorAllLayersBindinds {
      type Args (line 361) | struct Args {
      method inner (line 369) | static void inner(void *args) {
      method cpuinfer_interface (line 376) | static std::pair<intptr_t, intptr_t>
    class LoadKVCacheBindings (line 390) | class LoadKVCacheBindings {
      type Args (line 392) | struct Args {
      method inner (line 397) | static void inner(void *args) {
      method cpuinfer_interface (line 402) | static std::pair<intptr_t, intptr_t>
    class DumpKVCacheBindings (line 409) | class DumpKVCacheBindings {
      type Args (line 411) | struct Args {
      method inner (line 418) | static void inner(void *args) {
      method cpuinfer_interface (line 424) | static std::pair<intptr_t, intptr_t>
  class LinearBindings (line 435) | class LinearBindings {
    class WarmUpBindinds (line 437) | class WarmUpBindinds {
      type Args (line 439) | struct Args {
      method inner (line 443) | static void inner(void *args) {
      method cpuinfer_interface (line 447) | static std::pair<intptr_t, intptr_t>
    class ForwardBindings (line 453) | class ForwardBindings {
      type Args (line 455) | struct Args {
      method inner (line 462) | static void inner(void *args) {
      method cpuinfer_interface (line 467) | static std::pair<intptr_t, intptr_t>
  class MLPBindings (line 477) | class MLPBindings {
    class WarmUpBindinds (line 479) | class WarmUpBindinds {
      type Args (line 481) | struct Args {
      method inner (line 485) | static void inner(void *args) {
      method cpuinfer_interface (line 489) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(MLP &mlp) {
    class ForwardBindings (line 494) | class ForwardBindings {
      type Args (line 496) | struct Args {
      method inner (line 503) | static void inner(void *args) {
      method cpuinfer_interface (line 508) | static std::pair<intptr_t, intptr_t>
  class MOEBindings (line 518) | class MOEBindings {
    class WarmUpBindinds (line 520) | class WarmUpBindinds {
      type Args (line 522) | struct Args {
      method inner (line 526) | static void inner(void *args) {
      method cpuinfer_interface (line 530) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(MOE &moe) {
    class ForwardBindings (line 535) | class ForwardBindings {
      type Args (line 537) | struct Args {
      method inner (line 548) | static void inner(void *args) {
      method cpuinfer_interface (line 554) | static std::pair<intptr_t, intptr_t>
  class AMX_MOEBindings (line 574) | class AMX_MOEBindings {
    class WarmUpBindings (line 576) | class WarmUpBindings {
      type Args (line 578) | struct Args {
      method inner (line 582) | static void inner(void *args) {
      method cpuinfer_interface (line 586) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(AMX_MOE<T> &...
    class LoadWeightsBindings (line 591) | class LoadWeightsBindings {
      type Args (line 593) | struct Args {
      method inner (line 597) | static void inner(void *args) {
      method cpuinfer_interface (line 601) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(AMX_MOE<T> &...
    class ForwardBindings (line 606) | class ForwardBindings {
      type Args (line 608) | struct Args {
      method inner (line 619) | static void inner(void *args) {
      method cpuinfer_interface (line 625) | static std::pair<intptr_t, intptr_t>
  function PYBIND11_MODULE (line 643) | PYBIND11_MODULE(cpuinfer_ext, m) {

FILE: archive/csrc/ktransformers_ext/operators/amx/la/amx.hpp
  type amx (line 41) | namespace amx {
    function enable_amx (line 63) | inline bool enable_amx() {
    type TileConfig (line 80) | struct alignas(64) TileConfig {
      method TileConfig (line 89) | TileConfig() {
      method set_row_col (line 97) | void set_row_col(int i, uint8_t row, uint16_t col) {
      method set_config (line 102) | void set_config() { _tile_loadconfig(this); }
      method load_data (line 104) | static void load_data(int to, void *from, size_t stride) {
      method store_data (line 135) | static void store_data(int from, void *to, size_t stride) {
    function debug_tile (line 169) | inline void debug_tile(int t) {
    function debug_tiles (line 182) | inline void debug_tiles(int to = 8) {
    function debug_m512 (line 188) | inline void debug_m512(__m512 x) {
    function transpose_16x16_32bit (line 198) | inline void transpose_16x16_32bit(__m512i *v) {
    function transpose_16x16_32bit (line 273) | inline void transpose_16x16_32bit(__m512i *v, size_t stride) {
    type GemmKernel224BF (line 348) | struct GemmKernel224BF {
      method recommended_nth (line 363) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 365) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 371) | static void config() {
      method load_a (line 390) | static void load_a(dt *a, size_t lda) {
      method load_b (line 395) | static void load_b(dt *b, size_t ldb) {
      method clean_c (line 400) | static void clean_c() {
      method load_c (line 407) | static void load_c(output_t *c, size_t ldc) {
      method store_c (line 414) | static void store_c(output_t *c, size_t ldc) {
      method run_tile (line 421) | static void run_tile() {
      type BufferA (line 428) | struct BufferA {
        method required_size (line 432) | static size_t required_size(int max_m, int k) { return max_m * k *...
        method BufferA (line 434) | BufferA(int max_m, int k, void *ptr) : max_m(max_m), k(k) {
        method from_mat (line 441) | void from_mat(int m, ggml_bf16_t *src, int ith, int nth) {
        method ggml_bf16_t (line 460) | ggml_bf16_t *get_submat(int m, int k, int m_begin, int k_begin) {
      type BufferB (line 469) | struct BufferB {
        method required_size (line 473) | static size_t required_size(int n, int k) { return n * k * sizeof(...
        method BufferB (line 475) | BufferB(int n, int k, void *ptr) : n(n), k(k) {
        method from_mat (line 482) | void from_mat(ggml_bf16_t *src, int ith, int nth) {
        method ggml_bf16_t (line 505) | ggml_bf16_t *get_submat(int n, int k, int n_begin, int k_begin) {
      type BufferC (line 516) | struct BufferC {
        method required_size (line 520) | static size_t required_size(int max_m, int n) { return max_m * n *...
        method BufferC (line 522) | BufferC(int max_m, int n, void *ptr) : max_m(max_m), n(n) {
        method to_mat (line 529) | void to_mat(int m, ggml_bf16_t *dst, int ith, int nth) {
    type GemmKernel224Int8 (line 558) | struct GemmKernel224Int8 {
      method recommended_nth (line 573) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 575) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 581) | static void config() {
      method load_a (line 600) | static void load_a(dt *a, size_t lda) {
      method load_b (line 605) | static void load_b(dt *b, size_t ldb) {
      method clean_c (line 610) | static void clean_c() {
      method load_c (line 617) | static void load_c(output_t *c, size_t ldc) {
      method store_c (line 624) | static void store_c(output_t *c, size_t ldc) {
      method run_tile (line 631) | static void run_tile() {
      type BufferA (line 638) | struct BufferA {
        method required_size (line 643) | static size_t required_size(int max_m, int k) { return max_m * k *...
        method BufferA (line 645) | BufferA(int max_m, int k, void *ptr) : max_m(max_m), k(k) {
        method from_mat (line 653) | void from_mat(int m, ggml_bf16_t *src, int ith, int nth) {
      type BufferB (line 708) | struct BufferB {
        method required_size (line 713) | static size_t required_size(int n, int k) { return n * k * sizeof(...
        method BufferB (line 715) | BufferB(int n, int k, void *ptr) : n(n), k(k) {
        method from_mat (line 723) | void from_mat(ggml_bf16_t *src, int ith, int nth) {
      type BufferC (line 787) | struct BufferC {
        method required_size (line 791) | static size_t required_size(int max_m, int n) { return max_m * n *...
        method BufferC (line 793) | BufferC(int max_m, int n, void *ptr) : max_m(max_m), n(n) {
        method to_mat (line 800) | void to_mat(int m, ggml_bf16_t *dst, int ith, int nth) {
    function mat_mul (line 829) | inline void mat_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function __m512i (line 883) | inline __m512i _mm512_dpbssd_epi32(__m512i src, __m512i a, __m512i b) {
    function mat_mul (line 900) | inline void mat_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...

FILE: archive/csrc/ktransformers_ext/operators/amx/la/utils.hpp
  function T (line 16) | T* offset_pointer(T* ptr, std::size_t byte_offset) {
  function T (line 21) | const T* offset_pointer(const T* ptr, std::size_t byte_offset) {
  function T (line 26) | T* offset_pointer_row_major(T* t, int row, int col, std::size_t ld) {
  function T (line 31) | T* offset_pointer_col_major(T* t, int row, int col, std::size_t ld) {
  function avx512_copy_32xbf16 (line 35) | static inline void avx512_copy_32xbf16(__m512i* src, __m512i* dst) {
  function avx512_32xfp32_to_32xbf16 (line 39) | static inline void avx512_32xfp32_to_32xbf16(__m512* src0, __m512* src1,...
  function avx512_32xbf16_to_32xfp32 (line 43) | static inline void avx512_32xbf16_to_32xfp32(__m512i* src, __m512* dst0,...

FILE: archive/csrc/ktransformers_ext/operators/amx/moe.hpp
  function __m512 (line 38) | static inline __m512 exp_avx512(__m512 x) {
  function __m512 (line 63) | static inline __m512 act_fn(__m512 gate_val, __m512 up_val) {
  function __m512 (line 72) | static inline __m512 relu_act_fn(__m512 gate_val, __m512 up_val) {
  type AMX_MOEConfig (line 78) | struct AMX_MOEConfig {
    method AMX_MOEConfig (line 89) | AMX_MOEConfig() {}
    method AMX_MOEConfig (line 91) | AMX_MOEConfig(int expert_num, int routed_expert_num, int hidden_size, ...
  class AMX_MOE (line 98) | class AMX_MOE {
    method AMX_MOE (line 135) | AMX_MOE(AMX_MOEConfig config) {
    method load_weights (line 230) | void load_weights(Backend *backend) {
    method warm_up (line 278) | void warm_up(Backend *backend) {}
    method forward (line 280) | void forward(int qlen, int k, const uint64_t *expert_ids, const float ...

FILE: archive/csrc/ktransformers_ext/operators/kvcache/kvcache.h
  type AnchorType (line 63) | enum AnchorType {
  type RetrievalType (line 94) | enum RetrievalType {
  type KVCacheConfig (line 122) | struct KVCacheConfig {
  function class (line 193) | class KVCache {

FILE: archive/csrc/ktransformers_ext/operators/kvcache/kvcache_utils.cpp
  function ggml_type_to_string (line 15) | std::string ggml_type_to_string(ggml_type type) {
  function AnchorTypeToString (line 28) | std::string AnchorTypeToString(AnchorType type) {
  function RetrievalTypeToString (line 43) | std::string RetrievalTypeToString(RetrievalType type) {
  function ggml_vec_scale_f32 (line 1130) | void ggml_vec_scale_f32(const int n, float *y, const float v) {

FILE: archive/csrc/ktransformers_ext/operators/llamafile/conversion.h
  function to_float (line 16) | inline void to_float(const void* input, float* output, int size, ggml_ty...
  function from_float (line 24) | inline void from_float(const float* input, void* output, int size, ggml_...

FILE: archive/csrc/ktransformers_ext/operators/llamafile/linear.h
  type LinearConfig (line 27) | struct LinearConfig {

FILE: archive/csrc/ktransformers_ext/operators/llamafile/mlp.cpp
  function act_fn (line 49) | static float act_fn(float x) { return x / (1.0f + expf(-x)); }

FILE: archive/csrc/ktransformers_ext/operators/llamafile/mlp.h
  type MLPConfig (line 27) | struct MLPConfig {

FILE: archive/csrc/ktransformers_ext/operators/llamafile/moe.cpp
  function act_fn (line 134) | static float act_fn(float x) {
  function act_fn_relu (line 138) | static float act_fn_relu(float x) {

FILE: archive/csrc/ktransformers_ext/operators/llamafile/moe.h
  type MOEConfig (line 27) | struct MOEConfig {

FILE: archive/csrc/ktransformers_ext/vendors/hip.h
  type hip_bfloat16 (line 172) | typedef hip_bfloat16 nv_bfloat16;

FILE: archive/csrc/ktransformers_ext/vendors/musa.h
  type mt_bfloat16 (line 137) | typedef mt_bfloat16 nv_bfloat16;

FILE: archive/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/format_24.py
  function _calculate_meta_reordering_scatter_offsets (line 21) | def _calculate_meta_reordering_scatter_offsets(m, meta_ncols, meta_dtype,
  function sparse_semi_structured_from_dense_cutlass (line 52) | def sparse_semi_structured_from_dense_cutlass(dense):
  function sparse_semi_structured_to_dense_cutlass (line 184) | def sparse_semi_structured_to_dense_cutlass(sparse, meta_reordered):
  function mask_creator (line 279) | def mask_creator(tensor):

FILE: archive/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_24_perms.py
  function get_perms_24 (line 16) | def get_perms_24(num_bits: int):

FILE: archive/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_perms.py
  function get_perms (line 16) | def get_perms(num_bits: int):

FILE: archive/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_utils.py
  function is_marlin_supported (line 29) | def is_marlin_supported():
  function marlin_permute_weights (line 33) | def marlin_permute_weights(q_w, size_k, size_n, perm, tile=MARLIN_TILE):
  function marlin_weights (line 48) | def marlin_weights(q_w, size_k, size_n, num_bits, perm):
  function marlin_permute_scales (line 68) | def marlin_permute_scales(s, size_k, size_n, group_size, scale_perm,
  function marlin_quantize (line 79) | def marlin_quantize(
  function vllm_marlin_quantize (line 117) | def vllm_marlin_quantize(
  function inject_24 (line 155) | def inject_24(w, size_k, size_n):
  function check_24 (line 163) | def check_24(w, num_rows_to_sample=50, _verbose=False):
  function compress_quantized_24_weight (line 190) | def compress_quantized_24_weight(q_24, size_k, size_n, num_bits):
  function marlin_24_quantize (line 213) | def marlin_24_quantize(
  function compute_max_diff (line 254) | def compute_max_diff(output, output_ref):
  class MarlinWorkspace (line 259) | class MarlinWorkspace:
    method __init__ (line 261) | def __init__(self, out_features, min_thread_n, max_parallel, device):

FILE: archive/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/quant_utils.py
  function get_pack_factor (line 9) | def get_pack_factor(num_bits):
  function permute_rows (line 14) | def permute_rows(q_w: torch.Tensor, group_size: int):
  function quantize_weights (line 36) | def quantize_weights(w: torch.Tensor, num_bits: int, group_size: int,
  function sort_weights (line 101) | def sort_weights(q_w: torch.Tensor, g_idx: torch.Tensor):
  function gptq_pack (line 117) | def gptq_pack(

FILE: archive/ktransformers/ktransformers_ext/triton/fp8gemm.py
  function act_quant_kernel (line 11) | def act_quant_kernel(x_ptr, y_ptr, s_ptr, BLOCK_SIZE: tl.constexpr):
  function act_quant (line 34) | def act_quant(x: torch.Tensor, block_size: int = 128) -> Tuple[torch.Ten...
  function weight_dequant_kernel (line 57) | def weight_dequant_kernel(x_ptr, s_ptr, y_ptr, M, N, BLOCK_SIZE: tl.cons...
  function weight_dequant (line 85) | def weight_dequant(x: torch.Tensor, s: torch.Tensor, block_size: int = 1...
  function fp8_gemm_kernel (line 117) | def fp8_gemm_kernel(a_ptr, b_ptr, c_ptr,
  function fp8_gemm (line 172) | def fp8_gemm(a: torch.Tensor, a_s: torch.Tensor, b: torch.Tensor, b_s: t...

FILE: archive/ktransformers/local_chat.py
  function local_chat (line 76) | def local_chat(

FILE: archive/ktransformers/local_chat_test.py
  function local_chat (line 55) | def local_chat(

FILE: archive/ktransformers/models/ascend/custom_ascend_modeling_deepseek_v3.py
  class KNPUDeepseekV3ForCausalLM (line 31) | class KNPUDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
    method __init__ (line 36) | def __init__(
    method init_wrapper (line 54) | def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pag...
    method batch_embeddings (line 57) | def batch_embeddings(self, batch: ForwardBatchInput, device="npu:0", i...
    method print_callback (line 111) | def print_callback(self, param):
    method forward (line 118) | def forward(
    method flash_infer_attn_plan (line 215) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: archive/ktransformers/models/ascend/custom_ascend_modeling_qwen3.py
  class KNPUQwen3MoeForCausalLM (line 39) | class KNPUQwen3MoeForCausalLM(Qwen3MoePreTrainedModel):
    method __init__ (line 44) | def __init__(
    method init_wrapper (line 84) | def init_wrapper(self):
    method batch_embeddings (line 90) | def batch_embeddings(
    method forward (line 158) | def forward(
    method flash_infer_attn_plan (line 275) | def flash_infer_attn_plan(

FILE: archive/ktransformers/models/configuration_deepseek.py
  class DeepseekV2Config (line 11) | class DeepseekV2Config(PretrainedConfig):
    method __init__ (line 113) | def __init__(

FILE: archive/ktransformers/models/configuration_deepseek_v3.py
  class DeepseekV3Config (line 7) | class DeepseekV3Config(PretrainedConfig):
    method __init__ (line 106) | def __init__(

FILE: archive/ktransformers/models/configuration_glm4_moe.py
  class Glm4MoeConfig (line 26) | class Glm4MoeConfig(PretrainedConfig):
    method __init__ (line 170) | def __init__(

FILE: archive/ktransformers/models/configuration_llama.py
  class LlamaConfig (line 26) | class LlamaConfig(PretrainedConfig):
    method __init__ (line 143) | def __init__(

FILE: archive/ktransformers/models/configuration_qwen2_moe.py
  class Qwen2MoeConfig (line 24) | class Qwen2MoeConfig(PretrainedConfig):
    method __init__ (line 115) | def __init__(

FILE: archive/ktransformers/models/configuration_qwen3_moe.py
  class Qwen3MoeConfig (line 25) | class Qwen3MoeConfig(PretrainedConfig):
    method __init__ (line 161) | def __init__(

FILE: archive/ktransformers/models/configuration_qwen3_next.py
  class Qwen3NextConfig (line 25) | class Qwen3NextConfig(PretrainedConfig):
    method __init__ (line 180) | def __init__(

FILE: archive/ktransformers/models/configuration_smallthinker.py
  class SmallthinkerConfig (line 4) | class SmallthinkerConfig(PretrainedConfig):
    method __init__ (line 65) | def __init__(self,

FILE: archive/ktransformers/models/custom_cache.py
  class StaticCache (line 27) | class StaticCache(transformers.StaticCache):
    method __init__ (line 45) | def __init__(self, config: PretrainedConfig, max_batch_size: int, max_...
    method max_batch_size (line 140) | def max_batch_size(self):
    method max_cache_len (line 144) | def max_cache_len(self):
    method update (line 147) | def update(
    method get_seq_length (line 204) | def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
    method change_seq_length (line 211) | def change_seq_length(self, bias: Optional[int] = 0) -> int:
    method get_max_length (line 219) | def get_max_length(self) -> Optional[int]:
    method get_usable_length (line 223) | def get_usable_length(self, kv_seq_len, layer_idx: Optional[int] = 0) ...
    method reset (line 226) | def reset(self):
    method remove_suffix (line 238) | def remove_suffix(self, start_pos):
    method get_max_cache_shape (line 249) | def get_max_cache_shape(self) -> Tuple[int, int, int, int]:
  class KVC2StaticCache (line 253) | class KVC2StaticCache:
    method __init__ (line 258) | def __init__(self, config: PretrainedConfig, max_batch_size, page_size...
    method load (line 275) | def load(self, inference_context):
    method update (line 289) | def update(
    method get_seq_length (line 328) | def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
    method get_usable_length (line 332) | def get_usable_length(self, kv_seq_len, layer_idx: Optional[int] = 0) ...
    method change_seq_length (line 335) | def change_seq_length(self, bias: Optional[int] = 0) -> int:
    method get_max_length (line 339) | def get_max_length(self) -> Optional[int]:
    method reset (line 343) | def reset(self, inference_context):
    method get_page_table (line 354) | def get_page_table(self, mini_batch, bsz_tensors: torch.tensor = None,...
  class KDeepSeekV3Cache (line 387) | class KDeepSeekV3Cache(nn.Module):
    method __init__ (line 388) | def __init__(
    method load (line 406) | def load(self, inference_context: "sched_ext.InferenceContext"):
    method update (line 414) | def update(
    method get_page_table (line 450) | def get_page_table(self, cache_position: torch.Tensor, q_indptr: torch...
  class KGQACache (line 468) | class KGQACache(nn.Module):
    method __init__ (line 469) | def __init__(
    method load (line 486) | def load(self, inference_context: "sched_ext.InferenceContext"):
    method get_page_table (line 501) | def get_page_table(self, cache_position: torch.Tensor, q_indptr: torch...
    method get_k_cache (line 519) | def get_k_cache(self, layer_idx):
    method get_v_cache (line 522) | def get_v_cache(self, layer_idx):
  class KVC2Qwen3Cache (line 526) | class KVC2Qwen3Cache(nn.Module):
    method __init__ (line 528) | def __init__(self, config, max_batch_size, page_size=256,
    method load (line 547) | def load(self, inference_context):
    method update (line 575) | def update(
    method get_k_cache (line 635) | def get_k_cache(self, layer_idx):
    method get_v_cache (line 638) | def get_v_cache(self, layer_idx):
    method get_page_table (line 642) | def get_page_table(

FILE: archive/ktransformers/models/custom_modeling_deepseek_v2.py
  class KDeepseekV2ForCausalLM (line 21) | class KDeepseekV2ForCausalLM(DeepseekV2PreTrainedModel):
    method __init__ (line 25) | def __init__(
    method init_wrapper (line 40) | def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pag...
    method batch_embeddings (line 57) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method forward (line 71) | def forward(
    method flash_infer_attn_plan (line 140) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: archive/ktransformers/models/custom_modeling_deepseek_v3.py
  class KDeepseekV3ForCausalLM (line 27) | class KDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
    method __init__ (line 31) | def __init__(
    method init_wrapper (line 43) | def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pag...
    method batch_embeddings (line 61) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method forward (line 75) | def forward(
    method flash_infer_attn_plan (line 136) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: archive/ktransformers/models/custom_modeling_glm4_moe.py
  class KGlm4MoeForCausalLM (line 27) | class KGlm4MoeForCausalLM(Glm4MoePreTrainedModel):
    method __init__ (line 31) | def __init__(
    method init_wrapper (line 45) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba...
    method batch_embeddings (line 49) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method forward (line 63) | def forward(
    method flash_infer_attn_plan (line 111) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: archive/ktransformers/models/custom_modeling_qwen2_moe.py
  class KQwen2MoeForCausalLM (line 27) | class KQwen2MoeForCausalLM(Qwen2MoePreTrainedModel):
    method __init__ (line 31) | def __init__(
    method init_wrapper (line 44) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba...
    method batch_embeddings (line 48) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method forward (line 62) | def forward(
    method flash_infer_attn_plan (line 120) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: archive/ktransformers/models/custom_modeling_qwen3_moe.py
  class KQwen3MoeForCausalLM (line 27) | class KQwen3MoeForCausalLM(Qwen3MoePreTrainedModel):
    method __init__ (line 31) | def __init__(
    method init_wrapper (line 44) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba...
    method batch_embeddings (line 48) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method forward (line 62) | def forward(
    method flash_infer_attn_plan (line 120) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: archive/ktransformers/models/custom_modeling_qwen3_next.py
  class KQwen3NextForCausalLM (line 27) | class KQwen3NextForCausalLM(Qwen3NextPreTrainedModel):
    method __init__ (line 31) | def __init__(
    method init_wrapper (line 46) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba...
    method batch_embeddings (line 50) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method reset_conv_states (line 63) | def reset_conv_states(self):
    method forward (line 69) | def forward(
    method flash_infer_attn_plan (line 127) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: archive/ktransformers/models/custom_modeling_smallthinker.py
  class KSmallThinkerForCausalLM (line 27) | class KSmallThinkerForCausalLM(SmallthinkerPreTrainedModel):
    method __init__ (line 31) | def __init__(
    method init_wrapper (line 45) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba...
    method batch_embeddings (line 49) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method forward (line 63) | def forward(
    method flash_infer_attn_plan (line 110) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: archive/ktransformers/models/modeling_deepseek.py
  function _get_unpad_data (line 88) | def _get_unpad_data(attention_mask):
  class DeepseekV2RMSNorm (line 102) | class DeepseekV2RMSNorm(nn.Module):
    method __init__ (line 103) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 112) | def forward(self, hidden_states):
  class DeepseekV2RotaryEmbedding (line 123) | class DeepseekV2RotaryEmbedding(nn.Module):
    method __init__ (line 124) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method forward (line 136) | def forward(self, x, position_ids):
  class DeepseekV2LinearScalingRotaryEmbedding (line 152) | class DeepseekV2LinearScalingRotaryEmbedding(DeepseekV2RotaryEmbedding):
    method __init__ (line 155) | def __init__(
    method _set_cos_sin_cache (line 167) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class DeepseekV2DynamicNTKScalingRotaryEmbedding (line 182) | class DeepseekV2DynamicNTKScalingRotaryEmbedding(DeepseekV2RotaryEmbeddi...
    method __init__ (line 185) | def __init__(
    method _set_cos_sin_cache (line 197) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function yarn_find_correction_dim (line 222) | def yarn_find_correction_dim(
  function yarn_find_correction_range (line 231) | def yarn_find_correction_range(
  function yarn_get_mscale (line 243) | def yarn_get_mscale(scale=1, mscale=1):
  function yarn_linear_ramp_mask (line 249) | def yarn_linear_ramp_mask(min, max, dim):
  class DeepseekV2YarnRotaryEmbedding (line 257) | class DeepseekV2YarnRotaryEmbedding(DeepseekV2RotaryEmbedding):
    method __init__ (line 258) | def __init__(
    method forward (line 313) | def forward(self, x, position_ids):
  function rotate_half (line 329) | def rotate_half(x):
  function apply_rotary_pos_emb (line 337) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  class DeepseekV2MLP (line 367) | class DeepseekV2MLP(nn.Module):
    method __init__ (line 368) | def __init__(self, config, hidden_size=None, intermediate_size=None):
    method forward (line 381) | def forward(self, x):
  class MoEGate (line 386) | class MoEGate(nn.Module):
    method __init__ (line 387) | def __init__(self, config):
    method reset_parameters (line 408) | def reset_parameters(self) -> None:
    method forward (line 413) | def forward(self, hidden_states):
  class AddAuxiliaryLoss (line 493) | class AddAuxiliaryLoss(torch.autograd.Function):
    method forward (line 500) | def forward(ctx, x, loss):
    method backward (line 507) | def backward(ctx, grad_output):
  class DeepseekV2MoE (line 513) | class DeepseekV2MoE(nn.Module):
    method __init__ (line 518) | def __init__(self, config):
    method forward (line 558) | def forward(self, hidden_states):
    method moe_infer (line 581) | def moe_infer(self, x, topk_ids, topk_weight):
  function repeat_kv (line 657) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class DeepseekV2Attention (line 671) | class DeepseekV2Attention(nn.Module):
    method __init__ (line 674) | def __init__(self, config: DeepseekV2Config, layer_idx: Optional[int] ...
    method _init_rope (line 741) | def _init_rope(self):
    method _shape (line 787) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 794) | def forward(
  class DeepseekV2FlashAttention2 (line 893) | class DeepseekV2FlashAttention2(DeepseekV2Attention):
    method __init__ (line 900) | def __init__(self, *args, **kwargs):
    method forward (line 908) | def forward(
    method _flash_attention_forward (line 1038) | def _flash_attention_forward(
    method _upad_input (line 1129) | def _upad_input(
  class DeepseekV2DecoderLayer (line 1180) | class DeepseekV2DecoderLayer(nn.Module):
    method __init__ (line 1181) | def __init__(self, config: DeepseekV2Config, layer_idx: int):
    method forward (line 1205) | def forward(
  class DeepseekV2PreTrainedModel (line 1291) | class DeepseekV2PreTrainedModel(PreTrainedModel):
    method _init_weights (line 1301) | def _init_weights(self, module):
  class DeepseekV2Model (line 1387) | class DeepseekV2Model(DeepseekV2PreTrainedModel):
    method __init__ (line 1395) | def __init__(self, config: DeepseekV2Config):
    method get_input_embeddings (line 1416) | def get_input_embeddings(self):
    method set_input_embeddings (line 1419) | def set_input_embeddings(self, value):
    method forward (line 1423) | def forward(
    method _update_causal_mask (line 1563) | def _update_causal_mask(
  class DeepseekV2ForCausalLM (line 1644) | class DeepseekV2ForCausalLM(DeepseekV2PreTrainedModel):
    method __init__ (line 1647) | def __init__(self, config):
    method get_input_embeddings (line 1656) | def get_input_embeddings(self):
    method set_input_embeddings (line 1659) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1662) | def get_output_embeddings(self):
    method set_output_embeddings (line 1665) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1668) | def set_decoder(self, decoder):
    method get_decoder (line 1671) | def get_decoder(self):
    method forward (line 1678) | def forward(
    method prepare_inputs_for_generation (line 1773) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1851) | def _reorder_cache(past_key_values, beam_idx):
  class DeepseekV2ForSequenceClassification (line 1878) | class DeepseekV2ForSequenceClassification(DeepseekV2PreTrainedModel):
    method __init__ (line 1879) | def __init__(self, config):
    method get_input_embeddings (line 1888) | def get_input_embeddings(self):
    method set_input_embeddings (line 1891) | def set_input_embeddings(self, value):
    method forward (line 1895) | def forward(

FILE: archive/ktransformers/models/modeling_deepseek_v3.py
  function _get_unpad_data (line 87) | def _get_unpad_data(attention_mask):
  class DeepseekV3RMSNorm (line 101) | class DeepseekV3RMSNorm(nn.Module):
    method __init__ (line 102) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 111) | def forward(self, hidden_states):
  class DeepseekV3RotaryEmbedding (line 122) | class DeepseekV3RotaryEmbedding(nn.Module):
    method __init__ (line 123) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 142) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 154) | def forward(self, x, seq_len=None):
  class DeepseekV3LinearScalingRotaryEmbedding (line 166) | class DeepseekV3LinearScalingRotaryEmbedding(DeepseekV3RotaryEmbedding):
    method __init__ (line 169) | def __init__(
    method _set_cos_sin_cache (line 180) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class DeepseekV3DynamicNTKScalingRotaryEmbedding (line 195) | class DeepseekV3DynamicNTKScalingRotaryEmbedding(DeepseekV3RotaryEmbeddi...
    method __init__ (line 198) | def __init__(
    method _set_cos_sin_cache (line 209) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function yarn_find_correction_dim (line 234) | def yarn_find_correction_dim(
  function yarn_find_correction_range (line 243) | def yarn_find_correction_range(
  function yarn_get_mscale (line 255) | def yarn_get_mscale(scale=1, mscale=1):
  function yarn_linear_ramp_mask (line 261) | def yarn_linear_ramp_mask(min, max, dim):
  class DeepseekV3YarnRotaryEmbedding (line 270) | class DeepseekV3YarnRotaryEmbedding(DeepseekV3RotaryEmbedding):
    method __init__ (line 272) | def __init__(
    method _set_cos_sin_cache (line 293) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function rotate_half (line 339) | def rotate_half(x):
  function apply_rotary_pos_emb (line 347) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  class DeepseekV3MLP (line 382) | class DeepseekV3MLP(nn.Module):
    method __init__ (line 383) | def __init__(self, config, hidden_size=None, intermediate_size=None):
    method forward (line 396) | def forward(self, x):
  class MoEGate (line 401) | class MoEGate(nn.Module):
    method __init__ (line 402) | def __init__(self, config):
    method reset_parameters (line 425) | def reset_parameters(self) -> None:
    method forward (line 430) | def forward(self, hidden_states):
  class DeepseekV3MoE (line 483) | class DeepseekV3MoE(nn.Module):
    method __init__ (line 488) | def __init__(self, config):
    method forward (line 530) | def forward(self, hidden_states):
    method moe_infer (line 543) | def moe_infer(self, x, topk_ids, topk_weight):
  function repeat_kv (line 620) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class DeepseekV3Attention (line 635) | class DeepseekV3Attention(nn.Module):
    method __init__ (line 638) | def __init__(self, config: DeepseekV3Config, layer_idx: Optional[int] ...
    method _init_rope (line 705) | def _init_rope(self):
    method _shape (line 751) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 758) | def forward(
  class DeepseekV3FlashAttention2 (line 869) | class DeepseekV3FlashAttention2(DeepseekV3Attention):
    method __init__ (line 876) | def __init__(self, *args, **kwargs):
    method forward (line 884) | def forward(
    method _flash_attention_forward (line 1020) | def _flash_attention_forward(
    method _upad_input (line 1100) | def _upad_input(
  class DeepseekV3DecoderLayer (line 1152) | class DeepseekV3DecoderLayer(nn.Module):
    method __init__ (line 1153) | def __init__(self, config: DeepseekV3Config, layer_idx: int):
    method forward (line 1177) | def forward(
  class DeepseekV3PreTrainedModel (line 1265) | class DeepseekV3PreTrainedModel(PreTrainedModel):
    method _init_weights (line 1274) | def _init_weights(self, module):
  class DeepseekV3Model (line 1360) | class DeepseekV3Model(DeepseekV3PreTrainedModel):
    method __init__ (line 1368) | def __init__(self, config: DeepseekV3Config):
    method get_input_embeddings (line 1389) | def get_input_embeddings(self):
    method set_input_embeddings (line 1392) | def set_input_embeddings(self, value):
    method forward (line 1396) | def forward(
    method _update_causal_mask (line 1530) | def _update_causal_mask(
  class DeepseekV3ForCausalLM (line 1610) | class DeepseekV3ForCausalLM(DeepseekV3PreTrainedModel, GenerationMixin):
    method __init__ (line 1613) | def __init__(self, config):
    method get_input_embeddings (line 1622) | def get_input_embeddings(self):
    method set_input_embeddings (line 1625) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1628) | def get_output_embeddings(self):
    method set_output_embeddings (line 1631) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1634) | def set_decoder(self, decoder):
    method get_decoder (line 1637) | def get_decoder(self):
    method forward (line 1644) | def forward(
    method prepare_inputs_for_generation (line 1749) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1814) | def _reorder_cache(past_key_values, beam_idx):
  class DeepseekV3ForSequenceClassification (line 1841) | class DeepseekV3ForSequenceClassification(DeepseekV3PreTrainedModel):
    method __init__ (line 1842) | def __init__(self, config):
    method get_input_embeddings (line 1851) | def get_input_embeddings(self):
    method set_input_embeddings (line 1854) | def set_input_embeddings(self, value):
    method forward (line 1858) | def forward(

FILE: archive/ktransformers/models/modeling_glm4_moe.py
  function repeat_kv (line 45) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  function eager_attention_forward (line 57) | def eager_attention_forward(
  function rotate_half (line 83) | def rotate_half(x):
  function apply_rotary_pos_emb (line 90) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  class Glm4MoeAttention (line 128) | class Glm4MoeAttention(nn.Module):
    method __init__ (line 131) | def __init__(self, config: Glm4MoeConfig, layer_idx: Optional[int] = N...
    method forward (line 156) | def forward(
  class Glm4MoeMLP (line 208) | class Glm4MoeMLP(nn.Module):
    method __init__ (line 209) | def __init__(self, config, hidden_size=None, intermediate_size=None):
    method forward (line 220) | def forward(self, x):
  class Glm4MoeTopkRouter (line 225) | class Glm4MoeTopkRouter(nn.Module):
    method __init__ (line 226) | def __init__(self, config: Glm4MoeConfig):
    method get_topk_indices (line 240) | def get_topk_indices(self, scores):
    method forward (line 259) | def forward(self, hidden_states):
  class Glm4MoeRMSNorm (line 273) | class Glm4MoeRMSNorm(nn.Module):
    method __init__ (line 274) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 283) | def forward(self, hidden_states):
    method extra_repr (line 290) | def extra_repr(self):
  class Glm4MoeMoE (line 294) | class Glm4MoeMoE(nn.Module):
    method __init__ (line 299) | def __init__(self, config):
    method moe (line 313) | def moe(self, hidden_states: torch.Tensor, topk_indices: torch.Tensor,...
    method forward (line 339) | def forward(self, hidden_states):
  class Glm4MoeDecoderLayer (line 349) | class Glm4MoeDecoderLayer(GradientCheckpointingLayer):
    method __init__ (line 350) | def __init__(self, config: Glm4MoeConfig, layer_idx: int):
    method forward (line 364) | def forward(
  class Glm4MoePreTrainedModel (line 398) | class Glm4MoePreTrainedModel(PreTrainedModel):
    method _init_weights (line 414) | def _init_weights(self, module):
  class Glm4MoeRotaryEmbedding (line 430) | class Glm4MoeRotaryEmbedding(nn.Module):
    method __init__ (line 431) | def __init__(self, config: Glm4MoeConfig, device=None):
    method forward (line 450) | def forward(self, x, position_ids):
  class Glm4MoeModel (line 465) | class Glm4MoeModel(Glm4MoePreTrainedModel):
    method __init__ (line 468) | def __init__(self, config: Glm4MoeConfig):
    method get_input_embeddings (line 484) | def get_input_embeddings(self):
    method set_input_embeddings (line 487) | def set_input_embeddings(self, value):
    method forward (line 492) | def forward(
  class Glm4MoeForCausalLM (line 551) | class Glm4MoeForCausalLM(Glm4MoePreTrainedModel, GenerationMixin):
    method __init__ (line 556) | def __init__(self, config):
    method get_input_embeddings (line 565) | def get_input_embeddings(self):
    method set_input_embeddings (line 568) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 571) | def get_output_embeddings(self):
    method set_output_embeddings (line 574) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 577) | def set_decoder(self, decoder):
    method get_decoder (line 580) | def get_decoder(self):
    method forward (line 585) | def forward(

FILE: archive/ktransformers/models/modeling_llama.py
  class LlamaRMSNorm (line 58) | class LlamaRMSNorm(nn.Module):
    method __init__ (line 59) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 67) | def forward(self, hidden_states):
  class LlamaRotaryEmbedding (line 78) | class LlamaRotaryEmbedding(nn.Module):
    method __init__ (line 79) | def __init__(
    method _dynamic_frequency_update (line 134) | def _dynamic_frequency_update(self, position_ids, device):
    method forward (line 159) | def forward(self, x, position_ids):
  class LlamaLinearScalingRotaryEmbedding (line 190) | class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding):
    method __init__ (line 193) | def __init__(self, *args, **kwargs):
  class LlamaDynamicNTKScalingRotaryEmbedding (line 202) | class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):
    method __init__ (line 205) | def __init__(self, *args, **kwargs):
  function rotate_half (line 215) | def rotate_half(x):
  function apply_rotary_pos_emb (line 222) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  class LlamaMLP (line 249) | class LlamaMLP(nn.Module):
    method __init__ (line 250) | def __init__(self, config):
    method forward (line 266) | def forward(self, x):
  function repeat_kv (line 300) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class LlamaAttention (line 314) | class LlamaAttention(nn.Module):
    method __init__ (line 317) | def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None):
    method forward (line 364) | def forward(
  class LlamaFlashAttention2 (line 496) | class LlamaFlashAttention2(LlamaAttention):
    method __init__ (line 503) | def __init__(self, *args, **kwargs):
    method forward (line 511) | def forward(
  class LlamaSdpaAttention (line 627) | class LlamaSdpaAttention(LlamaAttention):
    method forward (line 635) | def forward(
  class LlamaDecoderLayer (line 745) | class LlamaDecoderLayer(nn.Module):
    method __init__ (line 746) | def __init__(self, config: LlamaConfig, layer_idx: int):
    method forward (line 760) | def forward(
  class LlamaPreTrainedModel (line 854) | class LlamaPreTrainedModel(PreTrainedModel):
    method _init_weights (line 866) | def _init_weights(self, module):
  class LlamaModel (line 956) | class LlamaModel(LlamaPreTrainedModel):
    method __init__ (line 964) | def __init__(self, config: LlamaConfig):
    method get_input_embeddings (line 985) | def get_input_embeddings(self):
    method set_input_embeddings (line 988) | def set_input_embeddings(self, value):
    method forward (line 992) | def forward(
    method _update_causal_mask (line 1133) | def _update_causal_mask(
  class LlamaForCausalLM (line 1236) | class LlamaForCausalLM(LlamaPreTrainedModel):
    method __init__ (line 1239) | def __init__(self, config):
    method get_input_embeddings (line 1248) | def get_input_embeddings(self):
    method set_input_embeddings (line 1251) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1254) | def get_output_embeddings(self):
    method set_output_embeddings (line 1257) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1260) | def set_decoder(self, decoder):
    method get_decoder (line 1263) | def get_decoder(self):
    method forward (line 1270) | def forward(
    method prepare_inputs_for_generation (line 1376) | def prepare_inputs_for_generation(
  class LlamaForSequenceClassification (line 1440) | class LlamaForSequenceClassification(LlamaPreTrainedModel):
    method __init__ (line 1441) | def __init__(self, config):
    method get_input_embeddings (line 1450) | def get_input_embeddings(self):
    method set_input_embeddings (line 1453) | def set_input_embeddings(self, value):
    method forward (line 1457) | def forward(
  class LlamaForQuestionAnswering (line 1567) | class LlamaForQuestionAnswering(LlamaPreTrainedModel):
    method __init__ (line 1571) | def __init__(self, config):
    method get_input_embeddings (line 1579) | def get_input_embeddings(self):
    method set_input_embeddings (line 1582) | def set_input_embeddings(self, value):
    method forward (line 1586) | def forward(
  class LlamaForTokenClassification (line 1668) | class LlamaForTokenClassification(LlamaPreTrainedModel):
    method __init__ (line 1669) | def __init__(self, config):
    method get_input_embeddings (line 1685) | def get_input_embeddings(self):
    method set_input_embeddings (line 1688) | def set_input_embeddings(self, value):
    method forward (line 1692) | def forward(

FILE: archive/ktransformers/models/modeling_mixtral.py
  function load_balancing_loss_func (line 89) | def load_balancing_loss_func(
  function _get_unpad_data (line 166) | def _get_unpad_data(attention_mask):
  class MixtralRMSNorm (line 179) | class MixtralRMSNorm(nn.Module):
    method __init__ (line 180) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 188) | def forward(self, hidden_states):
    method extra_repr (line 195) | def extra_repr(self):
  class MixtralRotaryEmbedding (line 201) | class MixtralRotaryEmbedding(nn.Module):
    method __init__ (line 202) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method forward (line 215) | def forward(self, x, position_ids):
  function rotate_half (line 231) | def rotate_half(x):
  function apply_rotary_pos_emb (line 240) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  function repeat_kv (line 270) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class MixtralAttention (line 284) | class MixtralAttention(nn.Module):
    method __init__ (line 290) | def __init__(self, config: MixtralConfig, layer_idx: Optional[int] = N...
    method _shape (line 327) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 330) | def forward(
  class MixtralFlashAttention2 (line 406) | class MixtralFlashAttention2(MixtralAttention):
    method forward (line 413) | def forward(
    method _flash_attention_forward (line 549) | def _flash_attention_forward(
    method _upad_input (line 660) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class MixtralSdpaAttention (line 706) | class MixtralSdpaAttention(MixtralAttention):
    method forward (line 714) | def forward(
  class MixtralBlockSparseTop2MLP (line 803) | class MixtralBlockSparseTop2MLP(nn.Module):
    method __init__ (line 804) | def __init__(self, config: MixtralConfig):
    method forward (line 815) | def forward(self, hidden_states):
  class MixtralSparseMoeBlock (line 821) | class MixtralSparseMoeBlock(nn.Module):
    method __init__ (line 833) | def __init__(self, config):
    method forward (line 848) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class MixtralDecoderLayer (line 889) | class MixtralDecoderLayer(nn.Module):
    method __init__ (line 890) | def __init__(self, config: MixtralConfig, layer_idx: int):
    method forward (line 900) | def forward(
  class MixtralPreTrainedModel (line 992) | class MixtralPreTrainedModel(PreTrainedModel):
    method _init_weights (line 1002) | def _init_weights(self, module):
  class MixtralModel (line 1091) | class MixtralModel(MixtralPreTrainedModel):
    method __init__ (line 1099) | def __init__(self, config: MixtralConfig):
    method get_input_embeddings (line 1115) | def get_input_embeddings(self):
    method set_input_embeddings (line 1118) | def set_input_embeddings(self, value):
    method forward (line 1123) | def forward(
    method _update_causal_mask (line 1256) | def _update_causal_mask(
  class MixtralForCausalLM (line 1337) | class MixtralForCausalLM(MixtralPreTrainedModel):
    method __init__ (line 1340) | def __init__(self, config):
    method get_input_embeddings (line 1351) | def get_input_embeddings(self):
    method set_input_embeddings (line 1354) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1357) | def get_output_embeddings(self):
    method set_output_embeddings (line 1360) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1363) | def set_decoder(self, decoder):
    method get_decoder (line 1366) | def get_decoder(self):
    method forward (line 1372) | def forward(
    method prepare_inputs_for_generation (line 1482) | def prepare_inputs_for_generation(
  class MixtralForSequenceClassification (line 1545) | class MixtralForSequenceClassification(MixtralPreTrainedModel):
    method __init__ (line 1546) | def __init__(self, config):
    method get_input_embeddings (line 1555) | def get_input_embeddings(self):
    method set_input_embeddings (line 1558) | def set_input_embeddings(self, value):
    method forward (line 1562) | def forward(
  class MixtralForTokenClassification (line 1661) | class MixtralForTokenClassification(MixtralPreTrainedModel):
    method __init__ (line 1662) | def __init__(self, config):
    method get_input_embeddings (line 1678) | def get_input_embeddings(self):
    method set_input_embeddings (line 1681) | def set_input_embeddings(self, value):
    method forward (line 1685) | def forward(

FILE: archive/ktransformers/models/modeling_qwen2_moe.py
  function load_balancing_loss_func (line 76) | def load_balancing_loss_func(
  function _get_unpad_data (line 153) | def _get_unpad_data(attention_mask):
  class Qwen2MoeRMSNorm (line 166) | class Qwen2MoeRMSNorm(nn.Module):
    method __init__ (line 167) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 175) | def forward(self, hidden_states):
  class Qwen2MoeRotaryEmbedding (line 183) | class Qwen2MoeRotaryEmbedding(nn.Module):
    method __init__ (line 184) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method forward (line 196) | def forward(self, x, position_ids):
  function rotate_half (line 213) | def rotate_half(x):
  function apply_rotary_pos_emb (line 221) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  class Qwen2MoeMLP (line 249) | class Qwen2MoeMLP(nn.Module):
    method __init__ (line 250) | def __init__(self, config, intermediate_size=None):
    method forward (line 260) | def forward(self, x):
  function repeat_kv (line 265) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class Qwen2MoeAttention (line 278) | class Qwen2MoeAttention(nn.Module):
    method __init__ (line 284) | def __init__(self, config: Qwen2MoeConfig, layer_idx: Optional[int] = ...
    method forward (line 321) | def forward(
  class Qwen2MoeFlashAttention2 (line 396) | class Qwen2MoeFlashAttention2(Qwen2MoeAttention):
    method __init__ (line 406) | def __init__(self, *args, **kwargs):
    method forward (line 414) | def forward(
    method _flash_attention_forward (line 546) | def _flash_attention_forward(
    method _upad_input (line 663) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class Qwen2MoeSdpaAttention (line 707) | class Qwen2MoeSdpaAttention(Qwen2MoeAttention):
    method forward (line 715) | def forward(
  class Qwen2MoeSparseMoeBlock (line 803) | class Qwen2MoeSparseMoeBlock(nn.Module):
    method __init__ (line 804) | def __init__(self, config):
    method forward (line 819) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Qwen2MoeDecoderLayer (line 865) | class Qwen2MoeDecoderLayer(nn.Module):
    method __init__ (line 866) | def __init__(self, config: Qwen2MoeConfig, layer_idx: int):
    method forward (line 882) | def forward(
  class Qwen2MoePreTrainedModel (line 979) | class Qwen2MoePreTrainedModel(PreTrainedModel):
    method _init_weights (line 990) | def _init_weights(self, module):
  class Qwen2MoeModel (line 1083) | class Qwen2MoeModel(Qwen2MoePreTrainedModel):
    method __init__ (line 1091) | def __init__(self, config: Qwen2MoeConfig):
    method get_input_embeddings (line 1107) | def get_input_embeddings(self):
    method set_input_embeddings (line 1110) | def set_input_embeddings(self, value):
    method forward (line 1114) | def forward(
    method _update_causal_mask (line 1247) | def _update_causal_mask(
  class Qwen2MoeForCausalLM (line 1328) | class Qwen2MoeForCausalLM(Qwen2MoePreTrainedModel):
    method __init__ (line 1331) | def __init__(self, config):
    method get_input_embeddings (line 1343) | def get_input_embeddings(self):
    method set_input_embeddings (line 1346) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1349) | def get_output_embeddings(self):
    method set_output_embeddings (line 1352) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1355) | def set_decoder(self, decoder):
    method get_decoder (line 1358) | def get_decoder(self):
    method forward (line 1363) | def forward(
    method prepare_inputs_for_generation (line 1472) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1550) | def _reorder_cache(past_key_values, beam_idx):
  class Qwen2MoeForSequenceClassification (line 1575) | class Qwen2MoeForSequenceClassification(Qwen2MoePreTrainedModel):
    method __init__ (line 1576) | def __init__(self, config):
    method get_input_embeddings (line 1585) | def get_input_embeddings(self):
    method set_input_embeddings (line 1588) | def set_input_embeddings(self, value):
    method forward (line 1592) | def forward(
  class Qwen2MoeForTokenClassification (line 1691) | class Qwen2MoeForTokenClassification(Qwen2MoePreTrainedModel):
    method __init__ (line 1692) | def __init__(self, config):
    method get_input_embeddings (line 1708) | def get_input_embeddings(self):
    method set_input_embeddings (line 1711) | def set_input_embeddings(self, value):
    method forward (line 1715) | def forward(

FILE: archive/ktransformers/models/modeling_qwen3_moe.py
  function rotate_half (line 65) | def rotate_half(x):
  function apply_rotary_pos_emb (line 72) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  function repeat_kv (line 99) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  function eager_attention_forward (line 111) | def eager_attention_forward(
  class Qwen3MoeAttention (line 137) | class Qwen3MoeAttention(nn.Module):
    method __init__ (line 140) | def __init__(self, config: Qwen3MoeConfig, layer_idx: int):
    method forward (line 183) | def forward(
  class Qwen3MoeMLP (line 234) | class Qwen3MoeMLP(nn.Module):
    method __init__ (line 235) | def __init__(self, config, intermediate_size=None):
    method forward (line 245) | def forward(self, x):
  class Qwen3MoeSparseMoeBlock (line 250) | class Qwen3MoeSparseMoeBlock(nn.Module):
    method __init__ (line 251) | def __init__(self, config):
    method forward (line 263) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Qwen3MoeRMSNorm (line 303) | class Qwen3MoeRMSNorm(nn.Module):
    method __init__ (line 304) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 313) | def forward(self, hidden_states):
    method extra_repr (line 320) | def extra_repr(self):
  class Qwen3MoeDecoderLayer (line 324) | class Qwen3MoeDecoderLayer(nn.Module):
    method __init__ (line 325) | def __init__(self, config: Qwen3MoeConfig, layer_idx: int):
    method forward (line 344) | def forward(
  function _compute_default_rope_parameters (line 421) | def _compute_default_rope_parameters(
  class Qwen3MoeRotaryEmbedding (line 461) | class Qwen3MoeRotaryEmbedding(nn.Module):
    method __init__ (line 462) | def __init__(self, config: Qwen3MoeConfig, device=None):
    method _dynamic_frequency_update (line 485) | def _dynamic_frequency_update(self, position_ids, device):
    method forward (line 505) | def forward(self, x, position_ids):
  class Qwen3MoePreTrainedModel (line 550) | class Qwen3MoePreTrainedModel(PreTrainedModel):
    method _init_weights (line 564) | def _init_weights(self, module):
  class Qwen3MoeModel (line 647) | class Qwen3MoeModel(Qwen3MoePreTrainedModel):
    method __init__ (line 655) | def __init__(self, config: Qwen3MoeConfig):
    method get_input_embeddings (line 671) | def get_input_embeddings(self):
    method set_input_embeddings (line 674) | def set_input_embeddings(self, value):
    method forward (line 678) | def forward(
    method _update_causal_mask (line 796) | def _update_causal_mask(
    method _prepare_4d_causal_attention_mask_with_cache_position (line 880) | def _prepare_4d_causal_attention_mask_with_cache_position(
  class KwargsForCausalLM (line 950) | class KwargsForCausalLM(): ...
  function load_balancing_loss_func (line 953) | def load_balancing_loss_func(
  class Qwen3MoeForCausalLM (line 1035) | class Qwen3MoeForCausalLM(Qwen3MoePreTrainedModel, GenerationMixin):
    method __init__ (line 1040) | def __init__(self, config):
    method get_input_embeddings (line 1052) | def get_input_embeddings(self):
    method set_input_embeddings (line 1055) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1058) | def get_output_embeddings(self):
    method set_output_embeddings (line 1061) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1064) | def set_decoder(self, decoder):
    method get_decoder (line 1067) | def get_decoder(self):
    method forward (line 1073) | def forward(
  class Qwen3MoeForSequenceClassification (line 1199) | class Qwen3MoeForSequenceClassification(Qwen3MoePreTrainedModel):
    method __init__ (line 1200) | def __init__(self, config):
    method get_input_embeddings (line 1209) | def get_input_embeddings(self):
    method set_input_embeddings (line 1212) | def set_input_embeddings(self, value):
    method forward (line 1216) | def forward(
  class Qwen3MoeForTokenClassification (line 1298) | class Qwen3MoeForTokenClassification(Qwen3MoePreTrainedModel):
    method __init__ (line 1299) | def __init__(self, config):
    method get_input_embeddings (line 1315) | def get_input_embeddings(self):
    method set_input_embeddings (line 1318) | def set_input_embeddings(self, value):
    method forward (line 1327) | def forward(
  class Qwen3MoeForQuestionAnswering (line 1386) | class Qwen3MoeForQuestionAnswering(Qwen3MoePreTrainedModel):
    method __init__ (line 1389) | def __init__(self, config):
    method get_input_embeddings (line 1397) | def get_input_embeddings(self):
    method set_input_embeddings (line 1400) | def set_input_embeddings(self, value):
    method forward (line 1404) | def forward(

FILE: archive/ktransformers/models/modeling_qwen3_next.py
  class Qwen3NextRMSNormGated (line 82) | class Qwen3NextRMSNormGated(nn.Module):
    method __init__ (line 83) | def __init__(self, hidden_size, eps=1e-6, **kwargs):
    method forward (line 88) | def forward(self, hidden_states, gate=None):
  class Qwen3NextDynamicCache (line 100) | class Qwen3NextDynamicCache:
    method __init__ (line 116) | def __init__(self, config: Qwen3NextConfig):
    method __len__ (line 130) | def __len__(self):
    method __getitem__ (line 133) | def __getitem__(self, layer_idx: int) -> tuple[torch.Tensor, torch.Ten...
    method update (line 136) | def update(
    method reorder_cache (line 152) | def reorder_cache(self, beam_idx: torch.LongTensor):
    method get_seq_length (line 167) | def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
    method get_mask_sizes (line 175) | def get_mask_sizes(self, cache_position: torch.Tensor, layer_idx: int)...
    method has_previous_state (line 188) | def has_previous_state(self):
  class Qwen3NextRotaryEmbedding (line 193) | class Qwen3NextRotaryEmbedding(nn.Module):
    method __init__ (line 196) | def __init__(self, config: Qwen3NextConfig, device=None):
    method forward (line 215) | def forward(self, x, position_ids):
  class Qwen3NextRMSNorm (line 229) | class Qwen3NextRMSNorm(nn.Module):
    method __init__ (line 230) | def __init__(self, dim: int, eps: float = 1e-6):
    method _norm (line 237) | def _norm(self, x):
    method forward (line 240) | def forward(self, x):
    method extra_repr (line 247) | def extra_repr(self):
  function rotate_half (line 251) | def rotate_half(x):
  function apply_rotary_pos_emb (line 259) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  function repeat_kv (line 299) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  function eager_attention_forward (line 311) | def eager_attention_forward(
  class Qwen3NextAttention (line 337) | class Qwen3NextAttention(nn.Module):
    method __init__ (line 340) | def __init__(self, config: Qwen3NextConfig, layer_idx: int):
    method forward (line 369) | def forward(
  function apply_mask_to_padding_states (line 420) | def apply_mask_to_padding_states(hidden_states, attention_mask):
  function torch_causal_conv1d_update (line 436) | def torch_causal_conv1d_update(
  function torch_chunk_gated_delta_rule (line 454) | def torch_chunk_gated_delta_rule(
  function torch_recurrent_gated_delta_rule (line 534) | def torch_recurrent_gated_delta_rule(
  class Qwen3NextGatedDeltaNet (line 576) | class Qwen3NextGatedDeltaNet(nn.Module):
    method __init__ (line 577) | def __init__(self, config: Qwen3NextConfig, layer_idx: int):
    method fix_query_key_value_ordering (line 645) | def fix_query_key_value_ordering(self, mixed_qkvz, mixed_ba):
    method forward (line 674) | def forward(
  class Qwen3NextMLP (line 792) | class Qwen3NextMLP(nn.Module):
    method __init__ (line 793) | def __init__(self, config, intermediate_size=None):
    method forward (line 803) | def forward(self, x):
  class Qwen3NextSparseMoeBlock (line 808) | class Qwen3NextSparseMoeBlock(nn.Module):
    method __init__ (line 809) | def __init__(self, config):
    method forward (line 824) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Qwen3NextDecoderLayer (line 871) | class Qwen3NextDecoderLayer(GradientCheckpointingLayer):
    method __init__ (line 872) | def __init__(self, config: Qwen3NextConfig, layer_idx: int):
    method forward (line 894) | def forward(
  class Qwen3NextPreTrainedModel (line 966) | class Qwen3NextPreTrainedModel(PreTrainedModel):
    method _init_weights (line 982) | def _init_weights(self, module):
  class Qwen3NextModel (line 989) | class Qwen3NextModel(Qwen3NextPreTrainedModel):
    method __init__ (line 990) | def __init__(self, config: Qwen3NextConfig):
    method forward (line 1004) | def forward(
    method _update_linear_attn_mask (line 1068) | def _update_linear_attn_mask(self, attention_mask, cache_position):
  function load_balancing_loss_func (line 1081) | def load_balancing_loss_func(
  class Qwen3NextForCausalLM (line 1164) | class Qwen3NextForCausalLM(Qwen3NextPreTrainedModel, GenerationMixin):
    method __init__ (line 1169) | def __init__(self, config):
    method forward (line 1183) | def forward(
  class Qwen3NextForSequenceClassification (line 1268) | class Qwen3NextForSequenceClassification(GenericForSequenceClassificatio...
  class Qwen3NextForTokenClassification (line 1272) | class Qwen3NextForTokenClassification(GenericForTokenClassification, Qwe...
  class Qwen3NextForQuestionAnswering (line 1276) | class Qwen3NextForQuestionAnswering(GenericForQuestionAnswering, Qwen3Ne...

FILE: archive/ktransformers/models/modeling_smallthinker.py
  class SmallthinkerHierarchicalMLP (line 33) | class SmallthinkerHierarchicalMLP(nn.Module):
    method __init__ (line 34) | def __init__(self, config: SmallthinkerConfig):
    method forward (line 49) | def forward(self, secondary_gate_input: torch.Tensor, hidden_states: t...
  class SmallthinkerMoeBlock (line 70) | class SmallthinkerMoeBlock(nn.Module):
    method __init__ (line 71) | def __init__(self, config: SmallthinkerConfig):
    method forward (line 81) | def forward(self, router_input: torch.Tensor, hidden_states: torch.Ten...
  class SmallthinkerDenseMlpBlock (line 130) | class SmallthinkerDenseMlpBlock(nn.Module):
    method __init__ (line 131) | def __init__(self, config: SmallthinkerConfig):
    method forward (line 140) | def forward(self, router_input: torch.Tensor, hidden_states: torch.Ten...
  class SmallthinkerRMSNorm (line 146) | class SmallthinkerRMSNorm(nn.Module):
    method __init__ (line 147) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 156) | def forward(self, hidden_states):
    method extra_repr (line 163) | def extra_repr(self):
  function rotate_half (line 167) | def rotate_half(x):
  function apply_rotary_pos_emb (line 174) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  function repeat_kv (line 201) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  function eager_attention_forward (line 213) | def eager_attention_forward(
  class SmallthinkerAttention (line 239) | class SmallthinkerAttention(nn.Module):
    method __init__ (line 240) | def __init__(self, config: SmallthinkerConfig, layer_idx: int):
    method forward (line 257) | def forward(
  class SmallthinkerDecoderLayer (line 317) | class SmallthinkerDecoderLayer(nn.Module):
    method __init__ (line 318) | def __init__(self, config: SmallthinkerConfig, layer_idx: int):
    method forward (line 327) | def forward(
  class SmallthinkerRotaryEmbedding (line 396) | class SmallthinkerRotaryEmbedding(nn.Module):
    method __init__ (line 397) | def __init__(self, config: SmallthinkerConfig, device=None):
    method forward (line 416) | def forward(self, x, position_ids):
  class SmallthinkerPreTrainedModel (line 430) | class SmallthinkerPreTrainedModel(PreTrainedModel):
    method _init_weights (line 444) | def _init_weights(self, module):
  class SmallthinkerModel (line 459) | class SmallthinkerModel(SmallthinkerPreTrainedModel):
    method __init__ (line 460) | def __init__(self, config: SmallthinkerConfig):
    method get_input_embeddings (line 477) | def get_input_embeddings(self):
    method set_input_embeddings (line 480) | def set_input_embeddings(self, value):
    method forward (line 485) | def forward(
    method _update_causal_mask (line 601) | def _update_causal_mask(
    method _prepare_4d_causal_attention_mask_with_cache_position (line 688) | def _prepare_4d_causal_attention_mask_with_cache_position(
  class KwargsForCausalLM (line 756) | class KwargsForCausalLM(FlashAttentionKwargs): ...
  function load_balancing_loss_func (line 759) | def load_balancing_loss_func(
  class SmallThinkerForCausalLM (line 842) | class SmallThinkerForCausalLM(SmallthinkerPreTrainedModel, GenerationMix...
    method __init__ (line 844) | def __init__(self, config):
    method get_input_embeddings (line 855) | def get_input_embeddings(self):
    method set_input_embeddings (line 858) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 861) | def get_output_embeddings(self):
    method set_output_embeddings (line 864) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 867) | def set_decoder(self, decoder):
    method get_decoder (line 870) | def get_decoder(self):
    method forward (line 875) | def forward(

FILE: archive/ktransformers/operators/RoPE.py
  class RotaryEmbedding (line 34) | class RotaryEmbedding(BaseInjectedModule, DeepseekV2RotaryEmbedding):
    method __init__ (line 35) | def __init__(
    method load (line 55) | def load(self):
  class RotaryEmbeddingV3 (line 64) | class RotaryEmbeddingV3(BaseInjectedModule):
    method __init__ (line 65) | def __init__(
    method forward (line 83) | def forward(self, x, position_ids):
    method load (line 98) | def load(self):
    method _init (line 105) | def _init(self, dim, max_position_embeddings, base, device, scaling_fa...
  class RotaryEmbeddingV2 (line 115) | class RotaryEmbeddingV2(BaseInjectedModule, LlamaRotaryEmbedding):
    method __init__ (line 116) | def __init__(
    method load (line 141) | def load(self):
  class YarnRotaryEmbedding (line 152) | class YarnRotaryEmbedding(BaseInjectedModule, DeepseekV2YarnRotaryEmbedd...
    method __init__ (line 153) | def __init__(
    method load (line 182) | def load(self):
  class YarnRotaryEmbeddingV3 (line 222) | class YarnRotaryEmbeddingV3(BaseInjectedModule):
    method __init__ (line 223) | def __init__(
    method load (line 240) | def load(self):
    method forward (line 262) | def forward(self, x, position_ids):
    method _init (line 277) | def _init(
  class DynamicNTKScalingRotaryEmbedding (line 328) | class DynamicNTKScalingRotaryEmbedding(
    method __init__ (line 331) | def __init__(
    method load (line 354) | def load(self):
  class RotaryEmbeddingV4 (line 367) | class RotaryEmbeddingV4(BaseInjectedModule):
    method __init__ (line 368) | def __init__(
    method forward (line 386) | def forward(self, x, position_ids):
    method load (line 401) | def load(self):
    method _init (line 408) | def _init(self, dim, max_position_embeddings, base, device, scaling_fa...
  class KQwen3MoeRotaryEmbedding (line 418) | class KQwen3MoeRotaryEmbedding(BaseInjectedModule, DeepseekV2RotaryEmbed...
    method __init__ (line 419) | def __init__(
    method load (line 439) | def load(self):
  class KSmallthinkerRotaryEmbedding (line 445) | class KSmallthinkerRotaryEmbedding(BaseInjectedModule, SmallthinkerRotar...
    method __init__ (line 446) | def __init__(
    method load (line 466) | def load(self):
    method forward (line 473) | def forward(self, x, position_ids):
  class KGlm4MoeRotaryEmbedding (line 486) | class KGlm4MoeRotaryEmbedding(BaseInjectedModule, Glm4MoeRotaryEmbedding):
    method __init__ (line 487) | def __init__(
    method load (line 507) | def load(self):
    method forward (line 514) | def forward(self, x, position_ids):

FILE: archive/ktransformers/operators/ascend/ascend_attention.py
  function apply_rotary_pos_emb_fusion (line 39) | def apply_rotary_pos_emb_fusion(q, k, cos, sin, unsqueeze_dim=1):
  class MatMulOps (line 51) | class MatMulOps(object):
    method execute (line 52) | def execute(self, x_input):
  class DynamicQuantOps (line 64) | class DynamicQuantOps(object):
    method execute (line 69) | def execute(self, x_input):
  class KDeepseekV2AttentionW8A8A2 (line 74) | class KDeepseekV2AttentionW8A8A2(BaseInjectedModule, DeepseekV2Attention):
    class PageKVWrapper (line 78) | class PageKVWrapper(object):
      method __init__ (line 83) | def __init__(self, past_key_value: StaticCache):
      method update (line 91) | def update(self, compressed_kv, k_pe, layer_idx, cache_kwargs):
      method get_usable_length (line 94) | def get_usable_length(self, kv_seq_len, layer_idx):
      method get_seq_length (line 97) | def get_seq_length(self, layer_idx):
      method get_block_table (line 100) | def get_block_table(self, layer_idx):
    method init_page_kv_wrapper (line 103) | def init_page_kv_wrapper(self, past_key_value: StaticCache):
    method __init__ (line 106) | def __init__(self,
    method forward_chunck (line 140) | def forward_chunck(
    method forward_paged (line 329) | def forward_paged(
    method forward_windows (line 416) | def forward_windows(
    method forward (line 486) | def forward(
  class KDeepseekV2AttentionW8A8A2Serve (line 512) | class KDeepseekV2AttentionW8A8A2Serve(BaseInjectedModule, DeepseekV2Atte...
    method __init__ (line 516) | def __init__(self,
    method print_callback (line 541) | def print_callback(self, param):
    method forward (line 554) | def forward(
    method forward_paged (line 757) | def forward_paged(
  function rotate_half (line 851) | def rotate_half(x):
  function apply_rotary_pos_emb (line 856) | def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
  class KQwen3MoeAttentionW8A8A2Serve (line 864) | class KQwen3MoeAttentionW8A8A2Serve(BaseInjectedModule, Qwen3MoeAttention):
    method __init__ (line 868) | def __init__(self,
    method _linear_w8a8a2 (line 903) | def _linear_w8a8a2(self, x: torch.Tensor, proj: nn.Module, name: str) ...
    method forward (line 923) | def forward(self,
    method _forward_prefill (line 997) | def _forward_prefill(
    method forward_paged (line 1155) | def forward_paged(

FILE: archive/ktransformers/operators/ascend/ascend_experts.py
  class KExpertsCPUW8A8 (line 38) | class KExpertsCPUW8A8(KExpertsCPU):
    method forward (line 40) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor=None, ...
  class KTransformersExpertsW8A8 (line 70) | class KTransformersExpertsW8A8(KTransformersExperts):
    method forward (line 71) | def forward(self, input_tensor, expert_ids, weights, cuda_graph_idx=No...
  class KDeepseekV3MoEW8A8 (line 82) | class KDeepseekV3MoEW8A8(KDeepseekV3MoE):
    method forward (line 83) | def forward(self, hidden_states, stream=None, para_stream=None):
    method cpu_moe_kexperts (line 176) | def cpu_moe_kexperts(self, moe_kexperts_param) -> torch.Tensor:
    method moe_kexperts (line 181) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w...
  class KQwen3MoeSparseMoeBlockW8A8 (line 185) | class KQwen3MoeSparseMoeBlockW8A8(BaseInjectedModule):
    method __init__ (line 186) | def __init__(
    method set_inference_mode (line 226) | def set_inference_mode(self, mode: InferenceState):
    method cpu_moe_kexperts (line 231) | def cpu_moe_kexperts(self, moe_kexperts_param):
    method moe_kexperts (line 242) | def moe_kexperts(
    method forward (line 260) | def forward(

FILE: archive/ktransformers/operators/ascend/ascend_gate.py
  class KDeepseekV3GateA2 (line 8) | class KDeepseekV3GateA2(KMoEGate):
    method load (line 9) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method forward (line 25) | def forward(self, hidden_states) -> torch.Tensor:

FILE: archive/ktransformers/operators/ascend/ascend_layernorm.py
  class KDeepseekV3RMSNormW8A8 (line 32) | class KDeepseekV3RMSNormW8A8(BaseInjectedModule):
    method __init__ (line 33) | def __init__(self,
    method forward (line 46) | def forward(self, hidden_states):
    method load (line 51) | def load(self):
    method unload (line 55) | def unload(self):
  class KQwen3MoeRMSNormW8A8 (line 61) | class KQwen3MoeRMSNormW8A8(BaseInjectedModule):
    method __init__ (line 62) | def __init__(self,
    method forward (line 78) | def forward(self, x: torch.Tensor):
    method load (line 91) | def load(self):
    method unload (line 104) | def unload(self):
  class KQwen3FinalRMSNormNPU (line 108) | class KQwen3FinalRMSNormNPU(nn.Module):
    method __init__ (line 109) | def __init__(self, orig_module: nn.Module):
    method forward (line 123) | def forward(self, x: torch.Tensor):

FILE: archive/ktransformers/operators/ascend/ascend_linear.py
  class KLinearW8A8 (line 34) | class KLinearW8A8(KLinearBase):
    method __init__ (line 35) | def __init__(
    method load_weight (line 46) | def load_weight(self, override_key: str | None = None, device: str | N...
    method load (line 102) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 106) | def unload(self):
  class KLinearTorchW8A8A2 (line 110) | class KLinearTorchW8A8A2(KLinearW8A8):
    method __init__ (line 111) | def __init__(
    method forward (line 131) | def forward(self, x: torch.Tensor, bsz_tensor) -> torch.Tensor:
    method load (line 136) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 184) | def unload(self):
  class KTransformersLinearW8A8A2 (line 200) | class KTransformersLinearW8A8A2(BaseInjectedModule, KLinearW8A8):
    method __init__ (line 201) | def __init__(
    method forward (line 229) | def forward(self, x, bsz_tensor=None):
    method load (line 238) | def load(self, w: dict | nn.Parameter | tuple | None = None, mode: Inf...
    method unload (line 272) | def unload(self):
    method set_inference_mode (line 279) | def set_inference_mode(self, mode: InferenceState):

FILE: archive/ktransformers/operators/ascend/ascend_mlp.py
  class KDeepseekV3MLPW8A8A2V1 (line 26) | class KDeepseekV3MLPW8A8A2V1(BaseInjectedModule, DeepseekV3MLP):
    method forward (line 28) | def forward(self, x, is_prefill=None, use_cuda_graph=False):
  class KDeepseekV3MLPW8A8A2V2 (line 63) | class KDeepseekV3MLPW8A8A2V2(BaseInjectedModule, DeepseekV3MLP):
    method forward (line 65) | def forward(self, x, is_prefill=None, use_cuda_graph=False):
  class KQwen3MoeMLPW8A8A2 (line 92) | class KQwen3MoeMLPW8A8A2(BaseInjectedModule, Qwen3MoeMLP):
    method forward (line 94) | def forward(self, x, is_prefill=None, use_cuda_graph=False):

FILE: archive/ktransformers/operators/attention.py
  function rotate_half (line 41) | def rotate_half(x):
  class KDeepseekV2Attention (line 48) | class KDeepseekV2Attention(BaseInjectedModule, DeepseekV2Attention):
    method __init__ (line 52) | def __init__(self,
    method get_absorbed (line 69) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method forward_chunck (line 77) | def forward_chunck(
    method forward_linux_triton (line 196) | def forward_linux_triton(
    method forward_linux_flashinfer (line 349) | def forward_linux_flashinfer(
    method forward_windows (line 525) | def forward_windows(
    method forward_xpu (line 591) | def forward_xpu(
    method forward (line 685) | def forward(
  class KLlamaAttention (line 746) | class KLlamaAttention(BaseInjectedModule):
    method __init__ (line 749) | def __init__(self,
    method apply_rotary_pos_emb (line 760) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq...
    method forward (line 785) | def forward(
  class KQwen3MoeAttentionIPEXLLM (line 876) | class KQwen3MoeAttentionIPEXLLM(BaseInjectedModule, Qwen3MoeAttention):
    method __init__ (line 877) | def __init__(self,
    method forward (line 893) | def forward(

FILE: archive/ktransformers/operators/balance_serve_attention.py
  function rotate_half (line 26) | def rotate_half(x):
  class flashinfer_attn (line 32) | class flashinfer_attn(BaseInjectedModule, DeepseekV2Attention):
    method __init__ (line 33) | def __init__(self,
    method get_absorbed (line 48) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method forward (line 65) | def forward(self,
  class KQwen2MoeAttention (line 120) | class KQwen2MoeAttention(BaseInjectedModule, Qwen2MoeAttention):
    method __init__ (line 121) | def __init__(self,
    method apply_rotary_pos_emb (line 137) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq...
    method forward (line 164) | def forward(self,
  class KQwen3MoeAttention (line 206) | class KQwen3MoeAttention(BaseInjectedModule, Qwen3MoeAttention):
    method __init__ (line 207) | def __init__(self,
    method apply_rotary_pos_emb (line 223) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq...
    method forward (line 250) | def forward(self,
  class deepseek_torch_attn (line 296) | class deepseek_torch_attn(BaseInjectedModule, DeepseekV2Attention):
    method __init__ (line 297) | def __init__(self,
    method get_absorbed (line 312) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method forward (line 330) | def forward(self,
  class KSmallthinkerAttention (line 462) | class KSmallthinkerAttention(BaseInjectedModule, SmallthinkerAttention):
    method __init__ (line 463) | def __init__(self,
    method apply_rotary_pos_emb (line 477) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq...
    method forward (line 503) | def forward(self,
  class KGlm4MoeAttention (line 555) | class KGlm4MoeAttention(BaseInjectedModule, Glm4MoeAttention):
    method __init__ (line 556) | def __init__(self,
    method apply_rotary_pos_emb (line 570) | def apply_rotary_pos_emb(
    method forward (line 598) | def forward(self,
  class KQwen3NextAttention (line 654) | class KQwen3NextAttention(BaseInjectedModule, Qwen3NextAttention):
    method __init__ (line 655) | def __init__(self,
    method apply_rotary_pos_emb (line 670) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq...
    method forward (line 709) | def forward(self,
  class KQwen3NextGatedDeltaNet (line 763) | class KQwen3NextGatedDeltaNet(BaseInjectedModule, Qwen3NextGatedDeltaNet):
    method __init__ (line 764) | def __init__(self,
    method fix_query_key_value_ordering (line 778) | def fix_query_key_value_ordering(self, mixed_qkvz, mixed_ba):
    method forward (line 807) | def forward(

FILE: archive/ktransformers/operators/base_operator.py
  class BaseInjectedModule (line 12) | class BaseInjectedModule(nn.Module):
    method __init__ (line 14) | def __init__(self,
    method __getattr__ (line 31) | def __getattr__(self, name: str) -> Any:
    method __setattr__ (line 51) | def __setattr__(self, name: str, value: Tensor | nn.Module) -> None:
    method forward (line 58) | def forward(self, *args, **kwargs):
    method load (line 61) | def load(self):

FILE: archive/ktransformers/operators/cpuinfer.py
  class CPUInferKVCache (line 29) | class CPUInferKVCache:
    method __init__ (line 30) | def __init__(
    method load_kvcache (line 100) | def load_kvcache(self, tensor_file_path: str):
    method dump_kvcache (line 105) | def dump_kvcache(
    method update_cache_total_len (line 135) | def update_cache_total_len(self, cache_total_len: int):
    method attn (line 143) | def attn(
    method update_kvcache_one_block_fp16 (line 256) | def update_kvcache_one_block_fp16(
    method get_kvcache_one_block_fp16 (line 292) | def get_kvcache_one_block_fp16(
    method update_importance_one_block (line 328) | def update_importance_one_block(
    method get_importance_one_block (line 354) | def get_importance_one_block(
    method get_anchor_one_block (line 380) | def get_anchor_one_block(self, anchor: torch.Tensor, layer_id: int, bl...
    method update_anchor_one_block (line 406) | def update_anchor_one_block(
    method calc_anchor_all_layers (line 434) | def calc_anchor_all_layers(
    method clear_importance_all_layers (line 473) | def clear_importance_all_layers(
    method get_cache_total_len (line 512) | def get_cache_total_len(self):
    method update_kvcache_q4 (line 515) | def update_kvcache_q4(
    method update_kvcache_fp16 (line 528) | def update_kvcache_fp16(
    method get_kvcache_q4 (line 550) | def get_kvcache_q4(
    method get_kvcache_fp16 (line 563) | def get_kvcache_fp16(
    method get_and_update_kvcache_fp16 (line 584) | def get_and_update_kvcache_fp16(
    method update_importance (line 606) | def update_importance(
    method get_attn_sparsity (line 627) | def get_attn_sparsity(
    method attn_with_kvcache (line 665) | def attn_with_kvcache(
    method get_all_kvcache_one_layer (line 704) | def get_all_kvcache_one_layer(
    method get_importance (line 713) | def get_importance(
    method get_anchor (line 720) | def get_anchor(
  class CPUInfer (line 728) | class CPUInfer:
    method __init__ (line 732) | def __init__(self, thread_num):
    method submit (line 738) | def submit(self, task):
    method submit_with_cuda_stream (line 741) | def submit_with_cuda_stream(self, current_cuda_stream, task):
    method sync (line 744) | def sync(self):
    method sync_with_cuda_stream (line 747) | def sync_with_cuda_stream(self, current_cuda_stream):

FILE: archive/ktransformers/operators/dynamic_attention.py
  class DynamicScaledDotProductAttention (line 30) | class DynamicScaledDotProductAttention:
    method __init__ (line 34) | def __init__(
    method get_attn_score_one_block (line 233) | def get_attn_score_one_block(
    method get_preselect_block_table_and_attn_score (line 271) | def get_preselect_block_table_and_attn_score(
    method get_attn_score (line 374) | def get_attn_score(
    method swap_in_and_swap_out (line 467) | def swap_in_and_swap_out(self, layer_idx, past_len, q_len, key, value):
    method calc_anchor (line 518) | def calc_anchor(self, cache_seqlens: int):
    method clear_importance (line 533) | def clear_importance(self, cache_seqlens: int):
    method clear_kvcache (line 549) | def clear_kvcache(self, cache_seqlens: int):
    method get_attn_sparsity (line 564) | def get_attn_sparsity(
    method apply (line 605) | def apply(
    method save (line 762) | def save(self, path: str, length: int):
    method load (line 775) | def load(self, path: str, length: int):

FILE: archive/ktransformers/operators/experts.py
  function deduplicate_and_sort (line 48) | def deduplicate_and_sort(lst):
  function generate_cuda_graphs (line 50) | def generate_cuda_graphs(chunk_size: int) -> list:
  class KExpertsBase (line 68) | class KExpertsBase(ABC):
    method __init__ (line 69) | def __init__(self, key: str, gguf_loader: GGUFLoader, config: Pretrain...
    method forward (line 77) | def forward(self, input_tensor, expert_ids, weights):
    method load (line 81) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 85) | def unload():
    method load_weights (line 88) | def load_weights(self, override_key: str | None = None, device: str = ...
    method load_multi (line 136) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"):
  class KExpertsCPU (line 143) | class KExpertsCPU(KExpertsBase):
    method __init__ (line 152) | def __init__(
    method load (line 169) | def load(self, w: dict | nn.Parameter | tuple | None = None, device:st...
    method submit_for_one_decode (line 293) | def submit_for_one_decode(self, input_tensor, expert_ids, weights, bsz...
    method sync_for_one_decode (line 310) | def sync_for_one_decode(self, cuda_graph_idx=0):
    method forward (line 320) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor=None, ...
    method unload (line 364) | def unload(self):
    method load_weights (line 367) | def load_weights(self, override_key: str | None = None, device: str = ...
  class KExpertsMarlin (line 437) | class KExpertsMarlin(KExpertsBase):
    method __init__ (line 440) | def __init__(
    method load (line 466) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 499) | def unload(self):
    method load_weights (line 506) | def load_weights(self, override_key: str | None = None):
    method forward (line 525) | def forward(self, hidden_states_cpu: torch.Tensor, selected_experts_cp...
  class KExpertsTorch (line 562) | class KExpertsTorch(KExpertsBase):
    method __init__ (line 568) | def __init__(
    method load (line 589) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 617) | def unload(self):
    method load_weights (line 623) | def load_weights(self, override_key: str | None = None):
    method forward (line 642) | def forward(self, hidden_states_cpu: torch.Tensor, selected_experts_cp...
  class KTransformersExperts (line 686) | class KTransformersExperts(BaseInjectedModule, KExpertsBase):
    method __init__ (line 687) | def __init__(self,
    method load (line 712) | def load(self, w: dict = None,  mode: InferenceState = None, warmup: b...
    method unload (line 732) | def unload(self):
    method forward (line 739) | def forward(self, input_tensor, expert_ids, weights):
    method set_inference_mode (line 749) | def set_inference_mode(self, mode: InferenceState):
  class KQwen2MoeSparseMoeBlock (line 770) | class KQwen2MoeSparseMoeBlock(BaseInjectedModule, Qwen2MoeSparseMoeBlock):
    method forward (line 771) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
    method moe_kexperts (line 825) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w...
    method moe_infer_simple (line 831) | def moe_infer_simple(self, hidden_states_cpu: torch.Tensor, selected_e...
    method moe_infer (line 845) | def moe_infer(self, hidden_states_cpu: torch.Tensor, selected_experts_...
  class KDeepseekV2MoE (line 874) | class KDeepseekV2MoE(BaseInjectedModule, DeepseekV2MoE):
    method forward (line 875) | def forward(self, hidden_states):
    method moe_kexperts (line 915) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w...
    method moe_infer_simple (line 921) | def moe_infer_simple(
    method moe_infer (line 939) | def moe_infer(self, x, topk_ids, topk_weight):
  class KDeepseekV3MoE (line 972) | class KDeepseekV3MoE(BaseInjectedModule, DeepseekV3MoE):
    method forward (line 974) | def forward(self, hidden_states):
    method moe_kexperts (line 1017) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w...
    method moe_infer_simple (line 1023) | def moe_infer_simple(
    method moe_infer (line 1041) | def moe_infer(self, x, topk_ids, topk_weight):
  class KMistralSparseMoEBlock (line 1074) | class KMistralSparseMoEBlock(BaseInjectedModule, MixtralSparseMoeBlock):
    method forward (line 1076) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
    method moe_kexperts (line 1123) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w...
    method moe_infer_simple (line 1129) | def moe_infer_simple(self, hidden_states_cpu: torch.Tensor, selected_e...
    method moe_infer (line 1143) | def moe_infer(self, hidden_states_cpu: torch.Tensor, selected_experts_...
  class KDeepseekV3MoEV2 (line 1172) | class KDeepseekV3MoEV2(BaseInjectedModule, DeepseekV3MoE):
    method forward (line 1173) | def forward(self, hidden_states, bsz_tensor, cuda_graph_idx=0):
    method moe_on_cpuinfer (line 1215) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top...
    method moe_infer_simple (line 1222) | def moe_infer_simple(
    method moe_infer (line 1240) | def moe_infer(self, x, topk_ids, topk_weight):
  class KTransformersExpertsV2 (line 1273) | class KTransformersExpertsV2(BaseInjectedModule, KExpertsBase):
    method __init__ (line 1274) | def __init__(self,
    method load (line 1305) | def load(self, w: dict = None,  mode: InferenceState = None, warmup: b...
    method unload (line 1325) | def unload(self):
    method forward (line 1332) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor, cuda_...
    method set_inference_mode (line 1342) | def set_inference_mode(self, mode: InferenceState):
  class KSmallthinkerExperts (line 1353) | class KSmallthinkerExperts(BaseInjectedModule, KExpertsBase):
    method __init__ (line 1354) | def __init__(self,
    method load (line 1378) | def load(self, w: dict = None,  mode: InferenceState = None, warmup: b...
    method unload (line 1398) | def unload(self):
    method forward (line 1405) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor, cuda_...
    method set_inference_mode (line 1415) | def set_inference_mode(self, mode: InferenceState):
  class KGlm4Experts (line 1425) | class KGlm4Experts(BaseInjectedModule, KExpertsBase):
    method __init__ (line 1426) | def __init__(self,
    method load (line 1450) | def load(self, w: dict = None,  mode: InferenceState = None, warmup: b...
    method unload (line 1470) | def unload(self):
    method forward (line 1477) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor, cuda_...
    method set_inference_mode (line 1487) | def set_inference_mode(self, mode: InferenceState):
  class KQwen2MoeSparseMoeBlockV2 (line 1498) | class KQwen2MoeSparseMoeBlockV2(BaseInjectedModule, Qwen2MoeSparseMoeBlo...
    method forward (line 1499) | def forward(self, hidden_states, bsz_tensor, cuda_graph_idx=0):
    method moe_on_cpuinfer (line 1553) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top...
    method moe_infer_simple (line 1560) | def moe_infer_simple(
    method moe_infer (line 1578) | def moe_infer(self, x, topk_ids, topk_weight):
  class KQwen3MoeSparseMoeBlockV2 (line 1611) | class KQwen3MoeSparseMoeBlockV2(BaseInjectedModule, Qwen3MoeSparseMoeBlo...
    method forward (line 1612) | def forward(self, hidden_states, bsz_tensor=None, cuda_graph_idx=0):
    method moe_on_cpuinfer (line 1675) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top...
    method moe_infer_simple (line 1682) | def moe_infer_simple(
    method moe_infer (line 1700) | def moe_infer(self, x, topk_ids, topk_weight):
  class KSmallthinkerMoeBlock (line 1734) | class KSmallthinkerMoeBlock(BaseInjectedModule, SmallthinkerMoeBlock):
    method forward (line 1735) | def forward(self, router_input: torch.Tensor, hidden_states: torch.Ten...
    method moe_on_cpuinfer (line 1809) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top...
    method moe_infer_simple (line 1816) | def moe_infer_simple(
    method moe_infer (line 1834) | def moe_infer(self, x, topk_ids, topk_weight):
  class KGlm4MoeMoE (line 1868) | class KGlm4MoeMoE(BaseInjectedModule, Glm4MoeMoE):
    method forward (line 1869) | def forward(self, hidden_states, bsz_tensor=None, cuda_graph_idx=0):
    method moe_on_cpuinfer (line 1915) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top...
    method moe_infer_simple (line 1922) | def moe_infer_simple(
    method moe_infer (line 1940) | def moe_infer(self, x, topk_ids, topk_weight):
  class KQwen3NextSparseMoeBlockV2 (line 1974) | class KQwen3NextSparseMoeBlockV2(BaseInjectedModule, Qwen3NextSparseMoeB...
    method forward (line 1975) | def forward(self, hidden_states, bsz_tensor=None, cuda_graph_idx=0):
    method moe_on_cpuinfer (line 2041) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top...
    method moe_infer_simple (line 2048) | def moe_infer_simple(
    method moe_infer (line 2066) | def moe_infer(self, x, topk_ids, topk_weight):

FILE: archive/ktransformers/operators/flashinfer_batch_prefill_wrapper.py
  function setup_seed (line 13) | def setup_seed(seed):
  class flashInferAttn (line 34) | class flashInferAttn():
    method __init__ (line 37) | def __init__(self,
    method plan (line 72) | def plan(self,
    method calc_batch_indices (line 106) | def calc_batch_indices(self, ragged_size = None):
    method forward (line 114) | def forward(self, q, k_cache, v_cache, k, v):
  function testCudaGraph (line 123) | def testCudaGraph():
  function testAttentionFlashInfer (line 267) | def testAttentionFlashInfer(

FILE: archive/ktransformers/operators/flashinfer_wrapper.py
  function attention_ref_torch (line 30) | def attention_ref_torch(
  class MLAWrapper (line 78) | class MLAWrapper():
    method __init__ (line 79) | def __init__(self,
    method plan (line 117) | def plan(self,
    method run (line 160) | def run(self, q_nope, q_pe, ckv, k_pe, return_lse = False):
  class MLAWrapperSingleton (line 163) | class MLAWrapperSingleton():
    method get_instance (line 167) | def get_instance(cls, device, *args, **kwargs)->MLAWrapper:
    method make_instance (line 173) | def make_instance(cls, device, *args, **kwargs):
    method plan_all (line 177) | def plan_all(cls, qo_indptr,
    method need_plan_all (line 206) | def need_plan_all(cls):
    method reset_buffer (line 211) | def reset_buffer(cls):
    method update_buffer (line 216) | def update_buffer(cls, max_pages):
  function checksame (line 222) | def checksame():

FILE: archive/ktransformers/operators/gate.py
  class KMoEGateBase (line 15) | class KMoEGateBase(ABC):
    method __init__ (line 16) | def __init__(self,
    method forward (line 32) | def forward(self, input_tensor, expert_ids, weights):
    method load (line 36) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 40) | def unload():
    method load_weights (line 43) | def load_weights(self, override_key: str | None = None, device: str = ...
    method load_multi (line 84) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"):
  class KMoEGate (line 91) | class KMoEGate(BaseInjectedModule, KMoEGateBase):
    method __init__ (line 92) | def __init__(
    method forward (line 107) | def forward(self, hidden_states) -> torch.Tensor:
    method load (line 110) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 122) | def unload(self):
  class KMoEGateQwen2Moe (line 129) | class KMoEGateQwen2Moe(BaseInjectedModule, KMoEGateBase):
    method __init__ (line 130) | def __init__(
    method forward (line 159) | def forward(self, hidden_states) -> torch.Tensor:
    method load (line 177) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 191) | def unload(self):
  class KMoEGateIPEXLLM (line 198) | class KMoEGateIPEXLLM(KMoEGate):
    method __init__ (line 199) | def __init__(
    method forward (line 214) | def forward(self, hidden_states) -> torch.Tensor:

FILE: archive/ktransformers/operators/layernorm.py
  class RMSNorm (line 46) | class RMSNorm(DeepseekV3RMSNorm, BaseInjectedModule):
    method __init__ (line 47) | def __init__(self,
    method forward (line 59) | def forward(
    method forward_native (line 77) | def forward_native(
  class KQwen2MoeRMSNorm (line 87) | class KQwen2MoeRMSNorm(Qwen2MoeRMSNorm, BaseInjectedModule):
    method __init__ (line 88) | def __init__(self,
    method forward (line 100) | def forward(
    method forward_native (line 118) | def forward_native(
  class KQwen3MoeRMSNorm (line 128) | class KQwen3MoeRMSNorm(Qwen3MoeRMSNorm, BaseInjectedModule):
    method __init__ (line 129) | def __init__(self,
    method forward (line 141) | def forward(
    method forward_native (line 162) | def forward_native(
  class KQwen3NextRMSNorm (line 171) | class KQwen3NextRMSNorm(Qwen3NextRMSNorm, BaseInjectedModule):
    method __init__ (line 172) | def __init__(self,
    method _norm (line 184) | def _norm(self, x):
    method forward (line 187) | def forward(self, x, num_tokens_tensors, residual = None):
    method extra_repr (line 201) | def extra_repr(self):
  class KSmallthinkerRMSNorm (line 205) | class KSmallthinkerRMSNorm(SmallthinkerRMSNorm, BaseInjectedModule):
    method __init__ (line 206) | def __init__(self,
    method forward (line 218) | def forward(
    method forward_native (line 239) | def forward_native(
  class KGlm4MoeRMSNorm (line 248) | class KGlm4MoeRMSNorm(Glm4MoeRMSNorm, BaseInjectedModule):
    method __init__ (line 249) | def __init__(self,
    method forward (line 261) | def forward(
    method forward_native (line 282) | def forward_native(
  class DeepseekV3RMSNormTorch (line 293) | class DeepseekV3RMSNormTorch(DeepseekV3RMSNorm, BaseInjectedModule):
    method __init__ (line 294) | def __init__(self,
    method forward (line 306) | def forward(
  class KDeepseekRMSNormIPEXLLM (line 325) | class KDeepseekRMSNormIPEXLLM(DeepseekV3RMSNorm, BaseInjectedModule):
    method __init__ (line 326) | def __init__(self,
    method forward (line 339) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method load (line 347) | def load(self):

FILE: archive/ktransformers/operators/linear.py
  class KLinearBase (line 57) | class KLinearBase(ABC):
    method __init__ (line 58) | def __init__(
    method forward (line 89) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method load_weight (line 92) | def load_weight(self, override_key: str | None = None, device: str | N...
    method load_multi (line 143) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"):
    method load (line 150) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 154) | def unload(self):
  class KLinearTorch (line 158) | class KLinearTorch(KLinearBase):
    method __init__ (line 159) | def __init__(
    method forward (line 174) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None, **kw...
    method load (line 185) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 212) | def unload(self):
  class KLinearQ8 (line 218) | class KLinearQ8(KLinearBase):
    method __init__ (line 219) | def __init__(
    method forward (line 237) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None) -> t...
    method _dequantize_weight (line 254) | def _dequantize_weight(self, q_matrix, scales, bits=8):
    method _quantize_weight (line 290) | def _quantize_weight(self, matrix, bits=8):
    method load (line 345) | def load(self, w: Union[Dict, nn.Parameter, Tuple, None] = None, devic...
    method unload (line 376) | def unload(self):
  class KLinearFP8 (line 388) | class KLinearFP8(KLinearBase):
    method __init__ (line 394) | def __init__(
    method forward (line 409) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor) -> torch....
    method load (line 416) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 431) | def unload(self):
  class VLinearMarlin (line 439) | class VLinearMarlin(KLinearBase):
    method __init__ (line 445) | def __init__(
    method load (line 477) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method forward (line 525) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->...
    method unload (line 564) | def unload(self):
    method _pad_input (line 574) | def _pad_input(self, x):
  class KLinearMarlin (line 595) | class KLinearMarlin(KLinearBase):
    method __init__ (line 601) | def __init__(
    method load (line 633) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method forward (line 679) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None, **kw...
    method unload (line 713) | def unload(self):
  class KLinearCPUInfer (line 723) | class KLinearCPUInfer(KLinearBase):
    method __init__ (line 725) | def __init__(
    method forward (line 748) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->...
    method load (line 787) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method load_weights (line 808) | def load_weights(self, w: dict | nn.Parameter | tuple | None = None, d...
    method unload (line 821) | def unload(self):
  class KLinearIPEXLLM (line 827) | class KLinearIPEXLLM(KLinearBase):
    method __init__ (line 828) | def __init__(
    method forward (line 846) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->...
    method load (line 857) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 890) | def unload(self):
  class KTransformersLinear (line 906) | class KTransformersLinear(BaseInjectedModule, KLinearBase):
    method __init__ (line 907) | def __init__(
    method forward (line 935) | def forward(self, x, bsz_tensor=None):
    method load (line 944) | def load(self, w: dict | nn.Parameter | tuple | None = None, mode: Inf...
    method unload (line 966) | def unload(self):
    method set_inference_mode (line 973) | def set_inference_mode(self, mode: InferenceState):

FILE: archive/ktransformers/operators/mlp.py
  class kDeepseekV3MLP (line 10) | class kDeepseekV3MLP(DeepseekV3MLP, BaseInjectedModule):
    method __init__ (line 11) | def __init__(self,
    method forward (line 22) | def forward(self, x, bsz_tensor):
  class KQwen2MoeMLP (line 25) | class KQwen2MoeMLP(Qwen2MoeMLP, BaseInjectedModule):
    method __init__ (line 26) | def __init__(self,
    method forward (line 37) | def forward(self, x, bsz_tensor):
  class KSmallthinkerDenseMlpBlock (line 42) | class KSmallthinkerDenseMlpBlock(SmallthinkerDenseMlpBlock, BaseInjected...
    method __init__ (line 43) | def __init__(self,
    method forward (line 53) | def forward(self, x, bsz_tensor):
  class KGlm4MoeMLP (line 57) | class KGlm4MoeMLP(Glm4MoeMLP, BaseInjectedModule):
    method __init__ (line 58) | def __init__(self,
    method forward (line 68) | def forward(self, x, bsz_tensor):

FILE: archive/ktransformers/operators/models.py
  class KQwen2MoeModel (line 185) | class KQwen2MoeModel(BaseInjectedModule):
    method __init__ (line 193) | def __init__(
    method forward (line 212) | def forward(
    method load_layer_to (line 443) | def load_layer_to(self, layer: Qwen2MoeDecoderLayer, target: Inference...
  class KDeepseekV2Model (line 547) | class KDeepseekV2Model(BaseInjectedModule):
    method __init__ (line 555) | def __init__(
    method forward (line 574) | def forward(
    method load_layer_to (line 843) | def load_layer_to(self, layer: DeepseekV2DecoderLayer, target: Inferen...
  class LlamaPreTrainedModel (line 969) | class LlamaPreTrainedModel(PreTrainedModel):
    method _init_weights (line 981) | def _init_weights(self, module):
  class KLlamaModel (line 993) | class KLlamaModel(BaseInjectedModule):
    method __init__ (line 1003) | def __init__(
    method get_input_embeddings (line 1050) | def get_input_embeddings(self):
    method set_input_embeddings (line 1053) | def set_input_embeddings(self, value):
    method forward (line 1057) | def forward(
    method forward_chunk (line 1194) | def forward_chunk(
    method _update_causal_mask (line 1295) | def _update_causal_mask(

FILE: archive/ktransformers/operators/triton_attention.py
  function tanh (line 11) | def tanh(x):
  function _fwd_grouped_kernel_stage1 (line 16) | def _fwd_grouped_kernel_stage1(
  function _decode_grouped_att_m_fwd (line 165) | def _decode_grouped_att_m_fwd(
  function _fwd_kernel_stage2 (line 258) | def _fwd_kernel_stage2(
  function _decode_softmax_reducev_fwd (line 313) | def _decode_softmax_reducev_fwd(
  function decode_attention_fwd_grouped (line 358) | def decode_attention_fwd_grouped(

FILE: archive/ktransformers/operators/triton_attention_prefill.py
  function _fwd_kernel (line 24) | def _fwd_kernel(
  function context_attention_fwd (line 159) | def context_attention_fwd(

FILE: archive/ktransformers/optimize/optimize.py
  function inject (line 28) | def inject(module, local_optimization_dict, model_config:AutoConfig ,ggu...
  function del_meta (line 56) | def del_meta(module:nn.Module):
  function gen_optimize_config (line 67) | def gen_optimize_config(module: nn.Module, out_data: Mapping, rule_list:...
  function translate_model_config (line 121) | def translate_model_config(model_config: PretrainedConfig):
  function optimize_and_load_gguf (line 129) | def optimize_and_load_gguf(module: nn.Module, rule_file: str, gguf_path:...

FILE: archive/ktransformers/server/api/ollama/completions.py
  class OllamaGenerateCompletionRequest (line 21) | class OllamaGenerateCompletionRequest(BaseModel):
  class OllamaGenerationStreamResponse (line 45) | class OllamaGenerationStreamResponse(BaseModel):
  class OllamaGenerationResponse (line 51) | class OllamaGenerationResponse(BaseModel):
  function generate (line 58) | async def generate(request: Request, input: OllamaGenerateCompletionRequ...
  class OllamaChatCompletionMessage (line 103) | class OllamaChatCompletionMessage(BaseModel):
  class OllamaChatCompletionRequest (line 107) | class OllamaChatCompletionRequest(BaseModel):
  class OllamaChatCompletionStreamResponse (line 113) | class OllamaChatCompletionStreamResponse(BaseModel):
  class OllamaChatCompletionResponse (line 126) | class OllamaChatCompletionResponse(BaseModel):
  function chat (line 140) | async def chat(request: Request, input: OllamaChatCompletionRequest):
  class OllamaModel (line 227) | class OllamaModel(BaseModel):
  function tags (line 235) | async def tags():
  class OllamaModelInfo (line 240) | class OllamaModelInfo(BaseModel):
  class OllamaShowRequest (line 244) | class OllamaShowRequest(BaseModel):
  class OllamaShowDetial (line 249) | class OllamaShowDetial(BaseModel):
  class OllamaShowResponse (line 257) | class OllamaShowResponse(BaseModel):
    class Config (line 264) | class Config:
  function show (line 268) | async def show(request: Request, input: OllamaShowRequest):

FILE: archive/ktransformers/server/api/openai/__init__.py
  function post_db_creation_operations (line 14) | def post_db_creation_operations():

FILE: archive/ktransformers/server/api/openai/assistants/assistants.py
  function create_assistant (line 19) | async def create_assistant(
  function list_assistants (line 26) | async def list_assistants(
  function list_assistants_with_status (line 38) | async def list_assistants_with_status(
  function retrieve_assistant (line 48) | async def retrieve_assistant(
  function modify_assistant (line 55) | async def modify_assistant(
  function delete_assistant (line 63) | async def delete_assistant(assistant_id: str):
  function get_related_thread (line 69) | async def get_related_thread(assistant_id: ObjectID):
  function create_default_assistant (line 74) | def create_default_assistant():
  function test_create_assistant (line 90) | def test_create_assistant():

FILE: archive/ktransformers/server/api/openai/assistants/messages.py
  function create_message (line 16) | async def create_message(thread_id: str, msg: MessageCreate):
  function list_messages (line 26) | async def list_messages(
  function retrieve_message (line 38) | async def retrieve_message(thread_id: ObjectID, message_id: ObjectID):
  function modify_message (line 43) | async def modify_message(thread_id: ObjectID, message_id: ObjectID, msg:...
  function delete_message (line 49) | async def delete_message(thread_id: ObjectID, message_id: ObjectID):

FILE: archive/ktransformers/server/api/openai/assistants/runs.py
  function create_run (line 20) | async def create_run(request: Request, thread_id: str, run_create: RunCr...
  function create_thread_and_run (line 40) | async def create_thread_and_run(run_thread: RunThreadCreate):
  function list_runs (line 45) | async def list_runs(
  function retrieve_run (line 56) | async def retrieve_run(
  function modify_run (line 67) | async def modify_run(
  function submit_tool_outputs_to_run (line 76) | async def submit_tool_outputs_to_run(thread_id: str, run_id: str, submit...
  function cancel_run (line 81) | async def cancel_run(thread_id: str, run_id: str):

FILE: archive/ktransformers/server/api/openai/assistants/threads.py
  function create_thread (line 14) | async def create_thread(thread: ThreadCreate):
  function list_threads (line 19) | async def list_threads(limit: Optional[int] = 20, order: Order = Order.D...
  function retrieve_thread (line 24) | async def retrieve_thread(thread_id: ObjectID):
  function modify_thread (line 29) | async def modify_thread(thread_id: ObjectID, thread: ThreadModify):
  function delete_thread (line 34) | async def delete_thread(thread_id: ObjectID):

FILE: archive/ktransformers/server/api/openai/endpoints/chat.py
  class Choice (line 22) | class Choice(BaseModel):
  class ChatCompletion (line 30) | class ChatCompletion(BaseModel):
  class ChatCompletionMessageToolCallFunction (line 41) | class ChatCompletionMessageToolCallFunction(BaseModel):
  class ChatCompletionMessageToolCall (line 45) | class ChatCompletionMessageToolCall(BaseModel):
  class ChatCompletionMessage (line 50) | class ChatCompletionMessage(BaseModel):
  function list_models (line 58) | async def list_models():
  function getTools (line 61) | def getTools(buffer):
  function get_tool_instructions (line 117) | def get_tool_instructions():
  function chat_completion (line 136) | async def chat_completion(request: Request, create: ChatCompletionCreate):

FILE: archive/ktransformers/server/api/openai/legacy/completions.py
  function create_completion (line 15) | async def create_completion(request:Request, create:CompletionCreate):

FILE: archive/ktransformers/server/api/web/system.py
  function system_info (line 8) | def system_info():

FILE: archive/ktransformers/server/args.py
  class ArgumentParser (line 10) | class ArgumentParser:
    method __init__ (line 11) | def __init__(self, cfg):
    method parse_args (line 14) | def parse_args(self):

FILE: archive/ktransformers/server/backend/args.py
  class ConfigArgs (line 6) | class ConfigArgs(BaseModel):
    class Config (line 15) | class Config:

FILE: archive/ktransformers/server/backend/base.py
  class BackendInterfaceBase (line 27) | class BackendInterfaceBase:
    method __init__ (line 36) | def __init__(self, args:ConfigArgs = default_args):
    method inference (line 40) | async def inference(self,local_messages,request_unique_id:Optional[str...
    method report_last_time_performance (line 57) | def report_last_time_performance(self):
  class ThreadContext (line 70) | class ThreadContext:
    method __init__ (line 89) | def __init__(self, run: RunObject,interface:BackendInterfaceBase, args...
    method get_local_messages (line 102) | def get_local_messages(self):
    method update_by_run (line 109) | def update_by_run(self,run:RunObject,args:ConfigArgs = default_args):
    method put_user_message (line 113) | def put_user_message(self, message: MessageObject):
    method delete_user_message (line 119) | def delete_user_message(self,message_id: ObjectID):
    method work (line 122) | async def work(self)->AsyncIterator:

FILE: archive/ktransformers/server/backend/context_manager.py
  class ThreadContextManager (line 17) | class ThreadContextManager:
    method __init__ (line 22) | def __init__(self,interface) -> None:
    method get_context_by_run_object (line 29) | async def get_context_by_run_object(self, run: RunObject) -> ThreadCon...
    method get_context_by_thread_id (line 57) | async def get_context_by_thread_id(self, thread_id: ObjectID) -> Optio...

FILE: archive/ktransformers/server/backend/interfaces/balance_serve.py
  function chat_stream (line 102) | async def chat_stream(queue: asyncio.Queue, tokenizer: AutoTokenizer):
  function fill_generated_tokens (line 122) | def fill_generated_tokens(query_updates: list[sched_ext.QueryUpdate], ge...
  function report_last_time_performance (line 132) | def report_last_time_performance(profiler: Profiler):
  class Engine (line 144) | class Engine:
    method __init__ (line 152) | def __init__(self, args: ConfigArgs = default_args, generated_token_qu...
    method sampling (line 300) | def sampling(self, forward_output: ForwardBatchOutput):
    method loop (line 323) | def loop(self):
  class BalanceServeThreadContext (line 383) | class BalanceServeThreadContext(ThreadContext):
    method get_local_messages (line 384) | def get_local_messages(self):
  function init_distributed (line 392) | def init_distributed(rank: int,
  function run_engine (line 408) | def run_engine(args, token_queue, broadcast_endpoint, event, kvcache_eve...
  class BalanceServeInterface (line 427) | class BalanceServeInterface(BackendInterfaceBase):
    method __init__ (line 443) | def __init__(self, args: ConfigArgs = default_args, input_args=None):
    method get_params (line 529) | def get_params(self, temperature: Optional[float] = None, top_p: Optio...
    method run_queue_proxy (line 550) | def run_queue_proxy(self):
    method lifespan (line 556) | async def lifespan(self, app: FastAPI):
    method queue_proxy (line 560) | async def queue_proxy(self):
    method tokenize_prompt (line 577) | def tokenize_prompt(self, prompt: str):
    method format_and_tokenize_input_ids (line 581) | def format_and_tokenize_input_ids(self, thread_id: ObjectID, messages:...
    method inference (line 601) | async def inference(self, local_messages, thread_id: str, temperature:...

FILE: archive/ktransformers/server/backend/interfaces/exllamav2.py
  class ExllamaThreadContext (line 14) | class ExllamaThreadContext(ThreadContext):
    method __init__ (line 15) | def __init__(self, run: RunObject, args: ConfigArgs = default_args) ->...
    method get_interface (line 18) | def get_interface(self):
    method get_local_messages (line 21) | def get_local_messages(self):
  class ExllamaInterface (line 27) | class ExllamaInterface(BackendInterfaceBase):
    method __init__ (line 29) | def __init__(self, args: ConfigArgs = ...):
    method tokenize_prompt (line 32) | def tokenize_prompt(self, prompt: str) -> torch.Tensor:
    method inference (line 35) | async def inference(self,local_messages,request_unique_id:Optional[str...

FILE: archive/ktransformers/server/backend/interfaces/ktransformers.py
  class KTransformersThreadContext (line 52) | class KTransformersThreadContext(TransformersThreadContext):
  class KTransformersInterface (line 56) | class KTransformersInterface(TransformersInterface):
    method __init__ (line 57) | def __init__(self, args: ConfigArgs = default_args, input_args=None):
    method decode_one_tokens (line 130) | def decode_one_tokens(self):
    method prefill (line 206) | def prefill(self, input_ids: torch.Tensor, is_new: bool, temperature: ...
    method active_cache_position (line 353) | def active_cache_position(self):
    method sampling (line 357) | def sampling(self, logits, do_sample):
    method verify_by_tokenid (line 377) | def verify_by_tokenid(self, main_token: int, draft_token: int):
    method verify_speculative_decoding (line 380) | def verify_speculative_decoding(self, main_prob: torch.Tensor, draft_p...
    method logits_to_token (line 397) | def logits_to_token(self, logits: torch.Tensor):
    method inference (line 410) | async def inference(self, local_messages, thread_id: str, temperature:...
    method sync_inference (line 424) | def sync_inference(self, local_messages, thread_id: str, temperature: ...

FILE: archive/ktransformers/server/backend/interfaces/transformers.py
  class TextStreamer (line 47) | class TextStreamer:
    method __init__ (line 49) | def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = Fal...
    method reset (line 59) | def reset(self):
    method put (line 63) | def put(self, value) -> Optional[str]:
    method end (line 93) | def end(self) -> Optional[str]:
    method _is_chinese_char (line 106) | def _is_chinese_char(self, cp):
  class TransformersThreadContext (line 131) | class TransformersThreadContext(ThreadContext):
    method get_local_messages (line 132) | def get_local_messages(self):
  class TransformersInterface (line 140) | class TransformersInterface(BackendInterfaceBase):
    method __init__ (line 156) | def __init__(self, args: ConfigArgs = default_args):
    method current_ids (line 175) | def current_ids(self):
    method active_cache_position (line 179) | def active_cache_position(self):
    method tokenize_prompt (line 182) | def tokenize_prompt(self, prompt: str):
    method format_and_tokenize_input_ids (line 186) | def format_and_tokenize_input_ids(self, thread_id: ObjectID, messages:...
    method append_new_tokens (line 224) | def append_new_tokens(self, new_tokens: int) -> Optional[str]:
    method tf_logits_warper (line 231) | def tf_logits_warper(generation_config):
    method prepare_logits_wrapper (line 282) | def prepare_logits_wrapper(self, inputs, device, temperature: Optional...
    method logits_to_token (line 301) | def logits_to_token(self, logits: torch.Tensor):
    method decode_one_tokens (line 316) | def decode_one_tokens(self):
    method prefill (line 332) | def prefill(self, input_ids: torch.Tensor, is_new: bool, temperature: ...
    method generate (line 409) | def generate(self):
    method check_is_new (line 445) | def check_is_new(self, thread_id: str):
    method inference (line 458) | async def inference(self, local_messages, thread_id: str, temperature:...

FILE: archive/ktransformers/server/balance_serve/inference/config.py
  class ModelConfig (line 21) | class ModelConfig:
    method __init__ (line 58) | def __init__(self, config):
    method load_config (line 72) | def load_config(self):
  class ParallelConfig (line 90) | class ParallelConfig:
    method __init__ (line 91) | def __init__(
  class AttnConfig (line 100) | class AttnConfig:
    method __init__ (line 106) | def __init__(self, config):
  class SamplerConfig (line 113) | class SamplerConfig():
    method __init__ (line 118) | def __init__(self, config):
  function load_yaml_config (line 123) | def load_yaml_config(file_path):
  class LLMConfig (line 130) | class LLMConfig:
    method __init__ (line 137) | def __init__(self, config_file):

FILE: archive/ktransformers/server/balance_serve/inference/distributed/communication_op.py
  function tensor_model_parallel_all_reduce (line 15) | def tensor_model_parallel_all_reduce(input_: torch.Tensor, bsz_tensor: t...
  function tensor_model_parallel_all_gather (line 20) | def tensor_model_parallel_all_gather(
  function tensor_model_parallel_gather (line 27) | def tensor_model_parallel_gather(
  function broadcast_tensor_dict (line 34) | def broadcast_tensor_dict(

FILE: archive/ktransformers/server/balance_serve/inference/distributed/cuda_wrapper.py
  class cudaIpcMemHandle_t (line 21) | class cudaIpcMemHandle_t(ctypes.Structure):
  class Function (line 26) | class Function:
  function find_loaded_library (line 32) | def find_loaded_library(lib_name) -> Optional[str]:
  class CudaRTLibrary (line 58) | class CudaRTLibrary:
    method __init__ (line 100) | def __init__(self, so_file: Optional[str] = None):
    method CUDART_CHECK (line 120) | def CUDART_CHECK(self, result: cudaError_t) -> None:
    method cudaGetErrorString (line 125) | def cudaGetErrorString(self, error: cudaError_t) -> str:
    method cudaSetDevice (line 128) | def cudaSetDevice(self, device: int) -> None:
    method cudaDeviceSynchronize (line 131) | def cudaDeviceSynchronize(self) -> None:
    method cudaDeviceReset (line 134) | def cudaDeviceReset(self) -> None:
    method cudaMalloc (line 137) | def cudaMalloc(self, size: int) -> ctypes.c_void_p:
    method cudaFree (line 142) | def cudaFree(self, devPtr: ctypes.c_void_p) -> None:
    method cudaMemset (line 145) | def cudaMemset(self, devPtr: ctypes.c_void_p, value: int,
    method cudaMemcpy (line 149) | def cudaMemcpy(self, dst: ctypes.c_void_p, src: ctypes.c_void_p,
    method cudaIpcGetMemHandle (line 155) | def cudaIpcGetMemHandle(self,
    method cudaIpcOpenMemHandle (line 162) | def cudaIpcOpenMemHandle(self,

FILE: archive/ktransformers/server/balance_serve/inference/distributed/custom_all_reduce.py
  function _can_p2p (line 25) | def _can_p2p(rank: int, world_size: int) -> bool:
  function is_weak_contiguous (line 37) | def is_weak_contiguous(inp: torch.Tensor):
  class CustomAllreduce (line 44) | class CustomAllreduce:
    method __init__ (line 49) | def __init__(
    method create_shared_buffer (line 179) | def create_shared_buffer(
    method free_shared_buffer (line 204) | def free_shared_buffer(
    method capture (line 212) | def capture(self):
    method register_graph_buffers (line 226) | def register_graph_buffers(self):
    method should_custom_ar (line 244) | def should_custom_ar(self, inp: torch.Tensor):
    method all_reduce (line 259) | def all_reduce(
    method custom_all_reduce (line 284) | def custom_all_reduce(self, input: torch.Tensor, bsz_tensor: torch.Ten...
    method close (line 302) | def close(self):
    method __del__ (line 309) | def __del__(self):

FILE: archive/ktransformers/server/balance_serve/inference/distributed/custom_all_reduce_utils.py
  function producer (line 19) | def producer(
  function consumer (line 53) | def consumer(
  function can_actually_p2p (line 94) | def can_actually_p2p(
  function gpu_p2p_access_check (line 194) | def gpu_p2p_access_check(src: int, tgt: int) -> bool:

FILE: archive/ktransformers/server/balance_serve/inference/distributed/parallel_state.py
  class GraphCaptureContext (line 43) | class GraphCaptureContext:
  function _split_tensor_dict (line 50) | def _split_tensor_dict(
  function _get_unique_name (line 79) | def _get_unique_name(name: str) -> str:
  function _register_group (line 95) | def _register_group(group: "GroupCoordinator") -> None:
  function inplace_all_reduce (line 101) | def inplace_all_reduce(tensor: torch.Tensor, group_name: str) -> None:
  function inplace_all_reduce_fake (line 108) | def inplace_all_reduce_fake(tensor: torch.Tensor, group_name: str) -> None:
  function outplace_all_reduce (line 118) | def outplace_all_reduce(tensor: torch.Tensor, group_name: str, bsz_tenso...
  function outplace_all_reduce_fake (line 125) | def outplace_all_reduce_fake(tensor: torch.Tensor, group_name: str, bsz_...
  class GroupCoordinator (line 136) | class GroupCoordinator:
    method __init__ (line 169) | def __init__(
    method first_rank (line 271) | def first_rank(self):
    method last_rank (line 276) | def last_rank(self):
    method is_first_rank (line 281) | def is_first_rank(self):
    method is_last_rank (line 286) | def is_last_rank(self):
    method next_rank (line 291) | def next_rank(self):
    method prev_rank (line 298) | def prev_rank(self):
    method graph_capture (line 305) | def graph_capture(
    method all_reduce (line 352) | def all_reduce(self, input_: torch.Tensor, bsz_tensor: torch.Tensor, i...
    method _all_reduce_out_place (line 406) | def _all_reduce_out_place(self, input_: torch.Tensor, bsz_tensor: torc...
    method _all_reduce_in_place (line 414) | def _all_reduce_in_place(self, input_: torch.Tensor) -> None:
    method all_gather (line 421) | def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Ten...
    method gather (line 464) | def gather(
    method broadcast (line 499) | def broadcast(self, input_: torch.Tensor, src: int = 0):
    method broadcast_object (line 514) | def broadcast_object(self, obj: Optional[Any] = None, src: int = 0):
    method broadcast_object_list (line 538) | def broadcast_object_list(
    method send_object (line 555) | def send_object(self, obj: Any, dst: int) -> None:
    method recv_object (line 582) | def recv_object(self, src: int) -> Any:
    method broadcast_tensor_dict (line 618) | def broadcast_tensor_dict(
    method send_tensor_dict (line 700) | def send_tensor_dict(
    method recv_tensor_dict (line 753) | def recv_tensor_dict(
    method barrier (line 815) | def barrier(self):
    method send (line 824) | def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None:
    method recv (line 836) | def recv(
    method destroy (line 852) | def destroy(self):
  function get_world_group (line 870) | def get_world_group() -> GroupCoordinator:
  function init_world_group (line 875) | def init_world_group(
  function init_model_parallel_group (line 891) | def init_model_parallel_group(
  function get_tp_group (line 918) | def get_tp_group() -> GroupCoordinator:
  function get_pp_group (line 929) | def get_pp_group() -> GroupCoordinator:
  function graph_capture (line 939) | def graph_capture():
  function set_custom_all_reduce (line 962) | def set_custom_all_reduce(enable: bool):
  function init_distributed_environment (line 967) | def init_distributed_environment(
  function initialize_model_parallel (line 1014) | def initialize_model_parallel(
  function ensure_model_parallel_initialized (line 1091) | def ensure_model_parallel_initialized(
  function model_parallel_is_initialized (line 1120) | def model_parallel_is_initialized():
  function patch_tensor_parallel_group (line 1129) | def patch_tensor_parallel_group(tp_group: GroupCoordinator):
  function get_tensor_model_parallel_world_size (line 1153) | def get_tensor_model_parallel_world_size():
  function get_tensor_model_parallel_rank (line 1158) | def get_tensor_model_parallel_rank():
  function destroy_model_parallel (line 1163) | def destroy_model_parallel():
  function destroy_distributed_environment (line 1176) | def destroy_distributed_environment():
  function cleanup_dist_env_and_memory (line 1185) | def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
  function in_the_same_node_as (line 1199) | def in_the_same_node_as(pg: ProcessGroup, source_rank: int = 0) -> List[...

FILE: archive/ktransformers/server/balance_serve/inference/distributed/pynccl.py
  class PyNcclCommunicator (line 21) | class PyNcclCommunicator:
    method __init__ (line 23) | def __init__(
    method all_reduce (line 119) | def all_reduce(
    method send (line 143) | def send(self, tensor: torch.Tensor, dst: int, stream=None):
    method recv (line 161) | def recv(self, tensor: torch.Tensor, src: int, stream=None):
    method change_state (line 180) | def change_state(

FILE: archive/ktransformers/server/balance_serve/inference/distributed/pynccl_wrapper.py
  class ncclUniqueId (line 41) | class ncclUniqueId(ctypes.Structure):
  class ncclDataTypeEnum (line 51) | class ncclDataTypeEnum:
    method from_torch (line 70) | def from_torch(cls, dtype: torch.dtype) -> int:
  class ncclRedOpTypeEnum (line 93) | class ncclRedOpTypeEnum:
    method from_torch (line 102) | def from_torch(cls, op: ReduceOp) -> int:
  class Function (line 117) | class Function:
  class NCCLLibrary (line 123) | class NCCLLibrary:
    method __init__ (line 184) | def __init__(self, so_file: Optional[str] = None):
    method ncclGetErrorString (line 215) | def ncclGetErrorString(self, result: ncclResult_t) -> str:
    method NCCL_CHECK (line 218) | def NCCL_CHECK(self, result: ncclResult_t) -> None:
    method ncclGetVersion (line 223) | def ncclGetVersion(self) -> str:
    method ncclGetUniqueId (line 233) | def ncclGetUniqueId(self) -> ncclUniqueId:
    method ncclCommInitRank (line 239) | def ncclCommInitRank(self, world_size: int, unique_id: ncclUniqueId,
    method ncclAllReduce (line 247) | def ncclAllReduce(self, sendbuff: buffer_type, recvbuff: buffer_type,
    method ncclSend (line 259) | def ncclSend(self, sendbuff: buffer_type, count: int, datatype: int,
    method ncclRecv (line 264) | def ncclRecv(self, recvbuff: buffer_type, count: int, datatype: int,
    method ncclCommDestroy (line 269) | def ncclCommDestroy(self, comm: ncclComm_t) -> None:

FILE: archive/ktransformers/server/balance_serve/inference/distributed/utils.py
  function ensure_divisibility (line 17) | def ensure_divisibility(numerator, denominator):
  function divide (line 24) | def divide(numerator, denominator):
  function split_tensor_along_last_dim (line 31) | def split_tensor_along_last_dim(
  function get_pp_indices (line 59) | def get_pp_indices(
  class StatelessProcessGroup (line 92) | class StatelessProcessGroup:
    method __post_init__ (line 113) | def __post_init__(self):
    method send_obj (line 119) | def send_obj(self, obj: Any, dst: int):
    method expire_data (line 127) | def expire_data(self):
    method recv_obj (line 138) | def recv_obj(self, src: int) -> Any:
    method broadcast_obj (line 146) | def broadcast_obj(self, obj: Optional[Any], src: int) -> Any:
    method all_gather_obj (line 164) | def all_gather_obj(self, obj: Any) -> list[Any]:
    method barrier (line 176) | def barrier(self):
    method create (line 185) | def create(

FILE: archive/ktransformers/server/balance_serve/inference/forward_batch.py
  class ForwardMiniBatchCombine (line 18) | class ForwardMiniBatchCombine:
    method __init__ (line 36) | def __init__(self, prefill_querys_info: list[QueryInfo], decode_querys...
    method fill (line 99) | def fill(self, prefill_querys_info: list[QueryInfo], decode_querys_inf...
    method __str__ (line 168) | def __str__(self):
  class ForwardMiniBatchSplit (line 177) | class ForwardMiniBatchSplit:
    method __init__ (line 202) | def __init__(
    method fill (line 466) | def fill(
    method __str__ (line 719) | def __str__(self):
  class ForwardBatchInput (line 732) | class ForwardBatchInput:
    method __init__ (line 739) | def __init__(self, batch : sched_ext.BatchQueryTodo = None, query_mana...
    method gen_max_forward_batch (line 769) | def gen_max_forward_batch(
    method fill (line 821) | def fill(self, batch : sched_ext.BatchQueryTodo = None, query_manager:...
  class ForwardBatchOutput (line 845) | class ForwardBatchOutput:
    method __init__ (line 856) | def __init__(self):
    method merge (line 867) | def merge(self, new_output):
    method __str__ (line 877) | def __str__(self):

FILE: archive/ktransformers/server/balance_serve/inference/model_runner.py
  function pad_num_tokens (line 53) | def pad_num_tokens(num_tokens):
  function deduplicate_and_sort (line 56) | def deduplicate_and_sort(lst):
  function generate_cuda_graphs (line 58) | def generate_cuda_graphs(chunk_size: int) -> list:
  class ModelRunner (line 69) | class ModelRunner:
    method __init__ (line 80) | def __init__(self, model = None, cache = None, device = None, use_cuda...
    method model_attn_plan (line 135) | def model_attn_plan(self, batch, cuda_graph_idx=0):
    method warmup (line 151) | def warmup(self):
    method warmup_npu (line 206) | def warmup_npu(self):
    method run (line 267) | def run(self, batch: sched_ext.BatchQueryTodo = None, query_manager: Q...
    method run_split (line 349) | def run_split(self, batch: sched_ext.BatchQueryTodo = None, query_mana...
    method replay (line 465) | def replay(self, cuda_graph_idx=-1):
    method sync (line 478) | def sync(self, calc_time = True):
  function get_or_create_model_runner (line 484) | def get_or_create_model_runner(model=None, cache=None, device=None, use_...

FILE: archive/ktransformers/server/balance_serve/inference/query_manager.py
  class QueryInfo (line 13) | class QueryInfo:
    method __init__ (line 32) | def __init__(self, id, query_length: int, max_length: int, page_size: ...
    method check_stop (line 58) | def check_stop(self):
    method print (line 93) | def print(self):
  class QueryManager (line 101) | class QueryManager:
    method __init__ (line 108) | def __init__(self, max_length = 65536, page_size = 256, device = torch...
    method print (line 114) | def print(self, hint: str = ""):
    method add_query (line 122) | def add_query(self, batch: sched_ext.BatchQueryTodo):
    method update (line 148) | def update(self, batch: sched_ext.BatchQueryTodo) -> list[sched_ext.Qu...

FILE: archive/ktransformers/server/balance_serve/inference/sampling/penaltylib/orchestrator.py
  class _ReqLike (line 9) | class _ReqLike:
  class _BatchLike (line 14) | class _BatchLike:
    method batch_size (line 17) | def batch_size(self):
  class BatchedPenalizerOrchestrator (line 21) | class BatchedPenalizerOrchestrator:
    method __init__ (line 27) | def __init__(
    method reqs (line 51) | def reqs(self):
    method batch_size (line 54) | def batch_size(self):
    method cumulate_input_tokens (line 57) | def cumulate_input_tokens(
    method cumulate_output_tokens (line 74) | def cumulate_output_tokens(
    method apply (line 94) | def apply(self, logits: torch.Tensor) -> torch.Tensor:
    method filter (line 113) | def filter(
    method merge (line 149) | def merge(self, their: "BatchedPenalizerOrchestrator"):
  class _TokenIDs (line 171) | class _TokenIDs:
    method __init__ (line 185) | def __init__(
    method occurrence_count (line 204) | def occurrence_count(self) -> torch.Tensor:
  class _BatchedPenalizer (line 244) | class _BatchedPenalizer(abc.ABC):
    method __init__ (line 252) | def __init__(self, orchestrator: BatchedPenalizerOrchestrator):
    method is_prepared (line 255) | def is_prepared(self) -> bool:
    method is_required (line 258) | def is_required(self) -> bool:
    method prepare (line 261) | def prepare(self):
    method prepare_if_required (line 266) | def prepare_if_required(self):
    method teardown (line 273) | def teardown(self):
    method cumulate_input_tokens (line 278) | def cumulate_input_tokens(self, input_ids: _TokenIDs):
    method cumulate_output_tokens (line 284) | def cumulate_output_tokens(self, output_ids: _TokenIDs):
    method apply (line 290) | def apply(self, logits: torch.Tensor) -> torch.Tensor:
    method filter (line 296) | def filter(
    method merge (line 307) | def merge(self, their: "_BatchedPenalizer"):
    method _is_required (line 316) | def _is_required(self) -> bool:
    method _prepare (line 323) | def _prepare(self):
    method _teardown (line 331) | def _teardown(self):
    method _cumulate_input_tokens (line 339) | def _cumulate_input_tokens(self, input_ids: _TokenIDs):
    method _cumulate_output_tokens (line 347) | def _cumulate_output_tokens(self, output_ids: _TokenIDs):
    method _apply (line 355) | def _apply(self, logits: torch.Tensor) -> torch.Tensor:
    method _filter (line 363) | def _filter(
    method _merge (line 372) | def _merge(self, their: "_BatchedPenalizer"):

FILE: archive/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/frequency_penalty.py
  class BatchedFrequencyPenalizer (line 8) | class BatchedFrequencyPenalizer(_BatchedPenalizer):
    method _is_required (line 16) | def _is_required(self) -> bool:
    method _prepare (line 22) | def _prepare(self):
    method _teardown (line 46) | def _teardown(self):
    method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs):
    method _cumulate_output_tokens (line 56) | def _cumulate_output_tokens(self, output_ids: _TokenIDs):
    method _apply (line 61) | def _apply(self, logits: torch.Tensor) -> torch.Tensor:
    method _filter (line 65) | def _filter(
    method _merge (line 73) | def _merge(self, their: "BatchedFrequencyPenalizer"):

FILE: archive/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/min_new_tokens.py
  class BatchedMinNewTokensPenalizer (line 8) | class BatchedMinNewTokensPenalizer(_BatchedPenalizer):
    method _is_required (line 17) | def _is_required(self) -> bool:
    method _prepare (line 22) | def _prepare(self):
    method _teardown (line 72) | def _teardown(self):
    method _cumulate_input_tokens (line 81) | def _cumulate_input_tokens(self, input_ids: _TokenIDs):
    method _cumulate_output_tokens (line 84) | def _cumulate_output_tokens(self, output_ids: _TokenIDs):
    method _apply (line 87) | def _apply(self, logits: torch.Tensor) -> torch.Tensor:
    method _filter (line 92) | def _filter(
    method _merge (line 99) | def _merge(self, their: "BatchedMinNewTokensPenalizer"):

FILE: archive/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/presence_penalty.py
  class BatchedPresencePenalizer (line 8) | class BatchedPresencePenalizer(_BatchedPenalizer):
    method _is_required (line 16) | def _is_required(self) -> bool:
    method _prepare (line 22) | def _prepare(self):
    method _teardown (line 46) | def _teardown(self):
    method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs):
    method _cumulate_output_tokens (line 56) | def _cumulate_output_tokens(self, output_ids: _TokenIDs):
    method _apply (line 60) | def _apply(self, logits: torch.Tensor) -> torch.Tensor:
    method _filter (line 64) | def _filter(
    method _merge (line 72) | def _merge(self, their: "BatchedPresencePenalizer"):

FILE: archive/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/repetition_penalty.py
  class BatchedRepetitionPenalizer (line 8) | class BatchedRepetitionPenalizer(_BatchedPenalizer):
    method _is_required (line 16) | def _is_required(self) -> bool:
    method _prepare (line 22) | def _prepare(self):
    method _teardown (line 46) | def _teardown(self):
    method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs):
    method _cumulate_output_tokens (line 57) | def _cumulate_output_tokens(self, output_ids: _TokenIDs):
    method _apply (line 61) | def _apply(self, logits: torch.Tensor) -> torch.Tensor:
    method _filter (line 68) | def _filter(
    method _merge (line 76) | def _merge(self, their: "BatchedRepetitionPenalizer"):

FILE: archive/ktransformers/server/balance_serve/inference/sampling/sampler.py
  class SamplingOptions (line 25) | class SamplingOptions():
    method __init__ (line 38) | def __init__(self, bsz = 1, device = torch.device('cuda'), pretrained_...
  class Sampler (line 59) | class Sampler(nn.Module):
    method __init__ (line 60) | def __init__(self):
    method forward (line 63) | def forward(

FILE: archive/ktransformers/server/balance_serve/sched_rpc.py
  class SchedulerServer (line 31) | class SchedulerServer:
    method __init__ (line 32) | def __init__(self, settings, main_args):
    method run_scheduler (line 50) | def run_scheduler(self):
    method stop_scheduler (line 54) | def stop_scheduler(self):
    method start_proxy (line 58) | def start_proxy(self):
    method worker_routine (line 63) | def worker_routine(self):
    method start_rpc_service (line 129) | def start_rpc_service(self):
    method stop_rpc_service (line 148) | def stop_rpc_service(self):
  function start_server (line 154) | def start_server(settings, main_args):
  class SchedulerClient (line 160) | class SchedulerClient:
    method __init__ (line 161) | def __init__(self, sched_port):
    method __del__ (line 169) | def __del__(self):
    method send_request (line 173) | def send_request(self, method, params=None):
    method add_query (line 190) | def add_query(self, query):
    method cancel_query (line 194) | def cancel_query(self, query_id):
    method update_last_batch (line 197) | def update_last_batch(self, updates):
    method rebuild_inferece_context (line 202) | def rebuild_inferece_context(self,response):
    method get_inference_context_raw (line 210) | def get_inference_context_raw(self):

FILE: archive/ktransformers/server/balance_serve/settings.py
  function create_sched_settings (line 19) | def create_sched_settings(args):
  function create_sched_settings_qwen2moe (line 71) | def create_sched_settings_qwen2moe(args):
  function create_sched_settings_qwen3moe (line 125) | def create_sched_settings_qwen3moe(args):
  function create_sched_settings_glm4moe (line 177) | def create_sched_settings_glm4moe(args):
  function create_sched_settings_smallthinker (line 229) | def create_sched_settings_smallthinker(args):
  function create_sched_settings_qwen3next (line 281) | def create_sched_settings_qwen3next(args):

FILE: archive/ktransformers/server/config/config.py
  class Config (line 20) | class Config(metaclass=Singleton):
    method load (line 26) | def load() -> dict:
    method to_path (line 53) | def to_path(path: str) -> str:
    method __init__ (line 61) | def __init__(self):

FILE: archive/ktransformers/server/config/log.py
  class DailyRotatingFileHandler (line 25) | class DailyRotatingFileHandler(BaseRotatingHandler):
    method __init__ (line 32) | def __init__(self, filename, backupCount=0, encoding=None, delay=False...
    method shouldRollover (line 46) | def shouldRollover(self, record):
    method doRollover (line 59) | def doRollover(self):
    method _compute_fn (line 78) | def _compute_fn(self):
    method _open (line 84) | def _open(self):
    method delete_expired_files (line 106) | def delete_expired_files(self):
  class Logger (line 132) | class Logger(object):
    method __init__ (line 144) | def __init__(self, level: str = 'info'):

FILE: archive/ktransformers/server/config/singleton.py
  class Singleton (line 13) | class Singleton(abc.ABCMeta, type):
    method __call__ (line 24) | def __call__(cls, *args, **kwds):
  class AbstractSingleton (line 29) | class AbstractSingleton(abc.ABC, metaclass=Singleton):

FILE: archive/ktransformers/server/crud/assistants/assistants.py
  class AssistantDatabaseManager (line 12) | class AssistantDatabaseManager:
    method __init__ (line 13) | def __init__(self) -> None:
    method create_assistant_object (line 16) | def create_assistant_object(self, assistant: AssistantCreate) -> Assis...
    method db_count_assistants (line 25) | def db_count_assistants(self) -> int:
    method db_create_assistant (line 29) | def db_create_assistant(self, assistant: AssistantCreate):
    method db_list_assistants (line 34) | def db_list_assistants(self, limit: Optional[int], order: Order) -> Li...
    method db_get_assistant_by_id (line 44) | def db_get_assistant_by_id(self, assistant_id: str) -> Optional[Assist...
    method db_update_assistant_by_id (line 53) | def db_update_assistant_by_id(self, assistant_id: str, assistant: Assi...
    method db_delete_assistant_by_id (line 60) | def db_delete_assistant_by_id(self, assistant_id: str):

FILE: archive/ktransformers/server/crud/assistants/messages.py
  class MessageDatabaseManager (line 10) | class MessageDatabaseManager:
    method __init__ (line 11) | def __init__(self) -> None:
    method create_db_message_by_core (line 15) | def create_db_message_by_core(message: MessageCore):
    method create_db_message (line 19) | def create_db_message(self, message: MessageCreate):
    method db_add_message (line 22) | def db_add_message(self, message: Message):
    method db_create_message (line 27) | def db_create_message(self, thread_id: str, message: MessageCreate, st...
    method create_message_object (line 35) | def create_message_object(thread_id: ObjectID, run_id: ObjectID, messa...
    method db_sync_message (line 47) | def db_sync_message(self, message: MessageObject):
    method db_list_messages_of_thread (line 54) | def db_list_messages_of_thread(
    method db_get_message_by_id (line 72) | def db_get_message_by_id(self, thread_id: ObjectID, message_id: Object...
    method db_delete_message_by_id (line 80) | def db_delete_message_by_id(self, thread_id: ObjectID, message_id: Obj...

FILE: archive/ktransformers/server/crud/assistants/runs.py
  class RunsDatabaseManager (line 10) | class RunsDatabaseManager:
    method __init__ (line 11) | def __init__(self) -> None:
    method create_run_object (line 14) | def create_run_object(self, thread_id: ObjectID, run: RunCreate) -> Ru...
    method db_create_run (line 26) | def db_create_run(self, thread_id: str, run: RunCreate):
    method db_sync_run (line 40) | def db_sync_run(self, run: RunObject) -> None:
    method db_get_run (line 47) | def db_get_run(self, run_id: ObjectID) -> RunObject:

FILE: archive/ktransformers/server/crud/assistants/threads.py
  class ThreadsDatabaseManager (line 15) | class ThreadsDatabaseManager:
    method __init__ (line 16) | def __init__(self) -> None:
    method db_create_thread (line 21) | def db_create_thread(self, thread: ThreadCreate):
    method db_get_thread_by_id (line 54) | def db_get_thread_by_id(self, thread_id: ObjectID):
    method db_list_threads (line 59) | def db_list_threads(self, limit: Optional[int], order: Order) -> List[...
    method db_list_threads_preview (line 71) | def db_list_threads_preview(self, limit: Optional[int], order: Order) ...
    method db_delete_thread_by_id (line 88) | def db_delete_thread_by_id(self, thread_id: ObjectID):

FILE: archive/ktransformers/server/exceptions.py
  function db_exception (line 4) | def db_exception():
  function not_implemented (line 11) | def not_implemented(what):
  function internal_server_error (line 18) | def internal_server_error(what):
  function request_error (line 22) | def request_error(what):

FILE: archive/ktransformers/server/main.py
  function mount_app_routes (line 29) | def mount_app_routes(mount_app: FastAPI):
  function create_app (line 37) | def create_app():
  function update_web_port (line 57) | def update_web_port(config_file: str):
  function mount_index_routes (line 69) | def mount_index_routes(app: FastAPI):
  function run_api (line 83) | def run_api(app, host, port, **kwargs):
  function custom_openapi (line 96) | def custom_openapi(app):
  function verify_arg (line 111) | def verify_arg(args):
  function main (line 127) | def main():

FILE: archive/ktransformers/server/models/assistants/assistants.py
  class Assistant (line 7) | class Assistant(Base):

FILE: archive/ktransformers/server/models/assistants/messages.py
  class Message (line 7) | class Message(Base):

FILE: archive/ktransformers/server/models/assistants/run_steps.py
  class RunStep (line 7) | class RunStep(Base):

FILE: archive/ktransformers/server/models/assistants/runs.py
  class Run (line 7) | class Run(Base):

FILE: archive/ktransformers/server/models/assistants/threads.py
  class Thread (line 7) | class Thread(Base):

FILE: archive/ktransformers/server/schemas/assistants/assistants.py
  class AssistantBase (line 21) | class AssistantBase(BaseModel):
    method validate_tools (line 28) | def validate_tools(cls, value):
    method validate_tool_resources (line 51) | def validate_tool_resources(cls, value):
    method convert_meta_data (line 70) | def convert_meta_data(cls, values):
  class AssistantCreate (line 79) | class AssistantCreate(AssistantBase):
  class AssistantBuildStatus (line 83) | class AssistantBuildStatus(BaseModel):
    class Status (line 84) | class Status(Enum):
    method to_stream_reply (line 112) | def to_stream_reply(self) -> str:
  class AssistantObject (line 116) | class AssistantObject(AssistantBase, ObjectWithCreatedTime):
    method as_api_response (line 123) | def as_api_response(self):
    method get_related_threads_ids (line 126) | def get_related_threads_ids(self) -> List[ObjectID]:
    method get_related_threads_objects (line 133) | def get_related_threads_objects(self) -> List:
    method append_related_threads (line 145) | def append_related_threads(self, thread_ids: List[ObjectID]):
    method update_build_status (line 156) | async def update_build_status(self, events: AsyncIterable) -> AsyncIte...
    method get_build_status (line 178) | def get_build_status(self) -> AssistantBuildStatus:
    method sync_db (line 182) | def sync_db(self)->None:
    method get_encoded_instruction (line 191) | def get_encoded_instruction(self,encode_fn:Callable)->torch.Tensor:
  class AssistantModify (line 198) | class AssistantModify(AssistantBase):

FILE: archive/ktransformers/server/schemas/assistants/messages.py
  class IncompleteDetails (line 15) | class IncompleteDetails(BaseModel):
  class ContentType (line 19) | class ContentType(Enum):
  class ContentObject (line 25) | class ContentObject(BaseModel):
  class ImageFile (line 29) | class ImageFile(BaseModel):
  class ImageFileObject (line 34) | class ImageFileObject(ContentObject):
  class ImageUrl (line 38) | class ImageUrl(BaseModel):
  class ImageUrlObject (line 43) | class ImageUrlObject(ContentObject):
  class Annotation (line 47) | class Annotation(BaseModel):
  class Text (line 51) | class Text(BaseModel):
  class TextObject (line 56) | class TextObject(ContentObject):
    method filter_append (line 62) | def filter_append(self,text:str):
  class Attachment (line 72) | class Attachment(BaseModel):
  class Role (line 77) | class Role(Enum):
    method is_user (line 81) | def is_user(self)->bool:
  class MessageCore (line 85) | class MessageCore(BaseModel):
    method convert_meta_data (line 92) | def convert_meta_data(cls,values):
  class MessageBase (line 98) | class MessageBase(MessageCore):
    class Status (line 99) | class Status(Enum):
  class MessageObject (line 116) | class MessageObject(MessageBase, ObjectWithCreatedTime):
    method get_text_content (line 120) | def get_text_content(self) -> str:
    method get_encoded_content (line 129) | async def get_encoded_content(self,encode_fn:Callable):
    method get_attached_files (line 142) | def get_attached_files(self):
    method append_message_delta (line 147) | def append_message_delta(self,text:str):
    method sync_db (line 150) | def sync_db(self):
    method stream_response_with_event (line 160) | def stream_response_with_event(self, event: MessageBase.Status) -> Mes...
  class MessageStreamResponse (line 169) | class MessageStreamResponse(BaseModel):
    method to_stream_reply (line 173) | def to_stream_reply(self):
  class MessageCreate (line 177) | class MessageCreate(BaseModel):
    method convert_meta_data (line 184) | def convert_meta_data(cls,values):
    method to_core (line 189) | def to_core(self) -> MessageCore:
  class MessageModify (line 206) | class MessageModify(BaseModel):
    method convert_meta_data (line 210) | def convert_meta_data(cls,values):

FILE: archive/ktransformers/server/schemas/assistants/runs.py
  class ToolCall (line 13) | class ToolCall(BaseModel):
  class SubmitToolOutputs (line 19) | class SubmitToolOutputs(BaseModel):
  class RequiredAction (line 23) | class RequiredAction(BaseModel):
  class LastError (line 28) | class LastError(BaseModel):
  class IncompleteDetails (line 33) | class IncompleteDetails(BaseModel):
  class Usage (line 37) | class Usage(BaseModel):
  class TruncationStrategy (line 43) | class TruncationStrategy(BaseModel):
  class ToolChoiceType (line 48) | class ToolChoiceType(Enum):
  class RunBase (line 54) | class RunBase(BaseModel):
    class Status (line 55) | class Status(Enum):
    method convert_meta_data (line 84) | def convert_meta_data(cls,values):
    method set_compute_save (line 89) | def set_compute_save(self,save:int):
  class RunObject (line 104) | class RunObject(RunBase, ObjectWithCreatedTime):
    method stream_response_with_event (line 105) | def stream_response_with_event(self,event:RunBase.Status)->RunStreamRe...
    method sync_db (line 114) | def sync_db(self):
    method create_message_creation_step (line 123) | def create_message_creation_step(self):
  class RunStreamResponse (line 127) | class RunStreamResponse(BaseModel):
    method to_stream_reply (line 130) | def to_stream_reply(self):
  class RunCreate (line 133) | class RunCreate(BaseModel):
    method convert_meta_data (line 144) | def convert_meta_data(cls,values):
  class RunThreadCreate (line 159) | class RunThreadCreate(BaseModel):
    method convert_meta_data (line 169) | def convert_meta_data(cls,values):
  class RunModify (line 184) | class RunModify(BaseModel):
    method convert_meta_data (line 188) | def convert_meta_data(cls,values):
  class ToolOutput (line 194) | class ToolOutput(BaseModel):
  class RunSubmit (line 199) | class RunSubmit(BaseModel):

FILE: archive/ktransformers/server/schemas/assistants/streaming.py
  class TextObjectWithIndex (line 15) | class TextObjectWithIndex(TextObject):
  class ImageFileObjectWithIndex (line 19) | class ImageFileObjectWithIndex(ImageFileObject):
  class ImageUrlObjectWithIndex (line 23) | class ImageUrlObjectWithIndex(ImageUrlObject):
  class MessageDeltaImpl (line 31) | class MessageDeltaImpl(BaseModel):
  class MessageDelta (line 36) | class MessageDelta(Object):
    method to_stream_reply (line 39) | def to_stream_reply(self):
  function text_delta (line 43) | def text_delta(index: int, text: str):
  function append_message_delta (line 47) | def append_message_delta(self: MessageObject, text: str):
  class RunStepDeltaImpl (line 63) | class RunStepDeltaImpl(BaseModel):
  class RunStepDelta (line 67) | class RunStepDelta(Object):
    method to_stream_reply (line 70) | def to_stream_reply(self):
  class Done (line 74) | class Done():
    method to_stream_reply (line 75) | def to_stream_reply(self):
  function check_client_link (line 79) | async def check_client_link(request: Request, async_events: AsyncIterable):
  function add_done (line 86) | async def add_done(async_events: AsyncIterable):
  function to_stream_reply (line 92) | async def to_stream_reply(async_events: AsyncIterable):
  function filter_api_event (line 100) | async def filter_api_event(async_events: AsyncIterable):
  function filter_chat_chunk (line 106) | async def filter_chat_chunk(async_events: AsyncIterable):
  function filter_by_types (line 112) | async def filter_by_types(async_events: AsyncIterable, types: List):
  function api_stream_response (line 120) | def api_stream_response(request: Request, async_events: AsyncIterable):
  function chat_stream_response (line 124) | def chat_stream_response(request: Request, async_events: AsyncIterable):
  function stream_response (line 128) | def stream_response(request: Request, async_events: AsyncIterable):
  function check_link_response (line 132) | def check_link_response(request: Request, async_events: AsyncIterable):
  function wrap_async_generator_into_queue (line 136) | def wrap_async_generator_into_queue(async_events: AsyncIterable) -> asyn...
  function unwrap_async_queue (line 151) | async def unwrap_async_queue(queue: asyncio.Queue) -> AsyncIterable:
  function unwrap_async_queue_slow (line 163) | async def unwrap_async_queue_slow(queue: asyncio.Queue) -> AsyncIterable:

FILE: archive/ktransformers/server/schemas/assistants/threads.py
  class ThreadBase (line 12) | class ThreadBase(BaseModel):
    method convert_meta_data (line 16) | def convert_meta_data(cls,values):
  class ThreadObject (line 24) | class ThreadObject(ThreadBase, ObjectWithCreatedTime):
    method check_is_related_threads (line 28) | def check_is_related_threads(self)->Self:
    class StreamEvent (line 34) | class StreamEvent(Enum):
    method to_stream_reply (line 37) | def to_stream_reply(self,event:StreamEvent):
  class ThreadCreate (line 41) | class ThreadCreate(ThreadBase):
  class ThreadModify (line 45) | class ThreadModify(ThreadBase):

FILE: archive/ktransformers/server/schemas/assistants/tool.py
  class ToolType (line 9) | class ToolType(str, Enum):
  class ToolBase (line 16) | class ToolBase(BaseModel):
  class CodeInterpreter (line 20) | class CodeInterpreter(ToolBase):
  class FileSearch (line 24) | class FileSearch(ToolBase):
  class RelatedThreads (line 28) | class RelatedThreads(ToolBase):
  class FuntionTool (line 32) | class FuntionTool(ToolBase):
  class CodeInterpreterResource (line 41) | class CodeInterpreterResource(BaseModel):
  class FileSearchResource (line 45) | class FileSearchResource(BaseModel):
  class RelatedThreadsResource (line 50) | class RelatedThreadsResource(BaseModel):

FILE: archive/ktransformers/server/schemas/base.py
  class Object (line 12) | class Object(BaseModel):
  class ObjectWithCreatedTime (line 20) | class ObjectWithCreatedTime(Object):
  class Order (line 25) | class Order(str, Enum):
    method to_sqlalchemy_order (line 29) | def to_sqlalchemy_order(self):
  class DeleteResponse (line 41) | class DeleteResponse(Object):
  class OperationResponse (line 44) | class OperationResponse(BaseModel):

FILE: archive/ktransformers/server/schemas/conversation.py
  class ThreadPreview (line 9) | class ThreadPreview(BaseModel):

FILE: archive/ktransformers/server/schemas/endpoints/chat.py
  class CompletionUsage (line 13) | class CompletionUsage(BaseModel):
  class Role (line 22) | class Role(Enum):
  class Message (line 29) | class Message(BaseModel):
    method to_tokenizer_message (line 36) | def to_tokenizer_message(self):
  class FunctionParameters (line 48) | class FunctionParameters(BaseModel):
  class FunctionDefinition (line 53) | class FunctionDefinition(BaseModel):
  class ToolFunction (line 58) | class ToolFunction(BaseModel):
  class Tool (line 61) | class Tool(BaseModel):
  class ChatCompletionCreate (line 65) | class ChatCompletionCreate(BaseModel):
    method get_tokenizer_messages (line 79) | def get_tokenizer_messages(self):
  class ChatCompletionChunk (line 82) | class ChatCompletionChunk(BaseModel):
    method to_stream_reply (line 92) | def to_stream_reply(self):
  class RawUsage (line 95) | class RawUsage(BaseModel):

FILE: archive/ktransformers/server/schemas/legacy/completions.py
  class CompletionCreate (line 7) | class CompletionCreate(BaseModel):
    method get_tokenizer_messages (line 16) | def get_tokenizer_messages(self):
  class FinishReason (line 22) | class FinishReason(Enum):
  class Choice (line 26) | class Choice(BaseModel):
  class CompletionObject (line 33) | class CompletionObject(Object):
    method set_token (line 40) | def set_token(self,token:str):
    method append_token (line 45) | def append_token(self,token:str):
    method to_stream_reply (line 50) | def to_stream_reply(self):

FILE: archive/ktransformers/server/utils/create_interface.py
  function create_interface (line 19) | def create_interface(config: Config, default_args: ConfigArgs, input_arg...
  class GlobalContextManager (line 38) | class GlobalContextManager:
  class GlobalInterface (line 40) | class GlobalInterface:
  function get_thread_context_manager (line 43) | def get_thread_context_manager() -> GlobalContextManager:
  function get_interface (line 45) | def get_interface() -> GlobalInterface:

FILE: archive/ktransformers/server/utils/multi_timer.py
  function format_time (line 4) | def format_time(seconds):
  class Profiler (line 20) | class Profiler:
    method __init__ (line 21) | def __init__(self):
    method create_timer (line 25) | def create_timer(self, name):
    method start_timer (line 32) | def start_timer(self, name):
    method pause_timer (line 40) | def pause_timer(self, name):
    method get_timer_sec (line 48) | def get_timer_sec(self, name):
    method get_all_timers (line 57) | def get_all_timers(self):
    method report_timer_string (line 63) | def report_timer_string(self, name):
    method create_and_start_timer (line 66) | def create_and_start_timer(self, name):
    method inc (line 72) | def inc(self,key:str,delta:int=1):
    method set_counter (line 75) | def set_counter(self,key:str,to=0):
    method get_counter (line 78) | def get_counter(self,key:str):

FILE: archive/ktransformers/server/utils/serve_profiling.py
  class ProfStatKey (line 8) | class ProfStatKey(StrEnum):
  class ProfTimeStat (line 15) | class ProfTimeStat:
    method __init__ (line 16) | def __init__(self):
    method record_start_time (line 30) | def record_start_time(self):
    method add_time_stat (line 34) | def add_time_stat(self, key: ProfStatKey, time_ns, is_prefill):
    method print_all (line 45) | def print_all(self):
    method reset_all (line 58) | def reset_all(self):
  class ProfStatItem (line 65) | class ProfStatItem:
    method __init__ (line 66) | def __init__(self):
    method add_item (line 75) | def add_item(self, cost_time_ns):
    method reset (line 88) | def reset(self):
    method get_stat (line 94) | def get_stat(self):

FILE: archive/ktransformers/server/utils/sql_utils.py
  class SQLUtil (line 27) | class SQLUtil(metaclass=Singleton):
    method __init__ (line 34) | def __init__(self) -> None:
    method get_db (line 40) | def get_db(self):
    method init_engine (line 53) | def init_engine(cfg: Config):
    method create_sqllite_url (line 70) | def create_sqllite_url(cfg):
    method db_add_commit_refresh (line 89) | def db_add_commit_refresh(self, session: Session, what):
    method db_merge_commit (line 104) | def db_merge_commit(self, session: Session, what):
    method db_update_commit_refresh (line 115) | def db_update_commit_refresh(self, session: Session, existing, what):

FILE: archive/ktransformers/tests/AIME_2024/eval_api.py
  function generate_text (line 16) | def generate_text(api_url,question , model_name, stream=False, auth_toke...
  function load_data (line 40) | def load_data(file_path):
  function get_score (line 54) | def get_score(pred, answer):
  function run_eval_api (line 74) | def run_eval_api(
  function main (line 120) | def main(output_path, api_url, model_name, auth_token, format_tabs,probl...

FILE: archive/ktransformers/tests/AIME_2024/evaluation.py
  function filter_answer (line 2) | def filter_answer(completion: str) -> str:

FILE: archive/ktransformers/tests/AIME_2024/prompts.py
  function instruct_prompt (line 1) | def instruct_prompt(prompt: str) -> str:

FILE: archive/ktransformers/tests/UT/test_kdeepseek_attention_w8a8a2serve_npu.py
  class DummyConfig (line 15) | class DummyConfig:
    method __init__ (line 16) | def __init__(self, hidden_size=4, num_attention_heads=1):
  class DummyOrigAttn (line 21) | class DummyOrigAttn(nn.Module):
    method __init__ (line 22) | def __init__(self, config=None, layer_idx=0):
  class DummyDynamicQuantOps (line 35) | class DummyDynamicQuantOps:
    method execute (line 36) | def execute(self, inputs):
  class DummyMatMulOps (line 41) | class DummyMatMulOps:
    method execute (line 42) | def execute(self, inputs):
  class DummyQuantProj (line 47) | class DummyQuantProj(nn.Module):
    method __init__ (line 48) | def __init__(self, dim):
  class DummyStaticCache (line 57) | class DummyStaticCache:
    method __init__ (line 58) | def __init__(self, page_size=16):
    method get_usable_length (line 61) | def get_usable_length(self, kv_seq_len, layer_idx):
    method update (line 64) | def update(self, combined, layer_idx, cache_kwargs):
  class DummyNpuFusedAttention (line 68) | class DummyNpuFusedAttention:
    method __call__ (line 69) | def __call__(self, q, k, v, **kwargs):
    method out (line 77) | def out(self, q, k, v, workspace=None,
  class DummyOpsNpu (line 92) | class DummyOpsNpu:
    method npu_fused_infer_attention_score (line 93) | def npu_fused_infer_attention_score(self, q, k, v, **kwargs):
  function fake_apply_rotary_pos_emb_fusion (line 101) | def fake_apply_rotary_pos_emb_fusion(q_pe, k_pe, cos, sin):
  function build_attention_module (line 104) | def build_attention_module(q_lora_rank=None):
  function _patch_env (line 175) | def _patch_env(monkeypatch):
  function test_print_callback_smoke (line 225) | def test_print_callback_smoke():
  function _common_inputs_prefill (line 241) | def _common_inputs_prefill():
  function test_forward_prefill_with_mask (line 261) | def test_forward_prefill_with_mask():
  function test_forward_prefill_without_mask_and_q_lora (line 298) | def test_forward_prefill_without_mask_and_q_lora():
  function test_forward_decode_paged_path (line 335) | def test_forward_decode_paged_path():
  function test_forward_prefill_layer_idx_none_raises (line 378) | def test_forward_prefill_layer_idx_none_raises():
  function test_forward_prefill_attn_output_shape_mismatch_raises (line 408) | def test_forward_prefill_attn_output_shape_mismatch_raises(monkeypatch):
  function test_forward_paged_use_npu_graph (line 452) | def test_forward_paged_use_npu_graph(monkeypatch):

FILE: archive/ktransformers/tests/UT/test_kdeepseek_ln_npu.py
  class DummyOrigModule (line 16) | class DummyOrigModule(nn.Module):
    method __init__ (line 17) | def __init__(self, hidden_size=4, variance_epsilon=1e-5):
  class DummySafeTensorLoader (line 23) | class DummySafeTensorLoader:
    method __init__ (line 24) | def __init__(self):
    method load_tensor (line 28) | def load_tensor(self, name: str):
  class DummyGGUFLoader (line 33) | class DummyGGUFLoader:
    method __init__ (line 34) | def __init__(self, safetensor_loader: DummySafeTensorLoader):
  class DummyConfig (line 38) | class DummyConfig:
  class FakeRMSNorm (line 42) | class FakeRMSNorm:
    method __init__ (line 43) | def __init__(self):
    method __call__ (line 46) | def __call__(self, hidden_states, weight, eps):
  function build_rms_module (line 53) | def build_rms_module(hidden_size=4, eps=1e-5, safetensor_loader=None):
  function patch_utils_and_npu (line 70) | def patch_utils_and_npu(monkeypatch):
  function get_fake_rms (line 81) | def get_fake_rms():
  function test_forward_preserves_shape_and_dtype (line 85) | def test_forward_preserves_shape_and_dtype():
  function test_forward_with_bfloat16_dtype (line 103) | def test_forward_with_bfloat16_dtype():
  function test_forward_uses_bias (line 114) | def test_forward_uses_bias():
  function test_load_from_safetensor_loader (line 132) | def test_load_from_safetensor_loader():
  function test_unload_sets_weight_and_bias_to_none_idempotent (line 150) | def test_unload_sets_weight_and_bias_to_none_idempotent():

FILE: archive/ktransformers/tests/function_call_test.py
  function send_messages (line 3) | def send_messages(messages):

FILE: archive/ktransformers/tests/humaneval/eval_api.py
  function generate_text (line 11) | def generate_text(api_url,question , model_name, stream=False, auth_toke...
  function run_eval_api (line 35) | def run_eval_api(
  function main (line 81) | def main(output_path, api_url, model_name, auth_token, format_tabs,probl...

FILE: archive/ktransformers/tests/humaneval/evaluation.py
  function filter_code (line 2) | def filter_code(completion: str) -> str:
  function fix_indents (line 14) | def fix_indents(text: str) -> str:

FILE: archive/ktransformers/tests/humaneval/prompts.py
  function instruct_prompt (line 1) | def instruct_prompt(prompt: str) -> str:
  function standard_prompt (line 5) | def standard_prompt(prompt: str) -> str:
  function write_prompt (line 9) | def write_prompt(prompt: str) -> str:
  function replit_glaive_prompt (line 13) | def replit_glaive_prompt(prompt: str) -> str:

FILE: archive/ktransformers/tests/mmlu_pro_test.py
  class DataEvaluator (line 16) | class DataEvaluator:
    method __init__ (line 17) | def __init__(self):
    method load_data (line 21) | def load_data(self, file_path):
    method get_prompt (line 45) | def get_prompt(self, record):
    method post_processing (line 56) | def post_processing(self, text):
    method score (line 65) | def score(self, pred, answers):
  function generate_text (line 80) | def generate_text(api_url, question, model_name, stream=False):
  function main (line 105) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file...

FILE: archive/ktransformers/tests/mmlu_test.py
  class DataEvaluator (line 16) | class DataEvaluator:
    method __init__ (line 17) | def __init__(self):
    method load_data (line 21) | def load_data(self, file_path):
    method get_prompt (line 36) | def get_prompt(self, record):
    method post_processing (line 47) | def post_processing(self, text):
    method score (line 56) | def score(self, pred, answers):
  function generate_text (line 71) | def generate_text(api_url, question, model_name, stream=False):
  function main (line 96) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file...

FILE: archive/ktransformers/tests/mmlu_test_multi.py
  function extract_final_answer (line 19) | def extract_final_answer(text):
  class DataEvaluator (line 62) | class DataEvaluator:
    method __init__ (line 63) | def __init__(self):
    method load_data (line 66) | def load_data(self, file_path):
    method get_prompt (line 77) | def get_prompt(self, record):
    method post_processing (line 85) | def post_processing(self, text):
    method score (line 92) | def score(self, pred, answer):
  function generate_text (line 100) | def generate_text(api_url, question, model_name, stream=False):
  function main (line 120) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file...

FILE: archive/ktransformers/tests/parse_cover_info.py
  function main (line 7) | def main():

FILE: archive/ktransformers/tests/score.py
  function wait_for_server (line 7) | def wait_for_server(base_url: str, timeout: int = None) -> None:
  function enqueue_output (line 63) | def enqueue_output(out, queue):

FILE: archive/ktransformers/tests/test_client.py
  function fetch_event_stream (line 15) | async def fetch_event_stream(session, payload, request_id, stream):
  function main (line 77) | async def main(prompt_id, model, stream, max_tokens, temperature, top_p):

FILE: archive/ktransformers/tests/test_prefix.py
  function fetch_message_once (line 18) | async def fetch_message_once(session, request_id, messages, max_tokens, ...
  function multi_turn_conversation (line 79) | async def multi_turn_conversation(session, request_id, rounds, max_token...
  function main (line 104) | async def main(concurrent_requests, rounds, max_tokens, model):

FILE: archive/ktransformers/tests/test_pytorch_q8.py
  class LinearModel (line 4) | class LinearModel(torch.nn.Module):
    method __init__ (line 5) | def __init__(self, in_features, out_features):
    method forward (line 9) | def forward(self, x):

FILE: archive/ktransformers/tests/test_speed.py
  function fetch_event_stream (line 48) | async def fetch_event_stream(session, request_id, prompt, max_tokens, mo...
  function main (line 137) | async def main(concurrent_requests , prompt, max_tokens, model):

FILE: archive/ktransformers/tests/triton_fp8gemm_test.py
  function test_fp8_gemm_vs_torch_matmul (line 21) | def test_fp8_gemm_vs_torch_matmul():
  function test_fp8_gemm_vs_torch_matmul_load (line 48) | def test_fp8_gemm_vs_torch_matmul_load():
  function test_fp8_gemm_tplops (line 71) | def test_fp8_gemm_tplops():

FILE: archive/ktransformers/util/ascend/ascend_utils.py
  function setup_model_parallel (line 33) | def setup_model_parallel(distributed_timeout_minutes: int = 30, tp: int ...
  function get_tensor_parallel_size (line 90) | def get_tensor_parallel_size():
  function get_tensor_parallel_group (line 95) | def get_tensor_parallel_group():
  function get_tensor_parallel_rank (line 100) | def get_tensor_parallel_rank():
  function get_data_parallel_size (line 105) | def get_data_parallel_size():
  function get_data_parallel_gloo (line 110) | def get_data_parallel_gloo():
  function get_data_parallel_group (line 115) | def get_data_parallel_group():
  function get_data_parallel_rank (line 120) | def get_data_parallel_rank():
  function get_nccl_options (line 126) | def get_nccl_options(pg_name, nccl_comm_cfgs):
  function get_safetensors_cut_weight (line 137) | def get_safetensors_cut_weight(name: str, weights: torch.Tensor):
  function get_absort_weight (line 166) | def get_absort_weight(model, config):
  function allredeuce_warpper (line 198) | def allredeuce_warpper(func):

FILE: archive/ktransformers/util/cuda_graph_runner.py
  class CUDAGraphRunner (line 10) | class CUDAGraphRunner:
    method __init__ (line 12) | def __init__(self):
    method capture (line 17) | def capture(
    method forward (line 63) | def forward(
    method __call__ (line 83) | def __call__(self, *args, **kwargs):

FILE: archive/ktransformers/util/custom_gguf.py
  class GGMLQuantizationType (line 40) | class GGMLQuantizationType(IntEnum):
  function quant_shape_to_byte_shape (line 105) | def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuan...
  function read_value (line 177) | def read_value(f, data_type):
  function dequantize_q2_k (line 225) | def dequantize_q2_k(data):
  function dequantize_q2_k_gpu (line 262) | def dequantize_q2_k_gpu(data, device:str ="cuda", target_dtype = torch.g...
  function dequantize_q3_k (line 272) | def dequantize_q3_k(data):
  function dequantize_q3_k_gpu (line 314) | def dequantize_q3_k_gpu(data, device:str ="cuda", target_dtype = torch.g...
  function dequantize_q4_k (line 324) | def dequantize_q4_k(data):
  function dequantize_q4_k_gpu (line 346) | def dequantize_q4_k_gpu(data, device:str ="cuda", target_dtype = torch.g...
  function dequantize_q5_k (line 356) | def dequantize_q5_k(data):
  function dequantize_q5_k_gpu (line 412) | def dequantize_q5_k_gpu(data, device:str ="cuda", target_dtype = torch.g...
  function dequantize_q6_k (line 422) | def dequantize_q6_k(data):
  function dequantize_q6_k_gpu (line 471) | def dequantize_q6_k_gpu(data: np.ndarray, device:str = "cuda", target_dt...
  function dequantize_iq4_xs (line 482) | def dequantize_iq4_xs(data):
  function dequantize_iq4_xs_gpu (line 512) | def dequantize_iq4_xs_gpu(data: np.ndarray, device:str = "cuda", target_...
  function dequantize_q4_0 (line 521) | def dequantize_q4_0(data):
  function dequantize_q4_0_gpu (line 536) | def dequantize_q4_0_gpu(data, device:str = "cuda", target_dtype = torch....
  function dequantize_q5_0 (line 539) | def dequantize_q5_0(data):
  function dequantize_q5_0_gpu (line 560) | def dequantize_q5_0_gpu(data, device:str = "cuda", target_dtype = torch....
  function dequantize_q8_0 (line 563) | def dequantize_q8_0(data):
  function dequantize_q8_0_gpu (line 572) | def dequantize_q8_0_gpu(data, device:str = "cuda", target_dtype = torch....
  function dequantize_f32 (line 584) | def dequantize_f32(data):
  function dequantize_f32_gpu (line 587) | def dequantize_f32_gpu(data, device, target_dtype = torch.get_default_dt...
  function dequantize_f16 (line 594) | def dequantize_f16(data):
  function dequantize_f16_gpu (line 597) | def dequantize_f16_gpu(data, device, target_dtype = torch.get_default_dt...
  function dequantize_bf16_gpu (line 604) | def dequantize_bf16_gpu(data, device, target_dtype = torch.get_default_d...
  function translate_name_to_gguf_mixtral (line 642) | def translate_name_to_gguf_mixtral(name):
  function translate_name_to_gguf (line 665) | def translate_name_to_gguf(name):

FILE: archive/ktransformers/util/custom_loader.py
  class ModelLoader (line 28) | class ModelLoader(ABC):
    method has_tensor (line 35) | def has_tensor(cls, name: str):
  class SafeTensorLoader (line 47) | class SafeTensorLoader(ModelLoader):
    method __init__ (line 53) | def __init__(self, file_path: str):
    method __load_tensor_file_map (line 56) | def __load_tensor_file_map(self, file_path: str):
    method load_tensor (line 96) | def load_tensor(self, key: str, device: str = "cpu"):
    method load_experts (line 114) | def load_experts(self, key: str, device: str="cpu"):
    method load_gate (line 225) | def load_gate(self, key: str, device: str="cpu"):
    method close_all_handles (line 252) | def close_all_handles(self):
    method load_dequantized_tensor (line 257) | def load_dequantized_tensor(self, key: str, device: str = "cpu"):
    method has_tensor (line 275) | def has_tensor(self, name: str):
  class GGUFLoader (line 278) | class GGUFLoader(ModelLoader):
    method __init__ (line 284) | def __init__(self, gguf_path: str, quantize: str = None):
    method load_gguf (line 323) | def load_gguf(self, f):
    method get_mmap_tensor (line 405) | def get_mmap_tensor(self, name):
    method get_undequanted_tensor_and_ggml_type (line 416) | def get_undequanted_tensor_and_ggml_type(self, name):
    method load_expert_tensor (line 424) | def load_expert_tensor(self, name, data, expert_id, elements_per_exper...
    method load_gguf_tensor (line 453) | def load_gguf_tensor(self, name: str, device:str = "cpu", target_dtype...
    method has_tensor (line 518) | def has_tensor(self, name: str):
    method get_ggml_type (line 522) | def get_ggml_type(self, name: str):
  class ModelLoaderFactory (line 528) | class ModelLoaderFactory:
    method create_loader (line 535) | def create_loader(path: str):
  class W8A8SafeTensorLoader (line 600) | class W8A8SafeTensorLoader(SafeTensorLoader):
    method load_tensor (line 601) | def load_tensor(self, key: str, device: str = "cpu"):
    method load_dequantized_tensor (line 625) | def load_dequantized_tensor(self, key: str, device: str = "cpu"):

FILE: archive/ktransformers/util/modeling_rope_utils.py
  function _compute_default_rope_parameters (line 29) | def _compute_default_rope_parameters(
  function _compute_linear_scaling_rope_parameters (line 71) | def _compute_linear_scaling_rope_parameters(
  function _compute_dynamic_ntk_parameters (line 112) | def _compute_dynamic_ntk_parameters(
  function _compute_yarn_parameters (line 163) | def _compute_yarn_parameters(
  function _compute_longrope_parameters (line 259) | def _compute_longrope_parameters(
  function _compute_llama3_parameters (line 322) | def _compute_llama3_parameters(
  function _check_received_keys (line 378) | def _check_received_keys(
  function _validate_default_rope_parameters (line 407) | def _validate_default_rope_parameters(config: PretrainedConfig, ignore_k...
  function _validate_linear_scaling_rope_parameters (line 415) | def _validate_linear_scaling_rope_parameters(config: PretrainedConfig, i...
  function _validate_dynamic_scaling_rope_parameters (line 427) | def _validate_dynamic_scaling_rope_parameters(config: PretrainedConfig, ...
  function _validate_yarn_parameters (line 441) | def _validate_yarn_parameters(config: PretrainedConfig, ignore_keys: Opt...
  function _validate_longrope_parameters (line 479) | def _validate_longrope_parameters(config: PretrainedConfig, ignore_keys:...
  function _validate_llama3_parameters (line 529) | def _validate_llama3_parameters(config: PretrainedConfig, ignore_keys: O...
  function rope_config_validation (line 576) | def rope_config_validation(config: PretrainedConfig, ignore_keys: Option...

FILE: archive/ktransformers/util/npu_graph_runner.py
  class NPUGraphRunner (line 14) | class NPUGraphRunner:
    method __init__ (line 16) | def __init__(self, deviceId):
    method init (line 23) | def init(self, batch_size, seq_length):
    method destroy (line 32) | def destroy(self):
    method capture (line 37) | def capture(
    method forward (line 65) | def forward(
    method launch_callback (line 86) | def launch_callback(self, func, data, block, stream):
    method __call__ (line 89) | def __call__(self, *args, **kwargs):
  function check_runner (line 94) | def check_runner(deviceId: int):
  function destory_runner (line 101) | def destory_runner(deviceId: int):
  function get_or_create_runner (line 107) | def get_or_create_runner(deviceId: int):

FILE: archive/ktransformers/util/textstream.py
  class TextStreamer (line 2) | class TextStreamer:
    method __init__ (line 4) | def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = Fal...
    method reset (line 14) | def reset(self):
    method put (line 18) | def put(self, value)->Optional[str]:
    method end (line 49) | def end(self)->Optional[str]:
    method _is_chinese_char (line 62) | def _is_chinese_char(self, cp):

FILE: archive/ktransformers/util/utils.py
  function get_use_npu_graph (line 56) | def get_use_npu_graph():
  class StatKey (line 62) | class StatKey(StrEnum):
  class TimeStat (line 74) | class TimeStat:
    method __init__ (line 75) | def __init__(self):
    method record_start_time (line 89) | def record_start_time(self):
    method add_time_stat (line 93) | def add_time_stat(self, key: StatKey, time_ns, is_prefill):
    method print_all (line 104) | def print_all(self):
    method reset_all (line 117) | def reset_all(self):
  class StatItem (line 124) | class StatItem:
    method __init__ (line 125) | def __init__(self):
    method add_item (line 131) | def add_item(self, cost_time_ns):
    method reset (line 137) | def reset(self):
    method get_stat (line 143) | def get_stat(self):
  function get_free_ports (line 157) | def get_free_ports(n: int, continue_prot: list):
  function get_current_device (line 173) | def get_current_device():
  function get_compute_capability (line 179) | def get_compute_capability(device:torch.device = None):
  function set_module (line 193) | def set_module(model, submodule_key, module):
  function set_param (line 207) | def set_param(module: nn.Module, name: str, weights: torch.Tensor):
  function get_device (line 214) | def get_device(gguf_module_key:str, device_map:dict):
  function get_all_used_cuda_device (line 220) | def get_all_used_cuda_device(device_map:dict):
  function load_cur_state_dict_npu (line 232) | def load_cur_state_dict_npu(module: nn.Module, gguf_loader: ModelLoader,...
  function load_cur_state_dict (line 263) | def load_cur_state_dict(module: nn.Module, gguf_loader: ModelLoader, pre...
  function sync_all_device (line 310) | def sync_all_device(all_device_list):
  function xpu_fp16_model (line 323) | def xpu_fp16_model(config):
  function load_weights (line 335) | def load_weights(module:nn.Module, gguf_loader:ModelLoader, prefix='', d...
  function tf_logits_warper (line 344) | def tf_logits_warper(generation_config):
  function prefill_and_generate (line 394) | def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000,...
  class InferenceState (line 809) | class InferenceState(enum.Enum):

FILE: archive/ktransformers/util/vendors.py
  class GPUVendor (line 7) | class GPUVendor(IntEnum):
  class DeviceManager (line 15) | class DeviceManager:
    method __init__ (line 19) | def __init__(self):
    method _detect_gpu_vendor (line 23) | def _detect_gpu_vendor(self) -> GPUVendor:
    method _get_available_devices (line 60) | def _get_available_devices(self) -> List[int]:
    method get_device_str (line 75) | def get_device_str(self, device_id: Union[int, str]) -> str:
    method to_torch_device (line 102) | def to_torch_device(self, device_id: Union[int, str] = 0) -> torch.dev...
    method move_tensor_to_device (line 126) | def move_tensor_to_device(self, tensor: torch.Tensor, device_id: Union...
    method is_available (line 140) | def is_available(self, index: int = 0) -> bool:
    method get_all_devices (line 155) | def get_all_devices(self) -> List[int]:
  function get_device (line 168) | def get_device(device_id: Union[int, str] = 0) -> torch.device:
  function to_device (line 180) | def to_device(tensor: torch.Tensor, device_id: Union[int, str] = 0) -> t...

FILE: archive/ktransformers/util/weight_loader.py
  class ModelLoader (line 8) | class ModelLoader(ABC):
    method load_tensor (line 15) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor:
    method supports_format (line 30) | def supports_format(cls, path: str) -> bool:
  class SafeTensorLoader (line 43) | class SafeTensorLoader(ModelLoader):
    method __init__ (line 48) | def __init__(self, path: str):
    method _load_tensor_file_map (line 59) | def _load_tensor_file_map(self, path: str) -> None:
    method load_tensor (line 102) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor:
    method load_dequantized_tensor (line 122) | def load_dequantized_tensor(self, name: str, device: str = "cpu") -> t...
    method close_all_handles (line 148) | def close_all_handles(self) -> None:
    method supports_format (line 157) | def supports_format(cls, path: str) -> bool:
  class GGUFLoader (line 185) | class GGUFLoader(ModelLoader):
    method __init__ (line 190) | def __init__(self, path: str):
    method _load_gguf (line 228) | def _load_gguf(self, f) -> None:
    method _read_value (line 287) | def _read_value(self, f, data_type) -> Any:
    method load_tensor (line 310) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor:
    method load_gguf_tensor (line 324) | def load_gguf_tensor(self, name: str, device: str = "cpu", target_dtyp...
    method supports_format (line 346) | def supports_format(cls, path: str) -> bool:

FILE: archive/ktransformers/website/src/api/assistant.ts
  function filterAndConvert (line 3) | function filterAndConvert(
  type IAssistantData (line 12) | interface IAssistantData {

FILE: archive/ktransformers/website/src/api/run.ts
  type IRunData (line 4) | interface IRunData {
  function cancelRun (line 87) | async function cancelRun(threadId: string, runId: string){

FILE: archive/ktransformers/website/src/assets/iconfont/iconfont.js
  function s (line 1) | function s(){h||(h=!0,e())}
  function d (line 1) | function d(){try{a.documentElement.doScroll("left")}catch(t){return void...

FILE: archive/ktransformers/website/src/conf/config.ts
  type Window (line 2) | interface Window {

FILE: archive/ktransformers/website/src/utils/copy.ts
  function showCopySuccessMessage (line 75) | function showCopySuccessMessage() {
  function showCopyErrorMessage (line 93) | function showCopyErrorMessage() {

FILE: archive/ktransformers/website/src/utils/types.ts
  type IAssistant (line 1) | interface IAssistant {
  type IAssistantWithStatus (line 17) | interface IAssistantWithStatus {
  type IMessage (line 34) | interface IMessage {
  type IThread (line 51) | interface IThread {
  type IRun (line 59) | interface IRun {
  type IFile (line 88) | interface IFile {
  type IMessageData (line 97) | interface IMessageData {
  type IThreadAndMessageAndAssistant (line 104) | interface IThreadAndMessageAndAssistant {
  type IDeleteResult (line 110) | interface IDeleteResult {
  type IBuildData (line 115) | interface IBuildData {

FILE: archive/merge_tensors/merge_safetensor_gguf.py
  function read_safetensor_keys_from_folder (line 15) | def read_safetensor_keys_from_folder(folder_path)->dict:
  function translate_name (line 58) | def translate_name(name:str)->str:
  function combine_tensor_sources (line 71) | def combine_tensor_sources(safetensor_path:str, gguf_path:str):
  function write_combined_tensor (line 97) | def write_combined_tensor(target_tensor_map: dict, output_path: str, ggu...
  function main (line 190) | def main():

FILE: archive/merge_tensors/merge_safetensor_gguf_for_qwen3.py
  function read_safetensor_keys_from_folder (line 27) | def read_safetensor_keys_from_folder(folder_path) -> dict:
  function translate_name (line 60) | def translate_name(name: str) -> str:
  function combine_tensor_sources (line 69) | def combine_tensor_sources(safetensor_path: str, gguf_path: str):
  function write_combined_tensor (line 103) | def write_combined_tensor(target_tensor_map: dict, output_path: str, ggu...
  function main (line 198) | def main():

FILE: archive/setup.py
  class CpuInstructInfo (line 62) | class CpuInstructInfo:
  class VersionInfo (line 72) | class VersionInfo:
    method get_musa_bare_metal_version (line 80) | def get_musa_bare_metal_version(self, musa_dir):
    method get_rocm_bare_metal_version (line 90) | def get_rocm_bare_metal_version(self, rocm_dir):
    method get_cuda_bare_metal_version (line 154) | def get_cuda_bare_metal_version(self, cuda_dir):
    method get_cuda_version_of_torch (line 163) | def get_cuda_version_of_torch(self):
    method get_platform (line 170) | def get_platform(self,):
    method get_cpu_instruct (line 181) | def get_cpu_instruct(self,):
    method get_torch_version (line 224) | def get_torch_version(self,):
    method get_flash_version (line 229) | def get_flash_version(self,):
    method get_package_version (line 238) | def get_package_version(self, full_version=False):
  class BuildWheelsCommand (line 263) | class BuildWheelsCommand(_bdist_wheel):
    method get_wheel_name (line 264) | def get_wheel_name(self,):
    method run (line 274) | def run(self):
  function colored (line 304) | def colored(text, color=None, bold=False):
  function split_line (line 316) | def split_line(text: str) -> List[str]:
  function colored (line 337) | def colored(text, color=None, bold=False):
  function split_line (line 349) | def split_line(text: str) -> List[str]:
  function run_command_with_live_tail (line 365) | def run_command_with_live_tail(ext: str, command: List[str], output_line...
  class CMakeExtension (line 475) | class CMakeExtension(Extension):
    method __init__ (line 476) | def __init__(self, name: str, sourcedir: str) -> None:
  function get_cmake_abi_args (line 481) | def get_cmake_abi_args(cmake_args):
  class CMakeBuild (line 488) | class CMakeBuild(BuildExtension):
    method build_extension (line 490) | def build_extension(self, ext) -> None:

FILE: archive/third_party/llamafile/micros.h
  function GetQueryPerformanceFrequency (line 19) | static long long GetQueryPerformanceFrequency() {
  function GetQueryPerformanceCounter (line 24) | static long long GetQueryPerformanceCounter() {
  function micros (line 31) | static long long micros(void) {

FILE: archive/third_party/llamafile/numba.h
  function rand32 (line 8) | inline int rand32(void) {
  function popcount (line 15) | inline int popcount(unsigned x) {
  function hamming (line 23) | inline int hamming(int x, int y) {
  function float01 (line 27) | inline float float01(unsigned x) {  // (0,1)
  function numba (line 31) | inline float numba(void) {  // (-10,10)

FILE: archive/third_party/llamafile/sgemm.h
  type ggml_tensor (line 13) | struct ggml_tensor
  type ggml_compute_params (line 14) | struct ggml_compute_params
  type ggml_compute_params (line 31) | struct ggml_compute_params
  type ggml_tensor (line 31) | struct ggml_tensor
  type ggml_tensor (line 31) | struct ggml_tensor
  type ggml_tensor (line 31) | struct ggml_tensor
  type ggml_tensor (line 31) | struct ggml_tensor
  type ggml_tensor (line 32) | struct ggml_tensor
  type ggml_tensor (line 32) | struct ggml_tensor
  type ggml_tensor (line 32) | struct ggml_tensor
  type ggml_compute_params (line 44) | struct ggml_compute_params
  type ggml_tensor (line 44) | struct ggml_tensor
  type ggml_tensor (line 44) | struct ggml_tensor
  type ggml_tensor (line 44) | struct ggml_tensor
  type ggml_tensor (line 44) | struct ggml_tensor
  type ggml_compute_params (line 45) | struct ggml_compute_params
  type ggml_tensor (line 45) | struct ggml_tensor
  type ggml_tensor (line 45) | struct ggml_tensor
  type ggml_tensor (line 45) | struct ggml_tensor
  type ggml_tensor (line 45) | struct ggml_tensor
  type ggml_compute_params (line 46) | struct ggml_compute_params
  type ggml_tensor (line 46) | struct ggml_tensor
  type ggml_tensor (line 46) | struct ggml_tensor
  type ggml_tensor (line 46) | struct ggml_tensor
  type ggml_tensor (line 46) | struct ggml_tensor
  type ggml_compute_params (line 47) | struct ggml_compute_params
  type ggml_tensor (line 47) | struct ggml_tensor
  type ggml_tensor (line 47) | struct ggml_tensor
  type ggml_tensor (line 47) | struct ggml_tensor
  type ggml_tensor (line 47) | struct ggml_tensor
  type ggml_compute_params (line 48) | struct ggml_compute_params
  type ggml_tensor (line 48) | struct ggml_tensor
  type ggml_tensor (line 48) | struct ggml_tensor
  type ggml_tensor (line 48) | struct ggml_tensor
  type ggml_tensor (line 48) | struct ggml_tensor
  type ggml_compute_params (line 49) | struct ggml_compute_params
  type ggml_tensor (line 49) | struct ggml_tensor
  type ggml_tensor (line 49) | struct ggml_tensor
  type ggml_tensor (line 49) | struct ggml_tensor
  type ggml_tensor (line 49) | struct ggml_tensor
  type ggml_compute_params (line 50) | struct ggml_compute_params
  type ggml_tensor (line 50) | struct ggml_tensor
  type ggml_tensor (line 50) | struct ggml_tensor
  type ggml_tensor (line 50) | struct ggml_tensor
  type ggml_tensor (line 50) | struct ggml_tensor
  type ggml_compute_params (line 51) | struct ggml_compute_params
  type ggml_tensor (line 51) | struct ggml_tensor
  type ggml_tensor (line 51) | struct ggml_tensor
  type ggml_tensor (line 51) | struct ggml_tensor
  type ggml_tensor (line 51) | struct ggml_tensor
  type ggml_compute_params (line 52) | struct ggml_compute_params
  type ggml_tensor (line 52) | struct ggml_tensor
  type ggml_tensor (line 52) | struct ggml_tensor
  type ggml_tensor (line 52) | struct ggml_tensor
  type ggml_tensor (line 52) | struct ggml_tensor

FILE: archive/third_party/llamafile/sgemm_arm.cpp
  type GemmFuncs (line 32) | struct GemmFuncs {
    type ggml_compute_params (line 34) | struct ggml_compute_params
    type ggml_tensor (line 34) | struct ggml_tensor
    type ggml_tensor (line 34) | struct ggml_tensor
    type ggml_tensor (line 34) | struct ggml_tensor
    type ggml_tensor (line 34) | struct ggml_tensor
    method GemmFuncs (line 39) | GemmFuncs() {
  function llamafile_sgemm (line 190) | bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, co...
  function llamafile_mixmul (line 198) | bool llamafile_mixmul(const ggml_compute_params* params, const ggml_tens...
  function llamafile_mixmul_iqk (line 202) | bool llamafile_mixmul_iqk(long Nx, long Ny, long ne00, int ne11, int typ...

FILE: archive/third_party/llamafile/sgemm_x86.cpp
  type GemmFuncs (line 32) | struct GemmFuncs {
    type ggml_compute_params (line 34) | struct ggml_compute_params
    type ggml_tensor (line 34) | struct ggml_tensor
    type ggml_tensor (line 34) | struct ggml_tensor
    type ggml_tensor (line 34) | struct ggml_tensor
    type ggml_tensor (line 34) | struct ggml_tensor
    method GemmFuncs (line 39) | GemmFuncs() {
  function llamafile_sgemm (line 190) | bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, co...
  function llamafile_mixmul (line 198) | bool llamafile_mixmul(const ggml_compute_params* params, const ggml_tens...
  function llamafile_mixmul_iqk (line 202) | bool llamafile_mixmul_iqk(long Nx, long Ny, long ne00, int ne11, int typ...

FILE: archive/third_party/llamafile/tinyblas_cpu.h
  function tinyBLAS_not_supported (line 85) | bool tinyBLAS_not_supported(const char* file, int line) {
  function unhalf (line 90) | inline float unhalf(ggml_fp16_t d) {
  function unhalf (line 93) | inline float unhalf(ggml_bf16_t d) {
  function float (line 112) | struct ggml_type_trait<float> {
  function ggml_bf16_t (line 116) | struct ggml_type_trait<ggml_bf16_t> {
  function ggml_fp16_t (line 120) | struct ggml_type_trait<ggml_fp16_t> {
  function block_q8_0 (line 124) | struct ggml_type_trait<block_q8_0> {
  function __m128 (line 132) | inline __m128 add(__m128 x, __m128 y) {
  function __m128 (line 135) | inline __m128 sub(__m128 x, __m128 y) {
  function __m128 (line 138) | inline __m128 mul(__m128 x, __m128 y) {
  function __m256 (line 144) | inline __m256 add(__m256 x, __m256 y) {
  function __m256 (line 147) | inline __m256 sub(__m256 x, __m256 y) {
  function __m256 (line 150) | inline __m256 mul(__m256 x, __m256 y) {
  function __m512 (line 156) | inline __m512 add(__m512 x, __m512 y) {
  function __m512 (line 159) | inline __m512 sub(__m512 x, __m512 y) {
  function __m512 (line 162) | inline __m512 mul(__m512 x, __m512 y) {
  function float32x4_t (line 168) | inline float32x4_t add(float32x4_t x, float32x4_t y) {
  function float32x4_t (line 171) | inline float32x4_t sub(float32x4_t x, float32x4_t y) {
  function float32x4_t (line 174) | inline float32x4_t mul(float32x4_t x, float32x4_t y) {
  function float16x8_t (line 180) | inline float16x8_t add(float16x8_t x, float16x8_t y) {
  function float16x8_t (line 183) | inline float16x8_t sub(float16x8_t x, float16x8_t y) {
  function float16x8_t (line 186) | inline float16x8_t mul(float16x8_t x, float16x8_t y) {
  function U (line 198) | U madd(T a, T b, U c) {
  function U (line 210) | U madder(T a, T b, U c, U* e) {
  function float32x4_t (line 218) | inline float32x4_t badder(float32x4_t a, float b, float32x4_t c, float32...
  function __m256 (line 229) | inline __m256 madd(__m256 a, __m256 b, __m256 c) {
  function __m512 (line 235) | inline __m512 madd(__m512 a, __m512 b, __m512 c) {
  function float32x4_t (line 243) | inline float32x4_t madd(float32x4_t a, float32x4_t b, float32x4_t c) {
  function float16x8_t (line 249) | inline float16x8_t madd(float16x8_t a, float16x8_t b, float16x8_t c) {
  function __m512 (line 258) | inline __m512 madd(__m512bh x, __m512bh y, __m512 z) {
  function __m512 (line 262) | inline __m512 madder(__m512bh x, __m512bh y, __m512 z, __m512* _) {
  function hsum (line 271) | inline float hsum(float32x4_t x) {
  function hsum (line 277) | inline float hsum(float16x8_t x) {
  function hsum (line 284) | inline float hsum(__m128 x) {
  function hsum (line 300) | inline float hsum(__m256 x) {
  function hsum (line 306) | inline float hsum(__m512 x) {
  function load (line 318) | inline float load(const float* p) {
  function load (line 322) | inline float load(const ggml_fp16_t* p) {
  function load (line 326) | inline float load(const ggml_bf16_t* p) {
  function float32x4_t (line 332) | inline float32x4_t load(const float* p) {
  function float32x4_t (line 336) | inline float32x4_t load(const ggml_bf16_t* p) {
  function float16x8_t (line 341) | inline float16x8_t load(const ggml_fp16_t* p) {
  function float32x4_t (line 345) | inline float32x4_t load(const ggml_fp16_t* p) {
  function __m128 (line 353) | inline __m128 load(const float* p) {
  function __m256 (line 360) | inline __m256 load(const float* p) {
  function __m256 (line 367) | inline __m256 load(const ggml_bf16_t* p) {
  function __m256 (line 375) | inline __m256 load(const ggml_fp16_t* p) {
  function __m512 (line 382) | inline __m512 load(const float* p) {
  function __m512 (line 386) | inline __m512 load(const ggml_fp16_t* p) {
  function __m512 (line 390) | inline __m512 load(const ggml_bf16_t* p) {
  function __m512bh (line 398) | inline __m512bh load(const ggml_bf16_t* p) {
  function __m512bh (line 402) | inline __m512bh load(const float* p) {
  function store (line 410) | inline void store(float* p, float f) {
  function store (line 414) | inline void store(ggml_fp16_t* p, float f) {
  function store (line 418) | inline void store(ggml_bf16_t* p, float f) {
  function gemm (line 616) | void gemm(long m0, long m, long n0, long n) {
  function gemm (line 759) | void gemm(long m0, long m, long n0, long n) {
  function int8x16_t (line 797) | inline int8x16_t load_lo(const block_q8_0* b) {
  function int8x16_t (line 801) | inline int8x16_t load_hi(const block_q8_0* b) {
  function int8x16_t (line 805) | inline int8x16_t load_lo(const block_q4_0* b) {
  function int8x16_t (line 810) | inline int8x16_t load_hi(const block_q4_0* b) {
  function gemm (line 982) | void gemm(long m0, long m, long n0, long n) {
  function __m256i (line 1020) | inline __m256i load(const block_q8_0* b) {
  function __m256i (line 1024) | inline __m256i load(const block_q4_0* b) {
  function __m256 (line 1032) | inline __m256 updot(__m256i u, __m256i s) {

FILE: archive/third_party/llamafile/tinyblas_cpu_mixmul_amd_avx.cpp
  function llamafile_mixmul_needs (line 13) | size_t llamafile_mixmul_needs(const ggml_tensor* weights, const ggml_ten...

FILE: archive/third_party/llamafile/tinyblas_cpu_mixmul_arm80.cpp
  function llamafile_mixmul_needs (line 13) | size_t llamafile_mixmul_needs(const ggml_tensor* weights, const ggml_ten...

FILE: archive/third_party/llamafile/tinyblas_cpu_unsupported.cpp
  function llamafile_sgemm_unsupported (line 25) | bool llamafile_sgemm_unsupported(long m, long n, long k, const void* A, ...
  function llamafile_mixmul_unsupported (line 29) | bool llamafile_mixmul_unsupported(const struct ggml_compute_params* params,
  function iqk_mul_mat_moe_unsupported (line 37) | bool iqk_mul_mat_moe_unsupported(long, long, long, int, int, const void*...

FILE: archive/third_party/nlohmann/json.hpp
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 239) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_END (line 250) | NLOHMANN_JSON_NAMESPACE_END
  type would_call_std_ (line 2833) | struct would_call_std_
  type value_t (line 2891) | enum class value_t : std::uint8_t
  function NLOHMANN_JSON_NAMESPACE_END (line 2956) | NLOHMANN_JSON_NAMESPACE_END
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 3047) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 3092) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 3281) | NLOHMANN_JSON_NAMESPACE_BEGIN
  class json_pointer (line 3428) | class json_pointer
  type ordered_map (line 3439) | struct ordered_map
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 3450) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 4241) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_END (line 4369) | NLOHMANN_JSON_NAMESPACE_END
  function NLOHMANN_JSON_NAMESPACE_END (line 4601) | NLOHMANN_JSON_NAMESPACE_END
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 4645) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 4653) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 4668) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 5181) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_END (line 5363) | NLOHMANN_JSON_NAMESPACE_END
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 5411) | NLOHMANN_JSON_NAMESPACE_BEGIN
  type adl_serializer (line 5840) | struct adl_serializer
    method from_json (line 5845) | static auto from_json(BasicJsonType && j, TargetType& val) noexcept(
    method from_json (line 5855) | static auto from_json(BasicJsonType && j) noexcept(
    method to_json (line 5865) | static auto to_json(BasicJsonType& j, TargetType && val) noexcept(
  function set_subtype (line 5945) | void set_subtype(subtype_type subtype_) noexcept
  function subtype_type (line 5953) | constexpr subtype_type subtype() const noexcept
  function has_subtype (line 5960) | constexpr bool has_subtype() const noexcept
  function clear_subtype (line 5967) | void clear_subtype() noexcept
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 6005) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 6175) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function json_sax_dom_parser (line 6816) | explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptio...
  function json_sax_dom_parser (line 6821) | json_sax_dom_parser(const json_sax_dom_parser&) = delete;
  function json_sax_dom_parser (line 6822) | json_sax_dom_parser(json_sax_dom_parser&&) = default;
  function null (line 6827) | bool null()
  function boolean (line 6833) | bool boolean(bool val)
  function number_integer (line 6839) | bool number_integer(number_integer_t val)
  function number_unsigned (line 6845) | bool number_unsigned(number_unsigned_t val)
  function number_float (line 6851) | bool number_float(number_float_t val, const string_t& /*unused*/)
  function string (line 6857) | bool string(string_t& val)
  function binary (line 6863) | bool binary(binary_t& val)
  function start_object (line 6869) | bool start_object(std::size_t len)
  function key (line 6881) | bool key(string_t& val)
  function end_object (line 6891) | bool end_object()
  function start_array (line 6901) | bool start_array(std::size_t len)
  function end_array (line 6913) | bool end_array()
  function parse_error (line 6924) | bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/,
  function is_errored (line 6936) | constexpr bool is_errored() const
  class json_sax_dom_callback_parser (line 6985) | class json_sax_dom_callback_parser
    method json_sax_dom_callback_parser (line 6996) | json_sax_dom_callback_parser(BasicJsonType& r,
    method json_sax_dom_callback_parser (line 7005) | json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = de...
    method json_sax_dom_callback_parser (line 7006) | json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default;
    method json_sax_dom_callback_parser (line 7007) | json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_pa...
    method json_sax_dom_callback_parser (line 7008) | json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&...
    method null (line 7011) | bool null()
    method boolean (line 7017) | bool boolean(bool val)
    method number_integer (line 7023) | bool number_integer(number_integer_t val)
    method number_unsigned (line 7029) | bool number_unsigned(number_unsigned_t val)
    method number_float (line 7035) | bool number_float(number_float_t val, const string_t& /*unused*/)
    method string (line 7041) | bool string(string_t& val)
    method binary (line 7047) | bool binary(binary_t& val)
    method start_object (line 7053) | bool start_object(std::size_t len)
    method key (line 7071) | bool key(string_t& val)
    method end_object (line 7088) | bool end_object()
    method start_array (line 7124) | bool start_array(std::size_t len)
    method end_array (line 7141) | bool end_array()
    method parse_error (line 7174) | bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/,
    method is_errored (line 7186) | constexpr bool is_errored() const
    method handle_value (line 7208) | std::pair<bool, BasicJsonType*> handle_value(Value&& v, const bool ski...
  class json_sax_acceptor (line 7292) | class json_sax_acceptor
    method null (line 7301) | bool null()
    method boolean (line 7306) | bool boolean(bool /*unused*/)
    method number_integer (line 7311) | bool number_integer(number_integer_t /*unused*/)
    method number_unsigned (line 7316) | bool number_unsigned(number_unsigned_t /*unused*/)
    method number_float (line 7321) | bool number_float(number_float_t /*unused*/, const string_t& /*unused*/)
    method string (line 7326) | bool string(string_t& /*unused*/)
    method binary (line 7331) | bool binary(binary_t& /*unused*/)
    method start_object (line 7336) | bool start_object(std::size_t /*unused*/ = static_cast<std::size_t>(-1))
    method key (line 7341) | bool key(string_t& /*unused*/)
    method end_object (line 7346) | bool end_object()
    method start_array (line 7351) | bool start_array(std::size_t /*unused*/ = static_cast<std::size_t>(-1))
    method end_array (line 7356) | bool end_array()
    method parse_error (line 7361) | bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/...
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 7399) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function reset (line 8694) | void reset() noexcept
  function char_int_type (line 8711) | char_int_type get()
  function unget (line 8748) | void unget()
  function add (line 8775) | void add(char_int_type c)
  function number_unsigned_t (line 8792) | constexpr number_unsigned_t get_number_unsigned() const noexcept
  function number_float_t (line 8798) | constexpr number_float_t get_number_float() const noexcept
  function string_t (line 8804) | string_t& get_string()
  function position_t (line 8814) | constexpr position_t get_position() const noexcept
  function get_token_string (line 8822) | std::string get_token_string() const
  function JSON_HEDLEY_RETURNS_NON_NULL (line 8846) | JSON_HEDLEY_RETURNS_NON_NULL
  function skip_bom (line 8860) | bool skip_bom()
  function skip_whitespace (line 8874) | void skip_whitespace()
  function token_type (line 8883) | token_type scan()
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 9031) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_END (line 9171) | NLOHMANN_JSON_NAMESPACE_END
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 12195) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 12717) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_END (line 12833) | NLOHMANN_JSON_NAMESPACE_END
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 12887) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function pointer (line 13189) | pointer operator->() const
  function iter_impl (line 13231) | iter_impl operator++(int)& // NOLINT(cert-dcl21-cpp)
  function iter_impl (line 13242) | iter_impl& operator++()
  function iter_impl (line 13282) | iter_impl operator--(int)& // NOLINT(cert-dcl21-cpp)
  function iter_impl (line 13293) | iter_impl& operator--()
  function iter_impl (line 13441) | iter_impl& operator+=(difference_type i)
  function iter_impl (line 13478) | iter_impl& operator-=(difference_type i)
  function iter_impl (line 13487) | iter_impl operator+(difference_type i) const
  function friend (line 13498) | friend iter_impl operator+(difference_type i, const iter_impl& it)
  function iter_impl (line 13509) | iter_impl operator-(difference_type i) const
  function difference_type (line 13520) | difference_type operator-(const iter_impl& other) const
  function reference (line 13549) | reference operator[](difference_type n) const
  function reference (line 13603) | reference value() const
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 13637) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 13770) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 13830) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_BASIC_JSON_TPL_DECLARATION (line 13850) | NLOHMANN_BASIC_JSON_TPL_DECLARATION
  function json_pointer (line 13862) | explicit json_pointer(const string_t& s = "")
  function string_t (line 13868) | string_t to_string() const
  function friend (line 13889) | friend std::ostream& operator<<(std::ostream& o, const json_pointer& ptr)
  function json_pointer (line 13898) | json_pointer& operator/=(const json_pointer& ptr)
  function json_pointer (line 13908) | json_pointer& operator/=(string_t token)
  function json_pointer (line 13916) | json_pointer& operator/=(std::size_t array_idx)
  function friend (line 13923) | friend json_pointer operator/(const json_pointer& lhs,
  function friend (line 13931) | friend json_pointer operator/(const json_pointer& lhs, string_t token) /...
  function friend (line 13938) | friend json_pointer operator/(const json_pointer& lhs, std::size_t array...
  function json_pointer (line 13945) | json_pointer parent_pointer() const
  function pop_back (line 13959) | void pop_back()
  function string_t (line 13971) | const string_t& back() const
  function push_back (line 13983) | void push_back(const string_t& token)
  function push_back (line 13990) | void push_back(string_t&& token)
  function empty (line 13997) | bool empty() const noexcept
  function BasicJsonType (line 14074) | BasicJsonType& get_and_create(BasicJsonType& j) const
  function BasicJsonType (line 14154) | BasicJsonType& get_unchecked(BasicJsonType* ptr) const
  function BasicJsonType (line 14222) | BasicJsonType& get_checked(BasicJsonType* ptr) const
  function BasicJsonType (line 14280) | const BasicJsonType& get_unchecked(const BasicJsonType* ptr) const
  function BasicJsonType (line 14329) | const BasicJsonType& get_checked(const BasicJsonType* ptr) const
  function contains (line 14378) | bool contains(const BasicJsonType* ptr) const
  function split (line 14466) | static std::vector<string_t> split(const string_t& reference_string)
  function BasicJsonType (line 14606) | static BasicJsonType
  function convert (line 14635) | json_pointer<string_t> convert() const&
  function convert (line 14642) | json_pointer<string_t> convert()&&
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 14808) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 14931) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_END (line 15053) | NLOHMANN_JSON_NAMESPACE_END
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 16918) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function NLOHMANN_JSON_NAMESPACE_END (line 18015) | NLOHMANN_JSON_NAMESPACE_END
  function hex_bytes (line 18675) | static std::string hex_bytes(std::uint8_t byte)
  function is_negative_number (line 18686) | bool is_negative_number(NumberType x)
  function is_negative_number (line 18692) | bool is_negative_number(NumberType /*unused*/)
  function dump_integer (line 18712) | void dump_integer(NumberType x)
  function dump_float (line 18797) | void dump_float(number_float_t x)
  function dump_float (line 18818) | void dump_float(number_float_t x, std::true_type /*is_ieee_single_or_dou...
  function dump_float (line 18826) | void dump_float(number_float_t x, std::false_type /*is_ieee_single_or_do...
  function decode (line 18898) | static std::uint8_t decode(std::uint8_t& state, std::uint32_t& codep, co...
  function number_unsigned_t (line 18938) | number_unsigned_t remove_sign(number_unsigned_t x)
  function number_unsigned_t (line 18953) | inline number_unsigned_t remove_sign(number_integer_t x) noexcept
  function ordered_map (line 19039) | ordered_map() noexcept(noexcept(Container())) : Container{} {}
  function ordered_map (line 19040) | explicit ordered_map(const Allocator& alloc) noexcept(noexcept(Container...
  function ordered_map (line 19042) | ordered_map(It first, It last, const Allocator& alloc = Allocator())
  function ordered_map (line 19044) | ordered_map(std::initializer_list<value_type> init, const Allocator& all...
  function emplace (line 19047) | std::pair<iterator, bool> emplace(const key_type& key, T&& t)
  function emplace (line 19062) | std::pair<iterator, bool> emplace(KeyType && key, T && t)
  function T (line 19075) | T& operator[](const key_type& key)
  function T (line 19082) | T & operator[](KeyType && key)
  function T (line 19087) | const T& operator[](const key_type& key) const
  function T (line 19094) | const T & operator[](KeyType && key) const
  function T (line 19099) | T& at(const key_type& key)
  function T (line 19114) | T & at(KeyType && key) // NOLINT(cppcoreguidelines-missing-std-forward)
  function T (line 19127) | const T& at(const key_type& key) const
  function T (line 19142) | const T & at(KeyType && key) const // NOLINT(cppcoreguidelines-missing-s...
  function size_type (line 19155) | size_type erase(const key_type& key)
  function size_type (line 19176) | size_type erase(KeyType && key) // NOLINT(cppcoreguidelines-missing-std-...
  function iterator (line 19195) | iterator erase(iterator pos)
  function iterator (line 19200) | iterator erase(iterator first, iterator last)
  function size_type (line 19253) | size_type count(const key_type& key) const
  function size_type (line 19267) | size_type count(KeyType && key) const // NOLINT(cppcoreguidelines-missin...
  function iterator (line 19279) | iterator find(const key_type& key)
  function iterator (line 19293) | iterator find(KeyType && key) // NOLINT(cppcoreguidelines-missing-std-fo...
  function const_iterator (line 19305) | const_iterator find(const key_type& key) const
  function insert (line 19317) | std::pair<iterator, bool> insert( value_type&& value )
  function insert (line 19322) | std::pair<iterator, bool> insert( const value_type& value )
  function insert (line 19340) | void insert(InputIt first, InputIt last)
  function NLOHMANN_JSON_NAMESPACE_BEGIN (line 19367) | NLOHMANN_JSON_NAMESPACE_BEGIN
  function set_parents (line 19994) | void set_parents()
  function iterator (line 20031) | iterator set_parents(iterator it, typename iterator::difference_type cou...
  function reference (line 20044) | reference set_parent(reference j, std::size_t old_capacity = static_cast...
  function basic_json (line 20106) | basic_json(const value_t v)
  function basic_json (line 20114) | basic_json(std::nullptr_t = nullptr) noexcept // NOLINT(bugprone-excepti...
  function basic_json (line 20126) | basic_json(CompatibleType && val) noexcept(noexcept( // NOLINT(bugprone-...
  function basic_json (line 20140) | basic_json(const BasicJsonType& val)
  function basic_json (line 20193) | basic_json(initializer_list_t init,
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20251) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20262) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20273) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20284) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20295) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 20303) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function basic_json (line 20311) | basic_json(size_type cnt, const basic_json& val):
  function basic_json (line 20323) | basic_json(InputIT first, InputIT last)
  function basic_json (line 20432) | basic_json(const JsonRef& ref) : basic_json(ref.moved_or_copied()) {}
  function basic_json (line 20436) | basic_json(const basic_json& other)
  function basic_json (line 20505) | basic_json(basic_json&& other) noexcept
  function basic_json (line 20522) | basic_json& operator=(basic_json other) noexcept (
  function value_t (line 20585) | constexpr value_t type() const noexcept
  function is_primitive (line 20592) | constexpr bool is_primitive() const noexcept
  function is_structured (line 20599) | constexpr bool is_structured() const noexcept
  function is_null (line 20606) | constexpr bool is_null() const noexcept
  function is_boolean (line 20613) | constexpr bool is_boolean() const noexcept
  function is_number (line 20620) | constexpr bool is_number() const noexcept
  function is_number_integer (line 20627) | constexpr bool is_number_integer() const noexcept
  function is_number_unsigned (line 20634) | constexpr bool is_number_unsigned() const noexcept
  function is_number_float (line 20641) | constexpr bool is_number_float() const noexcept
  function is_object (line 20648) | constexpr bool is_object() const noexcept
  function is_array (line 20655) | constexpr bool is_array() const noexcept
  function is_string (line 20662) | constexpr bool is_string() const noexcept
  function is_binary (line 20669) | constexpr bool is_binary() const noexcept
  function is_discarded (line 20676) | constexpr bool is_discarded() const noexcept
  function object_t (line 20707) | object_t* get_impl_ptr(object_t* /*unused*/) noexcept
  function object_t (line 20713) | constexpr const object_t* get_impl_ptr(const object_t* /*unused*/) const...
  function array_t (line 20719) | array_t* get_impl_ptr(array_t* /*unused*/) noexcept
  function array_t (line 20725) | constexpr const array_t* get_impl_ptr(const array_t* /*unused*/) const n...
  function string_t (line 20731) | string_t* get_impl_ptr(string_t* /*unused*/) noexcept
  function string_t (line 20737) | constexpr const string_t* get_impl_ptr(const string_t* /*unused*/) const...
  function boolean_t (line 20743) | boolean_t* get_impl_ptr(boolean_t* /*unused*/) noexcept
  function boolean_t (line 20749) | constexpr const boolean_t* get_impl_ptr(const boolean_t* /*unused*/) con...
  function number_integer_t (line 20755) | number_integer_t* get_impl_ptr(number_integer_t* /*unused*/) noexcept
  function number_integer_t (line 20761) | constexpr const number_integer_t* get_impl_ptr(const number_integer_t* /...
  function number_unsigned_t (line 20767) | number_unsigned_t* get_impl_ptr(number_unsigned_t* /*unused*/) noexcept
  function number_unsigned_t (line 20773) | constexpr const number_unsigned_t* get_impl_ptr(const number_unsigned_t*...
  function number_float_t (line 20779) | number_float_t* get_impl_ptr(number_float_t* /*unused*/) noexcept
  function number_float_t (line 20785) | constexpr const number_float_t* get_impl_ptr(const number_float_t* /*unu...
  function binary_t (line 20791) | binary_t* get_impl_ptr(binary_t* /*unused*/) noexcept
  function binary_t (line 20797) | constexpr const binary_t* get_impl_ptr(const binary_t* /*unused*/) const...
  function ReferenceType (line 20814) | static ReferenceType get_ref_impl(ThisType& obj)
  function get_ptr (line 20847) | constexpr auto get_ptr() const noexcept -> decltype(std::declval<const b...
  function ValueType (line 20939) | ValueType get_impl(detail::priority_tag<1> /*unused*/) const noexcept(no...
  function BasicJsonType (line 20964) | BasicJsonType get_impl(detail::priority_tag<2> /*unused*/) const
  function basic_json (line 20987) | basic_json get_impl(detail::priority_tag<3> /*unused*/) const
  function get_impl (line 21000) | constexpr auto get_impl(detail::priority_tag<4> /*unused*/) const noexcept
  function get (line 21076) | auto get() noexcept -> decltype(std::declval<basic_json_t&>().template g...
  function ValueType (line 21089) | ValueType & get_to(ValueType& v) const noexcept(noexcept(
  function ValueType (line 21102) | ValueType & get_to(ValueType& v) const
  function Array (line 21113) | Array get_to(T (&v)[N]) const // NOLINT(cppcoreguidelines-avoid-c-arrays...
  function ReferenceType (line 21125) | ReferenceType get_ref()
  function ReferenceType (line 21136) | ReferenceType get_ref() const
  function binary_t (line 21195) | binary_t& get_binary()
  function binary_t (line 21207) | const binary_t& get_binary() const
  function reference (line 21229) | reference at(size_type idx)
  function const_reference (line 21252) | const_reference at(size_type idx) const
  function reference (line 21275) | reference at(const typename object_t::key_type& key)
  function reference (line 21295) | reference at(KeyType && key)
  function const_reference (line 21313) | const_reference at(const typename object_t::key_type& key) const
  function const_reference (line 21333) | const_reference at(KeyType && key) const
  function reference (line 21351) | reference operator[](size_type idx)
  function const_reference (line 21397) | const_reference operator[](size_type idx) const
  function reference (line 21410) | reference operator[](typename object_t::key_type key)
  function const_reference (line 21432) | const_reference operator[](const typename object_t::key_type& key) const
  function reference (line 21448) | reference operator[](T* key)
  function const_reference (line 21454) | const_reference operator[](T* key) const
  function reference (line 21463) | reference operator[](KeyType && key)
  function const_reference (line 21487) | const_reference operator[](KeyType && key) const
  class ValueType (line 21513) | class ValueType
  function ReturnType (line 21542) | ReturnType value(const typename object_t::key_type& key, ValueType && de...
  function ValueType (line 21568) | ValueType value(KeyType && key, const ValueType& default_value) const
  function ReturnType (line 21595) | ReturnType value(KeyType && key, ValueType && default_value) const
  function ValueType (line 21618) | ValueType value(const json_pointer& ptr, const ValueType& default_value)...
  function ReturnType (line 21643) | ReturnType value(const json_pointer& ptr, ValueType && default_value) const
  function ValueType (line 21667) | ValueType value(const ::nlohmann::json_pointer<BasicJsonType>& ptr, cons...
  function ReturnType (line 21678) | ReturnType value(const ::nlohmann::json_pointer<BasicJsonType>& ptr, Val...
  function reference (line 21685) | reference front()
  function const_reference (line 21692) | const_reference front() const
  function reference (line 21699) | reference back()
  function const_reference (line 21708) | const_reference back() const
  function IteratorType (line 21720) | IteratorType erase(IteratorType pos)
  function IteratorType (line 21790) | IteratorType erase(IteratorType first, IteratorType last)
  function erase_internal (line 21858) | private:
  function size_type (line 21874) | size_type erase_internal(KeyType && key)
  function size_type (line 21906) | size_type erase(KeyType && key)
  function erase (line 21913) | void erase(const size_type idx)
  function iterator (line 21942) | iterator find(const typename object_t::key_type& key)
  function const_iterator (line 21956) | const_iterator find(const typename object_t::key_type& key) const
  function iterator (line 21972) | iterator find(KeyType && key)
  function const_iterator (line 21988) | const_iterator find(KeyType && key) const
  function size_type (line 22002) | size_type count(const typename object_t::key_type& key) const
  function size_type (line 22012) | size_type count(KeyType && key) const
  function contains (line 22020) | bool contains(const typename object_t::key_type& key) const
  function contains (line 22029) | bool contains(KeyType && key) const
  function contains (line 22036) | bool contains(const json_pointer& ptr) const
  function contains (line 22043) | bool contains(const typename ::nlohmann::json_pointer<BasicJsonType>& pt...
  function iterator (line 22059) | iterator begin() noexcept
  function const_iterator (line 22068) | const_iterator begin() const noexcept
  function const_iterator (line 22075) | const_iterator cbegin() const noexcept
  function iterator (line 22084) | iterator end() noexcept
  function const_iterator (line 22093) | const_iterator end() const noexcept
  function const_iterator (line 22100) | const_iterator cend() const noexcept
  function reverse_iterator (line 22109) | reverse_iterator rbegin() noexcept
  function const_reverse_iterator (line 22116) | const_reverse_iterator rbegin() const noexcept
  function reverse_iterator (line 22123) | reverse_iterator rend() noexcept
  function const_reverse_iterator (line 22130) | const_reverse_iterator rend() const noexcept
  function const_reverse_iterator (line 22137) | const_reverse_iterator crbegin() const noexcept
  function const_reverse_iterator (line 22144) | const_reverse_iterator crend() const noexcept
  function iterator_wrapper (line 22156) | static iteration_proxy<iterator> iterator_wrapper(reference ref) noexcept
  function iterator_wrapper (line 22167) | static iteration_proxy<const_iterator> iterator_wrapper(const_reference ...
  function items (line 22174) | iteration_proxy<iterator> items() noexcept
  function items (line 22181) | iteration_proxy<const_iterator> items() const noexcept
  function empty (line 22197) | bool empty() const noexcept
  function size_type (line 22236) | size_type size() const noexcept
  function size_type (line 22275) | size_type max_size() const noexcept
  function clear (line 22318) | void clear() noexcept
  function push_back (line 22379) | void push_back(basic_json&& val)
  function reference (line 22404) | reference operator+=(basic_json&& val)
  function push_back (line 22412) | void push_back(const basic_json& val)
  function reference (line 22436) | reference operator+=(const basic_json& val)
  function push_back (line 22444) | void push_back(const typename object_t::value_type& val)
  function reference (line 22467) | reference operator+=(const typename object_t::value_type& val)
  function push_back (line 22475) | void push_back(initializer_list_t init)
  function reference (line 22491) | reference operator+=(initializer_list_t init)
  function reference (line 22500) | reference emplace_back(Args&& ... args)
  function emplace (line 22525) | std::pair<iterator, bool> emplace(Args&& ... args)
  function iterator (line 22557) | iterator insert_iterator(const_iterator pos, Args&& ... args)
  function iterator (line 22576) | iterator insert(const_iterator pos, const basic_json& val)
  function iterator (line 22596) | iterator insert(const_iterator pos, basic_json&& val)
  function iterator (line 22603) | iterator insert(const_iterator pos, size_type cnt, const basic_json& val)
  function iterator (line 22623) | iterator insert(const_iterator pos, const_iterator first, const_iterator...
  function iterator (line 22654) | iterator insert(const_iterator pos, initializer_list_t ilist)
  function insert (line 22674) | void insert(const_iterator first, const_iterator last)
  function update (line 22699) | void update(const_reference j, bool merge_objects = false)
  function update (line 22706) | void update(const_iterator first, const_iterator last, bool merge_object...
  function swap (line 22753) | void swap(reference other) noexcept (
  function friend (line 22770) | friend void swap(reference left, reference right) noexcept (
  function swap (line 22782) | void swap(array_t& other) // NOLINT(bugprone-exception-escape,cppcoregui...
  function swap (line 22798) | void swap(object_t& other) // NOLINT(bugprone-exception-escape,cppcoregu...
  function swap (line 22814) | void swap(string_t& other) // NOLINT(bugprone-exception-escape,cppcoregu...
  function swap (line 22830) | void swap(binary_t& other) // NOLINT(bugprone-exception-escape,cppcoregu...
  function swap (line 22846) | void swap(typename binary_t::container_type& other) // NOLINT(bugprone-e...
  function else (line 22935) | else if(compares_unordered(lhs, rhs))\
  function compares_unordered (line 22964) | bool compares_unordered(const_reference rhs, bool inverse = false) const...
  function friend (line 23077) | friend bool operator==(const_reference lhs, const_reference rhs) noexcept
  function friend (line 23109) | friend bool operator!=(const_reference lhs, const_reference rhs) noexcept
  function friend (line 23166) | friend bool operator<=(const_reference lhs, const_reference rhs) noexcept
  function friend (line 23195) | friend bool operator>(const_reference lhs, const_reference rhs) noexcept
  function friend (line 23225) | friend bool operator>=(const_reference lhs, const_reference rhs) noexcept
  function friend (line 23266) | friend std::ostream& operator<<(std::ostream& o, const basic_json& j)
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23305) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23319) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function basic_json (line 23333) | static basic_json parse(detail::span_input_adapter&& i,
  function accept (line 23346) | static bool accept(InputType&& i,
  function accept (line 23355) | static bool accept(IteratorType first, IteratorType last,
  function accept (line 23363) | static bool accept(detail::span_input_adapter&& i,
  function sax_parse (line 23373) | static bool sax_parse(InputType&& i, SAX* sax,
  function sax_parse (line 23388) | static bool sax_parse(IteratorType first, IteratorType last, SAX* sax,
  function sax_parse (line 23407) | static bool sax_parse(detail::span_input_adapter&& i, SAX* sax,
  function JSON_HEDLEY_RETURNS_NON_NULL (line 23448) | JSON_HEDLEY_RETURNS_NON_NULL
  type data (line 23480) | struct data
    method data (line 23488) | data(const value_t v)
    method data (line 23493) | data(size_type cnt, const basic_json& val)
    method data (line 23499) | data() noexcept = default;
    method data (line 23500) | data(data&&) noexcept = default;
    method data (line 23501) | data(const data&) noexcept = delete;
    method data (line 23502) | data& operator=(data&&) noexcept = delete;
    method data (line 23503) | data& operator=(const data&) noexcept = delete;
  function to_cbor (line 23537) | static void to_cbor(const basic_json& j, detail::output_adapter<std::uin...
  function to_cbor (line 23544) | static void to_cbor(const basic_json& j, detail::output_adapter<char> o)
  function to_msgpack (line 23551) | static std::vector<std::uint8_t> to_msgpack(const basic_json& j)
  function to_msgpack (line 23560) | static void to_msgpack(const basic_json& j, detail::output_adapter<std::...
  function to_msgpack (line 23567) | static void to_msgpack(const basic_json& j, detail::output_adapter<char> o)
  function to_ubjson (line 23574) | static std::vector<std::uint8_t> to_ubjson(const basic_json& j,
  function to_ubjson (line 23585) | static void to_ubjson(const basic_json& j, detail::output_adapter<std::u...
  function to_ubjson (line 23593) | static void to_ubjson(const basic_json& j, detail::output_adapter<char> o,
  function to_bjdata (line 23601) | static std::vector<std::uint8_t> to_bjdata(const basic_json& j,
  function to_bjdata (line 23612) | static void to_bjdata(const basic_json& j, detail::output_adapter<std::u...
  function to_bjdata (line 23620) | static void to_bjdata(const basic_json& j, detail::output_adapter<char> o,
  function to_bson (line 23628) | static std::vector<std::uint8_t> to_bson(const basic_json& j)
  function to_bson (line 23637) | static void to_bson(const basic_json& j, detail::output_adapter<std::uin...
  function to_bson (line 23644) | static void to_bson(const basic_json& j, detail::output_adapter<char> o)
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23652) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23668) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function basic_json (line 23684) | static basic_json from_cbor(const T* ptr, std::size_t len,
  function basic_json (line 23694) | static basic_json from_cbor(detail::span_input_adapter&& i,
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23710) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23725) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function basic_json (line 23740) | static basic_json from_msgpack(const T* ptr, std::size_t len,
  function basic_json (line 23749) | static basic_json from_msgpack(detail::span_input_adapter&& i,
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23764) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23779) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function basic_json (line 23794) | static basic_json from_ubjson(const T* ptr, std::size_t len,
  function basic_json (line 23803) | static basic_json from_ubjson(detail::span_input_adapter&& i,
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23818) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23833) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23848) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 23863) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function basic_json (line 23878) | static basic_json from_bson(const T* ptr, std::size_t len,
  function basic_json (line 23887) | static basic_json from_bson(detail::span_input_adapter&& i,
  function reference (line 23909) | reference operator[](const json_pointer& ptr)
  function reference (line 23916) | reference operator[](const ::nlohmann::json_pointer<BasicJsonType>& ptr)
  function const_reference (line 23923) | const_reference operator[](const json_pointer& ptr) const
  function const_reference (line 23930) | const_reference operator[](const ::nlohmann::json_pointer<BasicJsonType>...
  function reference (line 23937) | reference at(const json_pointer& ptr)
  function reference (line 23944) | reference at(const ::nlohmann::json_pointer<BasicJsonType>& ptr)
  function const_reference (line 23951) | const_reference at(const json_pointer& ptr) const
  function const_reference (line 23958) | const_reference at(const ::nlohmann::json_pointer<BasicJsonType>& ptr) c...
  function basic_json (line 23965) | basic_json flatten() const
  function basic_json (line 23974) | basic_json unflatten() const
  function patch_inplace (line 23990) | void patch_inplace(const basic_json& json_patch)
  function basic_json (line 24261) | basic_json patch(const basic_json& json_patch) const
  function JSON_HEDLEY_WARN_UNUSED_RESULT (line 24270) | JSON_HEDLEY_WARN_UNUSED_RESULT
  function merge_patch (line 24413) | void merge_patch(const basic_json& apply_patch)
  function NLOHMANN_BASIC_JSON_TPL_DECLARATION (line 24444) | NLOHMANN_BASIC_JSON_TPL_DECLARATION
  function NLOHMANN_JSON_NAMESPACE_END (line 24481) | NLOHMANN_JSON_NAMESPACE_END

FILE: archive/third_party/nlohmann/json_fwd.hpp
  class json_pointer (line 156) | class json_pointer
  type ordered_map (line 167) | struct ordered_map

FILE: kt-kernel/bench/bench_attention.py
  function bench_linear (line 41) | def bench_linear(cache_seqlen: int):

FILE: kt-kernel/bench/bench_attention_torch.py
  function bench_linear (line 29) | def bench_linear(cache_seqlen: int, device):

FILE: kt-kernel/bench/bench_bf16_moe.py
  function get_git_commit (line 44) | def get_git_commit():
  function get_system_info (line 62) | def get_system_info():
  function record_results (line 84) | def record_results(result, filename=json_path):
  function generate_bf16_weights (line 90) | def generate_bf16_weights(shape: tuple):
  function bench_bf16_moe (line 105) | def bench_bf16_moe():

FILE: kt-kernel/bench/bench_fp8_moe.py
  function get_git_commit (line 45) | def get_git_commit():
  function get_system_info (line 63) | def get_system_info():
  function record_results (line 85) | def record_results(result, filename=json_path):
  function generate_fp8_weights_direct (line 91) | def generate_fp8_weights_direct(shape: tuple, group_size: int = 128):
  function bench_fp8_moe (line 121) | def bench_fp8_moe():

FILE: kt-kernel/bench/bench_fp8_perchannel_moe.py
  function get_git_commit (line 44) | def get_git_commit():
  function get_system_info (line 62) | def get_system_info():
  function record_results (line 84) | def record_results(result, filename=json_path):
  function generate_fp8_perchannel_weights_direct (line 90) | def generate_fp8_perchannel_weights_direct(shape: tuple):
  function bench_fp8_perchannel_moe (line 115) | def bench_fp8_perchannel_moe():

FILE: kt-kernel/bench/bench_k2_moe_amx.py
  function get_git_commit (line 41) | def get_git_commit():
  function get_system_info (line 63) | def get_system_info():
  function record_results (line 116) | def record_results(result, filename=json_path):
  function pack_to_int32 (line 121) | def pack_to_int32(value: torch.Tensor, num_bits: int, packed_dim: int = ...
  function pack_tensor_per_row (line 154) | def pack_tensor_per_row(q: torch.Tensor, num_bits: int) -> torch.Tensor:
  function quantize_k2_tensor (line 161) | def quantize_k2_tensor(weights: torch.Tensor, group_size: int):
  function build_quantized_layer_weights (line 180) | def build_quantized_layer_weights():
  function bench_k2_moe (line 211) | def bench_k2_moe():

FILE: kt-kernel/bench/bench_k2_write_buffer.py
  function get_git_commit (line 41) | def get_git_commit():
  function get_system_info (line 63) | def get_system_info():
  function record_results (line 116) | def record_results(result, filename=json_path):
  function allocate_weights (line 121) | def allocate_weights():
  function build_moe (line 145) | def build_moe(layer_idx=0):
  function allocate_buffers (line 194) | def allocate_buffers(buffer_shapes):
  function bench_write_buffer (line 225) | def bench_write_buffer():

FILE: kt-kernel/bench/bench_linear.py
  function bench_linear (line 30) | def bench_linear(quant_mode: str):

FILE: kt-kernel/bench/bench_linear_torch.py
  function bench_linear (line 26) | def bench_linear(quant_mode: str):

FILE: kt-kernel/bench/bench_mla.py
  function get_git_commit (line 65) | def get_git_commit():
  function get_system_info (line 92) | def get_system_info():
  function record_results (line 145) | def record_results(result, filename=json_path):
  function bench_mla (line 153) | def bench_mla(quant_mode: str):

FILE: kt-kernel/bench/bench_mlp.py
  function bench_mlp (line 30) | def bench_mlp(quant_mode: str):

FILE: kt-kernel/bench/bench_mlp_torch.py
  function act_fn (line 26) | def act_fn(x):
  function mlp_torch (line 29) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function bench_mlp (line 47) | def bench_mlp(quant_mode: str):

FILE: kt-kernel/bench/bench_moe.py
  function get_git_commit (line 41) | def get_git_commit():
  function get_system_info (line 68) | def get_system_info():
  function record_results (line 121) | def record_results(result, filename=json_path):
  function bench_moe (line 129) | def bench_moe(quant_mode: str):

FILE: kt-kernel/bench/bench_moe_amx.py
  function get_git_commit (line 46) | def get_git_commit():
  function get_system_info (line 73) | def get_system_info():
  function record_results (line 135) | def record_results(result, filename=json_path):
  function bench_moe (line 143) | def bench_moe(quant_mode: str):

FILE: kt-kernel/bench/bench_moe_amx_k.py
  function get_git_commit (line 47) | def get_git_commit():
  function get_system_info (line 74) | def get_system_info():
  function record_results (line 136) | def record_results(result, filename=json_path):
  function bench_moe (line 144) | def bench_moe(quant_mode: str):

FILE: kt-kernel/bench/bench_moe_kernel.py
  function get_git_commit (line 53) | def get_git_commit():
  function get_system_info (line 80) | def get_system_info():
  function record_results (line 142) | def record_results(result, filename=json_path):
  function bench_moe (line 150) | def bench_moe(quant_mode: str):

FILE: kt-kernel/bench/bench_moe_kernel_tiling.py
  function maybe_get_class (line 33) | def maybe_get_class(module, name):
  function main (line 37) | def main():

FILE: kt-kernel/bench/bench_moe_kml.py
  function get_git_commit (line 47) | def get_git_commit():
  function get_system_info (line 74) | def get_system_info():
  function record_results (line 136) | def record_results(result, filename=json_path):
  function bench_moe (line 144) | def bench_moe(quant_mode: str):

FILE: kt-kernel/bench/bench_moe_torch.py
  function act_fn (line 28) | def act_fn(x):
  function mlp_torch (line 31) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 49) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function bench_moe (line 80) | def bench_moe(quant_mode: str):

FILE: kt-kernel/bench/bench_write_buffer.py
  function get_git_commit (line 50) | def get_git_commit():
  function get_system_info (line 66) | def get_system_info():
  function record_results (line 93) | def record_results(result, filename=json_path):
  function div_up (line 98) | def div_up(a, b):
  function allocate_weights_fp8 (line 107) | def allocate_weights_fp8():
  function allocate_weights_fp8_perchannel (line 152) | def allocate_weights_fp8_perchannel():
  function build_moe_fp8 (line 195) | def build_moe_fp8(layer_idx=0):
  function build_moe_fp8_perchannel (line 226) | def build_moe_fp8_perchannel(layer_idx=0):
  function allocate_buffers_fp8 (line 258) | def allocate_buffers_fp8(buffer_shapes):
  function allocate_buffers_fp8_perchannel (line 292) | def allocate_buffers_fp8_perchannel(buffer_shapes):
  function allocate_weights_bf16 (line 331) | def allocate_weights_bf16():
  function build_moe_bf16 (line 352) | def build_moe_bf16(layer_idx=0):
  function allocate_buffers_bf16 (line 379) | def allocate_buffers_bf16(buffer_shapes):
  function bench_write_buffer (line 413) | def bench_write_buffer(quant_mode: str):
  function main (line 525) | def main(quant_modes=None):

FILE: kt-kernel/bench/compare_moe_performance.py
  class EnvironmentConfig (line 30) | class EnvironmentConfig:
    method apply (line 34) | def apply(self):
  function get_cpu_count (line 84) | def get_cpu_count() -> int:
  function get_physical_cpu_count (line 111) | def get_physical_cpu_count() -> int:
  class TestConfig (line 192) | class TestConfig:
    method __post_init__ (line 204) | def __post_init__(self):
    method total_configurations (line 213) | def total_configurations(self) -> int:
  function get_numa_count (line 216) | def get_numa_count() -> int:
  class SystemConfig (line 251) | class SystemConfig:
    method __post_init__ (line 255) | def __post_init__(self):
  class ThreadConfig (line 264) | class ThreadConfig:
    method from_thread_count (line 271) | def from_thread_count(cls, thread_count: int, numa_count: int, cpu_cor...
  function get_system_info (line 290) | def get_system_info() -> Dict[str, any]:
  class BenchmarkResult (line 343) | class BenchmarkResult:
    method to_dict (line 354) | def to_dict(self) -> Dict:
  class CheckpointState (line 358) | class CheckpointState:
    method to_dict (line 366) | def to_dict(self) -> Dict:
    method from_dict (line 376) | def from_dict(cls, data: Dict) -> 'CheckpointState':
  class CheckpointManager (line 387) | class CheckpointManager:
    method __init__ (line 389) | def __init__(self, checkpoint_dir: str = None):
    method _signal_handler (line 399) | def _signal_handler(self, signum, frame):
    method save_checkpoint (line 403) | def save_checkpoint(self, state: CheckpointState):
    method load_checkpoint (line 421) | def load_checkpoint(self) -> Optional[CheckpointState]:
    method clear_checkpoint (line 437) | def clear_checkpoint(self):
  function bench_ktransformers_moe (line 443) | def bench_ktransformers_moe(test_config: TestConfig, quant_mode: str, ql...
  function run_sgl_int4_with_numactl (line 619) | def run_sgl_int4_with_numactl(test_config: TestConfig, qlen: int,
  function run_sgl_with_numactl (line 874) | def run_sgl_with_numactl(test_config: TestConfig, qlen: int,
  function save_results (line 1063) | def save_results(results: List[BenchmarkResult], test_config: TestConfig...
  function print_summary_table (line 1089) | def print_summary_table(results: List[BenchmarkResult]):
  function main (line 1116) | def main():

FILE: kt-kernel/bench/multi_bench_moe.py
  function expand_param_dict (line 46) | def expand_param_dict(param_dict):
  function update_bench_parameters (line 77) | def update_bench_parameters(params):
  function main (line 96) | def main():

FILE: kt-kernel/bench/upload-bench-json.py
  function insert_jsonl_file (line 28) | def insert_jsonl_file(file_path):

FILE: kt-kernel/cpu_backend/cpuinfer.h
  function class (line 34) | class CPUInfer {
  function submit (line 78) | void submit(std::pair<intptr_t, intptr_t> params) {
  function submit_with_cuda_stream (line 85) | void submit_with_cuda_stream(intptr_t user_cuda_stream, std::pair<intptr...
  type SyncArgs (line 95) | struct SyncArgs {
  function sync_ (line 100) | static void sync_(void* sync_args) {

FILE: kt-kernel/cpu_backend/shared_mem_buffer.h
  type MemoryRequest (line 23) | struct MemoryRequest {
  function class (line 37) | class SharedMemBuffer {
  function class (line 53) | class SharedMemBufferNuma {

FILE: kt-kernel/cpu_backend/task_queue.h
  type Node (line 31) | struct Node {

FILE: kt-kernel/cpu_backend/vendors/hip.h
  type hip_bfloat16 (line 182) | typedef hip_bfloat16 nv_bfloat16;

FILE: kt-kernel/cpu_backend/vendors/musa.h
  type mt_bfloat16 (line 139) | typedef mt_bfloat16 nv_bfloat16;

FILE: kt-kernel/cpu_backend/worker_pool.cpp
  function InNumaPool (line 448) | InNumaPool* WorkerPool::get_subpool(int numa_id) { return numa_worker_po...
  function NumaJobDistributor (line 450) | NumaJobDistributor* WorkerPool::dispense_backend() { return distributor....

FILE: kt-kernel/cpu_backend/worker_pool.h
  function set_to_numa (line 28) | inline void set_to_numa(int this_numa) {
  function set_memory_to_numa (line 35) | inline void set_memory_to_numa(int this_numa) {
  type ThreadStatus (line 57) | enum ThreadStatus {
  type alignas (line 63) | struct alignas
  function class (line 70) | class InNumaPool {
  function class (line 103) | class NumaJobDistributor {
  type WorkerPoolConfig (line 128) | struct WorkerPoolConfig {
  function class (line 134) | class WorkerPool {

FILE: kt-kernel/cuda/binding.cpp
  function PYBIND11_MODULE (line 22) | PYBIND11_MODULE(KTransformersOps, m) {

FILE: kt-kernel/cuda/moe/utils.h
  function check_shape (line 141) | inline void check_shape(const at::Tensor& a, const at::Tensor& b, const ...
  function pack_u16 (line 148) | inline constexpr uint32_t pack_u16(uint16_t a, uint16_t b) { return (uin...
  function is_float8_tensor (line 175) | inline bool is_float8_tensor(const at::Tensor& tensor) {
  type cuda_error (line 180) | struct cuda_error
  function runtime_error (line 186) | cuda_error(const char* message) : std::runtime_error(message) {}
  function getSMVersion (line 212) | inline int getSMVersion() {

FILE: kt-kernel/demo/bench_reorder_bandwidth.cpp
  function fill_random (line 16) | void fill_random(int8_t* ptr, size_t count) {
  function fill_zero (line 23) | void fill_zero(int32_t* ptr, size_t count) { std::memset(ptr, 0, count *...
  function verify (line 25) | bool verify(const int8_t* a, const int8_t* b, const int32_t* c) {
  function main (line 42) | int main() {

FILE: kt-kernel/demo/bf16-test.cpp
  function main (line 16) | int main() {

FILE: kt-kernel/demo/fp16-test.cpp
  function main (line 14) | int main() {

FILE: kt-kernel/demo/simple_test.cpp
  function main (line 12) | int main() {

FILE: kt-kernel/demo/simple_test_aocl.cpp
  function fill_inputs (line 14) | void fill_inputs(int8_t* a, int8_t* b) {
  function compute_reference (line 24) | void compute_reference(const int8_t* a, const int8_t* b, int32_t* ref) {
  function check_result (line 36) | bool check_result(const int32_t* got, const int32_t* ref) {
  function main (line 47) | int main() {

FILE: kt-kernel/examples/bench_moe_amx_int8.py
  function parse_args (line 44) | def parse_args():
  function generate_uniform_workload (line 81) | def generate_uniform_workload(expert_num, num_experts_per_tok, workload):
  function run_benchmark (line 110) | def run_benchmark(args):
  function main (line 346) | def main():

FILE: kt-kernel/examples/configuration_deepseek_v3.py
  class DeepseekV3Config (line 7) | class DeepseekV3Config(PretrainedConfig):
    method __init__ (line 106) | def __init__(

FILE: kt-kernel/examples/modeling_deepseek_v3.py
  function _get_unpad_data (line 80) | def _get_unpad_data(attention_mask):
  class DeepseekV3RMSNorm (line 94) | class DeepseekV3RMSNorm(nn.Module):
    method __init__ (line 95) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 104) | def forward(self, hidden_states):
  class DeepseekV3RotaryEmbedding (line 115) | class DeepseekV3RotaryEmbedding(nn.Module):
    method __init__ (line 116) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 135) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 147) | def forward(self, x, seq_len=None):
  class DeepseekV3LinearScalingRotaryEmbedding (line 159) | class DeepseekV3LinearScalingRotaryEmbedding(DeepseekV3RotaryEmbedding):
    method __init__ (line 162) | def __init__(
    method _set_cos_sin_cache (line 173) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class DeepseekV3DynamicNTKScalingRotaryEmbedding (line 188) | class DeepseekV3DynamicNTKScalingRotaryEmbedding(DeepseekV3RotaryEmbeddi...
    method __init__ (line 191) | def __init__(
    method _set_cos_sin_cache (line 202) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function yarn_find_correction_dim (line 227) | def yarn_find_correction_dim(
  function yarn_find_correction_range (line 236) | def yarn_find_correction_range(
  function yarn_get_mscale (line 248) | def yarn_get_mscale(scale=1, mscale=1):
  function yarn_linear_ramp_mask (line 254) | def yarn_linear_ramp_mask(min, max, dim):
  class DeepseekV3YarnRotaryEmbedding (line 263) | class DeepseekV3YarnRotaryEmbedding(DeepseekV3RotaryEmbedding):
    method __init__ (line 265) | def __init__(
    method _set_cos_sin_cache (line 286) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function rotate_half (line 332) | def rotate_half(x):
  function apply_rotary_pos_emb (line 340) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  class DeepseekV3MLP (line 375) | class DeepseekV3MLP(nn.Module):
    method __init__ (line 376) | def __init__(self, config, hidden_size=None, intermediate_size=None):
    method forward (line 389) | def forward(self, x):
  class MoEGate (line 394) | class MoEGate(nn.Module):
    method __init__ (line 395) | def __init__(self, config):
    method reset_parameters (line 418) | def reset_parameters(self) -> None:
    method forward (line 423) | def forward(self, hidden_states):
  class DeepseekV3MoE (line 476) | class DeepseekV3MoE(nn.Module):
    method __init__ (line 481) | def __init__(self, config):
    method forward (line 523) | def forward(self, hidden_states):
    method moe_infer (line 536) | def moe_infer(self, x, topk_ids, topk_weight):
  function repeat_kv (line 613) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class DeepseekV3Attention (line 628) | class DeepseekV3Attention(nn.Module):
    method __init__ (line 631) | def __init__(self, config: DeepseekV3Config, layer_idx: Optional[int] ...
    method _init_rope (line 698) | def _init_rope(self):
    method _shape (line 744) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 751) | def forward(
  class DeepseekV3FlashAttention2 (line 862) | class DeepseekV3FlashAttention2(DeepseekV3Attention):
    method __init__ (line 869) | def __init__(self, *args, **kwargs):
    method forward (line 877) | def forward(
    method _flash_attention_forward (line 1013) | def _flash_attention_forward(
    method _upad_input (line 1093) | def _upad_input(
  class DeepseekV3DecoderLayer (line 1145) | class DeepseekV3DecoderLayer(nn.Module):
    method __init__ (line 1146) | def __init__(self, config: DeepseekV3Config, layer_idx: int):
    method forward (line 1170) | def forward(
  class DeepseekV3PreTrainedModel (line 1256) | class DeepseekV3PreTrainedModel(PreTrainedModel):
    method _init_weights (line 1265) | def _init_weights(self, module):
  class DeepseekV3Model (line 1351) | class DeepseekV3Model(DeepseekV3PreTrainedModel):
    method __init__ (line 1359) | def __init__(self, config: DeepseekV3Config):
    method get_input_embeddings (line 1380) | def get_input_embeddings(self):
    method set_input_embeddings (line 1383) | def set_input_embeddings(self, value):
    method forward (line 1387) | def forward(
    method _update_causal_mask (line 1521) | def _update_causal_mask(
  class DeepseekV3ForCausalLM (line 1601) | class DeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
    method __init__ (line 1604) | def __init__(self, config):
    method get_input_embeddings (line 1613) | def get_input_embeddings(self):
    method set_input_embeddings (line 1616) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1619) | def get_output_embeddings(self):
    method set_output_embeddings (line 1622) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1625) | def set_decoder(self, decoder):
    method get_decoder (line 1628) | def get_decoder(self):
    method forward (line 1635) | def forward(
    method prepare_inputs_for_generation (line 1731) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1796) | def _reorder_cache(past_key_values, beam_idx):
  class DeepseekV3ForSequenceClassification (line 1823) | class DeepseekV3ForSequenceClassification(DeepseekV3PreTrainedModel):
    method __init__ (line 1824) | def __init__(self, config):
    method get_input_embeddings (line 1833) | def get_input_embeddings(self):
    method set_input_embeddings (line 1836) | def set_input_embeddings(self, value):
    method forward (line 1840) | def forward(

FILE: kt-kernel/examples/repro_llamafile_re.py
  function getenv_int (line 43) | def getenv_int(name: str, default: int) -> int:
  function get_stream_for (line 50) | def get_stream_for(device: torch.device | str):
  function main (line 57) | def main() -> int:

FILE: kt-kernel/examples/test_apply_rope.py
  function rotate_half (line 4) | def rotate_half(x):
  function apply_rotary_pos_emb (line 10) | def apply_rotary_pos_emb(q, cos, sin, position_ids=None, unsqueeze_dim=1):
  function my_apply (line 37) | def my_apply(q,cos,sin):

FILE: kt-kernel/examples/test_awq_moe_amx.py
  function pack (line 17) | def pack(imatrix: torch.Tensor, direction: str = "row"):
  function act_fn (line 60) | def act_fn(x):
  function generate_original_weights (line 64) | def generate_original_weights():
  function generate_awq_quantized_weights (line 94) | def generate_awq_quantized_weights():
  function mlp_torch (line 203) | def mlp_torch(input, gate_proj, up_proj, down_proj, debug_expert_id=None...
  function moe_torch (line 225) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj,...
  function test_online_int4_kgroup_moe (line 290) | def test_online_int4_kgroup_moe():
  function test_awq_moe (line 413) | def test_awq_moe():
  function compare_quantization_methods (line 552) | def compare_quantization_methods():

FILE: kt-kernel/examples/test_bf16_moe.py
  function act_fn (line 41) | def act_fn(x):
  function mlp_torch (line 46) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 55) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function build_bf16_weights (line 89) | def build_bf16_weights():
  function build_moes_from_bf16_data (line 131) | def build_moes_from_bf16_data(bf16_data: dict):
  function run_bf16_moe_test (line 159) | def run_bf16_moe_test():

FILE: kt-kernel/examples/test_deepseekv3.py
  function read_gguf_file (line 34) | def read_gguf_file(gguf_file_path):
  function read_gguf_directory (line 67) | def read_gguf_directory(directory):
  function find_weights (line 94) | def find_weights(name, weights):
  function get_torch_tensor_from_gguf (line 111) | def get_torch_tensor_from_gguf(gguf_weights, name):
  function get_torch_tensor_and_type_from_gguf (line 115) | def get_torch_tensor_and_type_from_gguf(gguf_weights, name):
  function type_to_ggml_type (line 119) | def type_to_ggml_type(type):
  function build_mla (line 130) | def build_mla(layer_idx, json_config, gguf_weights):
  function build_ffn (line 210) | def build_ffn(layer_idx, json_config, gguf_weights):
  function build_moegate (line 280) | def build_moegate(layer_idx, json_config, gguf_weights):
  function build_llm (line 306) | def build_llm(json_config, gguf_weights):
  function start_chat (line 395) | def start_chat(content=None):

FILE: kt-kernel/examples/test_deepseekv3_prefill.py
  function read_gguf_file (line 32) | def read_gguf_file(gguf_file_path):
  function read_gguf_directory (line 65) | def read_gguf_directory(directory):
  function find_weights (line 92) | def find_weights(name, weights):
  function get_torch_tensor_from_gguf (line 109) | def get_torch_tensor_from_gguf(gguf_weights, name):
  function get_torch_tensor_and_type_from_gguf (line 113) | def get_torch_tensor_and_type_from_gguf(gguf_weights, name):
  function type_to_ggml_type (line 117) | def type_to_ggml_type(type):
  function build_mla (line 128) | def build_mla(layer_idx, json_config, gguf_weights):
  function build_ffn (line 208) | def build_ffn(layer_idx, json_config, gguf_weights):
  function build_moegate (line 278) | def build_moegate(layer_idx, json_config, gguf_weights):
  function build_llm (line 304) | def build_llm(json_config, gguf_weights):
  function start_chat (line 388) | def start_chat():

FILE: kt-kernel/examples/test_deepseekv3_prefill_speed.py
  function read_gguf_file (line 39) | def read_gguf_file(gguf_file_path):
  function read_gguf_directory (line 72) | def read_gguf_directory(directory):
  function find_weights (line 99) | def find_weights(name, weights):
  function get_torch_tensor_from_gguf (line 116) | def get_torch_tensor_from_gguf(gguf_weights, name):
  function get_torch_tensor_and_type_from_gguf (line 120) | def get_torch_tensor_and_type_from_gguf(gguf_weights, name):
  function type_to_ggml_type (line 124) | def type_to_ggml_type(type):
  function build_mla (line 135) | def build_mla(layer_idx, json_config, gguf_weights):
  function build_ffn (line 215) | def build_ffn(layer_idx, json_config, gguf_weights):
  function build_moegate (line 285) | def build_moegate(layer_idx, json_config, gguf_weights):
  function build_llm (line 311) | def build_llm(json_config, gguf_weights):
  function start_chat (line 401) | def start_chat(content=None):

FILE: kt-kernel/examples/test_fp8_moe.py
  function act_fn (line 44) | def act_fn(x):
  function mlp_torch (line 49) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 58) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function fp8_e4m3_to_float (line 96) | def fp8_e4m3_to_float(fp8_val: int) -> float:
  function float_to_fp8_e4m3 (line 119) | def float_to_fp8_e4m3(val: float) -> int:
  function quantize_to_fp8_blockwise (line 162) | def quantize_to_fp8_blockwise(weights: torch.Tensor, group_size: int = 1...
  function dequantize_fp8_blockwise (line 235) | def dequantize_fp8_blockwise(fp8_weights: torch.Tensor, scales: torch.Te...
  function build_random_fp8_weights (line 272) | def build_random_fp8_weights():
  function build_moes_from_fp8_data (line 341) | def build_moes_from_fp8_data(fp8_data: dict):
  function run_fp8_moe_test (line 372) | def run_fp8_moe_test():

FILE: kt-kernel/examples/test_fp8_perchannel_moe.py
  function act_fn (line 42) | def act_fn(x):
  function mlp_torch (line 47) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 56) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function fp8_e4m3_to_float (line 94) | def fp8_e4m3_to_float(fp8_val: int) -> float:
  function float_to_fp8_e4m3 (line 117) | def float_to_fp8_e4m3(val: float) -> int:
  function quantize_to_fp8_perchannel (line 160) | def quantize_to_fp8_perchannel(weights: torch.Tensor):
  function dequantize_fp8_perchannel (line 211) | def dequantize_fp8_perchannel(fp8_weights: torch.Tensor, scales: torch.T...
  function build_random_fp8_perchannel_weights (line 235) | def build_random_fp8_perchannel_weights():
  function build_moes_from_fp8_perchannel_data (line 290) | def build_moes_from_fp8_perchannel_data(fp8_data: dict):
  function run_fp8_perchannel_moe_test (line 322) | def run_fp8_perchannel_moe_test():

FILE: kt-kernel/examples/test_gate.py
  function load_fp32_tensor (line 38) | def load_fp32_tensor(file_path, shape):
  class MoEGate (line 47) | class MoEGate(nn.Module):
    method __init__ (line 48) | def __init__(self, config):
    method reset_parameters (line 67) | def reset_parameters(self) -> None:
    method forward (line 72) | def forward(self, hidden_states):
  function torch_gate (line 159) | def torch_gate(hidden_states):
  function cpuinfer_gate (line 169) | def cpuinfer_gate(hidden_states):

FILE: kt-kernel/examples/test_k2_moe_amx.py
  function _pattern_uniform (line 30) | def _pattern_uniform(groups: int) -> torch.Tensor:
  function _pattern_alternating (line 34) | def _pattern_alternating(groups: int) -> torch.Tensor:
  function _pattern_ramp (line 40) | def _pattern_ramp(groups: int) -> torch.Tensor:
  function act_fn (line 52) | def act_fn(x):
  function mlp_torch (line 56) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 68) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function pack_to_int32 (line 101) | def pack_to_int32(value: torch.Tensor, num_bits: int, packed_dim: Litera...
  function pack_tensor_per_row (line 136) | def pack_tensor_per_row(q: torch.Tensor, num_bits: int) -> torch.Tensor:
  function quantize_k2_tensor (line 143) | def quantize_k2_tensor(weights: torch.Tensor, group_size: int):
  function build_structured_tensor (line 171) | def build_structured_tensor(shape: torch.Size, pattern: str) -> torch.Te...
  function prepare_k2_quantized_weights (line 191) | def prepare_k2_quantized_weights(pattern: str) -> Dict[str, torch.Tensor]:
  function build_moes_from_quantized_data (line 218) | def build_moes_from_quantized_data(quant_data: Dict[str, torch.Tensor]):
  function run_case (line 246) | def run_case(pattern: str) -> Dict[str, float]:
  function run_k2_moe_test (line 306) | def run_k2_moe_test():

FILE: kt-kernel/examples/test_k2_write_buffer.py
  function make_cpu_infer (line 13) | def make_cpu_infer(thread_num=80):
  function build_config (line 17) | def build_config(cpuinfer, expert_num, num_experts_per_tok, hidden_size,...
  function allocate_weights (line 27) | def allocate_weights(expert_num, hidden_size, intermediate_size, group_s...
  function test_with_tp (line 52) | def test_with_tp(gpu_tp_count):
  function main (line 312) | def main():

FILE: kt-kernel/examples/test_mla.py
  function read_gguf_file (line 20) | def read_gguf_file(gguf_file_path):
  function get_torch_tensor_from_gguf (line 53) | def get_torch_tensor_from_gguf(gguf_weights, name):
  function get_torch_tensor_and_type_from_gguf (line 57) | def get_torch_tensor_and_type_from_gguf(gguf_weights, name):
  function type_to_ggml_type (line 61) | def type_to_ggml_type(type):
  function read_gguf_directory (line 141) | def read_gguf_directory(directory):
  function test_cpu_mla (line 214) | def test_cpu_mla():
  function load_fp16_tensor (line 295) | def load_fp16_tensor(file_path, shape):
  function load_fp32_tensor (line 305) | def load_fp32_tensor(file_path, shape):
  function test_torch (line 314) | def test_torch():

FILE: kt-kernel/examples/test_mla_qlen.py
  function read_gguf_file (line 20) | def read_gguf_file(gguf_file_path):
  function get_torch_tensor_from_gguf (line 53) | def get_torch_tensor_from_gguf(gguf_weights, name):
  function get_torch_tensor_and_type_from_gguf (line 57) | def get_torch_tensor_and_type_from_gguf(gguf_weights, name):
  function type_to_ggml_type (line 61) | def type_to_ggml_type(type):
  function read_gguf_directory (line 141) | def read_gguf_directory(directory):
  function build_mla (line 214) | def build_mla():
  function load_fp32_tensor (line 289) | def load_fp32_tensor(file_path, shape):

FILE: kt-kernel/examples/test_mla_quant.py
  function load_fp32_tensor_raw (line 20) | def load_fp32_tensor_raw(file_path):
  function load_fp16_tensor (line 28) | def load_fp16_tensor(file_path, shape=None):
  function load_fp32_tensor (line 39) | def load_fp32_tensor(file_path, shape):
  function test_torch (line 48) | def test_torch():

FILE: kt-kernel/examples/test_mla_simple.py
  function torch_attn (line 136) | def torch_attn(hidden_states: torch.Tensor,
  function torch_attn_for_test (line 284) | def torch_attn_for_test(hidden_states,kv_cache,):
  function test_mla_simple (line 287) | def test_mla_simple():

FILE: kt-kernel/examples/test_mla_torch.py
  function torch_attn (line 178) | def torch_attn(

FILE: kt-kernel/examples/test_mlp.py
  function act_fn (line 33) | def act_fn(x):
  function mlp_torch (line 37) | def mlp_torch(input, gate_proj, up_proj, down_proj):

FILE: kt-kernel/examples/test_moe.py
  function act_fn (line 44) | def act_fn(x):
  function mlp_torch (line 48) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 56) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function to_cpuinfer_tensor (line 88) | def to_cpuinfer_tensor(tensor, type):
  function from_cpuinfer_tensor (line 93) | def from_cpuinfer_tensor(tensor, size, type):

FILE: kt-kernel/examples/test_moe_amx.py
  function act_fn (line 25) | def act_fn(x):
  function mlp_torch (line 29) | def mlp_torch(input, gate_proj, up_proj, down_proj, debug_expert_id=None...
  function moe_torch (line 51) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj,...
  function test_moe (line 101) | def test_moe(quant_mode: str):

FILE: kt-kernel/examples/test_moe_kernel.py
  function act_fn (line 39) | def act_fn(x):
  function mlp_torch (line 43) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 51) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function test_moe (line 83) | def test_moe(quant_mode: str):

FILE: kt-kernel/examples/test_moe_kml.py
  function act_fn (line 33) | def act_fn(x):
  function mlp_torch (line 37) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 45) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function test_moe (line 77) | def test_moe(quant_mode: str):

FILE: kt-kernel/examples/test_rope.cpp
  function create_random_vector (line 8) | std::vector<float> create_random_vector(size_t total_size, std::vector<s...
  function print_vector_to_file (line 30) | void print_vector_to_file(const std::vector<float>& vec, const char* fil...
  function cpp_torch_rope_with_apply_single (line 38) | std::pair<std::vector<float>, std::vector<float>> cpp_torch_rope_with_ap...
  function main (line 69) | int main() {

FILE: kt-kernel/examples/test_rope.py
  function load_fp16_tensor (line 35) | def load_fp16_tensor(file_path, shape):
  function load_fp32_tensor (line 42) | def load_fp32_tensor(file_path, shape):
  function torch_rope (line 62) | def torch_rope(q, k):

FILE: kt-kernel/examples/test_softmax.py
  function load_fp16_tensor (line 6) | def load_fp16_tensor(file_path, shape):

FILE: kt-kernel/examples/test_write_buffer.py
  function make_cpu_infer (line 28) | def make_cpu_infer(thread_num=80):
  function div_up (line 32) | def div_up(a, b):
  function build_config_fp8 (line 36) | def build_config_fp8(cpuinfer, expert_num, num_experts_per_tok, hidden_s...
  function build_config_fp8_perchannel (line 46) | def build_config_fp8_perchannel(cpuinfer, expert_num, num_experts_per_to...
  function build_config_bf16 (line 57) | def build_config_bf16(cpuinfer, expert_num, num_experts_per_tok, hidden_...
  function allocate_weights_fp8 (line 64) | def allocate_weights_fp8(expert_num, hidden_size, intermediate_size, gro...
  function allocate_weights_fp8_perchannel (line 99) | def allocate_weights_fp8_perchannel(expert_num, hidden_size, intermediat...
  function allocate_weights_bf16 (line 126) | def allocate_weights_bf16(expert_num, hidden_size, intermediate_size):
  function test_fp8_write_buffer (line 145) | def test_fp8_write_buffer(gpu_tp_count):
  function test_fp8_perchannel_write_buffer (line 355) | def test_fp8_perchannel_write_buffer(gpu_tp_count):
  function test_bf16_write_buffer (line 544) | def test_bf16_write_buffer(gpu_tp_count):
  function test_with_tp (line 706) | def test_with_tp(quant_mode: str, gpu_tp_count: int):
  function main (line 718) | def main(quant_modes=None):

FILE: kt-kernel/examples/torch_attention.py
  class KDeepSeekV3Cache (line 13) | class KDeepSeekV3Cache(nn.Module):
    method __init__ (line 14) | def __init__(
    method update (line 33) | def update(
    method get_page_table (line 69) | def get_page_table(self, cache_position: torch.Tensor, q_indptr: torch...
  function rotate_half (line 90) | def rotate_half(x):
  function apply_rotary_pos_emb (line 96) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  class DeepseekV2RMSNorm (line 127) | class DeepseekV2RMSNorm(nn.Module):
    method __init__ (line 128) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 136) | def forward(self, hidden_states):
  class DeepseekV2RotaryEmbedding (line 144) | class DeepseekV2RotaryEmbedding(nn.Module):
    method __init__ (line 145) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method forward (line 157) | def forward(self, x, position_ids):
  class DeepseekV3RotaryEmbedding (line 172) | class DeepseekV3RotaryEmbedding(nn.Module):
    method __init__ (line 173) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 192) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 205) | def forward(self, x, seq_len=None):
  function yarn_find_correction_dim (line 216) | def yarn_find_correction_dim(
  function yarn_find_correction_range (line 225) | def yarn_find_correction_range(
  function yarn_linear_ramp_mask (line 236) | def yarn_linear_ramp_mask(min, max, dim):
  function yarn_get_mscale (line 244) | def yarn_get_mscale(scale=1, mscale=1):
  class DeepseekV3YarnRotaryEmbedding (line 249) | class DeepseekV3YarnRotaryEmbedding(DeepseekV3RotaryEmbedding):
    method __init__ (line 251) | def __init__(
    method _set_cos_sin_cache (line 272) | def _set_cos_sin_cache(self, seq_len, device, dtype):

FILE: kt-kernel/ext_bindings.cpp
  function to_float_ptr (line 64) | py::object to_float_ptr(uintptr_t input_ptr, int size, ggml_type type) {
  function from_float_ptr (line 88) | py::object from_float_ptr(uintptr_t input_ptr, int size, ggml_type type) {
  function void_ptr_nested_to_uint (line 118) | std::vector<std::vector<uintptr_t>> void_ptr_nested_to_uint(const std::v...
  function uint_to_void_ptr_nested (line 131) | std::vector<std::vector<T*>> uint_to_void_ptr_nested(const std::vector<s...
  class MOEBindings (line 156) | class MOEBindings {
    class WarmUpBindings (line 158) | class WarmUpBindings {
      type Args (line 160) | struct Args {
      method inner (line 164) | static void inner(void* args) {
      method cpuinfer_interface (line 168) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(std::shared_...
    class LoadWeightsBindings (line 173) | class LoadWeightsBindings {
      type Args (line 175) | struct Args {
      method inner (line 179) | static void inner(void* args) {
      method cpuinfer_interface (line 183) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(std::shared_...
      method cpuinfer_interface (line 194) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(std::shared_...
    class ForwardBindings (line 198) | class ForwardBindings {
      type Args (line 200) | struct Args {
      method inner (line 211) | static void inner(void* args) {
      method cpuinfer_interface (line 216) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(std::shared_...
      method cpuinfer_interface (line 222) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(std::shared_...
  function bind_moe_module (line 231) | void bind_moe_module(py::module_& moe_module, const char* name) {
  function PYBIND11_MODULE (line 302) | PYBIND11_MODULE(kt_kernel_ext, m) {

FILE: kt-kernel/operators/amx/awq-moe.hpp
  class AMX_AWQ_MOE_TP (line 30) | class AMX_AWQ_MOE_TP : public AMX_MOE_BASE<T, AMX_AWQ_MOE_TP<T>> {
    method write_weights (line 51) | inline void write_weights(std::filesystem::path prefix, std::string ma...
    method write_weights (line 73) | inline void write_weights(std::filesystem::path prefix, std::string ma...
    method read_weights (line 127) | inline void read_weights(std::filesystem::path prefix, std::string mat...
    method read_weights (line 151) | inline bool read_weights(std::filesystem::path prefix, std::string mat...
    method read_awq_weights (line 210) | inline void read_awq_weights(std::filesystem::path prefix, std::string...
    method load_check (line 253) | inline void load_check() {
    method verify_load_right (line 258) | void verify_load_right() {
    method dump_buffer_b (line 290) | inline void dump_buffer_b(const std::string& quantization_type, int ex...
    method convert_zeros_to_mins_avx (line 373) | inline void convert_zeros_to_mins_avx(const uint32_t* zeros_int4_packe...
    method AMX_AWQ_MOE_TP (line 392) | AMX_AWQ_MOE_TP() = default;
    method AMX_AWQ_MOE_TP (line 394) | AMX_AWQ_MOE_TP(GeneralMOEConfig config, int tp_part_idx_ = 0) : Base(c...
    method derived_init (line 396) | void derived_init() {
    method buffer_a_required_size_impl (line 428) | size_t buffer_a_required_size_impl(size_t m, size_t k) const {
    method buffer_b_required_size_impl (line 431) | size_t buffer_b_required_size_impl(size_t n, size_t k) const {
    method buffer_c_required_size_impl (line 434) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ...
    method make_buffer_a_impl (line 436) | std::shared_ptr<typename T::BufferA> make_buffer_a_impl(size_t m, size...
    method make_buffer_b_impl (line 439) | std::shared_ptr<typename T::BufferB> make_buffer_b_impl(size_t n, size...
    method make_buffer_c_impl (line 442) | std::shared_ptr<typename T::BufferC> make_buffer_c_impl(size_t m, size...
    method do_gate_up_gemm (line 450) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int...
    method do_down_gemm (line 465) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) {
    method load_weights (line 486) | void load_weights() {
  class TP_MOE<AMX_AWQ_MOE_TP<K>> (line 649) | class TP_MOE<AMX_AWQ_MOE_TP<K>> : public TP_MOE<AMX_MOE_BASE<K, AMX_AWQ_...
    method load_weights (line 654) | void load_weights() override {

FILE: kt-kernel/operators/amx/bf16-moe.hpp
  class AMX_BF16_MOE_TP (line 31) | class AMX_BF16_MOE_TP : public AMX_MOE_BASE<T, AMX_BF16_MOE_TP<T>> {
    method AMX_BF16_MOE_TP (line 49) | AMX_BF16_MOE_TP() = default;
    method AMX_BF16_MOE_TP (line 51) | AMX_BF16_MOE_TP(GeneralMOEConfig config, int tp_part_idx_ = 0) : Base(...
    method derived_init (line 55) | void derived_init() {
    method buffer_a_required_size_impl (line 66) | size_t buffer_a_required_size_impl(size_t m, size_t k) const { return ...
    method buffer_b_required_size_impl (line 68) | size_t buffer_b_required_size_impl(size_t n, size_t k) const {
    method buffer_c_required_size_impl (line 72) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ...
    method make_buffer_a_impl (line 74) | std::shared_ptr<typename T::BufferA> make_buffer_a_impl(size_t m, size...
    method make_buffer_b_impl (line 78) | std::shared_ptr<typename T::BufferB> make_buffer_b_impl(size_t n, size...
    method make_buffer_c_impl (line 82) | std::shared_ptr<typename T::BufferC> make_buffer_c_impl(size_t m, size...
    method do_gate_up_gemm (line 90) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int...
    method do_down_gemm (line 104) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) {
    method dump_buffer_b (line 118) | inline void dump_buffer_b(int expert_idx, const std::string& matrix_ty...
    method load_weights (line 159) | void load_weights() {
    method fast_memcpy_64 (line 211) | static inline void fast_memcpy_64(void* __restrict dst, const void* __...
    method fast_stream_64 (line 217) | static inline void fast_stream_64(void* __restrict dst, const void* __...
    method fast_memcpy (line 223) | static inline void fast_memcpy(void* __restrict dst, const void* __res...
    method unpack_nk_block_bf16 (line 248) | static inline void unpack_nk_block_bf16(const ggml_bf16_t* src, ggml_b...
    method write_weights_to_buffer (line 301) | void write_weights_to_buffer(int gpu_tp_count, [[maybe_unused]] int cp...
  class TP_MOE<AMX_BF16_MOE_TP<K>> (line 432) | class TP_MOE<AMX_BF16_MOE_TP<K>> : public TP_MOE<AMX_MOE_BASE<K, AMX_BF1...
    method load_weights (line 437) | void load_weights() override {
    method write_weight_scale_to_buffer (line 521) | void write_weight_scale_to_buffer(int gpu_tp_count, int expert_id, con...

FILE: kt-kernel/operators/amx/fp8-moe.hpp
  class AMX_FP8_MOE_TP (line 29) | class AMX_FP8_MOE_TP : public AMX_MOE_BASE<T, AMX_FP8_MOE_TP<T>> {
    method AMX_FP8_MOE_TP (line 47) | AMX_FP8_MOE_TP() = default;
    method AMX_FP8_MOE_TP (line 49) | AMX_FP8_MOE_TP(GeneralMOEConfig config, int tp_part_idx_ = 0) : Base(c...
    method derived_init (line 53) | void derived_init() {
    method buffer_a_required_size_impl (line 66) | size_t buffer_a_required_size_impl(size_t m, size_t k) const { return ...
    method buffer_b_required_size_impl (line 67) | size_t buffer_b_required_size_impl(size_t n, size_t k) const {
    method buffer_c_required_size_impl (line 70) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ...
    method make_buffer_a_impl (line 72) | std::shared_ptr<typename T::BufferA> make_buffer_a_impl(size_t m, size...
    method make_buffer_b_impl (line 75) | std::shared_ptr<typename T::BufferB> make_buffer_b_impl(size_t n, size...
    method make_buffer_c_impl (line 78) | std::shared_ptr<typename T::BufferC> make_buffer_c_impl(size_t m, size...
    method do_gate_up_gemm (line 86) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int...
    method do_down_gemm (line 95) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) {
    method dump_buffer_b (line 105) | inline void dump_buffer_b(const std::string& quantization_type, int ex...
    method load_weights (line 173) | void load_weights() {
    method fast_memcpy_64 (line 228) | static inline void fast_memcpy_64(void* __restrict dst, const void* __...
    method fast_memcpy (line 234) | static inline void fast_memcpy(void* __restrict dst, const void* __res...
    method unpack_nk_block (line 259) | static inline void unpack_nk_block(const uint8_t* src, uint8_t* dst, s...
    method unpack_4nk_blocks (line 325) | static inline void unpack_4nk_blocks(const uint8_t* src[4], uint8_t* d...
    method write_weights_to_buffer (line 385) | void write_weights_to_buffer(int gpu_tp_count, [[maybe_unused]] int cp...
  class TP_MOE<AMX_FP8_MOE_TP<K>> (line 629) | class TP_MOE<AMX_FP8_MOE_TP<K>> : public TP_MOE<AMX_MOE_BASE<K, AMX_FP8_...
    method load_weights (line 634) | void load_weights() override {
    method write_weight_scale_to_buffer (line 753) | void write_weight_scale_to_buffer(int gpu_tp_count, int expert_id, con...

FILE: kt-kernel/operators/amx/fp8-perchannel-moe.hpp
  class AMX_FP8_PERCHANNEL_MOE_TP (line 28) | class AMX_FP8_PERCHANNEL_MOE_TP : public AMX_MOE_BASE<T, AMX_FP8_PERCHAN...
    method AMX_FP8_PERCHANNEL_MOE_TP (line 46) | AMX_FP8_PERCHANNEL_MOE_TP() = default;
    method AMX_FP8_PERCHANNEL_MOE_TP (line 48) | AMX_FP8_PERCHANNEL_MOE_TP(GeneralMOEConfig config, int tp_part_idx_ = ...
    method derived_init (line 52) | void derived_init() {
    method buffer_a_required_size_impl (line 66) | size_t buffer_a_required_size_impl(size_t m, size_t k) const { return ...
    method buffer_b_required_size_impl (line 67) | size_t buffer_b_required_size_impl(size_t n, size_t k) const {
    method buffer_c_required_size_impl (line 71) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ...
    method make_buffer_a_impl (line 73) | std::shared_ptr<typename T::BufferA> make_buffer_a_impl(size_t m, size...
    method make_buffer_b_impl (line 76) | std::shared_ptr<typename T::BufferB> make_buffer_b_impl(size_t n, size...
    method make_buffer_c_impl (line 80) | std::shared_ptr<typename T::BufferC> make_buffer_c_impl(size_t m, size...
    method do_gate_up_gemm (line 88) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int...
    method do_down_gemm (line 99) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) {
    method fast_memcpy_64 (line 107) | static inline void fast_memcpy_64(void* __restrict dst, const void* __...
    method fast_memcpy (line 113) | static inline void fast_memcpy(void* __restrict dst, const void* __res...
    method unpack_nk_block (line 138) | static inline void unpack_nk_block(const uint8_t* src, uint8_t* dst, s...
    method unpack_4nk_blocks (line 204) | static inline void unpack_4nk_blocks(const uint8_t* src[4], uint8_t* d...
    method write_weights_to_buffer (line 259) | void write_weights_to_buffer(int gpu_tp_count, [[maybe_unused]] int cp...
    method load_weights (line 504) | void load_weights() {
  class TP_MOE<AMX_FP8_PERCHANNEL_MOE_TP<K>> (line 559) | class TP_MOE<AMX_FP8_PERCHANNEL_MOE_TP<K>> : public TP_MOE<AMX_MOE_BASE<...
    method write_weight_scale_to_buffer (line 577) | void write_weight_scale_to_buffer(int gpu_tp_count, int expert_id, con...
    method load_weights (line 598) | void load_weights() override {

FILE: kt-kernel/operators/amx/k2-moe.hpp
  class AMX_K2_MOE_TP (line 28) | class AMX_K2_MOE_TP : public AMX_MOE_BASE<T, AMX_K2_MOE_TP<T>> {
    method AMX_K2_MOE_TP (line 46) | AMX_K2_MOE_TP() = default;
    method AMX_K2_MOE_TP (line 48) | AMX_K2_MOE_TP(GeneralMOEConfig config, int tp_part_idx_ = 0) : Base(co...
    method derived_init (line 50) | void derived_init() {
    method buffer_a_required_size_impl (line 64) | size_t buffer_a_required_size_impl(size_t m, size_t k) const {
    method buffer_b_required_size_impl (line 67) | size_t buffer_b_required_size_impl(size_t n, size_t k) const {
    method buffer_c_required_size_impl (line 70) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ...
    method make_buffer_a_impl (line 72) | std::shared_ptr<typename T::BufferA> make_buffer_a_impl(size_t m, size...
    method make_buffer_b_impl (line 75) | std::shared_ptr<typename T::BufferB> make_buffer_b_impl(size_t n, size...
    method make_buffer_c_impl (line 78) | std::shared_ptr<typename T::BufferC> make_buffer_c_impl(size_t m, size...
    method do_gate_up_gemm (line 86) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int...
    method do_down_gemm (line 101) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) {
    method load_weights (line 120) | void load_weights() {
    method fast_memcpy (line 189) | static inline void fast_memcpy(void* __restrict dst, const void* __res...
    method fast_fp32_to_bf16 (line 210) | static inline void fast_fp32_to_bf16(ggml_bf16_t* __restrict dst, cons...
    method write_weights_to_buffer (line 244) | void write_weights_to_buffer(int gpu_tp_count, int cpu_tp_count, int e...
  class TP_MOE<AMX_K2_MOE_TP<K>> (line 455) | class TP_MOE<AMX_K2_MOE_TP<K>> : public TP_MOE<AMX_MOE_BASE<K, AMX_K2_MO...
    method load_weights (line 460) | void load_weights() override {
    method write_weight_scale_to_buffer (line 632) | void write_weight_scale_to_buffer(int gpu_tp_count, int expert_id, con...

FILE: kt-kernel/operators/amx/la/amx-example.cpp
  function main (line 8) | int main() {

FILE: kt-kernel/operators/amx/la/amx.hpp
  type amx (line 20) | namespace amx {
    function __m512 (line 22) | static inline __m512 exp_avx512(__m512 x) {
    function __m512 (line 47) | static inline __m512 act_fn(__m512 gate_val, __m512 up_val) {
    function gemm (line 104) | inline void gemm(int m, int n, int k, const void* a, size_t lda, int t...
    function init_tile (line 113) | inline void init_tile(int type_a, int type_b, int type_c) {
    function recommended_nth (line 121) | inline int recommended_nth(int m, int n, int k, int type_a, int type_b...

FILE: kt-kernel/operators/amx/la/amx_buffers.hpp
  type amx (line 17) | namespace amx {
    type BufferAImpl (line 20) | struct BufferAImpl {
      method required_size (line 29) | static size_t required_size(int max_m, int k) { return sizeof(int8_t...
      method BufferAImpl (line 31) | BufferAImpl(int max_m, int k, void* ptr) : max_m(max_m), k(k) {
      method set_data (line 41) | void set_data(void* ptr) {
      method from_mat (line 47) | void from_mat(int m, ggml_bf16_t* src, int ith, int nth) {
    type BufferAWithSumImpl (line 127) | struct BufferAWithSumImpl {
      method required_size (line 137) | static size_t required_size(int max_m, int k) { return sizeof(int8_t...
      method BufferAWithSumImpl (line 139) | BufferAWithSumImpl(int max_m, int k, void* ptr) : max_m(max_m), k(k) {
      method set_data (line 149) | void set_data(void* ptr) {
      method from_mat (line 156) | void from_mat(int m, ggml_bf16_t* src, int ith, int nth) {
    type BufferAWithSumKGroupImpl (line 217) | struct BufferAWithSumKGroupImpl {
      method required_size (line 227) | static size_t required_size(int max_m, int k, int k_group_size) {
      method BufferAWithSumKGroupImpl (line 231) | BufferAWithSumKGroupImpl(int max_m, int k, int k_group_size, void* ptr)
      method set_data (line 242) | void set_data(void* ptr) {
      method from_mat (line 249) | void from_mat(int m, ggml_bf16_t* src, int ith, int nth) {
    type BufferAKGroupImpl (line 339) | struct BufferAKGroupImpl {
      method required_size (line 348) | static size_t required_size(int max_m, int k, int k_group_size) {
      method BufferAKGroupImpl (line 353) | BufferAKGroupImpl(int max_m, int k, int k_group_size, void* ptr) : m...
      method set_data (line 364) | void set_data(void* ptr) {
      method from_mat (line 379) | void from_mat(int m, ggml_bf16_t* src, int ith, int nth) {
    type BufferASmallKGroupImpl (line 446) | struct BufferASmallKGroupImpl : public BufferAKGroupImpl<K> {
      method BufferASmallKGroupImpl (line 459) | BufferASmallKGroupImpl(int max_m, int k, int k_group_size, void* ptr...
      method from_mat (line 462) | void from_mat(int m, ggml_bf16_t* src, int ith, int nth) {
    type BufferBInt4Impl (line 514) | struct BufferBInt4Impl {
      method required_size (line 529) | static size_t required_size(int n, int k) { return sizeof(int8_t) * ...
      method BufferBInt4Impl (line 531) | BufferBInt4Impl(int n, int k, void* ptr) : n(n), k(k) {
      method __m128i (line 543) | static __m128i round_4bit_s8(__m128i x) {
      method from_mat (line 557) | void from_mat(ggml_bf16_t* src, int ith, int nth) {
      method dt (line 647) | dt* get_submat(int n, int k, int n_begin, int k_begin) {
    type BufferBKGroupImpl (line 662) | struct BufferBKGroupImpl {
      method required_size (line 677) | static size_t required_size(int n, int k, int k_group_size) {
      method BufferBKGroupImpl (line 682) | BufferBKGroupImpl(int n, int k, int k_group_size, void* ptr) : n(n),...
      method __m128i (line 697) | static __m128i round_4bit_s8(__m128i x) {
      method from_mat (line 711) | void from_mat(ggml_bf16_t* src, int ith, int nth) {
      method dt (line 817) | dt* get_submat(int n, int k, int n_begin, int k_begin) {
    type BufferBInt4WithZeroImpl (line 836) | struct BufferBInt4WithZeroImpl {
      method required_size (line 851) | static size_t required_size(int n, int k) { return sizeof(int8_t) * ...
      method BufferBInt4WithZeroImpl (line 853) | BufferBInt4WithZeroImpl(int n, int k, void* ptr) : n(n), k(k) {
      method __m128i (line 867) | static __m128i round_4bit_u8(__m128i x) {
      method from_mat (line 875) | void from_mat(ggml_bf16_t* src, int ith, int nth) {
      method dt (line 970) | dt* get_submat(int n, int k, int n_begin, int k_begin) {
    type BufferBInt4KGroupImpl (line 988) | struct BufferBInt4KGroupImpl {
      method required_size (line 999) | static size_t required_size(int n, int k, int k_group_size) {
      method BufferBInt4KGroupImpl (line 1003) | BufferBInt4KGroupImpl(int n, int k, int k_group_size, void* ptr) : n...
      method from_raw_mat (line 1020) | void from_raw_mat(uint8_t* proj, int ith, int nth) {
      method dt (line 1033) | dt* get_submat(int n, int k, int n_begin, int k_begin) {
      method split_range_n (line 1047) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
    type BufferBInt4WithZeroKGroupImpl (line 1057) | struct BufferBInt4WithZeroKGroupImpl {
      method required_size (line 1072) | static size_t required_size(int n, int k, int k_group_size) {
      method BufferBInt4WithZeroKGroupImpl (line 1076) | BufferBInt4WithZeroKGroupImpl(int n, int k, int k_group_size, void* ...
      method __m128i (line 1091) | static __m128i round_4bit_u8(__m128i x) {
      method from_raw_mat (line 1099) | void from_raw_mat(uint8_t* proj, int ith, int nth) {
      method from_mat (line 1135) | void from_mat(ggml_bf16_t* src, int ith, int nth) {
      method dt (line 1243) | dt* get_submat(int n, int k, int n_begin, int k_begin) {
    type BufferBInt4WithZeroLowKGroupImpl (line 1265) | struct BufferBInt4WithZeroLowKGroupImpl {
      method required_size (line 1280) | static size_t required_size(int n, int k, int k_group_size) {
      method BufferBInt4WithZeroLowKGroupImpl (line 1284) | BufferBInt4WithZeroLowKGroupImpl(int n, int k, int k_group_size, voi...
      method __m128i (line 1299) | static __m128i round_4bit_u8(__m128i x) {
      method from_raw_mat (line 1307) | void from_raw_mat(uint8_t* proj, int ith, int nth) {
      method from_mat (line 1343) | void from_mat(ggml_bf16_t* src, int ith, int nth) {
      method dt (line 1460) | dt* get_submat(int n, int k, int n_begin, int k_begin) {
    type BufferCImpl (line 1482) | struct BufferCImpl {
      method required_size (line 1490) | static size_t required_size(int max_m, int n) { return sizeof(float)...
      method BufferCImpl (line 1492) | BufferCImpl(int max_m, int n, void* ptr) : max_m(max_m), n(n) {
      method set_data (line 1502) | void set_data(void* ptr) {
      method to_mat (line 1507) | void to_mat(int m, ggml_bf16_t* dst, int ith, int nth) {
    type BufferCReduceImpl (line 1536) | struct BufferCReduceImpl {
      method required_size (line 1545) | static size_t required_size(int max_m, int n) {
      method BufferCReduceImpl (line 1550) | BufferCReduceImpl(int max_m, int n, void* ptr) : max_m(max_m), n(n) {
      method set_data (line 1560) | void set_data(void* ptr) {
      method to_mat (line 1567) | void to_mat(int m, ggml_bf16_t* dst, int ith, int nth) {
      method clear_int_buffer (line 1603) | void clear_int_buffer() { std::memset(int_c, 0, sizeof(int32_t) * ma...
      method convert_int_to_float (line 1606) | void convert_int_to_float(int m) {

FILE: kt-kernel/operators/amx/la/amx_config.hpp
  type amx (line 33) | namespace amx {
    function enable_amx (line 59) | inline bool enable_amx() {
    type TileConfig (line 99) | struct alignas(64) TileConfig {
      method TileConfig (line 108) | TileConfig() {
      method set_row_col (line 116) | void set_row_col(int i, uint8_t row, uint16_t col) {
      method set_config (line 121) | void set_config() { _tile_loadconfig(this); }
      method load_data (line 123) | static void load_data(int to, void* from, size_t stride) {
      method store_data (line 154) | static void store_data(int from, void* to, size_t stride) {

FILE: kt-kernel/operators/amx/la/amx_kernels.hpp
  type amx (line 18) | namespace amx {
    type dpb133 (line 55) | struct dpb133 {
    type GemmKernel133 (line 88) | struct GemmKernel133 {
      method recommended_nth (line 98) | static int recommended_nth(int m) { return (m + M_STEP - 1) / M_STEP; }
      method config (line 100) | static void config() {
      method run_full_tile (line 115) | static void run_full_tile(const TA* a, size_t lda, const TB* b, size...
      method run_full_tile_zero (line 136) | static void run_full_tile_zero(const TA* a, size_t lda, const TB* b,...
      method convert_full_tile_b_to_vnni_inplace (line 158) | static void convert_full_tile_b_to_vnni_inplace(void* b) { transpose...
      type ATile (line 161) | struct ATile {
        method partial_load (line 163) | void partial_load(TA* a, int m, int k, size_t lda) {
        method partial_load_quant (line 172) | void partial_load_quant(block_q4_0* a, int m, int k, size_t lda) {
        method partial_load_quant (line 182) | void partial_load_quant(block_q8_0* a, int m, int k, size_t lda) {
        method partial_load_quant (line 192) | void partial_load_quant(TA* a, int m, size_t lda) {
        method partial_load_quant (line 206) | void partial_load_quant(block_q4_K* a, int m, int inner_block_idx,...
        method partial_load_quant (line 221) | void partial_load_quant(blocks_aligned_q8_0_ref a, int m, int k, i...
      type BTile (line 231) | struct alignas(64) BTile {
        method partial_load (line 235) | void partial_load(TB* b, int n, int k, size_t ldb) {
        method partial_load_quant (line 244) | void partial_load_quant(block_q8_0* b, int n, int k, size_t ldb) {
        method partial_load_quant (line 258) | void partial_load_quant(blocks_aligned_q8_0_ref b, int n, int k, i...
        method load_from (line 272) | void load_from(TB* b, size_t ldb) {
        method run_full_ac (line 282) | void run_full_ac(TA* a, size_t lda, TC* c, size_t ldc) {
      type BTileSum (line 288) | struct alignas(64) BTileSum {
        method partial_load_quant (line 292) | void partial_load_quant(block_q8_K* b, int n, int inner_block_idx,...
      type CTile (line 310) | struct alignas(64) CTile {
        method partial_load (line 314) | void partial_load(TC* c, int m, int n, size_t ldc) {
        method partial_store (line 322) | void partial_store(TC* c, int m, int n, size_t ldc) {
        method to_fp32 (line 330) | void to_fp32() {
      type PartialTiles (line 340) | struct PartialTiles {
        method partial_run (line 344) | void partial_run(int m, int n, int k, TA* a, size_t lda, TB* b, si...
        method partial_run_quant (line 353) | void partial_run_quant(int m, int n, int k, QA* a, size_t lda, blo...
        method partial_run_quant_ac (line 371) | void partial_run_quant_ac(int m, int n, int k, QA* a, size_t lda, ...
        method partial_run_quant_ac (line 388) | void partial_run_quant_ac(int m, int n, int k, AQA a, int a_blck_s...
      type PartialTilesSum (line 406) | struct PartialTilesSum {
        method partial_run_quant_ac (line 411) | void partial_run_quant_ac(int m, int n, int inner_block_idx, block...
    type GemmKernel133BF (line 429) | struct GemmKernel133BF {
      method recommended_nth (line 441) | static int recommended_nth(int m) { return (m + M_STEP - 1) / M_STEP; }
      method config (line 442) | static void config() {
      method run_full_tile (line 460) | static void run_full_tile(const dt* a, size_t lda, const dt* b, size...
      type ATile (line 484) | struct ATile {
        method partial_load (line 487) | void partial_load(dt* a, int m, int k, size_t lda) {
      type BTile (line 497) | struct alignas(64) BTile {
        method full_load (line 500) | void full_load(dt* b, size_t ldb) { partial_load(b, TILE_N, TILE_K...
        method partial_load (line 502) | void partial_load(dt* b, int n, int k, size_t ldb) {
        method run_full_ac (line 512) | void run_full_ac(TA* a, size_t lda, TC* c, size_t ldc) {
      type CTile (line 517) | struct alignas(64) CTile {
        method partial_load (line 520) | void partial_load(float* c, int m, int n, size_t ldc) {
        method partial_store (line 529) | void partial_store(float* c, int m, int n, size_t ldc) {
      type PartialTiles (line 539) | struct PartialTiles {
        method partial_run (line 543) | void partial_run(int m, int n, int k, dt* a, size_t lda, dt* b, si...
    function T2 (line 554) | constexpr T2 convert_to(const T1& value) {
    type GemmKernel224BF (line 564) | struct GemmKernel224BF {
      method name (line 579) | static std::string name() { return "BF16"; }
      method recommended_nth (line 581) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 583) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 589) | static void config() {
      method load_a (line 607) | static void load_a(dt* a, size_t lda) {
      method load_b (line 617) | static void load_b(dt* b, size_t ldb) {
      method clean_c (line 627) | static void clean_c() {
      method load_c (line 636) | static void load_c(output_t* c, size_t ldc) {
      method store_c (line 648) | static void store_c(output_t* c, size_t ldc) {
      method run_tile (line 660) | static void run_tile() {
      type BufferA (line 669) | struct BufferA {
        method required_size (line 673) | static size_t required_size(int max_m, int k) { return sizeof(ggml...
        method BufferA (line 675) | BufferA(int max_m, int k, void* ptr) : max_m(max_m), k(k) {
        method set_data (line 682) | void set_data(void* new_ptr) { a = reinterpret_cast<ggml_bf16_t*>(...
        method from_mat (line 684) | void from_mat(int m, ggml_bf16_t* src, int ith, int nth) {
        method ggml_bf16_t (line 703) | ggml_bf16_t* get_submat(int m, int k, int m_begin, int k_begin) {
      type BufferB (line 712) | struct BufferB {
        method required_size (line 717) | static size_t required_size(int n, int k) { return sizeof(ggml_bf1...
        method BufferB (line 719) | BufferB(int n, int k, void* ptr) : n(n), k(k) {
        method set_data (line 726) | void set_data(void* new_ptr) { b = reinterpret_cast<ggml_bf16_t*>(...
        method from_mat (line 728) | void from_mat(ggml_bf16_t* src, int ith, int nth) {
        method ggml_bf16_t (line 751) | ggml_bf16_t* get_submat(int n, int k, int n_begin, int k_begin) {
      type BufferC (line 762) | struct BufferC {
        method required_size (line 776) | static size_t required_size(int max_m, int n) { return sizeof(floa...
        method BufferC (line 778) | BufferC(int max_m, int n, void* ptr) : max_m(max_m), n(n) {
        method set_data (line 785) | void set_data(void* new_ptr) { c = reinterpret_cast<float*>(new_pt...
        method to_mat (line 787) | void to_mat(int m, ggml_bf16_t* dst, int ith, int nth) {
    type GemmKernel224Int8 (line 816) | struct GemmKernel224Int8 {
      method name (line 833) | static std::string name() { return "INT8"; }
      method recommended_nth (line 835) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 837) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 843) | static void config() {
      method load_a (line 861) | static void load_a(dt* a, size_t lda) {
      method load_b (line 871) | static void load_b(dt* b, size_t ldb) {
      method clean_c (line 881) | static void clean_c() {
      method load_c (line 890) | static void load_c(output_t* c, size_t ldc) {
      method store_c (line 902) | static void store_c(output_t* c, size_t ldc) {
      method run_tile (line 914) | static void run_tile() {
      type BufferB (line 926) | struct BufferB {
        method required_size (line 932) | static size_t required_size(int n, int k) { return sizeof(int8_t) ...
        method BufferB (line 934) | BufferB(int n, int k, void* ptr) : n(n), k(k) {
        method from_mat (line 946) | void from_mat(ggml_bf16_t* src, int ith, int nth) {  // CHECK: nth...
      method amx_kernel (line 1010) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method avx_kernel (line 1025) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method apply_scale (line 1053) | static void apply_scale(int m, int n, int m_begin, int n_begin, floa...
    type GemmKernel224Int4 (line 1073) | struct GemmKernel224Int4 {
      method name (line 1093) | static std::string name() { return "INT4"; }
      method recommended_nth (line 1095) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 1097) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 1103) | static void config() {
      method __m512i (line 1140) | static __m512i hi_mask() { return *((__m512i*)(&hi_mask_arr[0])); }
      method __m128i (line 1141) | static __m128i hi_mask_128() { return *((__m128i*)(&hi_mask_arr[0])); }
      method __m512i (line 1142) | static __m512i lo_mask() { return *((__m512i*)(&lo_mask_arr[0])); }
      method __m128i (line 1143) | static __m128i lo_mask_128() { return *((__m128i*)(&lo_mask_arr[0])); }
      method __m128i (line 1144) | static __m128i si_mask_128() { return *((__m128i*)(&sign_mask_arr[0]...
      method load_b_hi (line 1146) | static void load_b_hi(dt* b, size_t ldb) {
      method load_b_lo (line 1169) | static void load_b_lo(dt* b, size_t ldb) {
      method load_a (line 1193) | static void load_a(dt* a, size_t lda) {
      method clean_c (line 1203) | static void clean_c() {
      method load_c (line 1212) | static void load_c(output_t* c, size_t ldc) {
      method store_c (line 1224) | static void store_c(output_t* c, size_t ldc) {
      method run_tile (line 1236) | static void run_tile() {
      method avx_kernel (line 1249) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method amx_kernel (line 1279) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method apply_scale (line 1322) | static void apply_scale(int m, int n, int m_begin, int n_begin, floa...
    type GemmKernel224Int4_1 (line 1364) | struct GemmKernel224Int4_1 {
      method name (line 1381) | static std::string name() { return "INT4_1"; }
      method recommended_nth (line 1383) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 1385) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 1391) | static void config() {
      method __m512i (line 1428) | static __m512i hi_mask() { return *((__m512i*)(&hi_mask_arr[0])); }
      method __m128i (line 1429) | static __m128i hi_mask_128() { return *((__m128i*)(&hi_mask_arr[0])); }
      method __m512i (line 1430) | static __m512i lo_mask() { return *((__m512i*)(&lo_mask_arr[0])); }
      method __m128i (line 1431) | static __m128i lo_mask_128() { return *((__m128i*)(&lo_mask_arr[0])); }
      method __m128i (line 1432) | static __m128i si_mask_128() { return *((__m128i*)(&sign_mask_arr[0]...
      method load_b_hi (line 1434) | static void load_b_hi(dt* b, size_t ldb) {
      method load_b_lo (line 1457) | static void load_b_lo(dt* b, size_t ldb) {
      method load_a (line 1481) | static void load_a(dt* a, size_t lda) {
      method clean_c (line 1496) | static void clean_c() {
      method load_c (line 1505) | static void load_c(output_t* c, size_t ldc) {
      method store_c (line 1517) | static void store_c(output_t* c, size_t ldc) {
      method run_tile (line 1529) | static void run_tile() {
      method avx_kernel (line 1544) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method amx_kernel (line 1573) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method apply_scale (line 1617) | static void apply_scale(int m, int n, int m_begin, int n_begin, floa...
    function mat_mul_single (line 1647) | inline void mat_mul_single(int m, int n, int k, int8_t* a, size_t lda,...
    function mat_mul_single (line 1680) | inline void mat_mul_single(int m, int n, int k, ggml_bf16_t* a, size_t...
    function mat_mul_single (line 1724) | void mat_mul_single(int m, int n, int k, QA* a, size_t lda, block_q8_0...
    function mat_mul_single (line 1758) | inline void mat_mul_single(int m, int n, int k, block_q4_K* a, size_t ...
    function mat_mul_single (line 1800) | inline void mat_mul_single(int m, int n, int k, blocks_aligned_q8_0_re...
    function merge_mat (line 1830) | inline void merge_mat(int d0, int d1, float* a, float* b, size_t ld) {
    function merge_mats (line 1845) | inline void merge_mats(int d0, int d1, int cnt, float** data, size_t l...
    type GemmKernel (line 1865) | struct GemmKernel {
    type GemmKernel<uint8_t, TB, float> (line 1871) | struct GemmKernel<uint8_t, TB, float> {
    type GemmKernel<int8_t, TB, float> (line 1876) | struct GemmKernel<int8_t, TB, float> {
    type GemmKernel<block_q4_0, block_q8_0, float> (line 1881) | struct GemmKernel<block_q4_0, block_q8_0, float> {
    type GemmKernel<block_q8_0, block_q8_0, float> (line 1886) | struct GemmKernel<block_q8_0, block_q8_0, float> {
    type GemmKernel<block_q4_K, block_q8_K, float> (line 1891) | struct GemmKernel<block_q4_K, block_q8_K, float> {
    type GemmKernel<ggml_bf16_t, ggml_bf16_t, float> (line 1896) | struct GemmKernel<ggml_bf16_t, ggml_bf16_t, float> {
    function mat_mul (line 1919) | void mat_mul(int m, int n, int k, TA* a, size_t lda, TB* b, size_t ldb...
    function mat_mul (line 1933) | inline void mat_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function vec_mul (line 1990) | inline void vec_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function integer_mat_mul (line 1997) | void integer_mat_mul(int m, int n, int k, typename K::BufferA* ba, typ...
    function vec_mul (line 2022) | inline void vec_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function mat_mul (line 2028) | inline void mat_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function vec_mul (line 2034) | inline void vec_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function mat_mul (line 2040) | inline void mat_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function vec_mul (line 2046) | inline void vec_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function mat_mul (line 2052) | inline void mat_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function mat_mul (line 2058) | inline void mat_mul(int m, int n, int k, blocks_aligned_q8_0_ref aref,...
    type GemmKernel224Int4KGroup (line 2074) | struct GemmKernel224Int4KGroup {
      method name (line 2089) | static std::string name() { return "INT4_KGROUP"; }
      method recommended_nth (line 2090) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 2091) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 2097) | static void config() {
      method __m512i (line 2123) | static __m512i hi_mask() { return *((__m512i*)(&hi_mask_arr[0])); }
      method __m512i (line 2124) | static __m512i lo_mask() { return *((__m512i*)(&lo_mask_arr[0])); }
      method clean_c (line 2126) | static void clean_c() {
      method load_c (line 2133) | static void load_c(output_t* c, size_t ldc) {
      method store_c (line 2140) | static void store_c(output_t* c, size_t ldc) {
      method load_a (line 2147) | static void load_a(dt* a, size_t lda) {
      method load_b_lo (line 2152) | static void load_b_lo(dt* b, size_t ldb) {
      method load_b_hi (line 2174) | static void load_b_hi(dt* b, size_t ldb) {
      method run_tile (line 2191) | static void run_tile() {
      method avx_kernel (line 2205) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method amx_kernel (line 2247) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method apply_scale_kgroup (line 2275) | static void apply_scale_kgroup(int m, int n, int m_begin, int n_begi...
    type GemmKernel224Int4_1KGroup (line 2304) | struct GemmKernel224Int4_1KGroup {
      method name (line 2321) | static std::string name() { return "INT4_1K"; }
      method recommended_nth (line 2323) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 2325) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 2331) | static void config() {
      method __m512i (line 2368) | static __m512i hi_mask() { return *((__m512i*)(&hi_mask_arr[0])); }
      method __m128i (line 2369) | static __m128i hi_mask_128() { return *((__m128i*)(&hi_mask_arr[0])); }
      method __m512i (line 2370) | static __m512i lo_mask() { return *((__m512i*)(&lo_mask_arr[0])); }
      method __m128i (line 2371) | static __m128i lo_mask_128() { return *((__m128i*)(&lo_mask_arr[0])); }
      method __m128i (line 2372) | static __m128i si_mask_128() { return *((__m128i*)(&sign_mask_arr[0]...
      method load_b_hi (line 2374) | static void load_b_hi(dt* b, size_t ldb) {
      method load_b_lo (line 2397) | static void load_b_lo(dt* b, size_t ldb) {
      method load_a (line 2421) | static void load_a(dt* a, size_t lda) {
      method clean_c (line 2436) | static void clean_c() {
      method load_c (line 2445) | static void load_c(output_t* c, size_t ldc) {
      method store_c (line 2457) | static void store_c(output_t* c, size_t ldc) {
      method run_tile (line 2469) | static void run_tile() {
      method avx_kernel (line 2484) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method amx_kernel (line 2522) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method apply_scale_kgroup (line 2548) | static void apply_scale_kgroup(int m, int n, int m_begin, int n_begi...
    type GemmKernel224Int4_1_LowKGroup (line 2580) | struct GemmKernel224Int4_1_LowKGroup {
      method name (line 2597) | static std::string name() { return "INT4_1K"; }
      method recommended_nth (line 2599) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 2601) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 2607) | static void config() {
      method __m512i (line 2644) | static __m512i hi_mask() { return *((__m512i*)(&hi_mask_arr[0])); }
      method __m128i (line 2645) | static __m128i hi_mask_128() { return *((__m128i*)(&hi_mask_arr[0])); }
      method __m512i (line 2646) | static __m512i lo_mask() { return *((__m512i*)(&lo_mask_arr[0])); }
      method __m128i (line 2647) | static __m128i lo_mask_128() { return *((__m128i*)(&lo_mask_arr[0])); }
      method __m128i (line 2648) | static __m128i si_mask_128() { return *((__m128i*)(&sign_mask_arr[0]...
      method load_b_hi (line 2650) | static void load_b_hi(dt* b, size_t ldb) {
      method load_b_lo (line 2674) | static void load_b_lo(dt* b, size_t ldb) {
      method load_a (line 2697) | static void load_a(dt* a, size_t lda) {
      method clean_c (line 2712) | static void clean_c() {
      method load_c (line 2721) | static void load_c(output_t* c, size_t ldc) {
      method store_c (line 2733) | static void store_c(output_t* c, size_t ldc) {
      method run_tile (line 2745) | static void run_tile() {
      method avx_kernel (line 2760) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method amx_kernel (line 2798) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method apply_scale_kgroup (line 2824) | static void apply_scale_kgroup(int m, int n, int m_begin, int n_begi...
    type GemmKernel224Int4SmallKGroup (line 2858) | struct GemmKernel224Int4SmallKGroup {
      method name (line 2872) | static std::string name() { return "K2_INT4_KGROUP"; }
      method recommended_nth (line 2873) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 2874) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 2879) | static void config() {}
      method __m256i (line 2892) | static __m256i hi_mask() { return *((__m256i*)(&hi_mask_arr[0])); }
      method __m256i (line 2893) | static __m256i lo_mask() { return *((__m256i*)(&lo_mask_arr[0])); }
      method __m256i (line 2894) | static __m256i sign_xor_mask() { return *((__m256i*)(&sign_xor_arr[0...
      method __m512i (line 2901) | static inline __m512i compressed_int4_to_int8_avx512(__m256i b256) {
      method integer_mat_vec_kgroup (line 2912) | static inline void integer_mat_vec_kgroup(int m, int n, int k, int k...
    function vec_mul_kgroup (line 2946) | inline void vec_mul_kgroup(int m, int n, int k, int k_group_size,
    function mat_mul_kgroup (line 2953) | inline void mat_mul_kgroup(int m, int n, int k, int k_group_size,
    function integer_mat_mul_kgroup (line 2962) | void integer_mat_mul_kgroup(int m, int n, int k, int k_group_size, typ...
    function vec_mul_kgroup (line 3001) | inline void vec_mul_kgroup(int m, int n, int k, int k_group_size, std:...
    function mat_mul_kgroup (line 3007) | inline void mat_mul_kgroup(int m, int n, int k, int k_group_size, std:...
    function vec_mul_kgroup (line 3014) | inline void vec_mul_kgroup(int m, int n, int k, int k_group_size,
    function mat_mul_kgroup (line 3022) | inline void mat_mul_kgroup(int m, int n, int k, int k_group_size,
    function vec_mul_kgroup (line 3031) | inline void vec_mul_kgroup(int m, int n, int k, int k_group_size,
    function mat_mul_kgroup (line 3039) | inline void mat_mul_kgroup(int m, int n, int k, int k_group_size,

FILE: kt-kernel/operators/amx/la/amx_quantization.hpp
  type amx (line 11) | namespace amx {
    type blocks_aligned_q4_0_ref (line 13) | struct blocks_aligned_q4_0_ref {
      method blocks_aligned_q4_0_ref (line 20) | blocks_aligned_q4_0_ref offset(size_t blck_cnt) const {
      method expected_data_size (line 27) | static size_t expected_data_size(int64_t k) {
      method blocks_aligned_q4_0_ref (line 34) | static blocks_aligned_q4_0_ref quantize(const float* RESTRICT x, voi...
      method dequantize (line 78) | void dequantize(float* y, int64_t k) {
    type blocks_aligned_q8_0_ref (line 98) | struct blocks_aligned_q8_0_ref {
      method blocks_aligned_q8_0_ref (line 105) | blocks_aligned_q8_0_ref offset(size_t blck_cnt) const {
      method expected_data_size (line 112) | static size_t expected_data_size(int64_t k) {
      method blocks_aligned_q8_0_ref (line 118) | static blocks_aligned_q8_0_ref quantize(const float* RESTRICT x, voi...
      method dequantize (line 148) | void dequantize(float* y, int64_t k) {
    type Dequantizer (line 168) | struct Dequantizer {}
    function __m256i (line 178) | inline __m256i dequant4x32(const uint8_t* qs) {
    function __m256i (line 183) | inline __m256i unaligned_copy8x32(const int8_t* qs) { return _mm256_lo...
    function __m512i (line 185) | inline __m512i copy8x64(const int8_t* qs) { return _mm512_load_si512((...
    function __m256i (line 187) | inline __m256i lo4bit(const uint8_t* qs) {
    function __m256i (line 190) | inline __m256i hi4bit(const uint8_t* qs) {
    function __m128i (line 194) | inline __m128i make_q4K_scale_and_min(const uint8_t* scales8) {
    function __m256i (line 210) | inline __m256i merge_q8K_bsum(block_q8_K* b) {
    function __m512i (line 214) | inline __m512i _mm512_dpbusd_epi32_compat(__m512i src, __m512i a, __m5...
    function __m512i (line 237) | inline __m512i _mm512_dpbssd_epi32(__m512i src, __m512i a, __m512i b) {

FILE: kt-kernel/operators/amx/la/amx_raw_buffers.hpp
  type amx (line 34) | namespace amx {
    type BufferABF16Impl (line 50) | struct BufferABF16Impl {
      method required_size (line 57) | static size_t required_size(int max_m, int k) { return sizeof(ggml_b...
      method BufferABF16Impl (line 59) | BufferABF16Impl(int max_m, int k, void* ptr) : max_m(max_m), k(k) {
      method set_data (line 66) | void set_data(void* new_ptr) { a = reinterpret_cast<ggml_bf16_t*>(ne...
      method from_mat (line 68) | void from_mat(int m, ggml_bf16_t* src, int ith, int nth) {
      method ggml_bf16_t (line 87) | ggml_bf16_t* get_submat(int m, int k, int m_begin, int k_begin) {
    type BufferBBF16Impl (line 117) | struct BufferBBF16Impl {
      method required_size (line 126) | static size_t required_size(int n, int k) { return sizeof(ggml_bf16_...
      method BufferBBF16Impl (line 128) | BufferBBF16Impl(int n, int k, void* ptr) : n(n), k(k) {
      method set_data (line 134) | void set_data(void* new_ptr) { b = reinterpret_cast<ggml_bf16_t*>(ne...
      method from_mat (line 136) | void from_mat(ggml_bf16_t* src, int ith, int nth) {
      method ggml_bf16_t (line 158) | ggml_bf16_t* get_submat(int n, int k, int n_begin, int k_begin) {
    type BufferBFP8Impl (line 178) | struct BufferBFP8Impl {
      method required_size (line 192) | static size_t required_size(int n, int k, int k_group_size) {
      method BufferBFP8Impl (line 201) | BufferBFP8Impl(int n, int k, int k_group_size, void* ptr) : n(n), k(...
      method set_data (line 203) | void set_data(void* ptr) {
      method from_mat (line 216) | void from_mat(const uint8_t* b_src, const float* d_src, int ith, int...
      method to_mat (line 299) | void to_mat(uint8_t* b_dst, float* d_dst, int ith, int nth) const {
    type BufferCFP32Impl (line 369) | struct BufferCFP32Impl {
      method required_size (line 386) | static size_t required_size(int max_m, int n) { return sizeof(float)...
      method BufferCFP32Impl (line 388) | BufferCFP32Impl(int max_m, int n, void* ptr) : max_m(max_m), n(n) {
      method set_data (line 395) | void set_data(void* new_ptr) { c = reinterpret_cast<float*>(new_ptr); }
      method to_mat (line 397) | void to_mat(int m, ggml_bf16_t* dst, int ith, int nth) {
    type BufferCFP32ReduceImpl (line 426) | struct BufferCFP32ReduceImpl {
      method required_size (line 435) | static size_t required_size(int max_m, int n) { return sizeof(float)...
      method BufferCFP32ReduceImpl (line 437) | BufferCFP32ReduceImpl(int max_m, int n, void* ptr) : max_m(max_m), n...
      method set_data (line 443) | void set_data(void* ptr) {
      method to_mat (line 449) | void to_mat(int m, ggml_bf16_t* dst, int ith, int nth) {
    type BufferBFP8PerChannelImpl (line 503) | struct BufferBFP8PerChannelImpl {
      method required_size (line 520) | static size_t required_size(int n, int k) { return sizeof(uint8_t) *...
      method BufferBFP8PerChannelImpl (line 525) | BufferBFP8PerChannelImpl(int n, int k, void* ptr) : n(n), k(k) { set...
      method set_data (line 527) | void set_data(void* ptr) {
      method from_mat (line 541) | void from_mat(const uint8_t* b_src, const float* d_src, int ith, int...

FILE: kt-kernel/operators/amx/la/amx_raw_kernels.hpp
  type amx (line 15) | namespace amx {
    type GemmKernel224BF16 (line 17) | struct GemmKernel224BF16 {
      method name (line 32) | static std::string name() { return "BF16"; }
      method recommended_nth (line 34) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 36) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 42) | static void config() {
      method load_a (line 60) | static void load_a(dt* a, size_t lda) {
      method load_b (line 70) | static void load_b(dt* b, size_t ldb) {
      method clean_c (line 80) | static void clean_c() {
      method load_c (line 89) | static void load_c(output_t* c, size_t ldc) {
      method store_c (line 101) | static void store_c(output_t* c, size_t ldc) {
      method run_tile (line 113) | static void run_tile() {
      method avx_kernel (line 126) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method avx_kernel_4 (line 155) | static void avx_kernel_4(int m, int n, int k, int m_begin, int n_beg...
      method amx_kernel (line 239) | static void amx_kernel(int m, int n, int k, int m_begin, int n_begin...
    type GemmKernel224FP8 (line 258) | struct GemmKernel224FP8 {
      method name (line 276) | static std::string name() { return "FP8"; }
      method recommended_nth (line 278) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 280) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 286) | static void config() {}
      method __m512i (line 315) | static inline __m512i bf16_hi_0_mask() { return _mm512_load_si512((_...
      method __m512i (line 316) | static inline __m512i bf16_hi_1_mask() { return _mm512_load_si512((_...
      method __m512i (line 317) | static inline __m512i bf16_lo_0_mask() { return _mm512_load_si512((_...
      method __m512i (line 318) | static inline __m512i bf16_lo_1_mask() { return _mm512_load_si512((_...
      method __m512i (line 319) | static inline __m512i sign_mask() { return _mm512_set1_epi8(0x80); }
      method fp8x64_to_bf16x64 (line 324) | static inline std::pair<__m512i, __m512i> fp8x64_to_bf16x64(__m512i ...
      method avx_kernel (line 336) | static void avx_kernel(int m, int n, int k, int m_begin, int n_begin...
      method avx_kernel_4 (line 395) | static void avx_kernel_4(int m, int n, int k, int m_begin, int n_beg...
      method apply_scale_kgroup (line 504) | static void apply_scale_kgroup(int m, int n, int m_begin, int n_begi...
    function float_mat_vec_kgroup (line 529) | void float_mat_vec_kgroup(int m, int n, int k, int k_group_size, typen...
    function float_mat_vec (line 573) | void float_mat_vec(int m, int n, int k, typename K::BufferA* ba, typen...
    function mat_mul (line 595) | inline void mat_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function vec_mul (line 601) | inline void vec_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function vec_mul_kgroup (line 607) | inline void vec_mul_kgroup(int m, int n, int k, int k_group_size, std:...
    function mat_mul_kgroup (line 613) | inline void mat_mul_kgroup(int m, int n, int k, int k_group_size, std:...
    type GemmKernel224FP8PerChannel (line 630) | struct GemmKernel224FP8PerChannel {
      method name (line 648) | static std::string name() { return "FP8PerChannel"; }
      method recommended_nth (line 650) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 652) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 658) | static void config() {}
      method fp8x64_to_bf16x64 (line 665) | static inline std::pair<__m512i, __m512i> fp8x64_to_bf16x64(__m512i ...
      method apply_scale_perchannel (line 682) | static void apply_scale_perchannel(int m, [[maybe_unused]] int n, in...
      method avx_kernel_4 (line 699) | static void avx_kernel_4(int m, int n, int k, int m_begin, int n_beg...
    function float_mat_vec_perchannel (line 816) | void float_mat_vec_perchannel(int m, int n, int k, typename K::BufferA...
    function vec_mul_perchannel (line 838) | inline void vec_mul_perchannel(int m, int n, int k, std::shared_ptr<Ge...

FILE: kt-kernel/operators/amx/la/amx_utils.hpp
  type amx (line 10) | namespace amx {
    function debug_tile (line 13) | inline void debug_tile(int t) {
    function debug_tile_int32 (line 26) | inline void debug_tile_int32(int t) {
    function debug_tiles (line 39) | inline void debug_tiles(int to = 8) {
    function debug_tiles_int32 (line 45) | inline void debug_tiles_int32(int to = 8) {
    function debug_tiles_224 (line 51) | inline void debug_tiles_224() {
    function debug_m512 (line 60) | inline void debug_m512(__m512 x) {
    function debug_m512i (line 69) | inline void debug_m512i(__m512i x) {
    function debug_m128i (line 78) | inline void debug_m128i(__m128i x) {
    function transpose_8x8_32bit (line 91) | inline void transpose_8x8_32bit(__m256i* v, __m256i* v1) {
    function transpose_8x8_32bit (line 123) | inline void transpose_8x8_32bit(__m256i* v) {
    function transpose_16x4_32bit (line 137) | inline void transpose_16x4_32bit(__m512i* r, __m512i* d) {
    function transpose_16x16_32bit (line 157) | inline void transpose_16x16_32bit(__m512i* v) {
    function transpose_16x8_32bit (line 228) | inline void transpose_16x8_32bit(__m256i* v) {
    function transpose_16x16_32bit (line 244) | inline void transpose_16x16_32bit(__m512i* v, size_t stride) {

FILE: kt-kernel/operators/amx/la/pack.hpp
  class Packed2DLayout (line 14) | class Packed2DLayout {
    type Dim (line 18) | struct Dim {
    method Packed2DLayout (line 24) | explicit Packed2DLayout(std::vector<Dim> dims) : dims_(std::move(dims)) {
    method index_t (line 53) | index_t dims() const { return static_cast<index_t>(dims_.size()); }
    method index_t (line 54) | index_t rows() const { return rows_; }
    method index_t (line 55) | index_t cols() const { return cols_; }
    method index_t (line 56) | index_t numel() const { return numel_; }
    method hd_to_rc (line 62) | std::pair<index_t, index_t> hd_to_rc(const std::vector<index_t>& hd_id...
    method rc_to_hd (line 77) | std::vector<index_t> rc_to_hd(index_t row, index_t col) const {
    method index_t (line 96) | index_t rc_to_offset(index_t row, index_t col, index_t ld = 0) const {
    method offset_to_rc (line 102) | std::pair<index_t, index_t> offset_to_rc(index_t offset, index_t ld = ...
    method index_t (line 111) | index_t hd_to_offset(const std::vector<index_t>& hd_idx, index_t ld = ...
    method offset_to_hd (line 116) | std::vector<index_t> offset_to_hd(index_t offset, index_t ld = 0) const {
    method decompose_row (line 123) | std::vector<index_t> decompose_row(index_t row) const {
    method decompose_col (line 135) | std::vector<index_t> decompose_col(index_t col) const {
    method index_t (line 147) | index_t compose_row(const std::vector<index_t>& digits) const {
    method index_t (line 158) | index_t compose_col(const std::vector<index_t>& digits) const {
    method check_hd_index (line 170) | void check_hd_index(const std::vector<index_t>& hd_idx) const {
    method err_dim (line 175) | static std::string err_dim(index_t i, index_t v, index_t sz) {
  function main (line 189) | int main() {

FILE: kt-kernel/operators/amx/la/utils.hpp
  function avx512_copy_32xbf16 (line 8) | static inline void avx512_copy_32xbf16(__m512i* src, __m512i* dst) {
  function avx512_32xfp32_to_32xbf16 (line 14) | static inline void avx512_32xfp32_to_32xbf16(__m512* src0, __m512* src1,...
  function avx512_32xbf16_to_32xfp32 (line 48) | static inline void avx512_32xbf16_to_32xfp32(__m512i* src, __m512* dst0,...
  function __m512 (line 55) | static inline __m512 vector_abs_max(__m512 a, __m512 b) {

FILE: kt-kernel/operators/amx/moe.hpp
  class AMX_MOE_TP (line 20) | class AMX_MOE_TP : public AMX_MOE_BASE<T, AMX_MOE_TP<T>> {
    method write_weights (line 41) | inline void write_weights(std::filesystem::path prefix, std::string ma...
    method read_weights (line 65) | inline void read_weights(std::filesystem::path prefix, std::string mat...
    method load_check (line 92) | inline void load_check() {
    method verify_load_right (line 97) | void verify_load_right() {
    method AMX_MOE_TP (line 131) | AMX_MOE_TP() = default;
    method AMX_MOE_TP (line 133) | AMX_MOE_TP(GeneralMOEConfig config, int tp_part_idx = 0) : Base(config...
    method derived_init (line 137) | void derived_init() {
    method buffer_a_required_size_impl (line 163) | size_t buffer_a_required_size_impl(size_t m, size_t k) const { return ...
    method buffer_b_required_size_impl (line 164) | size_t buffer_b_required_size_impl(size_t n, size_t k) const { return ...
    method buffer_c_required_size_impl (line 165) | size_t buffer_c_required_size_impl(size_t m, size_t n) const { return ...
    method make_buffer_a_impl (line 167) | std::shared_ptr<typename T::BufferA> make_buffer_a_impl(size_t m, size...
    method make_buffer_b_impl (line 170) | std::shared_ptr<typename T::BufferB> make_buffer_b_impl(size_t n, size...
    method make_buffer_c_impl (line 173) | std::shared_ptr<typename T::BufferC> make_buffer_c_impl(size_t m, size...
    method do_gate_up_gemm (line 181) | void do_gate_up_gemm(bool do_up, int expert_idx, int ith, int nth, int...
    method do_down_gemm (line 194) | void do_down_gemm(int expert_idx, int ith, int nth, int qlen) {
    method load_weights (line 206) | void load_weights() {
  class TP_MOE<AMX_MOE_TP<K>> (line 358) | class TP_MOE<AMX_MOE_TP<K>> : public TP_MOE<AMX_MOE_BASE<K, AMX_MOE_TP<K...
    method load_weights (line 363) | void load_weights() override {

FILE: kt-kernel/operators/amx/moe_base.hpp
  class AMX_MOE_BASE (line 39) | class AMX_MOE_BASE {
    method AMX_MOE_BASE (line 82) | AMX_MOE_BASE(GeneralMOEConfig config, int tp_part_idx_) : tp_part_idx(...
    method init (line 87) | void init() {
    method warm_up (line 152) | void warm_up() {
    method forward (line 165) | void forward(int qlen, int k, const int64_t* expert_ids, const float* ...
    method load_weights (line 174) | void load_weights(Args&&... args) {
    method write_weights_to_buffer (line 179) | void write_weights_to_buffer(Args&&... args) const {
    method forward_prefill (line 183) | void forward_prefill(int qlen, int k, const int64_t* expert_ids, const...
    method forward_decode (line 441) | void forward_decode(int k, const int64_t* expert_ids, const float* wei...
    method Derived (line 643) | Derived* derived() { return static_cast<Derived*>(this); }
    method Derived (line 644) | const Derived* derived_const() const { return static_cast<const Derive...
    method derived_init (line 651) | void derived_init() {
    method buffer_a_required_size (line 661) | size_t buffer_a_required_size(size_t m, size_t k) const { return deriv...
    method buffer_b_required_size (line 662) | size_t buffer_b_required_size(size_t n, size_t k) const { return deriv...
    method buffer_c_required_size (line 663) | size_t buffer_c_required_size(size_t m, size_t n) const { return deriv...
    method make_buffer_a (line 665) | std::shared_ptr<typename T::BufferA> make_buffer_a(size_t m, size_t k,...
    method make_buffer_b (line 668) | std::shared_ptr<typename T::BufferB> make_buffer_b(size_t n, size_t k,...
    method make_buffer_c (line 671) | std::shared_ptr<typename T::BufferC> make_buffer_c(size_t m, size_t n,...
    method apply_activation (line 675) | void apply_activation(int activated_expert, int nth, int qlen) {
  class TP_MOE<AMX_MOE_BASE<T, Derived>> (line 714) | class TP_MOE<AMX_MOE_BASE<T, Derived>> : public TP_MOE_Common<AMX_MOE_BA...
    method load_weights (line 719) | void load_weights() override { throw std::runtime_error("Not Implement...
    method write_weight_scale_to_buffer (line 721) | void write_weight_scale_to_buffer(int gpu_tp_count, int gpu_experts_num,
    method merge_results (line 729) | void merge_results(int qlen, void* output, bool incremental) override {
    method merge_results (line 773) | void merge_results(int qlen, void* output) override { merge_results(ql...

FILE: kt-kernel/operators/amx/test/amx-bkgroup-test.cpp
  type TestKernelKGroupB (line 12) | struct TestKernelKGroupB {
    method split_range_n (line 21) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
  function test_buffer_bkgroup_basic (line 29) | void test_buffer_bkgroup_basic() {
  function test_buffer_bkgroup_correctness (line 109) | void test_buffer_bkgroup_correctness() {
  function test_buffer_bkgroup_comparison (line 180) | void test_buffer_bkgroup_comparison() {
  function main (line 237) | int main(int argc, char** argv) {

FILE: kt-kernel/operators/amx/test/amx-c-reduce-test.cpp
  type TestKernelC (line 13) | struct TestKernelC {
    method split_range_n (line 22) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
  function test_buffer_c_reduce_basic (line 30) | void test_buffer_c_reduce_basic() {
  function test_buffer_c_reduce_comparison (line 215) | void test_buffer_c_reduce_comparison() {
  function test_buffer_c_reduce_performance (line 280) | void test_buffer_c_reduce_performance() {
  function main (line 324) | int main(int argc, char** argv) {

FILE: kt-kernel/operators/amx/test/amx-kgroup-test.cpp
  type TestKernelKGroup (line 12) | struct TestKernelKGroup {
    method split_range_n (line 21) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
  function test_buffer_kgroup_basic (line 29) | void test_buffer_kgroup_basic() {
  function test_buffer_kgroup_correctness (line 109) | void test_buffer_kgroup_correctness() {
  function test_buffer_kgroup_comparison (line 231) | void test_buffer_kgroup_comparison() {
  function main (line 288) | int main(int argc, char** argv) {

FILE: kt-kernel/operators/amx/test/amx-test.cpp
  function q_latency_test_bf16 (line 13) | void q_latency_test_bf16(int m, int n, int k, ggml_bf16_t* qa, ggml_bf16...
  function group_q_latency_test_bf16 (line 65) | void group_q_latency_test_bf16(int n_max, int k_max) {
  function q_latency_test_int8 (line 98) | void q_latency_test_int8(int m, int n, int k, ggml_bf16_t* qa, ggml_bf16...
  function group_q_latency_test_int8 (line 150) | void group_q_latency_test_int8(int n_max, int k_max) {
  function correction_test_int4 (line 182) | void correction_test_int4(int m, int n, int k) {
  function correction_test_int4_1 (line 288) | void correction_test_int4_1(int m, int n, int k) {
  function q_latency_test_int4 (line 394) | void q_latency_test_int4(int m, int n, int k, ggml_bf16_t* qa, ggml_bf16...
  function group_q_latency_test_int4 (line 446) | void group_q_latency_test_int4(int n_max, int k_max) {
  function q_latency_test_int4_1 (line 479) | void q_latency_test_int4_1(int m, int n, int k, ggml_bf16_t* qa, ggml_bf...
  function group_q_latency_test_int4_1 (line 534) | void group_q_latency_test_int4_1(int n_max, int k_max) {
  function main (line 567) | int main() {

FILE: kt-kernel/operators/amx/test/analyze-error.cpp
  function analyze_error_patterns (line 9) | void analyze_error_patterns() {
  function main (line 244) | int main() {

FILE: kt-kernel/operators/amx/test/avx-test.cpp
  function generate_data (line 15) | void generate_data(uint8_t* data, size_t size) {
  function dpbusd_test (line 32) | void dpbusd_test(const uint8_t* data_a, const uint8_t* data_b, int32_t* ...
  function main (line 49) | int main() {

FILE: kt-kernel/operators/amx/test/debug-kgroup-details.cpp
  function debug_kgroup_details (line 8) | void debug_kgroup_details() {
  function main (line 195) | int main() {

FILE: kt-kernel/operators/amx/test/debug-kgroup.cpp
  function debug_simple_multiplication (line 12) | void debug_simple_multiplication() {
  function debug_pattern_multiplication (line 100) | void debug_pattern_multiplication() {
  function compare_with_regular_int4 (line 179) | void compare_with_regular_int4() {
  function main (line 278) | int main() {

FILE: kt-kernel/operators/amx/test/debug-specific-dims.cpp
  function debug_specific_dimensions (line 8) | void debug_specific_dimensions() {
  function main (line 201) | int main() {

FILE: kt-kernel/operators/amx/test/mat-test.hpp
  type DotProductImpl (line 16) | struct DotProductImpl {
  type DotProductImpl<uint8_t> (line 25) | struct DotProductImpl<uint8_t> {
  type DotProductImpl<int8_t> (line 29) | struct DotProductImpl<int8_t> {
  type DotProductImpl<uint32_t> (line 33) | struct DotProductImpl<uint32_t> {
  type DotProductImpl<int32_t> (line 37) | struct DotProductImpl<int32_t> {
  type DotProductImpl<float> (line 42) | struct DotProductImpl<float> {
  type Layout (line 46) | enum class Layout {
  type Mat (line 53) | struct Mat {
    method size (line 55) | size_t size() { return rows * cols; }
    method Mat (line 65) | Mat() {}
    method Mat (line 67) | Mat(int rows, int cols, Layout layout) : rows(rows), cols(cols), layou...
    method sub_mat (line 86) | Mat<T> sub_mat(int r, int c) {
    method dealloc (line 98) | void dealloc() {
    method row_major_increase (line 105) | void row_major_increase() {
    method dis_to_00 (line 114) | void dis_to_00() {
    method random (line 122) | void random(std::mt19937& gen) {
    method stride (line 143) | size_t stride() { return stride_in_bytes; }
    method line_element_count (line 145) | int line_element_count() {
    method T (line 157) | T& at(int r, int c) {
    method print (line 173) | void print() {
    method print_all (line 206) | void print_all() {
    method mul_check (line 223) | Mat<DotProductType<T>> mul_check(Mat<T>& b) {
    method cmp (line 237) | bool cmp(Mat<T>& b) {
    method quant (line 292) | void quant(ggml_type to) {
    method Block (line 390) | Block* quant_data() {
    method dequant (line 394) | void dequant() {
  function init (line 478) | inline void init() {

FILE: kt-kernel/operators/amx/test/mmq-test.cpp
  function balance211 (line 65) | void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
  function parallel_for (line 89) | inline void parallel_for(int nth, int ith, int n, const func_t& f) {
  type Unroll (line 99) | struct Unroll {
    method ALWAYS_INLINE (line 101) | ALWAYS_INLINE void operator()(const Func& f, Args... args) const {
  type Unroll<1> (line 108) | struct Unroll<1> {
    method ALWAYS_INLINE (line 110) | ALWAYS_INLINE void operator()(const Func& f, Args... args) const {
  type PackedTypes (line 117) | struct PackedTypes {}
  type PackedTypes<block_q4_0> (line 119) | struct PackedTypes<block_q4_0> {
  type PackedTypes<block_q4_1> (line 123) | struct PackedTypes<block_q4_1> {
  type PackedTypes<block_q8_0> (line 127) | struct PackedTypes<block_q8_0> {
  type do_compensate (line 134) | struct do_compensate : std::integral_constant<bool, std::is_same<T, bloc...
  type do_unpack (line 137) | struct do_unpack
  type is_type_qkk (line 141) | struct is_type_qkk
  type tile_config_t (line 225) | struct tile_config_t {
  function ggml_tile_config_init (line 272) | void ggml_tile_config_init(void) {
  function get_tile_size (line 297) | int get_tile_size() {
  function get_row_size (line 312) | int get_row_size(int K) {
  function FP16_TO_FP32 (line 328) | inline float FP16_TO_FP32(ggml_half val) {
  function __m512 (line 334) | inline __m512 FP16_TO_FP32_VEC(ggml_half val) {
  function _mm512_reduce_max_ps (line 340) | inline float _mm512_reduce_max_ps(const __m512 x) {
  function transpose_8x8_32bit (line 358) | inline void transpose_8x8_32bit(__m256i* v, __m256i* v1) {
  function transpose_16x4_32bit (line 391) | inline void transpose_16x4_32bit(__m512i* r, __m512i* d) {
  function transpose_16x16_32bit (line 412) | inline void transpose_16x16_32bit(__m512i* v) {
  function quantize_row_q8_K_vnni (line 483) | void quantize_row_q8_K_vnni(const float* RESTRICT x, void* RESTRICT vy, ...
  function unpack_A (line 574) | void unpack_A(int8_t* RESTRICT tile, const block_q8_0* RESTRICT A, int l...
  function unpack_A (line 582) | void unpack_A(int8_t* RESTRICT tile, const block_q8_1* RESTRICT A, int l...
  function unpack_A (line 591) | void unpack_A(int8_t* RESTRICT tile, const block_q8_K* RESTRICT A, int l...
  function __m256i (line 612) | inline __m256i bytes_from_nibbles_32(const uint8_t* rsi) {
  function __m512i (line 620) | inline __m512i bytes_from_nibbles_64(const uint8_t* rsi) {
  function __m512i (line 629) | inline __m512i bytes_from_nibbles_64(const uint8_t* qs, const uint8_t* q...
  function bytes_from_nibbles_128 (line 650) | inline void bytes_from_nibbles_128(__m512i& r0, __m512i& r1, const uint8...
  function __m512i (line 672) | inline __m512i packNibbles(__m512i r0, __m512i r1) { return _mm512_or_si...
  function pack_qs (line 675) | inline void pack_qs(void* RESTRICT packed_B, const TB* RESTRICT B, int K...
  function pack_B (line 846) | void pack_B(void* RESTRICT packed_B, const block_q4_0* RESTRICT B, int K...
  function pack_B (line 854) | void pack_B(void* RESTRICT packed_B, const block_q4_1* RESTRICT B, int K...
  function s8s8_compensation (line 864) | inline void s8s8_compensation(void* RESTRICT packed_B) {
  function pack_B (line 879) | void pack_B(void* RESTRICT packed_B, const block_q8_0* RESTRICT B, int K...
  function unpack_mins_and_scales (line 889) | inline void unpack_mins_and_scales(const uint8_t* scales, uint32_t* utmp) {
  function pack_B (line 908) | void pack_B(void* RESTRICT packed_B, const block_q4_K* RESTRICT B, int K...
  function pack_B (line 939) | void pack_B(void* RESTRICT packed_B, const block_q5_K* RESTRICT B, int K...
  function pack_B (line 968) | void pack_B(void* RESTRICT packed_B, const block_q6_K* RESTRICT B, int K...
  function pack_B (line 986) | void pack_B(void* RESTRICT packed_B, const block_iq4_xs* RESTRICT B, int...
  function unpack_B (line 1039) | void unpack_B(int8_t* RESTRICT tile, const void* RESTRICT packed_B, int ...
  type acc_C (line 1141) | struct acc_C {}
  type acc_C<block_q8_0, block_q4_0, is_acc> (line 1144) | struct acc_C<block_q8_0, block_q4_0, is_acc> {
    method apply (line 1145) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_1, block_q4_1, is_acc> (line 1167) | struct acc_C<block_q8_1, block_q4_1, is_acc> {
    method apply (line 1168) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_0, block_q8_0, is_acc> (line 1194) | struct acc_C<block_q8_0, block_q8_0, is_acc> {
    method apply (line 1195) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_K, block_q4_K, is_acc> (line 1217) | struct acc_C<block_q8_K, block_q4_K, is_acc> {
    method apply (line 1218) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_K, block_q5_K, is_acc> (line 1260) | struct acc_C<block_q8_K, block_q5_K, is_acc> {
    method apply (line 1261) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_K, block_q6_K, is_acc> (line 1304) | struct acc_C<block_q8_K, block_q6_K, is_acc> {
    method apply (line 1305) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_K, block_iq4_xs, is_acc> (line 1332) | struct acc_C<block_q8_K, block_iq4_xs, is_acc> {
    method apply (line 1333) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  function scale_C (line 1379) | inline void scale_C(const int32_t* RESTRICT tile, int32_t* RESTRICT sumi...
  type tinygemm_kernel_avx (line 1397) | struct tinygemm_kernel_avx {
    method apply (line 1398) | static void apply(int K, const TA* RESTRICT A, const TB* RESTRICT B, T...
  type tinygemm_kernel_avx<float, ggml_fp16_t, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1408) | struct tinygemm_kernel_avx<float, ggml_fp16_t, float, BLOCK_M, BLOCK_N, ...
    method apply (line 1409) | static void apply(int K, const float* RESTRICT A, const ggml_fp16_t* R...
  function convert_B_packed_format (line 1458) | void convert_B_packed_format(void* RESTRICT packed_B, const TB* RESTRICT...
  type tinygemm_kernel_vnni (line 1475) | struct tinygemm_kernel_vnni {}
  type tinygemm_kernel_vnni<block_q8_0, block_q4_0, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1478) | struct tinygemm_kernel_vnni<block_q8_0, block_q4_0, float, BLOCK_M, BLOC...
    method apply (line 1479) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_1, block_q4_1, float, 1, BLOCK_N, BLOCK_K> (line 1545) | struct tinygemm_kernel_vnni<block_q8_1, block_q4_1, float, 1, BLOCK_N, B...
    method apply (line 1546) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_0, block_q8_0, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1606) | struct tinygemm_kernel_vnni<block_q8_0, block_q8_0, float, BLOCK_M, BLOC...
    method apply (line 1607) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_K, block_q4_K, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1672) | struct tinygemm_kernel_vnni<block_q8_K, block_q4_K, float, BLOCK_M, BLOC...
    method apply (line 1673) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_K, block_q5_K, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1768) | struct tinygemm_kernel_vnni<block_q8_K, block_q5_K, float, BLOCK_M, BLOC...
    method apply (line 1769) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_K, block_q6_K, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1870) | struct tinygemm_kernel_vnni<block_q8_K, block_q6_K, float, BLOCK_M, BLOC...
    method apply (line 1871) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_K, block_iq4_xs, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1976) | struct tinygemm_kernel_vnni<block_q8_K, block_iq4_xs, float, BLOCK_M, BL...
    method apply (line 1977) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  function tinygemm_kernel_amx (line 2074) | void tinygemm_kernel_amx(int M, int N, int KB, const void* RESTRICT _A, ...
  function tinygemm_kernel_amx (line 2267) | void tinygemm_kernel_amx(int M, int N, int KB, const void* RESTRICT _A, ...
  function ggml_amx_init (line 2363) | bool ggml_amx_init() {
  function ggml_compute_forward_mul_mat_use_amx (line 2375) | bool ggml_compute_forward_mul_mat_use_amx(struct ggml_tensor* dst) {
  function ggml_mul_mat_amx (line 2423) | void ggml_mul_mat_amx(struct ggml_tensor* dst, int nth, int ith, void* w...
  function ggml_amx_init (line 2596) | bool ggml_amx_init() {
  function ggml_compute_forward_mul_mat_use_amx (line 2601) | bool ggml_compute_forward_mul_mat_use_amx(struct ggml_tensor* dst) {
  function ggml_mul_mat_amx (line 2607) | void ggml_mul_mat_amx(struct ggml_tensor* dst, int nth, int ith, void* w...
  function test_gemm (line 2618) | void test_gemm() {
  function main (line 2640) | int main() {

FILE: kt-kernel/operators/amx/test/mmq.cpp
  function T (line 60) | inline T div_up(T x, T y) {
  function balance211 (line 65) | void balance211(T n, T nth, T ith, T& n_start, T& n_end) {
  function parallel_for (line 89) | inline void parallel_for(int nth, int ith, int n, const func_t& f) {
  type Unroll (line 99) | struct Unroll {
    method ALWAYS_INLINE (line 101) | ALWAYS_INLINE void operator()(const Func& f, Args... args) const {
  type Unroll<1> (line 108) | struct Unroll<1> {
    method ALWAYS_INLINE (line 110) | ALWAYS_INLINE void operator()(const Func& f, Args... args) const {
  type PackedTypes (line 117) | struct PackedTypes {}
  type PackedTypes<block_q4_0> (line 119) | struct PackedTypes<block_q4_0> {
  type PackedTypes<block_q4_1> (line 123) | struct PackedTypes<block_q4_1> {
  type PackedTypes<block_q8_0> (line 127) | struct PackedTypes<block_q8_0> {
  type do_compensate (line 134) | struct do_compensate : std::integral_constant<bool, std::is_same<T, bloc...
  type do_unpack (line 137) | struct do_unpack
  type is_type_qkk (line 141) | struct is_type_qkk
  type tile_config_t (line 225) | struct tile_config_t {
  function ggml_tile_config_init (line 272) | void ggml_tile_config_init(void) {
  function get_tile_size (line 297) | int get_tile_size() {
  function get_row_size (line 312) | int get_row_size(int K) {
  function FP16_TO_FP32 (line 328) | inline float FP16_TO_FP32(ggml_half val) {
  function __m512 (line 334) | inline __m512 FP16_TO_FP32_VEC(ggml_half val) {
  function _mm512_reduce_max_ps (line 340) | inline float _mm512_reduce_max_ps(const __m512 x) {
  function transpose_8x8_32bit (line 356) | inline void transpose_8x8_32bit(__m256i* v, __m256i* v1) {
  function transpose_16x4_32bit (line 388) | inline void transpose_16x4_32bit(__m512i* r, __m512i* d) {
  function transpose_16x16_32bit (line 408) | inline void transpose_16x16_32bit(__m512i* v) {
  function quantize_row_q8_K_vnni (line 479) | void quantize_row_q8_K_vnni(const float* RESTRICT x, void* RESTRICT vy, ...
  function unpack_A (line 570) | void unpack_A(int8_t* RESTRICT tile, const block_q8_0* RESTRICT A, int l...
  function unpack_A (line 578) | void unpack_A(int8_t* RESTRICT tile, const block_q8_1* RESTRICT A, int l...
  function unpack_A (line 587) | void unpack_A(int8_t* RESTRICT tile, const block_q8_K* RESTRICT A, int l...
  function __m256i (line 608) | inline __m256i bytes_from_nibbles_32(const uint8_t* rsi) {
  function __m512i (line 616) | inline __m512i bytes_from_nibbles_64(const uint8_t* rsi) {
  function __m512i (line 625) | inline __m512i bytes_from_nibbles_64(const uint8_t* qs, const uint8_t* q...
  function bytes_from_nibbles_128 (line 646) | inline void bytes_from_nibbles_128(__m512i& r0, __m512i& r1, const uint8...
  function __m512i (line 668) | inline __m512i packNibbles(__m512i r0, __m512i r1) { return _mm512_or_si...
  function pack_qs (line 671) | inline void pack_qs(void* RESTRICT packed_B, const TB* RESTRICT B, int K...
  function pack_B (line 842) | void pack_B(void* RESTRICT packed_B, const block_q4_0* RESTRICT B, int K...
  function pack_B (line 850) | void pack_B(void* RESTRICT packed_B, const block_q4_1* RESTRICT B, int K...
  function s8s8_compensation (line 860) | inline void s8s8_compensation(void* RESTRICT packed_B) {
  function pack_B (line 875) | void pack_B(void* RESTRICT packed_B, const block_q8_0* RESTRICT B, int K...
  function unpack_mins_and_scales (line 885) | inline void unpack_mins_and_scales(const uint8_t* scales, uint32_t* utmp) {
  function pack_B (line 904) | void pack_B(void* RESTRICT packed_B, const block_q4_K* RESTRICT B, int K...
  function pack_B (line 935) | void pack_B(void* RESTRICT packed_B, const block_q5_K* RESTRICT B, int K...
  function pack_B (line 964) | void pack_B(void* RESTRICT packed_B, const block_q6_K* RESTRICT B, int K...
  function pack_B (line 982) | void pack_B(void* RESTRICT packed_B, const block_iq4_xs* RESTRICT B, int...
  function unpack_B (line 1035) | void unpack_B(int8_t* RESTRICT tile, const void* RESTRICT packed_B, int ...
  type acc_C (line 1137) | struct acc_C {}
  type acc_C<block_q8_0, block_q4_0, is_acc> (line 1140) | struct acc_C<block_q8_0, block_q4_0, is_acc> {
    method apply (line 1141) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_1, block_q4_1, is_acc> (line 1163) | struct acc_C<block_q8_1, block_q4_1, is_acc> {
    method apply (line 1164) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_0, block_q8_0, is_acc> (line 1190) | struct acc_C<block_q8_0, block_q8_0, is_acc> {
    method apply (line 1191) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_K, block_q4_K, is_acc> (line 1213) | struct acc_C<block_q8_K, block_q4_K, is_acc> {
    method apply (line 1214) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_K, block_q5_K, is_acc> (line 1256) | struct acc_C<block_q8_K, block_q5_K, is_acc> {
    method apply (line 1257) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_K, block_q6_K, is_acc> (line 1300) | struct acc_C<block_q8_K, block_q6_K, is_acc> {
    method apply (line 1301) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  type acc_C<block_q8_K, block_iq4_xs, is_acc> (line 1328) | struct acc_C<block_q8_K, block_iq4_xs, is_acc> {
    method apply (line 1329) | static void apply(float* RESTRICT C, int ldc, const int32_t* RESTRICT ...
  function scale_C (line 1375) | inline void scale_C(const int32_t* RESTRICT tile, int32_t* RESTRICT sumi...
  type tinygemm_kernel_avx (line 1393) | struct tinygemm_kernel_avx {
    method apply (line 1394) | static void apply(int K, const TA* RESTRICT A, const TB* RESTRICT B, T...
  type tinygemm_kernel_avx<float, ggml_fp16_t, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1404) | struct tinygemm_kernel_avx<float, ggml_fp16_t, float, BLOCK_M, BLOCK_N, ...
    method apply (line 1405) | static void apply(int K, const float* RESTRICT A, const ggml_fp16_t* R...
  function convert_B_packed_format (line 1454) | void convert_B_packed_format(void* RESTRICT packed_B, const TB* RESTRICT...
  type tinygemm_kernel_vnni (line 1471) | struct tinygemm_kernel_vnni {}
  type tinygemm_kernel_vnni<block_q8_0, block_q4_0, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1474) | struct tinygemm_kernel_vnni<block_q8_0, block_q4_0, float, BLOCK_M, BLOC...
    method apply (line 1475) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_1, block_q4_1, float, 1, BLOCK_N, BLOCK_K> (line 1541) | struct tinygemm_kernel_vnni<block_q8_1, block_q4_1, float, 1, BLOCK_N, B...
    method apply (line 1542) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_0, block_q8_0, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1602) | struct tinygemm_kernel_vnni<block_q8_0, block_q8_0, float, BLOCK_M, BLOC...
    method apply (line 1603) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_K, block_q4_K, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1668) | struct tinygemm_kernel_vnni<block_q8_K, block_q4_K, float, BLOCK_M, BLOC...
    method apply (line 1669) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_K, block_q5_K, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1764) | struct tinygemm_kernel_vnni<block_q8_K, block_q5_K, float, BLOCK_M, BLOC...
    method apply (line 1765) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_K, block_q6_K, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1866) | struct tinygemm_kernel_vnni<block_q8_K, block_q6_K, float, BLOCK_M, BLOC...
    method apply (line 1867) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  type tinygemm_kernel_vnni<block_q8_K, block_iq4_xs, float, BLOCK_M, BLOCK_N, BLOCK_K> (line 1972) | struct tinygemm_kernel_vnni<block_q8_K, block_iq4_xs, float, BLOCK_M, BL...
    method apply (line 1973) | static void apply(int KB, const void* RESTRICT _A, const void* RESTRIC...
  function tinygemm_kernel_amx (line 2070) | void tinygemm_kernel_amx(int M, int N, int KB, const void* RESTRICT _A, ...
  function tinygemm_kernel_amx (line 2263) | void tinygemm_kernel_amx(int M, int N, int KB, const void* RESTRICT _A, ...
  function ggml_amx_init (line 2359) | bool ggml_amx_init() {
  function ggml_compute_forward_mul_mat_use_amx (line 2371) | bool ggml_compute_forward_mul_mat_use_amx(struct ggml_tensor* dst) {
  function ggml_mul_mat_amx (line 2419) | void ggml_mul_mat_amx(struct ggml_tensor* dst, int nth, int ith, void* w...
  function ggml_amx_init (line 2592) | bool ggml_amx_init() {
  function ggml_compute_forward_mul_mat_use_amx (line 2597) | bool ggml_compute_forward_mul_mat_use_amx(struct ggml_tensor* dst) {
  function ggml_mul_mat_amx (line 2603) | void ggml_mul_mat_amx(struct ggml_tensor* dst, int nth, int ith, void* w...
  function main (line 2613) | int main() {

FILE: kt-kernel/operators/amx/test/mmq.h
  type ggml_tensor (line 11) | struct ggml_tensor
  type ggml_tensor (line 13) | struct ggml_tensor

FILE: kt-kernel/operators/amx/test/test-kgroup-128.cpp
  function test_kgroup_128 (line 8) | void test_kgroup_128() {
  function main (line 270) | int main() {

FILE: kt-kernel/operators/amx/test/test-kgroup-kernel.cpp
  function test_kgroup_kernel_basic (line 13) | void test_kgroup_kernel_basic() {
  function test_kgroup_kernel_correctness (line 99) | void test_kgroup_kernel_correctness() {
  function test_kgroup_kernel_performance (line 213) | void test_kgroup_kernel_performance() {
  function main (line 287) | int main(int argc, char** argv) {

FILE: kt-kernel/operators/amx/test/test-specific-dims.cpp
  function test_specific_dimensions (line 12) | void test_specific_dimensions() {
  function main (line 674) | int main() {

FILE: kt-kernel/operators/amx/test/timer.hh
  function doubleToStringR2 (line 13) | inline std::string doubleToStringR2(double value) {
  function readable_number (line 21) | inline std::string readable_number(size_t size) {
  class Timer (line 34) | class Timer {
    method Timer (line 39) | Timer() {}
    method Timer (line 40) | Timer(std::string name) : name(name), tmp_timer(true) { start(); }
    method start (line 47) | void start() {
    method stop (line 53) | void stop() {
    method elapsedNs (line 60) | double elapsedNs() {
    method printElapsedMilliseconds (line 74) | void printElapsedMilliseconds() {
    method ns_to_string (line 78) | static std::string ns_to_string(double duration) {
    method runningTimeNs (line 110) | double runningTimeNs() { return m_runningNs; }
    method runningTime (line 112) | std::string runningTime() {
    method elapsedTime (line 117) | std::string elapsedTime() { return ns_to_string(elapsedNs()); }
    method elapsedMs (line 118) | double elapsedMs() { return elapsedNs() / 1e6; }
    method report_throughput (line 119) | std::string report_throughput(size_t op_cnt) {
    method merge (line 124) | void merge(Timer &other) {
  class Counter (line 137) | class Counter {
    method Counter (line 139) | Counter() {}
    method inc (line 143) | void inc(const char *name, size_t num) { counters[name] += num; }
    method print (line 144) | void print() {

FILE: kt-kernel/operators/amx/test/verify-kgroup.cpp
  function verify_kgroup_accuracy (line 8) | void verify_kgroup_accuracy() {
  function main (line 105) | int main() {

FILE: kt-kernel/operators/common.hpp
  function T (line 51) | inline T div_up(T x, T y) {
  function T (line 56) | T* offset_pointer(T* ptr, size_t byte_offset) {
  function pointer_offset (line 61) | size_t pointer_offset(T* ptr, T* b) {
  function T (line 66) | const T* offset_pointer(const T* ptr, size_t byte_offset) {
  function T (line 71) | T* offset_pointer_row_major(T* t, int row, int col, size_t ld) {
  function T (line 76) | T* offset_pointer_col_major(T* t, int row, int col, size_t ld) {
  class TimePerf (line 80) | class TimePerf {
    method forward_perf_start (line 87) | void forward_perf_start() {
    method perf_report (line 92) | void perf_report() {
  type TaskCounter (line 117) | struct TaskCounter {
    method TaskCounter (line 120) | TaskCounter(std::initializer_list<size_t> i) {
    method push_back (line 127) | void push_back(size_t i) {
    method push_back (line 134) | void push_back(std::vector<size_t> i) {
    method count (line 139) | size_t count() { return card[0]; }
    method at (line 140) | size_t at(size_t id, size_t which) { return id % card.at(which) / card...
  type GeneralConfig (line 143) | struct GeneralConfig {
    method GeneralConfig (line 159) | GeneralConfig() {}
  type GeneralMLAConfig (line 162) | struct GeneralMLAConfig {
    method GeneralMLAConfig (line 211) | GeneralMLAConfig() {}
    method GeneralMLAConfig (line 212) | GeneralMLAConfig(size_t hidden_size, size_t q_lora_rank, size_t kv_lor...
  type QuantConfig (line 222) | struct QuantConfig {
  type GeneralMOEConfig (line 230) | struct GeneralMOEConfig {
    method compute_num_gpu_experts (line 246) | void compute_num_gpu_experts() {
    method should_skip_expert (line 256) | inline bool should_skip_expert(int64_t expert_id) const {
    method GeneralMOEConfig (line 299) | GeneralMOEConfig() {}
    method GeneralMOEConfig (line 301) | GeneralMOEConfig(int expert_num, int routed_expert_num, int hidden_siz...
    method max_possible_qlen (line 307) | int max_possible_qlen() { return std::max(max_len, group_max_len); }
  type GeneralGateConfig (line 310) | struct GeneralGateConfig {
    method GeneralGateConfig (line 333) | GeneralGateConfig() = default;
    method GeneralGateConfig (line 335) | GeneralGateConfig(int hidden_size, int num_experts_per_tok, int n_rout...
  class MLA_Interface (line 343) | class MLA_Interface {
  class MoE_Interface (line 349) | class MoE_Interface {
  function init_ggml (line 354) | inline void init_ggml() {
  function convert_or_copy (line 374) | void convert_or_copy(A* dst, const B* src, size_t count) {
  function convert_or_copy (line 404) | void convert_or_copy(A* dst, void* src, ggml_type type, size_t count) {
  function check_numerics (line 431) | void check_numerics(A* data, size_t count) {
  function debug_bf16 (line 441) | inline void debug_bf16(ggml_bf16_t* x) {
  function debug_f32 (line 447) | inline void debug_f32(float* x) {
  function debug_f32 (line 454) | inline void debug_f32(float* x, size_t count) {

FILE: kt-kernel/operators/kvcache/kvcache.h
  type AnchorType (line 52) | enum AnchorType {
  type RetrievalType (line 83) | enum RetrievalType {
  type KVCacheConfig (line 111) | struct KVCacheConfig {
  function class (line 178) | class KVCache {

FILE: kt-kernel/operators/kvcache/kvcache_utils.cpp
  function ggml_type_to_string (line 16) | std::string ggml_type_to_string(ggml_type type) {
  function AnchorTypeToString (line 29) | std::string AnchorTypeToString(AnchorType type) {
  function RetrievalTypeToString (line 44) | std::string RetrievalTypeToString(RetrievalType type) {
  function ggml_vec_scale_f32 (line 786) | void ggml_vec_scale_f32(const int n, float* y, const float v) {

FILE: kt-kernel/operators/llamafile/conversion.h
  function to_float (line 18) | inline void to_float(const void* input, float* output, int size, ggml_ty...
  function from_float (line 30) | inline void from_float(const float* input, void* output, int size, ggml_...

FILE: kt-kernel/operators/llamafile/linear.h
  type LinearConfig (line 27) | struct LinearConfig {

FILE: kt-kernel/operators/llamafile/mlp.cpp
  function act_fn (line 54) | static float act_fn(float x) { return x / (1.0f + expf(-x)); }

FILE: kt-kernel/operators/llamafile/mlp.h
  type MLPConfig (line 27) | struct MLPConfig {

FILE: kt-kernel/operators/llamafile/moe.hpp
  function debug_quant (line 25) | inline void debug_quant(void* input, ggml_type type) {
  class LLAMA_MOE_TP (line 34) | class LLAMA_MOE_TP {
    method LLAMA_MOE_TP (line 93) | LLAMA_MOE_TP(GeneralMOEConfig config, int tp_part_idx) : config_(confi...
    method load_weights (line 192) | void load_weights(int complete_intermediate_size, int offset) {
    method warm_up (line 251) | void warm_up() {
    method act_fn (line 267) | static float act_fn(float x) { return x / (1.0f + expf(-x)); }
    method forward_one (line 269) | void forward_one(int k, const int64_t* expert_ids, const float* weight...
    method forward_many (line 459) | void forward_many(int qlen, int k, const int64_t* expert_ids, const fl...
    method forward (line 746) | void forward(int qlen, int k, const int64_t* expert_ids, const float* ...
  class TP_MOE<LLAMA_MOE_TP> (line 767) | class TP_MOE<LLAMA_MOE_TP> : public TP_MOE_Common<LLAMA_MOE_TP> {
    method load_weights (line 771) | void load_weights() {
    method merge_results (line 787) | void merge_results(int qlen, void* output) { merge_results(qlen, outpu...
    method merge_results (line 789) | void merge_results(int qlen, void* output, bool incremental) {

FILE: kt-kernel/operators/mla-tp.hpp
  class TP_MLA_Common (line 29) | class TP_MLA_Common : public MLA_Interface {
    method TP_MLA_Common (line 46) | TP_MLA_Common(GeneralMLAConfig config) : config(config) {
    method forward (line 82) | void forward(std::vector<int> qlens, std::vector<std::vector<int>> pag...
    method set_pages (line 125) | void set_pages(std::vector<std::vector<void*>> kv_lora_pages, std::vec...
    method set_local_pages (line 131) | void set_local_pages(int page_count) {
  class TP_MLA (line 141) | class TP_MLA : public TP_MLA_Common<T> {
    method load_weights (line 144) | void load_weights() { throw std::runtime_error("Not Implemented"); }
    method merge_results (line 145) | void merge_results(int qlen, void* output) { throw std::runtime_error(...

FILE: kt-kernel/operators/moe-tp.hpp
  class LLAMA_MOE_TP (line 14) | class LLAMA_MOE_TP
  class TP_MOE_Common (line 26) | class TP_MOE_Common : public MoE_Interface {
    method TP_MOE_Common (line 45) | TP_MOE_Common(GeneralMOEConfig config) : config(config) {
    method warm_up (line 139) | void warm_up() {
    method forward (line 152) | void forward(int qlen, int k, const int64_t* expert_ids, const float* ...
    method forward (line 158) | void forward(int* qlen_ptr, int k, const int64_t* expert_ids, const fl...
    method forward_binding (line 162) | void forward_binding(intptr_t qlen_ptr, int k, intptr_t expert_ids, in...
    method forward (line 168) | void forward(int* qlen_ptr, int k, const int64_t* expert_ids, const fl...
    method merge_results (line 219) | virtual void merge_results(int qlen, void* output, bool incremental) {
  class TP_MOE (line 229) | class TP_MOE : public TP_MOE_Common<T> {
    method load_weights (line 232) | void load_weights(const uint64_t* physical_to_logical_map) { throw std...

FILE: kt-kernel/operators/moe_kernel/api/common.h
  type BLASINT8 (line 40) | typedef int8_t BLASINT8;
  type KERNEL_CBLAS_TRANSPOSE (line 43) | typedef enum KERNEL_CBLAS_TRANSPOSE {
  type KERNEL_CBLAS_ORDER (line 50) | typedef enum KERNEL_CBLAS_ORDER { KernelCblasRowMajor = 101, KernelCblas...
  type KERNEL_CBLAS_SIDE (line 52) | typedef enum KERNEL_CBLAS_SIDE { KernelCblasLeft = 141, KernelCblasRight...
  type KERNEL_CBLAS_ORDER (line 53) | typedef KERNEL_CBLAS_ORDER KERNEL_CBLAS_LAYOUT;
  type KERNEL_CBLAS_OFFSET (line 54) | typedef enum KERNEL_CBLAS_OFFSET {
  function MatKernelVariant (line 60) | enum class MatKernelVariant {

FILE: kt-kernel/operators/moe_kernel/api/mat_kernel.h
  type MatKernelSelection (line 15) | struct MatKernelSelection {

FILE: kt-kernel/operators/moe_kernel/la/kernel.hpp
  type moe_kernel (line 19) | namespace moe_kernel {
    function T (line 21) | T *offset_pointer(T *ptr, size_t byte_offset) {
    function bf16_to_fp32 (line 25) | inline float bf16_to_fp32(ggml_bf16_t src) {
    function fp16_to_fp32 (line 40) | inline float fp16_to_fp32(ggml_fp16_t src) { return ggml_fp16_to_fp32(...
    type BufferAImpl (line 43) | struct BufferAImpl {
      method K_BLOCK (line 52) | static inline int K_BLOCK() { return K::K_BLOCK; }
      method required_size (line 56) | static size_t required_size(int max_m, int k) { return sizeof(int8_t...
      method BufferAImpl (line 58) | BufferAImpl(int max_m, int k, void *ptr, bool if_pack = false) : max...
      method BufferAImpl (line 62) | BufferAImpl(int max_m, int k, bool if_pack = false) : max_m(max_m), ...
      method set_data (line 68) | void set_data(void *ptr) {
      method required_size (line 73) | size_t required_size() const { return sizeof(int8_t) * max_m * k + s...
      method offset_row (line 75) | BufferAImpl<K> offset_row(size_t row_begin, size_t row_block) {
      method from_mat (line 83) | void from_mat(int m, ggml_bf16_t *src, int ith, int mth) {
      method from_mat (line 128) | void from_mat(int m, ggml_fp16_t *src, int ith, int mth) {
      method from_mat (line 175) | void from_mat(int m, float *src, int ith, int mth) {
      method from_mat (line 218) | void from_mat(int m, float *src) {
      method to_mat (line 245) | void to_mat(int m, float *dst, int ith, int mth) {
    type BufferCImpl (line 264) | struct BufferCImpl {
      method N_BLOCK (line 272) | static inline int N_BLOCK() { return K::N_BLOCK; }
      method required_size (line 274) | static size_t required_size(int max_m, int n) { return sizeof(int32_...
      method BufferCImpl (line 276) | BufferCImpl(int max_m, int n, void *ptr, bool if_row_major = false) ...
      method BufferCImpl (line 283) | BufferCImpl(int max_m, int n, bool if_row_major = false) : max_m(max...
      method set_data (line 285) | void set_data(void *ptr) {
      method required_size (line 289) | size_t required_size() const { return sizeof(int32_t) * max_m * n; }
    type GemmKernelInt8 (line 296) | struct GemmKernelInt8 {
      method set_tiling (line 324) | static void set_tiling(int n_block_up_gate, int n_block_down, int n_...
      method get_tiling (line 334) | static std::tuple<int, int, int, int, int, int, int> get_tiling() {
      method name (line 343) | static std::string name() { return "MOE_INT8"; }
      method recommended_nth (line 344) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method recommended_nth_down (line 346) | static int recommended_nth_down(int n, char type_ = 'd') {
      method recommended_nth_up_gate (line 360) | static int recommended_nth_up_gate(int n, char type_ = 'd') {
      method recommended_mth (line 374) | static int recommended_mth(int m) { return (m + M_BLOCK - 1) / M_BLO...
      method split_range_n (line 376) | static std::pair<int, int> split_range_n(int n, int ith, int nth, in...
      method split_range_m (line 382) | static std::pair<int, int> split_range_m(int m, int ith, int mth = 0) {
      method split_range_n_block (line 388) | static std::pair<int, int> split_range_n_block(int n, int ith, int n...
      type BufferB (line 397) | struct BufferB {
        method required_size (line 408) | static size_t required_size(int n, int k, bool if_pack = false, ch...
        method BufferB (line 433) | BufferB(int n, int k, bool if_pack = false, char mat_type = 'n', b...
        method BufferB (line 461) | BufferB(int n, int k, void *ptr, bool if_pack = false, char mat_ty...
        method set_data (line 466) | void set_data(void *ptr, bool plain = true) {
        method required_size (line 477) | size_t required_size() const { return sizeof(int8_t) * n * k + siz...
        method BufferB (line 478) | BufferB offset_col(size_t col_begin, size_t col_block) {
        method from_mat (line 484) | void from_mat(ggml_bf16_t *src, int ith, int nth, int n_new = -1, ...
        method from_mat (line 541) | void from_mat(float *src, int ith, int nth, int n_new = -1, bool i...
        method from_mat_row_major (line 591) | void from_mat_row_major(float *src, int ld, int ith, int nth, int ...
        method to_mat (line 621) | void to_mat(float *dst, int ith, int nth, int n_new = -1) {
      method convert_buffer_a_to_buffer_b (line 647) | static void convert_buffer_a_to_buffer_b(BufferA *ba, BufferB *bb) {
      method convert_buffer_b_to_buffer_a (line 656) | static void convert_buffer_b_to_buffer_a(BufferB *bb, BufferA *ba) {
      method change_view (line 665) | static void change_view(BufferC *c_src, BufferC *c_dst) {
      method apply_scale (line 676) | static void apply_scale(int m, int n, float *c, BufferA *ba, BufferB...
      method apply_scale (line 692) | static void apply_scale(int m, int n, float *c, BufferA *ba, BufferB...
      method apply_scale (line 718) | static void apply_scale(float *c, int ldc, BufferA *ba, BufferB *bb,...
      method apply_scale (line 754) | static void apply_scale(float *c, int ldc, BufferA *ba, BufferB *bb,...
    type GemmKernelInt4 (line 789) | struct GemmKernelInt4 {
      method set_tiling (line 817) | static void set_tiling(int n_block_up_gate, int n_block_down, int n_...
      method get_tiling (line 827) | static std::tuple<int, int, int, int, int, int, int> get_tiling() {
      method name (line 836) | static std::string name() { return "MOE_INT4"; }
      method recommended_nth (line 837) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method recommended_nth_down (line 839) | static int recommended_nth_down(int n, char type_ = 'd') {
      method recommended_mth (line 852) | static int recommended_mth(int m) { return (m + M_BLOCK - 1) / M_BLO...
      method recommended_nth_up_gate (line 854) | static int recommended_nth_up_gate(int n, char type_ = 'd') {
      method split_range_n (line 868) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method split_range_m (line 873) | static std::pair<int, int> split_range_m(int m, int ith, int mth) {
      method split_range_n_block (line 879) | static std::pair<int, int> split_range_n_block(int n, int ith, int n...
      type BufferB (line 888) | struct BufferB {
        method required_size (line 897) | static size_t required_size(int n, int k, bool if_pack = false, ch...
        method BufferB (line 927) | BufferB(int n, int k, bool if_pack = false, char mat_type = 'n', b...
        method BufferB (line 932) | BufferB(int n, int k, void *ptr, bool if_pack = false, char mat_ty...
        method set_data (line 936) | void set_data(void *ptr, bool plain = true) {
        method required_size (line 940) | size_t required_size() const { return sizeof(int8_t) * n * k / 2 +...
        method BufferB (line 941) | BufferB offset_col(size_t col_begin, size_t col_block) {
        method from_mat (line 947) | void from_mat(ggml_bf16_t *src, int ith, int nth, int n_new = -1, ...
        method from_mat (line 996) | void from_mat(float *src, int ith, int nth, int n_new = -1, bool i...
      method convert_buffer_a_to_buffer_b (line 1060) | static void convert_buffer_a_to_buffer_b(BufferA *ba, BufferB *bb) {
      method convert_buffer_b_to_buffer_a (line 1070) | static void convert_buffer_b_to_buffer_a(BufferB *bb, BufferA *ba) {
      method change_view (line 1081) | static void change_view(BufferC *c_src, BufferC *c_dst) {
      method apply_scale (line 1094) | static void apply_scale(int m, int n, float *c, BufferA *ba, BufferB...
      method apply_scale (line 1109) | static void apply_scale(int m, int n, float *c, BufferA *ba, BufferB...
      method apply_scale (line 1127) | static void apply_scale(float *c, int ldc, BufferA *ba, BufferB *bb,...
      method apply_scale (line 1163) | static void apply_scale(float *c, int ldc, BufferA *ba, BufferB *bb,...

FILE: kt-kernel/operators/moe_kernel/la/mat_kernel.cpp
  function MatKernelSelection (line 36) | MatKernelSelection select_kernel_for_int4(MatKernelVariant variant) {
  function MatKernelSelection (line 46) | MatKernelSelection select_kernel_for_int8(MatKernelVariant variant) {

FILE: kt-kernel/operators/moe_kernel/la/utils.hpp
  function float_to_bf16_trunc (line 7) | static inline uint16_t float_to_bf16_trunc(float f) {
  function convert_32fp32_to_32bf16_pure_c (line 14) | static inline void convert_32fp32_to_32bf16_pure_c(const float* src, uin...
  function convert_32bf16_to_32fp32_pure_c (line 24) | static inline void convert_32bf16_to_32fp32_pure_c(const uint16_t* src, ...

FILE: kt-kernel/operators/moe_kernel/mat_kernel/aocl_kernel/kernel.cpp
  function ToAoclOrder (line 8) | char ToAoclOrder(KERNEL_CBLAS_LAYOUT layout) {
  function ToAoclTranspose (line 18) | char ToAoclTranspose(KERNEL_CBLAS_TRANSPOSE transpose) {
  function decode_cblas_gemm_s8s8s32 (line 37) | void decode_cblas_gemm_s8s8s32(const KERNEL_CBLAS_LAYOUT layout, const K...
  function prefill_cblas_gemm_s8s8s32 (line 52) | void prefill_cblas_gemm_s8s8s32(const KERNEL_CBLAS_LAYOUT layout, const ...
  function prefill_int4_cblas_gemm_s8s8s32 (line 67) | void prefill_int4_cblas_gemm_s8s8s32(const KERNEL_CBLAS_LAYOUT layout, c...
  function decode_int4_cblas_gemm_s8s8s32 (line 76) | void decode_int4_cblas_gemm_s8s8s32(const KERNEL_CBLAS_LAYOUT layout, co...
  function reorder_B_gemm (line 85) | void reorder_B_gemm(const KERNEL_CBLAS_LAYOUT layout, const KERNEL_CBLAS...
  function get_reorder_B_size (line 93) | size_t get_reorder_B_size(const KERNEL_CBLAS_LAYOUT layout, const KERNEL...

FILE: kt-kernel/operators/moe_kernel/moe.hpp
  function TimePerf (line 22) | class MOE_KERNEL_TP
  class TP_MOE<MOE_KERNEL_TP<K, T>> (line 673) | class TP_MOE<MOE_KERNEL_TP<K, T>> : public TP_MOE_Common<MOE_KERNEL_TP<K...
    method load_weights (line 677) | void load_weights() {
    method merge_results (line 743) | void merge_results(int qlen, void* output, bool incremental) {
    method merge_results (line 797) | void merge_results(int qlen, void* output) { merge_results(qlen, outpu...

FILE: kt-kernel/operators/moe_kernel/test/convert-test.cpp
  function bf16_to_fp16 (line 16) | void bf16_to_fp16(const ggml_bf16_t* src, ggml_fp16_t* dst, size_t n) {
  function debug_rope (line 23) | void debug_rope() {
  function debug_softmax (line 44) | void debug_softmax() {
  function debug_inf (line 59) | void debug_inf() {
  function debug_reduce (line 69) | void debug_reduce() {
  function main (line 84) | int main() {

FILE: kt-kernel/operators/moe_kernel/test/debug.hpp
  function get_env_or_default (line 11) | inline std::string get_env_or_default(const char* var_name, const std::s...
  function dump_bin (line 16) | inline void dump_bin(std::string file_name, float16_t* data, size_t coun...
  function dump_bin (line 22) | inline void dump_bin(std::string file_name, float* data, size_t count) {
  function dump_bin (line 28) | inline void dump_bin(std::string file_name, int64_t* data, size_t count) {
  function dump_bin (line 35) | inline void dump_bin(std::string file_name, int8_t* data, size_t count) {
  function dump_bin (line 42) | inline void dump_bin(std::string file_name, int32_t* data, size_t count) {
  function load_bin (line 49) | inline void load_bin(std::string file_name, float* data, size_t count) {

FILE: kt-kernel/operators/moe_kernel/test/int4_mul-test.cpp
  function main (line 10) | int main() {

FILE: kt-kernel/operators/moe_kernel/test/mat_test.cpp
  function main (line 3) | int main() {

FILE: kt-kernel/operators/moe_kernel/test/utils_test.cpp
  function sve_32xbf16_to_32xfp32 (line 7) | static inline void sve_32xbf16_to_32xfp32(const bfloat16_t* src, float* ...
  function neon_32xbf16_to_32xfp32 (line 15) | static inline void neon_32xbf16_to_32xfp32(const uint16_t* src, float* d...
  function main (line 47) | int main() {

FILE: kt-kernel/operators/reduce.hpp
  function reduce_sum (line 7) | void reduce_sum(T** data, size_t data_groups_count, size_t begin, size_t...

FILE: kt-kernel/operators/rms-norm.hpp
  type RMSNorm (line 15) | struct RMSNorm {
    method rms_norm_single (line 16) | static void rms_norm_single(int size, A* input) {
    method rms_norm (line 28) | static void rms_norm(int hidden_size, int qlen, A* input) {
    method rms_norm_with_weights (line 35) | static void rms_norm_with_weights(int hidden_size, int qlen, A* weight...
    method rms_norm_single_with_weights (line 41) | static void rms_norm_single_with_weights(int size, A* weights, A* inpu...

FILE: kt-kernel/operators/rope.hpp
  type Rope (line 31) | struct Rope {
    method apply_single (line 33) | static void apply_single(E& emb, A* v, int size, int pos_start) {
    method apply_multiple (line 60) | static void apply_multiple(E& emb, A* v_block_start, int size_per_vect...
  class RotaryEmbeddingBase (line 74) | class RotaryEmbeddingBase {
    method init (line 77) | virtual void init(size_t seq_len) {
    method RotaryEmbeddingBase (line 84) | RotaryEmbeddingBase(size_t dim, size_t max_pos_embeddings, double base...
  class DeepseekV3RotaryEmbedding (line 97) | class DeepseekV3RotaryEmbedding : public RotaryEmbeddingBase {
    method DeepseekV3RotaryEmbedding (line 99) | DeepseekV3RotaryEmbedding(size_t dim, size_t max_position_embeddings =...
    method calculate_inv_freq (line 117) | void calculate_inv_freq() override {
    method set_cos_sin_cache (line 124) | void set_cos_sin_cache(size_t seq_len) override {
  function yarn_find_correction_dim (line 150) | inline double yarn_find_correction_dim(double num_rotations, double dim,...
  function yarn_find_correction_range (line 155) | inline std::pair<size_t, size_t> yarn_find_correction_range(double low_r...
  function yarn_linear_ramp_mask (line 167) | inline std::vector<double> yarn_linear_ramp_mask(double min_val, double ...
  function yarn_get_mscale (line 179) | inline double yarn_get_mscale(double scale = 1.0, double mscale = 1.0) {
  class DeepseekV3YarnRotaryEmbedding (line 186) | class DeepseekV3YarnRotaryEmbedding : public DeepseekV3RotaryEmbedding {
    method DeepseekV3YarnRotaryEmbedding (line 188) | DeepseekV3YarnRotaryEmbedding(size_t dim, size_t max_position_embeddin...
    method calculate_inv_freq (line 211) | void calculate_inv_freq() override {
    method set_cos_sin_cache (line 242) | void set_cos_sin_cache(size_t seq_len) override {

FILE: kt-kernel/operators/softmax.hpp
  class Softmax (line 20) | class Softmax {
    method apply_single (line 23) | static void apply_single(A* v, size_t size) {
    method apply_multiple (line 48) | static void apply_multiple(size_t count, A* v, size_t size, size_t ld) {

FILE: kt-kernel/python/_cpu_detect.py
  function detect_cpu_features (line 26) | def detect_cpu_features():
  function load_extension (line 165) | def load_extension(variant):
  function initialize (line 266) | def initialize():

FILE: kt-kernel/python/cli/commands/bench.py
  class BenchType (line 25) | class BenchType(str, Enum):
  function bench (line 36) | def bench(
  function microbench (line 88) | def microbench(
  function _find_kt_kernel_path (line 196) | def _find_kt_kernel_path() -> Optional[Path]:
  function _run_all_benchmarks (line 219) | def _run_all_benchmarks(model: Optional[str], output: Optional[Path], it...
  function _run_inference_benchmark (line 228) | def _run_inference_benchmark(model: Optional[str], output: Optional[Path...
  function _run_component_benchmark (line 243) | def _run_component_benchmark(component: str, output: Optional[Path], ite...

FILE: kt-kernel/python/cli/commands/chat.py
  function chat (line 39) | def chat(
  function _stream_response (line 281) | def _stream_response(
  function _count_tokens_with_tokenizer (line 373) | def _count_tokens_with_tokenizer(messages: list, tokenizer) -> int:
  function _estimate_tokens (line 398) | def _estimate_tokens(messages: list) -> int:
  function _generate_response (line 412) | def _generate_response(
  function _handle_command (line 470) | def _handle_command(command: str, messages: list, temperature: float, ma...
  function _format_history (line 543) | def _format_history(messages: list) -> str:
  function _save_history (line 559) | def _save_history(file_path: Path, messages: list, model: str) -> None:

FILE: kt-kernel/python/cli/commands/config.py
  function init (line 21) | def init() -> None:
  function show (line 31) | def show(
  function set_config (line 57) | def set_config(
  function get_config (line 72) | def get_config(
  function reset (line 90) | def reset(
  function path (line 104) | def path() -> None:
  function model_path_list (line 111) | def model_path_list() -> None:
  function model_path_add (line 119) | def model_path_add(
  function model_path_remove (line 129) | def model_path_remove(
  function _parse_value (line 138) | def _parse_value(value: str):

FILE: kt-kernel/python/cli/commands/doctor.py
  function _get_kt_kernel_info (line 34) | def _get_kt_kernel_info() -> dict:
  function doctor (line 95) | def doctor(
  function _check_python_version (line 479) | def _check_python_version(version: str) -> bool:
  function _display_results (line 489) | def _display_results(checks: list[dict], verbose: bool) -> None:

FILE: kt-kernel/python/cli/commands/model.py
  function is_amx_weights (line 46) | def is_amx_weights(model_path) -> tuple[bool, int]:
  function callback (line 93) | def callback(ctx: typer.Context) -> None:
  function download (line 105) | def download(
  function list_models (line 466) | def list_models(
  function clear_cache (line 1019) | def clear_cache() -> None:
  function path_list (line 1061) | def path_list() -> None:
  function link_cpu (line 1077) | def link_cpu(
  function unlink_cpu (line 1136) | def unlink_cpu(
  function path_add (line 1166) | def path_add(
  function path_remove (line 1194) | def path_remove(
  function scan (line 1210) | def scan(
  function add_model (line 1263) | def add_model(
  function edit_model (line 1339) | def edit_model(
  function info_model (line 1800) | def info_model(
  function remove_model (line 1920) | def remove_model(
  function refresh_models (line 1991) | def refresh_models() -> None:
  function verify_model (line 2090) | def verify_model(
  function verify_all_models (line 2651) | def verify_all_models() -> None:
  function auto_detect_repo (line 2685) | def auto_detect_repo(

FILE: kt-kernel/python/cli/commands/quant.py
  class QuantMethod (line 30) | class QuantMethod(str, Enum):
  function quant (line 37) | def quant(
  function _resolve_input_path (line 478) | def _resolve_input_path(model: str, settings) -> Optional[Path]:
  function _find_kt_kernel_path (line 510) | def _find_kt_kernel_path() -> Optional[Path]:

FILE: kt-kernel/python/cli/commands/run.py
  function run (line 78) | def run(
  function _run_impl (line 163) | def _run_impl(
  function _build_sglang_command (line 561) | def _build_sglang_command(
  function _interactive_model_selection (line 721) | def _interactive_model_selection(user_registry, settings) -> Optional[str]:

FILE: kt-kernel/python/cli/commands/sft.py
  function callback (line 16) | def callback(ctx: typer.Context) -> None:
  function train (line 29) | def train() -> None:
  function chat (line 38) | def chat() -> None:
  function export (line 47) | def export() -> None:

FILE: kt-kernel/python/cli/commands/version.py
  function _get_sglang_info (line 18) | def _get_sglang_info() -> str:
  function version (line 52) | def version(

FILE: kt-kernel/python/cli/config/settings.py
  class Settings (line 68) | class Settings:
    method __init__ (line 71) | def __init__(self, config_path: Optional[Path] = None):
    method _ensure_dirs (line 82) | def _ensure_dirs(self) -> None:
    method _load (line 93) | def _load(self) -> None:
    method _save (line 108) | def _save(self) -> None:
    method _deep_copy (line 117) | def _deep_copy(self, obj: Any) -> Any:
    method _deep_merge (line 125) | def _deep_merge(self, base: dict, override: dict) -> None:
    method get (line 133) | def get(self, key: str, default: Any = None) -> Any:
    method set (line 154) | def set(self, key: str, value: Any) -> None:
    method delete (line 174) | def delete(self, key: str) -> bool:
    method reset (line 199) | def reset(self) -> None:
    method get_all (line 204) | def get_all(self) -> dict[str, Any]:
    method get_env_vars (line 208) | def get_env_vars(self) -> dict[str, str]:
    method models_dir (line 220) | def models_dir(self) -> Path:
    method get_model_paths (line 225) | def get_model_paths(self) -> list[Path]:
    method add_model_path (line 242) | def add_model_path(self, path: str) -> None:
    method remove_model_path (line 259) | def remove_model_path(self, path: str) -> bool:
    method cache_dir (line 285) | def cache_dir(self) -> Path:
    method weights_dir (line 290) | def weights_dir(self) -> Optional[Path]:
  function get_settings (line 300) | def get_settings() -> Settings:
  function reset_settings (line 308) | def reset_settings() -> None:

FILE: kt-kernel/python/cli/i18n.py
  function get_lang (line 1260) | def get_lang() -> str:
  function t (line 1306) | def t(msg_key: str, **kwargs: Any) -> str:
  function set_lang (line 1336) | def set_lang(lang: str) -> None:

FILE: kt-kernel/python/cli/main.py
  function _get_app_help (line 20) | def _get_app_help() -> str:
  function _get_help (line 28) | def _get_help(key: str) -> str:
  function _update_help_texts (line 57) | def _update_help_texts() -> None:
  function check_first_run (line 77) | def check_first_run() -> None:
  function _show_first_run_setup (line 103) | def _show_first_run_setup(settings) -> None:
  function _prompt_custom_path (line 359) | def _prompt_custom_path(console, settings) -> str:
  function _install_shell_completion (line 391) | def _install_shell_completion() -> None:
  function _apply_saved_language (line 447) | def _apply_saved_language() -> None:
  function main (line 484) | def main():

FILE: kt-kernel/python/cli/utils/analyze_moe_model.py
  function _get_sglang_moe_architectures (line 12) | def _get_sglang_moe_architectures():
  function _get_cache_file (line 63) | def _get_cache_file():
  function _load_all_cache (line 70) | def _load_all_cache():
  function _save_all_cache (line 83) | def _save_all_cache(cache_data):
  function _compute_config_fingerprint (line 95) | def _compute_config_fingerprint(config_path: Path) -> Optional[str]:
  function _load_cache (line 109) | def _load_cache(model_path: Path) -> Optional[Dict[str, Any]]:
  function _save_cache (line 136) | def _save_cache(model_path: Path, result: Dict[str, Any]):
  function _load_config_json (line 160) | def _load_config_json(model_path: Path) -> Optional[Dict[str, Any]]:
  function _is_moe_model (line 178) | def _is_moe_model(config: Dict[str, Any]) -> bool:
  function _extract_moe_params (line 205) | def _extract_moe_params(config: Dict[str, Any]) -> Dict[str, Any]:
  function _estimate_model_size (line 271) | def _estimate_model_size(model_path: Path) -> float:
  function analyze_moe_model (line 285) | def analyze_moe_model(model_path, use_cache=True):
  function print_analysis (line 366) | def print_analysis(model_path):
  function main (line 400) | def main():

FILE: kt-kernel/python/cli/utils/console.py
  function print_info (line 44) | def print_info(message: str, **kwargs) -> None:
  function print_success (line 49) | def print_success(message: str, **kwargs) -> None:
  function print_warning (line 54) | def print_warning(message: str, **kwargs) -> None:
  function print_error (line 59) | def print_error(message: str, **kwargs) -> None:
  function print_step (line 64) | def print_step(message: str, **kwargs) -> None:
  function print_header (line 69) | def print_header(title: str, subtitle: Optional[str] = None) -> None:
  function print_version_table (line 77) | def print_version_table(versions: dict[str, Optional[str]]) -> None:
  function print_dependency_table (line 92) | def print_dependency_table(deps: list[dict]) -> None:
  function confirm (line 119) | def confirm(message: str, default: bool = True) -> bool:
  function prompt_choice (line 124) | def prompt_choice(message: str, choices: list[str], default: Optional[st...
  function prompt_text (line 149) | def prompt_text(message: str, default: Optional[str] = None) -> str:
  function create_progress (line 154) | def create_progress() -> Progress:
  function create_download_progress (line 166) | def create_download_progress() -> Progress:
  function print_model_table (line 179) | def print_model_table(models: list[dict]) -> None:
  function print_hardware_info (line 204) | def print_hardware_info(gpu_info: str, cpu_info: str, ram_info: str) -> ...
  function print_server_info (line 217) | def print_server_info(
  function print_api_info (line 234) | def print_api_info(host: str, port: int) -> None:

FILE: kt-kernel/python/cli/utils/debug_configs.py
  function main (line 16) | def main():

FILE: kt-kernel/python/cli/utils/download_helper.py
  function list_remote_files_hf (line 8) | def list_remote_files_hf(repo_id: str, use_mirror: bool = False) -> List...
  function list_remote_files_ms (line 48) | def list_remote_files_ms(repo_id: str) -> List[Dict[str, any]]:
  function filter_files_by_pattern (line 70) | def filter_files_by_pattern(files: List[Dict[str, any]], pattern: str) -...
  function calculate_total_size (line 87) | def calculate_total_size(files: List[Dict[str, any]]) -> int:
  function format_file_list_table (line 92) | def format_file_list_table(files: List[Dict[str, any]], max_display: int...
  function verify_repo_exists (line 111) | def verify_repo_exists(repo_id: str, repo_type: str, use_mirror: bool = ...

FILE: kt-kernel/python/cli/utils/environment.py
  class EnvManager (line 21) | class EnvManager:
  class GPUInfo (line 30) | class GPUInfo:
  class CPUInfo (line 40) | class CPUInfo:
  class MemoryInfo (line 52) | class MemoryInfo:
  class SystemInfo (line 63) | class SystemInfo:
  function run_command (line 75) | def run_command(cmd: list[str], timeout: int = 10) -> Optional[str]:
  function detect_env_managers (line 86) | def detect_env_managers() -> list[EnvManager]:
  function check_docker (line 128) | def check_docker() -> Optional[EnvManager]:
  function check_kt_env_exists (line 141) | def check_kt_env_exists(manager: str, env_name: str = "kt") -> bool:
  function get_kt_env_path (line 170) | def get_kt_env_path(manager: str, env_name: str = "kt") -> Optional[Path]:
  function detect_cuda_version (line 192) | def detect_cuda_version() -> Optional[str]:
  function detect_gpus (line 222) | def detect_gpus() -> list[GPUInfo]:
  function _parse_cuda_visible_devices (line 265) | def _parse_cuda_visible_devices(cuda_visible: str) -> list[int]:
  function detect_cpu_info (line 289) | def detect_cpu_info() -> CPUInfo:
  function _parse_cpu_flags (line 378) | def _parse_cpu_flags(flags: list[str]) -> list[str]:
  function _parse_cpu_list (line 446) | def _parse_cpu_list(cpulist: str) -> list[int]:
  function detect_memory_info (line 458) | def detect_memory_info() -> MemoryInfo:
  function _parse_dmidecode_memory (line 491) | def _parse_dmidecode_memory(output: str) -> tuple[Optional[int], Optiona...
  function _detect_memory_frequency_sysfs (line 518) | def _detect_memory_frequency_sysfs() -> Optional[int]:
  function _parse_macos_memory (line 538) | def _parse_macos_memory(output: str) -> tuple[Optional[int], Optional[st...
  function detect_ram_gb (line 557) | def detect_ram_gb() -> float:
  function detect_available_ram_gb (line 583) | def detect_available_ram_gb() -> float:
  function detect_disk_space_gb (line 604) | def detect_disk_space_gb(path: str = "/") -> tuple[float, float]:
  function get_installed_package_version (line 615) | def get_installed_package_version(package_name: str) -> Optional[str]:
  function get_system_info (line 625) | def get_system_info() -> SystemInfo:
  function is_in_virtual_env (line 638) | def is_in_virtual_env() -> bool:
  function get_current_env_name (line 648) | def get_current_env_name() -> Optional[str]:
  class StorageLocation (line 662) | class StorageLocation:
  function scan_storage_locations (line 672) | def scan_storage_locations(min_size_gb: float = 50.0) -> list[StorageLoc...
  function _get_mount_points (line 765) | def _get_mount_points() -> list[str]:
  function _get_potential_model_paths (line 827) | def _get_potential_model_paths(mount_point: str) -> list[str]:
  function format_size_gb (line 855) | def format_size_gb(size_gb: float) -> str:
  class LocalModel (line 863) | class LocalModel:
  function scan_local_models (line 874) | def scan_local_models(search_paths: list[str], max_depth: int = 3) -> li...
  function _scan_directory_for_models (line 902) | def _scan_directory_for_models(
  function _detect_model_in_directory (line 926) | def _detect_model_in_directory(directory: str, entries: list) -> Optiona...
  function _get_directory_size (line 984) | def _get_directory_size(directory: str) -> int:
  function scan_models_in_location (line 1001) | def scan_models_in_location(location: StorageLocation, max_depth: int = ...
  class CPUBuildFeatures (line 1015) | class CPUBuildFeatures:
  function detect_cpu_build_features (line 1027) | def detect_cpu_build_features() -> CPUBuildFeatures:

FILE: kt-kernel/python/cli/utils/input_validators.py
  function prompt_int_with_retry (line 14) | def prompt_int_with_retry(
  function prompt_float_with_retry (line 76) | def prompt_float_with_retry(
  function prompt_choice_with_retry (line 126) | def prompt_choice_with_retry(
  function prompt_int_list_with_retry (line 154) | def prompt_int_list_with_retry(

FILE: kt-kernel/python/cli/utils/kv_cache_calculator.py
  function get_dtype_bytes (line 21) | def get_dtype_bytes(dtype_str: str) -> int:
  function get_kv_size_gb (line 34) | def get_kv_size_gb(
  function main (line 182) | def main():

FILE: kt-kernel/python/cli/utils/model_discovery.py
  function discover_and_register_global (line 22) | def discover_and_register_global(
  function discover_and_register_path (line 64) | def discover_and_register_path(
  function _create_and_register_model (line 124) | def _create_and_register_model(registry: UserModelRegistry, scanned_mode...
  function format_discovery_summary (line 182) | def format_discovery_summary(

FILE: kt-kernel/python/cli/utils/model_registry.py
  class ModelInfo (line 18) | class ModelInfo:
  class ModelRegistry (line 155) | class ModelRegistry:
    method __init__ (line 158) | def __init__(self):
    method _load_builtin_models (line 165) | def _load_builtin_models(self) -> None:
    method _load_user_models (line 170) | def _load_user_models(self) -> None:
    method _register (line 197) | def _register(self, model: ModelInfo) -> None:
    method get (line 205) | def get(self, name: str) -> Optional[ModelInfo]:
    method search (line 219) | def search(self, query: str, limit: int = 10) -> list[ModelInfo]:
    method _match_score (line 242) | def _match_score(self, query: str, model: ModelInfo) -> float:
    method list_all (line 279) | def list_all(self) -> list[ModelInfo]:
    method find_local_models (line 283) | def find_local_models(self, max_depth: int = 3) -> list[tuple[ModelInf...
  function get_registry (line 349) | def get_registry() -> ModelRegistry:
  function compute_deepseek_v3_gpu_experts (line 362) | def compute_deepseek_v3_gpu_experts(tensor_parallel_size: int, vram_per_...
  function compute_kimi_k2_thinking_gpu_experts (line 371) | def compute_kimi_k2_thinking_gpu_experts(tensor_parallel_size: int, vram...
  function compute_minimax_m2_gpu_experts (line 381) | def compute_minimax_m2_gpu_experts(tensor_parallel_size: int, vram_per_g...

FILE: kt-kernel/python/cli/utils/model_scanner.py
  class ScannedModel (line 17) | class ScannedModel:
    method size_gb (line 27) | def size_gb(self) -> float:
    method folder_name (line 32) | def folder_name(self) -> str:
  class ModelScanner (line 37) | class ModelScanner:
    method __init__ (line 40) | def __init__(self, min_size_gb: float = 10.0):
    method scan_directory (line 49) | def scan_directory(
    method scan_single_path (line 128) | def scan_single_path(self, path: Path) -> Optional[ScannedModel]:
    method _calculate_total_size (line 170) | def _calculate_total_size(self, directory: Path, filenames: List[str])...
  function scan_directory (line 196) | def scan_directory(
  function scan_single_path (line 214) | def scan_single_path(path: Path) -> Optional[ScannedModel]:
  function format_size (line 228) | def format_size(size_bytes: int) -> str:
  function find_files_fast (line 248) | def find_files_fast(mount_point: str, pattern: str, max_depth: int = 6, ...
  function is_valid_model_directory (line 281) | def is_valid_model_directory(directory: Path, min_size_gb: float = 10.0)...
  function scan_all_models_fast (line 330) | def scan_all_models_fast(mount_points: List[str], min_size_gb: float = 1...
  function get_root_subdirs (line 367) | def get_root_subdirs() -> List[str]:
  function scan_directory_for_models (line 414) | def scan_directory_for_models(directory: str, min_file_size_gb: float = ...
  function scan_all_models_with_info (line 484) | def scan_all_models_with_info(
  function find_model_roots_from_paths (line 533) | def find_model_roots_from_paths(model_paths: List[str]) -> Tuple[List[st...
  class ModelRootInfo (line 655) | class ModelRootInfo:
  function discover_models (line 663) | def discover_models(
  function _get_mount_points (line 701) | def _get_mount_points() -> List[str]:

FILE: kt-kernel/python/cli/utils/model_table_builder.py
  function format_model_size (line 15) | def format_model_size(model_path: Path, format_type: str) -> str:
  function format_repo_info (line 33) | def format_repo_info(model) -> str:
  function format_sha256_status (line 41) | def format_sha256_status(model, status_map: dict) -> str:
  function build_moe_gpu_table (line 46) | def build_moe_gpu_table(
  function build_amx_table (line 101) | def build_amx_table(
  function build_gguf_table (line 208) | def build_gguf_table(

FILE: kt-kernel/python/cli/utils/model_verifier.py
  function _compute_file_sha256 (line 15) | def _compute_file_sha256(file_path: Path) -> Tuple[str, str, float]:
  function check_huggingface_connectivity (line 36) | def check_huggingface_connectivity(timeout: int = 5) -> Tuple[bool, str]:
  function verify_model_integrity (line 62) | def verify_model_integrity(
  function calculate_local_sha256 (line 226) | def calculate_local_sha256(
  function fetch_model_sha256 (line 286) | def fetch_model_sha256(
  function _fetch_from_huggingface (line 330) | def _fetch_from_huggingface(
  function _fetch_from_modelscope (line 392) | def _fetch_from_modelscope(repo_id: str, revision: str, timeout: int | N...
  function verify_model_integrity_with_progress (line 426) | def verify_model_integrity_with_progress(
  function pre_operation_verification (line 652) | def pre_operation_verification(user_model, user_registry, operation_name...

FILE: kt-kernel/python/cli/utils/port_checker.py
  function is_port_available (line 9) | def is_port_available(host: str, port: int) -> bool:
  function find_available_port (line 40) | def find_available_port(host: str, start_port: int, max_attempts: int = ...

FILE: kt-kernel/python/cli/utils/quant_interactive.py
  function select_model_to_quantize (line 19) | def select_model_to_quantize() -> Optional[Any]:
  function configure_quantization_method (line 72) | def configure_quantization_method() -> Dict[str, str]:
  function configure_cpu_params (line 101) | def configure_cpu_params(max_cores: int, max_numa: int) -> Dict[str, Any]:
  function configure_output_path (line 130) | def configure_output_path(model: Any, method: str, numa_nodes: int) -> P...
  function calculate_quantized_size (line 170) | def calculate_quantized_size(source_path: Path, input_type: str, quant_m...
  function check_disk_space (line 203) | def check_disk_space(output_path: Path, required_size_gb: float) -> tupl...
  function interactive_quant_config (line 234) | def interactive_quant_config() -> Optional[Dict[str, Any]]:

FILE: kt-kernel/python/cli/utils/repo_detector.py
  function parse_readme_frontmatter (line 13) | def parse_readme_frontmatter(readme_path: Path) -> Optional[Dict]:
  function extract_repo_from_frontmatter (line 48) | def extract_repo_from_frontmatter(frontmatter: Dict) -> Optional[Tuple[s...
  function _extract_repo_from_url (line 120) | def _extract_repo_from_url(url: str) -> Optional[Tuple[str, str]]:
  function extract_repo_from_global_search (line 153) | def extract_repo_from_global_search(readme_path: Path) -> Optional[Tuple...
  function detect_repo_for_model (line 220) | def detect_repo_for_model(model_path: str) -> Optional[Tuple[str, str]]:
  function scan_models_for_repo (line 252) | def scan_models_for_repo(model_list) -> Dict:
  function format_detection_report (line 292) | def format_detection_report(results: Dict) -> str:
  function apply_detection_results (line 345) | def apply_detection_results(results: Dict, registry) -> int:

FILE: kt-kernel/python/cli/utils/run_configs.py
  class RunConfigManager (line 16) | class RunConfigManager:
    method __init__ (line 19) | def __init__(self):
    method _ensure_config_file (line 23) | def _ensure_config_file(self):
    method _load_data (line 29) | def _load_data(self) -> Dict:
    method _save_data (line 37) | def _save_data(self, data: Dict):
    method list_configs (line 42) | def list_configs(self, model_id: str) -> List[Dict[str, Any]]:
    method save_config (line 52) | def save_config(self, model_id: str, config: Dict[str, Any]):
    method delete_config (line 75) | def delete_config(self, model_id: str, config_index: int) -> bool:
    method get_config (line 98) | def get_config(self, model_id: str, config_index: int) -> Optional[Dic...

FILE: kt-kernel/python/cli/utils/run_interactive.py
  function get_gpu_info (line 28) | def get_gpu_info() -> List[Dict[str, Any]]:
  function select_model (line 60) | def select_model() -> Optional[Any]:
  function select_inference_method (line 162) | def select_inference_method(model: Any) -> Optional[Dict[str, Any]]:
  function _select_saved_config (line 238) | def _select_saved_config(model: Any, saved_configs: List[Dict]) -> Optio...
  function _build_command_preview (line 307) | def _build_command_preview(model: Any, cfg: Dict[str, Any]) -> List[str]:
  function _configure_raw_inference (line 378) | def _configure_raw_inference(model: Any) -> Dict[str, Any]:
  function _configure_amx_inference (line 409) | def _configure_amx_inference(model: Any) -> Optional[Dict[str, Any]]:
  function _configure_gguf_inference (line 468) | def _configure_gguf_inference(model: Any) -> Optional[Dict[str, Any]]:
  function configure_numa_and_cpu (line 509) | def configure_numa_and_cpu(method_config: Dict[str, Any]) -> Dict[str, i...
  function configure_gpu_experts (line 559) | def configure_gpu_experts(model: Any) -> int:
  function configure_kv_cache (line 597) | def configure_kv_cache(is_raw_inference: bool) -> Optional[Dict[str, int]]:
  function select_gpus_and_tp (line 627) | def select_gpus_and_tp(
  function configure_parsers (line 737) | def configure_parsers() -> Dict[str, Optional[str]]:
  function configure_host_and_port (line 771) | def configure_host_and_port() -> Dict[str, Any]:
  function save_config_prompt (line 823) | def save_config_prompt(model: Any, full_config: Dict[str, Any]) -> bool:
  function interactive_run_config (line 886) | def interactive_run_config() -> Optional[Dict[str, Any]]:
  function _display_config_summary (line 1052) | def _display_config_summary(config: Dict[str, Any]):

FILE: kt-kernel/python/cli/utils/sglang_checker.py
  function check_sglang_installation (line 18) | def check_sglang_installation() -> dict:
  function get_sglang_install_instructions (line 155) | def get_sglang_install_instructions(lang: Optional[str] = None) -> str:
  function print_sglang_install_instructions (line 211) | def print_sglang_install_instructions() -> None:
  function check_sglang_and_warn (line 217) | def check_sglang_and_warn() -> bool:
  function _get_sglang_kt_kernel_cache_path (line 241) | def _get_sglang_kt_kernel_cache_path() -> Path:
  function _is_sglang_kt_kernel_cache_valid (line 248) | def _is_sglang_kt_kernel_cache_valid() -> bool:
  function _save_sglang_kt_kernel_cache (line 268) | def _save_sglang_kt_kernel_cache(supported: bool) -> None:
  function clear_sglang_kt_kernel_cache (line 277) | def clear_sglang_kt_kernel_cache() -> None:
  function check_sglang_kt_kernel_support (line 287) | def check_sglang_kt_kernel_support(use_cache: bool = True, silent: bool ...
  function print_sglang_kt_kernel_instructions (line 369) | def print_sglang_kt_kernel_instructions() -> None:

FILE: kt-kernel/python/cli/utils/tuna_engine.py
  function get_num_experts (line 20) | def get_num_experts(model_path: Path) -> int:
  function detect_oom (line 58) | def detect_oom(log_line: Optional[str]) -> bool:
  function test_config (line 87) | def test_config(
  function test_inference (line 284) | def test_inference(port: int, verbose: bool = False) -> bool:
  function find_max_gpu_experts (line 338) | def find_max_gpu_experts(
  function run_tuna (line 389) | def run_tuna(

FILE: kt-kernel/python/cli/utils/user_model_registry.py
  class UserModel (line 20) | class UserModel:
    method __post_init__ (line 42) | def __post_init__(self):
    method to_dict (line 49) | def to_dict(self) -> Dict[str, Any]:
    method from_dict (line 54) | def from_dict(cls, data: Dict[str, Any]) -> "UserModel":
    method path_exists (line 58) | def path_exists(self) -> bool:
  class UserModelRegistry (line 63) | class UserModelRegistry:
    method __init__ (line 66) | def __init__(self, registry_file: Optional[Path] = None):
    method load (line 83) | def load(self) -> None:
    method save (line 121) | def save(self) -> None:
    method add_model (line 131) | def add_model(self, model: UserModel) -> None:
    method remove_model (line 147) | def remove_model(self, name: str) -> bool:
    method update_model (line 165) | def update_model(self, name: str, updates: Dict[str, Any]) -> bool:
    method get_model (line 188) | def get_model(self, name: str) -> Optional[UserModel]:
    method get_model_by_id (line 203) | def get_model_by_id(self, model_id: str) -> Optional[UserModel]:
    method list_models (line 218) | def list_models(self) -> List[UserModel]:
    method find_by_path (line 227) | def find_by_path(self, path: str) -> Optional[UserModel]:
    method check_name_conflict (line 246) | def check_name_conflict(self, name: str, exclude_name: Optional[str] =...
    method refresh_status (line 262) | def refresh_status(self) -> Dict[str, List[str]]:
    method get_model_count (line 280) | def get_model_count(self) -> int:
    method suggest_name (line 284) | def suggest_name(self, base_name: str) -> str:

FILE: kt-kernel/python/experts.py
  class KTMoEWrapper (line 26) | class KTMoEWrapper:
    method __new__ (line 53) | def __new__(
    method set_capture_batch_sizes (line 124) | def set_capture_batch_sizes(capture_bs: List[int]):
    method get_capture_batch_sizes (line 137) | def get_capture_batch_sizes() -> List[int]:
    method clear_buffer_cache (line 147) | def clear_buffer_cache():

FILE: kt-kernel/python/experts_base.py
  function generate_gpu_experts_masks (line 21) | def generate_gpu_experts_masks(
  class KExpertsCPUBuffer (line 75) | class KExpertsCPUBuffer:
    method get_buffer (line 89) | def get_buffer(cls, hidden_states: torch.Tensor, num_experts_per_tok):
  class BaseMoEWrapper (line 143) | class BaseMoEWrapper(ABC):
    method __init__ (line 152) | def __init__(
    method load_weights_from_tensors (line 241) | def load_weights_from_tensors(
    method load_weights (line 260) | def load_weights(self, physical_to_logical_map_cpu: torch.Tensor):
    method select_deferred_experts (line 269) | def select_deferred_experts(
    method submit_forward (line 299) | def submit_forward(
    method sync_forward (line 379) | def sync_forward(self, hidden_states: torch.Tensor, cuda_stream) -> to...
    method forward (line 407) | def forward(
    method set_capture_batch_sizes (line 430) | def set_capture_batch_sizes(capture_bs: List[int]):
    method get_capture_batch_sizes (line 446) | def get_capture_batch_sizes() -> List[int]:
    method clear_buffer_cache (line 456) | def clear_buffer_cache():

FILE: kt-kernel/python/utils/amx.py
  class AMXMoEWrapper (line 27) | class AMXMoEWrapper(BaseMoEWrapper):
    method __init__ (line 35) | def __init__(
    method load_weights_from_tensors (line 123) | def load_weights_from_tensors(
    method load_weights (line 180) | def load_weights(self, physical_to_logical_map_cpu: torch.Tensor):
  class NativeMoEWrapper (line 322) | class NativeMoEWrapper(BaseMoEWrapper):
    method __init__ (line 327) | def __init__(
    method load_weights_from_tensors (line 405) | def load_weights_from_tensors(
    method load_weights (line 414) | def load_weights(self, physical_to_logical_map_cpu: torch.Tensor):
    method submit_write_weight_scale_to_buffer (line 544) | def submit_write_weight_scale_to_buffer(
    method sync_write_weight_scale_to_buffer (line 579) | def sync_write_weight_scale_to_buffer(self):

FILE: kt-kernel/python/utils/llamafile.py
  class LlamafileMoEWrapper (line 21) | class LlamafileMoEWrapper(BaseMoEWrapper):
    method __init__ (line 29) | def __init__(
    method load_weights_from_tensors (line 140) | def load_weights_from_tensors(
    method load_weights (line 156) | def load_weights(self, physical_to_logical_map_cpu: Optional[torch.Ten...

FILE: kt-kernel/python/utils/loader.py
  class GGMLQuantizationType (line 19) | class GGMLQuantizationType(IntEnum):
  function translate_name_to_gguf (line 53) | def translate_name_to_gguf(name):
  class SafeTensorLoader (line 102) | class SafeTensorLoader:
    method __init__ (line 114) | def __init__(self, file_path: str):
    method __load_tensor_file_map (line 117) | def __load_tensor_file_map(self, file_path: str):
    method load_tensor (line 156) | def load_tensor(self, key: str, device: str = "cpu"):
    method close_all_handles (line 166) | def close_all_handles(self):
    method load_experts (line 171) | def load_experts(self, base_key: str, device: str = "cpu"):
    method has_tensor (line 236) | def has_tensor(self, name: str):
  class FP8SafeTensorLoader (line 240) | class FP8SafeTensorLoader(SafeTensorLoader):
    method __init__ (line 262) | def __init__(self, file_path: str, scale_suffix: str = None):
    method _detect_format (line 283) | def _detect_format(self):
    method _get_experts_prefix_candidates (line 355) | def _get_experts_prefix_candidates(self, base_key: str) -> list[str]:
    method _get_proj_names (line 370) | def _get_proj_names(self):
    method load_tensor (line 375) | def load_tensor(self, key: str, device: str = "cpu"):
    method load_experts (line 387) | def load_experts(self, base_key: str, device: str = "cpu"):
    method is_per_channel (line 454) | def is_per_channel(self) -> bool:
  class BF16SafeTensorLoader (line 459) | class BF16SafeTensorLoader(SafeTensorLoader):
    method __init__ (line 476) | def __init__(self, file_path: str):
    method _detect_format (line 481) | def _detect_format(self):
    method _get_experts_prefix_candidates (line 511) | def _get_experts_prefix_candidates(self, base_key: str) -> list[str]:
    method _get_proj_names (line 522) | def _get_proj_names(self):
    method load_tensor (line 527) | def load_tensor(self, key: str, device: str = "cpu"):
    method load_experts (line 539) | def load_experts(self, base_key: str, device: str = "cpu"):
    method _resolve_packed_experts_prefix (line 579) | def _resolve_packed_experts_prefix(self, base_key: str) -> str:
    method _load_experts_packed (line 596) | def _load_experts_packed(self, base_key: str, device: str = "cpu"):
  class CompressedSafeTensorLoader (line 623) | class CompressedSafeTensorLoader(SafeTensorLoader):
    method load_experts (line 626) | def load_experts(self, base_key: str, device: str = "cpu"):
  class GGUFLoader (line 678) | class GGUFLoader:
    method __init__ (line 685) | def __init__(self, gguf_path: str):
    method _load_single_file (line 719) | def _load_single_file(self, file_path: str):
    method _load_directory (line 745) | def _load_directory(self, dir_path: str):
    method get_model_config (line 782) | def get_model_config(self, layer_idx: int = 0):
    method print_metadata (line 870) | def print_metadata(self, filter_keywords=None):
    method has_tensor (line 890) | def has_tensor(self, name: str):
    method get_ggml_type (line 895) | def get_ggml_type(self, name: str):
    method get_undequanted_tensor_and_ggml_type (line 902) | def get_undequanted_tensor_and_ggml_type(self, name: str):

FILE: kt-kernel/python/utils/moe_kernel.py
  class GeneralMoEWrapper (line 29) | class GeneralMoEWrapper(BaseMoEWrapper):
    method __init__ (line 37) | def __init__(
    method load_weights_from_tensors (line 123) | def load_weights_from_tensors(
    method load_weights (line 180) | def load_weights(self, physical_to_logical_map_cpu: torch.Tensor):

FILE: kt-kernel/scripts/check.py
  function safe_open_binary_to_tensor (line 21) | def safe_open_binary_to_tensor(file_path):
  function read_safetensor_keys_from_folder (line 42) | def read_safetensor_keys_from_folder(folder_path) -> dict:
  function read_amx_tensor_from_folder (line 84) | def read_amx_tensor_from_folder(folder_path, keys) -> dict:
  function _clean_keys (line 137) | def _clean_keys(keys):
  function combine_tensor_sources (line 145) | def combine_tensor_sources(safetensor_path, amx_path):
  function write_combined_tensor (line 164) | def write_combined_tensor(target_tensor_map: dict, output_path: str):
  function main (line 238) | def main():

FILE: kt-kernel/scripts/check_cpu_features.py
  function check_cpuinfo (line 19) | def check_cpuinfo():
  function main (line 29) | def main():

FILE: kt-kernel/scripts/compare_weights.py
  function unpack_awq_int32_to_int8 (line 25) | def unpack_awq_int32_to_int8(packed: np.ndarray, bits: int = 4) -> np.nd...
  function normalize_tensor_dtype (line 54) | def normalize_tensor_dtype(tensor: np.ndarray, tensor_name: str, is_awq:...
  function load_kt_binary (line 121) | def load_kt_binary(file_path: str) -> np.ndarray:
  function detect_weight_format (line 145) | def detect_weight_format(path: str) -> str:
  function detect_awq_format (line 170) | def detect_awq_format(weights_sample: Dict[str, np.ndarray]) -> bool:
  function load_safetensor_weights (line 196) | def load_safetensor_weights(path: str) -> Dict[str, np.ndarray]:
  function load_kt_weights (line 240) | def load_kt_weights(path: str) -> Dict[str, np.ndarray]:
  function normalize_key (line 312) | def normalize_key(key: str) -> Tuple[int, str, int, str]:
  function compare_weights (line 336) | def compare_weights(
  function main (line 474) | def main():

FILE: kt-kernel/scripts/convert_cpu_weights.py
  function weight_dequant_kernel (line 35) | def weight_dequant_kernel(x_ptr, s_ptr, y_ptr, M, N, BLOCK_SIZE: tl.cons...
  function weight_dequant (line 49) | def weight_dequant(x: torch.Tensor, s: torch.Tensor, block_size: int = 1...
  function load_model_config (line 59) | def load_model_config(input_path: str, input_type: str = None) -> Dict:
  function pack (line 123) | def pack(imatrix: torch.Tensor):
  function unpack (line 145) | def unpack(qmatrix: torch.Tensor):
  function reverse_awq_interleaving (line 167) | def reverse_awq_interleaving(imatrix: torch.Tensor):
  function unpack_reverse_awq_interleaving (line 179) | def unpack_reverse_awq_interleaving(qweight: torch.Tensor, qzeros: torch...
  function pack_column_major_1d (line 209) | def pack_column_major_1d(iweights: torch.Tensor, izeros: torch.Tensor = ...
  class ConverterBase (line 235) | class ConverterBase:
    method __init__ (line 242) | def __init__(
    method _load_input_files (line 273) | def _load_input_files(self):
    method _load_tensor (line 308) | def _load_tensor(self, key: str) -> torch.Tensor:
    method _find_expert_layers (line 318) | def _find_expert_layers(self) -> Dict[int, List[int]]:
    method _convert_layer_experts (line 364) | def _convert_layer_experts(self, layer_idx: int, expert_ids: List[int]...
    method convert (line 371) | def convert(self, resume_layer: int = 0):
    method _copy_config_files (line 458) | def _copy_config_files(self):
    method close (line 471) | def close(self):
  class AWQToColumnMajorConverter (line 476) | class AWQToColumnMajorConverter(ConverterBase):
    method _convert_layer_experts (line 480) | def _convert_layer_experts(self, layer_idx: int, expert_ids: List[int]...
  class OnlineQuantConverter (line 549) | class OnlineQuantConverter(ConverterBase):
    method __init__ (line 556) | def __init__(
    method _dequantize_fp8_blockwise (line 578) | def _dequantize_fp8_blockwise(self, fp8_weight: torch.Tensor, scale_in...
    method _load_binary_tensor (line 609) | def _load_binary_tensor(self, file_path: str) -> torch.Tensor:
    method _load_layer_tensors_from_disk (line 635) | def _load_layer_tensors_from_disk(self, layer_idx: int) -> Dict[str, t...
    method _remove_layer_folder (line 700) | def _remove_layer_folder(self, layer_idx: int):
    method _convert_layer_experts (line 713) | def _convert_layer_experts(self, layer_idx: int, expert_ids: List[int]...
  function main (line 923) | def main():

FILE: kt-kernel/scripts/convert_gpu_weights.py
  function parse_args (line 52) | def parse_args():
  function setup_environment (line 149) | def setup_environment(force_cpu=False):
  function get_torch_dtype (line 171) | def get_torch_dtype(dtype_str):
  function check_dense_layers_and_update_ignore (line 185) | def check_dense_layers_and_update_ignore(model_id, ignore_patterns, trus...
  function load_and_prepare_dataset (line 238) | def load_and_prepare_dataset(dataset_name, dataset_split, num_samples, m...
  function main (line 280) | def main():

FILE: kt-kernel/scripts/convert_kimi_k2_fp8_to_bf16_cpu.py
  function weight_dequant_cpu (line 13) | def weight_dequant_cpu(x: torch.Tensor, s: torch.Tensor, block_size: int...
  function main (line 32) | def main(fp8_path, bf16_path):

FILE: kt-kernel/scripts/convert_moe_to_bf16.py
  function _load_config (line 13) | def _load_config(model_dir: str, config_path: Optional[str]) -> Tuple[in...
  function _dequantize_tensor (line 29) | def _dequantize_tensor(
  function _is_quantized_weight_key (line 56) | def _is_quantized_weight_key(key: str) -> bool:
  function convert_file (line 67) | def convert_file(
  function parse_args (line 132) | def parse_args() -> argparse.Namespace:
  function main (line 159) | def main():

FILE: kt-kernel/setup.py
  function _env_get_bool (line 63) | def _env_get_bool(name: str, default: bool | None = None) -> bool | None:
  function _cmake_onoff (line 75) | def _cmake_onoff(flag: bool) -> str:
  function _forward_bool_env (line 79) | def _forward_bool_env(cmake_args: list[str], env_name: str, cmake_flag: ...
  function _forward_str_env (line 89) | def _forward_str_env(cmake_args: list[str], env_name: str, cmake_flag: s...
  function default_build_type (line 112) | def default_build_type() -> str:
  function detect_parallel_jobs (line 116) | def detect_parallel_jobs() -> str:
  function cpu_feature_flags (line 127) | def cpu_feature_flags() -> list[str]:
  class CMakeExtension (line 137) | class CMakeExtension(Extension):
    method __init__ (line 138) | def __init__(self, name: str, sourcedir: str = ""):
  class CMakeBuild (line 143) | class CMakeBuild(build_ext):
    method run (line 144) | def run(self):
    method detect_cpu_info (line 152) | def detect_cpu_info(self) -> dict:
    method build_extension (line 244) | def build_extension(self, ext: CMakeExtension):
    method build_multi_variants (line 258) | def build_multi_variants(self, ext: CMakeExtension):
    method _build_single_variant (line 432) | def _build_single_variant(self, ext: CMakeExtension):
    method _build_single_variant_impl (line 441) | def _build_single_variant_impl(self, ext: CMakeExtension, extdir: Path...

FILE: kt-kernel/test/ci/ci_register.py
  class HWBackend (line 8) | class HWBackend(Enum):
  class CIRegistry (line 15) | class CIRegistry:
  function register_cpu_ci (line 22) | def register_cpu_ci(est_time: float, suite: str):
  function register_cuda_ci (line 26) | def register_cuda_ci(est_time: float, suite: str):
  function register_amd_ci (line 30) | def register_amd_ci(est_time: float, suite: str):
  class RegistryVisitor (line 41) | class RegistryVisitor(ast.NodeVisitor):
    method __init__ (line 42) | def __init__(self, filename: str):
    method _collect_ci_registry (line 46) | def _collect_ci_registry(self, func_call: ast.Call):
    method visit_Module (line 77) | def visit_Module(self, node):
  function ut_parse_one_file (line 89) | def ut_parse_one_file(filename: str) -> List[CIRegistry]:
  function collect_tests (line 98) | def collect_tests(files: list[str], sanity_check: bool = True) -> List[C...

FILE: kt-kernel/test/ci/ci_utils.py
  function kill_process_tree (line 9) | def kill_process_tree(parent_pid, include_parent: bool = True, skip_pid:...
  class TestFile (line 49) | class TestFile:
  function run_with_timeout (line 54) | def run_with_timeout(
  function run_unittest_files (line 78) | def run_unittest_files(

FILE: kt-kernel/test/per_commit/test_amd_placeholder.py
  function test_amd_placeholder (line 24) | def test_amd_placeholder():

FILE: kt-kernel/test/per_commit/test_basic_cpu.py
  function test_kt_kernel_import (line 29) | def test_kt_kernel_import():
  function test_cpu_infer_initialization (line 38) | def test_cpu_infer_initialization():
  function test_basic_module_attributes (line 49) | def test_basic_module_attributes():
  function run_all_tests (line 58) | def run_all_tests():

FILE: kt-kernel/test/per_commit/test_cuda_placeholder.py
  function test_cuda_placeholder (line 24) | def test_cuda_placeholder():

FILE: kt-kernel/test/per_commit/test_moe_amx_accuracy_int4.py
  function act_fn (line 42) | def act_fn(x):
  function mlp_torch (line 47) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 56) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function test_moe_amx_int4_accuracy (line 92) | def test_moe_amx_int4_accuracy():
  function run_all_tests (line 184) | def run_all_tests():

FILE: kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1.py
  function act_fn (line 42) | def act_fn(x):
  function mlp_torch (line 47) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 56) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function test_moe_amx_int4_1_accuracy (line 92) | def test_moe_amx_int4_1_accuracy():
  function run_all_tests (line 184) | def run_all_tests():

FILE: kt-kernel/test/per_commit/test_moe_amx_accuracy_int4_1k.py
  function act_fn (line 43) | def act_fn(x):
  function mlp_torch (line 48) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 57) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function test_moe_amx_int4_1k_accuracy (line 93) | def test_moe_amx_int4_1k_accuracy():
  function run_all_tests (line 188) | def run_all_tests():

FILE: kt-kernel/test/per_commit/test_moe_amx_accuracy_int8.py
  function act_fn (line 42) | def act_fn(x):
  function mlp_torch (line 47) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 56) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function test_moe_amx_int8_accuracy (line 92) | def test_moe_amx_int8_accuracy():
  function run_all_tests (line 182) | def run_all_tests():

FILE: kt-kernel/test/per_commit/test_moe_amx_bench_int4.py
  function get_git_commit (line 56) | def get_git_commit():
  function get_system_info (line 79) | def get_system_info():
  function record_results (line 131) | def record_results(result, filename):
  function test_moe_amx_int4_benchmark (line 138) | def test_moe_amx_int4_benchmark():
  function run_all_tests (line 296) | def run_all_tests():

FILE: kt-kernel/test/per_commit/test_moe_amx_bench_int4_1.py
  function get_git_commit (line 56) | def get_git_commit():
  function get_system_info (line 79) | def get_system_info():
  function record_results (line 131) | def record_results(result, filename):
  function test_moe_amx_int4_1_benchmark (line 138) | def test_moe_amx_int4_1_benchmark():
  function run_all_tests (line 296) | def run_all_tests():

FILE: kt-kernel/test/per_commit/test_moe_amx_bench_int4_1k.py
  function get_git_commit (line 58) | def get_git_commit():
  function get_system_info (line 81) | def get_system_info():
  function record_results (line 133) | def record_results(result, filename):
  function test_moe_amx_int4_1k_benchmark (line 140) | def test_moe_amx_int4_1k_benchmark():
  function run_all_tests (line 308) | def run_all_tests():

FILE: kt-kernel/test/per_commit/test_moe_amx_bench_int8.py
  function get_git_commit (line 56) | def get_git_commit():
  function get_system_info (line 79) | def get_system_info():
  function record_results (line 131) | def record_results(result, filename):
  function test_moe_amx_int8_benchmark (line 138) | def test_moe_amx_int8_benchmark():
  function run_all_tests (line 296) | def run_all_tests():

FILE: kt-kernel/test/run_suite.py
  function _filter_tests (line 22) | def _filter_tests(
  function run_per_commit (line 34) | def run_per_commit(hw: HWBackend, suite: str):
  function main (line 48) | def main():

FILE: kt-kernel/test/test_generate_gpu_experts_masks.py
  function test_basic (line 14) | def test_basic():
  function test_edge_cases (line 46) | def test_edge_cases():
  function test_performance (line 80) | def test_performance():
  function test_output_properties (line 117) | def test_output_properties():
  function test_determinism (line 138) | def test_determinism():

FILE: kt-sft/csrc/custom_marlin/binding.cpp
  function PYBIND11_MODULE (line 20) | PYBIND11_MODULE(vLLMMarlin, m) {

FILE: kt-sft/csrc/custom_marlin/test_cuda_graph.py
  function setup_seed (line 14) | def setup_seed(seed):
  function get_usable_mem (line 33) | def get_usable_mem():
  function exp_range (line 42) | def exp_range(start, stop, step = 2):
  function timing (line 48) | def timing(func, iters, epochs=100):
  class LinearMarlin (line 88) | class LinearMarlin(nn.Linear):
    method __init__ (line 94) | def __init__(
    method forward (line 168) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor) -> torch....
  function benchLinearMarlin (line 208) | def benchLinearMarlin(input_dim, output_dim):#, out_file
  function printMinMax (line 314) | def printMinMax(tensor):

FILE: kt-sft/csrc/custom_marlin/utils/format24.py
  function _calculate_meta_reordering_scatter_offsets (line 21) | def _calculate_meta_reordering_scatter_offsets(m, meta_ncols, meta_dtype,
  function sparse_semi_structured_from_dense_cutlass (line 52) | def sparse_semi_structured_from_dense_cutlass(dense):
  function sparse_semi_structured_to_dense_cutlass (line 184) | def sparse_semi_structured_to_dense_cutlass(sparse, meta_reordered):
  function mask_creator (line 279) | def mask_creator(tensor):

FILE: kt-sft/csrc/custom_marlin/utils/marlin_24_perms.py
  function get_perms_24 (line 21) | def get_perms_24(num_bits: int):

FILE: kt-sft/csrc/custom_marlin/utils/marlin_perms.py
  function get_perms (line 21) | def get_perms(num_bits: int):

FILE: kt-sft/csrc/custom_marlin/utils/marlin_utils.py
  function is_marlin_supported (line 31) | def is_marlin_supported():
  function marlin_permute_weights (line 35) | def marlin_permute_weights(q_w, size_k, size_n, perm, tile=MARLIN_TILE):
  function marlin_weights (line 50) | def marlin_weights(q_w, size_k, size_n, num_bits, perm):
  function marlin_permute_scales (line 70) | def marlin_permute_scales(s, size_k, size_n, group_size, scale_perm,
  function marlin_quantize (line 81) | def marlin_quantize(
  function inject_24 (line 119) | def inject_24(w, size_k, size_n):
  function check_24 (line 127) | def check_24(w, num_rows_to_sample=50, _verbose=False):
  function compress_quantized_24_weight (line 154) | def compress_quantized_24_weight(q_24, size_k, size_n, num_bits):
  function marlin_24_quantize (line 177) | def marlin_24_quantize(
  function compute_max_diff (line 218) | def compute_max_diff(output, output_ref):
  class MarlinWorkspace (line 223) | class MarlinWorkspace:
    method __init__ (line 225) | def __init__(self, out_features, min_thread_n, max_parallel, device):

FILE: kt-sft/csrc/custom_marlin/utils/quant_utils.py
  function get_pack_factor (line 9) | def get_pack_factor(num_bits):
  function permute_rows (line 14) | def permute_rows(q_w: torch.Tensor, w_ref: torch.Tensor, group_size: int):
  function dequantize_weights (line 40) | def dequantize_weights(qweight, qzeros, scales, g_idx, bits=4, group_siz...
  function quantize_weights (line 67) | def quantize_weights(w: torch.Tensor, num_bits: int, group_size: int,
  function sort_weights (line 137) | def sort_weights(q_w: torch.Tensor, g_idx: torch.Tensor):
  function gptq_pack (line 153) | def gptq_pack(
  function gptq_unpack (line 176) | def gptq_unpack(

FILE: kt-sft/csrc/ktransformers_ext/bench/bench_attention.py
  function bench_linear (line 41) | def bench_linear(cache_seqlen: int):

FILE: kt-sft/csrc/ktransformers_ext/bench/bench_attention_torch.py
  function bench_linear (line 29) | def bench_linear(cache_seqlen: int, device):

FILE: kt-sft/csrc/ktransformers_ext/bench/bench_linear.py
  function bench_linear (line 28) | def bench_linear(quant_mode: str):

FILE: kt-sft/csrc/ktransformers_ext/bench/bench_linear_torch.py
  function bench_linear (line 26) | def bench_linear(quant_mode: str):

FILE: kt-sft/csrc/ktransformers_ext/bench/bench_mlp.py
  function bench_mlp (line 28) | def bench_mlp(quant_mode: str):

FILE: kt-sft/csrc/ktransformers_ext/bench/bench_mlp_torch.py
  function act_fn (line 26) | def act_fn(x):
  function mlp_torch (line 29) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function bench_mlp (line 47) | def bench_mlp(quant_mode: str):

FILE: kt-sft/csrc/ktransformers_ext/bench/bench_moe.py
  function bench_moe (line 31) | def bench_moe(quant_mode: str):

FILE: kt-sft/csrc/ktransformers_ext/bench/bench_moe_amx.py
  function bench_moe (line 29) | def bench_moe(quant_mode: str):

FILE: kt-sft/csrc/ktransformers_ext/bench/bench_moe_torch.py
  function act_fn (line 28) | def act_fn(x):
  function mlp_torch (line 31) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 49) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):
  function bench_moe (line 80) | def bench_moe(quant_mode: str):

FILE: kt-sft/csrc/ktransformers_ext/cpu_backend/backend.cpp
  type bitmask (line 93) | struct bitmask

FILE: kt-sft/csrc/ktransformers_ext/cpu_backend/backend.h
  type ThreadStatus (line 21) | enum ThreadStatus {
  type ThreadState (line 27) | struct ThreadState {
  function class (line 33) | class Backend {

FILE: kt-sft/csrc/ktransformers_ext/cpu_backend/cpuinfer.h
  function class (line 36) | class CPUInfer {
  function submit (line 58) | void submit(std::pair<intptr_t, intptr_t> params) {
  function sync (line 65) | void sync() {
  function submit_with_cuda_stream (line 69) | void submit_with_cuda_stream(intptr_t user_cuda_stream, std::pair<intptr...
  function sync_ (line 80) | static void sync_(void* cpu_infer_ptr) {
  function sync_with_cuda_stream (line 85) | void sync_with_cuda_stream(intptr_t user_cuda_stream) {

FILE: kt-sft/csrc/ktransformers_ext/cpu_backend/shared_mem_buffer.h
  function class (line 19) | class SharedMemBuffer {

FILE: kt-sft/csrc/ktransformers_ext/cpu_backend/task_queue.h
  function class (line 24) | class custom_mutex {
  function class (line 74) | class custom_condition_variable {
  function class (line 119) | class TaskQueue {

FILE: kt-sft/csrc/ktransformers_ext/cpu_backend/vendors/hip.h
  type hip_bfloat16 (line 172) | typedef hip_bfloat16 nv_bfloat16;

FILE: kt-sft/csrc/ktransformers_ext/cpu_backend/vendors/musa.h
  type mt_bfloat16 (line 137) | typedef mt_bfloat16 nv_bfloat16;

FILE: kt-sft/csrc/ktransformers_ext/cuda/binding.cpp
  function PYBIND11_MODULE (line 21) | PYBIND11_MODULE(KTransformersOps, m) {

FILE: kt-sft/csrc/ktransformers_ext/examples/test_mlp.py
  function act_fn (line 31) | def act_fn(x):
  function mlp_torch (line 34) | def mlp_torch(input, gate_proj, up_proj, down_proj):

FILE: kt-sft/csrc/ktransformers_ext/examples/test_moe.py
  function act_fn (line 34) | def act_fn(x):
  function mlp_torch (line 37) | def mlp_torch(input, gate_proj, up_proj, down_proj):
  function moe_torch (line 44) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj):

FILE: kt-sft/csrc/ktransformers_ext/examples/test_sft_amx_moe.py
  function act_fn (line 43) | def act_fn(x):
  function silu_fwd (line 46) | def silu_fwd(x: torch.Tensor) -> torch.Tensor:
  function silu_grad (line 49) | def silu_grad(x: torch.Tensor) -> torch.Tensor:
  class SiLU (line 54) | class SiLU(torch.autograd.Function):
    method forward (line 56) | def forward(ctx, inp):
    method backward (line 61) | def backward(ctx, grad_out):
  function mlp_torch (line 69) | def mlp_torch(x, gate, up, down, req_grad=False):
  function moe_torch (line 78) | def moe_torch(x, eid, w, gate, up, down, req_grad=False):
  function moe_backward_python (line 109) | def moe_backward_python(x, eid, w, gate, up, down, grad_output, gate_u_c...
  function test_amx_moe_two_round (line 316) | def test_amx_moe_two_round():
  function load_bf16 (line 459) | def load_bf16(stub, shape):
  function load_f16 (line 462) | def load_f16(stub, shape):
  function load_f32 (line 465) | def load_f32(stub, shape):
  function load_uint8 (line 468) | def load_uint8(stub, shape):
  function load_int8 (line 471) | def load_int8(stub, shape):
  function load_dump_tensor (line 476) | def load_dump_tensor(experts_idx: int, name: str, shape: tuple, Ename: s...
  function load_bin (line 494) | def load_bin(path, n, k):
  function check_nan (line 501) | def check_nan(name, shape):
  function get_tensor (line 536) | def get_tensor(name, shape) -> torch.Tensor:
  function check_py_cpp (line 551) | def check_py_cpp(name1, name2, shape):
  function manual_check (line 619) | def manual_check(experts_ids):

FILE: kt-sft/csrc/ktransformers_ext/examples/test_sft_moe.py
  function act_fn (line 37) | def act_fn(x):
  class SiLU (line 41) | class SiLU(torch.autograd.Function):
    method forward (line 43) | def forward(ctx, input):
    method backward (line 48) | def backward(ctx, grad_output):
  function mlp_torch (line 55) | def mlp_torch(input, gate_proj, up_proj, down_proj, requires_grad=False):
  function moe_torch (line 68) | def moe_torch(input, expert_ids, weights, gate_proj, up_proj, down_proj,...
  function test_forward (line 100) | def test_forward():
  function test_backward (line 150) | def test_backward():
  function test_backward_2round_with_tflops (line 284) | def test_backward_2round_with_tflops():
  function test_backward_10round_5layer (line 481) | def test_backward_10round_5layer():
  function test_backward_one_vs_many_comparison (line 604) | def test_backward_one_vs_many_comparison():

FILE: kt-sft/csrc/ktransformers_ext/ext_bindings.cpp
  class KVCacheBindings (line 39) | class KVCacheBindings {
    class AttnBindings (line 41) | class AttnBindings {
      type Args (line 43) | struct Args {
      method inner (line 60) | static void inner(void *args) {
      method cpuinfer_interface (line 69) | static std::pair<intptr_t, intptr_t>
    class GetAllKVCacheOneLayerBindings (line 95) | class GetAllKVCacheOneLayerBindings {
      type Args (line 97) | struct Args {
      method inner (line 104) | static void inner(void *args) {
      method cpuinfer_interface (line 110) | static std::pair<intptr_t, intptr_t>
    class GetAndUpdateKVCacheFp16Bindings (line 119) | class GetAndUpdateKVCacheFp16Bindings {
      type Args (line 121) | struct Args {
      method inner (line 133) | static void inner(void *args) {
      method cpuinfer_interface (line 141) | static std::pair<intptr_t, intptr_t>
    class GetKVCacheFp16Bindings (line 159) | class GetKVCacheFp16Bindings {
      type Args (line 161) | struct Args {
      method inner (line 172) | static void inner(void *args) {
      method cpuinfer_interface (line 179) | static std::pair<intptr_t, intptr_t>
    class UpdateKVCacheFp16Bindings (line 196) | class UpdateKVCacheFp16Bindings {
      type Args (line 198) | struct Args {
      method inner (line 210) | static void inner(void *args) {
      method cpuinfer_interface (line 218) | static std::pair<intptr_t, intptr_t>
    class UpdateImportanceBindings (line 237) | class UpdateImportanceBindings {
      type Args (line 239) | struct Args {
      method inner (line 250) | static void inner(void *args) {
      method cpuinfer_interface (line 257) | static std::pair<intptr_t, intptr_t>
    class AttnWithKVCacheBindings (line 274) | class AttnWithKVCacheBindings {
      type Args (line 276) | struct Args {
      method inner (line 294) | static void inner(void *args) {
      method cpuinfer_interface (line 303) | static std::pair<intptr_t, intptr_t>
    class ClearImportanceAllLayersBindings (line 330) | class ClearImportanceAllLayersBindings {
      type Args (line 332) | struct Args {
      method inner (line 340) | static void inner(void *args) {
      method cpuinfer_interface (line 347) | static std::pair<intptr_t, intptr_t>
    class CalcAnchorAllLayersBindinds (line 361) | class CalcAnchorAllLayersBindinds {
      type Args (line 363) | struct Args {
      method inner (line 371) | static void inner(void *args) {
      method cpuinfer_interface (line 378) | static std::pair<intptr_t, intptr_t>
    class LoadKVCacheBindings (line 392) | class LoadKVCacheBindings {
      type Args (line 394) | struct Args {
      method inner (line 399) | static void inner(void *args) {
      method cpuinfer_interface (line 404) | static std::pair<intptr_t, intptr_t>
    class DumpKVCacheBindings (line 411) | class DumpKVCacheBindings {
      type Args (line 413) | struct Args {
      method inner (line 420) | static void inner(void *args) {
      method cpuinfer_interface (line 426) | static std::pair<intptr_t, intptr_t>
  class LinearBindings (line 437) | class LinearBindings {
    class WarmUpBindinds (line 439) | class WarmUpBindinds {
      type Args (line 441) | struct Args {
      method inner (line 445) | static void inner(void *args) {
      method cpuinfer_interface (line 449) | static std::pair<intptr_t, intptr_t>
    class ForwardBindings (line 455) | class ForwardBindings {
      type Args (line 457) | struct Args {
      method inner (line 464) | static void inner(void *args) {
      method cpuinfer_interface (line 469) | static std::pair<intptr_t, intptr_t>
  class MLPBindings (line 479) | class MLPBindings {
    class WarmUpBindinds (line 481) | class WarmUpBindinds {
      type Args (line 483) | struct Args {
      method inner (line 487) | static void inner(void *args) {
      method cpuinfer_interface (line 491) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(MLP &mlp) {
    class ForwardBindings (line 496) | class ForwardBindings {
      type Args (line 498) | struct Args {
      method inner (line 505) | static void inner(void *args) {
      method cpuinfer_interface (line 510) | static std::pair<intptr_t, intptr_t>
  class MOEBindings (line 520) | class MOEBindings {
    class WarmUpBindinds (line 522) | class WarmUpBindinds {
      type Args (line 524) | struct Args {
      method inner (line 528) | static void inner(void *args) {
      method cpuinfer_interface (line 532) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(MOE &moe) {
    class ForwardBindings (line 537) | class ForwardBindings {
      type Args (line 539) | struct Args {
      method inner (line 550) | static void inner(void *args) {
      method cpuinfer_interface (line 556) | static std::pair<intptr_t, intptr_t>
  function sft_moe_forward_wrapper (line 574) | inline void sft_moe_forward_wrapper(
  function sft_moe_backward_wrapper (line 590) | inline void sft_moe_backward_wrapper(
  class SFT_MOEBindings (line 608) | class SFT_MOEBindings {
    class WarmUpBindinds (line 610) | class WarmUpBindinds {
      type Args (line 612) | struct Args {
      method inner (line 616) | static void inner(void *args) {
      method cpuinfer_interface (line 620) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(SFT_MOE &moe) {
    class ForwardBindings (line 625) | class ForwardBindings {
      type Args (line 627) | struct Args {
      method inner (line 643) | static void inner(void *args) {
      method cpuinfer_interface (line 654) | static std::pair<intptr_t, intptr_t>
    class BackwardBindings (line 669) | class BackwardBindings {
      type Args (line 671) | struct Args {
      method inner (line 694) | static void inner(void *args) {
      method cpuinfer_interface (line 708) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(
  class AMX_MOEBindings (line 731) | class AMX_MOEBindings {
    class WarmUpBindings (line 733) | class WarmUpBindings {
      type Args (line 735) | struct Args {
      method inner (line 739) | static void inner(void *args) {
      method cpuinfer_interface (line 743) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(AMX_MOE<T> &...
    class LoadWeightsBindings (line 748) | class LoadWeightsBindings {
      type Args (line 750) | struct Args {
      method inner (line 754) | static void inner(void *args) {
      method cpuinfer_interface (line 758) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(AMX_MOE<T> &...
    class ForwardBindings (line 763) | class ForwardBindings {
      type Args (line 765) | struct Args {
      method inner (line 776) | static void inner(void *args) {
      method cpuinfer_interface (line 782) | static std::pair<intptr_t, intptr_t>
  class SFT_AMX_MOEBindings (line 802) | class SFT_AMX_MOEBindings {
    class WarmUpBindings (line 804) | class WarmUpBindings {
      type Args (line 806) | struct Args {
      method inner (line 810) | static void inner(void *args) {
      method cpuinfer_interface (line 814) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(SFT_AMX_MOE<...
    class LoadWeightsBindings (line 819) | class LoadWeightsBindings {
      type Args (line 821) | struct Args {
      method inner (line 825) | static void inner(void *args) {
      method cpuinfer_interface (line 829) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(SFT_AMX_MOE<...
    class ForwardBindings (line 834) | class ForwardBindings {
      type Args (line 836) | struct Args {
      method inner (line 846) | static void inner(void *args) {
      method cpuinfer_interface (line 852) | static std::pair<intptr_t, intptr_t>
    class BackwardBindings (line 867) | class BackwardBindings {
      type Args (line 869) | struct Args {
      method inner (line 881) | static void inner(void *args) {
      method cpuinfer_interface (line 894) | static std::pair<intptr_t, intptr_t> cpuinfer_interface(
  function PYBIND11_MODULE (line 916) | PYBIND11_MODULE(cpuinfer_ext, m) {

FILE: kt-sft/csrc/ktransformers_ext/operators/amx/debug_sft_moe.hpp
  function __m512 (line 76) | static inline __m512 sigmoid(__m512 x) {
  function __m512 (line 83) | static inline __m512 act_fn_1(__m512 x) {
  function __m512 (line 88) | static inline __m512 act_fn_grad(__m512 x) {
  function int8_row_to_string (line 107) | std::string int8_row_to_string(const int8_t* row, int len) {
  type SFT_AMX_MOEConfig (line 116) | struct SFT_AMX_MOEConfig {
    method SFT_AMX_MOEConfig (line 126) | SFT_AMX_MOEConfig() {}
    method SFT_AMX_MOEConfig (line 128) | SFT_AMX_MOEConfig(int expert_num, int routed_expert_num, int hidden_si...
  class SFT_AMX_MOE (line 135) | class SFT_AMX_MOE {
    method SFT_AMX_MOE (line 207) | SFT_AMX_MOE(SFT_AMX_MOEConfig config) {
    method transpose_expert (line 384) | void transpose_expert(const void* src, void* dst, int R, int C, Backen...
    method load_weights (line 400) | void load_weights(Backend *backend) {
    method warm_up (line 536) | void warm_up(Backend *backend) {}
    method forward (line 538) | void forward(int qlen, int k, const uint64_t *expert_ids, const float ...
    method backward (line 696) | void backward(int qlen, int k, const uint64_t *expert_ids, const float...

FILE: kt-sft/csrc/ktransformers_ext/operators/amx/debug_tools_sft_moe.hpp
  function get_env_or_default (line 10) | inline std::string get_env_or_default(const char *var_name, const std::s...
  function dump_grad_bin (line 21) | inline void dump_grad_bin(const std::string &file_name,
  function dump_bin (line 55) | inline void dump_bin(std::string file_name, float *data, size_t count) {
  function dump_bin (line 62) | inline void dump_bin(std::string file_name, int64_t *data, size_t count) {
  function dump_bin (line 69) | inline void dump_bin(std::string file_name, uint8_t *data, size_t count) {

FILE: kt-sft/csrc/ktransformers_ext/operators/amx/la/amx.hpp
  type amx (line 41) | namespace amx {
    function enable_amx (line 63) | inline bool enable_amx() {
    type TileConfig (line 80) | struct alignas(64) TileConfig {
      method TileConfig (line 89) | TileConfig() {
      method set_row_col (line 97) | void set_row_col(int i, uint8_t row, uint16_t col) {
      method set_config (line 102) | void set_config() { _tile_loadconfig(this); }
      method load_data (line 104) | static void load_data(int to, void *from, size_t stride) {
      method store_data (line 135) | static void store_data(int from, void *to, size_t stride) {
    function debug_tile (line 169) | inline void debug_tile(int t) {
    function debug_tiles (line 182) | inline void debug_tiles(int to = 8) {
    function debug_m512 (line 188) | inline void debug_m512(__m512 x) {
    function transpose_16x16_32bit (line 198) | inline void transpose_16x16_32bit(__m512i *v) {
    function transpose_16x16_32bit (line 273) | inline void transpose_16x16_32bit(__m512i *v, size_t stride) {
    type GemmKernel224BF (line 348) | struct GemmKernel224BF {
      method recommended_nth (line 363) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 365) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 371) | static void config() {
      method load_a (line 390) | static void load_a(dt *a, size_t lda) {
      method load_b (line 395) | static void load_b(dt *b, size_t ldb) {
      method clean_c (line 400) | static void clean_c() {
      method load_c (line 407) | static void load_c(output_t *c, size_t ldc) {
      method store_c (line 414) | static void store_c(output_t *c, size_t ldc) {
      method run_tile (line 421) | static void run_tile() {
      type BufferA (line 428) | struct BufferA {
        method required_size (line 432) | static size_t required_size(int max_m, int k) { return max_m * k *...
        method BufferA (line 434) | BufferA(int max_m, int k, void *ptr) : max_m(max_m), k(k) {
        method from_mat (line 441) | void from_mat(int m, ggml_bf16_t *src, int ith, int nth) {
        method ggml_bf16_t (line 460) | ggml_bf16_t *get_submat(int m, int k, int m_begin, int k_begin) {
      type BufferB (line 469) | struct BufferB {
        method required_size (line 473) | static size_t required_size(int n, int k) { return n * k * sizeof(...
        method BufferB (line 475) | BufferB(int n, int k, void *ptr) : n(n), k(k) {
        method from_mat (line 482) | void from_mat(ggml_bf16_t *src, int ith, int nth) {
        method ggml_bf16_t (line 505) | ggml_bf16_t *get_submat(int n, int k, int n_begin, int k_begin) {
      type BufferC (line 516) | struct BufferC {
        method required_size (line 520) | static size_t required_size(int max_m, int n) { return max_m * n *...
        method BufferC (line 522) | BufferC(int max_m, int n, void *ptr) : max_m(max_m), n(n) {
        method to_mat (line 529) | void to_mat(int m, ggml_bf16_t *dst, int ith, int nth) {
    type GemmKernel224Int8 (line 558) | struct GemmKernel224Int8 {
      method recommended_nth (line 573) | static int recommended_nth(int n) { return (n + N_BLOCK - 1) / N_BLO...
      method split_range_n (line 575) | static std::pair<int, int> split_range_n(int n, int ith, int nth) {
      method config (line 581) | static void config() {
      method load_a (line 600) | static void load_a(dt *a, size_t lda) {
      method load_b (line 605) | static void load_b(dt *b, size_t ldb) {
      method clean_c (line 610) | static void clean_c() {
      method load_c (line 617) | static void load_c(output_t *c, size_t ldc) {
      method store_c (line 624) | static void store_c(output_t *c, size_t ldc) {
      method run_tile (line 631) | static void run_tile() {
      type BufferA (line 638) | struct BufferA {
        method required_size (line 643) | static size_t required_size(int max_m, int k) { return max_m * k *...
        method BufferA (line 645) | BufferA(int max_m, int k, void *ptr) : max_m(max_m), k(k) {
        method from_mat (line 653) | void from_mat(int m, ggml_bf16_t *src, int ith, int nth) {
      type BufferB (line 708) | struct BufferB {
        method required_size (line 713) | static size_t required_size(int n, int k) { return n * k * sizeof(...
        method BufferB (line 715) | BufferB(int n, int k, void *ptr) : n(n), k(k) {
        method from_mat (line 723) | void from_mat(ggml_bf16_t *src, int ith, int nth) {
      type BufferC (line 787) | struct BufferC {
        method required_size (line 791) | static size_t required_size(int max_m, int n) { return max_m * n *...
        method BufferC (line 793) | BufferC(int max_m, int n, void *ptr) : max_m(max_m), n(n) {
        method to_mat (line 800) | void to_mat(int m, ggml_bf16_t *dst, int ith, int nth) {
    function mat_mul (line 829) | inline void mat_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...
    function __m512i (line 884) | inline __m512i _mm512_dpbssd_epi32(__m512i src, __m512i a, __m512i b) {
    function mat_mul (line 901) | inline void mat_mul(int m, int n, int k, std::shared_ptr<GemmKernel224...

FILE: kt-sft/csrc/ktransformers_ext/operators/amx/la/utils.hpp
  function T (line 16) | T* offset_pointer(T* ptr, std::size_t byte_offset) {
  function T (line 21) | const T* offset_pointer(const T* ptr, std::size_t byte_offset) {
  function T (line 26) | T* offset_pointer_row_major(T* t, int row, int col, std::size_t ld) {
  function T (line 31) | T* offset_pointer_col_major(T* t, int row, int col, std::size_t ld) {
  function avx512_copy_32xbf16 (line 35) | static inline void avx512_copy_32xbf16(__m512i* src, __m512i* dst) {
  function avx512_32xfp32_to_32xbf16 (line 39) | static inline void avx512_32xfp32_to_32xbf16(__m512* src0, __m512* src1,...
  function avx512_32xbf16_to_32xfp32 (line 43) | static inline void avx512_32xbf16_to_32xfp32(__m512i* src, __m512* dst0,...

FILE: kt-sft/csrc/ktransformers_ext/operators/amx/moe.hpp
  function __m512 (line 38) | static inline __m512 exp_avx512(__m512 x) {
  function __m512 (line 63) | static inline __m512 act_fn(__m512 gate_val, __m512 up_val) {
  type AMX_MOEConfig (line 72) | struct AMX_MOEConfig {
    method AMX_MOEConfig (line 82) | AMX_MOEConfig() {}
    method AMX_MOEConfig (line 84) | AMX_MOEConfig(int expert_num, int routed_expert_num, int hidden_size, ...
  class AMX_MOE (line 91) | class AMX_MOE {
    method AMX_MOE (line 128) | AMX_MOE(AMX_MOEConfig config) {
    method load_weights (line 223) | void load_weights(Backend *backend) {
    method warm_up (line 271) | void warm_up(Backend *backend) {}
    method forward (line 273) | void forward(int qlen, int k, const uint64_t *expert_ids, const float ...

FILE: kt-sft/csrc/ktransformers_ext/operators/amx/sft_moe.hpp
  function __m512 (line 42) | static inline __m512 sigmoid(__m512 x) {
  function __m512 (line 49) | static inline __m512 act_fn_1(__m512 x) {
  function __m512 (line 54) | static inline __m512 act_fn_grad(__m512 x) {
  type SFT_AMX_MOEConfig (line 63) | struct SFT_AMX_MOEConfig {
    method SFT_AMX_MOEConfig (line 73) | SFT_AMX_MOEConfig() {}
    method SFT_AMX_MOEConfig (line 75) | SFT_AMX_MOEConfig(int expert_num, int routed_expert_num, int hidden_si...
  class SFT_AMX_MOE (line 82) | class SFT_AMX_MOE {
    method SFT_AMX_MOE (line 160) | SFT_AMX_MOE(SFT_AMX_MOEConfig config) {
    method transpose_expert (line 357) | void transpose_expert(const void* src, void* dst, int R, int C, Backen...
    method load_weights (line 373) | void load_weights(Backend *backend) {
    method warm_up (line 463) | void warm_up(Backend *backend) {}
    method forward (line 465) | void forward(int qlen, int k, const uint64_t *expert_ids, const float ...
    method backward (line 588) | void backward(int qlen, int k, const uint64_t *expert_ids, const float...

FILE: kt-sft/csrc/ktransformers_ext/operators/kvcache/kvcache.h
  type AnchorType (line 63) | enum AnchorType {
  type RetrievalType (line 94) | enum RetrievalType {
  type KVCacheConfig (line 122) | struct KVCacheConfig {
  function class (line 193) | class KVCache {

FILE: kt-sft/csrc/ktransformers_ext/operators/kvcache/kvcache_utils.cpp
  function ggml_type_to_string (line 15) | std::string ggml_type_to_string(ggml_type type) {
  function AnchorTypeToString (line 28) | std::string AnchorTypeToString(AnchorType type) {
  function RetrievalTypeToString (line 43) | std::string RetrievalTypeToString(RetrievalType type) {
  function ggml_vec_scale_f32 (line 1130) | void ggml_vec_scale_f32(const int n, float *y, const float v) {

FILE: kt-sft/csrc/ktransformers_ext/operators/llamafile/conversion.h
  function to_float (line 16) | inline void to_float(const void* input, float* output, int size, ggml_ty...
  function from_float (line 24) | inline void from_float(const float* input, void* output, int size, ggml_...

FILE: kt-sft/csrc/ktransformers_ext/operators/llamafile/linear.h
  type LinearConfig (line 27) | struct LinearConfig {

FILE: kt-sft/csrc/ktransformers_ext/operators/llamafile/mlp.cpp
  function act_fn (line 49) | static float act_fn(float x) { return x / (1.0f + expf(-x)); }

FILE: kt-sft/csrc/ktransformers_ext/operators/llamafile/mlp.h
  type MLPConfig (line 27) | struct MLPConfig {

FILE: kt-sft/csrc/ktransformers_ext/operators/llamafile/moe.cpp
  function act_fn (line 133) | static float act_fn(float x) {

FILE: kt-sft/csrc/ktransformers_ext/operators/llamafile/moe.h
  type MOEConfig (line 27) | struct MOEConfig {

FILE: kt-sft/csrc/ktransformers_ext/operators/llamafile/sft_moe.cpp
  function act_fn (line 193) | static float act_fn(float x) {
  function SFT_MoEForwardCache (line 222) | SFT_MoEForwardCache* SFT_MOE::fwd_cache_ptr()
  function act_fn_grad (line 485) | static float act_fn_grad(float x) {

FILE: kt-sft/csrc/ktransformers_ext/operators/llamafile/sft_moe.h
  type SFT_MOEConfig (line 28) | struct SFT_MOEConfig {

FILE: kt-sft/csrc/ktransformers_ext/operators/llamafile/sft_moe_forward_cache.h
  type SFT_MoEForwardCache (line 4) | struct SFT_MoEForwardCache {

FILE: kt-sft/csrc/ktransformers_ext/vendors/hip.h
  type hip_bfloat16 (line 172) | typedef hip_bfloat16 nv_bfloat16;

FILE: kt-sft/csrc/ktransformers_ext/vendors/musa.h
  type mt_bfloat16 (line 137) | typedef mt_bfloat16 nv_bfloat16;

FILE: kt-sft/ktransformers/configs/model_config/configuration_deepseek.py
  class DeepseekV2Config (line 7) | class DeepseekV2Config(PretrainedConfig):
    method __init__ (line 115) | def __init__(

FILE: kt-sft/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/format_24.py
  function _calculate_meta_reordering_scatter_offsets (line 21) | def _calculate_meta_reordering_scatter_offsets(m, meta_ncols, meta_dtype,
  function sparse_semi_structured_from_dense_cutlass (line 52) | def sparse_semi_structured_from_dense_cutlass(dense):
  function sparse_semi_structured_to_dense_cutlass (line 184) | def sparse_semi_structured_to_dense_cutlass(sparse, meta_reordered):
  function mask_creator (line 279) | def mask_creator(tensor):

FILE: kt-sft/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_24_perms.py
  function get_perms_24 (line 16) | def get_perms_24(num_bits: int):

FILE: kt-sft/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_perms.py
  function get_perms (line 16) | def get_perms(num_bits: int):

FILE: kt-sft/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/marlin_utils.py
  function is_marlin_supported (line 29) | def is_marlin_supported():
  function marlin_permute_weights (line 33) | def marlin_permute_weights(q_w, size_k, size_n, perm, tile=MARLIN_TILE):
  function marlin_weights (line 48) | def marlin_weights(q_w, size_k, size_n, num_bits, perm):
  function marlin_permute_scales (line 68) | def marlin_permute_scales(s, size_k, size_n, group_size, scale_perm,
  function marlin_quantize (line 79) | def marlin_quantize(
  function vllm_marlin_quantize (line 117) | def vllm_marlin_quantize(
  function inject_24 (line 155) | def inject_24(w, size_k, size_n):
  function check_24 (line 163) | def check_24(w, num_rows_to_sample=50, _verbose=False):
  function compress_quantized_24_weight (line 190) | def compress_quantized_24_weight(q_24, size_k, size_n, num_bits):
  function marlin_24_quantize (line 213) | def marlin_24_quantize(
  function compute_max_diff (line 254) | def compute_max_diff(output, output_ref):
  class MarlinWorkspace (line 259) | class MarlinWorkspace:
    method __init__ (line 261) | def __init__(self, out_features, min_thread_n, max_parallel, device):

FILE: kt-sft/ktransformers/ktransformers_ext/operators/custom_marlin/quantize/utils/quant_utils.py
  function get_pack_factor (line 9) | def get_pack_factor(num_bits):
  function permute_rows (line 14) | def permute_rows(q_w: torch.Tensor, group_size: int):
  function quantize_weights (line 36) | def quantize_weights(w: torch.Tensor, num_bits: int, group_size: int,
  function sort_weights (line 101) | def sort_weights(q_w: torch.Tensor, g_idx: torch.Tensor):
  function gptq_pack (line 117) | def gptq_pack(

FILE: kt-sft/ktransformers/ktransformers_ext/triton/fp8gemm.py
  function act_quant_kernel (line 11) | def act_quant_kernel(x_ptr, y_ptr, s_ptr, BLOCK_SIZE: tl.constexpr):
  function act_quant (line 34) | def act_quant(x: torch.Tensor, block_size: int = 128) -> Tuple[torch.Ten...
  function weight_dequant_kernel (line 57) | def weight_dequant_kernel(x_ptr, s_ptr, y_ptr, M, N, BLOCK_SIZE: tl.cons...
  function weight_dequant (line 85) | def weight_dequant(x: torch.Tensor, s: torch.Tensor, block_size: int = 1...
  function fp8_gemm_kernel (line 117) | def fp8_gemm_kernel(a_ptr, b_ptr, c_ptr,
  function fp8_gemm (line 172) | def fp8_gemm(a: torch.Tensor, a_s: torch.Tensor, b: torch.Tensor, b_s: t...

FILE: kt-sft/ktransformers/local_chat.py
  function print_module_tree (line 49) | def print_module_tree(module, indent=0):
  function write_to_file (line 56) | def write_to_file(content, file_path: str = 'ktransformers/mark_content....
  function local_chat (line 87) | def local_chat(

FILE: kt-sft/ktransformers/lora_test_module.py
  class TestModelLora (line 30) | class TestModelLora(nn.Module):
    method __init__ (line 31) | def __init__(self):
    method forward (line 51) | def forward(self, x):
  class TestModelBase (line 54) | class TestModelBase(nn.Module):
    method __init__ (line 55) | def __init__(self):
    method forward (line 73) | def forward(self, x):
  class TestModelTorch (line 80) | class TestModelTorch(nn.Module):
    method __init__ (line 81) | def __init__(self):
    method forward (line 98) | def forward(self, x):
  class BaseModel (line 130) | class BaseModel(nn.Module):
    method __init__ (line 131) | def __init__(self):
    method forward (line 135) | def forward(self, x):

FILE: kt-sft/ktransformers/models/configuration_deepseek.py
  class DeepseekV2Config (line 11) | class DeepseekV2Config(PretrainedConfig):
    method __init__ (line 113) | def __init__(

FILE: kt-sft/ktransformers/models/configuration_deepseek_v3.py
  class DeepseekV3Config (line 7) | class DeepseekV3Config(PretrainedConfig):
    method __init__ (line 106) | def __init__(

FILE: kt-sft/ktransformers/models/configuration_llama.py
  class LlamaConfig (line 26) | class LlamaConfig(PretrainedConfig):
    method __init__ (line 143) | def __init__(

FILE: kt-sft/ktransformers/models/configuration_qwen2_moe.py
  class Qwen2MoeConfig (line 24) | class Qwen2MoeConfig(PretrainedConfig):
    method __init__ (line 115) | def __init__(

FILE: kt-sft/ktransformers/models/configuration_qwen3_moe.py
  class Qwen3MoeConfig (line 25) | class Qwen3MoeConfig(PretrainedConfig):
    method __init__ (line 161) | def __init__(

FILE: kt-sft/ktransformers/models/custom_cache.py
  class StaticCache (line 19) | class StaticCache(transformers.StaticCache):
    method __init__ (line 37) | def __init__(self, config: PretrainedConfig, max_batch_size: int, max_...
    method update (line 112) | def update(
    method get_seq_length (line 154) | def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
    method change_seq_length (line 161) | def change_seq_length(self, bias: Optional[int] = 0) -> int:
    method get_max_length (line 169) | def get_max_length(self) -> Optional[int]:
    method reset (line 173) | def reset(self):
    method remove_suffix (line 182) | def remove_suffix(self, start_pos):
    method get_max_cache_shape (line 193) | def get_max_cache_shape(self) -> Tuple[int, int, int, int]:
  class KDeepSeekV3Cache (line 197) | class KDeepSeekV3Cache(nn.Module):
    method __init__ (line 198) | def __init__(
    method load (line 216) | def load(self, inference_context: "sched_ext.InferenceContext"):
    method update (line 224) | def update(
    method get_page_table (line 260) | def get_page_table(self, cache_position: torch.Tensor, q_indptr: torch...
  class KGQACache (line 278) | class KGQACache(nn.Module):
    method __init__ (line 279) | def __init__(
    method load (line 296) | def load(self, inference_context: "sched_ext.InferenceContext"):
    method get_page_table (line 311) | def get_page_table(self, cache_position: torch.Tensor, q_indptr: torch...
    method get_k_cache (line 329) | def get_k_cache(self, layer_idx):
    method get_v_cache (line 332) | def get_v_cache(self, layer_idx):

FILE: kt-sft/ktransformers/models/custom_modeling_deepseek_v2.py
  class KDeepseekV2ForCausalLM (line 21) | class KDeepseekV2ForCausalLM(DeepseekV2PreTrainedModel):
    method __init__ (line 25) | def __init__(
    method init_wrapper (line 40) | def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pag...
    method batch_embeddings (line 57) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method forward (line 71) | def forward(
    method flash_infer_attn_plan (line 140) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: kt-sft/ktransformers/models/custom_modeling_deepseek_v3.py
  class KDeepseekV3ForCausalLM (line 27) | class KDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
    method __init__ (line 31) | def __init__(
    method init_wrapper (line 43) | def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pag...
    method batch_embeddings (line 61) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method forward (line 75) | def forward(
    method flash_infer_attn_plan (line 136) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: kt-sft/ktransformers/models/custom_modeling_qwen2_moe.py
  class KQwen2MoeForCausalLM (line 27) | class KQwen2MoeForCausalLM(Qwen2MoePreTrainedModel):
    method __init__ (line 31) | def __init__(
    method init_wrapper (line 44) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba...
    method batch_embeddings (line 48) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method forward (line 62) | def forward(
    method flash_infer_attn_plan (line 120) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: kt-sft/ktransformers/models/custom_modeling_qwen3_moe.py
  class KQwen3MoeForCausalLM (line 27) | class KQwen3MoeForCausalLM(Qwen3MoePreTrainedModel):
    method __init__ (line 31) | def __init__(
    method init_wrapper (line 44) | def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_ba...
    method batch_embeddings (line 48) | def batch_embeddings(self, batch: ForwardBatchInput, device="cuda:0"):
    method forward (line 62) | def forward(
    method flash_infer_attn_plan (line 120) | def flash_infer_attn_plan(self, batch: ForwardBatchInput, bsz_tensors,...

FILE: kt-sft/ktransformers/models/modeling_deepseek.py
  function _get_unpad_data (line 89) | def _get_unpad_data(attention_mask):
  class DeepseekV2RMSNorm (line 103) | class DeepseekV2RMSNorm(nn.Module):
    method __init__ (line 104) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 113) | def forward(self, hidden_states):
  class DeepseekV2RotaryEmbedding (line 124) | class DeepseekV2RotaryEmbedding(nn.Module):
    method __init__ (line 125) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method forward (line 137) | def forward(self, x, position_ids):
  class DeepseekV2LinearScalingRotaryEmbedding (line 153) | class DeepseekV2LinearScalingRotaryEmbedding(DeepseekV2RotaryEmbedding):
    method __init__ (line 156) | def __init__(
    method _set_cos_sin_cache (line 168) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class DeepseekV2DynamicNTKScalingRotaryEmbedding (line 183) | class DeepseekV2DynamicNTKScalingRotaryEmbedding(DeepseekV2RotaryEmbeddi...
    method __init__ (line 186) | def __init__(
    method _set_cos_sin_cache (line 198) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function yarn_find_correction_dim (line 223) | def yarn_find_correction_dim(
  function yarn_find_correction_range (line 232) | def yarn_find_correction_range(
  function yarn_get_mscale (line 244) | def yarn_get_mscale(scale=1, mscale=1):
  function yarn_linear_ramp_mask (line 250) | def yarn_linear_ramp_mask(min, max, dim):
  class DeepseekV2YarnRotaryEmbedding (line 258) | class DeepseekV2YarnRotaryEmbedding(DeepseekV2RotaryEmbedding):
    method __init__ (line 259) | def __init__(
    method forward (line 314) | def forward(self, x, position_ids):
  function rotate_half (line 330) | def rotate_half(x):
  function apply_rotary_pos_emb (line 338) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  class DeepseekV2MLP (line 368) | class DeepseekV2MLP(nn.Module):
    method __init__ (line 369) | def __init__(self, config, hidden_size=None, intermediate_size=None):
    method forward (line 382) | def forward(self, x):
  class MoEGate (line 387) | class MoEGate(nn.Module):
    method __init__ (line 388) | def __init__(self, config):
    method reset_parameters (line 409) | def reset_parameters(self) -> None:
    method forward (line 414) | def forward(self, hidden_states):
  class AddAuxiliaryLoss (line 494) | class AddAuxiliaryLoss(torch.autograd.Function):
    method forward (line 501) | def forward(ctx, x, loss):
    method backward (line 508) | def backward(ctx, grad_output):
  class DeepseekV2MoE (line 514) | class DeepseekV2MoE(nn.Module):
    method __init__ (line 519) | def __init__(self, config):
    method forward (line 559) | def forward(self, hidden_states):
    method moe_infer (line 582) | def moe_infer(self, x, topk_ids, topk_weight):
  function repeat_kv (line 658) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class DeepseekV2Attention (line 672) | class DeepseekV2Attention(nn.Module):
    method __init__ (line 675) | def __init__(self, config: DeepseekV2Config, layer_idx: Optional[int] ...
    method _init_rope (line 742) | def _init_rope(self):
    method _shape (line 788) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 795) | def forward(
  class DeepseekV2FlashAttention2 (line 894) | class DeepseekV2FlashAttention2(DeepseekV2Attention):
    method __init__ (line 901) | def __init__(self, *args, **kwargs):
    method forward (line 909) | def forward(
    method _flash_attention_forward (line 1039) | def _flash_attention_forward(
    method _upad_input (line 1130) | def _upad_input(
  class DeepseekV2DecoderLayer (line 1181) | class DeepseekV2DecoderLayer(nn.Module):
    method __init__ (line 1182) | def __init__(self, config: DeepseekV2Config, layer_idx: int):
    method forward (line 1206) | def forward(
  class DeepseekV2PreTrainedModel (line 1292) | class DeepseekV2PreTrainedModel(PreTrainedModel):
    method _init_weights (line 1302) | def _init_weights(self, module):
  class DeepseekV2Model (line 1388) | class DeepseekV2Model(DeepseekV2PreTrainedModel):
    method __init__ (line 1396) | def __init__(self, config: DeepseekV2Config):
    method get_input_embeddings (line 1417) | def get_input_embeddings(self):
    method set_input_embeddings (line 1420) | def set_input_embeddings(self, value):
    method forward (line 1424) | def forward(
    method _update_causal_mask (line 1564) | def _update_causal_mask(
  class DeepseekV2ForCausalLM (line 1645) | class DeepseekV2ForCausalLM(DeepseekV2PreTrainedModel):
    method __init__ (line 1648) | def __init__(self, config):
    method get_input_embeddings (line 1657) | def get_input_embeddings(self):
    method set_input_embeddings (line 1660) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1663) | def get_output_embeddings(self):
    method set_output_embeddings (line 1666) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1669) | def set_decoder(self, decoder):
    method get_decoder (line 1672) | def get_decoder(self):
    method forward (line 1679) | def forward(
    method prepare_inputs_for_generation (line 1777) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1855) | def _reorder_cache(past_key_values, beam_idx):
  class DeepseekV2ForSequenceClassification (line 1882) | class DeepseekV2ForSequenceClassification(DeepseekV2PreTrainedModel):
    method __init__ (line 1883) | def __init__(self, config):
    method get_input_embeddings (line 1892) | def get_input_embeddings(self):
    method set_input_embeddings (line 1895) | def set_input_embeddings(self, value):
    method forward (line 1899) | def forward(

FILE: kt-sft/ktransformers/models/modeling_deepseek_v3.py
  function _get_unpad_data (line 83) | def _get_unpad_data(attention_mask):
  class DeepseekV3RMSNorm (line 97) | class DeepseekV3RMSNorm(nn.Module):
    method __init__ (line 98) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 107) | def forward(self, hidden_states):
  class DeepseekV3RotaryEmbedding (line 118) | class DeepseekV3RotaryEmbedding(nn.Module):
    method __init__ (line 119) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 138) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 150) | def forward(self, x, seq_len=None):
  class DeepseekV3LinearScalingRotaryEmbedding (line 162) | class DeepseekV3LinearScalingRotaryEmbedding(DeepseekV3RotaryEmbedding):
    method __init__ (line 165) | def __init__(
    method _set_cos_sin_cache (line 176) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  class DeepseekV3DynamicNTKScalingRotaryEmbedding (line 191) | class DeepseekV3DynamicNTKScalingRotaryEmbedding(DeepseekV3RotaryEmbeddi...
    method __init__ (line 194) | def __init__(
    method _set_cos_sin_cache (line 205) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function yarn_find_correction_dim (line 230) | def yarn_find_correction_dim(
  function yarn_find_correction_range (line 239) | def yarn_find_correction_range(
  function yarn_get_mscale (line 251) | def yarn_get_mscale(scale=1, mscale=1):
  function yarn_linear_ramp_mask (line 257) | def yarn_linear_ramp_mask(min, max, dim):
  class DeepseekV3YarnRotaryEmbedding (line 266) | class DeepseekV3YarnRotaryEmbedding(DeepseekV3RotaryEmbedding):
    method __init__ (line 268) | def __init__(
    method _set_cos_sin_cache (line 289) | def _set_cos_sin_cache(self, seq_len, device, dtype):
  function rotate_half (line 335) | def rotate_half(x):
  function apply_rotary_pos_emb (line 343) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  class DeepseekV3MLP (line 378) | class DeepseekV3MLP(nn.Module):
    method __init__ (line 379) | def __init__(self, config, hidden_size=None, intermediate_size=None):
    method forward (line 392) | def forward(self, x):
  class MoEGate (line 397) | class MoEGate(nn.Module):
    method __init__ (line 398) | def __init__(self, config):
    method reset_parameters (line 421) | def reset_parameters(self) -> None:
    method forward (line 426) | def forward(self, hidden_states):
  class DeepseekV3MoE (line 479) | class DeepseekV3MoE(nn.Module):
    method __init__ (line 484) | def __init__(self, config):
    method forward (line 526) | def forward(self, hidden_states):
    method moe_infer (line 539) | def moe_infer(self, x, topk_ids, topk_weight):
  function repeat_kv (line 616) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class DeepseekV3Attention (line 631) | class DeepseekV3Attention(nn.Module):
    method __init__ (line 634) | def __init__(self, config: DeepseekV3Config, layer_idx: Optional[int] ...
    method _init_rope (line 701) | def _init_rope(self):
    method _shape (line 747) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 754) | def forward(
  class DeepseekV3FlashAttention2 (line 865) | class DeepseekV3FlashAttention2(DeepseekV3Attention):
    method __init__ (line 872) | def __init__(self, *args, **kwargs):
    method forward (line 880) | def forward(
    method _flash_attention_forward (line 1016) | def _flash_attention_forward(
    method _upad_input (line 1096) | def _upad_input(
  class DeepseekV3DecoderLayer (line 1148) | class DeepseekV3DecoderLayer(nn.Module):
    method __init__ (line 1149) | def __init__(self, config: DeepseekV3Config, layer_idx: int):
    method forward (line 1173) | def forward(
  class DeepseekV3PreTrainedModel (line 1259) | class DeepseekV3PreTrainedModel(PreTrainedModel):
    method _init_weights (line 1268) | def _init_weights(self, module):
  class DeepseekV3Model (line 1354) | class DeepseekV3Model(DeepseekV3PreTrainedModel):
    method __init__ (line 1362) | def __init__(self, config: DeepseekV3Config):
    method get_input_embeddings (line 1383) | def get_input_embeddings(self):
    method set_input_embeddings (line 1386) | def set_input_embeddings(self, value):
    method forward (line 1390) | def forward(
    method _update_causal_mask (line 1524) | def _update_causal_mask(
  class DeepseekV3ForCausalLM (line 1604) | class DeepseekV3ForCausalLM(DeepseekV3PreTrainedModel, GenerationMixin):
    method __init__ (line 1607) | def __init__(self, config):
    method get_input_embeddings (line 1616) | def get_input_embeddings(self):
    method set_input_embeddings (line 1619) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1622) | def get_output_embeddings(self):
    method set_output_embeddings (line 1625) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1628) | def set_decoder(self, decoder):
    method get_decoder (line 1631) | def get_decoder(self):
    method forward (line 1638) | def forward(
    method prepare_inputs_for_generation (line 1735) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1800) | def _reorder_cache(past_key_values, beam_idx):
  class DeepseekV3ForSequenceClassification (line 1827) | class DeepseekV3ForSequenceClassification(DeepseekV3PreTrainedModel):
    method __init__ (line 1828) | def __init__(self, config):
    method get_input_embeddings (line 1837) | def get_input_embeddings(self):
    method set_input_embeddings (line 1840) | def set_input_embeddings(self, value):
    method forward (line 1844) | def forward(

FILE: kt-sft/ktransformers/models/modeling_llama.py
  class LlamaRMSNorm (line 59) | class LlamaRMSNorm(nn.Module):
    method __init__ (line 60) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 68) | def forward(self, hidden_states):
  class LlamaRotaryEmbedding (line 79) | class LlamaRotaryEmbedding(nn.Module):
    method __init__ (line 80) | def __init__(
    method _dynamic_frequency_update (line 135) | def _dynamic_frequency_update(self, position_ids, device):
    method forward (line 160) | def forward(self, x, position_ids):
  class LlamaLinearScalingRotaryEmbedding (line 191) | class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding):
    method __init__ (line 194) | def __init__(self, *args, **kwargs):
  class LlamaDynamicNTKScalingRotaryEmbedding (line 203) | class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):
    method __init__ (line 206) | def __init__(self, *args, **kwargs):
  function rotate_half (line 216) | def rotate_half(x):
  function apply_rotary_pos_emb (line 223) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  class LlamaMLP (line 250) | class LlamaMLP(nn.Module):
    method __init__ (line 251) | def __init__(self, config):
    method forward (line 267) | def forward(self, x):
  function repeat_kv (line 301) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class LlamaAttention (line 315) | class LlamaAttention(nn.Module):
    method __init__ (line 318) | def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None):
    method forward (line 365) | def forward(
  class LlamaFlashAttention2 (line 497) | class LlamaFlashAttention2(LlamaAttention):
    method __init__ (line 504) | def __init__(self, *args, **kwargs):
    method forward (line 512) | def forward(
  class LlamaSdpaAttention (line 628) | class LlamaSdpaAttention(LlamaAttention):
    method forward (line 636) | def forward(
  class LlamaDecoderLayer (line 746) | class LlamaDecoderLayer(nn.Module):
    method __init__ (line 747) | def __init__(self, config: LlamaConfig, layer_idx: int):
    method forward (line 761) | def forward(
  class LlamaPreTrainedModel (line 855) | class LlamaPreTrainedModel(PreTrainedModel):
    method _init_weights (line 867) | def _init_weights(self, module):
  class LlamaModel (line 957) | class LlamaModel(LlamaPreTrainedModel):
    method __init__ (line 965) | def __init__(self, config: LlamaConfig):
    method get_input_embeddings (line 986) | def get_input_embeddings(self):
    method set_input_embeddings (line 989) | def set_input_embeddings(self, value):
    method forward (line 993) | def forward(
    method _update_causal_mask (line 1134) | def _update_causal_mask(
  class LlamaForCausalLM (line 1237) | class LlamaForCausalLM(LlamaPreTrainedModel):
    method __init__ (line 1240) | def __init__(self, config):
    method get_input_embeddings (line 1249) | def get_input_embeddings(self):
    method set_input_embeddings (line 1252) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1255) | def get_output_embeddings(self):
    method set_output_embeddings (line 1258) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1261) | def set_decoder(self, decoder):
    method get_decoder (line 1264) | def get_decoder(self):
    method forward (line 1271) | def forward(
    method prepare_inputs_for_generation (line 1377) | def prepare_inputs_for_generation(
  class LlamaForSequenceClassification (line 1441) | class LlamaForSequenceClassification(LlamaPreTrainedModel):
    method __init__ (line 1442) | def __init__(self, config):
    method get_input_embeddings (line 1451) | def get_input_embeddings(self):
    method set_input_embeddings (line 1454) | def set_input_embeddings(self, value):
    method forward (line 1458) | def forward(
  class LlamaForQuestionAnswering (line 1568) | class LlamaForQuestionAnswering(LlamaPreTrainedModel):
    method __init__ (line 1572) | def __init__(self, config):
    method get_input_embeddings (line 1580) | def get_input_embeddings(self):
    method set_input_embeddings (line 1583) | def set_input_embeddings(self, value):
    method forward (line 1587) | def forward(
  class LlamaForTokenClassification (line 1669) | class LlamaForTokenClassification(LlamaPreTrainedModel):
    method __init__ (line 1670) | def __init__(self, config):
    method get_input_embeddings (line 1686) | def get_input_embeddings(self):
    method set_input_embeddings (line 1689) | def set_input_embeddings(self, value):
    method forward (line 1693) | def forward(

FILE: kt-sft/ktransformers/models/modeling_mixtral.py
  function load_balancing_loss_func (line 90) | def load_balancing_loss_func(
  function _get_unpad_data (line 167) | def _get_unpad_data(attention_mask):
  class MixtralRMSNorm (line 180) | class MixtralRMSNorm(nn.Module):
    method __init__ (line 181) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 189) | def forward(self, hidden_states):
    method extra_repr (line 196) | def extra_repr(self):
  class MixtralRotaryEmbedding (line 202) | class MixtralRotaryEmbedding(nn.Module):
    method __init__ (line 203) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method forward (line 216) | def forward(self, x, position_ids):
  function rotate_half (line 232) | def rotate_half(x):
  function apply_rotary_pos_emb (line 241) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  function repeat_kv (line 271) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class MixtralAttention (line 285) | class MixtralAttention(nn.Module):
    method __init__ (line 291) | def __init__(self, config: MixtralConfig, layer_idx: Optional[int] = N...
    method _shape (line 328) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 331) | def forward(
  class MixtralFlashAttention2 (line 407) | class MixtralFlashAttention2(MixtralAttention):
    method forward (line 414) | def forward(
    method _flash_attention_forward (line 550) | def _flash_attention_forward(
    method _upad_input (line 661) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class MixtralSdpaAttention (line 707) | class MixtralSdpaAttention(MixtralAttention):
    method forward (line 715) | def forward(
  class MixtralBlockSparseTop2MLP (line 804) | class MixtralBlockSparseTop2MLP(nn.Module):
    method __init__ (line 805) | def __init__(self, config: MixtralConfig):
    method forward (line 816) | def forward(self, hidden_states):
  class MixtralSparseMoeBlock (line 822) | class MixtralSparseMoeBlock(nn.Module):
    method __init__ (line 834) | def __init__(self, config):
    method forward (line 849) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class MixtralDecoderLayer (line 890) | class MixtralDecoderLayer(nn.Module):
    method __init__ (line 891) | def __init__(self, config: MixtralConfig, layer_idx: int):
    method forward (line 901) | def forward(
  class MixtralPreTrainedModel (line 993) | class MixtralPreTrainedModel(PreTrainedModel):
    method _init_weights (line 1003) | def _init_weights(self, module):
  class MixtralModel (line 1092) | class MixtralModel(MixtralPreTrainedModel):
    method __init__ (line 1100) | def __init__(self, config: MixtralConfig):
    method get_input_embeddings (line 1116) | def get_input_embeddings(self):
    method set_input_embeddings (line 1119) | def set_input_embeddings(self, value):
    method forward (line 1124) | def forward(
    method _update_causal_mask (line 1257) | def _update_causal_mask(
  class MixtralForCausalLM (line 1338) | class MixtralForCausalLM(MixtralPreTrainedModel):
    method __init__ (line 1341) | def __init__(self, config):
    method get_input_embeddings (line 1352) | def get_input_embeddings(self):
    method set_input_embeddings (line 1355) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1358) | def get_output_embeddings(self):
    method set_output_embeddings (line 1361) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1364) | def set_decoder(self, decoder):
    method get_decoder (line 1367) | def get_decoder(self):
    method forward (line 1373) | def forward(
    method prepare_inputs_for_generation (line 1483) | def prepare_inputs_for_generation(
  class MixtralForSequenceClassification (line 1546) | class MixtralForSequenceClassification(MixtralPreTrainedModel):
    method __init__ (line 1547) | def __init__(self, config):
    method get_input_embeddings (line 1556) | def get_input_embeddings(self):
    method set_input_embeddings (line 1559) | def set_input_embeddings(self, value):
    method forward (line 1563) | def forward(
  class MixtralForTokenClassification (line 1662) | class MixtralForTokenClassification(MixtralPreTrainedModel):
    method __init__ (line 1663) | def __init__(self, config):
    method get_input_embeddings (line 1679) | def get_input_embeddings(self):
    method set_input_embeddings (line 1682) | def set_input_embeddings(self, value):
    method forward (line 1686) | def forward(

FILE: kt-sft/ktransformers/models/modeling_qwen2_moe.py
  function load_balancing_loss_func (line 77) | def load_balancing_loss_func(
  function _get_unpad_data (line 154) | def _get_unpad_data(attention_mask):
  class Qwen2MoeRMSNorm (line 167) | class Qwen2MoeRMSNorm(nn.Module):
    method __init__ (line 168) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 176) | def forward(self, hidden_states):
  class Qwen2MoeRotaryEmbedding (line 184) | class Qwen2MoeRotaryEmbedding(nn.Module):
    method __init__ (line 185) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method forward (line 197) | def forward(self, x, position_ids):
  function rotate_half (line 214) | def rotate_half(x):
  function apply_rotary_pos_emb (line 222) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  class Qwen2MoeMLP (line 250) | class Qwen2MoeMLP(nn.Module):
    method __init__ (line 251) | def __init__(self, config, intermediate_size=None):
    method forward (line 261) | def forward(self, x):
  function repeat_kv (line 266) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class Qwen2MoeAttention (line 279) | class Qwen2MoeAttention(nn.Module):
    method __init__ (line 285) | def __init__(self, config: Qwen2MoeConfig, layer_idx: Optional[int] = ...
    method forward (line 322) | def forward(
  class Qwen2MoeFlashAttention2 (line 397) | class Qwen2MoeFlashAttention2(Qwen2MoeAttention):
    method __init__ (line 407) | def __init__(self, *args, **kwargs):
    method forward (line 415) | def forward(
    method _flash_attention_forward (line 547) | def _flash_attention_forward(
    method _upad_input (line 664) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class Qwen2MoeSdpaAttention (line 708) | class Qwen2MoeSdpaAttention(Qwen2MoeAttention):
    method forward (line 716) | def forward(
  class Qwen2MoeSparseMoeBlock (line 804) | class Qwen2MoeSparseMoeBlock(nn.Module):
    method __init__ (line 805) | def __init__(self, config):
    method forward (line 820) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Qwen2MoeDecoderLayer (line 866) | class Qwen2MoeDecoderLayer(nn.Module):
    method __init__ (line 867) | def __init__(self, config: Qwen2MoeConfig, layer_idx: int):
    method forward (line 883) | def forward(
  class Qwen2MoePreTrainedModel (line 980) | class Qwen2MoePreTrainedModel(PreTrainedModel):
    method _init_weights (line 991) | def _init_weights(self, module):
  class Qwen2MoeModel (line 1084) | class Qwen2MoeModel(Qwen2MoePreTrainedModel):
    method __init__ (line 1092) | def __init__(self, config: Qwen2MoeConfig):
    method get_input_embeddings (line 1108) | def get_input_embeddings(self):
    method set_input_embeddings (line 1111) | def set_input_embeddings(self, value):
    method forward (line 1115) | def forward(
    method _update_causal_mask (line 1248) | def _update_causal_mask(
  class Qwen2MoeForCausalLM (line 1329) | class Qwen2MoeForCausalLM(Qwen2MoePreTrainedModel):
    method __init__ (line 1332) | def __init__(self, config):
    method get_input_embeddings (line 1344) | def get_input_embeddings(self):
    method set_input_embeddings (line 1347) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1350) | def get_output_embeddings(self):
    method set_output_embeddings (line 1353) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1356) | def set_decoder(self, decoder):
    method get_decoder (line 1359) | def get_decoder(self):
    method forward (line 1364) | def forward(
    method prepare_inputs_for_generation (line 1473) | def prepare_inputs_for_generation(
    method _reorder_cache (line 1551) | def _reorder_cache(past_key_values, beam_idx):
  class Qwen2MoeForSequenceClassification (line 1576) | class Qwen2MoeForSequenceClassification(Qwen2MoePreTrainedModel):
    method __init__ (line 1577) | def __init__(self, config):
    method get_input_embeddings (line 1586) | def get_input_embeddings(self):
    method set_input_embeddings (line 1589) | def set_input_embeddings(self, value):
    method forward (line 1593) | def forward(
  class Qwen2MoeForTokenClassification (line 1692) | class Qwen2MoeForTokenClassification(Qwen2MoePreTrainedModel):
    method __init__ (line 1693) | def __init__(self, config):
    method get_input_embeddings (line 1709) | def get_input_embeddings(self):
    method set_input_embeddings (line 1712) | def set_input_embeddings(self, value):
    method forward (line 1716) | def forward(

FILE: kt-sft/ktransformers/models/modeling_qwen3_moe.py
  function rotate_half (line 66) | def rotate_half(x):
  function apply_rotary_pos_emb (line 73) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  function repeat_kv (line 100) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  function eager_attention_forward (line 112) | def eager_attention_forward(
  class Qwen3MoeAttention (line 138) | class Qwen3MoeAttention(nn.Module):
    method __init__ (line 141) | def __init__(self, config: Qwen3MoeConfig, layer_idx: int):
    method forward (line 184) | def forward(
  class Qwen3MoeMLP (line 235) | class Qwen3MoeMLP(nn.Module):
    method __init__ (line 236) | def __init__(self, config, intermediate_size=None):
    method forward (line 246) | def forward(self, x):
  class Qwen3MoeSparseMoeBlock (line 251) | class Qwen3MoeSparseMoeBlock(nn.Module):
    method __init__ (line 252) | def __init__(self, config):
    method forward (line 264) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
  class Qwen3MoeRMSNorm (line 304) | class Qwen3MoeRMSNorm(nn.Module):
    method __init__ (line 305) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 314) | def forward(self, hidden_states):
    method extra_repr (line 321) | def extra_repr(self):
  class Qwen3MoeDecoderLayer (line 325) | class Qwen3MoeDecoderLayer(nn.Module):
    method __init__ (line 326) | def __init__(self, config: Qwen3MoeConfig, layer_idx: int):
    method forward (line 345) | def forward(
  function _compute_default_rope_parameters (line 422) | def _compute_default_rope_parameters(
  class Qwen3MoeRotaryEmbedding (line 462) | class Qwen3MoeRotaryEmbedding(nn.Module):
    method __init__ (line 463) | def __init__(self, config: Qwen3MoeConfig, device=None):
    method _dynamic_frequency_update (line 486) | def _dynamic_frequency_update(self, position_ids, device):
    method forward (line 506) | def forward(self, x, position_ids):
  class Qwen3MoePreTrainedModel (line 551) | class Qwen3MoePreTrainedModel(PreTrainedModel):
    method _init_weights (line 565) | def _init_weights(self, module):
  class Qwen3MoeModel (line 648) | class Qwen3MoeModel(Qwen3MoePreTrainedModel):
    method __init__ (line 656) | def __init__(self, config: Qwen3MoeConfig):
    method get_input_embeddings (line 672) | def get_input_embeddings(self):
    method set_input_embeddings (line 675) | def set_input_embeddings(self, value):
    method forward (line 679) | def forward(
    method _update_causal_mask (line 797) | def _update_causal_mask(
    method _prepare_4d_causal_attention_mask_with_cache_position (line 881) | def _prepare_4d_causal_attention_mask_with_cache_position(
  class KwargsForCausalLM (line 951) | class KwargsForCausalLM(): ...
  function load_balancing_loss_func (line 954) | def load_balancing_loss_func(
  class Qwen3MoeForCausalLM (line 1036) | class Qwen3MoeForCausalLM(Qwen3MoePreTrainedModel, GenerationMixin):
    method __init__ (line 1041) | def __init__(self, config):
    method get_input_embeddings (line 1053) | def get_input_embeddings(self):
    method set_input_embeddings (line 1056) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 1059) | def get_output_embeddings(self):
    method set_output_embeddings (line 1062) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 1065) | def set_decoder(self, decoder):
    method get_decoder (line 1068) | def get_decoder(self):
    method forward (line 1074) | def forward(
  class Qwen3MoeForSequenceClassification (line 1200) | class Qwen3MoeForSequenceClassification(Qwen3MoePreTrainedModel):
    method __init__ (line 1201) | def __init__(self, config):
    method get_input_embeddings (line 1210) | def get_input_embeddings(self):
    method set_input_embeddings (line 1213) | def set_input_embeddings(self, value):
    method forward (line 1217) | def forward(
  class Qwen3MoeForTokenClassification (line 1299) | class Qwen3MoeForTokenClassification(Qwen3MoePreTrainedModel):
    method __init__ (line 1300) | def __init__(self, config):
    method get_input_embeddings (line 1316) | def get_input_embeddings(self):
    method set_input_embeddings (line 1319) | def set_input_embeddings(self, value):
    method forward (line 1328) | def forward(
  class Qwen3MoeForQuestionAnswering (line 1387) | class Qwen3MoeForQuestionAnswering(Qwen3MoePreTrainedModel):
    method __init__ (line 1390) | def __init__(self, config):
    method get_input_embeddings (line 1398) | def get_input_embeddings(self):
    method set_input_embeddings (line 1401) | def set_input_embeddings(self, value):
    method forward (line 1405) | def forward(

FILE: kt-sft/ktransformers/moe_test_module.py
  class TestKExpertsTorch (line 31) | class TestKExpertsTorch(unittest.TestCase):
    method setUp (line 32) | def setUp(self):
    method _create_fixed_data (line 41) | def _create_fixed_data(self, device, batch_size=2):
    method _run_single_device_test (line 60) | def _run_single_device_test(self, device, seed=42):
    method test_forward_gradient (line 92) | def test_forward_gradient(self):

FILE: kt-sft/ktransformers/moe_test_module_old.py
  class TestKExpertsTorch (line 31) | class TestKExpertsTorch(unittest.TestCase):
    method setUp (line 32) | def setUp(self):
    method _run_single_device_test (line 46) | def _run_single_device_test(self, device, seed=42):
    method test_forward_gradient (line 101) | def test_forward_gradient(self):

FILE: kt-sft/ktransformers/operators/RoPE.py
  class RotaryEmbedding (line 33) | class RotaryEmbedding(BaseInjectedModule, DeepseekV2RotaryEmbedding):
    method __init__ (line 34) | def __init__(
    method load (line 54) | def load(self):
  class RotaryEmbeddingV3 (line 63) | class RotaryEmbeddingV3(BaseInjectedModule):
    method __init__ (line 64) | def __init__(
    method forward (line 82) | def forward(self, x, position_ids):
    method load (line 97) | def load(self):
    method _init (line 104) | def _init(self, dim, max_position_embeddings, base, device, scaling_fa...
  class RotaryEmbeddingV2 (line 114) | class RotaryEmbeddingV2(BaseInjectedModule, LlamaRotaryEmbedding):
    method __init__ (line 115) | def __init__(
    method load (line 140) | def load(self):
  class YarnRotaryEmbedding (line 151) | class YarnRotaryEmbedding(BaseInjectedModule, DeepseekV2YarnRotaryEmbedd...
    method __init__ (line 152) | def __init__(
    method load (line 181) | def load(self):
  class YarnRotaryEmbeddingV3 (line 221) | class YarnRotaryEmbeddingV3(BaseInjectedModule):
    method __init__ (line 222) | def __init__(
    method load (line 239) | def load(self):
    method forward (line 261) | def forward(self, x, position_ids):
    method _init (line 276) | def _init(
  class DynamicNTKScalingRotaryEmbedding (line 327) | class DynamicNTKScalingRotaryEmbedding(
    method __init__ (line 330) | def __init__(
    method load (line 353) | def load(self):
  class RotaryEmbeddingV4 (line 366) | class RotaryEmbeddingV4(BaseInjectedModule):
    method __init__ (line 367) | def __init__(
    method forward (line 385) | def forward(self, x, position_ids):
    method load (line 400) | def load(self):
    method _init (line 407) | def _init(self, dim, max_position_embeddings, base, device, scaling_fa...
  class KQwen3MoeRotaryEmbedding (line 417) | class KQwen3MoeRotaryEmbedding(BaseInjectedModule, DeepseekV2RotaryEmbed...
    method __init__ (line 418) | def __init__(
    method load (line 438) | def load(self):

FILE: kt-sft/ktransformers/operators/attention.py
  function rotate_half (line 42) | def rotate_half(x):
  class KDeepseekV2Attention (line 49) | class KDeepseekV2Attention(BaseInjectedModule, DeepseekV2Attention):
    method __init__ (line 53) | def __init__(self,
    method get_absorbed (line 70) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method forward_chunck (line 78) | def forward_chunck(
    method forward_linux_triton (line 197) | def forward_linux_triton(
    method forward_linux_flashinfer (line 350) | def forward_linux_flashinfer(
    method forward_windows (line 526) | def forward_windows(
    method forward_xpu (line 592) | def forward_xpu(
    method forward (line 686) | def forward(
  class KLlamaAttention (line 747) | class KLlamaAttention(BaseInjectedModule):
    method __init__ (line 750) | def __init__(self,
    method apply_rotary_pos_emb (line 761) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq...
    method forward (line 786) | def forward(
  class KQwen3MoeAttentionIPEXLLM (line 877) | class KQwen3MoeAttentionIPEXLLM(BaseInjectedModule, Qwen3MoeAttention):
    method __init__ (line 878) | def __init__(self,
    method forward (line 894) | def forward(
  function repeat_kv (line 949) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  function eager_attention_forward (line 961) | def eager_attention_forward(
  class KQwen3MoeAttention (line 987) | class KQwen3MoeAttention(BaseInjectedModule, Qwen3MoeAttention ):
    method __init__ (line 988) | def __init__(self,
    method apply_rotary_pos_emb (line 1004) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq...
    method forward (line 1030) | def forward(self,

FILE: kt-sft/ktransformers/operators/balance_serve_attention.py
  function rotate_half (line 23) | def rotate_half(x):
  class flashinfer_attn (line 29) | class flashinfer_attn(BaseInjectedModule, DeepseekV2Attention):
    method __init__ (line 30) | def __init__(self,
    method get_absorbed (line 45) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method forward (line 62) | def forward(self,
  class KQwen2MoeAttention (line 117) | class KQwen2MoeAttention(BaseInjectedModule, Qwen2MoeAttention):
    method __init__ (line 118) | def __init__(self,
    method apply_rotary_pos_emb (line 134) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq...
    method forward (line 161) | def forward(self,
  class KQwen3MoeAttention (line 203) | class KQwen3MoeAttention(BaseInjectedModule, Qwen3MoeAttention):
    method __init__ (line 204) | def __init__(self,
    method apply_rotary_pos_emb (line 220) | def apply_rotary_pos_emb(self, q, k, cos, sin, position_ids=None, unsq...
    method forward (line 247) | def forward(self,
  class deepseek_torch_attn (line 293) | class deepseek_torch_attn(BaseInjectedModule, DeepseekV2Attention):
    method __init__ (line 294) | def __init__(self,
    method get_absorbed (line 309) | def get_absorbed(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method forward (line 327) | def forward(self,

FILE: kt-sft/ktransformers/operators/base_operator.py
  class BaseInjectedModule (line 12) | class BaseInjectedModule(nn.Module):
    method __init__ (line 14) | def __init__(self,
    method __getattr__ (line 31) | def __getattr__(self, name: str) -> Any:
    method __setattr__ (line 51) | def __setattr__(self, name: str, value: Tensor | nn.Module) -> None:
    method forward (line 60) | def forward(self, *args, **kwargs):
    method load (line 63) | def load(self, gguf_loader=None, adapter_gguf : bool = False):

FILE: kt-sft/ktransformers/operators/cpuinfer.py
  class CPUInferKVCache (line 29) | class CPUInferKVCache:
    method __init__ (line 30) | def __init__(
    method load_kvcache (line 100) | def load_kvcache(self, tensor_file_path: str):
    method dump_kvcache (line 105) | def dump_kvcache(
    method update_cache_total_len (line 135) | def update_cache_total_len(self, cache_total_len: int):
    method attn (line 143) | def attn(
    method update_kvcache_one_block_fp16 (line 256) | def update_kvcache_one_block_fp16(
    method get_kvcache_one_block_fp16 (line 292) | def get_kvcache_one_block_fp16(
    method update_importance_one_block (line 328) | def update_importance_one_block(
    method get_importance_one_block (line 354) | def get_importance_one_block(
    method get_anchor_one_block (line 380) | def get_anchor_one_block(self, anchor: torch.Tensor, layer_id: int, bl...
    method update_anchor_one_block (line 406) | def update_anchor_one_block(
    method calc_anchor_all_layers (line 434) | def calc_anchor_all_layers(
    method clear_importance_all_layers (line 473) | def clear_importance_all_layers(
    method get_cache_total_len (line 512) | def get_cache_total_len(self):
    method update_kvcache_q4 (line 515) | def update_kvcache_q4(
    method update_kvcache_fp16 (line 528) | def update_kvcache_fp16(
    method get_kvcache_q4 (line 550) | def get_kvcache_q4(
    method get_kvcache_fp16 (line 563) | def get_kvcache_fp16(
    method get_and_update_kvcache_fp16 (line 584) | def get_and_update_kvcache_fp16(
    method update_importance (line 606) | def update_importance(
    method get_attn_sparsity (line 627) | def get_attn_sparsity(
    method attn_with_kvcache (line 665) | def attn_with_kvcache(
    method get_all_kvcache_one_layer (line 704) | def get_all_kvcache_one_layer(
    method get_importance (line 713) | def get_importance(
    method get_anchor (line 720) | def get_anchor(
  class CPUInfer (line 728) | class CPUInfer:
    method __init__ (line 732) | def __init__(self, thread_num):
    method submit (line 738) | def submit(self, task):
    method submit_with_cuda_stream (line 741) | def submit_with_cuda_stream(self, current_cuda_stream, task):
    method sync (line 744) | def sync(self):
    method sync_with_cuda_stream (line 747) | def sync_with_cuda_stream(self, current_cuda_stream):

FILE: kt-sft/ktransformers/operators/dynamic_attention.py
  class DynamicScaledDotProductAttention (line 30) | class DynamicScaledDotProductAttention:
    method __init__ (line 34) | def __init__(
    method get_attn_score_one_block (line 233) | def get_attn_score_one_block(
    method get_preselect_block_table_and_attn_score (line 271) | def get_preselect_block_table_and_attn_score(
    method get_attn_score (line 374) | def get_attn_score(
    method swap_in_and_swap_out (line 467) | def swap_in_and_swap_out(self, layer_idx, past_len, q_len, key, value):
    method calc_anchor (line 518) | def calc_anchor(self, cache_seqlens: int):
    method clear_importance (line 533) | def clear_importance(self, cache_seqlens: int):
    method clear_kvcache (line 549) | def clear_kvcache(self, cache_seqlens: int):
    method get_attn_sparsity (line 564) | def get_attn_sparsity(
    method apply (line 605) | def apply(
    method save (line 762) | def save(self, path: str, length: int):
    method load (line 775) | def load(self, path: str, length: int):

FILE: kt-sft/ktransformers/operators/experts.py
  function deduplicate_and_sort (line 50) | def deduplicate_and_sort(lst):
  function generate_cuda_graphs (line 52) | def generate_cuda_graphs(chunk_size: int) -> list:
  class KExpertsBase (line 68) | class KExpertsBase(ABC):
    method __init__ (line 69) | def __init__(self, key: str, gguf_loader: GGUFLoader, config: Pretrain...
    method forward (line 77) | def forward(self, input_tensor, expert_ids, weights):
    method load (line 81) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 85) | def unload():
    method load_weights (line 88) | def load_weights(self, override_key: str | None = None, device: str = ...
    method load_multi (line 138) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"):
  class KExpertsCPU (line 143) | class KExpertsCPU(KExpertsBase):
    method __init__ (line 152) | def __init__(
    method load (line 169) | def load(self, w: dict | nn.Parameter | tuple | None = None, device:st...
    method submit_for_one_decode (line 279) | def submit_for_one_decode(self, input_tensor, expert_ids, weights, bsz...
    method sync_for_one_decode (line 296) | def sync_for_one_decode(self, cuda_graph_idx=0):
    method forward (line 306) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor=None, ...
    method unload (line 350) | def unload(self):
    method load_weights (line 353) | def load_weights(self, override_key: str | None = None, device: str = ...
  class KSFTExpertsCPU (line 412) | class KSFTExpertsCPU(torch.autograd.Function):
    method __init__ (line 421) | def __init__(
    method load (line 452) | def load(self, w: dict | nn.Parameter | tuple | None = None, device:st...
    method submit_for_one_decode (line 556) | def submit_for_one_decode(self, input_tensor, expert_ids, weights):
    method sync_for_one_decode (line 562) | def sync_for_one_decode(self):
    method forward (line 568) | def forward(ctx, input_tensor, expert_ids, weights, cpu_infer, moe, ou...
    method backward (line 633) | def backward(ctx, output_grad):
    method unload (line 680) | def unload(self):
    method load_weights (line 683) | def load_weights(self, override_key: str | None = None, device: str = ...
  class KExpertsMarlin (line 743) | class KExpertsMarlin(KExpertsBase):
    method __init__ (line 746) | def __init__(
    method load (line 772) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 805) | def unload(self):
    method load_weights (line 812) | def load_weights(self, override_key: str | None = None):
    method forward (line 831) | def forward(self, hidden_states_cpu: torch.Tensor, selected_experts_cp...
  class KExpertsTorch (line 868) | class KExpertsTorch(KExpertsBase):
    method __init__ (line 874) | def __init__(
    method load (line 906) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 947) | def unload(self):
    method load_weights (line 953) | def load_weights(self, override_key: str | None = None):
    method forward (line 986) | def forward(self, hidden_states_cpu: torch.Tensor, selected_experts_cp...
  class KTransformersExperts (line 1181) | class KTransformersExperts(BaseInjectedModule, KExpertsBase):
    method __init__ (line 1182) | def __init__(self,
    method load (line 1207) | def load(self, w: dict = None,  mode: InferenceState = None, warmup: b...
    method unload (line 1227) | def unload(self):
    method forward (line 1234) | def forward(self, input_tensor, expert_ids, weights):
    method set_inference_mode (line 1248) | def set_inference_mode(self, mode: InferenceState):
  class KQwen2MoeSparseMoeBlock (line 1266) | class KQwen2MoeSparseMoeBlock(BaseInjectedModule, Qwen2MoeSparseMoeBlock):
    method forward (line 1267) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
    method moe_kexperts (line 1321) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w...
    method moe_infer_simple (line 1327) | def moe_infer_simple(self, hidden_states_cpu: torch.Tensor, selected_e...
    method moe_infer (line 1341) | def moe_infer(self, hidden_states_cpu: torch.Tensor, selected_experts_...
  class KDeepseekV2MoE (line 1370) | class KDeepseekV2MoE(BaseInjectedModule, DeepseekV2MoE):
    method forward (line 1371) | def forward(self, hidden_states):
    method moe_kexperts (line 1411) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w...
    method moe_infer_simple (line 1417) | def moe_infer_simple(
    method moe_infer (line 1435) | def moe_infer(self, x, topk_ids, topk_weight):
  class KDeepseekV3MoE (line 1468) | class KDeepseekV3MoE(BaseInjectedModule, DeepseekV3MoE):
    method forward (line 1470) | def forward(self, hidden_states):
    method moe_kexperts (line 1511) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w...
    method moe_infer_simple (line 1517) | def moe_infer_simple(
    method moe_infer (line 1535) | def moe_infer(self, x, topk_ids, topk_weight):
  class KMistralSparseMoEBlock (line 1568) | class KMistralSparseMoEBlock(BaseInjectedModule, MixtralSparseMoeBlock):
    method forward (line 1570) | def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
    method moe_kexperts (line 1617) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w...
    method moe_infer_simple (line 1623) | def moe_infer_simple(self, hidden_states_cpu: torch.Tensor, selected_e...
    method moe_infer (line 1637) | def moe_infer(self, hidden_states_cpu: torch.Tensor, selected_experts_...
  class KDeepseekV3MoEV2 (line 1666) | class KDeepseekV3MoEV2(BaseInjectedModule, DeepseekV3MoE):
    method forward (line 1667) | def forward(self, hidden_states, bsz_tensor, cuda_graph_idx=0):
    method moe_on_cpuinfer (line 1709) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top...
    method moe_infer_simple (line 1716) | def moe_infer_simple(
    method moe_infer (line 1734) | def moe_infer(self, x, topk_ids, topk_weight):
  class KTransformersExpertsV2 (line 1767) | class KTransformersExpertsV2(BaseInjectedModule, KExpertsBase):
    method __init__ (line 1768) | def __init__(self,
    method load (line 1793) | def load(self, w: dict = None,  mode: InferenceState = None, warmup: b...
    method unload (line 1813) | def unload(self):
    method forward (line 1820) | def forward(self, input_tensor, expert_ids, weights, bsz_tensor, cuda_...
    method set_inference_mode (line 1830) | def set_inference_mode(self, mode: InferenceState):
  class KQwen2MoeSparseMoeBlockV2 (line 1840) | class KQwen2MoeSparseMoeBlockV2(BaseInjectedModule, Qwen2MoeSparseMoeBlo...
    method forward (line 1841) | def forward(self, hidden_states, bsz_tensor, cuda_graph_idx=0):
    method moe_on_cpuinfer (line 1895) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top...
    method moe_infer_simple (line 1902) | def moe_infer_simple(
    method moe_infer (line 1920) | def moe_infer(self, x, topk_ids, topk_weight):
  class KQwen3MoeSparseMoeBlockV2 (line 1953) | class KQwen3MoeSparseMoeBlockV2(BaseInjectedModule, Qwen3MoeSparseMoeBlo...
    method forward (line 1954) | def forward(self, hidden_states, bsz_tensor=None, cuda_graph_idx=0):
    method moe_on_cpuinfer (line 2017) | def moe_on_cpuinfer(self, x: torch.Tensor, topk_ids: torch.Tensor, top...
    method moe_infer_simple (line 2024) | def moe_infer_simple(
    method moe_infer (line 2042) | def moe_infer(self, x, topk_ids, topk_weight):
  class KQwen3MoeSparseMoeBlock (line 2076) | class KQwen3MoeSparseMoeBlock(BaseInjectedModule, Qwen3MoeSparseMoeBlock):
    method forward (line 2077) | def forward(self, hidden_states):
    method moe_kexperts (line 2139) | def moe_kexperts(self, x: torch.Tensor, topk_ids: torch.Tensor, topk_w...
    method moe_infer_simple (line 2145) | def moe_infer_simple(
    method moe_infer (line 2163) | def moe_infer(self, x, topk_ids, topk_weight):

FILE: kt-sft/ktransformers/operators/flashinfer_batch_prefill_wrapper.py
  function setup_seed (line 13) | def setup_seed(seed):
  class flashInferAttn (line 27) | class flashInferAttn():
    method __init__ (line 30) | def __init__(self,
    method plan (line 65) | def plan(self,
    method calc_batch_indices (line 99) | def calc_batch_indices(self, ragged_size = None):
    method forward (line 107) | def forward(self, q, k_cache, v_cache, k, v):
  function testCudaGraph (line 116) | def testCudaGraph():
  function testAttentionFlashInfer (line 260) | def testAttentionFlashInfer(

FILE: kt-sft/ktransformers/operators/flashinfer_wrapper.py
  function attention_ref_torch (line 22) | def attention_ref_torch(
  class MLAWrapper (line 70) | class MLAWrapper():
    method __init__ (line 71) | def __init__(self,
    method plan (line 109) | def plan(self,
    method run (line 152) | def run(self, q_nope, q_pe, ckv, k_pe, return_lse = False):
  class MLAWrapperSingleton (line 155) | class MLAWrapperSingleton():
    method get_instance (line 159) | def get_instance(cls, device, *args, **kwargs)->MLAWrapper:
    method make_instance (line 165) | def make_instance(cls, device, *args, **kwargs):
    method plan_all (line 169) | def plan_all(cls, qo_indptr,
    method need_plan_all (line 198) | def need_plan_all(cls):
    method reset_buffer (line 203) | def reset_buffer(cls):
    method update_buffer (line 208) | def update_buffer(cls, max_pages):
  function checksame (line 214) | def checksame():

FILE: kt-sft/ktransformers/operators/gate.py
  class KMoEGateBase (line 15) | class KMoEGateBase(ABC):
    method __init__ (line 16) | def __init__(self,
    method forward (line 32) | def forward(self, input_tensor, expert_ids, weights):
    method load (line 36) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 40) | def unload():
    method load_weights (line 43) | def load_weights(self, override_key: str | None = None, device: str = ...
    method load_multi (line 74) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"):
  class KMoEGate (line 81) | class KMoEGate(BaseInjectedModule, KMoEGateBase):
    method __init__ (line 82) | def __init__(
    method forward (line 97) | def forward(self, hidden_states) -> torch.Tensor:
    method load (line 100) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 112) | def unload(self):
  class KMoEGateQwen2Moe (line 119) | class KMoEGateQwen2Moe(BaseInjectedModule, KMoEGateBase):
    method __init__ (line 120) | def __init__(
    method forward (line 149) | def forward(self, hidden_states) -> torch.Tensor:
    method load (line 167) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 181) | def unload(self):
  class KMoEGateIPEXLLM (line 188) | class KMoEGateIPEXLLM(KMoEGate):
    method __init__ (line 189) | def __init__(
    method forward (line 204) | def forward(self, hidden_states) -> torch.Tensor:

FILE: kt-sft/ktransformers/operators/layernorm.py
  class RMSNorm (line 43) | class RMSNorm(DeepseekV3RMSNorm, BaseInjectedModule):
    method __init__ (line 44) | def __init__(self,
    method forward (line 56) | def forward(
    method forward_native (line 74) | def forward_native(
  class KQwen2MoeRMSNorm (line 84) | class KQwen2MoeRMSNorm(Qwen2MoeRMSNorm, BaseInjectedModule):
    method __init__ (line 85) | def __init__(self,
    method forward (line 97) | def forward(
    method forward_native (line 115) | def forward_native(
  class KQwen3MoeRMSNorm (line 125) | class KQwen3MoeRMSNorm(Qwen3MoeRMSNorm, BaseInjectedModule):
    method __init__ (line 126) | def __init__(self,
    method forward (line 138) | def forward(
    method forward_native (line 159) | def forward_native(
  class DeepseekV3RMSNormTorch (line 168) | class DeepseekV3RMSNormTorch(DeepseekV3RMSNorm, BaseInjectedModule):
    method __init__ (line 169) | def __init__(self,
    method forward (line 181) | def forward(
  class KDeepseekRMSNormIPEXLLM (line 200) | class KDeepseekRMSNormIPEXLLM(DeepseekV3RMSNorm, BaseInjectedModule):
    method __init__ (line 201) | def __init__(self,
    method forward (line 214) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method load (line 222) | def load(self):

FILE: kt-sft/ktransformers/operators/linear.py
  class KLinearBase (line 48) | class KLinearBase(nn.Module, ABC):
    method __init__ (line 49) | def __init__(
    method forward (line 80) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method load_weight (line 83) | def load_weight(self, override_key: str | None = None, device: str | N...
    method load_multi (line 127) | def load_multi(self, key: str, keys: list[str], device: str = "cpu"):
    method load (line 134) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 138) | def unload(self):
  class KLinearTorch (line 142) | class KLinearTorch(KLinearBase):
    method __init__ (line 143) | def __init__(
    method forward (line 158) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None, **kw...
    method load (line 172) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 199) | def unload(self):
  class KLinearQ8 (line 205) | class KLinearQ8(KLinearBase):
    method __init__ (line 206) | def __init__(
    method forward (line 224) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None) -> t...
    method _dequantize_weight (line 239) | def _dequantize_weight(self, q_matrix, scales, bits=8):
    method _quantize_weight (line 275) | def _quantize_weight(self, matrix, bits=8):
    method load (line 330) | def load(self, w: Union[Dict, nn.Parameter, Tuple, None] = None, devic...
    method unload (line 361) | def unload(self):
  class KLinearFP8 (line 373) | class KLinearFP8(KLinearBase):
    method __init__ (line 379) | def __init__(
    method forward (line 394) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor) -> torch....
    method load (line 401) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 416) | def unload(self):
  class VLinearMarlin (line 424) | class VLinearMarlin(KLinearBase):
    method __init__ (line 430) | def __init__(
    method load (line 462) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method forward (line 510) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->...
    method unload (line 549) | def unload(self):
    method _pad_input (line 559) | def _pad_input(self, x):
  class KLinearMarlin (line 580) | class KLinearMarlin(KLinearBase):
    method __init__ (line 586) | def __init__(
    method load (line 618) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method forward (line 664) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor=None, **kw...
    method unload (line 698) | def unload(self):
  class KLinearCPUInfer (line 708) | class KLinearCPUInfer(KLinearBase):
    method __init__ (line 710) | def __init__(
    method forward (line 733) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->...
    method load (line 772) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method load_weights (line 793) | def load_weights(self, w: dict | nn.Parameter | tuple | None = None, d...
    method unload (line 806) | def unload(self):
  class KLinearIPEXLLM (line 812) | class KLinearIPEXLLM(KLinearBase):
    method __init__ (line 813) | def __init__(
    method forward (line 831) | def forward(self, x: torch.Tensor, bsz_tensor: torch.Tensor = None) ->...
    method load (line 842) | def load(self, w: dict | nn.Parameter | tuple | None = None, device: s...
    method unload (line 875) | def unload(self):
  class KTransformersLinear (line 891) | class KTransformersLinear(BaseInjectedModule, KLinearBase):
    method __init__ (line 892) | def __init__(
    method forward (line 920) | def forward(self, x, bsz_tensor=None):
    method load (line 940) | def load(self, w: dict | nn.Parameter | tuple | None = None, mode: Inf...
    method unload (line 962) | def unload(self):
    method set_inference_mode (line 969) | def set_inference_mode(self, mode: InferenceState):

FILE: kt-sft/ktransformers/operators/mlp.py
  class kDeepseekV3MLP (line 8) | class kDeepseekV3MLP(DeepseekV3MLP, BaseInjectedModule):
    method __init__ (line 9) | def __init__(self,
    method forward (line 20) | def forward(self, x, bsz_tensor):
  class KQwen2MoeMLP (line 23) | class KQwen2MoeMLP(Qwen2MoeMLP, BaseInjectedModule):
    method __init__ (line 24) | def __init__(self,
    method forward (line 35) | def forward(self, x, bsz_tensor):

FILE: kt-sft/ktransformers/operators/models.py
  class KQwen2MoeModel (line 188) | class KQwen2MoeModel(BaseInjectedModule):
    method __init__ (line 196) | def __init__(
    method forward (line 215) | def forward(
    method load_layer_to (line 446) | def load_layer_to(self, layer: Qwen2MoeDecoderLayer, target: Inference...
  class KDeepseekV2Model (line 550) | class KDeepseekV2Model(BaseInjectedModule):
    method __init__ (line 558) | def __init__(
    method forward (line 577) | def forward(
    method load_layer_to (line 834) | def load_layer_to(self, layer: DeepseekV2DecoderLayer, target: Inferen...
  class LlamaPreTrainedModel (line 960) | class LlamaPreTrainedModel(PreTrainedModel):
    method _init_weights (line 972) | def _init_weights(self, module):
  class KLlamaModel (line 984) | class KLlamaModel(BaseInjectedModule):
    method __init__ (line 994) | def __init__(
    method get_input_embeddings (line 1041) | def get_input_embeddings(self):
    method set_input_embeddings (line 1044) | def set_input_embeddings(self, value):
    method forward (line 1048) | def forward(
    method forward_chunk (line 1185) | def forward_chunk(
    method _update_causal_mask (line 1286) | def _update_causal_mask(
  class KQwen3MoeModel (line 1466) | class KQwen3MoeModel(BaseInjectedModule):
    method __init__ (line 1474) | def __init__(
    method forward (line 1501) | def forward(
    method load_layer_to (line 1725) | def load_layer_to(self, layer: Qwen3MoeDecoderLayer, target: Inference...

FILE: kt-sft/ktransformers/operators/triton_attention.py
  function tanh (line 11) | def tanh(x):
  function _fwd_grouped_kernel_stage1 (line 16) | def _fwd_grouped_kernel_stage1(
  function _decode_grouped_att_m_fwd (line 165) | def _decode_grouped_att_m_fwd(
  function _fwd_kernel_stage2 (line 258) | def _fwd_kernel_stage2(
  function _decode_softmax_reducev_fwd (line 313) | def _decode_softmax_reducev_fwd(
  function decode_attention_fwd_grouped (line 358) | def decode_attention_fwd_grouped(

FILE: kt-sft/ktransformers/operators/triton_attention_prefill.py
  function _fwd_kernel (line 24) | def _fwd_kernel(
  function context_attention_fwd (line 159) | def context_attention_fwd(

FILE: kt-sft/ktransformers/optimize/optimize.py
  function inject (line 20) | def inject(module, local_optimization_dict, model_config:AutoConfig ,ggu...
  function del_meta (line 44) | def del_meta(module:nn.Module):
  function gen_optimize_config (line 55) | def gen_optimize_config(module: nn.Module, out_data: Mapping, rule_list:...
  function translate_model_config (line 109) | def translate_model_config(model_config: PretrainedConfig):
  function optimize_and_load_gguf (line 117) | def optimize_and_load_gguf(module: nn.Module, rule_file: str, gguf_path:...

FILE: kt-sft/ktransformers/server/api/ollama/completions.py
  class OllamaGenerateCompletionRequest (line 21) | class OllamaGenerateCompletionRequest(BaseModel):
  class OllamaGenerationStreamResponse (line 45) | class OllamaGenerationStreamResponse(BaseModel):
  class OllamaGenerationResponse (line 51) | class OllamaGenerationResponse(BaseModel):
  function generate (line 58) | async def generate(request: Request, input: OllamaGenerateCompletionRequ...
  class OllamaChatCompletionMessage (line 103) | class OllamaChatCompletionMessage(BaseModel):
  class OllamaChatCompletionRequest (line 107) | class OllamaChatCompletionRequest(BaseModel):
  class OllamaChatCompletionStreamResponse (line 113) | class OllamaChatCompletionStreamResponse(BaseModel):
  class OllamaChatCompletionResponse (line 126) | class OllamaChatCompletionResponse(BaseModel):
  function chat (line 140) | async def chat(request: Request, input: OllamaChatCompletionRequest):
  class OllamaModel (line 226) | class OllamaModel(BaseModel):
  function tags (line 234) | async def tags():
  class OllamaModelInfo (line 239) | class OllamaModelInfo(BaseModel):
  class OllamaShowRequest (line 243) | class OllamaShowRequest(BaseModel):
  class OllamaShowDetial (line 248) | class OllamaShowDetial(BaseModel):
  class OllamaShowResponse (line 256) | class OllamaShowResponse(BaseModel):
    class Config (line 263) | class Config:
  function show (line 267) | async def show(request: Request, input: OllamaShowRequest):

FILE: kt-sft/ktransformers/server/api/openai/__init__.py
  function post_db_creation_operations (line 14) | def post_db_creation_operations():

FILE: kt-sft/ktransformers/server/api/openai/assistants/assistants.py
  function create_assistant (line 19) | async def create_assistant(
  function list_assistants (line 26) | async def list_assistants(
  function list_assistants_with_status (line 38) | async def list_assistants_with_status(
  function retrieve_assistant (line 48) | async def retrieve_assistant(
  function modify_assistant (line 55) | async def modify_assistant(
  function delete_assistant (line 63) | async def delete_assistant(assistant_id: str):
  function get_related_thread (line 69) | async def get_related_thread(assistant_id: ObjectID):
  function create_default_assistant (line 74) | def create_default_assistant():
  function test_create_assistant (line 90) | def test_create_assistant():

FILE: kt-sft/ktransformers/server/api/openai/assistants/messages.py
  function create_message (line 16) | async def create_message(thread_id: str, msg: MessageCreate):
  function list_messages (line 26) | async def list_messages(
  function retrieve_message (line 38) | async def retrieve_message(thread_id: ObjectID, message_id: ObjectID):
  function modify_message (line 43) | async def modify_message(thread_id: ObjectID, message_id: ObjectID, msg:...
  function delete_message (line 49) | async def delete_message(thread_id: ObjectID, message_id: ObjectID):

FILE: kt-sft/ktransformers/server/api/openai/assistants/runs.py
  function create_run (line 20) | async def create_run(request: Request, thread_id: str, run_create: RunCr...
  function create_thread_and_run (line 40) | async def create_thread_and_run(run_thread: RunThreadCreate):
  function list_runs (line 45) | async def list_runs(
  function retrieve_run (line 56) | async def retrieve_run(
  function modify_run (line 67) | async def modify_run(
  function submit_tool_outputs_to_run (line 76) | async def submit_tool_outputs_to_run(thread_id: str, run_id: str, submit...
  function cancel_run (line 81) | async def cancel_run(thread_id: str, run_id: str):

FILE: kt-sft/ktransformers/server/api/openai/assistants/threads.py
  function create_thread (line 14) | async def create_thread(thread: ThreadCreate):
  function list_threads (line 19) | async def list_threads(limit: Optional[int] = 20, order: Order = Order.D...
  function retrieve_thread (line 24) | async def retrieve_thread(thread_id: ObjectID):
  function modify_thread (line 29) | async def modify_thread(thread_id: ObjectID, thread: ThreadModify):
  function delete_thread (line 34) | async def delete_thread(thread_id: ObjectID):

FILE: kt-sft/ktransformers/server/api/openai/endpoints/chat.py
  class Choice (line 22) | class Choice(BaseModel):
  class ChatCompletion (line 30) | class ChatCompletion(BaseModel):
  class ChatCompletionMessageToolCallFunction (line 41) | class ChatCompletionMessageToolCallFunction(BaseModel):
  class ChatCompletionMessageToolCall (line 45) | class ChatCompletionMessageToolCall(BaseModel):
  class ChatCompletionMessage (line 50) | class ChatCompletionMessage(BaseModel):
  function list_models (line 58) | async def list_models():
  function getTools (line 61) | def getTools(buffer):
  function get_tool_instructions (line 117) | def get_tool_instructions():
  function chat_completion (line 136) | async def chat_completion(request: Request, create: ChatCompletionCreate):

FILE: kt-sft/ktransformers/server/api/openai/legacy/completions.py
  function create_completion (line 15) | async def create_completion(request:Request, create:CompletionCreate):

FILE: kt-sft/ktransformers/server/api/web/system.py
  function system_info (line 8) | def system_info():

FILE: kt-sft/ktransformers/server/args.py
  class ArgumentParser (line 6) | class ArgumentParser:
    method __init__ (line 7) | def __init__(self, cfg):
    method parse_args (line 10) | def parse_args(self):

FILE: kt-sft/ktransformers/server/backend/args.py
  class ConfigArgs (line 6) | class ConfigArgs(BaseModel):
    class Config (line 12) | class Config:

FILE: kt-sft/ktransformers/server/backend/base.py
  class BackendInterfaceBase (line 27) | class BackendInterfaceBase:
    method __init__ (line 36) | def __init__(self, args:ConfigArgs = default_args):
    method inference (line 40) | async def inference(self,local_messages,request_unique_id:Optional[str...
    method report_last_time_performance (line 57) | def report_last_time_performance(self):
  class ThreadContext (line 70) | class ThreadContext:
    method __init__ (line 89) | def __init__(self, run: RunObject,interface:BackendInterfaceBase, args...
    method get_local_messages (line 102) | def get_local_messages(self):
    method update_by_run (line 109) | def update_by_run(self,run:RunObject,args:ConfigArgs = default_args):
    method put_user_message (line 113) | def put_user_message(self, message: MessageObject):
    method delete_user_message (line 119) | def delete_user_message(self,message_id: ObjectID):
    method work (line 122) | async def work(self)->AsyncIterator:

FILE: kt-sft/ktransformers/server/backend/context_manager.py
  class ThreadContextManager (line 17) | class ThreadContextManager:
    method __init__ (line 22) | def __init__(self,interface) -> None:
    method get_context_by_run_object (line 29) | async def get_context_by_run_object(self, run: RunObject) -> ThreadCon...
    method get_context_by_thread_id (line 57) | async def get_context_by_thread_id(self, thread_id: ObjectID) -> Optio...

FILE: kt-sft/ktransformers/server/backend/interfaces/balance_serve.py
  function chat_stream (line 66) | async def chat_stream(queue: asyncio.Queue, tokenizer: AutoTokenizer):
  function fill_generated_tokens (line 84) | def fill_generated_tokens(query_updates: list[sched_ext.QueryUpdate], ge...
  function report_last_time_performance (line 94) | def report_last_time_performance(profiler: Profiler):
  class Engine (line 106) | class Engine:
    method __init__ (line 114) | def __init__(self, args: ConfigArgs = default_args, generated_token_qu...
    method sampling (line 208) | def sampling(self, forward_output: ForwardBatchOutput):
    method loop (line 226) | def loop(self):
  class BalanceServeThreadContext (line 266) | class BalanceServeThreadContext(ThreadContext):
    method get_local_messages (line 267) | def get_local_messages(self):
  function run_engine (line 275) | def run_engine(args, token_queue, broadcast_endpoint, event, kvcache_eve...
  class BalanceServeInterface (line 284) | class BalanceServeInterface(BackendInterfaceBase):
    method __init__ (line 300) | def __init__(self, args: ConfigArgs = default_args):
    method get_params (line 359) | def get_params(self, temperature: Optional[float] = None, top_p: Optio...
    method run_queue_proxy (line 380) | def run_queue_proxy(self):
    method lifespan (line 386) | async def lifespan(self, app: FastAPI):
    method queue_proxy (line 390) | async def queue_proxy(self):
    method tokenize_prompt (line 407) | def tokenize_prompt(self, prompt: str):
    method format_and_tokenize_input_ids (line 411) | def format_and_tokenize_input_ids(self, thread_id: ObjectID, messages:...
    method inference (line 420) | async def inference(self, local_messages, thread_id: str, temperature:...

FILE: kt-sft/ktransformers/server/backend/interfaces/exllamav2.py
  class ExllamaThreadContext (line 14) | class ExllamaThreadContext(ThreadContext):
    method __init__ (line 15) | def __init__(self, run: RunObject, args: ConfigArgs = default_args) ->...
    method get_interface (line 18) | def get_interface(self):
    method get_local_messages (line 21) | def get_local_messages(self):
  class ExllamaInterface (line 27) | class ExllamaInterface(BackendInterfaceBase):
    method __init__ (line 29) | def __init__(self, args: ConfigArgs = ...):
    method tokenize_prompt (line 32) | def tokenize_prompt(self, prompt: str) -> torch.Tensor:
    method inference (line 35) | async def inference(self,local_messages,request_unique_id:Optional[str...

FILE: kt-sft/ktransformers/server/backend/interfaces/ktransformers.py
  class KTransformersThreadContext (line 25) | class KTransformersThreadContext(TransformersThreadContext):
  class KTransformersInterface (line 29) | class KTransformersInterface(TransformersInterface):
    method __init__ (line 30) | def __init__(self, args: ConfigArgs = default_args):
    method decode_one_tokens (line 83) | def decode_one_tokens(self):
    method prefill (line 133) | def prefill(self, input_ids: torch.Tensor, is_new: bool, temperature: ...
    method active_cache_position (line 236) | def active_cache_position(self):
    method inference (line 240) | async def inference(self, local_messages, thread_id: str, temperature:...

FILE: kt-sft/ktransformers/server/backend/interfaces/transformers.py
  class TextStreamer (line 37) | class TextStreamer:
    method __init__ (line 39) | def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = Fal...
    method reset (line 49) | def reset(self):
    method put (line 53) | def put(self, value) -> Optional[str]:
    method end (line 83) | def end(self) -> Optional[str]:
    method _is_chinese_char (line 96) | def _is_chinese_char(self, cp):
  class TransformersThreadContext (line 121) | class TransformersThreadContext(ThreadContext):
    method get_local_messages (line 122) | def get_local_messages(self):
  class TransformersInterface (line 130) | class TransformersInterface(BackendInterfaceBase):
    method __init__ (line 146) | def __init__(self, args: ConfigArgs = default_args):
    method current_ids (line 165) | def current_ids(self):
    method active_cache_position (line 169) | def active_cache_position(self):
    method tokenize_prompt (line 172) | def tokenize_prompt(self, prompt: str):
    method format_and_tokenize_input_ids (line 176) | def format_and_tokenize_input_ids(self, thread_id: ObjectID, messages:...
    method append_new_tokens (line 213) | def append_new_tokens(self, new_tokens: int) -> Optional[str]:
    method tf_logits_warper (line 219) | def tf_logits_warper(generation_config):
    method prepare_logits_wrapper (line 270) | def prepare_logits_wrapper(self, inputs, device, temperature: Optional...
    method logits_to_token (line 289) | def logits_to_token(self, logits: torch.Tensor):
    method decode_one_tokens (line 304) | def decode_one_tokens(self):
    method prefill (line 320) | def prefill(self, input_ids: torch.Tensor, is_new: bool, temperature: ...
    method generate (line 397) | def generate(self):
    method check_is_new (line 427) | def check_is_new(self, thread_id: str):
    method inference (line 440) | async def inference(self, local_messages, thread_id: str, temperature:...

FILE: kt-sft/ktransformers/server/balance_serve/inference/config.py
  class ModelConfig (line 19) | class ModelConfig:
    method __init__ (line 56) | def __init__(self, config):
    method load_config (line 70) | def load_config(self):
  class ParallelConfig (line 88) | class ParallelConfig:
    method __init__ (line 89) | def __init__(
  class AttnConfig (line 98) | class AttnConfig:
    method __init__ (line 104) | def __init__(self, config):
  class SamplerConfig (line 111) | class SamplerConfig():
    method __init__ (line 116) | def __init__(self, config):
  function load_yaml_config (line 121) | def load_yaml_config(file_path):
  class LLMConfig (line 128) | class LLMConfig:
    method __init__ (line 135) | def __init__(self, config_file):

FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/communication_op.py
  function tensor_model_parallel_all_reduce (line 15) | def tensor_model_parallel_all_reduce(input_: torch.Tensor, bsz_tensor: t...
  function tensor_model_parallel_all_gather (line 20) | def tensor_model_parallel_all_gather(
  function tensor_model_parallel_gather (line 27) | def tensor_model_parallel_gather(
  function broadcast_tensor_dict (line 34) | def broadcast_tensor_dict(

FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/cuda_wrapper.py
  class cudaIpcMemHandle_t (line 21) | class cudaIpcMemHandle_t(ctypes.Structure):
  class Function (line 26) | class Function:
  function find_loaded_library (line 32) | def find_loaded_library(lib_name) -> Optional[str]:
  class CudaRTLibrary (line 58) | class CudaRTLibrary:
    method __init__ (line 100) | def __init__(self, so_file: Optional[str] = None):
    method CUDART_CHECK (line 120) | def CUDART_CHECK(self, result: cudaError_t) -> None:
    method cudaGetErrorString (line 125) | def cudaGetErrorString(self, error: cudaError_t) -> str:
    method cudaSetDevice (line 128) | def cudaSetDevice(self, device: int) -> None:
    method cudaDeviceSynchronize (line 131) | def cudaDeviceSynchronize(self) -> None:
    method cudaDeviceReset (line 134) | def cudaDeviceReset(self) -> None:
    method cudaMalloc (line 137) | def cudaMalloc(self, size: int) -> ctypes.c_void_p:
    method cudaFree (line 142) | def cudaFree(self, devPtr: ctypes.c_void_p) -> None:
    method cudaMemset (line 145) | def cudaMemset(self, devPtr: ctypes.c_void_p, value: int,
    method cudaMemcpy (line 149) | def cudaMemcpy(self, dst: ctypes.c_void_p, src: ctypes.c_void_p,
    method cudaIpcGetMemHandle (line 155) | def cudaIpcGetMemHandle(self,
    method cudaIpcOpenMemHandle (line 162) | def cudaIpcOpenMemHandle(self,

FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/custom_all_reduce.py
  function _can_p2p (line 25) | def _can_p2p(rank: int, world_size: int) -> bool:
  function is_weak_contiguous (line 37) | def is_weak_contiguous(inp: torch.Tensor):
  class CustomAllreduce (line 44) | class CustomAllreduce:
    method __init__ (line 49) | def __init__(
    method create_shared_buffer (line 179) | def create_shared_buffer(
    method free_shared_buffer (line 204) | def free_shared_buffer(
    method capture (line 212) | def capture(self):
    method register_graph_buffers (line 226) | def register_graph_buffers(self):
    method should_custom_ar (line 244) | def should_custom_ar(self, inp: torch.Tensor):
    method all_reduce (line 259) | def all_reduce(
    method custom_all_reduce (line 284) | def custom_all_reduce(self, input: torch.Tensor, bsz_tensor: torch.Ten...
    method close (line 302) | def close(self):
    method __del__ (line 309) | def __del__(self):

FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/custom_all_reduce_utils.py
  function producer (line 19) | def producer(
  function consumer (line 53) | def consumer(
  function can_actually_p2p (line 94) | def can_actually_p2p(
  function gpu_p2p_access_check (line 194) | def gpu_p2p_access_check(src: int, tgt: int) -> bool:

FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/parallel_state.py
  class GraphCaptureContext (line 43) | class GraphCaptureContext:
  function _split_tensor_dict (line 50) | def _split_tensor_dict(
  function _get_unique_name (line 79) | def _get_unique_name(name: str) -> str:
  function _register_group (line 95) | def _register_group(group: "GroupCoordinator") -> None:
  function inplace_all_reduce (line 101) | def inplace_all_reduce(tensor: torch.Tensor, group_name: str) -> None:
  function inplace_all_reduce_fake (line 108) | def inplace_all_reduce_fake(tensor: torch.Tensor, group_name: str) -> None:
  function outplace_all_reduce (line 118) | def outplace_all_reduce(tensor: torch.Tensor, group_name: str, bsz_tenso...
  function outplace_all_reduce_fake (line 125) | def outplace_all_reduce_fake(tensor: torch.Tensor, group_name: str, bsz_...
  class GroupCoordinator (line 136) | class GroupCoordinator:
    method __init__ (line 169) | def __init__(
    method first_rank (line 271) | def first_rank(self):
    method last_rank (line 276) | def last_rank(self):
    method is_first_rank (line 281) | def is_first_rank(self):
    method is_last_rank (line 286) | def is_last_rank(self):
    method next_rank (line 291) | def next_rank(self):
    method prev_rank (line 298) | def prev_rank(self):
    method graph_capture (line 305) | def graph_capture(
    method all_reduce (line 352) | def all_reduce(self, input_: torch.Tensor, bsz_tensor: torch.Tensor, i...
    method _all_reduce_out_place (line 406) | def _all_reduce_out_place(self, input_: torch.Tensor, bsz_tensor: torc...
    method _all_reduce_in_place (line 414) | def _all_reduce_in_place(self, input_: torch.Tensor) -> None:
    method all_gather (line 421) | def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Ten...
    method gather (line 464) | def gather(
    method broadcast (line 499) | def broadcast(self, input_: torch.Tensor, src: int = 0):
    method broadcast_object (line 514) | def broadcast_object(self, obj: Optional[Any] = None, src: int = 0):
    method broadcast_object_list (line 538) | def broadcast_object_list(
    method send_object (line 555) | def send_object(self, obj: Any, dst: int) -> None:
    method recv_object (line 582) | def recv_object(self, src: int) -> Any:
    method broadcast_tensor_dict (line 618) | def broadcast_tensor_dict(
    method send_tensor_dict (line 700) | def send_tensor_dict(
    method recv_tensor_dict (line 753) | def recv_tensor_dict(
    method barrier (line 815) | def barrier(self):
    method send (line 824) | def send(self, tensor: torch.Tensor, dst: Optional[int] = None) -> None:
    method recv (line 836) | def recv(
    method destroy (line 852) | def destroy(self):
  function get_world_group (line 870) | def get_world_group() -> GroupCoordinator:
  function init_world_group (line 875) | def init_world_group(
  function init_model_parallel_group (line 891) | def init_model_parallel_group(
  function get_tp_group (line 918) | def get_tp_group() -> GroupCoordinator:
  function get_pp_group (line 929) | def get_pp_group() -> GroupCoordinator:
  function graph_capture (line 939) | def graph_capture():
  function set_custom_all_reduce (line 962) | def set_custom_all_reduce(enable: bool):
  function init_distributed_environment (line 967) | def init_distributed_environment(
  function initialize_model_parallel (line 1014) | def initialize_model_parallel(
  function ensure_model_parallel_initialized (line 1091) | def ensure_model_parallel_initialized(
  function model_parallel_is_initialized (line 1120) | def model_parallel_is_initialized():
  function patch_tensor_parallel_group (line 1129) | def patch_tensor_parallel_group(tp_group: GroupCoordinator):
  function get_tensor_model_parallel_world_size (line 1153) | def get_tensor_model_parallel_world_size():
  function get_tensor_model_parallel_rank (line 1158) | def get_tensor_model_parallel_rank():
  function destroy_model_parallel (line 1163) | def destroy_model_parallel():
  function destroy_distributed_environment (line 1176) | def destroy_distributed_environment():
  function cleanup_dist_env_and_memory (line 1185) | def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
  function in_the_same_node_as (line 1199) | def in_the_same_node_as(pg: ProcessGroup, source_rank: int = 0) -> List[...

FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/pynccl.py
  class PyNcclCommunicator (line 21) | class PyNcclCommunicator:
    method __init__ (line 23) | def __init__(
    method all_reduce (line 119) | def all_reduce(
    method send (line 143) | def send(self, tensor: torch.Tensor, dst: int, stream=None):
    method recv (line 161) | def recv(self, tensor: torch.Tensor, src: int, stream=None):
    method change_state (line 180) | def change_state(

FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/pynccl_wrapper.py
  class ncclUniqueId (line 41) | class ncclUniqueId(ctypes.Structure):
  class ncclDataTypeEnum (line 51) | class ncclDataTypeEnum:
    method from_torch (line 70) | def from_torch(cls, dtype: torch.dtype) -> int:
  class ncclRedOpTypeEnum (line 93) | class ncclRedOpTypeEnum:
    method from_torch (line 102) | def from_torch(cls, op: ReduceOp) -> int:
  class Function (line 117) | class Function:
  class NCCLLibrary (line 123) | class NCCLLibrary:
    method __init__ (line 184) | def __init__(self, so_file: Optional[str] = None):
    method ncclGetErrorString (line 215) | def ncclGetErrorString(self, result: ncclResult_t) -> str:
    method NCCL_CHECK (line 218) | def NCCL_CHECK(self, result: ncclResult_t) -> None:
    method ncclGetVersion (line 223) | def ncclGetVersion(self) -> str:
    method ncclGetUniqueId (line 233) | def ncclGetUniqueId(self) -> ncclUniqueId:
    method ncclCommInitRank (line 239) | def ncclCommInitRank(self, world_size: int, unique_id: ncclUniqueId,
    method ncclAllReduce (line 247) | def ncclAllReduce(self, sendbuff: buffer_type, recvbuff: buffer_type,
    method ncclSend (line 259) | def ncclSend(self, sendbuff: buffer_type, count: int, datatype: int,
    method ncclRecv (line 264) | def ncclRecv(self, recvbuff: buffer_type, count: int, datatype: int,
    method ncclCommDestroy (line 269) | def ncclCommDestroy(self, comm: ncclComm_t) -> None:

FILE: kt-sft/ktransformers/server/balance_serve/inference/distributed/utils.py
  function ensure_divisibility (line 17) | def ensure_divisibility(numerator, denominator):
  function divide (line 24) | def divide(numerator, denominator):
  function split_tensor_along_last_dim (line 31) | def split_tensor_along_last_dim(
  function get_pp_indices (line 59) | def get_pp_indices(
  class StatelessProcessGroup (line 92) | class StatelessProcessGroup:
    method __post_init__ (line 113) | def __post_init__(self):
    method send_obj (line 119) | def send_obj(self, obj: Any, dst: int):
    method expire_data (line 127) | def expire_data(self):
    method recv_obj (line 138) | def recv_obj(self, src: int) -> Any:
    method broadcast_obj (line 146) | def broadcast_obj(self, obj: Optional[Any], src: int) -> Any:
    method all_gather_obj (line 164) | def all_gather_obj(self, obj: Any) -> list[Any]:
    method barrier (line 176) | def barrier(self):
    method create (line 185) | def create(

FILE: kt-sft/ktransformers/server/balance_serve/inference/forward_batch.py
  class ForwardBatchInput (line 11) | class ForwardBatchInput:
    class ForwardMiniBatch (line 13) | class ForwardMiniBatch:
      method __init__ (line 31) | def __init__(self, prefill_querys_info: list[QueryInfo], decode_quer...
      method fill (line 94) | def fill(self, prefill_querys_info: list[QueryInfo], decode_querys_i...
    method __init__ (line 170) | def __init__(self, batch : sched_ext.BatchQueryTodo = None, query_mana...
    method gen_max_forward_batch (line 198) | def gen_max_forward_batch(
    method fill (line 244) | def fill(self, batch : sched_ext.BatchQueryTodo = None, query_manager:...
  class ForwardBatchOutput (line 268) | class ForwardBatchOutput:
    method __init__ (line 278) | def __init__(self):

FILE: kt-sft/ktransformers/server/balance_serve/inference/model_runner.py
  function pad_num_tokens (line 37) | def pad_num_tokens(num_tokens):
  function deduplicate_and_sort (line 40) | def deduplicate_and_sort(lst):
  function generate_cuda_graphs (line 42) | def generate_cuda_graphs(chunk_size: int) -> list:
  class ModelRunner (line 52) | class ModelRunner:
    method __init__ (line 59) | def __init__(self, model = None, device = None, use_cuda_graph = False...
    method model_attn_plan (line 88) | def model_attn_plan(self, batch, cuda_graph_idx=0):
    method warmup (line 104) | def warmup(self):
    method run (line 159) | def run(self, batch: sched_ext.BatchQueryTodo = None, query_manager: Q...
    method replay (line 225) | def replay(self, cuda_graph_idx=-1):
    method sync (line 233) | def sync(self, calc_time = True):

FILE: kt-sft/ktransformers/server/balance_serve/inference/query_manager.py
  class QueryInfo (line 11) | class QueryInfo:
    method __init__ (line 25) | def __init__(self, id, query_length: int, max_length: int, page_size: ...
    method check_stop (line 41) | def check_stop(self):
    method print (line 64) | def print(self):
  class QueryManager (line 69) | class QueryManager:
    method __init__ (line 75) | def __init__(self, page_size = 256, device = torch.device('cuda')):
    method add_query (line 80) | def add_query(self, batch: sched_ext.BatchQueryTodo):
    method update (line 103) | def update(self, batch: sched_ext.BatchQueryTodo) -> list[sched_ext.Qu...

FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/penaltylib/orchestrator.py
  class _ReqLike (line 9) | class _ReqLike:
  class _BatchLike (line 14) | class _BatchLike:
    method batch_size (line 17) | def batch_size(self):
  class BatchedPenalizerOrchestrator (line 21) | class BatchedPenalizerOrchestrator:
    method __init__ (line 27) | def __init__(
    method reqs (line 51) | def reqs(self):
    method batch_size (line 54) | def batch_size(self):
    method cumulate_input_tokens (line 57) | def cumulate_input_tokens(
    method cumulate_output_tokens (line 74) | def cumulate_output_tokens(
    method apply (line 94) | def apply(self, logits: torch.Tensor) -> torch.Tensor:
    method filter (line 113) | def filter(
    method merge (line 149) | def merge(self, their: "BatchedPenalizerOrchestrator"):
  class _TokenIDs (line 171) | class _TokenIDs:
    method __init__ (line 185) | def __init__(
    method occurrence_count (line 204) | def occurrence_count(self) -> torch.Tensor:
  class _BatchedPenalizer (line 244) | class _BatchedPenalizer(abc.ABC):
    method __init__ (line 252) | def __init__(self, orchestrator: BatchedPenalizerOrchestrator):
    method is_prepared (line 255) | def is_prepared(self) -> bool:
    method is_required (line 258) | def is_required(self) -> bool:
    method prepare (line 261) | def prepare(self):
    method prepare_if_required (line 266) | def prepare_if_required(self):
    method teardown (line 273) | def teardown(self):
    method cumulate_input_tokens (line 278) | def cumulate_input_tokens(self, input_ids: _TokenIDs):
    method cumulate_output_tokens (line 284) | def cumulate_output_tokens(self, output_ids: _TokenIDs):
    method apply (line 290) | def apply(self, logits: torch.Tensor) -> torch.Tensor:
    method filter (line 296) | def filter(
    method merge (line 307) | def merge(self, their: "_BatchedPenalizer"):
    method _is_required (line 316) | def _is_required(self) -> bool:
    method _prepare (line 323) | def _prepare(self):
    method _teardown (line 331) | def _teardown(self):
    method _cumulate_input_tokens (line 339) | def _cumulate_input_tokens(self, input_ids: _TokenIDs):
    method _cumulate_output_tokens (line 347) | def _cumulate_output_tokens(self, output_ids: _TokenIDs):
    method _apply (line 355) | def _apply(self, logits: torch.Tensor) -> torch.Tensor:
    method _filter (line 363) | def _filter(
    method _merge (line 372) | def _merge(self, their: "_BatchedPenalizer"):

FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/frequency_penalty.py
  class BatchedFrequencyPenalizer (line 8) | class BatchedFrequencyPenalizer(_BatchedPenalizer):
    method _is_required (line 16) | def _is_required(self) -> bool:
    method _prepare (line 22) | def _prepare(self):
    method _teardown (line 46) | def _teardown(self):
    method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs):
    method _cumulate_output_tokens (line 56) | def _cumulate_output_tokens(self, output_ids: _TokenIDs):
    method _apply (line 61) | def _apply(self, logits: torch.Tensor) -> torch.Tensor:
    method _filter (line 65) | def _filter(
    method _merge (line 73) | def _merge(self, their: "BatchedFrequencyPenalizer"):

FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/min_new_tokens.py
  class BatchedMinNewTokensPenalizer (line 8) | class BatchedMinNewTokensPenalizer(_BatchedPenalizer):
    method _is_required (line 17) | def _is_required(self) -> bool:
    method _prepare (line 22) | def _prepare(self):
    method _teardown (line 72) | def _teardown(self):
    method _cumulate_input_tokens (line 81) | def _cumulate_input_tokens(self, input_ids: _TokenIDs):
    method _cumulate_output_tokens (line 84) | def _cumulate_output_tokens(self, output_ids: _TokenIDs):
    method _apply (line 87) | def _apply(self, logits: torch.Tensor) -> torch.Tensor:
    method _filter (line 92) | def _filter(
    method _merge (line 99) | def _merge(self, their: "BatchedMinNewTokensPenalizer"):

FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/presence_penalty.py
  class BatchedPresencePenalizer (line 8) | class BatchedPresencePenalizer(_BatchedPenalizer):
    method _is_required (line 16) | def _is_required(self) -> bool:
    method _prepare (line 22) | def _prepare(self):
    method _teardown (line 46) | def _teardown(self):
    method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs):
    method _cumulate_output_tokens (line 56) | def _cumulate_output_tokens(self, output_ids: _TokenIDs):
    method _apply (line 60) | def _apply(self, logits: torch.Tensor) -> torch.Tensor:
    method _filter (line 64) | def _filter(
    method _merge (line 72) | def _merge(self, their: "BatchedPresencePenalizer"):

FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/penaltylib/penalizers/repetition_penalty.py
  class BatchedRepetitionPenalizer (line 8) | class BatchedRepetitionPenalizer(_BatchedPenalizer):
    method _is_required (line 16) | def _is_required(self) -> bool:
    method _prepare (line 22) | def _prepare(self):
    method _teardown (line 46) | def _teardown(self):
    method _cumulate_input_tokens (line 53) | def _cumulate_input_tokens(self, input_ids: _TokenIDs):
    method _cumulate_output_tokens (line 57) | def _cumulate_output_tokens(self, output_ids: _TokenIDs):
    method _apply (line 61) | def _apply(self, logits: torch.Tensor) -> torch.Tensor:
    method _filter (line 68) | def _filter(
    method _merge (line 76) | def _merge(self, their: "BatchedRepetitionPenalizer"):

FILE: kt-sft/ktransformers/server/balance_serve/inference/sampling/sampler.py
  class SamplingOptions (line 20) | class SamplingOptions():
    method __init__ (line 33) | def __init__(self, bsz = 1, device = torch.device('cuda'), pretrained_...
  class Sampler (line 54) | class Sampler(nn.Module):
    method __init__ (line 55) | def __init__(self):
    method forward (line 58) | def forward(

FILE: kt-sft/ktransformers/server/balance_serve/sched_rpc.py
  class SchedulerServer (line 24) | class SchedulerServer:
    method __init__ (line 25) | def __init__(self, settings, main_args):
    method run_scheduler (line 36) | def run_scheduler(self):
    method stop_scheduler (line 39) | def stop_scheduler(self):
    method start_proxy (line 42) | def start_proxy(self):
    method worker_routine (line 45) | def worker_routine(self):
    method start_rpc_service (line 102) | def start_rpc_service(self):
    method stop_rpc_service (line 117) | def stop_rpc_service(self):
  function start_server (line 123) | def start_server(settings, main_args):
  class SchedulerClient (line 129) | class SchedulerClient:
    method __init__ (line 130) | def __init__(self, sched_port):
    method __del__ (line 138) | def __del__(self):
    method send_request (line 142) | def send_request(self, method, params=None):
    method add_query (line 159) | def add_query(self, query):
    method cancel_query (line 163) | def cancel_query(self, query_id):
    method update_last_batch (line 166) | def update_last_batch(self, updates):
    method rebuild_inferece_context (line 171) | def rebuild_inferece_context(self,response):
    method get_inference_context_raw (line 179) | def get_inference_context_raw(self):

FILE: kt-sft/ktransformers/server/balance_serve/settings.py
  function create_sched_settings (line 16) | def create_sched_settings(args):
  function create_sched_settings_qwen2moe (line 69) | def create_sched_settings_qwen2moe(args):
  function create_sched_settings_qwen3moe (line 123) | def create_sched_settings_qwen3moe(args):

FILE: kt-sft/ktransformers/server/config/config.py
  class Config (line 20) | class Config(metaclass=Singleton):
    method load (line 26) | def load() -> dict:
    method to_path (line 53) | def to_path(path: str) -> str:
    method __init__ (line 61) | def __init__(self):

FILE: kt-sft/ktransformers/server/config/log.py
  class DailyRotatingFileHandler (line 25) | class DailyRotatingFileHandler(BaseRotatingHandler):
    method __init__ (line 32) | def __init__(self, filename, backupCount=0, encoding=None, delay=False...
    method shouldRollover (line 46) | def shouldRollover(self, record):
    method doRollover (line 59) | def doRollover(self):
    method _compute_fn (line 78) | def _compute_fn(self):
    method _open (line 84) | def _open(self):
    method delete_expired_files (line 106) | def delete_expired_files(self):
  class Logger (line 132) | class Logger(object):
    method __init__ (line 144) | def __init__(self, level: str = 'info'):

FILE: kt-sft/ktransformers/server/config/singleton.py
  class Singleton (line 13) | class Singleton(abc.ABCMeta, type):
    method __call__ (line 24) | def __call__(cls, *args, **kwds):
  class AbstractSingleton (line 29) | class AbstractSingleton(abc.ABC, metaclass=Singleton):

FILE: kt-sft/ktransformers/server/crud/assistants/assistants.py
  class AssistantDatabaseManager (line 12) | class AssistantDatabaseManager:
    method __init__ (line 13) | def __init__(self) -> None:
    method create_assistant_object (line 16) | def create_assistant_object(self, assistant: AssistantCreate) -> Assis...
    method db_count_assistants (line 25) | def db_count_assistants(self) -> int:
    method db_create_assistant (line 29) | def db_create_assistant(self, assistant: AssistantCreate):
    method db_list_assistants (line 34) | def db_list_assistants(self, limit: Optional[int], order: Order) -> Li...
    method db_get_assistant_by_id (line 44) | def db_get_assistant_by_id(self, assistant_id: str) -> Optional[Assist...
    method db_update_assistant_by_id (line 53) | def db_update_assistant_by_id(self, assistant_id: str, assistant: Assi...
    method db_delete_assistant_by_id (line 60) | def db_delete_assistant_by_id(self, assistant_id: str):

FILE: kt-sft/ktransformers/server/crud/assistants/messages.py
  class MessageDatabaseManager (line 10) | class MessageDatabaseManager:
    method __init__ (line 11) | def __init__(self) -> None:
    method create_db_message_by_core (line 15) | def create_db_message_by_core(message: MessageCore):
    method create_db_message (line 19) | def create_db_message(self, message: MessageCreate):
    method db_add_message (line 22) | def db_add_message(self, message: Message):
    method db_create_message (line 27) | def db_create_message(self, thread_id: str, message: MessageCreate, st...
    method create_message_object (line 35) | def create_message_object(thread_id: ObjectID, run_id: ObjectID, messa...
    method db_sync_message (line 47) | def db_sync_message(self, message: MessageObject):
    method db_list_messages_of_thread (line 54) | def db_list_messages_of_thread(
    method db_get_message_by_id (line 72) | def db_get_message_by_id(self, thread_id: ObjectID, message_id: Object...
    method db_delete_message_by_id (line 80) | def db_delete_message_by_id(self, thread_id: ObjectID, message_id: Obj...

FILE: kt-sft/ktransformers/server/crud/assistants/runs.py
  class RunsDatabaseManager (line 10) | class RunsDatabaseManager:
    method __init__ (line 11) | def __init__(self) -> None:
    method create_run_object (line 14) | def create_run_object(self, thread_id: ObjectID, run: RunCreate) -> Ru...
    method db_create_run (line 26) | def db_create_run(self, thread_id: str, run: RunCreate):
    method db_sync_run (line 40) | def db_sync_run(self, run: RunObject) -> None:
    method db_get_run (line 47) | def db_get_run(self, run_id: ObjectID) -> RunObject:

FILE: kt-sft/ktransformers/server/crud/assistants/threads.py
  class ThreadsDatabaseManager (line 15) | class ThreadsDatabaseManager:
    method __init__ (line 16) | def __init__(self) -> None:
    method db_create_thread (line 21) | def db_create_thread(self, thread: ThreadCreate):
    method db_get_thread_by_id (line 54) | def db_get_thread_by_id(self, thread_id: ObjectID):
    method db_list_threads (line 59) | def db_list_threads(self, limit: Optional[int], order: Order) -> List[...
    method db_list_threads_preview (line 71) | def db_list_threads_preview(self, limit: Optional[int], order: Order) ...
    method db_delete_thread_by_id (line 88) | def db_delete_thread_by_id(self, thread_id: ObjectID):

FILE: kt-sft/ktransformers/server/exceptions.py
  function db_exception (line 4) | def db_exception():
  function not_implemented (line 11) | def not_implemented(what):
  function internal_server_error (line 18) | def internal_server_error(what):
  function request_error (line 22) | def request_error(what):

FILE: kt-sft/ktransformers/server/main.py
  function mount_app_routes (line 21) | def mount_app_routes(mount_app: FastAPI):
  function create_app (line 29) | def create_app():
  function update_web_port (line 49) | def update_web_port(config_file: str):
  function mount_index_routes (line 61) | def mount_index_routes(app: FastAPI):
  function run_api (line 75) | def run_api(app, host, port, **kwargs):
  function custom_openapi (line 88) | def custom_openapi(app):
  function main (line 103) | def main():

FILE: kt-sft/ktransformers/server/models/assistants/assistants.py
  class Assistant (line 7) | class Assistant(Base):

FILE: kt-sft/ktransformers/server/models/assistants/messages.py
  class Message (line 7) | class Message(Base):

FILE: kt-sft/ktransformers/server/models/assistants/run_steps.py
  class RunStep (line 7) | class RunStep(Base):

FILE: kt-sft/ktransformers/server/models/assistants/runs.py
  class Run (line 7) | class Run(Base):

FILE: kt-sft/ktransformers/server/models/assistants/threads.py
  class Thread (line 7) | class Thread(Base):

FILE: kt-sft/ktransformers/server/schemas/assistants/assistants.py
  class AssistantBase (line 21) | class AssistantBase(BaseModel):
    method validate_tools (line 28) | def validate_tools(cls, value):
    method validate_tool_resources (line 51) | def validate_tool_resources(cls, value):
    method convert_meta_data (line 70) | def convert_meta_data(cls, values):
  class AssistantCreate (line 79) | class AssistantCreate(AssistantBase):
  class AssistantBuildStatus (line 83) | class AssistantBuildStatus(BaseModel):
    class Status (line 84) | class Status(Enum):
    method to_stream_reply (line 112) | def to_stream_reply(self) -> str:
  class AssistantObject (line 116) | class AssistantObject(AssistantBase, ObjectWithCreatedTime):
    method as_api_response (line 123) | def as_api_response(self):
    method get_related_threads_ids (line 126) | def get_related_threads_ids(self) -> List[ObjectID]:
    method get_related_threads_objects (line 133) | def get_related_threads_objects(self) -> List:
    method append_related_threads (line 145) | def append_related_threads(self, thread_ids: List[ObjectID]):
    method update_build_status (line 156) | async def update_build_status(self, events: AsyncIterable) -> AsyncIte...
    method get_build_status (line 178) | def get_build_status(self) -> AssistantBuildStatus:
    method sync_db (line 182) | def sync_db(self)->None:
    method get_encoded_instruction (line 191) | def get_encoded_instruction(self,encode_fn:Callable)->torch.Tensor:
  class AssistantModify (line 198) | class AssistantModify(AssistantBase):

FILE: kt-sft/ktransformers/server/schemas/assistants/messages.py
  class IncompleteDetails (line 15) | class IncompleteDetails(BaseModel):
  class ContentType (line 19) | class ContentType(Enum):
  class ContentObject (line 25) | class ContentObject(BaseModel):
  class ImageFile (line 29) | class ImageFile(BaseModel):
  class ImageFileObject (line 34) | class ImageFileObject(ContentObject):
  class ImageUrl (line 38) | class ImageUrl(BaseModel):
  class ImageUrlObject (line 43) | class ImageUrlObject(ContentObject):
  class Annotation (line 47) | class Annotation(BaseModel):
  class Text (line 51) | class Text(BaseModel):
  class TextObject (line 56) | class TextObject(ContentObject):
    method filter_append (line 62) | def filter_append(self,text:str):
  class Attachment (line 72) | class Attachment(BaseModel):
  class Role (line 77) | class Role(Enum):
    method is_user (line 81) | def is_user(self)->bool:
  class MessageCore (line 85) | class MessageCore(BaseModel):
    method convert_meta_data (line 92) | def convert_meta_data(cls,values):
  class MessageBase (line 98) | class MessageBase(MessageCore):
    class Status (line 99) | class Status(Enum):
  class MessageObject (line 116) | class MessageObject(MessageBase, ObjectWithCreatedTime):
    method get_text_content (line 120) | def get_text_content(self) -> str:
    method get_encoded_content (line 129) | async def get_encoded_content(self,encode_fn:Callable):
    method get_attached_files (line 142) | def get_attached_files(self):
    method append_message_delta (line 147) | def append_message_delta(self,text:str):
    method sync_db (line 150) | def sync_db(self):
    method stream_response_with_event (line 160) | def stream_response_with_event(self, event: MessageBase.Status) -> Mes...
  class MessageStreamResponse (line 169) | class MessageStreamResponse(BaseModel):
    method to_stream_reply (line 173) | def to_stream_reply(self):
  class MessageCreate (line 177) | class MessageCreate(BaseModel):
    method convert_meta_data (line 184) | def convert_meta_data(cls,values):
    method to_core (line 189) | def to_core(self) -> MessageCore:
  class MessageModify (line 206) | class MessageModify(BaseModel):
    method convert_meta_data (line 210) | def convert_meta_data(cls,values):

FILE: kt-sft/ktransformers/server/schemas/assistants/runs.py
  class ToolCall (line 13) | class ToolCall(BaseModel):
  class SubmitToolOutputs (line 19) | class SubmitToolOutputs(BaseModel):
  class RequiredAction (line 23) | class RequiredAction(BaseModel):
  class LastError (line 28) | class LastError(BaseModel):
  class IncompleteDetails (line 33) | class IncompleteDetails(BaseModel):
  class Usage (line 37) | class Usage(BaseModel):
  class TruncationStrategy (line 43) | class TruncationStrategy(BaseModel):
  class ToolChoiceType (line 48) | class ToolChoiceType(Enum):
  class RunBase (line 54) | class RunBase(BaseModel):
    class Status (line 55) | class Status(Enum):
    method convert_meta_data (line 84) | def convert_meta_data(cls,values):
    method set_compute_save (line 89) | def set_compute_save(self,save:int):
  class RunObject (line 104) | class RunObject(RunBase, ObjectWithCreatedTime):
    method stream_response_with_event (line 105) | def stream_response_with_event(self,event:RunBase.Status)->RunStreamRe...
    method sync_db (line 114) | def sync_db(self):
    method create_message_creation_step (line 123) | def create_message_creation_step(self):
  class RunStreamResponse (line 127) | class RunStreamResponse(BaseModel):
    method to_stream_reply (line 130) | def to_stream_reply(self):
  class RunCreate (line 133) | class RunCreate(BaseModel):
    method convert_meta_data (line 144) | def convert_meta_data(cls,values):
  class RunThreadCreate (line 159) | class RunThreadCreate(BaseModel):
    method convert_meta_data (line 169) | def convert_meta_data(cls,values):
  class RunModify (line 184) | class RunModify(BaseModel):
    method convert_meta_data (line 188) | def convert_meta_data(cls,values):
  class ToolOutput (line 194) | class ToolOutput(BaseModel):
  class RunSubmit (line 199) | class RunSubmit(BaseModel):

FILE: kt-sft/ktransformers/server/schemas/assistants/streaming.py
  class TextObjectWithIndex (line 15) | class TextObjectWithIndex(TextObject):
  class ImageFileObjectWithIndex (line 19) | class ImageFileObjectWithIndex(ImageFileObject):
  class ImageUrlObjectWithIndex (line 23) | class ImageUrlObjectWithIndex(ImageUrlObject):
  class MessageDeltaImpl (line 31) | class MessageDeltaImpl(BaseModel):
  class MessageDelta (line 36) | class MessageDelta(Object):
    method to_stream_reply (line 39) | def to_stream_reply(self):
  function text_delta (line 43) | def text_delta(index: int, text: str):
  function append_message_delta (line 47) | def append_message_delta(self: MessageObject, text: str):
  class RunStepDeltaImpl (line 63) | class RunStepDeltaImpl(BaseModel):
  class RunStepDelta (line 67) | class RunStepDelta(Object):
    method to_stream_reply (line 70) | def to_stream_reply(self):
  class Done (line 74) | class Done():
    method to_stream_reply (line 75) | def to_stream_reply(self):
  function check_client_link (line 79) | async def check_client_link(request: Request, async_events: AsyncIterable):
  function add_done (line 86) | async def add_done(async_events: AsyncIterable):
  function to_stream_reply (line 92) | async def to_stream_reply(async_events: AsyncIterable):
  function filter_api_event (line 100) | async def filter_api_event(async_events: AsyncIterable):
  function filter_chat_chunk (line 106) | async def filter_chat_chunk(async_events: AsyncIterable):
  function filter_by_types (line 112) | async def filter_by_types(async_events: AsyncIterable, types: List):
  function api_stream_response (line 120) | def api_stream_response(request: Request, async_events: AsyncIterable):
  function chat_stream_response (line 124) | def chat_stream_response(request: Request, async_events: AsyncIterable):
  function stream_response (line 128) | def stream_response(request: Request, async_events: AsyncIterable):
  function check_link_response (line 132) | def check_link_response(request: Request, async_events: AsyncIterable):
  function wrap_async_generator_into_queue (line 136) | def wrap_async_generator_into_queue(async_events: AsyncIterable) -> asyn...
  function unwrap_async_queue (line 151) | async def unwrap_async_queue(queue: asyncio.Queue) -> AsyncIterable:
  function unwrap_async_queue_slow (line 163) | async def unwrap_async_queue_slow(queue: asyncio.Queue) -> AsyncIterable:

FILE: kt-sft/ktransformers/server/schemas/assistants/threads.py
  class ThreadBase (line 12) | class ThreadBase(BaseModel):
    method convert_meta_data (line 16) | def convert_meta_data(cls,values):
  class ThreadObject (line 24) | class ThreadObject(ThreadBase, ObjectWithCreatedTime):
    method check_is_related_threads (line 28) | def check_is_related_threads(self)->Self:
    class StreamEvent (line 34) | class StreamEvent(Enum):
    method to_stream_reply (line 37) | def to_stream_reply(self,event:StreamEvent):
  class ThreadCreate (line 41) | class ThreadCreate(ThreadBase):
  class ThreadModify (line 45) | class ThreadModify(ThreadBase):

FILE: kt-sft/ktransformers/server/schemas/assistants/tool.py
  class ToolType (line 9) | class ToolType(str, Enum):
  class ToolBase (line 16) | class ToolBase(BaseModel):
  class CodeInterpreter (line 20) | class CodeInterpreter(ToolBase):
  class FileSearch (line 24) | class FileSearch(ToolBase):
  class RelatedThreads (line 28) | class RelatedThreads(ToolBase):
  class FuntionTool (line 32) | class FuntionTool(ToolBase):
  class CodeInterpreterResource (line 41) | class CodeInterpreterResource(BaseModel):
  class FileSearchResource (line 45) | class FileSearchResource(BaseModel):
  class RelatedThreadsResource (line 50) | class RelatedThreadsResource(BaseModel):

FILE: kt-sft/ktransformers/server/schemas/base.py
  class Object (line 12) | class Object(BaseModel):
  class ObjectWithCreatedTime (line 20) | class ObjectWithCreatedTime(Object):
  class Order (line 25) | class Order(str, Enum):
    method to_sqlalchemy_order (line 29) | def to_sqlalchemy_order(self):
  class DeleteResponse (line 41) | class DeleteResponse(Object):
  class OperationResponse (line 44) | class OperationResponse(BaseModel):

FILE: kt-sft/ktransformers/server/schemas/conversation.py
  class ThreadPreview (line 9) | class ThreadPreview(BaseModel):

FILE: kt-sft/ktransformers/server/schemas/endpoints/chat.py
  class CompletionUsage (line 13) | class CompletionUsage(BaseModel):
  class Role (line 22) | class Role(Enum):
  class Message (line 29) | class Message(BaseModel):
    method to_tokenizer_message (line 36) | def to_tokenizer_message(self):
  class FunctionParameters (line 48) | class FunctionParameters(BaseModel):
  class FunctionDefinition (line 53) | class FunctionDefinition(BaseModel):
  class ToolFunction (line 58) | class ToolFunction(BaseModel):
  class Tool (line 61) | class Tool(BaseModel):
  class ChatCompletionCreate (line 65) | class ChatCompletionCreate(BaseModel):
    method get_tokenizer_messages (line 79) | def get_tokenizer_messages(self):
  class ChatCompletionChunk (line 82) | class ChatCompletionChunk(BaseModel):
    method to_stream_reply (line 92) | def to_stream_reply(self):
  class RawUsage (line 95) | class RawUsage(BaseModel):

FILE: kt-sft/ktransformers/server/schemas/legacy/completions.py
  class CompletionCreate (line 7) | class CompletionCreate(BaseModel):
    method get_tokenizer_messages (line 16) | def get_tokenizer_messages(self):
  class FinishReason (line 22) | class FinishReason(Enum):
  class Choice (line 26) | class Choice(BaseModel):
  class CompletionObject (line 33) | class CompletionObject(Object):
    method set_token (line 40) | def set_token(self,token:str):
    method append_token (line 45) | def append_token(self,token:str):
    method to_stream_reply (line 50) | def to_stream_reply(self):

FILE: kt-sft/ktransformers/server/utils/create_interface.py
  function create_interface (line 19) | def create_interface(config: Config, default_args: ConfigArgs):
  class GlobalContextManager (line 33) | class GlobalContextManager:
  class GlobalInterface (line 35) | class GlobalInterface:
  function get_thread_context_manager (line 38) | def get_thread_context_manager() -> GlobalContextManager:
  function get_interface (line 40) | def get_interface() -> GlobalInterface:

FILE: kt-sft/ktransformers/server/utils/multi_timer.py
  function format_time (line 4) | def format_time(seconds):
  class Profiler (line 20) | class Profiler:
    method __init__ (line 21) | def __init__(self):
    method create_timer (line 25) | def create_timer(self, name):
    method start_timer (line 32) | def start_timer(self, name):
    method pause_timer (line 40) | def pause_timer(self, name):
    method get_timer_sec (line 48) | def get_timer_sec(self, name):
    method get_all_timers (line 57) | def get_all_timers(self):
    method report_timer_string (line 63) | def report_timer_string(self, name):
    method create_and_start_timer (line 66) | def create_and_start_timer(self, name):
    method inc (line 72) | def inc(self,key:str,delta:int=1):
    method set_counter (line 75) | def set_counter(self,key:str,to=0):
    method get_counter (line 78) | def get_counter(self,key:str):

FILE: kt-sft/ktransformers/server/utils/sql_utils.py
  class SQLUtil (line 27) | class SQLUtil(metaclass=Singleton):
    method __init__ (line 34) | def __init__(self) -> None:
    method get_db (line 40) | def get_db(self):
    method init_engine (line 53) | def init_engine(cfg: Config):
    method create_sqllite_url (line 70) | def create_sqllite_url(cfg):
    method db_add_commit_refresh (line 89) | def db_add_commit_refresh(self, session: Session, what):
    method db_merge_commit (line 104) | def db_merge_commit(self, session: Session, what):
    method db_update_commit_refresh (line 115) | def db_update_commit_refresh(self, session: Session, existing, what):

FILE: kt-sft/ktransformers/sft/flops_utils/custom_profile.py
  function profile_origin (line 77) | def profile_origin(model, inputs, custom_ops=None, verbose=True, report_...
  function custom_profile (line 162) | def custom_profile(

FILE: kt-sft/ktransformers/sft/flops_utils/lora_test_utils.py
  class ProfilerCallback (line 5) | class ProfilerCallback(TrainerCallback):
    method __init__ (line 6) | def __init__(self, profiler):
    method on_step_end (line 9) | def on_step_end(self, args, state, control, **kwargs):
  function _short (line 12) | def _short(t):
  function install_shape_probes (line 15) | def install_shape_probes(model):
  function inspect_device (line 110) | def inspect_device(model, write_file):
  function print_model_params (line 121) | def print_model_params(model):
  function print_lora_params (line 165) | def print_lora_params(model):
  function print_grad_fn (line 188) | def print_grad_fn(grad_fn, indent=0):
  function forward_hook (line 198) | def forward_hook(module, inputs, output):
  function check_moe_gradients (line 210) | def check_moe_gradients(model):
  function disable_all_dropout (line 219) | def disable_all_dropout(module):
  function verify_lora_layers (line 226) | def verify_lora_layers(model):
  function print_moe_stats (line 260) | def print_moe_stats(moe_layer: KExpertsTorch):
  function recursive_traverse (line 276) | def recursive_traverse(model, parent_name=''):
  function log_step_state (line 289) | def log_step_state(
  function collect_gradients (line 325) | def collect_gradients(model, input_ids):
  function report_meta_tensors (line 343) | def report_meta_tensors(model):

FILE: kt-sft/ktransformers/sft/lora.py
  class KAccelerator (line 50) | class KAccelerator(Accelerator):
    method __init__ (line 51) | def __init__(self, *args, **kwargs):
    method prepare_model (line 55) | def prepare_model(self, model, *args, **kwargs):
    method prepare (line 58) | def prepare(self, *args, **kwargs):
  class KTrainer (line 67) | class KTrainer(Trainer):
    method save_model (line 68) | def save_model(self, output_dir=None, _internal_call=False):
    method _move_model_to_device (line 74) | def _move_model_to_device(self, model, device):
    method _wrap_model (line 78) | def _wrap_model(self, model, training=True, dataloader=None):
    method create_accelerator_and_postprocess (line 82) | def create_accelerator_and_postprocess(self):
    method get_train_dataloader (line 210) | def get_train_dataloader(self) -> DataLoader:
    method training_step (line 257) | def training_step(
  class SFTJsonListDataset (line 333) | class SFTJsonListDataset(TorchDataset):
    method __init__ (line 334) | def __init__(self, path: str, tokenizer: AutoTokenizer, max_len: int =...
    method build_example (line 342) | def build_example(ins: str, inp: str, out: str) -> Dict[str, str]:
    method __len__ (line 349) | def __len__(self):
    method __getitem__ (line 352) | def __getitem__(self, idx: int):
  function lora_and_load_adapter (line 387) | def lora_and_load_adapter(model, tokenizer, sft_data_path, save_adapter_...
  function inject_lora_layer (line 455) | def inject_lora_layer(model, use_adapter_path):

FILE: kt-sft/ktransformers/sft/metrics.py
  function eval_logit_processor (line 47) | def eval_logit_processor(logits: "torch.Tensor", labels: "torch.Tensor")...
  class ComputeSimilarity (line 61) | class ComputeSimilarity:
    method _dump (line 69) | def _dump(self) -> Optional[dict[str, float]]:
    method __post_init__ (line 78) | def __post_init__(self):
    method __call__ (line 81) | def __call__(self, eval_preds: "EvalPrediction", compute_result: bool ...

FILE: kt-sft/ktransformers/sft/metrics_utils/constants.py
  class AttentionFunction (line 99) | class AttentionFunction(str, Enum):
  class EngineName (line 106) | class EngineName(str, Enum):
  class DownloadSource (line 112) | class DownloadSource(str, Enum):
  class QuantizationMethod (line 119) | class QuantizationMethod(str, Enum):
  class RopeScaling (line 132) | class RopeScaling(str, Enum):
  function register_model_group (line 139) | def register_model_group(

FILE: kt-sft/ktransformers/sft/metrics_utils/env.py
  function print_env (line 33) | def print_env() -> None:

FILE: kt-sft/ktransformers/sft/metrics_utils/logging.py
  class LoggerHandler (line 34) | class LoggerHandler(logging.Handler):
    method __init__ (line 37) | def __init__(self, output_dir: str) -> None:
    method _write_log (line 51) | def _write_log(self, log_entry: str) -> None:
    method emit (line 55) | def emit(self, record) -> None:
    method close (line 62) | def close(self) -> None:
  class _Logger (line 67) | class _Logger(logging.Logger):
    method info_rank0 (line 70) | def info_rank0(self, *args, **kwargs) -> None:
    method warning_rank0 (line 73) | def warning_rank0(self, *args, **kwargs) -> None:
    method warning_rank0_once (line 76) | def warning_rank0_once(self, *args, **kwargs) -> None:
  function _get_default_logging_level (line 80) | def _get_default_logging_level() -> "logging._Level":
  function _get_library_name (line 92) | def _get_library_name() -> str:
  function _get_library_root_logger (line 96) | def _get_library_root_logger() -> "_Logger":
  function _configure_library_root_logger (line 100) | def _configure_library_root_logger() -> None:
  function get_logger (line 120) | def get_logger(name: Optional[str] = None) -> "_Logger":
  function add_handler (line 129) | def add_handler(handler: "logging.Handler") -> None:
  function remove_handler (line 135) | def remove_handler(handler: logging.Handler) -> None:
  function info_rank0 (line 141) | def info_rank0(self: "logging.Logger", *args, **kwargs) -> None:
  function warning_rank0 (line 146) | def warning_rank0(self: "logging.Logger", *args, **kwargs) -> None:
  function warning_rank0_once (line 152) | def warning_rank0_once(self: "logging.Logger", *args, **kwargs) -> None:

FILE: kt-sft/ktransformers/sft/metrics_utils/misc.py
  class AverageMeter (line 57) | class AverageMeter:
    method __init__ (line 60) | def __init__(self):
    method reset (line 63) | def reset(self):
    method update (line 69) | def update(self, val, n=1):
  function check_version (line 76) | def check_version(requirement: str, mandatory: bool = False) -> None:
  function check_dependencies (line 95) | def check_dependencies() -> None:
  function calculate_tps (line 104) | def calculate_tps(dataset: list[dict[str, Any]], metrics: dict[str, floa...
  function count_parameters (line 117) | def count_parameters(model: "torch.nn.Module") -> tuple[int, int]:
  function get_current_device (line 144) | def get_current_device() -> "torch.device":
  function get_device_count (line 160) | def get_device_count() -> int:
  function get_logits_processor (line 174) | def get_logits_processor() -> "LogitsProcessorList":
  function get_current_memory (line 181) | def get_current_memory() -> tuple[int, int]:
  function get_peak_memory (line 195) | def get_peak_memory() -> tuple[int, int]:
  function has_tokenized_data (line 209) | def has_tokenized_data(path: "os.PathLike") -> bool:
  function infer_optim_dtype (line 214) | def infer_optim_dtype(model_dtype: Optional["torch.dtype"]) -> "torch.dt...
  function is_accelerator_available (line 224) | def is_accelerator_available() -> bool:
  function is_env_enabled (line 231) | def is_env_enabled(env_var: str, default: str = "0") -> bool:
  function numpify (line 236) | def numpify(inputs: Union["NDArray", "torch.Tensor"]) -> "NDArray":
  function skip_check_imports (line 248) | def skip_check_imports() -> None:
  function torch_gc (line 254) | def torch_gc() -> None:
  function try_download_model_from_other_hub (line 267) | def try_download_model_from_other_hub(model_args: "ModelArguments") -> str:
  function use_modelscope (line 304) | def use_modelscope() -> bool:
  function use_openmind (line 308) | def use_openmind() -> bool:
  function use_ray (line 312) | def use_ray() -> bool:
  function find_available_port (line 316) | def find_available_port() -> int:
  function fix_proxy (line 325) | def fix_proxy(ipv6_enabled: bool = False) -> None:

FILE: kt-sft/ktransformers/sft/metrics_utils/packages.py
  function _is_package_available (line 30) | def _is_package_available(name: str) -> bool:
  function _get_package_version (line 34) | def _get_package_version(name: str) -> "Version":
  function is_pyav_available (line 41) | def is_pyav_available():
  function is_librosa_available (line 45) | def is_librosa_available():
  function is_fastapi_available (line 49) | def is_fastapi_available():
  function is_galore_available (line 53) | def is_galore_available():
  function is_apollo_available (line 57) | def is_apollo_available():
  function is_gradio_available (line 61) | def is_gradio_available():
  function is_matplotlib_available (line 65) | def is_matplotlib_available():
  function is_pillow_available (line 69) | def is_pillow_available():
  function is_ray_available (line 73) | def is_ray_available():
  function is_requests_available (line 77) | def is_requests_available():
  function is_rouge_available (line 81) | def is_rouge_available():
  function is_starlette_available (line 85) | def is_starlette_available():
  function is_transformers_version_greater_than (line 90) | def is_transformers_version_greater_than(content: str):
  function is_uvicorn_available (line 94) | def is_uvicorn_available():
  function is_vllm_available (line 98) | def is_vllm_available():
  function is_sglang_available (line 102) | def is_sglang_available():

FILE: kt-sft/ktransformers/sft/metrics_utils/ploting.py
  function smooth (line 34) | def smooth(scalars: list[float]) -> list[float]:
  function gen_loss_plot (line 49) | def gen_loss_plot(trainer_log: list[dict[str, Any]]) -> "matplotlib.figu...
  function plot_loss (line 69) | def plot_loss(save_dictionary: str, keys: list[str] = ["loss"]) -> None:

FILE: kt-sft/ktransformers/sft/monkey_patch_torch_module.py
  function _patched_module_init (line 7) | def _patched_module_init(self, *args, **kwargs):
  function install_patch (line 42) | def install_patch():
  function restore_patch (line 45) | def restore_patch():

FILE: kt-sft/ktransformers/sft/peft_utils/lora_layer.py
  function dispatch_default (line 20) | def dispatch_default(
  class BaseTunerLayer (line 55) | class BaseTunerLayer(ABC):
    method get_orig_module (line 80) | def get_orig_module(self) -> nn.Module:
    method weight (line 93) | def weight(self) -> torch.Tensor:
    method bias (line 109) | def bias(self) -> torch.Tensor:
    method merge (line 113) | def merge(self, safe_merge: bool = False, adapter_names: Optional[list...
    method unmerge (line 116) | def unmerge(self) -> None:
    method merged (line 120) | def merged(self) -> bool:
    method disable_adapters (line 124) | def disable_adapters(self) -> bool:
    method active_adapter (line 129) | def active_adapter(self) -> str | list[str]:
    method _get_available_adapters (line 133) | def _get_available_adapters(self) -> set[str]:
    method active_adapters (line 144) | def active_adapters(self):
    method enable_adapters (line 150) | def enable_adapters(self, enabled: bool) -> None:
    method set_adapter (line 168) | def set_adapter(self, adapter_names: str | list[str]) -> None:
    method _all_available_adapter_names (line 199) | def _all_available_adapter_names(self) -> list[str]:
    method delete_adapter (line 210) | def delete_adapter(self, adapter_name: str) -> None:
    method _move_adapter_to_device_of_orig_module (line 247) | def _move_adapter_to_device_of_orig_module(self, adapter_name: str, de...
  class LoraLayer (line 283) | class LoraLayer(BaseTunerLayer):
    method __init__ (line 289) | def __init__(self, orig_module: nn.Module, ephemeral_gpu_offload: bool...
    method update_layer (line 321) | def update_layer(
    method reset_lora_parameters (line 364) | def reset_lora_parameters(self, adapter_name, init_lora_weights):
    method olora_init (line 389) | def olora_init(self, adapter_name):
    method pissa_init (line 414) | def pissa_init(self, adapter_name, init_lora_weights):
    method loftq_init (line 449) | def loftq_init(self, adapter_name):
    method _cache_store (line 470) | def _cache_store(self, key: str, value: Any) -> None:
    method _cache_pop (line 473) | def _cache_pop(self, key: str) -> Any:
    method set_scale (line 477) | def set_scale(self, adapter, scale):
    method scale_layer (line 483) | def scale_layer(self, scale: float) -> None:
    method unscale_layer (line 493) | def unscale_layer(self, scale=None) -> None:
    method _check_forward_args (line 503) | def _check_forward_args(self, x, *args, **kwargs):
    method _mixed_batch_forward (line 530) | def _mixed_batch_forward(
  class Linear (line 562) | class Linear(nn.Module, LoraLayer):
    method __init__ (line 564) | def __init__(
    method merge (line 596) | def merge(self, safe_merge: bool = False, adapter_names: Optional[list...
    method unmerge (line 682) | def unmerge(self) -> None:
    method get_delta_weight (line 705) | def get_delta_weight(self, adapter) -> torch.Tensor:
    method forward (line 739) | def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch...
    method __repr__ (line 770) | def __repr__(self) -> str:
  class Embedding (line 775) | class Embedding(nn.Module, LoraLayer):
    method __init__ (line 777) | def __init__(
    method update_layer (line 809) | def update_layer(
    method merge (line 845) | def merge(self, safe_merge: bool = False, adapter_names: Optional[list...
    method unmerge (line 881) | def unmerge(self) -> None:
    method get_delta_weight (line 893) | def get_delta_weight(self, adapter) -> torch.Tensor:
    method _mixed_batch_forward (line 927) | def _mixed_batch_forward(
    method _embed (line 957) | def _embed(self, input: torch.Tensor, weight: torch.Tensor) -> torch.T...
    method forward (line 969) | def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch...
    method __repr__ (line 1009) | def __repr__(self) -> str:
  class KTransformersLinearLora (line 1013) | class KTransformersLinearLora(KTransformersLinear, LoraLayer):
    method __init__ (line 1014) | def __init__(
    method merge (line 1065) | def merge(self, safe_merge: bool = False, adapter_names: Optional[list...
    method unmerge (line 1117) | def unmerge(self) -> None:
    method get_delta_weight (line 1137) | def get_delta_weight(self, adapter: str) -> torch.Tensor:
    method forward (line 1143) | def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch...

FILE: kt-sft/ktransformers/sft/peft_utils/lora_model.py
  class LoraModel (line 40) | class LoraModel(nn.Module, ABC):
    method __init__ (line 115) | def __init__(
    method inject_adapter (line 150) | def inject_adapter(
    method _create_and_replace (line 223) | def _create_and_replace(
    method _replace_module (line 256) | def _replace_module(self, parent, child_name, new_module, child):
    method _mark_only_adapters_as_trainable (line 302) | def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
    method _create_new_module (line 324) | def _create_new_module(lora_config, adapter_name, target, parent, **kw...
    method __getattr__ (line 351) | def __getattr__(self, name: str):
    method _pre_injection_hook (line 360) | def _pre_injection_hook(self, model: nn.Module, config: PeftConfig, ad...
    method _set_adapter_layers (line 375) | def _set_adapter_layers(self, enabled: bool = True) -> None:
    method disable_adapter_layers (line 380) | def disable_adapter_layers(self) -> None:
    method enable_adapter_layers (line 389) | def enable_adapter_layers(self) -> None:
    method active_adapters (line 429) | def active_adapters(self) -> list[str]:

FILE: kt-sft/ktransformers/sft/peft_utils/mapping.py
  function get_peft_model (line 11) | def get_peft_model(
  function inject_adapter_in_model (line 55) | def inject_adapter_in_model(

FILE: kt-sft/ktransformers/sft/peft_utils/peft_model.py
  class PeftModel (line 68) | class PeftModel(PushToHubMixin, torch.nn.Module):
    method __init__ (line 104) | def __init__(
    method peft_config (line 147) | def peft_config(self) -> dict[str, PeftConfig]:
    method active_adapters (line 153) | def active_adapters(self) -> list[str]:
    method peft_config (line 172) | def peft_config(self, value: dict[str, PeftConfig]):
    method save_pretrained (line 178) | def save_pretrained(
    method from_pretrained (line 375) | def from_pretrained(
    method _setup_prompt_encoder (line 556) | def _setup_prompt_encoder(self, adapter_name: str):
    method _prepare_model_for_gradient_checkpointing (line 626) | def _prepare_model_for_gradient_checkpointing(self, model: PreTrainedM...
    method get_prompt_embedding_to_save (line 645) | def get_prompt_embedding_to_save(self, adapter_name: str) -> torch.Ten...
    method get_prompt (line 664) | def get_prompt(self, batch_size: int, task_ids: Optional[torch.Tensor]...
    method get_nb_trainable_parameters (line 727) | def get_nb_trainable_parameters(self) -> tuple[int, int]:
    method print_trainable_parameters (line 757) | def print_trainable_parameters(self) -> None:
    method __getattr__ (line 774) | def __getattr__(self, name: str):
    method _enable_peft_forward_hooks (line 784) | def _enable_peft_forward_hooks(self, *args, **kwargs):
    method forward (line 796) | def forward(self, *args: Any, **kwargs: Any):
    method generate (line 804) | def generate(self, *args, **kwargs):
    method _get_base_model_class (line 809) | def _get_base_model_class(self, is_prompt_tuning=False):
    method disable_adapter (line 818) | def disable_adapter(self):
    method get_base_model (line 865) | def get_base_model(self) -> torch.nn.Module:
    method add_adapter (line 875) | def add_adapter(self, adapter_name: str, peft_config: PeftConfig, low_...
    method set_additional_trainable_modules (line 926) | def set_additional_trainable_modules(self, peft_config, adapter_name):
    method get_layer_status (line 934) | def get_layer_status(self) -> list[TunerLayerStatus]:
    method get_model_status (line 964) | def get_model_status(self) -> TunerModelStatus:
    method _split_kwargs (line 1005) | def _split_kwargs(cls, kwargs: dict[str, Any]):
    method _update_offload (line 1018) | def _update_offload(self, offload_index: dict[str, dict[str, str]], ad...
    method _check_new_adapter_config (line 1098) | def _check_new_adapter_config(self, peft_config: PeftConfig, is_traina...
    method load_adapter (line 1121) | def load_adapter(
    method set_adapter (line 1268) | def set_adapter(self, adapter_name: str) -> None:
    method base_model_torch_dtype (line 1295) | def base_model_torch_dtype(self):
    method active_peft_config (line 1299) | def active_peft_config(self):
    method create_or_update_model_card (line 1302) | def create_or_update_model_card(self, output_dir: str):
  class PeftModelForCausalLM (line 1356) | class PeftModelForCausalLM(PeftModel):
    method __init__ (line 1397) | def __init__(
    method forward (line 1403) | def forward(
    method _cpt_forward (line 1498) | def _cpt_forward(
    method generate (line 1555) | def generate(self, *args, **kwargs):
    method prepare_inputs_for_generation (line 1576) | def prepare_inputs_for_generation(self, *args, task_ids: Optional[torc...
  class TunerLayerStatus (line 1651) | class TunerLayerStatus:
  function get_layer_status (line 1662) | def get_layer_status(model: torch.nn.Module) -> list[TunerLayerStatus]:
  class TunerModelStatus (line 1765) | class TunerModelStatus:
  function get_model_status (line 1780) | def get_model_status(model: torch.nn.Module) -> TunerModelStatus:

FILE: kt-sft/ktransformers/sft/torchviz_test.py
  class SimpleNet (line 5) | class SimpleNet(nn.Module):
    method __init__ (line 6) | def __init__(self):
    method forward (line 12) | def forward(self, x):

FILE: kt-sft/ktransformers/tests/AIME_2024/eval_api.py
  function generate_text (line 16) | def generate_text(api_url,question , model_name, stream=False, auth_toke...
  function load_data (line 39) | def load_data(file_path):
  function get_score (line 52) | def get_score(pred, answer):
  function run_eval_api (line 72) | def run_eval_api(
  function main (line 118) | def main(output_path, api_url, model_name, auth_token, format_tabs,probl...

FILE: kt-sft/ktransformers/tests/AIME_2024/evaluation.py
  function filter_answer (line 2) | def filter_answer(completion: str) -> str:

FILE: kt-sft/ktransformers/tests/AIME_2024/prompts.py
  function instruct_prompt (line 1) | def instruct_prompt(prompt: str) -> str:

FILE: kt-sft/ktransformers/tests/function_call_test.py
  function send_messages (line 3) | def send_messages(messages):

FILE: kt-sft/ktransformers/tests/humaneval/eval_api.py
  function generate_text (line 11) | def generate_text(api_url,question , model_name, stream=False, auth_toke...
  function run_eval_api (line 34) | def run_eval_api(
  function main (line 80) | def main(output_path, api_url, model_name, auth_token, format_tabs,probl...

FILE: kt-sft/ktransformers/tests/humaneval/evaluation.py
  function filter_code (line 2) | def filter_code(completion: str) -> str:
  function fix_indents (line 14) | def fix_indents(text: str) -> str:

FILE: kt-sft/ktransformers/tests/humaneval/prompts.py
  function instruct_prompt (line 1) | def instruct_prompt(prompt: str) -> str:
  function standard_prompt (line 5) | def standard_prompt(prompt: str) -> str:
  function write_prompt (line 9) | def write_prompt(prompt: str) -> str:
  function replit_glaive_prompt (line 13) | def replit_glaive_prompt(prompt: str) -> str:

FILE: kt-sft/ktransformers/tests/mmlu_pro_test.py
  class DataEvaluator (line 16) | class DataEvaluator:
    method __init__ (line 17) | def __init__(self):
    method load_data (line 21) | def load_data(self, file_path):
    method get_prompt (line 43) | def get_prompt(self, record):
    method post_processing (line 53) | def post_processing(self, text):
    method score (line 62) | def score(self, pred, answers):
  function generate_text (line 77) | def generate_text(api_url, question, model_name, stream=False):
  function main (line 101) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file...

FILE: kt-sft/ktransformers/tests/mmlu_test.py
  class DataEvaluator (line 16) | class DataEvaluator:
    method __init__ (line 17) | def __init__(self):
    method load_data (line 21) | def load_data(self, file_path):
    method get_prompt (line 35) | def get_prompt(self, record):
    method post_processing (line 45) | def post_processing(self, text):
    method score (line 54) | def score(self, pred, answers):
  function generate_text (line 69) | def generate_text(api_url, question, model_name, stream=False):
  function main (line 93) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file...

FILE: kt-sft/ktransformers/tests/mmlu_test_multi.py
  function extract_final_answer (line 19) | def extract_final_answer(text):
  class DataEvaluator (line 57) | class DataEvaluator:
    method __init__ (line 58) | def __init__(self):
    method load_data (line 61) | def load_data(self, file_path):
    method get_prompt (line 72) | def get_prompt(self, record):
    method post_processing (line 80) | def post_processing(self, text):
    method score (line 87) | def score(self, pred, answer):
  function generate_text (line 95) | def generate_text(api_url, question, model_name, stream=False):
  function main (line 115) | def main(concurrent_requests, data_evaluator: DataEvaluator, result_file...

FILE: kt-sft/ktransformers/tests/score.py
  function wait_for_server (line 7) | def wait_for_server(base_url: str, timeout: int = None) -> None:
  function enqueue_output (line 63) | def enqueue_output(out, queue):

FILE: kt-sft/ktransformers/tests/test_client.py
  function fetch_event_stream (line 15) | async def fetch_event_stream(session, payload, request_id, stream):
  function main (line 76) | async def main(prompt_id, model, stream, max_tokens, temperature, top_p):

FILE: kt-sft/ktransformers/tests/test_pytorch_q8.py
  class LinearModel (line 3) | class LinearModel(torch.nn.Module):
    method __init__ (line 4) | def __init__(self, in_features, out_features):
    method forward (line 8) | def forward(self, x):

FILE: kt-sft/ktransformers/tests/test_speed.py
  function fetch_event_stream (line 48) | async def fetch_event_stream(session, request_id, prompt, max_tokens, mo...
  function main (line 137) | async def main(concurrent_requests , prompt, max_tokens, model):

FILE: kt-sft/ktransformers/tests/triton_fp8gemm_test.py
  function test_fp8_gemm_vs_torch_matmul (line 21) | def test_fp8_gemm_vs_torch_matmul():
  function test_fp8_gemm_vs_torch_matmul_load (line 48) | def test_fp8_gemm_vs_torch_matmul_load():
  function test_fp8_gemm_tplops (line 71) | def test_fp8_gemm_tplops():

FILE: kt-sft/ktransformers/util/cuda_graph_runner.py
  class CUDAGraphRunner (line 10) | class CUDAGraphRunner:
    method __init__ (line 12) | def __init__(self):
    method capture (line 17) | def capture(
    method forward (line 63) | def forward(
    method __call__ (line 83) | def __call__(self, *args, **kwargs):

FILE: kt-sft/ktransformers/util/custom_gguf.py
  class GGMLQuantizationType (line 32) | class GGMLQuantizationType(IntEnum):
  function quant_shape_to_byte_shape (line 97) | def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuan...
  function read_value (line 170) | def read_value(f, data_type):
  function dequantize_q2_k (line 218) | def dequantize_q2_k(data):
  function dequantize_q2_k_gpu (line 255) | def dequantize_q2_k_gpu(data, device:str ="cuda", target_dtype = torch.g...
  function dequantize_q3_k (line 265) | def dequantize_q3_k(data):
  function dequantize_q3_k_gpu (line 307) | def dequantize_q3_k_gpu(data, device:str ="cuda", target_dtype = torch.g...
  function dequantize_q4_k (line 317) | def dequantize_q4_k(data):
  function dequantize_q4_k_gpu (line 339) | def dequantize_q4_k_gpu(data, device:str ="cuda", target_dtype = torch.g...
  function dequantize_q5_k (line 349) | def dequantize_q5_k(data):
  function dequantize_q5_k_gpu (line 405) | def dequantize_q5_k_gpu(data, device:str ="cuda", target_dtype = torch.g...
  function dequantize_q6_k (line 415) | def dequantize_q6_k(data):
  function dequantize_q6_k_gpu (line 464) | def dequantize_q6_k_gpu(data: np.ndarray, device:str = "cuda", target_dt...
  function dequantize_iq4_xs (line 475) | def dequantize_iq4_xs(data):
  function dequantize_iq4_xs_gpu (line 505) | def dequantize_iq4_xs_gpu(data: np.ndarray, device:str = "cuda", target_...
  function dequantize_q4_0 (line 514) | def dequantize_q4_0(data):
  function dequantize_q4_0_gpu (line 529) | def dequantize_q4_0_gpu(data, device:str = "cuda", target_dtype = torch....
  function dequantize_q5_0 (line 532) | def dequantize_q5_0(data):
  function dequantize_q5_0_gpu (line 553) | def dequantize_q5_0_gpu(data, device:str = "cuda", target_dtype = torch....
  function dequantize_q8_0 (line 556) | def dequantize_q8_0(data):
  function dequantize_q8_0_gpu (line 565) | def dequantize_q8_0_gpu(data, device:str = "cuda", target_dtype = torch....
  function dequantize_f32 (line 577) | def dequantize_f32(data):
  function dequantize_f32_gpu (line 580) | def dequantize_f32_gpu(data, device, target_dtype = torch.get_default_dt...
  function dequantize_f16 (line 587) | def dequantize_f16(data):
  function dequantize_f16_gpu (line 590) | def dequantize_f16_gpu(data, device, target_dtype = torch.get_default_dt...
  function dequantize_bf16_gpu (line 597) | def dequantize_bf16_gpu(data, device, target_dtype = torch.get_default_d...
  function translate_name_to_gguf_mixtral (line 635) | def translate_name_to_gguf_mixtral(name):
  function translate_name_to_gguf (line 658) | def translate_name_to_gguf(name):
  function translate_adapter_name_to_gguf (line 704) | def translate_adapter_name_to_gguf(name):

FILE: kt-sft/ktransformers/util/custom_loader.py
  class ModelLoader (line 19) | class ModelLoader(ABC):
    method has_tensor (line 26) | def has_tensor(cls, name: str):
  class SafeTensorLoader (line 38) | class SafeTensorLoader(ModelLoader):
    method __init__ (line 44) | def __init__(self, file_path: str):
    method __load_tensor_file_map (line 47) | def __load_tensor_file_map(self, file_path: str):
    method load_tensor (line 86) | def load_tensor(self, key: str, device: str="cpu"):
    method load_experts (line 100) | def load_experts(self, key: str, device: str="cpu"):
    method load_gate (line 201) | def load_gate(self, key: str, device: str="cpu"):
    method close_all_handles (line 228) | def close_all_handles(self):
    method load_dequantized_tensor (line 233) | def load_dequantized_tensor(self, key:str, device: str="cpu"):
    method has_tensor (line 251) | def has_tensor(self, name: str):
  class GGUFLoader (line 254) | class GGUFLoader(ModelLoader):
    method __init__ (line 260) | def __init__(self, gguf_path: str):
    method load_gguf (line 296) | def load_gguf(self, f):
    method get_mmap_tensor (line 378) | def get_mmap_tensor(self, name):
    method get_undequanted_tensor_and_ggml_type (line 389) | def get_undequanted_tensor_and_ggml_type(self, name):
    method load_expert_tensor (line 397) | def load_expert_tensor(self, name, data, expert_id, elements_per_exper...
    method load_gguf_tensor (line 426) | def load_gguf_tensor(self, name: str, device:str = "cpu", target_dtype...
    method has_tensor (line 491) | def has_tensor(self, name: str):
    method get_ggml_type (line 495) | def get_ggml_type(self, name: str):
  class ModelLoaderFactory (line 501) | class ModelLoaderFactory:
    method create_loader (line 508) | def create_loader(path: str):

FILE: kt-sft/ktransformers/util/globals.py
  class _GlobalConfig (line 3) | class _GlobalConfig:
    method __init__ (line 4) | def __init__(self):
    method get (line 9) | def get(self, key, default=None):
    method set (line 12) | def set(self, key, value):
    method update (line 15) | def update(self, **kwargs):
    method all (line 18) | def all(self):
    method __getitem__ (line 21) | def __getitem__(self, key):
    method __setitem__ (line 24) | def __setitem__(self, key, value):

FILE: kt-sft/ktransformers/util/grad_wrapper.py
  function maybe_no_grad (line 12) | def maybe_no_grad(_func=None):

FILE: kt-sft/ktransformers/util/inference_state.py
  class InferenceState (line 5) | class InferenceState(enum.Enum):

FILE: kt-sft/ktransformers/util/modeling_rope_utils.py
  function _compute_default_rope_parameters (line 29) | def _compute_default_rope_parameters(
  function _compute_linear_scaling_rope_parameters (line 71) | def _compute_linear_scaling_rope_parameters(
  function _compute_dynamic_ntk_parameters (line 112) | def _compute_dynamic_ntk_parameters(
  function _compute_yarn_parameters (line 163) | def _compute_yarn_parameters(
  function _compute_longrope_parameters (line 259) | def _compute_longrope_parameters(
  function _compute_llama3_parameters (line 322) | def _compute_llama3_parameters(
  function _check_received_keys (line 378) | def _check_received_keys(
  function _validate_default_rope_parameters (line 407) | def _validate_default_rope_parameters(config: PretrainedConfig, ignore_k...
  function _validate_linear_scaling_rope_parameters (line 415) | def _validate_linear_scaling_rope_parameters(config: PretrainedConfig, i...
  function _validate_dynamic_scaling_rope_parameters (line 427) | def _validate_dynamic_scaling_rope_parameters(config: PretrainedConfig, ...
  function _validate_yarn_parameters (line 441) | def _validate_yarn_parameters(config: PretrainedConfig, ignore_keys: Opt...
  function _validate_longrope_parameters (line 479) | def _validate_longrope_parameters(config: PretrainedConfig, ignore_keys:...
  function _validate_llama3_parameters (line 529) | def _validate_llama3_parameters(config: PretrainedConfig, ignore_keys: O...
  function rope_config_validation (line 576) | def rope_config_validation(config: PretrainedConfig, ignore_keys: Option...

FILE: kt-sft/ktransformers/util/textstream.py
  class TextStreamer (line 2) | class TextStreamer:
    method __init__ (line 4) | def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = Fal...
    method reset (line 14) | def reset(self):
    method put (line 18) | def put(self, value)->Optional[str]:
    method end (line 49) | def end(self)->Optional[str]:
    method _is_chinese_char (line 62) | def _is_chinese_char(self, cp):

FILE: kt-sft/ktransformers/util/utils.py
  class NoEosUntil (line 40) | class NoEosUntil(LogitsProcessor):
    method __init__ (line 41) | def __init__(self, prompt_len: int, min_gen_len: int, eos_ids):
    method __call__ (line 47) | def __call__(self, input_ids, scores):
  class SilentCaptureStreamer (line 52) | class SilentCaptureStreamer(TextStreamer):
    method __init__ (line 53) | def __init__(self, tokenizer: "AutoTokenizer", skip_prompt: bool = Fal...
    method _append_piece (line 57) | def _append_piece(self, piece: Optional[str]):
    method put (line 61) | def put(self, value) -> str:
    method end (line 84) | def end(self) -> str:
    method getvalue (line 89) | def getvalue(self) -> str:
    method clear (line 92) | def clear(self):
  function get_free_ports (line 97) | def get_free_ports(n: int, continue_prot: list):
  function get_compute_capability (line 113) | def get_compute_capability(device:torch.device = None):
  function set_module (line 127) | def set_module(model, submodule_key, module):
  function set_param (line 141) | def set_param(module: nn.Module, name: str, weights: torch.Tensor):
  function get_device (line 148) | def get_device(gguf_module_key:str, device_map:dict):
  function get_all_used_cuda_device (line 156) | def get_all_used_cuda_device(device_map:dict):
  function load_cur_state_dict (line 166) | def load_cur_state_dict(module: nn.Module, gguf_loader: ModelLoader, pre...
  function sync_all_device (line 254) | def sync_all_device(all_device_list):
  function xpu_fp16_model (line 265) | def xpu_fp16_model(config):
  function load_weights (line 277) | def load_weights(module:nn.Module, gguf_loader:ModelLoader, prefix='', d...
  function tf_logits_warper (line 293) | def tf_logits_warper(generation_config):
  function prefill_and_generate (line 344) | def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000,...
  function prefill_and_generate_capture (line 527) | def prefill_and_generate_capture(

FILE: kt-sft/ktransformers/util/vendors.py
  class GPUVendor (line 7) | class GPUVendor(IntEnum):
  class DeviceManager (line 15) | class DeviceManager:
    method __init__ (line 19) | def __init__(self):
    method _detect_gpu_vendor (line 23) | def _detect_gpu_vendor(self) -> GPUVendor:
    method _get_available_devices (line 60) | def _get_available_devices(self) -> List[int]:
    method get_device_str (line 75) | def get_device_str(self, device_id: Union[int, str]) -> str:
    method to_torch_device (line 102) | def to_torch_device(self, device_id: Union[int, str] = 0) -> torch.dev...
    method move_tensor_to_device (line 126) | def move_tensor_to_device(self, tensor: torch.Tensor, device_id: Union...
    method is_available (line 140) | def is_available(self, index: int = 0) -> bool:
    method get_all_devices (line 155) | def get_all_devices(self) -> List[int]:
  function get_device (line 168) | def get_device(device_id: Union[int, str] = 0) -> torch.device:
  function to_device (line 180) | def to_device(tensor: torch.Tensor, device_id: Union[int, str] = 0) -> t...

FILE: kt-sft/ktransformers/util/weight_loader.py
  class ModelLoader (line 8) | class ModelLoader(ABC):
    method load_tensor (line 15) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor:
    method supports_format (line 30) | def supports_format(cls, path: str) -> bool:
  class SafeTensorLoader (line 43) | class SafeTensorLoader(ModelLoader):
    method __init__ (line 48) | def __init__(self, path: str):
    method _load_tensor_file_map (line 59) | def _load_tensor_file_map(self, path: str) -> None:
    method load_tensor (line 102) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor:
    method load_dequantized_tensor (line 122) | def load_dequantized_tensor(self, name: str, device: str = "cpu") -> t...
    method close_all_handles (line 148) | def close_all_handles(self) -> None:
    method supports_format (line 157) | def supports_format(cls, path: str) -> bool:
  class GGUFLoader (line 185) | class GGUFLoader(ModelLoader):
    method __init__ (line 190) | def __init__(self, path: str):
    method _load_gguf (line 228) | def _load_gguf(self, f) -> None:
    method _read_value (line 287) | def _read_value(self, f, data_type) -> Any:
    method load_tensor (line 310) | def load_tensor(self, name: str, device: str = "cpu") -> torch.Tensor:
    method load_gguf_tensor (line 324) | def load_gguf_tensor(self, name: str, device: str = "cpu", target_dtyp...
    method supports_format (line 346) | def supports_format(cls, path: str) -> bool:

FILE: kt-sft/ktransformers/website/src/api/assistant.ts
  function filterAndConvert (line 3) | function filterAndConvert(
  type IAssistantData (line 12) | interface IAssistantData {

FILE: kt-sft/ktransformers/website/src/api/run.ts
  type IRunData (line 4) | interface IRunData {
  function cancelRun (line 87) | async function cancelRun(threadId: string, runId: string){

FILE: kt-sft/ktransformers/website/src/assets/iconfont/iconfont.js
  function s (line 1) | function s(){h||(h=!0,e())}
  function d (line 1) | function d(){try{a.documentElement.doScroll("left")}catch(t){return void...

FILE: kt-sft/ktransformers/website/src/conf/config.ts
  type Window (line 2) | interface Window {

FILE: kt-sft/ktransformers/website/src/utils/copy.ts
  function showCopySuccessMessage (line 75) | function showCopySuccessMessage() {
  function showCopyErrorMessage (line 93) | function showCopyErrorMessage() {

FILE: kt-sft/ktransformers/website/src/utils/types.ts
  type IAssistant (line 1) | interface IAssistant {
  type IAssistantWithStatus (line 17) | interface IAssistantWithStatus {
  type IMessage (line 34) | interface IMessage {
  type IThread (line 51) | interface IThread {
  type IRun (line 59) | interface IRun {
  type IFile (line 88) | interface IFile {
  type IMessageData (line 97) | interface IMessageData {
  type IThreadAndMessageAndAssistant (line 104) | interface IThreadAndMessageAndAssistant {
  type IDeleteResult (line 110) | interface IDeleteResult {
  type IBuildData (line 115) | interface IBuildData {

FILE: kt-sft/merge_tensors/merge_safetensor_gguf.py
  function read_safetensor_keys_from_folder (line 15) | def read_safetensor_keys_from_folder(folder_path)->dict:
  function translate_name (line 58) | def translate_name(name:str)->str:
  function combine_tensor_sources (line 71) | def combine_tensor_sources(safetensor_path:str, gguf_path:str):
  function write_combined_tensor (line 97) | def write_combined_tensor(target_tensor_map: dict, output_path: str, ggu...
  function main (line 190) | def main():

FILE: kt-sft/setup.py
  function _load_pyproject_deps (line 48) | def _load_pyproject_deps():
  function _strip_req (line 68) | def _strip_req(reqs, name: str):
  class CpuInstructInfo (line 87) | class CpuInstructInfo:
  class VersionInfo (line 97) | class VersionInfo:
    method get_musa_bare_metal_version (line 105) | def get_musa_bare_metal_version(self, musa_dir):
    method get_rocm_bare_metal_version (line 115) | def get_rocm_bare_metal_version(self, rocm_dir):
    method get_cuda_bare_metal_version (line 179) | def get_cuda_bare_metal_version(self, cuda_dir):
    method get_cuda_version_of_torch (line 188) | def get_cuda_version_of_torch(self):
    method get_platform (line 193) | def get_platform(self,):
    method get_cpu_instruct (line 204) | def get_cpu_instruct(self,):
    method get_torch_version (line 245) | def get_torch_version(self,):
    method get_flash_version (line 250) | def get_flash_version(self,):
    method get_package_version (line 259) | def get_package_version(self, full_version=False):
  class BuildWheelsCommand (line 282) | class BuildWheelsCommand(_bdist_wheel):
    method get_wheel_name (line 283) | def get_wheel_name(self,):
    method run (line 293) | def run(self):
  function colored (line 323) | def colored(text, color=None, bold=False):
  function split_line (line 335) | def split_line(text: str) -> List[str]:
  function colored (line 356) | def colored(text, color=None, bold=False):
  function split_line (line 368) | def split_line(text: str) -> List[str]:
  function run_command_with_live_tail (line 384) | def run_command_with_live_tail(ext: str, command: List[str], output_line...
  class CMakeExtension (line 494) | class CMakeExtension(Extension):
    method __init__ (line 495) | def __init__(self, name: str, sourcedir: str) -> None:
  function get_cmake_abi_args (line 500) | def get_cmake_abi_args(cmake_args):
  class CMakeBuild (line 507) | class CMakeBuild(BuildExtension):
    method build_extension (line 509) | def build_extension(self, ext) -> None:

FILE: kt-sft/test_adapter/inspect_adapter.py
  function load_json (line 20) | def load_json(p: Path):
  function human_readable (line 25) | def human_readable(num: int) -> str:
  function inspect_adapter_weights (line 33) | def inspect_adapter_weights(weight_path: Path):
  function maybe_print_optimizer (line 57) | def maybe_print_optimizer(optimizer_pt: Path, max_keys: int = 20):
  function maybe_print_scheduler (line 74) | def maybe_print_scheduler(scheduler_pt: Path, max_keys: int = 20):
  function maybe_print_rng (line 91) | def maybe_print_rng(rng_pth: Path):
  function dump_tensors (line 105) | def dump_tensors(state: dict, out_dir="tensor_dump"):
  function main (line 125) | def main():

FILE: kt-sft/test_adapter/pred2metrics.py
  function load_pred_ref (line 8) | def load_pred_ref(pred_file: Path):
  function main (line 16) | def main():

FILE: kt-sft/withoutKT_PEFT.py
  function preprocess_function (line 21) | def preprocess_function(examples):
  function print_model_with_params (line 31) | def print_model_with_params(model, prefix="", max_layers=3, max_params=5):
  class KTrainer (line 133) | class KTrainer(Trainer):
    method save_model (line 134) | def save_model(self, output_dir=None, _internal_call=False):

FILE: third_party/llamafile/micros.h
  function GetQueryPerformanceFrequency (line 19) | static long long GetQueryPerformanceFrequency() {
  function GetQueryPerformanceCounter (line 24) | static long long GetQueryPerformanceCounter() {
  function micros (line 31) | static long long micros(void) {

FILE: third_party/llamafile/numba.h
  function rand32 (line 8) | inline int rand32(void) {
  function popcount (line 15) | inline int popcount(unsigned x) {
  function hamming (line 23) | inline int hamming(int x, int y) {
  function float01 (line 27) | inline float float01(unsigned x) {  // (0,1)
  function numba (line 31) | inline float numba(void) {  // (-10,10)

FILE: third_party/llamafile/sgemm.cpp
  type GemmFuncs (line 32) | struct GemmFuncs {
    type ggml_compute_params (line 34) | struct ggml_compute_params
    type ggml_tensor (line 34) | struct ggml_tensor
    type ggml_tensor (line 34) | struct ggml_tensor
    type ggml_tensor (line 34) | struct ggml_tensor
    type ggml_tensor (line 34) | struct ggml_tensor
    method GemmFuncs (line 39) | GemmFuncs() {
  function llamafile_sgemm (line 190) | bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, co...
  function llamafile_mixmul (line 198) | bool llamafile_mixmul(const ggml_compute_params* params, const ggml_tens...
  function llamafile_mixmul_iqk (line 202) | bool llamafile_mixmul_iqk(long Nx, long Ny, long ne00, int ne11, int typ...

FILE: third_party/llamafile/sgemm.h
  type ggml_tensor (line 13) | struct ggml_tensor
  type ggml_compute_params (line 14) | struct ggml_compute_params
  type ggml_compute_params (line 27) | struct ggml_compute_params
  type ggml_tensor (line 27) | struct ggml_tensor
  type ggml_tensor (line 27) | struct ggml_tensor
  type ggml_tensor (line 27) | struct ggml_tensor
  type ggml_tensor (line 27) | struct ggml_tensor
  type ggml_tensor (line 28) | struct ggml_tensor
  type ggml_tensor (line 28) | struct ggml_tensor
  type ggml_tensor (line 28) | struct ggml_tensor
  type ggml_compute_params (line 40) | struct ggml_compute_params
  type ggml_tensor (line 40) | struct ggml_tensor
  type ggml_tensor (line 40) | struct ggml_tensor
  type ggml_tensor (line 40) | struct ggml_tensor
  type ggml_tensor (line 40) | struct ggml_tensor
  type ggml_compute_params (line 41) | struct ggml_compute_params
  type ggml_tensor (line 41) | struct ggml_tensor
  type ggml_tensor (line 41) | struct ggml_tensor
  type ggml_tensor (line 41) | struct ggml_tensor
  type ggml_tensor (line 41) | struct ggml_tensor
  type ggml_compute_params (line 42) | struct ggml_compute_params
  type ggml_tensor (line 42) | struct ggml_tensor
  type ggml_tensor (line 42) | struct ggml_tensor
  type ggml_tensor (line 42) | struct ggml_tensor
  type ggml_tensor (line 42) | struct ggml_tensor
  type ggml_compute_params (line 43) | struct ggml_compute_params
  type ggml_tensor (line 43) | struct ggml_tensor
  type ggml_tensor (line 43) | struct ggml_tensor
  type ggml_tensor (line 43) | struct ggml_tensor
  type ggml_tensor (line 43) | struct ggml_tensor
  type ggml_compute_params (line 44) | struct ggml_compute_params
  type ggml_tensor (line 44) | struct ggml_tensor
  type ggml_tensor (line 44) | struct ggml_tensor
  type ggml_tensor (line 44) | struct ggml_tensor
  type ggml_tensor (line 44) | struct ggml_tensor
  type ggml_compute_params (line 45) | struct ggml_compute_params
  type ggml_tensor (line 45) | struct ggml_tensor
  type ggml_tensor (line 45) | struct ggml_tensor
  type ggml_tensor (line 45) | struct ggml_tensor
  type ggml_tensor (line 45) | struct ggml_tensor
  type ggml_compute_params (line 46) | struct ggml_compute_params
  type ggml_tensor (line 46) | struct ggml_tensor
  type ggml_tensor (line 46) | struct ggml_tensor
  type ggml_tensor (line 46) | struct ggml_tensor
  type ggml_tensor (line 46) | struct ggml_tensor
  type ggml_compute_params (line 47) | struct ggml_compute_params
  type ggml_tensor (line 47) | struct ggml_tensor
  type ggml_tensor (line 47) | struct ggml_tensor
  type ggml_tensor (line 47) | struct ggml_tensor
  type ggml_tensor (line 47) | struct ggml_tensor
  type ggml_compute_params (line 48) | struct ggml_compute_params
  type ggml_tensor (line 48) | struct ggml_tensor
  type ggml_tensor (line 48) | struct ggml_tensor
  type ggml_tensor (line 48) | struct ggml_tensor
  type ggml_tensor (line 48) | struct ggml_tensor
  type ggml_compute_params (line 64) | struct ggml_compute_params
  type ggml_tensor (line 64) | struct ggml_tensor
  type ggml_tensor (line 64) | struct ggml_tensor
  type ggml_tensor (line 64) | struct ggml_tensor
  type ggml_tensor (line 64) | struct ggml_tensor
  type ggml_tensor (line 65) | struct ggml_tensor
  type ggml_tensor (line 65) | struct ggml_tensor
  type ggml_tensor (line 65) | struct ggml_tensor
  type ggml_compute_params (line 77) | struct ggml_compute_params
  type ggml_tensor (line 77) | struct ggml_tensor
  type ggml_tensor (line 77) | struct ggml_tensor
  type ggml_tensor (line 77) | struct ggml_tensor
  type ggml_tensor (line 77) | struct ggml_tensor
  type ggml_compute_params (line 78) | struct ggml_compute_params
  type ggml_tensor (line 78) | struct ggml_tensor
  type ggml_tensor (line 78) | struct ggml_tensor
  type ggml_tensor (line 78) | struct ggml_tensor
  type ggml_tensor (line 78) | struct ggml_tensor
  type ggml_compute_params (line 79) | struct ggml_compute_params
  type ggml_tensor (line 79) | struct ggml_tensor
  type ggml_tensor (line 79) | struct ggml_tensor
  type ggml_tensor (line 79) | struct ggml_tensor
  type ggml_tensor (line 79) | struct ggml_tensor
  type ggml_compute_params (line 80) | struct ggml_compute_params
  type ggml_tensor (line 80) | struct ggml_tensor
  type ggml_tensor (line 80) | struct ggml_tensor
  type ggml_tensor (line 80) | struct ggml_tensor
  type ggml_tensor (line 80) | struct ggml_tensor
  type ggml_compute_params (line 81) | struct ggml_compute_params
  type ggml_tensor (line 81) | struct ggml_tensor
  type ggml_tensor (line 81) | struct ggml_tensor
  type ggml_tensor (line 81) | struct ggml_tensor
  type ggml_tensor (line 81) | struct ggml_tensor
  type ggml_compute_params (line 82) | struct ggml_compute_params
  type ggml_tensor (line 82) | struct ggml_tensor
  type ggml_tensor (line 82) | struct ggml_tensor
  type ggml_tensor (line 82) | struct ggml_tensor
  type ggml_tensor (line 82) | struct ggml_tensor
  type ggml_compute_params (line 83) | struct ggml_compute_params
  type ggml_tensor (line 83) | struct ggml_tensor
  type ggml_tensor (line 83) | struct ggml_tensor
  type ggml_tensor (line 83) | struct ggml_tensor
  type ggml_tensor (line 83) | struct ggml_tensor
  type ggml_compute_params (line 84) | struct ggml_compute_params
  type ggml_tensor (line 84) | struct ggml_tensor
  type ggml_tensor (line 84) | struct ggml_tensor
  type ggml_tensor (line 84) | struct ggml_tensor
  type ggml_tensor (line 84) | struct ggml_tensor
  type ggml_compute_params (line 85) | struct ggml_compute_params
  type ggml_tensor (line 85) | struct ggml_tensor
  type ggml_tensor (line 85) | struct ggml_tensor
  type ggml_tensor (line 85) | struct ggml_tensor
  type ggml_tensor (line 85) | struct ggml_tensor

FILE: third_party/llamafile/tinyblas_cpu.h
  function tinyBLAS_not_supported (line 85) | bool tinyBLAS_not_supported(const char* file, int line) {
  function unhalf (line 90) | inline float unhalf(ggml_fp16_t d) {
  function unhalf (line 93) | inline float unhalf(ggml_bf16_t d) {
  function float (line 112) | struct ggml_type_trait<float> {
  function ggml_bf16_t (line 116) | struct ggml_type_trait<ggml_bf16_t> {
  function ggml_fp16_t (line 120) | struct ggml_type_trait<ggml_fp16_t> {
  function block_q8_0 (line 124) | struct ggml_type_trait<block_q8_0> {
  function __m128 (line 132) | inline __m128 add(__m128 x, __m128 y) {
  function __m128 (line 135) | inline __m128 sub(__m128 x, __m128 y) {
  function __m128 (line 138) | inline __m128 mul(__m128 x, __m128 y) {
  function __m256 (line 144) | inline __m256 add(__m256 x, __m256 y) {
  function __m256 (line 147) | inline __m256 sub(__m256 x, __m256 y) {
  function __m256 (line 150) | inline __m256 mul(__m256 x, __m256 y) {
  function __m512 (line 156) | inline __m512 add(__m512 x, __m512 y) {
  function __m512 (line 159) | inline __m512 sub(__m512 x, __m512 y) {
  function __m512 (line 162) | inline __m512 mul(__m512 x, __m512 y) {
  function float32x4_t (line 168) | inline float32x4_t add(float32x4_t x, float32x4_t y) {
  function float32x4_t (line 171) | inline float32x4_t sub(float32x4_t x, float32x4_t y) {
  function float32x4_t (line 174) | inline float32x4_t mul(float32x4_t x, float32x4_t y) {
  function float16x8_t (line 180) | inline float16x8_t add(float16x8_t x, float16x8_t y) {
  function float16x8_t (line 183) | inline float16x8_t sub(float16x8_t x, float16x8_t y) {
  function float16x8_t (line 186) | inline float16x8_t mul(float16x8_t x, float16x8_t y) {
  function U (line 198) | U madd(T a, T b, U c) {
  function U (line 210) | U madder(T a, T b, U c, U* e) {
  function float32x4_t (line 218) | inline float32x4_t badder(float32x4_t a, float b, float32x4_t c, float32...
  function __m256 (line 229) | inline __m256 madd(__m256 a, __m256 b, __m256 c) {
  function __m512 (line 235) | inline __m512 madd(__m512 a, __m512 b, __m512 c) {
  function float32x4_t (line 243) | inline float32x4_t madd(float32x4_t a, float32x4_t b, float32x4_t c) {
  function float16x8_t (line 249) | inline float16x8_t madd(float16x8_t a, float16x8_t b, float16x8_t c) {
  function __m512 (line 258) | inline __m512 madd(__m512bh x, __m512bh y, __m512 z) {
  function __m512 (line 262) | inline __m512 madder(__m512bh x, __m512bh y, __m512 z, __m512* _) {
  function hsum (line 271) | inline float hsum(float32x4_t x) {
  function hsum (line 277) | inline float hsum(float16x8_t x) {
  function hsum (line 284) | inline float hsum(__m128 x) {
  function hsum (line 300) | inline float hsum(__m256 x) {
  function hsum (line 306) | inline float hsum(__m512 x) {
  function load (line 318) | inline float load(const float* p) {
  function load (line 322) | inline float load(const ggml_fp16_t* p) {
  function load (line 326) | inline float load(const ggml_bf16_t* p) {
  function float32x4_t (line 332) | inline float32x4_t load(const float* p) {
  function float32x4_t (line 336) | inline float32x4_t load(const ggml_bf16_t* p) {
  function float16x8_t (line 341) | inline float16x8_t load(const ggml_fp16_t* p) {
  function float32x4_t (line 345) | inline float32x4_t load(const ggml_fp16_t* p) {
  function __m128 (line 353) | inline __m128 load(const float* p) {
  function __m256 (line 360) | inline __m256 load(const float* p) {
  function __m256 (line 367) | inline __m256 load(const ggml_bf16_t* p) {
  function __m256 (line 375) | inline __m256 load(const ggml_fp16_t* p) {
  function __m512 (line 382) | inline __m512 load(const float* p) {
  function __m512 (line 386) | inline __m512 load(const ggml_fp16_t* p) {
  function __m512 (line 390) | inline __m512 load(const ggml_bf16_t* p) {
  function __m512bh (line 398) | inline __m512bh load(const ggml_bf16_t* p) {
  function __m512bh (line 402) | inline __m512bh load(const float* p) {
  function store (line 410) | inline void store(float* p, float f) {
  function store (line 414) | inline void store(ggml_fp16_t* p, float f) {
  function store (line 418) | inline void store(ggml_bf16_t* p, float f) {
  function gemm (line 616) | void gemm(long m0, long m, long n0, long n) {
  function gemm (line 759) | void gemm(long m0, long m, long n0, long n) {
  function int8x16_t (line 797) | inline int8x16_t load_lo(const block_q8_0* b) {
  function int8x16_t (line 801) | inline int8x16_t load_hi(const block_q8_0* b) {
  function int8x16_t (line 805) | inline int8x16_t load_lo(const block_q4_0* b) {
  function int8x16_t (line 810) | inline int8x16_t load_hi(const block_q4_0* b) {
  function gemm (line 982) | void gemm(long m0, long m, long n0, long n) {
  function __m256i (line 1020) | inline __m256i load(const block_q8_0* b) {
  function __m256i (line 1024) | inline __m256i load(const block_q4_0* b) {
  function __m256 (line 1032) | inline __m256 updot(__m256i u, __m256i s) {

FILE: third_party/llamafile/tinyblas_cpu_mixmul_amd_avx.cpp
  function llamafile_mixmul_needs (line 13) | size_t llamafile_mixmul_needs(const ggml_tensor* weights, const ggml_ten...

FILE: third_party/llamafile/tinyblas_cpu_mixmul_arm80.cpp
  function llamafile_mixmul_needs (line 13) | size_t llamafile_mixmul_needs(const ggml_tensor* weights, const ggml_ten...

FILE: third_party/llamafile/tinyblas_cpu_unsupported.cpp
  function llamafile_sgemm_unsupported (line 25) | bool llamafile_sgemm_unsupported(long m, long n, long k, const void* A, ...
  function llamafile_mixmul_unsupported (line 29) | bool llamafile_mixmul_unsupported(const struct ggml_compute_params* params,
  function iqk_mul_mat_moe_unsupported (line 37) | bool iqk_mul_mat_moe_unsupported(long, long, long, int, int, const void*...