SYMBOL INDEX (580 symbols across 33 files) FILE: gpu/convert_checkpoint.py function convert_ts_checkpoint (line 15) | def convert_ts_checkpoint( FILE: gpu/convert_safetensors.py class ModelArgs (line 14) | class ModelArgs: method __post_init__ (line 26) | def __post_init__(self): method from_name (line 36) | def from_name(cls, name: str): function invert_convert_q (line 43) | def invert_convert_q(w: torch.Tensor, config: ModelArgs) -> torch.Tensor: function invert_convert_k (line 46) | def invert_convert_k(w: torch.Tensor, config: ModelArgs) -> torch.Tensor: function convert_back (line 49) | def convert_back( FILE: gpu/generate.py class GenArgs (line 27) | class GenArgs: class FastGen (line 37) | class FastGen: method build (line 42) | def build( method __init__ (line 79) | def __init__( method compile_prefill (line 102) | def compile_prefill(self): method compile_generate (line 159) | def compile_generate(self): method generate_all (line 217) | def generate_all( function get_prompts (line 307) | def get_prompts(interactive: bool) -> Iterable[list[str]]: function main (line 322) | def main(ckpt_dir: str, interactive: bool = False, chat_format: bool = F... FILE: gpu/model.py function bitnet_int8xint2_linear (line 21) | def bitnet_int8xint2_linear(input0, input1, s, ws): class ModelArgs (line 40) | class ModelArgs: class BitLinearKernel (line 54) | class BitLinearKernel(nn.Module): method __init__ (line 60) | def __init__(self, in_features: int, out_features: int, bias: bool = F... method quant_input (line 69) | def quant_input(self, input): method forward (line 73) | def forward(self, input): class BitLinear (line 77) | class BitLinear(nn.Linear): method quant_input (line 79) | def quant_input(self, input): method forward (line 83) | def forward(self, input): class Attention (line 87) | class Attention(nn.Module): method __init__ (line 88) | def __init__( method forward (line 121) | def forward( function squared_relu (line 165) | def squared_relu(x: torch.Tensor) -> torch.Tensor: class FeedForward (line 168) | class FeedForward(nn.Module): method __init__ (line 169) | def __init__( method forward (line 192) | def forward(self, x: torch.Tensor) -> torch.Tensor: class TransformerBlock (line 200) | class TransformerBlock(nn.Module): method __init__ (line 201) | def __init__(self, args: ModelArgs): method forward (line 231) | def forward( class Transformer (line 246) | class Transformer(nn.Module): method __init__ (line 247) | def __init__(self, args: ModelArgs): method forward_with_attn_bias (line 269) | def forward_with_attn_bias( method forward (line 283) | def forward( function make_cache (line 299) | def make_cache( function cache_prefix (line 346) | def cache_prefix(cache: list[LayerCache], length: int) -> list[LayerCache]: FILE: gpu/pack_weight.py function B_global_16x32_to_shared_load_16x32_layout (line 5) | def B_global_16x32_to_shared_load_16x32_layout(i, j): function permutate_weight_fastest (line 17) | def permutate_weight_fastest(weight): function compress_int2_to_int8 (line 46) | def compress_int2_to_int8(int2_weight): function interleave_weight_int8 (line 56) | def interleave_weight_int8(qweight, nbits=2):\ function convert_weight_int8_to_int2 (line 76) | def convert_weight_int8_to_int2(weight): FILE: gpu/sample_utils.py function top_p (line 9) | def top_p(probs: torch.Tensor, p: float) -> torch.Tensor: FILE: gpu/stats.py class PhaseStats (line 12) | class PhaseStats: method show (line 17) | def show(self) -> str: class Stats (line 27) | class Stats: method __init__ (line 32) | def __init__(self): method end_phase (line 36) | def end_phase(self, tokens: int, now: Optional[float] = None): method phase (line 50) | def phase(self, name: str, tokens: int = 0): FILE: gpu/test.py function bitnet_int8xint2_linear (line 15) | def bitnet_int8xint2_linear(input0, input1, s, ws, ret): FILE: gpu/tokenizer.py class Message (line 26) | class Message(TypedDict): class Tokenizer (line 34) | class Tokenizer: method __init__ (line 45) | def __init__(self, model_path: str): method encode (line 95) | def encode( method decode (line 158) | def decode(self, t: Sequence[int]) -> str: method _split_whitespaces_or_nonwhitespaces (line 172) | def _split_whitespaces_or_nonwhitespaces( class ChatFormat (line 197) | class ChatFormat: method __init__ (line 198) | def __init__(self, tokenizer: Tokenizer): method decode (line 202) | def decode(self, tokens: List[int]) -> str: method encode_header (line 209) | def encode_header(self, message: Message) -> List[int]: method encode_message (line 225) | def encode_message(self, message: Message, return_target=False) -> Lis... method encode_dialog_prompt (line 242) | def encode_dialog_prompt(self, dialog: Dialog, completion=False, retur... FILE: include/ggml-bitnet.h type float32_t (line 8) | typedef float32_t bitnet_float_type; type bitnet_float_type (line 10) | typedef float bitnet_float_type; type bitnet_tensor_extra (line 17) | struct bitnet_tensor_extra { type ggml_tensor (line 31) | struct ggml_tensor type ggml_tensor (line 31) | struct ggml_tensor type ggml_tensor (line 31) | struct ggml_tensor type ggml_tensor (line 32) | struct ggml_tensor type ggml_tensor (line 32) | struct ggml_tensor type ggml_tensor (line 32) | struct ggml_tensor type ggml_tensor (line 35) | struct ggml_tensor type ggml_type (line 36) | enum ggml_type FILE: preset_kernels/Llama3-8B-1.58-100B-tokens/bitnet-lut-kernels-tl1.h function aligned_free (line 16) | static void aligned_free(void * ptr) {{ function per_tensor_quant (line 24) | void per_tensor_quant(int k, void* lut_scales_, void* b_) {{ function partial_max_reset (line 53) | void partial_max_reset(void* lut_scales_) {{ function Transpose_8_8 (line 59) | inline void Transpose_8_8( function lut_ctor (line 96) | void lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_type* lut... function is_type_supported (line 174) | static bool is_type_supported(enum ggml_type type) {{ function tbl_impl_14336_4096 (line 186) | inline void tbl_impl_14336_4096(int32_t* c, int8_t* lut, uint8_t* a) { function qgemm_lut_14336_4096 (line 303) | int32_t qgemm_lut_14336_4096(void* A, void* LUT, void* Scales, void* LUT... function tbl_impl_4096_14336 (line 320) | inline void tbl_impl_4096_14336(int32_t* c, int8_t* lut, uint8_t* a) { function qgemm_lut_4096_14336 (line 421) | int32_t qgemm_lut_4096_14336(void* A, void* LUT, void* Scales, void* LUT... function tbl_impl_1024_4096 (line 438) | inline void tbl_impl_1024_4096(int32_t* c, int8_t* lut, uint8_t* a) { function qgemm_lut_1024_4096 (line 555) | int32_t qgemm_lut_1024_4096(void* A, void* LUT, void* Scales, void* LUT_... function tbl_impl_4096_4096 (line 572) | inline void tbl_impl_4096_4096(int32_t* c, int8_t* lut, uint8_t* a) { function qgemm_lut_4096_4096 (line 673) | int32_t qgemm_lut_4096_4096(void* A, void* LUT, void* Scales, void* LUT_... function ggml_preprocessor (line 694) | void ggml_preprocessor(int m, int k, void* B, void* LUT_Scales, void* QL... function ggml_qgemm_lut (line 708) | void ggml_qgemm_lut(int m, int k, void* A, void* LUT, void* Scales, void... function ggml_bitnet_transform_tensor (line 723) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) { FILE: preset_kernels/Llama3-8B-1.58-100B-tokens/bitnet-lut-kernels-tl2.h function aligned_free (line 17) | static void aligned_free(void * ptr) { function _mm256_merge_epi32 (line 26) | inline void _mm256_merge_epi32(const __m256i v0, const __m256i v1, __m25... function _mm256_merge_epi64 (line 33) | inline void _mm256_merge_epi64(const __m256i v0, const __m256i v1, __m25... function _mm256_merge_si128 (line 40) | inline void _mm256_merge_si128(const __m256i v0, const __m256i v1, __m25... function Transpose_8_8 (line 45) | inline void Transpose_8_8( function per_tensor_quant (line 71) | inline int32_t per_tensor_quant(int k, void* lut_scales_, void* b_) { function partial_max_reset (line 90) | inline int32_t partial_max_reset(int32_t bs, void* lut_scales_) { function three_lut_ctor (line 99) | int32_t three_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_... function two_lut_ctor (line 185) | int32_t two_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_ty... function is_type_supported (line 261) | static bool is_type_supported(enum ggml_type type) { function three_tbl_impl_14336_4096 (line 274) | void three_tbl_impl_14336_4096(int32_t* c, int8_t* lut, uint8_t* a, uint... function two_tbl_impl14336_4096 (line 411) | int32_t two_tbl_impl14336_4096(int32_t* c, int8_t* lut, uint8_t* a) { function three_tbl_impl_4096_14336 (line 518) | void three_tbl_impl_4096_14336(int32_t* c, int8_t* lut, uint8_t* a, uint... function two_tbl_impl4096_14336 (line 655) | int32_t two_tbl_impl4096_14336(int32_t* c, int8_t* lut, uint8_t* a) { function three_tbl_impl_1024_4096 (line 762) | void three_tbl_impl_1024_4096(int32_t* c, int8_t* lut, uint8_t* a, uint8... function two_tbl_impl1024_4096 (line 899) | int32_t two_tbl_impl1024_4096(int32_t* c, int8_t* lut, uint8_t* a) { function three_tbl_impl_4096_4096 (line 1006) | void three_tbl_impl_4096_4096(int32_t* c, int8_t* lut, uint8_t* a, uint8... function two_tbl_impl4096_4096 (line 1143) | int32_t two_tbl_impl4096_4096(int32_t* c, int8_t* lut, uint8_t* a) { function ggml_preprocessor (line 1245) | void ggml_preprocessor(int bs, int m, int three_k, int two_k, void* B, v... function ggml_qgemm_lut (line 1276) | void ggml_qgemm_lut(int bs, int m, int k, int BK, void* A, void* sign, v... function ggml_bitnet_transform_tensor (line 1407) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) { FILE: preset_kernels/bitnet_b1_58-3B/bitnet-lut-kernels-tl1.h function aligned_free (line 16) | static void aligned_free(void * ptr) {{ function per_tensor_quant (line 24) | void per_tensor_quant(int k, void* lut_scales_, void* b_) {{ function partial_max_reset (line 53) | void partial_max_reset(void* lut_scales_) {{ function Transpose_8_8 (line 59) | inline void Transpose_8_8( function lut_ctor (line 96) | void lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_type* lut... function is_type_supported (line 174) | static bool is_type_supported(enum ggml_type type) {{ function tbl_impl_3200_8640 (line 186) | inline void tbl_impl_3200_8640(int32_t* c, int8_t* lut, uint8_t* a) { function qgemm_lut_3200_8640 (line 287) | int32_t qgemm_lut_3200_8640(void* A, void* LUT, void* Scales, void* LUT_... function tbl_impl_3200_3200 (line 304) | inline void tbl_impl_3200_3200(int32_t* c, int8_t* lut, uint8_t* a) { function qgemm_lut_3200_3200 (line 421) | int32_t qgemm_lut_3200_3200(void* A, void* LUT, void* Scales, void* LUT_... function tbl_impl_8640_3200 (line 438) | inline void tbl_impl_8640_3200(int32_t* c, int8_t* lut, uint8_t* a) { function qgemm_lut_8640_3200 (line 539) | int32_t qgemm_lut_8640_3200(void* A, void* LUT, void* Scales, void* LUT_... function ggml_preprocessor (line 560) | void ggml_preprocessor(int m, int k, void* B, void* LUT_Scales, void* QL... function ggml_qgemm_lut (line 571) | void ggml_qgemm_lut(int m, int k, void* A, void* LUT, void* Scales, void... function ggml_bitnet_transform_tensor (line 583) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) { FILE: preset_kernels/bitnet_b1_58-3B/bitnet-lut-kernels-tl2.h function aligned_free (line 17) | static void aligned_free(void * ptr) { function _mm256_merge_epi32 (line 26) | inline void _mm256_merge_epi32(const __m256i v0, const __m256i v1, __m25... function _mm256_merge_epi64 (line 33) | inline void _mm256_merge_epi64(const __m256i v0, const __m256i v1, __m25... function _mm256_merge_si128 (line 40) | inline void _mm256_merge_si128(const __m256i v0, const __m256i v1, __m25... function Transpose_8_8 (line 45) | inline void Transpose_8_8( function per_tensor_quant (line 71) | inline int32_t per_tensor_quant(int k, void* lut_scales_, void* b_) { function partial_max_reset (line 90) | inline int32_t partial_max_reset(int32_t bs, void* lut_scales_) { function three_lut_ctor (line 99) | int32_t three_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_... function two_lut_ctor (line 185) | int32_t two_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_ty... function is_type_supported (line 261) | static bool is_type_supported(enum ggml_type type) { function three_tbl_impl_3200_8640 (line 274) | void three_tbl_impl_3200_8640(int32_t* c, int8_t* lut, uint8_t* a, uint8... function two_tbl_impl3200_8640 (line 411) | int32_t two_tbl_impl3200_8640(int32_t* c, int8_t* lut, uint8_t* a) { function three_tbl_impl_3200_3200 (line 518) | void three_tbl_impl_3200_3200(int32_t* c, int8_t* lut, uint8_t* a, uint8... function two_tbl_impl3200_3200 (line 655) | int32_t two_tbl_impl3200_3200(int32_t* c, int8_t* lut, uint8_t* a) { function three_tbl_impl_8640_3200 (line 762) | void three_tbl_impl_8640_3200(int32_t* c, int8_t* lut, uint8_t* a, uint8... function two_tbl_impl8640_3200 (line 899) | int32_t two_tbl_impl8640_3200(int32_t* c, int8_t* lut, uint8_t* a) { function ggml_preprocessor (line 1001) | void ggml_preprocessor(int bs, int m, int three_k, int two_k, void* B, v... function ggml_qgemm_lut (line 1025) | void ggml_qgemm_lut(int bs, int m, int k, int BK, void* A, void* sign, v... function ggml_bitnet_transform_tensor (line 1124) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) { FILE: preset_kernels/bitnet_b1_58-large/bitnet-lut-kernels-tl1.h function aligned_free (line 16) | static void aligned_free(void * ptr) {{ function per_tensor_quant (line 24) | void per_tensor_quant(int k, void* lut_scales_, void* b_) {{ function partial_max_reset (line 53) | void partial_max_reset(void* lut_scales_) {{ function Transpose_8_8 (line 59) | inline void Transpose_8_8( function lut_ctor (line 96) | void lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_type* lut... function is_type_supported (line 174) | static bool is_type_supported(enum ggml_type type) {{ function tbl_impl_1536_4096 (line 186) | inline void tbl_impl_1536_4096(int32_t* c, int8_t* lut, uint8_t* a) { function qgemm_lut_1536_4096 (line 287) | int32_t qgemm_lut_1536_4096(void* A, void* LUT, void* Scales, void* LUT_... function tbl_impl_1536_1536 (line 304) | inline void tbl_impl_1536_1536(int32_t* c, int8_t* lut, uint8_t* a) { function qgemm_lut_1536_1536 (line 421) | int32_t qgemm_lut_1536_1536(void* A, void* LUT, void* Scales, void* LUT_... function tbl_impl_4096_1536 (line 438) | inline void tbl_impl_4096_1536(int32_t* c, int8_t* lut, uint8_t* a) { function qgemm_lut_4096_1536 (line 539) | int32_t qgemm_lut_4096_1536(void* A, void* LUT, void* Scales, void* LUT_... function ggml_preprocessor (line 560) | void ggml_preprocessor(int m, int k, void* B, void* LUT_Scales, void* QL... function ggml_qgemm_lut (line 571) | void ggml_qgemm_lut(int m, int k, void* A, void* LUT, void* Scales, void... function ggml_bitnet_transform_tensor (line 583) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) { FILE: preset_kernels/bitnet_b1_58-large/bitnet-lut-kernels-tl2.h function aligned_free (line 17) | static void aligned_free(void * ptr) { function _mm256_merge_epi32 (line 26) | inline void _mm256_merge_epi32(const __m256i v0, const __m256i v1, __m25... function _mm256_merge_epi64 (line 33) | inline void _mm256_merge_epi64(const __m256i v0, const __m256i v1, __m25... function _mm256_merge_si128 (line 40) | inline void _mm256_merge_si128(const __m256i v0, const __m256i v1, __m25... function Transpose_8_8 (line 45) | inline void Transpose_8_8( function per_tensor_quant (line 71) | inline int32_t per_tensor_quant(int k, void* lut_scales_, void* b_) { function partial_max_reset (line 90) | inline int32_t partial_max_reset(int32_t bs, void* lut_scales_) { function three_lut_ctor (line 99) | int32_t three_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_... function two_lut_ctor (line 185) | int32_t two_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_ty... function is_type_supported (line 261) | static bool is_type_supported(enum ggml_type type) { function three_tbl_impl_1536_4096 (line 274) | void three_tbl_impl_1536_4096(int32_t* c, int8_t* lut, uint8_t* a, uint8... function two_tbl_impl1536_4096 (line 411) | int32_t two_tbl_impl1536_4096(int32_t* c, int8_t* lut, uint8_t* a) { function three_tbl_impl_1536_1536 (line 518) | void three_tbl_impl_1536_1536(int32_t* c, int8_t* lut, uint8_t* a, uint8... function two_tbl_impl1536_1536 (line 655) | int32_t two_tbl_impl1536_1536(int32_t* c, int8_t* lut, uint8_t* a) { function three_tbl_impl_4096_1536 (line 762) | void three_tbl_impl_4096_1536(int32_t* c, int8_t* lut, uint8_t* a, uint8... function two_tbl_impl4096_1536 (line 899) | int32_t two_tbl_impl4096_1536(int32_t* c, int8_t* lut, uint8_t* a) { function ggml_preprocessor (line 1001) | void ggml_preprocessor(int bs, int m, int three_k, int two_k, void* B, v... function ggml_qgemm_lut (line 1025) | void ggml_qgemm_lut(int bs, int m, int k, int BK, void* A, void* sign, v... function ggml_bitnet_transform_tensor (line 1124) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) { FILE: run_inference.py function run_command (line 8) | def run_command(command, shell=False): function run_inference (line 16) | def run_inference(): function signal_handler (line 39) | def signal_handler(sig, frame): FILE: run_inference_server.py function run_command (line 8) | def run_command(command, shell=False): function run_server (line 16) | def run_server(): function signal_handler (line 46) | def signal_handler(sig, frame): FILE: setup_env.py function system_info (line 84) | def system_info(): function get_model_name (line 87) | def get_model_name(): function run_command (line 92) | def run_command(command, shell=False, log_step=None): function prepare_model (line 109) | def prepare_model(): function setup_gguf (line 152) | def setup_gguf(): function gen_code (line 156) | def gen_code(): function compile (line 203) | def compile(): function main (line 218) | def main(): function parse_args (line 224) | def parse_args(): function signal_handler (line 235) | def signal_handler(sig, frame): FILE: src/ggml-bitnet-lut.cpp function ggml_bitnet_init (line 14) | void ggml_bitnet_init(void) { function ggml_bitnet_free (line 31) | void ggml_bitnet_free(void) { function do_permutate (line 49) | static bool do_permutate(enum ggml_type type) { function ggml_bitnet_can_mul_mat (line 58) | bool ggml_bitnet_can_mul_mat(const struct ggml_tensor * src0, const stru... function ggml_bitnet_mul_mat_get_wsize (line 70) | size_t ggml_bitnet_mul_mat_get_wsize(const struct ggml_tensor * src0, co... function ggml_bitnet_get_type_bits (line 85) | int ggml_bitnet_get_type_bits(enum ggml_type type) { function ggml_bitnet_init (line 98) | void ggml_bitnet_init(void) { function ggml_bitnet_free (line 115) | void ggml_bitnet_free(void) { function ggml_bitnet_can_mul_mat (line 133) | bool ggml_bitnet_can_mul_mat(const struct ggml_tensor * src0, const stru... function ggml_bitnet_mul_mat_get_wsize (line 143) | size_t ggml_bitnet_mul_mat_get_wsize(const struct ggml_tensor * src0, co... function ggml_bitnet_get_type_bits (line 157) | int ggml_bitnet_get_type_bits(enum ggml_type type) { FILE: src/ggml-bitnet-mad.cpp function hsum_i32_8 (line 20) | static inline int hsum_i32_8(const __m256i a) { function hsum_i32_8 (line 29) | static inline int hsum_i32_8(const __m256i a) { function quantize_i2_s (line 51) | size_t quantize_i2_s(const float * src, void * dst, int64_t nrow, int64_... function ggml_vec_dot_i2_i8_s_1x1 (line 198) | void ggml_vec_dot_i2_i8_s_1x1(int n, float * s, size_t bs, const void * ... function ggml_vec_dot_i2_i8_s_1x4_32W (line 414) | void ggml_vec_dot_i2_i8_s_1x4_32W(int n, float * s, size_t bs, const voi... function ggml_vec_dot_i2_i8_s_1xN (line 512) | void ggml_vec_dot_i2_i8_s_1xN(int n, float * s, size_t bs, const void * ... function ggml_vec_dot_i2_i8_s_Nx1 (line 791) | void ggml_vec_dot_i2_i8_s_Nx1(int n, float * s, size_t bs, const void * ... function ggml_vec_dot_i2_i8_s (line 1043) | void ggml_vec_dot_i2_i8_s(int n, float * s, size_t bs, const void * vx, ... FILE: utils/codegen_tl1.py function gen_ctor_code (line 5) | def gen_ctor_code(): function gen_body_core_code (line 190) | def gen_body_core_code(bm, by): function gen_tbl_impl (line 224) | def gen_tbl_impl(pre, BM, BK, bm, k): function gen_top_api (line 285) | def gen_top_api(kernel_shapes): function gen_preprocess_code (line 310) | def gen_preprocess_code(): function gen_transform_code (line 321) | def gen_transform_code(kernel_shape): FILE: utils/codegen_tl2.py function gen_ctor_code (line 5) | def gen_ctor_code(): function gen_tbl_impl (line 279) | def gen_tbl_impl(pre, BM, BK, bm, k_list): function gen_top_api (line 532) | def gen_top_api(kernel_shapes, k_list): function gen_transform_code (line 626) | def gen_transform_code(kernel_shapes): function get_three_k_two_k (line 676) | def get_three_k_two_k(K, bk): FILE: utils/convert-helper-bitnet.py function run_command (line 9) | def run_command(command_list, cwd=None, check=True): function main (line 19) | def main(): FILE: utils/convert-hf-to-gguf-bitnet.py class SentencePieceTokenTypes (line 36) | class SentencePieceTokenTypes(IntEnum): class Model (line 48) | class Model(ABC): method __init__ (line 51) | def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_bi... method model_arch (line 68) | def model_arch(self) -> gguf.MODEL_ARCH: method find_hparam (line 71) | def find_hparam(self, keys: Sequence[str], optional: bool = False) -> ... method set_vocab (line 79) | def set_vocab(self): method get_tensors (line 82) | def get_tensors(self) -> Iterator[tuple[str, Tensor]]: method match_model_tensor_name (line 97) | def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, b... method map_tensor_name (line 110) | def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".... method set_gguf_parameters (line 116) | def set_gguf_parameters(self): method write_tensors (line 159) | def write_tensors(self): method write (line 199) | def write(self): method write_vocab (line 206) | def write_vocab(self): method count_model_parts (line 212) | def count_model_parts(dir_model: Path, prefix: str) -> int: method load_hparams (line 221) | def load_hparams(dir_model): method register (line 226) | def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]: method from_model_architecture (line 236) | def from_model_architecture(cls, arch): method _is_model_safetensors (line 242) | def _is_model_safetensors(self) -> bool: method _get_part_names (line 245) | def _get_part_names(self): method get_vocab_base (line 256) | def get_vocab_base(self) -> tuple[list[str], list[int], str]: method get_vocab_base_pre (line 291) | def get_vocab_base_pre(self, tokenizer) -> str: method _set_vocab_gpt2 (line 366) | def _set_vocab_gpt2(self) -> None: method _set_vocab_sentencepiece (line 376) | def _set_vocab_sentencepiece(self): method _set_vocab_llama_hf (line 441) | def _set_vocab_llama_hf(self): function process_tl1 (line 465) | def process_tl1(weight, BM, BY, bm, by, M, K): function preprocess_weights_tl1 (line 479) | def preprocess_weights_tl1( function preprocess_two_weights_tl2 (line 523) | def preprocess_two_weights_tl2(M, K, weight_num, BM, BY, bm, by, weight,... function preprocess_three_weights_tl2 (line 549) | def preprocess_three_weights_tl2(M, K, weight_num, BM, BY, bm, by, weigh... function preprocess_weights_tl2 (line 597) | def preprocess_weights_tl2( function transform_to_tl1 (line 662) | def transform_to_tl1(x: np.ndarray): function transform_to_tl2 (line 668) | def transform_to_tl2(x: np.ndarray): function read_model_config (line 675) | def read_model_config(model_dir: str) -> dict[str, Any]: class LlamaModel (line 683) | class LlamaModel(Model): method set_vocab (line 686) | def set_vocab(self): method write_tensors (line 708) | def write_tensors(self): method set_gguf_parameters (line 834) | def set_gguf_parameters(self): method permute (line 862) | def permute(weights: Tensor, n_head: int, n_head_kv: int | None): method modify_tensors (line 871) | def modify_tensors(self, data_torch: Tensor, name: str, bid: int | Non... method generate_extra_tensors (line 916) | def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]: method prepare_tensors (line 945) | def prepare_tensors(self): class BitnetModel (line 956) | class BitnetModel(Model): method set_vocab (line 959) | def set_vocab(self): method set_gguf_parameters (line 962) | def set_gguf_parameters(self): method weight_quant (line 970) | def weight_quant(self, weight): method modify_tensors (line 977) | def modify_tensors(self, data_torch: Tensor, name: str, bid: int | Non... method write_tensors (line 986) | def write_tensors(self): function parse_args (line 1095) | def parse_args() -> argparse.Namespace: function main (line 1126) | def main() -> None: FILE: utils/convert-ms-to-gguf-bitnet.py class DataType (line 58) | class DataType: method elements_to_bytes (line 63) | def elements_to_bytes(self, n_elements: int) -> int: class UnquantizedDataType (line 68) | class UnquantizedDataType(DataType): class QuantizedDataType (line 79) | class QuantizedDataType(DataType): method quantize (line 84) | def quantize(self, arr: NDArray) -> NDArray: method elements_to_bytes (line 87) | def elements_to_bytes(self, n_elements: int) -> int: class Q8_0QuantizedDataType (line 93) | class Q8_0QuantizedDataType(QuantizedDataType): method quantize (line 95) | def quantize(self, arr: NDArray) -> NDArray: class GGMLFileType (line 177) | class GGMLFileType(enum.IntEnum): method type_for_tensor (line 183) | def type_for_tensor(self, name: str, tensor: LazyTensor) -> DataType: class Params (line 208) | class Params: method guessed (line 232) | def guessed(model: LazyModel) -> Params: method loadHFTransformerJson (line 269) | def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params: method loadOriginalParamsJson (line 326) | def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Par... method load (line 372) | def load(model_plus: ModelPlus) -> Params: class BaseVocab (line 395) | class BaseVocab(Protocol): class NoVocab (line 400) | class NoVocab(BaseVocab): method __repr__ (line 404) | def __repr__(self) -> str: class Vocab (line 409) | class Vocab(BaseVocab, Protocol): method __init__ (line 415) | def __init__(self, base_path: Path): ... method all_tokens (line 416) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:... class BpeVocab (line 419) | class BpeVocab(Vocab): method __init__ (line 423) | def __init__(self, base_path: Path): method bpe_tokens (line 475) | def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method added_tokens (line 481) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method all_tokens (line 486) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method __repr__ (line 490) | def __repr__(self) -> str: class SentencePieceVocab (line 494) | class SentencePieceVocab(Vocab): method __init__ (line 498) | def __init__(self, base_path: Path): method sentencepiece_tokens (line 528) | def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.To... method added_tokens (line 552) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method all_tokens (line 557) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method __repr__ (line 561) | def __repr__(self) -> str: class LlamaHfVocab (line 565) | class LlamaHfVocab(Vocab): method __init__ (line 569) | def __init__(self, base_path: Path): method hf_tokens (line 635) | def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method get_token_type (line 653) | def get_token_type(self, token_id: int, token_text: bytes, special_ids... method get_token_score (line 661) | def get_token_score(self, token_id: int) -> float: method added_tokens (line 666) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method has_newline_token (line 677) | def has_newline_token(self): method all_tokens (line 680) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method __repr__ (line 684) | def __repr__(self) -> str: function permute (line 694) | def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray: class Tensor (line 702) | class Tensor(ABC): method astype (line 707) | def astype(self, data_type: DataType) -> Self: ... method permute (line 709) | def permute(self, n_head: int, n_head_kv: int) -> Self: ... method permute_part (line 711) | def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Se... method part (line 713) | def part(self, n_part: int) -> Self: ... method to_ggml (line 715) | def to_ggml(self) -> GGMLCompatibleTensor: ... function bf16_to_fp32 (line 718) | def bf16_to_fp32(bf16_arr: np.ndarray[Any, np.dtype[np.uint16]]) -> NDAr... function preprocess_weights (line 723) | def preprocess_weights( function transform_to_i2 (line 768) | def transform_to_i2(x : NDArray): class UnquantizedTensor (line 781) | class UnquantizedTensor(Tensor): method __init__ (line 782) | def __init__(self, ndarray: NDArray, i2_scale: NDArray = None): method astype (line 788) | def astype(self, data_type: DataType) -> UnquantizedTensor: method to_ggml (line 796) | def to_ggml(self) -> Self: method permute_part (line 799) | def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Un... method part (line 803) | def part(self, n_part: int) -> UnquantizedTensor: method permute (line 807) | def permute(self, n_head: int, n_head_kv: int) -> UnquantizedTensor: function load_unquantized (line 811) | def load_unquantized(lazy_tensor: LazyTensor, expected_dtype: Any = None... class LazyTensor (line 831) | class LazyTensor: method load (line 837) | def load(self) -> Tensor: method astype (line 844) | def astype(self, data_type: DataType) -> LazyTensor: method validate_conversion_to (line 851) | def validate_conversion_to(self, data_type: DataType) -> None: class ModelPlus (line 860) | class ModelPlus: function merge_sharded (line 867) | def merge_sharded(models: list[LazyModel]) -> LazyModel: function merge_multifile_models (line 901) | def merge_multifile_models(models_plus: list[ModelPlus]) -> ModelPlus: function permute_lazy (line 924) | def permute_lazy(lazy_tensor: LazyTensor, n_head: int, n_head_kv: int) -... function permute_part_lazy (line 930) | def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int,... function part_lazy (line 937) | def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor: function forward_t (line 946) | def forward_t(x): function weight_quant (line 953) | def weight_quant(weight): function part_lazy_q (line 960) | def part_lazy_q(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor: function part_lazy_k (line 968) | def part_lazy_k(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor: function part_lazy_v (line 976) | def part_lazy_v(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor: function part_lazy_w1 (line 986) | def part_lazy_w1(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor: function part_lazy_w3 (line 995) | def part_lazy_w3(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor: function part_lazy_rope (line 1004) | def part_lazy_rope(lazy_tensor: LazyTensor) -> LazyTensor: function part_lazy_weight_quant (line 1011) | def part_lazy_weight_quant(lazy_tensor: LazyTensor, name) -> LazyTensor: function pack_experts_lazy (line 1020) | def pack_experts_lazy(lazy_tensors: list[LazyTensor]) -> LazyTensor: function lazy_load_safetensors_file (line 1029) | def lazy_load_safetensors_file(fp: IO[bytes], path: Path) -> ModelPlus: function must_read (line 1053) | def must_read(fp: IO[bytes], length: int) -> bytes: function lazy_load_file (line 1061) | def lazy_load_file(path: Path) -> ModelPlus: function bounded_parallel_map (line 1076) | def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[I... function check_vocab_size (line 1111) | def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool =... class OutputFile (line 1146) | class OutputFile: method __init__ (line 1147) | def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.G... method add_meta_arch (line 1150) | def add_meta_arch(self, params: Params) -> None: method extract_vocabulary_from_model (line 1193) | def extract_vocabulary_from_model(self, vocab: Vocab) -> tuple[list[by... method add_meta_vocab (line 1208) | def add_meta_vocab(self, vocab: Vocab) -> None: method add_meta_special_vocab (line 1219) | def add_meta_special_vocab(self, svocab: gguf.SpecialVocab) -> None: method add_tensor_info (line 1222) | def add_tensor_info(self, name: str, tensor: LazyTensor) -> None: method write_meta (line 1240) | def write_meta(self) -> None: method write_tensor_info (line 1244) | def write_tensor_info(self) -> None: method write_tensor_data (line 1247) | def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, con... method close (line 1279) | def close(self) -> None: method write_vocab_only (line 1283) | def write_vocab_only( method do_item (line 1301) | def do_item(item: tuple[str, LazyTensor]) -> tuple[DataType, NDArray]: method maybe_do_quantize (line 1307) | def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray: method write_all (line 1314) | def write_all( function pick_output_type (line 1347) | def pick_output_type(model: LazyModel, output_type_str: str | None) -> G... function convert_to_output_type (line 1364) | def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) ... function convert_model_names (line 1374) | def convert_model_names(model: LazyModel, params: Params, skip_unknown: ... function nth_multifile_path (line 1508) | def nth_multifile_path(path: Path, n: int) -> Path | None: function find_multifile_paths (line 1529) | def find_multifile_paths(path: Path) -> list[Path]: function load_some_model (line 1547) | def load_some_model(path: Path) -> ModelPlus: class VocabFactory (line 1570) | class VocabFactory: method __init__ (line 1573) | def __init__(self, path: Path): method _create_special_vocab (line 1576) | def _create_special_vocab(self, vocab: BaseVocab, model_parent_path: P... method _create_vocab_by_path (line 1586) | def _create_vocab_by_path(self, vocab_types: list[str]) -> Vocab: method load_vocab (line 1607) | def load_vocab(self, vocab_types: list[str] | None, model_parent_path:... function default_outfile (line 1621) | def default_outfile(model_paths: list[Path], file_type: GGMLFileType) ->... function do_dump_model (line 1637) | def do_dump_model(model_plus: ModelPlus) -> None: function main (line 1645) | def main(args_in: list[str] | None = None) -> None: FILE: utils/convert.py class DataType (line 58) | class DataType: method elements_to_bytes (line 63) | def elements_to_bytes(self, n_elements: int) -> int: class UnquantizedDataType (line 68) | class UnquantizedDataType(DataType): class QuantizedDataType (line 79) | class QuantizedDataType(DataType): method quantize (line 84) | def quantize(self, arr: NDArray) -> NDArray: method elements_to_bytes (line 87) | def elements_to_bytes(self, n_elements: int) -> int: class Q8_0QuantizedDataType (line 93) | class Q8_0QuantizedDataType(QuantizedDataType): method quantize (line 95) | def quantize(self, arr: NDArray) -> NDArray: class GGMLFileType (line 177) | class GGMLFileType(enum.IntEnum): method type_for_tensor (line 183) | def type_for_tensor(self, name: str, tensor: LazyTensor) -> DataType: class Params (line 208) | class Params: method guessed (line 232) | def guessed(model: LazyModel) -> Params: method loadHFTransformerJson (line 269) | def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params: method loadOriginalParamsJson (line 326) | def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Par... method load (line 372) | def load(model_plus: ModelPlus) -> Params: class BaseVocab (line 395) | class BaseVocab(Protocol): class NoVocab (line 400) | class NoVocab(BaseVocab): method __repr__ (line 404) | def __repr__(self) -> str: class Vocab (line 409) | class Vocab(BaseVocab, Protocol): method __init__ (line 415) | def __init__(self, base_path: Path): ... method all_tokens (line 416) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:... class BpeVocab (line 419) | class BpeVocab(Vocab): method __init__ (line 423) | def __init__(self, base_path: Path): method bpe_tokens (line 475) | def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method added_tokens (line 481) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method all_tokens (line 486) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method __repr__ (line 490) | def __repr__(self) -> str: class SentencePieceVocab (line 494) | class SentencePieceVocab(Vocab): method __init__ (line 498) | def __init__(self, base_path: Path): method sentencepiece_tokens (line 528) | def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.To... method added_tokens (line 552) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method all_tokens (line 557) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method __repr__ (line 561) | def __repr__(self) -> str: class LlamaHfVocab (line 565) | class LlamaHfVocab(Vocab): method __init__ (line 569) | def __init__(self, base_path: Path): method hf_tokens (line 635) | def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method get_token_type (line 653) | def get_token_type(self, token_id: int, token_text: bytes, special_ids... method get_token_score (line 661) | def get_token_score(self, token_id: int) -> float: method added_tokens (line 666) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method has_newline_token (line 677) | def has_newline_token(self): method all_tokens (line 680) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: method __repr__ (line 684) | def __repr__(self) -> str: function permute (line 694) | def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray: class Tensor (line 702) | class Tensor(ABC): method astype (line 707) | def astype(self, data_type: DataType) -> Self: ... method permute (line 709) | def permute(self, n_head: int, n_head_kv: int) -> Self: ... method permute_part (line 711) | def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Se... method part (line 713) | def part(self, n_part: int) -> Self: ... method to_ggml (line 715) | def to_ggml(self) -> GGMLCompatibleTensor: ... function bf16_to_fp32 (line 718) | def bf16_to_fp32(bf16_arr: np.ndarray[Any, np.dtype[np.uint16]]) -> NDAr... function preprocess_weights (line 723) | def preprocess_weights( function transform_to_i2 (line 768) | def transform_to_i2(x : NDArray): class UnquantizedTensor (line 781) | class UnquantizedTensor(Tensor): method __init__ (line 782) | def __init__(self, ndarray: NDArray, i2_scale: NDArray = None): method astype (line 788) | def astype(self, data_type: DataType) -> UnquantizedTensor: method to_ggml (line 796) | def to_ggml(self) -> Self: method permute_part (line 799) | def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Un... method part (line 803) | def part(self, n_part: int) -> UnquantizedTensor: method permute (line 807) | def permute(self, n_head: int, n_head_kv: int) -> UnquantizedTensor: function load_unquantized (line 811) | def load_unquantized(lazy_tensor: LazyTensor, expected_dtype: Any = None... class LazyTensor (line 831) | class LazyTensor: method load (line 837) | def load(self) -> Tensor: method astype (line 844) | def astype(self, data_type: DataType) -> LazyTensor: method validate_conversion_to (line 851) | def validate_conversion_to(self, data_type: DataType) -> None: class ModelPlus (line 860) | class ModelPlus: function merge_sharded (line 867) | def merge_sharded(models: list[LazyModel]) -> LazyModel: function merge_multifile_models (line 901) | def merge_multifile_models(models_plus: list[ModelPlus]) -> ModelPlus: function permute_lazy (line 924) | def permute_lazy(lazy_tensor: LazyTensor, n_head: int, n_head_kv: int) -... function permute_part_lazy (line 930) | def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int,... function part_lazy (line 938) | def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor: function pack_experts_lazy (line 946) | def pack_experts_lazy(lazy_tensors: list[LazyTensor]) -> LazyTensor: function lazy_load_safetensors_file (line 955) | def lazy_load_safetensors_file(fp: IO[bytes], path: Path) -> ModelPlus: function must_read (line 979) | def must_read(fp: IO[bytes], length: int) -> bytes: function lazy_load_file (line 987) | def lazy_load_file(path: Path) -> ModelPlus: function bounded_parallel_map (line 1002) | def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[I... function check_vocab_size (line 1037) | def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool =... class OutputFile (line 1072) | class OutputFile: method __init__ (line 1073) | def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.G... method add_meta_arch (line 1076) | def add_meta_arch(self, params: Params) -> None: method extract_vocabulary_from_model (line 1123) | def extract_vocabulary_from_model(self, vocab: Vocab) -> tuple[list[by... method add_meta_vocab (line 1138) | def add_meta_vocab(self, vocab: Vocab) -> None: method add_meta_special_vocab (line 1150) | def add_meta_special_vocab(self, svocab: gguf.SpecialVocab) -> None: method add_tensor_info (line 1153) | def add_tensor_info(self, name: str, tensor: LazyTensor) -> None: method write_meta (line 1171) | def write_meta(self) -> None: method write_tensor_info (line 1175) | def write_tensor_info(self) -> None: method write_tensor_data (line 1178) | def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, con... method close (line 1210) | def close(self) -> None: method write_vocab_only (line 1214) | def write_vocab_only( method do_item (line 1232) | def do_item(item: tuple[str, LazyTensor]) -> tuple[DataType, NDArray]: method maybe_do_quantize (line 1238) | def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray: method write_all (line 1245) | def write_all( function pick_output_type (line 1275) | def pick_output_type(model: LazyModel, output_type_str: str | None) -> G... function convert_to_output_type (line 1292) | def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) ... function convert_model_names (line 1302) | def convert_model_names(model: LazyModel, params: Params, skip_unknown: ... function nth_multifile_path (line 1363) | def nth_multifile_path(path: Path, n: int) -> Path | None: function find_multifile_paths (line 1384) | def find_multifile_paths(path: Path) -> list[Path]: function load_some_model (line 1402) | def load_some_model(path: Path) -> ModelPlus: class VocabFactory (line 1425) | class VocabFactory: method __init__ (line 1428) | def __init__(self, path: Path): method _create_special_vocab (line 1431) | def _create_special_vocab(self, vocab: BaseVocab, model_parent_path: P... method _create_vocab_by_path (line 1441) | def _create_vocab_by_path(self, vocab_types: list[str]) -> Vocab: method load_vocab (line 1462) | def load_vocab(self, vocab_types: list[str] | None, model_parent_path:... function default_outfile (line 1476) | def default_outfile(model_paths: list[Path], file_type: GGMLFileType) ->... function do_dump_model (line 1492) | def do_dump_model(model_plus: ModelPlus) -> None: function main (line 1500) | def main(args_in: list[str] | None = None) -> None: FILE: utils/e2e_benchmark.py function run_command (line 8) | def run_command(command, shell=False, log_step=None): function run_benchmark (line 25) | def run_benchmark(): function parse_args (line 48) | def parse_args(): FILE: utils/generate-dummy-bitnet-model.py class SentencePieceTokenTypes (line 108) | class SentencePieceTokenTypes(IntEnum): class Model (line 120) | class Model(ABC): method __init__ (line 123) | def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_bi... method model_arch (line 140) | def model_arch(self) -> gguf.MODEL_ARCH: method find_hparam (line 143) | def find_hparam(self, keys: Sequence[str], optional: bool = False) -> ... method set_vocab (line 151) | def set_vocab(self): method get_tensors (line 154) | def get_tensors(self) -> Iterator[tuple[str, Tensor]]: method match_model_tensor_name (line 169) | def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, b... method map_tensor_name (line 182) | def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".... method set_gguf_parameters (line 188) | def set_gguf_parameters(self): method write_tensors (line 231) | def write_tensors(self): method write (line 271) | def write(self): method write_vocab (line 278) | def write_vocab(self): method count_model_parts (line 284) | def count_model_parts(dir_model: Path, prefix: str) -> int: method load_hparams (line 293) | def load_hparams(dir_model): method register (line 298) | def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]: method from_model_architecture (line 308) | def from_model_architecture(cls, arch): method _is_model_safetensors (line 314) | def _is_model_safetensors(self) -> bool: method _get_part_names (line 317) | def _get_part_names(self): method get_vocab_base (line 328) | def get_vocab_base(self) -> tuple[list[str], list[int], str]: method get_vocab_base_pre (line 361) | def get_vocab_base_pre(self, tokenizer) -> str: method _set_vocab_sentencepiece (line 431) | def _set_vocab_sentencepiece(self): function process_tl1 (line 498) | def process_tl1(weight, BM, BY, bm, by, M, K): function preprocess_weights_tl1 (line 528) | def preprocess_weights_tl1( function preprocess_two_weights_tl2 (line 577) | def preprocess_two_weights_tl2(M, K, weight_num, BM, BY, bm, by, weight,... function preprocess_three_weights_tl2 (line 620) | def preprocess_three_weights_tl2(M, K, weight_num, BM, BY, bm, by, weigh... function preprocess_weights_tl2 (line 699) | def preprocess_weights_tl2( class BitnetModel (line 777) | class BitnetModel(Model): method set_params (line 781) | def set_params(self, params: str): method set_vocab (line 793) | def set_vocab(self): method set_gguf_parameters (line 796) | def set_gguf_parameters(self): method weight_quant (line 804) | def weight_quant(self, weight): method transform_to_tl1 (line 811) | def transform_to_tl1(self, x: np.ndarray): method transform_to_tl2 (line 817) | def transform_to_tl2(self, x: np.ndarray): method generate_tensors (line 824) | def generate_tensors(self) -> Iterator[tuple[str, np.ndarray]]: method modify_tensors (line 852) | def modify_tensors(self, data_torch: Tensor, name: str, bid: int | Non... method write_tensors (line 861) | def write_tensors(self): function main (line 963) | def main() -> None: function read_gguf_file (line 990) | def read_gguf_file(gguf_file_path): function parse_args (line 1019) | def parse_args() -> argparse.Namespace: FILE: utils/preprocess-huggingface-bitnet.py function quant_weight_fp16 (line 5) | def quant_weight_fp16(weight): function quant_model (line 11) | def quant_model(input, output): FILE: utils/quantize_embeddings.py class EmbeddingQuantizer (line 17) | class EmbeddingQuantizer: method __init__ (line 18) | def __init__(self, input_model, output_dir, quantize_bin="../build/bin... method quantize (line 46) | def quantize(self, embedding_type, output_suffix): method benchmark_model (line 127) | def benchmark_model(self, output_suffix): method parse_benchmark_output (line 187) | def parse_benchmark_output(self, output, output_suffix): method cleanup_model (line 256) | def cleanup_model(self, output_suffix): method run_all_quantizations (line 275) | def run_all_quantizations(self, types_to_quantize): method save_results_to_csv (line 329) | def save_results_to_csv(self): method print_summary (line 370) | def print_summary(self, total_duration): function main (line 394) | def main(): FILE: utils/test_perplexity.py class PerplexityTester (line 20) | class PerplexityTester: method __init__ (line 21) | def __init__(self, model_path, llama_perplexity_bin="../build/bin/llam... method find_datasets (line 63) | def find_datasets(self): method create_quick_dataset (line 91) | def create_quick_dataset(self, dataset_path, num_chars=4096): method cleanup_temp_files (line 107) | def cleanup_temp_files(self): method run_perplexity_test (line 116) | def run_perplexity_test(self, dataset_name, dataset_path, threads=16, ... method parse_perplexity (line 207) | def parse_perplexity(self, output): method quantize_embedding (line 241) | def quantize_embedding(self, embedding_type, output_suffix): method cleanup_model (line 314) | def cleanup_model(self, model_path): method run_all_tests (line 326) | def run_all_tests(self, threads=16, ctx_size=512): method save_results (line 441) | def save_results(self): method print_summary (line 490) | def print_summary(self, total_time): function main (line 539) | def main(): FILE: utils/tune_gemm_config.py class GemmTuner (line 18) | class GemmTuner: method __init__ (line 19) | def __init__(self, config_path, model_path, threads=16): method backup_config (line 27) | def backup_config(self): method restore_config (line 32) | def restore_config(self): method generate_config (line 37) | def generate_config(self, act_parallel, row_block_size, col_block_size... method rebuild_project (line 52) | def rebuild_project(self): method run_benchmark (line 66) | def run_benchmark(self): method parse_throughput (line 93) | def parse_throughput(self, output): method test_configuration (line 110) | def test_configuration(self, act_parallel, row_block_size, col_block_s... method save_results (line 153) | def save_results(self, csv_path): method find_best_config (line 166) | def find_best_config(self): method run_tuning (line 175) | def run_tuning(self, configurations, output_csv=None): function generate_configurations (line 262) | def generate_configurations(): function main (line 296) | def main():