SYMBOL INDEX (580 symbols across 33 files)

FILE: gpu/convert_checkpoint.py
  function convert_ts_checkpoint (line 15) | def convert_ts_checkpoint(

FILE: gpu/convert_safetensors.py
  class ModelArgs (line 14) | class ModelArgs:
    method __post_init__ (line 26) | def __post_init__(self):
    method from_name (line 36) | def from_name(cls, name: str):
  function invert_convert_q (line 43) | def invert_convert_q(w: torch.Tensor, config: ModelArgs) -> torch.Tensor:
  function invert_convert_k (line 46) | def invert_convert_k(w: torch.Tensor, config: ModelArgs) -> torch.Tensor:
  function convert_back (line 49) | def convert_back(

FILE: gpu/generate.py
  class GenArgs (line 27) | class GenArgs:
  class FastGen (line 37) | class FastGen:
    method build (line 42) | def build(
    method __init__ (line 79) | def __init__(
    method compile_prefill (line 102) | def compile_prefill(self):
    method compile_generate (line 159) | def compile_generate(self):
    method generate_all (line 217) | def generate_all(
  function get_prompts (line 307) | def get_prompts(interactive: bool) -> Iterable[list[str]]:
  function main (line 322) | def main(ckpt_dir: str, interactive: bool = False, chat_format: bool = F...

FILE: gpu/model.py
  function bitnet_int8xint2_linear (line 21) | def bitnet_int8xint2_linear(input0, input1, s, ws):
  class ModelArgs (line 40) | class ModelArgs:
  class BitLinearKernel (line 54) | class BitLinearKernel(nn.Module):
    method __init__ (line 60) | def __init__(self, in_features: int, out_features: int, bias: bool = F...
    method quant_input (line 69) | def quant_input(self, input):
    method forward (line 73) | def forward(self, input):
  class BitLinear (line 77) | class BitLinear(nn.Linear):
    method quant_input (line 79) | def quant_input(self, input):
    method forward (line 83) | def forward(self, input):
  class Attention (line 87) | class Attention(nn.Module):
    method __init__ (line 88) | def __init__(
    method forward (line 121) | def forward(
  function squared_relu (line 165) | def squared_relu(x: torch.Tensor) -> torch.Tensor:
  class FeedForward (line 168) | class FeedForward(nn.Module):
    method __init__ (line 169) | def __init__(
    method forward (line 192) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class TransformerBlock (line 200) | class TransformerBlock(nn.Module):
    method __init__ (line 201) | def __init__(self, args: ModelArgs):
    method forward (line 231) | def forward(
  class Transformer (line 246) | class Transformer(nn.Module):
    method __init__ (line 247) | def __init__(self, args: ModelArgs):
    method forward_with_attn_bias (line 269) | def forward_with_attn_bias(
    method forward (line 283) | def forward(
  function make_cache (line 299) | def make_cache(
  function cache_prefix (line 346) | def cache_prefix(cache: list[LayerCache], length: int) -> list[LayerCache]:

FILE: gpu/pack_weight.py
  function B_global_16x32_to_shared_load_16x32_layout (line 5) | def B_global_16x32_to_shared_load_16x32_layout(i, j):
  function permutate_weight_fastest (line 17) | def permutate_weight_fastest(weight):
  function compress_int2_to_int8 (line 46) | def compress_int2_to_int8(int2_weight):
  function interleave_weight_int8 (line 56) | def interleave_weight_int8(qweight, nbits=2):\
  function convert_weight_int8_to_int2 (line 76) | def convert_weight_int8_to_int2(weight):

FILE: gpu/sample_utils.py
  function top_p (line 9) | def top_p(probs: torch.Tensor, p: float) -> torch.Tensor:

FILE: gpu/stats.py
  class PhaseStats (line 12) | class PhaseStats:
    method show (line 17) | def show(self) -> str:
  class Stats (line 27) | class Stats:
    method __init__ (line 32) | def __init__(self):
    method end_phase (line 36) | def end_phase(self, tokens: int, now: Optional[float] = None):
    method phase (line 50) | def phase(self, name: str, tokens: int = 0):

FILE: gpu/test.py
  function bitnet_int8xint2_linear (line 15) | def bitnet_int8xint2_linear(input0, input1, s, ws, ret):

FILE: gpu/tokenizer.py
  class Message (line 26) | class Message(TypedDict):
  class Tokenizer (line 34) | class Tokenizer:
    method __init__ (line 45) | def __init__(self, model_path: str):
    method encode (line 95) | def encode(
    method decode (line 158) | def decode(self, t: Sequence[int]) -> str:
    method _split_whitespaces_or_nonwhitespaces (line 172) | def _split_whitespaces_or_nonwhitespaces(
  class ChatFormat (line 197) | class ChatFormat:
    method __init__ (line 198) | def __init__(self, tokenizer: Tokenizer):
    method decode (line 202) | def decode(self, tokens: List[int]) -> str:
    method encode_header (line 209) | def encode_header(self, message: Message) -> List[int]:
    method encode_message (line 225) | def encode_message(self, message: Message, return_target=False) -> Lis...
    method encode_dialog_prompt (line 242) | def encode_dialog_prompt(self, dialog: Dialog, completion=False, retur...

FILE: include/ggml-bitnet.h
  type float32_t (line 8) | typedef float32_t bitnet_float_type;
  type bitnet_float_type (line 10) | typedef float bitnet_float_type;
  type bitnet_tensor_extra (line 17) | struct bitnet_tensor_extra {
  type ggml_tensor (line 31) | struct ggml_tensor
  type ggml_tensor (line 31) | struct ggml_tensor
  type ggml_tensor (line 31) | struct ggml_tensor
  type ggml_tensor (line 32) | struct ggml_tensor
  type ggml_tensor (line 32) | struct ggml_tensor
  type ggml_tensor (line 32) | struct ggml_tensor
  type ggml_tensor (line 35) | struct ggml_tensor
  type ggml_type (line 36) | enum ggml_type

FILE: preset_kernels/Llama3-8B-1.58-100B-tokens/bitnet-lut-kernels-tl1.h
  function aligned_free (line 16) | static void aligned_free(void * ptr) {{
  function per_tensor_quant (line 24) | void per_tensor_quant(int k, void* lut_scales_, void* b_) {{
  function partial_max_reset (line 53) | void partial_max_reset(void* lut_scales_) {{
  function Transpose_8_8 (line 59) | inline void Transpose_8_8(
  function lut_ctor (line 96) | void lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_type* lut...
  function is_type_supported (line 174) | static bool is_type_supported(enum ggml_type type) {{
  function tbl_impl_14336_4096 (line 186) | inline void tbl_impl_14336_4096(int32_t* c, int8_t* lut, uint8_t* a) {
  function qgemm_lut_14336_4096 (line 303) | int32_t qgemm_lut_14336_4096(void* A, void* LUT, void* Scales, void* LUT...
  function tbl_impl_4096_14336 (line 320) | inline void tbl_impl_4096_14336(int32_t* c, int8_t* lut, uint8_t* a) {
  function qgemm_lut_4096_14336 (line 421) | int32_t qgemm_lut_4096_14336(void* A, void* LUT, void* Scales, void* LUT...
  function tbl_impl_1024_4096 (line 438) | inline void tbl_impl_1024_4096(int32_t* c, int8_t* lut, uint8_t* a) {
  function qgemm_lut_1024_4096 (line 555) | int32_t qgemm_lut_1024_4096(void* A, void* LUT, void* Scales, void* LUT_...
  function tbl_impl_4096_4096 (line 572) | inline void tbl_impl_4096_4096(int32_t* c, int8_t* lut, uint8_t* a) {
  function qgemm_lut_4096_4096 (line 673) | int32_t qgemm_lut_4096_4096(void* A, void* LUT, void* Scales, void* LUT_...
  function ggml_preprocessor (line 694) | void ggml_preprocessor(int m, int k, void* B, void* LUT_Scales, void* QL...
  function ggml_qgemm_lut (line 708) | void ggml_qgemm_lut(int m, int k, void* A, void* LUT, void* Scales, void...
  function ggml_bitnet_transform_tensor (line 723) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) {

FILE: preset_kernels/Llama3-8B-1.58-100B-tokens/bitnet-lut-kernels-tl2.h
  function aligned_free (line 17) | static void aligned_free(void * ptr) {
  function _mm256_merge_epi32 (line 26) | inline void _mm256_merge_epi32(const __m256i v0, const __m256i v1, __m25...
  function _mm256_merge_epi64 (line 33) | inline void _mm256_merge_epi64(const __m256i v0, const __m256i v1, __m25...
  function _mm256_merge_si128 (line 40) | inline void _mm256_merge_si128(const __m256i v0, const __m256i v1, __m25...
  function Transpose_8_8 (line 45) | inline void Transpose_8_8(
  function per_tensor_quant (line 71) | inline int32_t per_tensor_quant(int k, void* lut_scales_, void* b_) {
  function partial_max_reset (line 90) | inline int32_t partial_max_reset(int32_t bs, void* lut_scales_) {
  function three_lut_ctor (line 99) | int32_t three_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_...
  function two_lut_ctor (line 185) | int32_t two_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_ty...
  function is_type_supported (line 261) | static bool is_type_supported(enum ggml_type type) {
  function three_tbl_impl_14336_4096 (line 274) | void three_tbl_impl_14336_4096(int32_t* c, int8_t* lut, uint8_t* a, uint...
  function two_tbl_impl14336_4096 (line 411) | int32_t two_tbl_impl14336_4096(int32_t* c, int8_t* lut, uint8_t* a) {
  function three_tbl_impl_4096_14336 (line 518) | void three_tbl_impl_4096_14336(int32_t* c, int8_t* lut, uint8_t* a, uint...
  function two_tbl_impl4096_14336 (line 655) | int32_t two_tbl_impl4096_14336(int32_t* c, int8_t* lut, uint8_t* a) {
  function three_tbl_impl_1024_4096 (line 762) | void three_tbl_impl_1024_4096(int32_t* c, int8_t* lut, uint8_t* a, uint8...
  function two_tbl_impl1024_4096 (line 899) | int32_t two_tbl_impl1024_4096(int32_t* c, int8_t* lut, uint8_t* a) {
  function three_tbl_impl_4096_4096 (line 1006) | void three_tbl_impl_4096_4096(int32_t* c, int8_t* lut, uint8_t* a, uint8...
  function two_tbl_impl4096_4096 (line 1143) | int32_t two_tbl_impl4096_4096(int32_t* c, int8_t* lut, uint8_t* a) {
  function ggml_preprocessor (line 1245) | void ggml_preprocessor(int bs, int m, int three_k, int two_k, void* B, v...
  function ggml_qgemm_lut (line 1276) | void ggml_qgemm_lut(int bs, int m, int k, int BK, void* A, void* sign, v...
  function ggml_bitnet_transform_tensor (line 1407) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) {

FILE: preset_kernels/bitnet_b1_58-3B/bitnet-lut-kernels-tl1.h
  function aligned_free (line 16) | static void aligned_free(void * ptr) {{
  function per_tensor_quant (line 24) | void per_tensor_quant(int k, void* lut_scales_, void* b_) {{
  function partial_max_reset (line 53) | void partial_max_reset(void* lut_scales_) {{
  function Transpose_8_8 (line 59) | inline void Transpose_8_8(
  function lut_ctor (line 96) | void lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_type* lut...
  function is_type_supported (line 174) | static bool is_type_supported(enum ggml_type type) {{
  function tbl_impl_3200_8640 (line 186) | inline void tbl_impl_3200_8640(int32_t* c, int8_t* lut, uint8_t* a) {
  function qgemm_lut_3200_8640 (line 287) | int32_t qgemm_lut_3200_8640(void* A, void* LUT, void* Scales, void* LUT_...
  function tbl_impl_3200_3200 (line 304) | inline void tbl_impl_3200_3200(int32_t* c, int8_t* lut, uint8_t* a) {
  function qgemm_lut_3200_3200 (line 421) | int32_t qgemm_lut_3200_3200(void* A, void* LUT, void* Scales, void* LUT_...
  function tbl_impl_8640_3200 (line 438) | inline void tbl_impl_8640_3200(int32_t* c, int8_t* lut, uint8_t* a) {
  function qgemm_lut_8640_3200 (line 539) | int32_t qgemm_lut_8640_3200(void* A, void* LUT, void* Scales, void* LUT_...
  function ggml_preprocessor (line 560) | void ggml_preprocessor(int m, int k, void* B, void* LUT_Scales, void* QL...
  function ggml_qgemm_lut (line 571) | void ggml_qgemm_lut(int m, int k, void* A, void* LUT, void* Scales, void...
  function ggml_bitnet_transform_tensor (line 583) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) {

FILE: preset_kernels/bitnet_b1_58-3B/bitnet-lut-kernels-tl2.h
  function aligned_free (line 17) | static void aligned_free(void * ptr) {
  function _mm256_merge_epi32 (line 26) | inline void _mm256_merge_epi32(const __m256i v0, const __m256i v1, __m25...
  function _mm256_merge_epi64 (line 33) | inline void _mm256_merge_epi64(const __m256i v0, const __m256i v1, __m25...
  function _mm256_merge_si128 (line 40) | inline void _mm256_merge_si128(const __m256i v0, const __m256i v1, __m25...
  function Transpose_8_8 (line 45) | inline void Transpose_8_8(
  function per_tensor_quant (line 71) | inline int32_t per_tensor_quant(int k, void* lut_scales_, void* b_) {
  function partial_max_reset (line 90) | inline int32_t partial_max_reset(int32_t bs, void* lut_scales_) {
  function three_lut_ctor (line 99) | int32_t three_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_...
  function two_lut_ctor (line 185) | int32_t two_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_ty...
  function is_type_supported (line 261) | static bool is_type_supported(enum ggml_type type) {
  function three_tbl_impl_3200_8640 (line 274) | void three_tbl_impl_3200_8640(int32_t* c, int8_t* lut, uint8_t* a, uint8...
  function two_tbl_impl3200_8640 (line 411) | int32_t two_tbl_impl3200_8640(int32_t* c, int8_t* lut, uint8_t* a) {
  function three_tbl_impl_3200_3200 (line 518) | void three_tbl_impl_3200_3200(int32_t* c, int8_t* lut, uint8_t* a, uint8...
  function two_tbl_impl3200_3200 (line 655) | int32_t two_tbl_impl3200_3200(int32_t* c, int8_t* lut, uint8_t* a) {
  function three_tbl_impl_8640_3200 (line 762) | void three_tbl_impl_8640_3200(int32_t* c, int8_t* lut, uint8_t* a, uint8...
  function two_tbl_impl8640_3200 (line 899) | int32_t two_tbl_impl8640_3200(int32_t* c, int8_t* lut, uint8_t* a) {
  function ggml_preprocessor (line 1001) | void ggml_preprocessor(int bs, int m, int three_k, int two_k, void* B, v...
  function ggml_qgemm_lut (line 1025) | void ggml_qgemm_lut(int bs, int m, int k, int BK, void* A, void* sign, v...
  function ggml_bitnet_transform_tensor (line 1124) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) {

FILE: preset_kernels/bitnet_b1_58-large/bitnet-lut-kernels-tl1.h
  function aligned_free (line 16) | static void aligned_free(void * ptr) {{
  function per_tensor_quant (line 24) | void per_tensor_quant(int k, void* lut_scales_, void* b_) {{
  function partial_max_reset (line 53) | void partial_max_reset(void* lut_scales_) {{
  function Transpose_8_8 (line 59) | inline void Transpose_8_8(
  function lut_ctor (line 96) | void lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_type* lut...
  function is_type_supported (line 174) | static bool is_type_supported(enum ggml_type type) {{
  function tbl_impl_1536_4096 (line 186) | inline void tbl_impl_1536_4096(int32_t* c, int8_t* lut, uint8_t* a) {
  function qgemm_lut_1536_4096 (line 287) | int32_t qgemm_lut_1536_4096(void* A, void* LUT, void* Scales, void* LUT_...
  function tbl_impl_1536_1536 (line 304) | inline void tbl_impl_1536_1536(int32_t* c, int8_t* lut, uint8_t* a) {
  function qgemm_lut_1536_1536 (line 421) | int32_t qgemm_lut_1536_1536(void* A, void* LUT, void* Scales, void* LUT_...
  function tbl_impl_4096_1536 (line 438) | inline void tbl_impl_4096_1536(int32_t* c, int8_t* lut, uint8_t* a) {
  function qgemm_lut_4096_1536 (line 539) | int32_t qgemm_lut_4096_1536(void* A, void* LUT, void* Scales, void* LUT_...
  function ggml_preprocessor (line 560) | void ggml_preprocessor(int m, int k, void* B, void* LUT_Scales, void* QL...
  function ggml_qgemm_lut (line 571) | void ggml_qgemm_lut(int m, int k, void* A, void* LUT, void* Scales, void...
  function ggml_bitnet_transform_tensor (line 583) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) {

FILE: preset_kernels/bitnet_b1_58-large/bitnet-lut-kernels-tl2.h
  function aligned_free (line 17) | static void aligned_free(void * ptr) {
  function _mm256_merge_epi32 (line 26) | inline void _mm256_merge_epi32(const __m256i v0, const __m256i v1, __m25...
  function _mm256_merge_epi64 (line 33) | inline void _mm256_merge_epi64(const __m256i v0, const __m256i v1, __m25...
  function _mm256_merge_si128 (line 40) | inline void _mm256_merge_si128(const __m256i v0, const __m256i v1, __m25...
  function Transpose_8_8 (line 45) | inline void Transpose_8_8(
  function per_tensor_quant (line 71) | inline int32_t per_tensor_quant(int k, void* lut_scales_, void* b_) {
  function partial_max_reset (line 90) | inline int32_t partial_max_reset(int32_t bs, void* lut_scales_) {
  function three_lut_ctor (line 99) | int32_t three_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_...
  function two_lut_ctor (line 185) | int32_t two_lut_ctor(int8_t* qlut, bitnet_float_type* b, bitnet_float_ty...
  function is_type_supported (line 261) | static bool is_type_supported(enum ggml_type type) {
  function three_tbl_impl_1536_4096 (line 274) | void three_tbl_impl_1536_4096(int32_t* c, int8_t* lut, uint8_t* a, uint8...
  function two_tbl_impl1536_4096 (line 411) | int32_t two_tbl_impl1536_4096(int32_t* c, int8_t* lut, uint8_t* a) {
  function three_tbl_impl_1536_1536 (line 518) | void three_tbl_impl_1536_1536(int32_t* c, int8_t* lut, uint8_t* a, uint8...
  function two_tbl_impl1536_1536 (line 655) | int32_t two_tbl_impl1536_1536(int32_t* c, int8_t* lut, uint8_t* a) {
  function three_tbl_impl_4096_1536 (line 762) | void three_tbl_impl_4096_1536(int32_t* c, int8_t* lut, uint8_t* a, uint8...
  function two_tbl_impl4096_1536 (line 899) | int32_t two_tbl_impl4096_1536(int32_t* c, int8_t* lut, uint8_t* a) {
  function ggml_preprocessor (line 1001) | void ggml_preprocessor(int bs, int m, int three_k, int two_k, void* B, v...
  function ggml_qgemm_lut (line 1025) | void ggml_qgemm_lut(int bs, int m, int k, int BK, void* A, void* sign, v...
  function ggml_bitnet_transform_tensor (line 1124) | void ggml_bitnet_transform_tensor(struct ggml_tensor * tensor) {

FILE: run_inference.py
  function run_command (line 8) | def run_command(command, shell=False):
  function run_inference (line 16) | def run_inference():
  function signal_handler (line 39) | def signal_handler(sig, frame):

FILE: run_inference_server.py
  function run_command (line 8) | def run_command(command, shell=False):
  function run_server (line 16) | def run_server():
  function signal_handler (line 46) | def signal_handler(sig, frame):

FILE: setup_env.py
  function system_info (line 84) | def system_info():
  function get_model_name (line 87) | def get_model_name():
  function run_command (line 92) | def run_command(command, shell=False, log_step=None):
  function prepare_model (line 109) | def prepare_model():
  function setup_gguf (line 152) | def setup_gguf():
  function gen_code (line 156) | def gen_code():
  function compile (line 203) | def compile():
  function main (line 218) | def main():
  function parse_args (line 224) | def parse_args():
  function signal_handler (line 235) | def signal_handler(sig, frame):

FILE: src/ggml-bitnet-lut.cpp
  function ggml_bitnet_init (line 14) | void ggml_bitnet_init(void) {
  function ggml_bitnet_free (line 31) | void ggml_bitnet_free(void) {
  function do_permutate (line 49) | static bool do_permutate(enum ggml_type type) {
  function ggml_bitnet_can_mul_mat (line 58) | bool ggml_bitnet_can_mul_mat(const struct ggml_tensor * src0, const stru...
  function ggml_bitnet_mul_mat_get_wsize (line 70) | size_t ggml_bitnet_mul_mat_get_wsize(const struct ggml_tensor * src0, co...
  function ggml_bitnet_get_type_bits (line 85) | int ggml_bitnet_get_type_bits(enum ggml_type type) {
  function ggml_bitnet_init (line 98) | void ggml_bitnet_init(void) {
  function ggml_bitnet_free (line 115) | void ggml_bitnet_free(void) {
  function ggml_bitnet_can_mul_mat (line 133) | bool ggml_bitnet_can_mul_mat(const struct ggml_tensor * src0, const stru...
  function ggml_bitnet_mul_mat_get_wsize (line 143) | size_t ggml_bitnet_mul_mat_get_wsize(const struct ggml_tensor * src0, co...
  function ggml_bitnet_get_type_bits (line 157) | int ggml_bitnet_get_type_bits(enum ggml_type type) {

FILE: src/ggml-bitnet-mad.cpp
  function hsum_i32_8 (line 20) | static inline int hsum_i32_8(const __m256i a) {
  function hsum_i32_8 (line 29) | static inline int hsum_i32_8(const __m256i a) {
  function quantize_i2_s (line 51) | size_t quantize_i2_s(const float * src, void * dst, int64_t nrow, int64_...
  function ggml_vec_dot_i2_i8_s_1x1 (line 198) | void ggml_vec_dot_i2_i8_s_1x1(int n, float * s, size_t bs, const void * ...
  function ggml_vec_dot_i2_i8_s_1x4_32W (line 414) | void ggml_vec_dot_i2_i8_s_1x4_32W(int n, float * s, size_t bs, const voi...
  function ggml_vec_dot_i2_i8_s_1xN (line 512) | void ggml_vec_dot_i2_i8_s_1xN(int n, float * s, size_t bs, const void * ...
  function ggml_vec_dot_i2_i8_s_Nx1 (line 791) | void ggml_vec_dot_i2_i8_s_Nx1(int n, float * s, size_t bs, const void * ...
  function ggml_vec_dot_i2_i8_s (line 1043) | void ggml_vec_dot_i2_i8_s(int n, float * s, size_t bs, const void * vx, ...

FILE: utils/codegen_tl1.py
  function gen_ctor_code (line 5) | def gen_ctor_code():
  function gen_body_core_code (line 190) | def gen_body_core_code(bm, by):
  function gen_tbl_impl (line 224) | def gen_tbl_impl(pre, BM, BK, bm, k):
  function gen_top_api (line 285) | def gen_top_api(kernel_shapes):
  function gen_preprocess_code (line 310) | def gen_preprocess_code():
  function gen_transform_code (line 321) | def gen_transform_code(kernel_shape):

FILE: utils/codegen_tl2.py
  function gen_ctor_code (line 5) | def gen_ctor_code():
  function gen_tbl_impl (line 279) | def gen_tbl_impl(pre, BM, BK, bm, k_list):
  function gen_top_api (line 532) | def gen_top_api(kernel_shapes, k_list):
  function gen_transform_code (line 626) | def gen_transform_code(kernel_shapes):
  function get_three_k_two_k (line 676) | def get_three_k_two_k(K, bk):

FILE: utils/convert-helper-bitnet.py
  function run_command (line 9) | def run_command(command_list, cwd=None, check=True):
  function main (line 19) | def main():

FILE: utils/convert-hf-to-gguf-bitnet.py
  class SentencePieceTokenTypes (line 36) | class SentencePieceTokenTypes(IntEnum):
  class Model (line 48) | class Model(ABC):
    method __init__ (line 51) | def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_bi...
    method model_arch (line 68) | def model_arch(self) -> gguf.MODEL_ARCH:
    method find_hparam (line 71) | def find_hparam(self, keys: Sequence[str], optional: bool = False) -> ...
    method set_vocab (line 79) | def set_vocab(self):
    method get_tensors (line 82) | def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
    method match_model_tensor_name (line 97) | def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, b...
    method map_tensor_name (line 110) | def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = ("....
    method set_gguf_parameters (line 116) | def set_gguf_parameters(self):
    method write_tensors (line 159) | def write_tensors(self):
    method write (line 199) | def write(self):
    method write_vocab (line 206) | def write_vocab(self):
    method count_model_parts (line 212) | def count_model_parts(dir_model: Path, prefix: str) -> int:
    method load_hparams (line 221) | def load_hparams(dir_model):
    method register (line 226) | def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
    method from_model_architecture (line 236) | def from_model_architecture(cls, arch):
    method _is_model_safetensors (line 242) | def _is_model_safetensors(self) -> bool:
    method _get_part_names (line 245) | def _get_part_names(self):
    method get_vocab_base (line 256) | def get_vocab_base(self) -> tuple[list[str], list[int], str]:
    method get_vocab_base_pre (line 291) | def get_vocab_base_pre(self, tokenizer) -> str:
    method _set_vocab_gpt2 (line 366) | def _set_vocab_gpt2(self) -> None:
    method _set_vocab_sentencepiece (line 376) | def _set_vocab_sentencepiece(self):
    method _set_vocab_llama_hf (line 441) | def _set_vocab_llama_hf(self):
  function process_tl1 (line 465) | def process_tl1(weight, BM, BY, bm, by, M, K):
  function preprocess_weights_tl1 (line 479) | def preprocess_weights_tl1(
  function preprocess_two_weights_tl2 (line 523) | def preprocess_two_weights_tl2(M, K, weight_num, BM, BY, bm, by, weight,...
  function preprocess_three_weights_tl2 (line 549) | def preprocess_three_weights_tl2(M, K, weight_num, BM, BY, bm, by, weigh...
  function preprocess_weights_tl2 (line 597) | def preprocess_weights_tl2(
  function transform_to_tl1 (line 662) | def transform_to_tl1(x: np.ndarray):
  function transform_to_tl2 (line 668) | def transform_to_tl2(x: np.ndarray):
  function read_model_config (line 675) | def read_model_config(model_dir: str) -> dict[str, Any]:
  class LlamaModel (line 683) | class LlamaModel(Model):
    method set_vocab (line 686) | def set_vocab(self):
    method write_tensors (line 708) | def write_tensors(self):
    method set_gguf_parameters (line 834) | def set_gguf_parameters(self):
    method permute (line 862) | def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
    method modify_tensors (line 871) | def modify_tensors(self, data_torch: Tensor, name: str, bid: int | Non...
    method generate_extra_tensors (line 916) | def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
    method prepare_tensors (line 945) | def prepare_tensors(self):
  class BitnetModel (line 956) | class BitnetModel(Model):
    method set_vocab (line 959) | def set_vocab(self):
    method set_gguf_parameters (line 962) | def set_gguf_parameters(self):
    method weight_quant (line 970) | def weight_quant(self, weight):
    method modify_tensors (line 977) | def modify_tensors(self, data_torch: Tensor, name: str, bid: int | Non...
    method write_tensors (line 986) | def write_tensors(self):
  function parse_args (line 1095) | def parse_args() -> argparse.Namespace:
  function main (line 1126) | def main() -> None:

FILE: utils/convert-ms-to-gguf-bitnet.py
  class DataType (line 58) | class DataType:
    method elements_to_bytes (line 63) | def elements_to_bytes(self, n_elements: int) -> int:
  class UnquantizedDataType (line 68) | class UnquantizedDataType(DataType):
  class QuantizedDataType (line 79) | class QuantizedDataType(DataType):
    method quantize (line 84) | def quantize(self, arr: NDArray) -> NDArray:
    method elements_to_bytes (line 87) | def elements_to_bytes(self, n_elements: int) -> int:
  class Q8_0QuantizedDataType (line 93) | class Q8_0QuantizedDataType(QuantizedDataType):
    method quantize (line 95) | def quantize(self, arr: NDArray) -> NDArray:
  class GGMLFileType (line 177) | class GGMLFileType(enum.IntEnum):
    method type_for_tensor (line 183) | def type_for_tensor(self, name: str, tensor: LazyTensor) -> DataType:
  class Params (line 208) | class Params:
    method guessed (line 232) | def guessed(model: LazyModel) -> Params:
    method loadHFTransformerJson (line 269) | def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
    method loadOriginalParamsJson (line 326) | def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Par...
    method load (line 372) | def load(model_plus: ModelPlus) -> Params:
  class BaseVocab (line 395) | class BaseVocab(Protocol):
  class NoVocab (line 400) | class NoVocab(BaseVocab):
    method __repr__ (line 404) | def __repr__(self) -> str:
  class Vocab (line 409) | class Vocab(BaseVocab, Protocol):
    method __init__ (line 415) | def __init__(self, base_path: Path): ...
    method all_tokens (line 416) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:...
  class BpeVocab (line 419) | class BpeVocab(Vocab):
    method __init__ (line 423) | def __init__(self, base_path: Path):
    method bpe_tokens (line 475) | def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method added_tokens (line 481) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method all_tokens (line 486) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method __repr__ (line 490) | def __repr__(self) -> str:
  class SentencePieceVocab (line 494) | class SentencePieceVocab(Vocab):
    method __init__ (line 498) | def __init__(self, base_path: Path):
    method sentencepiece_tokens (line 528) | def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.To...
    method added_tokens (line 552) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method all_tokens (line 557) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method __repr__ (line 561) | def __repr__(self) -> str:
  class LlamaHfVocab (line 565) | class LlamaHfVocab(Vocab):
    method __init__ (line 569) | def __init__(self, base_path: Path):
    method hf_tokens (line 635) | def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method get_token_type (line 653) | def get_token_type(self, token_id: int, token_text: bytes, special_ids...
    method get_token_score (line 661) | def get_token_score(self, token_id: int) -> float:
    method added_tokens (line 666) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method has_newline_token (line 677) | def has_newline_token(self):
    method all_tokens (line 680) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method __repr__ (line 684) | def __repr__(self) -> str:
  function permute (line 694) | def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
  class Tensor (line 702) | class Tensor(ABC):
    method astype (line 707) | def astype(self, data_type: DataType) -> Self: ...
    method permute (line 709) | def permute(self, n_head: int, n_head_kv: int) -> Self: ...
    method permute_part (line 711) | def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Se...
    method part (line 713) | def part(self, n_part: int) -> Self: ...
    method to_ggml (line 715) | def to_ggml(self) -> GGMLCompatibleTensor: ...
  function bf16_to_fp32 (line 718) | def bf16_to_fp32(bf16_arr: np.ndarray[Any, np.dtype[np.uint16]]) -> NDAr...
  function preprocess_weights (line 723) | def preprocess_weights(
  function transform_to_i2 (line 768) | def transform_to_i2(x : NDArray):
  class UnquantizedTensor (line 781) | class UnquantizedTensor(Tensor):
    method __init__ (line 782) | def __init__(self, ndarray: NDArray, i2_scale: NDArray = None):
    method astype (line 788) | def astype(self, data_type: DataType) -> UnquantizedTensor:
    method to_ggml (line 796) | def to_ggml(self) -> Self:
    method permute_part (line 799) | def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Un...
    method part (line 803) | def part(self, n_part: int) -> UnquantizedTensor:
    method permute (line 807) | def permute(self, n_head: int, n_head_kv: int) -> UnquantizedTensor:
  function load_unquantized (line 811) | def load_unquantized(lazy_tensor: LazyTensor, expected_dtype: Any = None...
  class LazyTensor (line 831) | class LazyTensor:
    method load (line 837) | def load(self) -> Tensor:
    method astype (line 844) | def astype(self, data_type: DataType) -> LazyTensor:
    method validate_conversion_to (line 851) | def validate_conversion_to(self, data_type: DataType) -> None:
  class ModelPlus (line 860) | class ModelPlus:
  function merge_sharded (line 867) | def merge_sharded(models: list[LazyModel]) -> LazyModel:
  function merge_multifile_models (line 901) | def merge_multifile_models(models_plus: list[ModelPlus]) -> ModelPlus:
  function permute_lazy (line 924) | def permute_lazy(lazy_tensor: LazyTensor, n_head: int, n_head_kv: int) -...
  function permute_part_lazy (line 930) | def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int,...
  function part_lazy (line 937) | def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
  function forward_t (line 946) | def forward_t(x):
  function weight_quant (line 953) | def weight_quant(weight):
  function part_lazy_q (line 960) | def part_lazy_q(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
  function part_lazy_k (line 968) | def part_lazy_k(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
  function part_lazy_v (line 976) | def part_lazy_v(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
  function part_lazy_w1 (line 986) | def part_lazy_w1(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
  function part_lazy_w3 (line 995) | def part_lazy_w3(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
  function part_lazy_rope (line 1004) | def part_lazy_rope(lazy_tensor: LazyTensor) -> LazyTensor:
  function part_lazy_weight_quant (line 1011) | def part_lazy_weight_quant(lazy_tensor: LazyTensor, name) -> LazyTensor:
  function pack_experts_lazy (line 1020) | def pack_experts_lazy(lazy_tensors: list[LazyTensor]) -> LazyTensor:
  function lazy_load_safetensors_file (line 1029) | def lazy_load_safetensors_file(fp: IO[bytes], path: Path) -> ModelPlus:
  function must_read (line 1053) | def must_read(fp: IO[bytes], length: int) -> bytes:
  function lazy_load_file (line 1061) | def lazy_load_file(path: Path) -> ModelPlus:
  function bounded_parallel_map (line 1076) | def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[I...
  function check_vocab_size (line 1111) | def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool =...
  class OutputFile (line 1146) | class OutputFile:
    method __init__ (line 1147) | def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.G...
    method add_meta_arch (line 1150) | def add_meta_arch(self, params: Params) -> None:
    method extract_vocabulary_from_model (line 1193) | def extract_vocabulary_from_model(self, vocab: Vocab) -> tuple[list[by...
    method add_meta_vocab (line 1208) | def add_meta_vocab(self, vocab: Vocab) -> None:
    method add_meta_special_vocab (line 1219) | def add_meta_special_vocab(self, svocab: gguf.SpecialVocab) -> None:
    method add_tensor_info (line 1222) | def add_tensor_info(self, name: str, tensor: LazyTensor) -> None:
    method write_meta (line 1240) | def write_meta(self) -> None:
    method write_tensor_info (line 1244) | def write_tensor_info(self) -> None:
    method write_tensor_data (line 1247) | def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, con...
    method close (line 1279) | def close(self) -> None:
    method write_vocab_only (line 1283) | def write_vocab_only(
    method do_item (line 1301) | def do_item(item: tuple[str, LazyTensor]) -> tuple[DataType, NDArray]:
    method maybe_do_quantize (line 1307) | def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
    method write_all (line 1314) | def write_all(
  function pick_output_type (line 1347) | def pick_output_type(model: LazyModel, output_type_str: str | None) -> G...
  function convert_to_output_type (line 1364) | def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) ...
  function convert_model_names (line 1374) | def convert_model_names(model: LazyModel, params: Params, skip_unknown: ...
  function nth_multifile_path (line 1508) | def nth_multifile_path(path: Path, n: int) -> Path | None:
  function find_multifile_paths (line 1529) | def find_multifile_paths(path: Path) -> list[Path]:
  function load_some_model (line 1547) | def load_some_model(path: Path) -> ModelPlus:
  class VocabFactory (line 1570) | class VocabFactory:
    method __init__ (line 1573) | def __init__(self, path: Path):
    method _create_special_vocab (line 1576) | def _create_special_vocab(self, vocab: BaseVocab, model_parent_path: P...
    method _create_vocab_by_path (line 1586) | def _create_vocab_by_path(self, vocab_types: list[str]) -> Vocab:
    method load_vocab (line 1607) | def load_vocab(self, vocab_types: list[str] | None, model_parent_path:...
  function default_outfile (line 1621) | def default_outfile(model_paths: list[Path], file_type: GGMLFileType) ->...
  function do_dump_model (line 1637) | def do_dump_model(model_plus: ModelPlus) -> None:
  function main (line 1645) | def main(args_in: list[str] | None = None) -> None:

FILE: utils/convert.py
  class DataType (line 58) | class DataType:
    method elements_to_bytes (line 63) | def elements_to_bytes(self, n_elements: int) -> int:
  class UnquantizedDataType (line 68) | class UnquantizedDataType(DataType):
  class QuantizedDataType (line 79) | class QuantizedDataType(DataType):
    method quantize (line 84) | def quantize(self, arr: NDArray) -> NDArray:
    method elements_to_bytes (line 87) | def elements_to_bytes(self, n_elements: int) -> int:
  class Q8_0QuantizedDataType (line 93) | class Q8_0QuantizedDataType(QuantizedDataType):
    method quantize (line 95) | def quantize(self, arr: NDArray) -> NDArray:
  class GGMLFileType (line 177) | class GGMLFileType(enum.IntEnum):
    method type_for_tensor (line 183) | def type_for_tensor(self, name: str, tensor: LazyTensor) -> DataType:
  class Params (line 208) | class Params:
    method guessed (line 232) | def guessed(model: LazyModel) -> Params:
    method loadHFTransformerJson (line 269) | def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
    method loadOriginalParamsJson (line 326) | def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Par...
    method load (line 372) | def load(model_plus: ModelPlus) -> Params:
  class BaseVocab (line 395) | class BaseVocab(Protocol):
  class NoVocab (line 400) | class NoVocab(BaseVocab):
    method __repr__ (line 404) | def __repr__(self) -> str:
  class Vocab (line 409) | class Vocab(BaseVocab, Protocol):
    method __init__ (line 415) | def __init__(self, base_path: Path): ...
    method all_tokens (line 416) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:...
  class BpeVocab (line 419) | class BpeVocab(Vocab):
    method __init__ (line 423) | def __init__(self, base_path: Path):
    method bpe_tokens (line 475) | def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method added_tokens (line 481) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method all_tokens (line 486) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method __repr__ (line 490) | def __repr__(self) -> str:
  class SentencePieceVocab (line 494) | class SentencePieceVocab(Vocab):
    method __init__ (line 498) | def __init__(self, base_path: Path):
    method sentencepiece_tokens (line 528) | def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.To...
    method added_tokens (line 552) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method all_tokens (line 557) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method __repr__ (line 561) | def __repr__(self) -> str:
  class LlamaHfVocab (line 565) | class LlamaHfVocab(Vocab):
    method __init__ (line 569) | def __init__(self, base_path: Path):
    method hf_tokens (line 635) | def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method get_token_type (line 653) | def get_token_type(self, token_id: int, token_text: bytes, special_ids...
    method get_token_score (line 661) | def get_token_score(self, token_id: int) -> float:
    method added_tokens (line 666) | def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method has_newline_token (line 677) | def has_newline_token(self):
    method all_tokens (line 680) | def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
    method __repr__ (line 684) | def __repr__(self) -> str:
  function permute (line 694) | def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
  class Tensor (line 702) | class Tensor(ABC):
    method astype (line 707) | def astype(self, data_type: DataType) -> Self: ...
    method permute (line 709) | def permute(self, n_head: int, n_head_kv: int) -> Self: ...
    method permute_part (line 711) | def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Se...
    method part (line 713) | def part(self, n_part: int) -> Self: ...
    method to_ggml (line 715) | def to_ggml(self) -> GGMLCompatibleTensor: ...
  function bf16_to_fp32 (line 718) | def bf16_to_fp32(bf16_arr: np.ndarray[Any, np.dtype[np.uint16]]) -> NDAr...
  function preprocess_weights (line 723) | def preprocess_weights(
  function transform_to_i2 (line 768) | def transform_to_i2(x : NDArray):
  class UnquantizedTensor (line 781) | class UnquantizedTensor(Tensor):
    method __init__ (line 782) | def __init__(self, ndarray: NDArray, i2_scale: NDArray = None):
    method astype (line 788) | def astype(self, data_type: DataType) -> UnquantizedTensor:
    method to_ggml (line 796) | def to_ggml(self) -> Self:
    method permute_part (line 799) | def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Un...
    method part (line 803) | def part(self, n_part: int) -> UnquantizedTensor:
    method permute (line 807) | def permute(self, n_head: int, n_head_kv: int) -> UnquantizedTensor:
  function load_unquantized (line 811) | def load_unquantized(lazy_tensor: LazyTensor, expected_dtype: Any = None...
  class LazyTensor (line 831) | class LazyTensor:
    method load (line 837) | def load(self) -> Tensor:
    method astype (line 844) | def astype(self, data_type: DataType) -> LazyTensor:
    method validate_conversion_to (line 851) | def validate_conversion_to(self, data_type: DataType) -> None:
  class ModelPlus (line 860) | class ModelPlus:
  function merge_sharded (line 867) | def merge_sharded(models: list[LazyModel]) -> LazyModel:
  function merge_multifile_models (line 901) | def merge_multifile_models(models_plus: list[ModelPlus]) -> ModelPlus:
  function permute_lazy (line 924) | def permute_lazy(lazy_tensor: LazyTensor, n_head: int, n_head_kv: int) -...
  function permute_part_lazy (line 930) | def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int,...
  function part_lazy (line 938) | def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
  function pack_experts_lazy (line 946) | def pack_experts_lazy(lazy_tensors: list[LazyTensor]) -> LazyTensor:
  function lazy_load_safetensors_file (line 955) | def lazy_load_safetensors_file(fp: IO[bytes], path: Path) -> ModelPlus:
  function must_read (line 979) | def must_read(fp: IO[bytes], length: int) -> bytes:
  function lazy_load_file (line 987) | def lazy_load_file(path: Path) -> ModelPlus:
  function bounded_parallel_map (line 1002) | def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[I...
  function check_vocab_size (line 1037) | def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool =...
  class OutputFile (line 1072) | class OutputFile:
    method __init__ (line 1073) | def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.G...
    method add_meta_arch (line 1076) | def add_meta_arch(self, params: Params) -> None:
    method extract_vocabulary_from_model (line 1123) | def extract_vocabulary_from_model(self, vocab: Vocab) -> tuple[list[by...
    method add_meta_vocab (line 1138) | def add_meta_vocab(self, vocab: Vocab) -> None:
    method add_meta_special_vocab (line 1150) | def add_meta_special_vocab(self, svocab: gguf.SpecialVocab) -> None:
    method add_tensor_info (line 1153) | def add_tensor_info(self, name: str, tensor: LazyTensor) -> None:
    method write_meta (line 1171) | def write_meta(self) -> None:
    method write_tensor_info (line 1175) | def write_tensor_info(self) -> None:
    method write_tensor_data (line 1178) | def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, con...
    method close (line 1210) | def close(self) -> None:
    method write_vocab_only (line 1214) | def write_vocab_only(
    method do_item (line 1232) | def do_item(item: tuple[str, LazyTensor]) -> tuple[DataType, NDArray]:
    method maybe_do_quantize (line 1238) | def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
    method write_all (line 1245) | def write_all(
  function pick_output_type (line 1275) | def pick_output_type(model: LazyModel, output_type_str: str | None) -> G...
  function convert_to_output_type (line 1292) | def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) ...
  function convert_model_names (line 1302) | def convert_model_names(model: LazyModel, params: Params, skip_unknown: ...
  function nth_multifile_path (line 1363) | def nth_multifile_path(path: Path, n: int) -> Path | None:
  function find_multifile_paths (line 1384) | def find_multifile_paths(path: Path) -> list[Path]:
  function load_some_model (line 1402) | def load_some_model(path: Path) -> ModelPlus:
  class VocabFactory (line 1425) | class VocabFactory:
    method __init__ (line 1428) | def __init__(self, path: Path):
    method _create_special_vocab (line 1431) | def _create_special_vocab(self, vocab: BaseVocab, model_parent_path: P...
    method _create_vocab_by_path (line 1441) | def _create_vocab_by_path(self, vocab_types: list[str]) -> Vocab:
    method load_vocab (line 1462) | def load_vocab(self, vocab_types: list[str] | None, model_parent_path:...
  function default_outfile (line 1476) | def default_outfile(model_paths: list[Path], file_type: GGMLFileType) ->...
  function do_dump_model (line 1492) | def do_dump_model(model_plus: ModelPlus) -> None:
  function main (line 1500) | def main(args_in: list[str] | None = None) -> None:

FILE: utils/e2e_benchmark.py
  function run_command (line 8) | def run_command(command, shell=False, log_step=None):
  function run_benchmark (line 25) | def run_benchmark():
  function parse_args (line 48) | def parse_args():

FILE: utils/generate-dummy-bitnet-model.py
  class SentencePieceTokenTypes (line 108) | class SentencePieceTokenTypes(IntEnum):
  class Model (line 120) | class Model(ABC):
    method __init__ (line 123) | def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_bi...
    method model_arch (line 140) | def model_arch(self) -> gguf.MODEL_ARCH:
    method find_hparam (line 143) | def find_hparam(self, keys: Sequence[str], optional: bool = False) -> ...
    method set_vocab (line 151) | def set_vocab(self):
    method get_tensors (line 154) | def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
    method match_model_tensor_name (line 169) | def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, b...
    method map_tensor_name (line 182) | def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = ("....
    method set_gguf_parameters (line 188) | def set_gguf_parameters(self):
    method write_tensors (line 231) | def write_tensors(self):
    method write (line 271) | def write(self):
    method write_vocab (line 278) | def write_vocab(self):
    method count_model_parts (line 284) | def count_model_parts(dir_model: Path, prefix: str) -> int:
    method load_hparams (line 293) | def load_hparams(dir_model):
    method register (line 298) | def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
    method from_model_architecture (line 308) | def from_model_architecture(cls, arch):
    method _is_model_safetensors (line 314) | def _is_model_safetensors(self) -> bool:
    method _get_part_names (line 317) | def _get_part_names(self):
    method get_vocab_base (line 328) | def get_vocab_base(self) -> tuple[list[str], list[int], str]:
    method get_vocab_base_pre (line 361) | def get_vocab_base_pre(self, tokenizer) -> str:
    method _set_vocab_sentencepiece (line 431) | def _set_vocab_sentencepiece(self):
  function process_tl1 (line 498) | def process_tl1(weight, BM, BY, bm, by, M, K):
  function preprocess_weights_tl1 (line 528) | def preprocess_weights_tl1(
  function preprocess_two_weights_tl2 (line 577) | def preprocess_two_weights_tl2(M, K, weight_num, BM, BY, bm, by, weight,...
  function preprocess_three_weights_tl2 (line 620) | def preprocess_three_weights_tl2(M, K, weight_num, BM, BY, bm, by, weigh...
  function preprocess_weights_tl2 (line 699) | def preprocess_weights_tl2(
  class BitnetModel (line 777) | class BitnetModel(Model):
    method set_params (line 781) | def set_params(self, params: str):
    method set_vocab (line 793) | def set_vocab(self):
    method set_gguf_parameters (line 796) | def set_gguf_parameters(self):
    method weight_quant (line 804) | def weight_quant(self, weight):
    method transform_to_tl1 (line 811) | def transform_to_tl1(self, x: np.ndarray):
    method transform_to_tl2 (line 817) | def transform_to_tl2(self, x: np.ndarray):
    method generate_tensors (line 824) | def generate_tensors(self) -> Iterator[tuple[str, np.ndarray]]:
    method modify_tensors (line 852) | def modify_tensors(self, data_torch: Tensor, name: str, bid: int | Non...
    method write_tensors (line 861) | def write_tensors(self):
  function main (line 963) | def main() -> None:
  function read_gguf_file (line 990) | def read_gguf_file(gguf_file_path):
  function parse_args (line 1019) | def parse_args() -> argparse.Namespace:

FILE: utils/preprocess-huggingface-bitnet.py
  function quant_weight_fp16 (line 5) | def quant_weight_fp16(weight):
  function quant_model (line 11) | def quant_model(input, output):

FILE: utils/quantize_embeddings.py
  class EmbeddingQuantizer (line 17) | class EmbeddingQuantizer:
    method __init__ (line 18) | def __init__(self, input_model, output_dir, quantize_bin="../build/bin...
    method quantize (line 46) | def quantize(self, embedding_type, output_suffix):
    method benchmark_model (line 127) | def benchmark_model(self, output_suffix):
    method parse_benchmark_output (line 187) | def parse_benchmark_output(self, output, output_suffix):
    method cleanup_model (line 256) | def cleanup_model(self, output_suffix):
    method run_all_quantizations (line 275) | def run_all_quantizations(self, types_to_quantize):
    method save_results_to_csv (line 329) | def save_results_to_csv(self):
    method print_summary (line 370) | def print_summary(self, total_duration):
  function main (line 394) | def main():

FILE: utils/test_perplexity.py
  class PerplexityTester (line 20) | class PerplexityTester:
    method __init__ (line 21) | def __init__(self, model_path, llama_perplexity_bin="../build/bin/llam...
    method find_datasets (line 63) | def find_datasets(self):
    method create_quick_dataset (line 91) | def create_quick_dataset(self, dataset_path, num_chars=4096):
    method cleanup_temp_files (line 107) | def cleanup_temp_files(self):
    method run_perplexity_test (line 116) | def run_perplexity_test(self, dataset_name, dataset_path, threads=16, ...
    method parse_perplexity (line 207) | def parse_perplexity(self, output):
    method quantize_embedding (line 241) | def quantize_embedding(self, embedding_type, output_suffix):
    method cleanup_model (line 314) | def cleanup_model(self, model_path):
    method run_all_tests (line 326) | def run_all_tests(self, threads=16, ctx_size=512):
    method save_results (line 441) | def save_results(self):
    method print_summary (line 490) | def print_summary(self, total_time):
  function main (line 539) | def main():

FILE: utils/tune_gemm_config.py
  class GemmTuner (line 18) | class GemmTuner:
    method __init__ (line 19) | def __init__(self, config_path, model_path, threads=16):
    method backup_config (line 27) | def backup_config(self):
    method restore_config (line 32) | def restore_config(self):
    method generate_config (line 37) | def generate_config(self, act_parallel, row_block_size, col_block_size...
    method rebuild_project (line 52) | def rebuild_project(self):
    method run_benchmark (line 66) | def run_benchmark(self):
    method parse_throughput (line 93) | def parse_throughput(self, output):
    method test_configuration (line 110) | def test_configuration(self, act_parallel, row_block_size, col_block_s...
    method save_results (line 153) | def save_results(self, csv_path):
    method find_best_config (line 166) | def find_best_config(self):
    method run_tuning (line 175) | def run_tuning(self, configurations, output_csv=None):
  function generate_configurations (line 262) | def generate_configurations():
  function main (line 296) | def main():