SYMBOL INDEX (467 symbols across 34 files)

FILE: app.py
  function revise (line 40) | def revise(history, latest_message):
  function revoke (line 48) | def revoke(history, last_state):
  function interrupt (line 57) | def interrupt(allow_generate):
  function regenerate (line 61) | def regenerate(last_state, max_length, top_p, temperature, allow_generate):

FILE: app_fastapi.py
  function getLogger (line 34) | def getLogger(name, file_name, use_formatter=True):
  function start_server (line 59) | def start_server(quantize_level, http_address: str, port: int, gpu_id: s...

FILE: chatglm/configuration_chatglm.py
  class ChatGLMConfig (line 9) | class ChatGLMConfig(PretrainedConfig):
    method __init__ (line 59) | def __init__(

FILE: chatglm/modeling_chatglm.py
  class InvalidScoreLogitsProcessor (line 54) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 55) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  function load_tf_weights_in_chatglm_6b (line 62) | def load_tf_weights_in_chatglm_6b(model, config, tf_checkpoint_path):
  class PrefixEncoder (line 136) | class PrefixEncoder(torch.nn.Module):
    method __init__ (line 143) | def __init__(self, config):
    method forward (line 157) | def forward(self, prefix: torch.Tensor):
  function gelu_impl (line 167) | def gelu_impl(x):
  function gelu (line 173) | def gelu(x):
  class RotaryEmbedding (line 177) | class RotaryEmbedding(torch.nn.Module):
    method __init__ (line 178) | def __init__(self, dim, base=10000, precision=torch.half, learnable=Fa...
    method _load_from_state_dict (line 194) | def _load_from_state_dict(self, state_dict, prefix, local_metadata, st...
    method forward (line 198) | def forward(self, x, seq_dim=1, seq_len=None):
    method _apply (line 221) | def _apply(self, fn):
  function rotate_half (line 229) | def rotate_half(x):
  function apply_rotary_pos_emb_index (line 235) | def apply_rotary_pos_emb_index(q, k, cos, sin, position_id):
  function attention_fn (line 243) | def attention_fn(
  function default_init (line 350) | def default_init(cls, *args, **kwargs):
  class SelfAttention (line 354) | class SelfAttention(torch.nn.Module):
    method __init__ (line 355) | def __init__(self, hidden_size, num_attention_heads,
    method attention_mask_func (line 406) | def attention_mask_func(attention_scores, attention_mask):
    method split_tensor_along_last_dim (line 410) | def split_tensor_along_last_dim(self, tensor, num_partitions,
    method forward (line 430) | def forward(
  class GEGLU (line 497) | class GEGLU(torch.nn.Module):
    method __init__ (line 498) | def __init__(self):
    method forward (line 502) | def forward(self, x):
  class GLU (line 508) | class GLU(torch.nn.Module):
    method __init__ (line 509) | def __init__(self, hidden_size, inner_hidden_size=None,
    method forward (line 540) | def forward(self, hidden_states):
  class GLMBlock (line 555) | class GLMBlock(torch.nn.Module):
    method __init__ (line 556) | def __init__(
    method forward (line 608) | def forward(
  class ChatGLMPreTrainedModel (line 662) | class ChatGLMPreTrainedModel(PreTrainedModel):
    method __init__ (line 674) | def __init__(self, *inputs, **kwargs):
    method _init_weights (line 677) | def _init_weights(self, module: nn.Module):
    method get_masks (line 681) | def get_masks(self, input_ids, device):
    method get_position_ids (line 693) | def get_position_ids(self, input_ids, mask_positions, device, use_gmas...
    method _set_gradient_checkpointing (line 716) | def _set_gradient_checkpointing(self, module, value=False):
  class ChatGLMModel (line 786) | class ChatGLMModel(ChatGLMPreTrainedModel):
    method __init__ (line 802) | def __init__(self, config: ChatGLMConfig, empty_init=True):
    method get_input_embeddings (line 862) | def get_input_embeddings(self):
    method set_input_embeddings (line 865) | def set_input_embeddings(self, new_embeddings: torch.Tensor):
    method get_prompt (line 868) | def get_prompt(self, batch_size, device, dtype=torch.half):
    method forward (line 890) | def forward(
  class ChatGLMForConditionalGeneration (line 1032) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
    method __init__ (line 1033) | def __init__(self, config: ChatGLMConfig, empty_init=True):
    method get_output_embeddings (line 1064) | def get_output_embeddings(self):
    method set_output_embeddings (line 1067) | def set_output_embeddings(self, new_embeddings):
    method _update_model_kwargs_for_generation (line 1070) | def _update_model_kwargs_for_generation(
    method prepare_inputs_for_generation (line 1105) | def prepare_inputs_for_generation(
    method forward (line 1175) | def forward(
    method _reorder_cache (line 1234) | def _reorder_cache(
    method process_response (line 1252) | def process_response(self, response):
    method chat (line 1268) | def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] =...
    method stream_chat (line 1294) | def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, ...
    method stream_generate (line 1320) | def stream_generate(
    method quantize (line 1421) | def quantize(self, bits: int, empty_init=False, **kwargs):

FILE: chatglm/quantization.py
  class Kernel (line 18) | class Kernel:
    method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]):
  class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function):
    method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to...
    method backward (line 58) | def backward(ctx, grad_output: torch.Tensor):
  function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor):  # (n, m)
  function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso...
  class QuantizedLinear (line 120) | class QuantizedLinear(Linear):
    method __init__ (line 121) | def __init__(self, weight_bit_width: int, weight_tensor=None, bias_ten...
    method forward (line 146) | def forward(self, input):
  function quantize (line 153) | def quantize(model, weight_bit_width, empty_init=False, **kwargs):

FILE: chatglm/tokenization_chatglm.py
  class TextTokenizer (line 19) | class TextTokenizer:
    method __init__ (line 20) | def __init__(self, model_path):
    method encode (line 25) | def encode(self, text):
    method decode (line 28) | def decode(self, ids: List[int]):
    method tokenize (line 31) | def tokenize(self, text):
    method convert_tokens_to_ids (line 34) | def convert_tokens_to_ids(self, tokens):
    method convert_token_to_id (line 37) | def convert_token_to_id(self, token):
    method convert_id_to_token (line 40) | def convert_id_to_token(self, idx):
    method __len__ (line 43) | def __len__(self):
  class SPTokenizer (line 47) | class SPTokenizer:
    method __init__ (line 48) | def __init__(
    method _get_text_tokenizer (line 63) | def _get_text_tokenizer(self):
    method get_blank_token (line 67) | def get_blank_token(length: int):
    method get_tab_token (line 72) | def get_tab_token():
    method num_text_tokens (line 76) | def num_text_tokens(self):
    method num_tokens (line 80) | def num_tokens(self):
    method _encode_whitespaces (line 84) | def _encode_whitespaces(text: str, max_len: int = 80):
    method _preprocess (line 90) | def _preprocess(self, text: str, linebreak=True, whitespaces=True):
    method encode (line 97) | def encode(
    method decode (line 114) | def decode(self, text_ids: List[int]) -> str:
    method tokenize (line 124) | def tokenize(
    method __getitem__ (line 140) | def __getitem__(self, x: Union[int, str]):
  class ChatGLMTokenizer (line 155) | class ChatGLMTokenizer(PreTrainedTokenizer):
    method __init__ (line 168) | def __init__(
    method gmask_token_id (line 214) | def gmask_token_id(self) -> Optional[int]:
    method end_token_id (line 220) | def end_token_id(self) -> Optional[int]:
    method vocab_size (line 230) | def vocab_size(self):
    method get_vocab (line 234) | def get_vocab(self):
    method preprocess_text (line 240) | def preprocess_text(self, inputs):
    method _tokenize (line 251) | def _tokenize(self, text, **kwargs):
    method _decode (line 259) | def _decode(
    method _convert_token_to_id (line 274) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 278) | def _convert_id_to_token(self, index):
    method save_vocabulary (line 282) | def save_vocabulary(self, save_directory, filename_prefix=None):
    method build_inputs_with_special_tokens (line 310) | def build_inputs_with_special_tokens(
    method _pad (line 336) | def _pad(

FILE: chatglm2/configuration_chatglm.py
  class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig):
    method __init__ (line 6) | def __init__(

FILE: chatglm2/modeling_chatglm.py
  function default_init (line 47) | def default_init(cls, *args, **kwargs):
  class InvalidScoreLogitsProcessor (line 51) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 52) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  function split_tensor_along_last_dim (line 59) | def split_tensor_along_last_dim(
  class RotaryEmbedding (line 87) | class RotaryEmbedding(nn.Module):
    method __init__ (line 88) | def __init__(self, dim, original_impl=False, device=None, dtype=None):
    method forward_impl (line 95) | def forward_impl(
    method forward (line 120) | def forward(self, max_seq_len, offset=0):
  function apply_rotary_pos_emb (line 127) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t...
  class RMSNorm (line 147) | class RMSNorm(torch.nn.Module):
    method __init__ (line 148) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None...
    method forward (line 153) | def forward(self, hidden_states: torch.Tensor):
  class CoreAttention (line 161) | class CoreAttention(torch.nn.Module):
    method __init__ (line 162) | def __init__(self, config: ChatGLMConfig, layer_number):
    method forward (line 187) | def forward(self, query_layer, key_layer, value_layer, attention_mask):
  class SelfAttention (line 279) | class SelfAttention(torch.nn.Module):
    method __init__ (line 286) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method _allocate_memory (line 315) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev...
    method forward (line 329) | def forward(
  function _config_to_kwargs (line 418) | def _config_to_kwargs(args):
  class MLP (line 425) | class MLP(torch.nn.Module):
    method __init__ (line 433) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 462) | def forward(self, hidden_states):
  class GLMBlock (line 471) | class GLMBlock(torch.nn.Module):
    method __init__ (line 478) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method forward (line 502) | def forward(
  class GLMTransformer (line 545) | class GLMTransformer(torch.nn.Module):
    method __init__ (line 548) | def __init__(self, config: ChatGLMConfig, device=None):
    method _get_layer (line 569) | def _get_layer(self, layer_number):
    method forward (line 572) | def forward(
  class ChatGLMPreTrainedModel (line 608) | class ChatGLMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 620) | def _init_weights(self, module: nn.Module):
    method get_masks (line 624) | def get_masks(self, input_ids, past_key_values, padding_mask=None):
    method get_position_ids (line 642) | def get_position_ids(self, input_ids, device):
    method _set_gradient_checkpointing (line 647) | def _set_gradient_checkpointing(self, module, value=False):
  class Embedding (line 652) | class Embedding(torch.nn.Module):
    method __init__ (line 655) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 668) | def forward(self, input_ids):
  class ChatGLMModel (line 680) | class ChatGLMModel(ChatGLMPreTrainedModel):
    method __init__ (line 681) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True):
    method get_input_embeddings (line 705) | def get_input_embeddings(self):
    method forward (line 708) | def forward(
    method quantize (line 759) | def quantize(self, weight_bit_width: int):
  class ChatGLMForConditionalGeneration (line 765) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
    method __init__ (line 766) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
    method _update_model_kwargs_for_generation (line 777) | def _update_model_kwargs_for_generation(
    method prepare_inputs_for_generation (line 808) | def prepare_inputs_for_generation(
    method forward (line 831) | def forward(
    method _reorder_cache (line 892) | def _reorder_cache(
    method process_response (line 910) | def process_response(self, response):
    method build_inputs (line 915) | def build_inputs(self, tokenizer, query: str, history: List[Tuple[str,...
    method build_stream_inputs (line 924) | def build_stream_inputs(self, tokenizer, query: str, history: List[Tup...
    method chat (line 938) | def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] =...
    method stream_chat (line 956) | def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, ...
    method stream_generate (line 991) | def stream_generate(
    method quantize (line 1095) | def quantize(self, bits: int, empty_init=False, device=None, **kwargs):

FILE: chatglm2/quantization.py
  class Kernel (line 18) | class Kernel:
    method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]):
  class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function):
    method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to...
    method backward (line 58) | def backward(ctx, grad_output: torch.Tensor):
  function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor):  # (n, m)
  function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso...
  class QuantizedLinear (line 124) | class QuantizedLinear(torch.nn.Module):
    method __init__ (line 125) | def __init__(self, weight_bit_width: int, weight, bias=None, device="c...
    method forward (line 145) | def forward(self, input):
  function quantize (line 152) | def quantize(model, weight_bit_width, empty_init=False, device=None):

FILE: chatglm2/tokenization_chatglm.py
  class SPTokenizer (line 10) | class SPTokenizer:
    method __init__ (line 11) | def __init__(self, model_path: str):
    method tokenize (line 31) | def tokenize(self, s: str):
    method encode (line 34) | def encode(self, s: str, bos: bool = False, eos: bool = False) -> List...
    method decode (line 43) | def decode(self, t: List[int]) -> str:
    method decode_tokens (line 46) | def decode_tokens(self, tokens: List[str]) -> str:
    method convert_token_to_id (line 50) | def convert_token_to_id(self, token):
    method convert_id_to_token (line 56) | def convert_id_to_token(self, index):
  class ChatGLMTokenizer (line 63) | class ChatGLMTokenizer(PreTrainedTokenizer):
    method __init__ (line 68) | def __init__(self, vocab_file, padding_side="left", **kwargs):
    method get_command (line 80) | def get_command(self, token):
    method pad_token (line 87) | def pad_token(self) -> str:
    method pad_token_id (line 91) | def pad_token_id(self):
    method vocab_size (line 95) | def vocab_size(self):
    method get_vocab (line 98) | def get_vocab(self):
    method _tokenize (line 104) | def _tokenize(self, text, **kwargs):
    method _convert_token_to_id (line 107) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 111) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 115) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method save_vocabulary (line 118) | def save_vocabulary(self, save_directory, filename_prefix=None):
    method get_prefix_tokens (line 146) | def get_prefix_tokens(self):
    method build_inputs_with_special_tokens (line 150) | def build_inputs_with_special_tokens(
    method _pad (line 175) | def _pad(

FILE: chatglm3/configuration_chatglm.py
  class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig):
    method __init__ (line 6) | def __init__(

FILE: chatglm3/modeling_chatglm.py
  function default_init (line 49) | def default_init(cls, *args, **kwargs):
  class InvalidScoreLogitsProcessor (line 53) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 54) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  class PrefixEncoder (line 61) | class PrefixEncoder(torch.nn.Module):
    method __init__ (line 68) | def __init__(self, config: ChatGLMConfig):
    method forward (line 84) | def forward(self, prefix: torch.Tensor):
  function split_tensor_along_last_dim (line 93) | def split_tensor_along_last_dim(
  class RotaryEmbedding (line 121) | class RotaryEmbedding(nn.Module):
    method __init__ (line 122) | def __init__(self, dim, original_impl=False, device=None, dtype=None):
    method forward_impl (line 129) | def forward_impl(
    method forward (line 154) | def forward(self, max_seq_len, offset=0):
  function apply_rotary_pos_emb (line 161) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t...
  class RMSNorm (line 181) | class RMSNorm(torch.nn.Module):
    method __init__ (line 182) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None...
    method forward (line 187) | def forward(self, hidden_states: torch.Tensor):
  class CoreAttention (line 195) | class CoreAttention(torch.nn.Module):
    method __init__ (line 196) | def __init__(self, config: ChatGLMConfig, layer_number):
    method forward (line 221) | def forward(self, query_layer, key_layer, value_layer, attention_mask):
  class SelfAttention (line 313) | class SelfAttention(torch.nn.Module):
    method __init__ (line 320) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method _allocate_memory (line 349) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev...
    method forward (line 363) | def forward(
  function _config_to_kwargs (line 452) | def _config_to_kwargs(args):
  class MLP (line 459) | class MLP(torch.nn.Module):
    method __init__ (line 467) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 496) | def forward(self, hidden_states):
  class GLMBlock (line 505) | class GLMBlock(torch.nn.Module):
    method __init__ (line 512) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method forward (line 536) | def forward(
  class GLMTransformer (line 579) | class GLMTransformer(torch.nn.Module):
    method __init__ (line 582) | def __init__(self, config: ChatGLMConfig, device=None):
    method _get_layer (line 605) | def _get_layer(self, layer_number):
    method forward (line 608) | def forward(
  class ChatGLMPreTrainedModel (line 661) | class ChatGLMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 673) | def _init_weights(self, module: nn.Module):
    method get_masks (line 677) | def get_masks(self, input_ids, past_key_values, padding_mask=None):
    method get_position_ids (line 695) | def get_position_ids(self, input_ids, device):
    method _set_gradient_checkpointing (line 700) | def _set_gradient_checkpointing(self, module, value=False):
  class Embedding (line 705) | class Embedding(torch.nn.Module):
    method __init__ (line 708) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 721) | def forward(self, input_ids):
  class ChatGLMModel (line 733) | class ChatGLMModel(ChatGLMPreTrainedModel):
    method __init__ (line 734) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True):
    method get_input_embeddings (line 768) | def get_input_embeddings(self):
    method get_prompt (line 771) | def get_prompt(self, batch_size, device, dtype=torch.half):
    method forward (line 786) | def forward(
    method quantize (line 845) | def quantize(self, weight_bit_width: int):
  class ChatGLMForConditionalGeneration (line 851) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
    method __init__ (line 852) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
    method _update_model_kwargs_for_generation (line 863) | def _update_model_kwargs_for_generation(
    method prepare_inputs_for_generation (line 894) | def prepare_inputs_for_generation(
    method forward (line 920) | def forward(
    method _reorder_cache (line 981) | def _reorder_cache(
    method process_response (line 999) | def process_response(self, output, history):
    method chat (line 1024) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role...
    method stream_chat (line 1046) | def stream_chat(self, tokenizer, query: str, history: List[Dict] = Non...
    method stream_generate (line 1087) | def stream_generate(
    method quantize (line 1194) | def quantize(self, bits: int, empty_init=False, device=None, **kwargs):
  class ChatGLMForSequenceClassification (line 1213) | class ChatGLMForSequenceClassification(ChatGLMPreTrainedModel):
    method __init__ (line 1214) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
    method forward (line 1230) | def forward(

FILE: chatglm3/quantization.py
  class Kernel (line 18) | class Kernel:
    method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]):
  class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function):
    method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to...
    method backward (line 58) | def backward(ctx, grad_output: torch.Tensor):
  function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor):  # (n, m)
  function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso...
  class QuantizedLinear (line 124) | class QuantizedLinear(torch.nn.Module):
    method __init__ (line 125) | def __init__(self, weight_bit_width: int, weight, bias=None, device="c...
    method forward (line 145) | def forward(self, input):
  function quantize (line 152) | def quantize(model, weight_bit_width, empty_init=False, device=None):

FILE: chatglm3/tokenization_chatglm.py
  class SPTokenizer (line 11) | class SPTokenizer:
    method __init__ (line 12) | def __init__(self, model_path: str):
    method tokenize (line 34) | def tokenize(self, s: str, encode_special_tokens=False):
    method encode (line 49) | def encode(self, s: str, bos: bool = False, eos: bool = False) -> List...
    method decode (line 58) | def decode(self, t: List[int]) -> str:
    method decode_tokens (line 72) | def decode_tokens(self, tokens: List[str]) -> str:
    method convert_token_to_id (line 76) | def convert_token_to_id(self, token):
    method convert_id_to_token (line 82) | def convert_id_to_token(self, index):
  class ChatGLMTokenizer (line 91) | class ChatGLMTokenizer(PreTrainedTokenizer):
    method __init__ (line 96) | def __init__(self, vocab_file, padding_side="left", clean_up_tokenizat...
    method get_command (line 112) | def get_command(self, token):
    method unk_token (line 119) | def unk_token(self) -> str:
    method pad_token (line 123) | def pad_token(self) -> str:
    method pad_token_id (line 127) | def pad_token_id(self):
    method eos_token (line 131) | def eos_token(self) -> str:
    method eos_token_id (line 135) | def eos_token_id(self):
    method vocab_size (line 139) | def vocab_size(self):
    method get_vocab (line 142) | def get_vocab(self):
    method _tokenize (line 148) | def _tokenize(self, text, **kwargs):
    method _convert_token_to_id (line 151) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 155) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 159) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method save_vocabulary (line 162) | def save_vocabulary(self, save_directory, filename_prefix=None):
    method get_prefix_tokens (line 190) | def get_prefix_tokens(self):
    method build_single_message (line 194) | def build_single_message(self, role, metadata, message):
    method build_chat_input (line 201) | def build_chat_input(self, query, history=None, role="user"):
    method build_inputs_with_special_tokens (line 214) | def build_inputs_with_special_tokens(
    method _pad (line 239) | def _pad(

FILE: glm4/configuration_chatglm.py
  class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig):
    method __init__ (line 7) | def __init__(

FILE: glm4/modeling_chatglm.py
  function default_init (line 53) | def default_init(cls, *args, **kwargs):
  class InvalidScoreLogitsProcessor (line 57) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 58) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  function split_tensor_along_last_dim (line 65) | def split_tensor_along_last_dim(
  class RotaryEmbedding (line 93) | class RotaryEmbedding(nn.Module):
    method __init__ (line 94) | def __init__(self, dim, rope_ratio=1, original_impl=False, device=None...
    method forward_impl (line 102) | def forward_impl(
    method forward (line 128) | def forward(self, max_seq_len, offset=0):
  function apply_rotary_pos_emb (line 135) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t...
  class RMSNorm (line 155) | class RMSNorm(torch.nn.Module):
    method __init__ (line 156) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None...
    method forward (line 161) | def forward(self, hidden_states: torch.Tensor):
  class CoreAttention (line 169) | class CoreAttention(torch.nn.Module):
    method __init__ (line 170) | def __init__(self, config: ChatGLMConfig, layer_number):
    method forward (line 196) | def forward(self, query_layer, key_layer, value_layer, attention_mask):
  class SdpaAttention (line 268) | class SdpaAttention(CoreAttention):
    method forward (line 269) | def forward(self, query_layer, key_layer, value_layer, attention_mask):
  function _get_unpad_data (line 286) | def _get_unpad_data(attention_mask):
  class FlashAttention2 (line 299) | class FlashAttention2(CoreAttention):
    method __init__ (line 300) | def __init__(self, *args, **kwargs):
    method forward (line 304) | def forward(self, query_states, key_states, value_states, attention_ma...
    method _upad_input (line 345) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m...
  class SelfAttention (line 391) | class SelfAttention(torch.nn.Module):
    method __init__ (line 398) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method _allocate_memory (line 427) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev...
    method forward (line 441) | def forward(
  function _config_to_kwargs (line 537) | def _config_to_kwargs(args):
  class MLP (line 544) | class MLP(torch.nn.Module):
    method __init__ (line 552) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 581) | def forward(self, hidden_states):
  class GLMBlock (line 590) | class GLMBlock(torch.nn.Module):
    method __init__ (line 597) | def __init__(self, config: ChatGLMConfig, layer_number, device=None):
    method forward (line 621) | def forward(
  class GLMTransformer (line 664) | class GLMTransformer(torch.nn.Module):
    method __init__ (line 667) | def __init__(self, config: ChatGLMConfig, device=None):
    method _get_layer (line 690) | def _get_layer(self, layer_number):
    method forward (line 693) | def forward(
  class ChatGLMPreTrainedModel (line 755) | class ChatGLMPreTrainedModel(PreTrainedModel):
    method _init_weights (line 769) | def _init_weights(self, module: nn.Module):
    method get_masks (line 773) | def get_masks(self, input_ids, past_key_values, padding_mask=None):
    method get_position_ids (line 795) | def get_position_ids(self, input_ids, device):
    method gradient_checkpointing_enable (line 800) | def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=...
  class Embedding (line 805) | class Embedding(torch.nn.Module):
    method __init__ (line 808) | def __init__(self, config: ChatGLMConfig, device=None):
    method forward (line 821) | def forward(self, input_ids):
  class ChatGLMModel (line 831) | class ChatGLMModel(ChatGLMPreTrainedModel):
    method __init__ (line 832) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True):
    method get_input_embeddings (line 859) | def get_input_embeddings(self):
    method set_input_embeddings (line 862) | def set_input_embeddings(self, value):
    method forward (line 865) | def forward(
  class ChatGLMForConditionalGeneration (line 923) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
    method __init__ (line 924) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
    method _update_model_kwargs_for_generation (line 931) | def _update_model_kwargs_for_generation(
    method prepare_inputs_for_generation (line 962) | def prepare_inputs_for_generation(
    method forward (line 988) | def forward(
    method _reorder_cache (line 1048) | def _reorder_cache(
    method process_response (line 1066) | def process_response(self, output, history):
    method chat (line 1088) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role...
    method stream_chat (line 1111) | def stream_chat(self, tokenizer, query: str, history: List[Dict] = Non...
    method stream_generate (line 1154) | def stream_generate(
  class ChatGLMForSequenceClassification (line 1262) | class ChatGLMForSequenceClassification(ChatGLMPreTrainedModel):
    method __init__ (line 1263) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
    method forward (line 1276) | def forward(

FILE: glm4/tokenization_chatglm.py
  class ChatGLM4Tokenizer (line 13) | class ChatGLM4Tokenizer(PreTrainedTokenizer):
    method __init__ (line 17) | def __init__(
    method vocab_size (line 57) | def vocab_size(self):
    method get_vocab (line 60) | def get_vocab(self):
    method convert_tokens_to_string (line 66) | def convert_tokens_to_string(self, tokens: List[Union[bytes, str, int]...
    method _tokenize (line 86) | def _tokenize(self, text, **kwargs):
    method _convert_token_to_id (line 93) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 97) | def _convert_id_to_token(self, index):
    method save_vocabulary (line 101) | def save_vocabulary(self, save_directory, filename_prefix=None):
    method get_prefix_tokens (line 129) | def get_prefix_tokens(self):
    method build_single_message (line 133) | def build_single_message(self, role, metadata, message, tokenize=True):
    method build_inputs_with_special_tokens (line 237) | def build_inputs_with_special_tokens(
    method _pad (line 262) | def _pad(

FILE: gptq/gptq.py
  class GPTQ (line 17) | class GPTQ:
    method __init__ (line 18) | def __init__(self, layer):
    method add_batch (line 31) | def add_batch(self, inp, out):
    method fasterquant (line 59) | def fasterquant(
    method free (line 156) | def free(self):

FILE: gptq/llama.py
  function get_llama (line 11) | def get_llama(model):
  function llama_sequential (line 24) | def llama_sequential(model, dataloader, dev):
  function llama_eval (line 126) | def llama_eval(model, testenc, dev):
  function llama_pack (line 219) | def llama_pack(model, quantizers, wbits, groupsize):
  function load_quant (line 233) | def load_quant(model, checkpoint, wbits, groupsize=-1,faster_kernel=False):
  function llama_multigpu (line 267) | def llama_multigpu(model, gpus):
  function benchmark (line 298) | def benchmark(model, input_ids, check=False):

FILE: gptq/llama_inference.py
  function get_llama (line 14) | def get_llama(model):
  function load_quant (line 26) | def load_quant(model, checkpoint, wbits, groupsize):

FILE: gptq/modelutils.py
  function find_layers (line 8) | def find_layers(module, layers=[nn.Conv2d, nn.Linear], name=''):

FILE: gptq/quant.py
  function quantize (line 6) | def quantize(x, scale, zero, maxq):
  class Quantizer (line 10) | class Quantizer(nn.Module):
    method __init__ (line 12) | def __init__(self, shape=1):
    method configure (line 18) | def configure(
    method find_params (line 31) | def find_params(self, x, weight=False):
    method quantize (line 110) | def quantize(self, x):
    method enabled (line 115) | def enabled(self):
    method ready (line 118) | def ready(self):
  class QuantLinear (line 128) | class QuantLinear(nn.Module):
    method __init__ (line 129) | def __init__(self, bits, groupsize, infeatures, outfeatures):
    method pack (line 148) | def pack(self, linear, scales, zeros):
    method forward (line 235) | def forward(self, x):
  function make_quant (line 269) | def make_quant(module, names, bits, groupsize, name=''):

FILE: gptq/quant_cuda.cpp
  function vecquant2matmul (line 11) | void vecquant2matmul(
  function vecquant3matmul (line 26) | void vecquant3matmul(
  function vecquant4matmul (line 41) | void vecquant4matmul(
  function vecquant8matmul (line 56) | void vecquant8matmul(
  function PYBIND11_MODULE (line 65) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: predictors/base.py
  function parse_codeblock (line 5) | def parse_codeblock(text):
  class BasePredictor (line 20) | class BasePredictor(ABC):
    method __init__ (line 23) | def __init__(self, model_name, predict_mode='tuple'):
    method stream_chat_continue (line 30) | def stream_chat_continue(self, *args, **kwargs):
    method predict_continue (line 33) | def predict_continue(self, *args, **kwargs):
    method predict_continue_tuple (line 39) | def predict_continue_tuple(self, query, latest_message, max_length, to...
    method predict_continue_dict (line 65) | def predict_continue_dict(self, query, latest_message, max_length, top_p,

FILE: predictors/chatglm2_predictor.py
  class InvalidScoreLogitsProcessor (line 11) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 12) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  class ChatGLM2 (line 19) | class ChatGLM2(BasePredictor):
    method __init__ (line 21) | def __init__(self, model_name):
    method stream_chat_continue (line 64) | def stream_chat_continue(self,
  function test (line 125) | def test():
  function test2 (line 144) | def test2():

FILE: predictors/chatglm3_predictor.py
  class InvalidScoreLogitsProcessor (line 12) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 14) | def __call__(self, input_ids: torch.LongTensor,
  class ChatGLM3 (line 22) | class ChatGLM3(BasePredictor):
    method __init__ (line 24) | def __init__(self, model_name):
    method stream_chat_continue (line 65) | def stream_chat_continue(self,
  function test (line 141) | def test():
  function test2 (line 166) | def test2():

FILE: predictors/chatglm_predictor.py
  class InvalidScoreLogitsProcessor (line 11) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __init__ (line 13) | def __init__(self, start_pos=5):
    method __call__ (line 16) | def __call__(self, input_ids: torch.LongTensor,
  class ChatGLM (line 24) | class ChatGLM(BasePredictor):
    method __init__ (line 26) | def __init__(self, model_name):
    method stream_chat_continue (line 69) | def stream_chat_continue(self,
  function test (line 140) | def test():

FILE: predictors/debug.py
  class Debug (line 1) | class Debug:
    method __init__ (line 2) | def __init__(self, *args, **kwargs):
    method inference (line 5) | def inference(self, *args, **kwargs):
    method predict_continue (line 16) | def predict_continue(self, *args, **kwargs):

FILE: predictors/glm4_predictor.py
  class InvalidScoreLogitsProcessor (line 13) | class InvalidScoreLogitsProcessor(LogitsProcessor):
    method __call__ (line 15) | def __call__(self, input_ids: torch.LongTensor,
  class GLM4 (line 23) | class GLM4(BasePredictor):
    method __init__ (line 25) | def __init__(self, model_name, int4=False):
    method stream_chat_continue (line 67) | def stream_chat_continue(self,
  function test (line 158) | def test():
  function test2 (line 183) | def test2():

FILE: predictors/llama.py
  function stream_generate (line 18) | def stream_generate(
  class LLaMa (line 120) | class LLaMa(BasePredictor):
    method __init__ (line 122) | def __init__(self, model_name):
    method stream_chat_continue (line 141) | def stream_chat_continue(self,
  function test (line 197) | def test():

FILE: predictors/llama_gptq.py
  class LLaMaGPTQ (line 13) | class LLaMaGPTQ(LLaMa):
    method __init__ (line 14) | def __init__(self, model_name, checkpoint_path='llama7b-2m-4bit-128g.p...
  function test (line 32) | def test():

FILE: test_fastapi.py
  function event_source_response_iterator (line 17) | def event_source_response_iterator(response):

FILE: test_models.py
  function test_model (line 5) | def test_model(model_name):
  function main (line 46) | def main():

FILE: utils_env.py
  function collect_env (line 1) | def collect_env():