SYMBOL INDEX (467 symbols across 34 files) FILE: app.py function revise (line 40) | def revise(history, latest_message): function revoke (line 48) | def revoke(history, last_state): function interrupt (line 57) | def interrupt(allow_generate): function regenerate (line 61) | def regenerate(last_state, max_length, top_p, temperature, allow_generate): FILE: app_fastapi.py function getLogger (line 34) | def getLogger(name, file_name, use_formatter=True): function start_server (line 59) | def start_server(quantize_level, http_address: str, port: int, gpu_id: s... FILE: chatglm/configuration_chatglm.py class ChatGLMConfig (line 9) | class ChatGLMConfig(PretrainedConfig): method __init__ (line 59) | def __init__( FILE: chatglm/modeling_chatglm.py class InvalidScoreLogitsProcessor (line 54) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 55) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... function load_tf_weights_in_chatglm_6b (line 62) | def load_tf_weights_in_chatglm_6b(model, config, tf_checkpoint_path): class PrefixEncoder (line 136) | class PrefixEncoder(torch.nn.Module): method __init__ (line 143) | def __init__(self, config): method forward (line 157) | def forward(self, prefix: torch.Tensor): function gelu_impl (line 167) | def gelu_impl(x): function gelu (line 173) | def gelu(x): class RotaryEmbedding (line 177) | class RotaryEmbedding(torch.nn.Module): method __init__ (line 178) | def __init__(self, dim, base=10000, precision=torch.half, learnable=Fa... method _load_from_state_dict (line 194) | def _load_from_state_dict(self, state_dict, prefix, local_metadata, st... method forward (line 198) | def forward(self, x, seq_dim=1, seq_len=None): method _apply (line 221) | def _apply(self, fn): function rotate_half (line 229) | def rotate_half(x): function apply_rotary_pos_emb_index (line 235) | def apply_rotary_pos_emb_index(q, k, cos, sin, position_id): function attention_fn (line 243) | def attention_fn( function default_init (line 350) | def default_init(cls, *args, **kwargs): class SelfAttention (line 354) | class SelfAttention(torch.nn.Module): method __init__ (line 355) | def __init__(self, hidden_size, num_attention_heads, method attention_mask_func (line 406) | def attention_mask_func(attention_scores, attention_mask): method split_tensor_along_last_dim (line 410) | def split_tensor_along_last_dim(self, tensor, num_partitions, method forward (line 430) | def forward( class GEGLU (line 497) | class GEGLU(torch.nn.Module): method __init__ (line 498) | def __init__(self): method forward (line 502) | def forward(self, x): class GLU (line 508) | class GLU(torch.nn.Module): method __init__ (line 509) | def __init__(self, hidden_size, inner_hidden_size=None, method forward (line 540) | def forward(self, hidden_states): class GLMBlock (line 555) | class GLMBlock(torch.nn.Module): method __init__ (line 556) | def __init__( method forward (line 608) | def forward( class ChatGLMPreTrainedModel (line 662) | class ChatGLMPreTrainedModel(PreTrainedModel): method __init__ (line 674) | def __init__(self, *inputs, **kwargs): method _init_weights (line 677) | def _init_weights(self, module: nn.Module): method get_masks (line 681) | def get_masks(self, input_ids, device): method get_position_ids (line 693) | def get_position_ids(self, input_ids, mask_positions, device, use_gmas... method _set_gradient_checkpointing (line 716) | def _set_gradient_checkpointing(self, module, value=False): class ChatGLMModel (line 786) | class ChatGLMModel(ChatGLMPreTrainedModel): method __init__ (line 802) | def __init__(self, config: ChatGLMConfig, empty_init=True): method get_input_embeddings (line 862) | def get_input_embeddings(self): method set_input_embeddings (line 865) | def set_input_embeddings(self, new_embeddings: torch.Tensor): method get_prompt (line 868) | def get_prompt(self, batch_size, device, dtype=torch.half): method forward (line 890) | def forward( class ChatGLMForConditionalGeneration (line 1032) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel): method __init__ (line 1033) | def __init__(self, config: ChatGLMConfig, empty_init=True): method get_output_embeddings (line 1064) | def get_output_embeddings(self): method set_output_embeddings (line 1067) | def set_output_embeddings(self, new_embeddings): method _update_model_kwargs_for_generation (line 1070) | def _update_model_kwargs_for_generation( method prepare_inputs_for_generation (line 1105) | def prepare_inputs_for_generation( method forward (line 1175) | def forward( method _reorder_cache (line 1234) | def _reorder_cache( method process_response (line 1252) | def process_response(self, response): method chat (line 1268) | def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] =... method stream_chat (line 1294) | def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, ... method stream_generate (line 1320) | def stream_generate( method quantize (line 1421) | def quantize(self, bits: int, empty_init=False, **kwargs): FILE: chatglm/quantization.py class Kernel (line 18) | class Kernel: method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]): class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function): method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to... method backward (line 58) | def backward(ctx, grad_output: torch.Tensor): function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor): # (n, m) function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso... class QuantizedLinear (line 120) | class QuantizedLinear(Linear): method __init__ (line 121) | def __init__(self, weight_bit_width: int, weight_tensor=None, bias_ten... method forward (line 146) | def forward(self, input): function quantize (line 153) | def quantize(model, weight_bit_width, empty_init=False, **kwargs): FILE: chatglm/tokenization_chatglm.py class TextTokenizer (line 19) | class TextTokenizer: method __init__ (line 20) | def __init__(self, model_path): method encode (line 25) | def encode(self, text): method decode (line 28) | def decode(self, ids: List[int]): method tokenize (line 31) | def tokenize(self, text): method convert_tokens_to_ids (line 34) | def convert_tokens_to_ids(self, tokens): method convert_token_to_id (line 37) | def convert_token_to_id(self, token): method convert_id_to_token (line 40) | def convert_id_to_token(self, idx): method __len__ (line 43) | def __len__(self): class SPTokenizer (line 47) | class SPTokenizer: method __init__ (line 48) | def __init__( method _get_text_tokenizer (line 63) | def _get_text_tokenizer(self): method get_blank_token (line 67) | def get_blank_token(length: int): method get_tab_token (line 72) | def get_tab_token(): method num_text_tokens (line 76) | def num_text_tokens(self): method num_tokens (line 80) | def num_tokens(self): method _encode_whitespaces (line 84) | def _encode_whitespaces(text: str, max_len: int = 80): method _preprocess (line 90) | def _preprocess(self, text: str, linebreak=True, whitespaces=True): method encode (line 97) | def encode( method decode (line 114) | def decode(self, text_ids: List[int]) -> str: method tokenize (line 124) | def tokenize( method __getitem__ (line 140) | def __getitem__(self, x: Union[int, str]): class ChatGLMTokenizer (line 155) | class ChatGLMTokenizer(PreTrainedTokenizer): method __init__ (line 168) | def __init__( method gmask_token_id (line 214) | def gmask_token_id(self) -> Optional[int]: method end_token_id (line 220) | def end_token_id(self) -> Optional[int]: method vocab_size (line 230) | def vocab_size(self): method get_vocab (line 234) | def get_vocab(self): method preprocess_text (line 240) | def preprocess_text(self, inputs): method _tokenize (line 251) | def _tokenize(self, text, **kwargs): method _decode (line 259) | def _decode( method _convert_token_to_id (line 274) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 278) | def _convert_id_to_token(self, index): method save_vocabulary (line 282) | def save_vocabulary(self, save_directory, filename_prefix=None): method build_inputs_with_special_tokens (line 310) | def build_inputs_with_special_tokens( method _pad (line 336) | def _pad( FILE: chatglm2/configuration_chatglm.py class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig): method __init__ (line 6) | def __init__( FILE: chatglm2/modeling_chatglm.py function default_init (line 47) | def default_init(cls, *args, **kwargs): class InvalidScoreLogitsProcessor (line 51) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 52) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... function split_tensor_along_last_dim (line 59) | def split_tensor_along_last_dim( class RotaryEmbedding (line 87) | class RotaryEmbedding(nn.Module): method __init__ (line 88) | def __init__(self, dim, original_impl=False, device=None, dtype=None): method forward_impl (line 95) | def forward_impl( method forward (line 120) | def forward(self, max_seq_len, offset=0): function apply_rotary_pos_emb (line 127) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t... class RMSNorm (line 147) | class RMSNorm(torch.nn.Module): method __init__ (line 148) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None... method forward (line 153) | def forward(self, hidden_states: torch.Tensor): class CoreAttention (line 161) | class CoreAttention(torch.nn.Module): method __init__ (line 162) | def __init__(self, config: ChatGLMConfig, layer_number): method forward (line 187) | def forward(self, query_layer, key_layer, value_layer, attention_mask): class SelfAttention (line 279) | class SelfAttention(torch.nn.Module): method __init__ (line 286) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method _allocate_memory (line 315) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev... method forward (line 329) | def forward( function _config_to_kwargs (line 418) | def _config_to_kwargs(args): class MLP (line 425) | class MLP(torch.nn.Module): method __init__ (line 433) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 462) | def forward(self, hidden_states): class GLMBlock (line 471) | class GLMBlock(torch.nn.Module): method __init__ (line 478) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method forward (line 502) | def forward( class GLMTransformer (line 545) | class GLMTransformer(torch.nn.Module): method __init__ (line 548) | def __init__(self, config: ChatGLMConfig, device=None): method _get_layer (line 569) | def _get_layer(self, layer_number): method forward (line 572) | def forward( class ChatGLMPreTrainedModel (line 608) | class ChatGLMPreTrainedModel(PreTrainedModel): method _init_weights (line 620) | def _init_weights(self, module: nn.Module): method get_masks (line 624) | def get_masks(self, input_ids, past_key_values, padding_mask=None): method get_position_ids (line 642) | def get_position_ids(self, input_ids, device): method _set_gradient_checkpointing (line 647) | def _set_gradient_checkpointing(self, module, value=False): class Embedding (line 652) | class Embedding(torch.nn.Module): method __init__ (line 655) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 668) | def forward(self, input_ids): class ChatGLMModel (line 680) | class ChatGLMModel(ChatGLMPreTrainedModel): method __init__ (line 681) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True): method get_input_embeddings (line 705) | def get_input_embeddings(self): method forward (line 708) | def forward( method quantize (line 759) | def quantize(self, weight_bit_width: int): class ChatGLMForConditionalGeneration (line 765) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel): method __init__ (line 766) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None): method _update_model_kwargs_for_generation (line 777) | def _update_model_kwargs_for_generation( method prepare_inputs_for_generation (line 808) | def prepare_inputs_for_generation( method forward (line 831) | def forward( method _reorder_cache (line 892) | def _reorder_cache( method process_response (line 910) | def process_response(self, response): method build_inputs (line 915) | def build_inputs(self, tokenizer, query: str, history: List[Tuple[str,... method build_stream_inputs (line 924) | def build_stream_inputs(self, tokenizer, query: str, history: List[Tup... method chat (line 938) | def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] =... method stream_chat (line 956) | def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, ... method stream_generate (line 991) | def stream_generate( method quantize (line 1095) | def quantize(self, bits: int, empty_init=False, device=None, **kwargs): FILE: chatglm2/quantization.py class Kernel (line 18) | class Kernel: method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]): class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function): method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to... method backward (line 58) | def backward(ctx, grad_output: torch.Tensor): function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor): # (n, m) function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso... class QuantizedLinear (line 124) | class QuantizedLinear(torch.nn.Module): method __init__ (line 125) | def __init__(self, weight_bit_width: int, weight, bias=None, device="c... method forward (line 145) | def forward(self, input): function quantize (line 152) | def quantize(model, weight_bit_width, empty_init=False, device=None): FILE: chatglm2/tokenization_chatglm.py class SPTokenizer (line 10) | class SPTokenizer: method __init__ (line 11) | def __init__(self, model_path: str): method tokenize (line 31) | def tokenize(self, s: str): method encode (line 34) | def encode(self, s: str, bos: bool = False, eos: bool = False) -> List... method decode (line 43) | def decode(self, t: List[int]) -> str: method decode_tokens (line 46) | def decode_tokens(self, tokens: List[str]) -> str: method convert_token_to_id (line 50) | def convert_token_to_id(self, token): method convert_id_to_token (line 56) | def convert_id_to_token(self, index): class ChatGLMTokenizer (line 63) | class ChatGLMTokenizer(PreTrainedTokenizer): method __init__ (line 68) | def __init__(self, vocab_file, padding_side="left", **kwargs): method get_command (line 80) | def get_command(self, token): method pad_token (line 87) | def pad_token(self) -> str: method pad_token_id (line 91) | def pad_token_id(self): method vocab_size (line 95) | def vocab_size(self): method get_vocab (line 98) | def get_vocab(self): method _tokenize (line 104) | def _tokenize(self, text, **kwargs): method _convert_token_to_id (line 107) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 111) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 115) | def convert_tokens_to_string(self, tokens: List[str]) -> str: method save_vocabulary (line 118) | def save_vocabulary(self, save_directory, filename_prefix=None): method get_prefix_tokens (line 146) | def get_prefix_tokens(self): method build_inputs_with_special_tokens (line 150) | def build_inputs_with_special_tokens( method _pad (line 175) | def _pad( FILE: chatglm3/configuration_chatglm.py class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig): method __init__ (line 6) | def __init__( FILE: chatglm3/modeling_chatglm.py function default_init (line 49) | def default_init(cls, *args, **kwargs): class InvalidScoreLogitsProcessor (line 53) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 54) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... class PrefixEncoder (line 61) | class PrefixEncoder(torch.nn.Module): method __init__ (line 68) | def __init__(self, config: ChatGLMConfig): method forward (line 84) | def forward(self, prefix: torch.Tensor): function split_tensor_along_last_dim (line 93) | def split_tensor_along_last_dim( class RotaryEmbedding (line 121) | class RotaryEmbedding(nn.Module): method __init__ (line 122) | def __init__(self, dim, original_impl=False, device=None, dtype=None): method forward_impl (line 129) | def forward_impl( method forward (line 154) | def forward(self, max_seq_len, offset=0): function apply_rotary_pos_emb (line 161) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t... class RMSNorm (line 181) | class RMSNorm(torch.nn.Module): method __init__ (line 182) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None... method forward (line 187) | def forward(self, hidden_states: torch.Tensor): class CoreAttention (line 195) | class CoreAttention(torch.nn.Module): method __init__ (line 196) | def __init__(self, config: ChatGLMConfig, layer_number): method forward (line 221) | def forward(self, query_layer, key_layer, value_layer, attention_mask): class SelfAttention (line 313) | class SelfAttention(torch.nn.Module): method __init__ (line 320) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method _allocate_memory (line 349) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev... method forward (line 363) | def forward( function _config_to_kwargs (line 452) | def _config_to_kwargs(args): class MLP (line 459) | class MLP(torch.nn.Module): method __init__ (line 467) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 496) | def forward(self, hidden_states): class GLMBlock (line 505) | class GLMBlock(torch.nn.Module): method __init__ (line 512) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method forward (line 536) | def forward( class GLMTransformer (line 579) | class GLMTransformer(torch.nn.Module): method __init__ (line 582) | def __init__(self, config: ChatGLMConfig, device=None): method _get_layer (line 605) | def _get_layer(self, layer_number): method forward (line 608) | def forward( class ChatGLMPreTrainedModel (line 661) | class ChatGLMPreTrainedModel(PreTrainedModel): method _init_weights (line 673) | def _init_weights(self, module: nn.Module): method get_masks (line 677) | def get_masks(self, input_ids, past_key_values, padding_mask=None): method get_position_ids (line 695) | def get_position_ids(self, input_ids, device): method _set_gradient_checkpointing (line 700) | def _set_gradient_checkpointing(self, module, value=False): class Embedding (line 705) | class Embedding(torch.nn.Module): method __init__ (line 708) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 721) | def forward(self, input_ids): class ChatGLMModel (line 733) | class ChatGLMModel(ChatGLMPreTrainedModel): method __init__ (line 734) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True): method get_input_embeddings (line 768) | def get_input_embeddings(self): method get_prompt (line 771) | def get_prompt(self, batch_size, device, dtype=torch.half): method forward (line 786) | def forward( method quantize (line 845) | def quantize(self, weight_bit_width: int): class ChatGLMForConditionalGeneration (line 851) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel): method __init__ (line 852) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None): method _update_model_kwargs_for_generation (line 863) | def _update_model_kwargs_for_generation( method prepare_inputs_for_generation (line 894) | def prepare_inputs_for_generation( method forward (line 920) | def forward( method _reorder_cache (line 981) | def _reorder_cache( method process_response (line 999) | def process_response(self, output, history): method chat (line 1024) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role... method stream_chat (line 1046) | def stream_chat(self, tokenizer, query: str, history: List[Dict] = Non... method stream_generate (line 1087) | def stream_generate( method quantize (line 1194) | def quantize(self, bits: int, empty_init=False, device=None, **kwargs): class ChatGLMForSequenceClassification (line 1213) | class ChatGLMForSequenceClassification(ChatGLMPreTrainedModel): method __init__ (line 1214) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None): method forward (line 1230) | def forward( FILE: chatglm3/quantization.py class Kernel (line 18) | class Kernel: method __init__ (line 19) | def __init__(self, code: bytes, function_names: List[str]): class W8A16Linear (line 44) | class W8A16Linear(torch.autograd.Function): method forward (line 46) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to... method backward (line 58) | def backward(ctx, grad_output: torch.Tensor): function compress_int4_weight (line 67) | def compress_int4_weight(weight: torch.Tensor): # (n, m) function extract_weight_to_half (line 88) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso... class QuantizedLinear (line 124) | class QuantizedLinear(torch.nn.Module): method __init__ (line 125) | def __init__(self, weight_bit_width: int, weight, bias=None, device="c... method forward (line 145) | def forward(self, input): function quantize (line 152) | def quantize(model, weight_bit_width, empty_init=False, device=None): FILE: chatglm3/tokenization_chatglm.py class SPTokenizer (line 11) | class SPTokenizer: method __init__ (line 12) | def __init__(self, model_path: str): method tokenize (line 34) | def tokenize(self, s: str, encode_special_tokens=False): method encode (line 49) | def encode(self, s: str, bos: bool = False, eos: bool = False) -> List... method decode (line 58) | def decode(self, t: List[int]) -> str: method decode_tokens (line 72) | def decode_tokens(self, tokens: List[str]) -> str: method convert_token_to_id (line 76) | def convert_token_to_id(self, token): method convert_id_to_token (line 82) | def convert_id_to_token(self, index): class ChatGLMTokenizer (line 91) | class ChatGLMTokenizer(PreTrainedTokenizer): method __init__ (line 96) | def __init__(self, vocab_file, padding_side="left", clean_up_tokenizat... method get_command (line 112) | def get_command(self, token): method unk_token (line 119) | def unk_token(self) -> str: method pad_token (line 123) | def pad_token(self) -> str: method pad_token_id (line 127) | def pad_token_id(self): method eos_token (line 131) | def eos_token(self) -> str: method eos_token_id (line 135) | def eos_token_id(self): method vocab_size (line 139) | def vocab_size(self): method get_vocab (line 142) | def get_vocab(self): method _tokenize (line 148) | def _tokenize(self, text, **kwargs): method _convert_token_to_id (line 151) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 155) | def _convert_id_to_token(self, index): method convert_tokens_to_string (line 159) | def convert_tokens_to_string(self, tokens: List[str]) -> str: method save_vocabulary (line 162) | def save_vocabulary(self, save_directory, filename_prefix=None): method get_prefix_tokens (line 190) | def get_prefix_tokens(self): method build_single_message (line 194) | def build_single_message(self, role, metadata, message): method build_chat_input (line 201) | def build_chat_input(self, query, history=None, role="user"): method build_inputs_with_special_tokens (line 214) | def build_inputs_with_special_tokens( method _pad (line 239) | def _pad( FILE: glm4/configuration_chatglm.py class ChatGLMConfig (line 4) | class ChatGLMConfig(PretrainedConfig): method __init__ (line 7) | def __init__( FILE: glm4/modeling_chatglm.py function default_init (line 53) | def default_init(cls, *args, **kwargs): class InvalidScoreLogitsProcessor (line 57) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 58) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... function split_tensor_along_last_dim (line 65) | def split_tensor_along_last_dim( class RotaryEmbedding (line 93) | class RotaryEmbedding(nn.Module): method __init__ (line 94) | def __init__(self, dim, rope_ratio=1, original_impl=False, device=None... method forward_impl (line 102) | def forward_impl( method forward (line 128) | def forward(self, max_seq_len, offset=0): function apply_rotary_pos_emb (line 135) | def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> t... class RMSNorm (line 155) | class RMSNorm(torch.nn.Module): method __init__ (line 156) | def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None... method forward (line 161) | def forward(self, hidden_states: torch.Tensor): class CoreAttention (line 169) | class CoreAttention(torch.nn.Module): method __init__ (line 170) | def __init__(self, config: ChatGLMConfig, layer_number): method forward (line 196) | def forward(self, query_layer, key_layer, value_layer, attention_mask): class SdpaAttention (line 268) | class SdpaAttention(CoreAttention): method forward (line 269) | def forward(self, query_layer, key_layer, value_layer, attention_mask): function _get_unpad_data (line 286) | def _get_unpad_data(attention_mask): class FlashAttention2 (line 299) | class FlashAttention2(CoreAttention): method __init__ (line 300) | def __init__(self, *args, **kwargs): method forward (line 304) | def forward(self, query_states, key_states, value_states, attention_ma... method _upad_input (line 345) | def _upad_input(self, query_layer, key_layer, value_layer, attention_m... class SelfAttention (line 391) | class SelfAttention(torch.nn.Module): method __init__ (line 398) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method _allocate_memory (line 427) | def _allocate_memory(self, inference_max_sequence_len, batch_size, dev... method forward (line 441) | def forward( function _config_to_kwargs (line 537) | def _config_to_kwargs(args): class MLP (line 544) | class MLP(torch.nn.Module): method __init__ (line 552) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 581) | def forward(self, hidden_states): class GLMBlock (line 590) | class GLMBlock(torch.nn.Module): method __init__ (line 597) | def __init__(self, config: ChatGLMConfig, layer_number, device=None): method forward (line 621) | def forward( class GLMTransformer (line 664) | class GLMTransformer(torch.nn.Module): method __init__ (line 667) | def __init__(self, config: ChatGLMConfig, device=None): method _get_layer (line 690) | def _get_layer(self, layer_number): method forward (line 693) | def forward( class ChatGLMPreTrainedModel (line 755) | class ChatGLMPreTrainedModel(PreTrainedModel): method _init_weights (line 769) | def _init_weights(self, module: nn.Module): method get_masks (line 773) | def get_masks(self, input_ids, past_key_values, padding_mask=None): method get_position_ids (line 795) | def get_position_ids(self, input_ids, device): method gradient_checkpointing_enable (line 800) | def gradient_checkpointing_enable(self, gradient_checkpointing_kwargs=... class Embedding (line 805) | class Embedding(torch.nn.Module): method __init__ (line 808) | def __init__(self, config: ChatGLMConfig, device=None): method forward (line 821) | def forward(self, input_ids): class ChatGLMModel (line 831) | class ChatGLMModel(ChatGLMPreTrainedModel): method __init__ (line 832) | def __init__(self, config: ChatGLMConfig, device=None, empty_init=True): method get_input_embeddings (line 859) | def get_input_embeddings(self): method set_input_embeddings (line 862) | def set_input_embeddings(self, value): method forward (line 865) | def forward( class ChatGLMForConditionalGeneration (line 923) | class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel): method __init__ (line 924) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None): method _update_model_kwargs_for_generation (line 931) | def _update_model_kwargs_for_generation( method prepare_inputs_for_generation (line 962) | def prepare_inputs_for_generation( method forward (line 988) | def forward( method _reorder_cache (line 1048) | def _reorder_cache( method process_response (line 1066) | def process_response(self, output, history): method chat (line 1088) | def chat(self, tokenizer, query: str, history: List[Dict] = None, role... method stream_chat (line 1111) | def stream_chat(self, tokenizer, query: str, history: List[Dict] = Non... method stream_generate (line 1154) | def stream_generate( class ChatGLMForSequenceClassification (line 1262) | class ChatGLMForSequenceClassification(ChatGLMPreTrainedModel): method __init__ (line 1263) | def __init__(self, config: ChatGLMConfig, empty_init=True, device=None): method forward (line 1276) | def forward( FILE: glm4/tokenization_chatglm.py class ChatGLM4Tokenizer (line 13) | class ChatGLM4Tokenizer(PreTrainedTokenizer): method __init__ (line 17) | def __init__( method vocab_size (line 57) | def vocab_size(self): method get_vocab (line 60) | def get_vocab(self): method convert_tokens_to_string (line 66) | def convert_tokens_to_string(self, tokens: List[Union[bytes, str, int]... method _tokenize (line 86) | def _tokenize(self, text, **kwargs): method _convert_token_to_id (line 93) | def _convert_token_to_id(self, token): method _convert_id_to_token (line 97) | def _convert_id_to_token(self, index): method save_vocabulary (line 101) | def save_vocabulary(self, save_directory, filename_prefix=None): method get_prefix_tokens (line 129) | def get_prefix_tokens(self): method build_single_message (line 133) | def build_single_message(self, role, metadata, message, tokenize=True): method build_inputs_with_special_tokens (line 237) | def build_inputs_with_special_tokens( method _pad (line 262) | def _pad( FILE: gptq/gptq.py class GPTQ (line 17) | class GPTQ: method __init__ (line 18) | def __init__(self, layer): method add_batch (line 31) | def add_batch(self, inp, out): method fasterquant (line 59) | def fasterquant( method free (line 156) | def free(self): FILE: gptq/llama.py function get_llama (line 11) | def get_llama(model): function llama_sequential (line 24) | def llama_sequential(model, dataloader, dev): function llama_eval (line 126) | def llama_eval(model, testenc, dev): function llama_pack (line 219) | def llama_pack(model, quantizers, wbits, groupsize): function load_quant (line 233) | def load_quant(model, checkpoint, wbits, groupsize=-1,faster_kernel=False): function llama_multigpu (line 267) | def llama_multigpu(model, gpus): function benchmark (line 298) | def benchmark(model, input_ids, check=False): FILE: gptq/llama_inference.py function get_llama (line 14) | def get_llama(model): function load_quant (line 26) | def load_quant(model, checkpoint, wbits, groupsize): FILE: gptq/modelutils.py function find_layers (line 8) | def find_layers(module, layers=[nn.Conv2d, nn.Linear], name=''): FILE: gptq/quant.py function quantize (line 6) | def quantize(x, scale, zero, maxq): class Quantizer (line 10) | class Quantizer(nn.Module): method __init__ (line 12) | def __init__(self, shape=1): method configure (line 18) | def configure( method find_params (line 31) | def find_params(self, x, weight=False): method quantize (line 110) | def quantize(self, x): method enabled (line 115) | def enabled(self): method ready (line 118) | def ready(self): class QuantLinear (line 128) | class QuantLinear(nn.Module): method __init__ (line 129) | def __init__(self, bits, groupsize, infeatures, outfeatures): method pack (line 148) | def pack(self, linear, scales, zeros): method forward (line 235) | def forward(self, x): function make_quant (line 269) | def make_quant(module, names, bits, groupsize, name=''): FILE: gptq/quant_cuda.cpp function vecquant2matmul (line 11) | void vecquant2matmul( function vecquant3matmul (line 26) | void vecquant3matmul( function vecquant4matmul (line 41) | void vecquant4matmul( function vecquant8matmul (line 56) | void vecquant8matmul( function PYBIND11_MODULE (line 65) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: predictors/base.py function parse_codeblock (line 5) | def parse_codeblock(text): class BasePredictor (line 20) | class BasePredictor(ABC): method __init__ (line 23) | def __init__(self, model_name, predict_mode='tuple'): method stream_chat_continue (line 30) | def stream_chat_continue(self, *args, **kwargs): method predict_continue (line 33) | def predict_continue(self, *args, **kwargs): method predict_continue_tuple (line 39) | def predict_continue_tuple(self, query, latest_message, max_length, to... method predict_continue_dict (line 65) | def predict_continue_dict(self, query, latest_message, max_length, top_p, FILE: predictors/chatglm2_predictor.py class InvalidScoreLogitsProcessor (line 11) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 12) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... class ChatGLM2 (line 19) | class ChatGLM2(BasePredictor): method __init__ (line 21) | def __init__(self, model_name): method stream_chat_continue (line 64) | def stream_chat_continue(self, function test (line 125) | def test(): function test2 (line 144) | def test2(): FILE: predictors/chatglm3_predictor.py class InvalidScoreLogitsProcessor (line 12) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 14) | def __call__(self, input_ids: torch.LongTensor, class ChatGLM3 (line 22) | class ChatGLM3(BasePredictor): method __init__ (line 24) | def __init__(self, model_name): method stream_chat_continue (line 65) | def stream_chat_continue(self, function test (line 141) | def test(): function test2 (line 166) | def test2(): FILE: predictors/chatglm_predictor.py class InvalidScoreLogitsProcessor (line 11) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __init__ (line 13) | def __init__(self, start_pos=5): method __call__ (line 16) | def __call__(self, input_ids: torch.LongTensor, class ChatGLM (line 24) | class ChatGLM(BasePredictor): method __init__ (line 26) | def __init__(self, model_name): method stream_chat_continue (line 69) | def stream_chat_continue(self, function test (line 140) | def test(): FILE: predictors/debug.py class Debug (line 1) | class Debug: method __init__ (line 2) | def __init__(self, *args, **kwargs): method inference (line 5) | def inference(self, *args, **kwargs): method predict_continue (line 16) | def predict_continue(self, *args, **kwargs): FILE: predictors/glm4_predictor.py class InvalidScoreLogitsProcessor (line 13) | class InvalidScoreLogitsProcessor(LogitsProcessor): method __call__ (line 15) | def __call__(self, input_ids: torch.LongTensor, class GLM4 (line 23) | class GLM4(BasePredictor): method __init__ (line 25) | def __init__(self, model_name, int4=False): method stream_chat_continue (line 67) | def stream_chat_continue(self, function test (line 158) | def test(): function test2 (line 183) | def test2(): FILE: predictors/llama.py function stream_generate (line 18) | def stream_generate( class LLaMa (line 120) | class LLaMa(BasePredictor): method __init__ (line 122) | def __init__(self, model_name): method stream_chat_continue (line 141) | def stream_chat_continue(self, function test (line 197) | def test(): FILE: predictors/llama_gptq.py class LLaMaGPTQ (line 13) | class LLaMaGPTQ(LLaMa): method __init__ (line 14) | def __init__(self, model_name, checkpoint_path='llama7b-2m-4bit-128g.p... function test (line 32) | def test(): FILE: test_fastapi.py function event_source_response_iterator (line 17) | def event_source_response_iterator(response): FILE: test_models.py function test_model (line 5) | def test_model(model_name): function main (line 46) | def main(): FILE: utils_env.py function collect_env (line 1) | def collect_env():