SYMBOL INDEX (11552 symbols across 625 files)

FILE: code/NEZHA/configuration_nezha.py
  class NeZhaConfig (line 6) | class NeZhaConfig(PretrainedConfig):
    method __init__ (line 82) | def __init__(

FILE: code/NEZHA/modeling_nezha.py
  function load_tf_weights_in_bert (line 48) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
  class BertEmbeddings (line 122) | class BertEmbeddings(nn.Module):
    method __init__ (line 125) | def __init__(self, config):
    method forward (line 134) | def forward(self, input_ids=None, token_type_ids=None, inputs_embeds=N...
  function relative_position_encoding (line 151) | def relative_position_encoding(depth, max_length=512, max_relative_posit...
  class BertSelfAttention (line 175) | class BertSelfAttention(nn.Module):
    method __init__ (line 176) | def __init__(self, config):
    method transpose_for_scores (line 200) | def transpose_for_scores(self, x):
    method forward (line 205) | def forward(
  class BertSelfOutput (line 308) | class BertSelfOutput(nn.Module):
    method __init__ (line 309) | def __init__(self, config):
    method forward (line 315) | def forward(self, hidden_states, input_tensor):
  class BertAttention (line 322) | class BertAttention(nn.Module):
    method __init__ (line 323) | def __init__(self, config):
    method prune_heads (line 329) | def prune_heads(self, heads):
    method forward (line 347) | def forward(
  class BertIntermediate (line 373) | class BertIntermediate(nn.Module):
    method __init__ (line 374) | def __init__(self, config):
    method forward (line 382) | def forward(self, hidden_states):
  class BertOutput (line 388) | class BertOutput(nn.Module):
    method __init__ (line 389) | def __init__(self, config):
    method forward (line 395) | def forward(self, hidden_states, input_tensor):
  class BertLayer (line 402) | class BertLayer(nn.Module):
    method __init__ (line 403) | def __init__(self, config):
    method forward (line 416) | def forward(
    method feed_forward_chunk (line 481) | def feed_forward_chunk(self, attention_output):
  class NeZhaEncoder (line 487) | class NeZhaEncoder(nn.Module):
    method __init__ (line 488) | def __init__(self, config):
    method forward (line 495) | def forward(
  class BertPooler (line 588) | class BertPooler(nn.Module):
    method __init__ (line 589) | def __init__(self, config):
    method forward (line 594) | def forward(self, hidden_states):
  class BertPredictionHeadTransform (line 603) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, hidden_states):
  class BertLMPredictionHead (line 620) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 621) | def __init__(self, config):
    method forward (line 634) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 640) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 641) | def __init__(self, config):
    method forward (line 645) | def forward(self, sequence_output):
  class BertOnlyNSPHead (line 650) | class BertOnlyNSPHead(nn.Module):
    method __init__ (line 651) | def __init__(self, config):
    method forward (line 655) | def forward(self, pooled_output):
  class BertPreTrainingHeads (line 660) | class BertPreTrainingHeads(nn.Module):
    method __init__ (line 661) | def __init__(self, config):
    method forward (line 666) | def forward(self, sequence_output, pooled_output):
  class BertPreTrainedModel (line 672) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 682) | def _init_weights(self, module):
  class BertForPreTrainingOutput (line 700) | class BertForPreTrainingOutput(ModelOutput):
  class NeZhaModel (line 805) | class NeZhaModel(BertPreTrainedModel):
    method __init__ (line 819) | def __init__(self, config, add_pooling_layer=True):
    method get_input_embeddings (line 830) | def get_input_embeddings(self):
    method set_input_embeddings (line 833) | def set_input_embeddings(self, value):
    method _prune_heads (line 836) | def _prune_heads(self, heads_to_prune):
    method forward (line 851) | def forward(
  class BertForPreTraining (line 982) | class BertForPreTraining(BertPreTrainedModel):
    method __init__ (line 983) | def __init__(self, config):
    method get_output_embeddings (line 991) | def get_output_embeddings(self):
    method set_output_embeddings (line 994) | def set_output_embeddings(self, new_embeddings):
    method forward (line 999) | def forward(
  class BertLMHeadModel (line 1083) | class BertLMHeadModel(BertPreTrainedModel):
    method __init__ (line 1088) | def __init__(self, config):
    method get_output_embeddings (line 1099) | def get_output_embeddings(self):
    method set_output_embeddings (line 1102) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1107) | def forward(
    method prepare_inputs_for_generation (line 1209) | def prepare_inputs_for_generation(self, input_ids, past=None, attentio...
    method _reorder_cache (line 1221) | def _reorder_cache(self, past, beam_idx):
  class NeZhaForMaskedLM (line 1229) | class NeZhaForMaskedLM(BertPreTrainedModel):
    method __init__ (line 1234) | def __init__(self, config):
    method get_output_embeddings (line 1248) | def get_output_embeddings(self):
    method set_output_embeddings (line 1251) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1261) | def forward(
    method prepare_inputs_for_generation (line 1318) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class BertForNextSentencePrediction (line 1337) | class BertForNextSentencePrediction(BertPreTrainedModel):
    method __init__ (line 1338) | def __init__(self, config):
    method forward (line 1348) | def forward(
  class BertForSequenceClassification (line 1438) | class BertForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 1439) | def __init__(self, config):
    method forward (line 1456) | def forward(
  class BertForMultipleChoice (line 1523) | class BertForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1524) | def __init__(self, config):
    method forward (line 1540) | def forward(
  class BertForTokenClassification (line 1613) | class BertForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1617) | def __init__(self, config):
    method forward (line 1634) | def forward(
  class BertForQuestionAnswering (line 1704) | class BertForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 1708) | def __init__(self, config):
    method forward (line 1724) | def forward(

FILE: code/bert-base-count3-len100/finetuning/NEZHA/configuration_nezha.py
  class NeZhaConfig (line 6) | class NeZhaConfig(PretrainedConfig):
    method __init__ (line 82) | def __init__(

FILE: code/bert-base-count3-len100/finetuning/NEZHA/modeling_nezha.py
  function load_tf_weights_in_nezha (line 33) | def load_tf_weights_in_nezha(model, config, tf_checkpoint_path):
  class NeZhaEmbeddings (line 108) | class NeZhaEmbeddings(nn.Module):
    method __init__ (line 113) | def __init__(self, config):
    method forward (line 123) | def forward(self, input_ids=None, token_type_ids=None, inputs_embeds=N...
  function relative_position_encoding (line 140) | def relative_position_encoding(depth, max_length=512, max_relative_posit...
  class NeZhaSelfAttention (line 165) | class NeZhaSelfAttention(nn.Module):
    method __init__ (line 166) | def __init__(self, config):
    method transpose_for_scores (line 188) | def transpose_for_scores(self, x):
    method forward (line 193) | def forward(
  class NeZhaAttention (line 270) | class NeZhaAttention(nn.Module):
    method __init__ (line 271) | def __init__(self, config):
    method prune_heads (line 277) | def prune_heads(self, heads):
    method forward (line 298) | def forward(
  class NeZhaLayer (line 314) | class NeZhaLayer(nn.Module):
    method __init__ (line 315) | def __init__(self, config):
    method forward (line 324) | def forward(
  class NeZhaEncoder (line 349) | class NeZhaEncoder(nn.Module):
    method __init__ (line 350) | def __init__(self, config):
    method forward (line 357) | def forward(
  class NeZhaPreTrainedModel (line 388) | class NeZhaPreTrainedModel(PreTrainedModel):
    method _init_weights (line 397) | def _init_weights(self, module):
  class NeZhaModel (line 414) | class NeZhaModel(NeZhaPreTrainedModel):
    method __init__ (line 430) | def __init__(self, config):
    method get_input_embeddings (line 438) | def get_input_embeddings(self):
    method set_input_embeddings (line 441) | def set_input_embeddings(self, value):
    method _prune_heads (line 444) | def _prune_heads(self, heads_to_prune):
    method forward (line 453) | def forward(
  class NeZhaForPreTraining (line 569) | class NeZhaForPreTraining(NeZhaPreTrainedModel):
    method __init__ (line 570) | def __init__(self, config):
    method get_output_embeddings (line 576) | def get_output_embeddings(self):
    method forward (line 580) | def forward(
  class NeZhaForMaskedLM (line 664) | class NeZhaForMaskedLM(NeZhaPreTrainedModel):
    method __init__ (line 665) | def __init__(self, config):
    method get_output_embeddings (line 671) | def get_output_embeddings(self):
    method forward (line 675) | def forward(
    method prepare_inputs_for_generation (line 760) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class NeZhaForNextSentencePrediction (line 786) | class NeZhaForNextSentencePrediction(NeZhaPreTrainedModel):
    method __init__ (line 787) | def __init__(self, config):
    method forward (line 794) | def forward(
  class NeZhaForSequenceClassification (line 868) | class NeZhaForSequenceClassification(NeZhaPreTrainedModel):
    method __init__ (line 869) | def __init__(self, config):
    method forward (line 878) | def forward(
  class NeZhaForMultipleChoice (line 962) | class NeZhaForMultipleChoice(NeZhaPreTrainedModel):
    method __init__ (line 963) | def __init__(self, config):
    method forward (line 971) | def forward(
  class NeZhaForTokenClassification (line 1058) | class NeZhaForTokenClassification(NeZhaPreTrainedModel):
    method __init__ (line 1059) | def __init__(self, config):
    method forward (line 1068) | def forward(
  class NeZhaForQuestionAnswering (line 1153) | class NeZhaForQuestionAnswering(NeZhaPreTrainedModel):
    method __init__ (line 1154) | def __init__(self, config):
    method forward (line 1162) | def forward(

FILE: code/bert-base-count3-len100/finetuning/model.py
  class BertForClass (line 11) | class BertForClass(nn.Module):
    method __init__ (line 12) | def __init__(self, config):
    method forward (line 24) | def forward(self, input_ids, input_masks, segment_ids):
  class BertForClass_MultiDropout (line 37) | class BertForClass_MultiDropout(nn.Module):
    method __init__ (line 38) | def __init__(self, config):
    method forward (line 50) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoCls (line 63) | class BertLastTwoCls(nn.Module):
    method __init__ (line 64) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastCls (line 83) | class BertLastCls(nn.Module):
    method __init__ (line 84) | def __init__(self, config):
    method forward (line 95) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoClsPooler (line 108) | class BertLastTwoClsPooler(nn.Module):
    method __init__ (line 109) | def __init__(self, config):
    method forward (line 120) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddings (line 132) | class BertLastTwoEmbeddings(nn.Module):
    method __init__ (line 133) | def __init__(self, config):
    method forward (line 144) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddingsPooler (line 160) | class BertLastTwoEmbeddingsPooler(nn.Module):
    method __init__ (line 161) | def __init__(self, config):
    method forward (line 172) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourCls (line 187) | class BertLastFourCls(nn.Module):
    method __init__ (line 188) | def __init__(self, config):
    method forward (line 199) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourClsPooler (line 215) | class BertLastFourClsPooler(nn.Module):
    method __init__ (line 216) | def __init__(self, config):
    method forward (line 227) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddings (line 239) | class BertLastFourEmbeddings(nn.Module):
    method __init__ (line 240) | def __init__(self, config):
    method forward (line 251) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddingsPooler (line 268) | class BertLastFourEmbeddingsPooler(nn.Module):
    method __init__ (line 269) | def __init__(self, config):
    method forward (line 280) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynCls (line 296) | class BertDynCls(nn.Module):
    method __init__ (line 297) | def __init__(self, config):
    method forward (line 311) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynEmbeddings (line 343) | class BertDynEmbeddings(nn.Module):
    method __init__ (line 344) | def __init__(self, config):
    method forward (line 358) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRNN (line 392) | class BertRNN(nn.Module):
    method __init__ (line 394) | def __init__(self, config):
    method forward (line 434) | def forward(self, input_ids, input_masks, segment_ids):
  class BertCNN (line 459) | class BertCNN(nn.Module):
    method __init__ (line 461) | def __init__(self, config):
    method conv_and_pool (line 480) | def conv_and_pool(self, x, conv):
    method forward (line 485) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRCNN (line 497) | class BertRCNN(nn.Module):
    method __init__ (line 498) | def __init__(self, config):
    method forward (line 540) | def forward(self, input_ids, input_masks, segment_ids):
  class XLNet (line 564) | class XLNet(nn.Module):
    method __init__ (line 566) | def __init__(self, config):
    method forward (line 574) | def forward(self, input_ids, input_masks, segment_ids):
  class ElectraClassificationHead (line 584) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 593) | def forward(self, features, **kwargs):
  class Electra (line 602) | class Electra(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, input_ids, input_masks, segment_ids):
  class NEZHA (line 621) | class NEZHA(nn.Module):
    method __init__ (line 622) | def __init__(self, config):
    method forward (line 637) | def forward(self, input_ids, input_masks, segment_ids):

FILE: code/bert-base-count3-len100/finetuning/multi_gpu_QA.py
  class Config (line 46) | class Config:
    method __init__ (line 47) | def __init__(self):

FILE: code/bert-base-count3-len100/finetuning/utils.py
  function paddingList (line 12) | def paddingList(ls:list,val,returnTensor=False):
  function fastTokenizer (line 19) | def fastTokenizer(a:str,b:str,maxLen,tk):
  class data_generator (line 39) | class data_generator:
    method __init__ (line 40) | def __init__(self, data, config, shuffle=False):
    method __len__ (line 53) | def __len__(self):
    method __iter__ (line 56) | def __iter__(self):
  class PGD (line 95) | class PGD():
    method __init__ (line 96) | def __init__(self, model):
    method attack (line 101) | def attack(self, epsilon=0.3, alpha=0.1, emb_name='word_embeddings', i...
    method restore (line 113) | def restore(self, emb_name='word_embeddings'):
    method project (line 121) | def project(self, param_name, param_data, epsilon):
    method backup_grad (line 127) | def backup_grad(self):
    method restore_grad (line 132) | def restore_grad(self):
  class FGM (line 139) | class FGM():
    method __init__ (line 140) | def __init__(self, model):
    method attack (line 144) | def attack(self, epsilon=0.25, emb_name='word_embeddings'):
    method restore (line 154) | def restore(self, emb_name='word_embeddings'):
  class FocalLoss (line 164) | class FocalLoss(nn.Module):
    method __init__ (line 180) | def __init__(self, num_class, alpha=None, gamma=2,
    method forward (line 201) | def forward(self, input, target):
  function f1_match (line 244) | def f1_match(y_true,y_pred):

FILE: code/bert-base-count3/finetuning/NEZHA/configuration_nezha.py
  class NeZhaConfig (line 6) | class NeZhaConfig(PretrainedConfig):
    method __init__ (line 82) | def __init__(

FILE: code/bert-base-count3/finetuning/NEZHA/modeling_nezha.py
  function load_tf_weights_in_nezha (line 33) | def load_tf_weights_in_nezha(model, config, tf_checkpoint_path):
  class NeZhaEmbeddings (line 108) | class NeZhaEmbeddings(nn.Module):
    method __init__ (line 113) | def __init__(self, config):
    method forward (line 123) | def forward(self, input_ids=None, token_type_ids=None, inputs_embeds=N...
  function relative_position_encoding (line 140) | def relative_position_encoding(depth, max_length=512, max_relative_posit...
  class NeZhaSelfAttention (line 165) | class NeZhaSelfAttention(nn.Module):
    method __init__ (line 166) | def __init__(self, config):
    method transpose_for_scores (line 188) | def transpose_for_scores(self, x):
    method forward (line 193) | def forward(
  class NeZhaAttention (line 270) | class NeZhaAttention(nn.Module):
    method __init__ (line 271) | def __init__(self, config):
    method prune_heads (line 277) | def prune_heads(self, heads):
    method forward (line 298) | def forward(
  class NeZhaLayer (line 314) | class NeZhaLayer(nn.Module):
    method __init__ (line 315) | def __init__(self, config):
    method forward (line 324) | def forward(
  class NeZhaEncoder (line 349) | class NeZhaEncoder(nn.Module):
    method __init__ (line 350) | def __init__(self, config):
    method forward (line 357) | def forward(
  class NeZhaPreTrainedModel (line 388) | class NeZhaPreTrainedModel(PreTrainedModel):
    method _init_weights (line 397) | def _init_weights(self, module):
  class NeZhaModel (line 414) | class NeZhaModel(NeZhaPreTrainedModel):
    method __init__ (line 430) | def __init__(self, config):
    method get_input_embeddings (line 438) | def get_input_embeddings(self):
    method set_input_embeddings (line 441) | def set_input_embeddings(self, value):
    method _prune_heads (line 444) | def _prune_heads(self, heads_to_prune):
    method forward (line 453) | def forward(
  class NeZhaForPreTraining (line 569) | class NeZhaForPreTraining(NeZhaPreTrainedModel):
    method __init__ (line 570) | def __init__(self, config):
    method get_output_embeddings (line 576) | def get_output_embeddings(self):
    method forward (line 580) | def forward(
  class NeZhaForMaskedLM (line 664) | class NeZhaForMaskedLM(NeZhaPreTrainedModel):
    method __init__ (line 665) | def __init__(self, config):
    method get_output_embeddings (line 671) | def get_output_embeddings(self):
    method forward (line 675) | def forward(
    method prepare_inputs_for_generation (line 760) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class NeZhaForNextSentencePrediction (line 786) | class NeZhaForNextSentencePrediction(NeZhaPreTrainedModel):
    method __init__ (line 787) | def __init__(self, config):
    method forward (line 794) | def forward(
  class NeZhaForSequenceClassification (line 868) | class NeZhaForSequenceClassification(NeZhaPreTrainedModel):
    method __init__ (line 869) | def __init__(self, config):
    method forward (line 878) | def forward(
  class NeZhaForMultipleChoice (line 962) | class NeZhaForMultipleChoice(NeZhaPreTrainedModel):
    method __init__ (line 963) | def __init__(self, config):
    method forward (line 971) | def forward(
  class NeZhaForTokenClassification (line 1058) | class NeZhaForTokenClassification(NeZhaPreTrainedModel):
    method __init__ (line 1059) | def __init__(self, config):
    method forward (line 1068) | def forward(
  class NeZhaForQuestionAnswering (line 1153) | class NeZhaForQuestionAnswering(NeZhaPreTrainedModel):
    method __init__ (line 1154) | def __init__(self, config):
    method forward (line 1162) | def forward(

FILE: code/bert-base-count3/finetuning/model.py
  class BertForClass (line 11) | class BertForClass(nn.Module):
    method __init__ (line 12) | def __init__(self, config):
    method forward (line 24) | def forward(self, input_ids, input_masks, segment_ids):
  class BertForClass_MultiDropout (line 37) | class BertForClass_MultiDropout(nn.Module):
    method __init__ (line 38) | def __init__(self, config):
    method forward (line 50) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoCls (line 63) | class BertLastTwoCls(nn.Module):
    method __init__ (line 64) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastCls (line 83) | class BertLastCls(nn.Module):
    method __init__ (line 84) | def __init__(self, config):
    method forward (line 95) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoClsPooler (line 108) | class BertLastTwoClsPooler(nn.Module):
    method __init__ (line 109) | def __init__(self, config):
    method forward (line 120) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddings (line 132) | class BertLastTwoEmbeddings(nn.Module):
    method __init__ (line 133) | def __init__(self, config):
    method forward (line 144) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddingsPooler (line 160) | class BertLastTwoEmbeddingsPooler(nn.Module):
    method __init__ (line 161) | def __init__(self, config):
    method forward (line 172) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourCls (line 187) | class BertLastFourCls(nn.Module):
    method __init__ (line 188) | def __init__(self, config):
    method forward (line 199) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourClsPooler (line 215) | class BertLastFourClsPooler(nn.Module):
    method __init__ (line 216) | def __init__(self, config):
    method forward (line 227) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddings (line 239) | class BertLastFourEmbeddings(nn.Module):
    method __init__ (line 240) | def __init__(self, config):
    method forward (line 251) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddingsPooler (line 268) | class BertLastFourEmbeddingsPooler(nn.Module):
    method __init__ (line 269) | def __init__(self, config):
    method forward (line 280) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynCls (line 296) | class BertDynCls(nn.Module):
    method __init__ (line 297) | def __init__(self, config):
    method forward (line 311) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynEmbeddings (line 343) | class BertDynEmbeddings(nn.Module):
    method __init__ (line 344) | def __init__(self, config):
    method forward (line 358) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRNN (line 392) | class BertRNN(nn.Module):
    method __init__ (line 394) | def __init__(self, config):
    method forward (line 434) | def forward(self, input_ids, input_masks, segment_ids):
  class BertCNN (line 459) | class BertCNN(nn.Module):
    method __init__ (line 461) | def __init__(self, config):
    method conv_and_pool (line 480) | def conv_and_pool(self, x, conv):
    method forward (line 485) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRCNN (line 497) | class BertRCNN(nn.Module):
    method __init__ (line 498) | def __init__(self, config):
    method forward (line 540) | def forward(self, input_ids, input_masks, segment_ids):
  class XLNet (line 564) | class XLNet(nn.Module):
    method __init__ (line 566) | def __init__(self, config):
    method forward (line 574) | def forward(self, input_ids, input_masks, segment_ids):
  class ElectraClassificationHead (line 584) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 593) | def forward(self, features, **kwargs):
  class Electra (line 602) | class Electra(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, input_ids, input_masks, segment_ids):
  class NEZHA (line 621) | class NEZHA(nn.Module):
    method __init__ (line 622) | def __init__(self, config):
    method forward (line 637) | def forward(self, input_ids, input_masks, segment_ids):

FILE: code/bert-base-count3/finetuning/multi_gpu_QA.py
  class Config (line 46) | class Config:
    method __init__ (line 47) | def __init__(self):

FILE: code/bert-base-count3/finetuning/utils.py
  function paddingList (line 12) | def paddingList(ls:list,val,returnTensor=False):
  function fastTokenizer (line 19) | def fastTokenizer(a:str,b:str,maxLen,tk):
  class data_generator (line 39) | class data_generator:
    method __init__ (line 40) | def __init__(self, data, config, shuffle=False):
    method __len__ (line 53) | def __len__(self):
    method __iter__ (line 56) | def __iter__(self):
  class PGD (line 95) | class PGD():
    method __init__ (line 96) | def __init__(self, model):
    method attack (line 101) | def attack(self, epsilon=0.3, alpha=0.1, emb_name='word_embeddings', i...
    method restore (line 113) | def restore(self, emb_name='word_embeddings'):
    method project (line 121) | def project(self, param_name, param_data, epsilon):
    method backup_grad (line 127) | def backup_grad(self):
    method restore_grad (line 132) | def restore_grad(self):
  class FGM (line 139) | class FGM():
    method __init__ (line 140) | def __init__(self, model):
    method attack (line 144) | def attack(self, epsilon=0.25, emb_name='word_embeddings'):
    method restore (line 154) | def restore(self, emb_name='word_embeddings'):
  class FocalLoss (line 164) | class FocalLoss(nn.Module):
    method __init__ (line 180) | def __init__(self, num_class, alpha=None, gamma=2,
    method forward (line 201) | def forward(self, input, target):
  function f1_match (line 244) | def f1_match(y_true,y_pred):

FILE: code/bert-base-count3/pretrain/NLP_Utils.py
  function writeToJsonFile (line 10) | def writeToJsonFile(path: str, obj):
  function readFromJsonFile (line 13) | def readFromJsonFile(path: str):
  function loadData (line 17) | def loadData(path):
  function calNegPos (line 35) | def calNegPos(ls):#计算正负比例
  function paddingList (line 54) | def paddingList(ls:list,val,returnTensor=False):
  function truncate (line 61) | def truncate(a:list,b:list,maxLen):
  class MLM_Data (line 77) | class MLM_Data(Dataset):
    method __init__ (line 79) | def __init__(self,textLs:list,maxLen:int,tk:BertTokenizer):
    method __len__ (line 87) | def __len__(self):
    method random_mask (line 90) | def random_mask(self,text_ids):
    method __getitem__ (line 128) | def __getitem__(self, item):
    method collate (line 143) | def collate(cls,batch):
  function blockShuffle (line 163) | def blockShuffle(data:list,bs:int,sortBsNum,key):
  class blockShuffleDataLoader (line 179) | class blockShuffleDataLoader(DataLoader):
    method __init__ (line 180) | def __init__(self, dataset: Dataset,sortBsNum,key,**kwargs):
    method __iter__ (line 186) | def __iter__(self):

FILE: code/bert-base-count3/pretrain/transformers1/__main__.py
  function main (line 2) | def main():

FILE: code/bert-base-count3/pretrain/transformers1/activations.py
  function swish (line 11) | def swish(x):
  function _gelu_python (line 15) | def _gelu_python(x):
  function gelu_new (line 25) | def gelu_new(x):
  function gelu_fast (line 38) | def gelu_fast(x):
  function get_activation (line 52) | def get_activation(activation_string):

FILE: code/bert-base-count3/pretrain/transformers1/benchmark/benchmark.py
  class PyTorchBenchmark (line 38) | class PyTorchBenchmark(Benchmark):
    method framework_version (line 45) | def framework_version(self):
    method train (line 48) | def train(self, model_name, batch_size, sequence_length, trace_memory=...
    method inference (line 100) | def inference(self, model_name, batch_size, sequence_length, trace_mem...

FILE: code/bert-base-count3/pretrain/transformers1/benchmark/benchmark_args.py
  function is_tpu_available (line 37) | def is_tpu_available():
  class PyTorchBenchmarkArguments (line 45) | class PyTorchBenchmarkArguments(BenchmarkArguments):
    method _setup_devices (line 52) | def _setup_devices(self) -> Tuple["torch.device", int]:
    method device_idx (line 67) | def device_idx(self) -> int:
    method device (line 72) | def device(self) -> "torch.device":
    method n_gpu (line 77) | def n_gpu(self):

FILE: code/bert-base-count3/pretrain/transformers1/benchmark/benchmark_args_utils.py
  function list_field (line 24) | def list_field(default=None, metadata=None):
  class BenchmarkArguments (line 29) | class BenchmarkArguments:
    method to_json_string (line 90) | def to_json_string(self):
    method model_names (line 97) | def model_names(self):

FILE: code/bert-base-count3/pretrain/transformers1/benchmark/benchmark_utils.py
  function is_memory_tracing_enabled (line 43) | def is_memory_tracing_enabled():
  class Frame (line 48) | class Frame(NamedTuple):
  class UsedMemoryState (line 65) | class UsedMemoryState(NamedTuple):
  class Memory (line 77) | class Memory(NamedTuple):
    method __repr__ (line 85) | def __repr__(self) -> str:
  class MemoryState (line 89) | class MemoryState(NamedTuple):
  class MemorySummary (line 103) | class MemorySummary(NamedTuple):
  function start_memory_tracing (line 123) | def start_memory_tracing(
  function stop_memory_tracing (line 273) | def stop_memory_tracing(
  function bytes_to_mega_bytes (line 370) | def bytes_to_mega_bytes(memory_amount: int) -> int:
  class Benchmark (line 376) | class Benchmark(ABC):
    method __init__ (line 386) | def __init__(self, args: BenchmarkArguments = None, configs: Pretraine...
    method print_fn (line 401) | def print_fn(self):
    method is_gpu (line 421) | def is_gpu(self):
    method framework_version (line 426) | def framework_version(self):
    method train (line 430) | def train(self, model_name, batch_size, sequence_length):
    method inference (line 434) | def inference(self, model_name, batch_size, sequence_length):
    method run (line 437) | def run(self):
    method environment_info (line 512) | def environment_info(self):
    method print_results (line 572) | def print_results(self, result_dict):
    method print_memory_trace_statistics (line 585) | def print_memory_trace_statistics(self, summary: MemorySummary):
    method save_to_csv (line 609) | def save_to_csv(self, result_dict, filename):

FILE: code/bert-base-count3/pretrain/transformers1/benchmark_utils.py
  function is_memory_tracing_enabled (line 29) | def is_memory_tracing_enabled():
  class Frame (line 34) | class Frame(NamedTuple):
  class UsedMemoryState (line 51) | class UsedMemoryState(NamedTuple):
  class Memory (line 63) | class Memory(NamedTuple):
    method __repr__ (line 71) | def __repr__(self) -> str:
  class MemoryState (line 75) | class MemoryState(NamedTuple):
  class MemorySummary (line 89) | class MemorySummary(NamedTuple):
  function start_memory_tracing (line 108) | def start_memory_tracing(
  function stop_memory_tracing (line 256) | def stop_memory_tracing(
  function bytes_to_human_readable (line 334) | def bytes_to_human_readable(memory_amount: int) -> str:

FILE: code/bert-base-count3/pretrain/transformers1/commands/__init__.py
  class BaseTransformersCLICommand (line 5) | class BaseTransformersCLICommand(ABC):
    method register_subcommand (line 8) | def register_subcommand(parser: ArgumentParser):
    method run (line 12) | def run(self):

FILE: code/bert-base-count3/pretrain/transformers1/commands/convert.py
  function convert_command_factory (line 7) | def convert_command_factory(args: Namespace):
  class ConvertCommand (line 17) | class ConvertCommand(BaseTransformersCLICommand):
    method register_subcommand (line 19) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 46) | def __init__(
    method run (line 64) | def run(self):

FILE: code/bert-base-count3/pretrain/transformers1/commands/download.py
  function download_command_factory (line 6) | def download_command_factory(args):
  class DownloadCommand (line 10) | class DownloadCommand(BaseTransformersCLICommand):
    method register_subcommand (line 12) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 23) | def __init__(self, model: str, cache: str, force: bool):
    method run (line 28) | def run(self):

FILE: code/bert-base-count3/pretrain/transformers1/commands/env.py
  function info_command_factory (line 9) | def info_command_factory(_):
  class EnvironmentCommand (line 13) | class EnvironmentCommand(BaseTransformersCLICommand):
    method register_subcommand (line 15) | def register_subcommand(parser: ArgumentParser):
    method run (line 19) | def run(self):
    method format_dict (line 57) | def format_dict(d):

FILE: code/bert-base-count3/pretrain/transformers1/commands/run.py
  function try_infer_format_from_ext (line 11) | def try_infer_format_from_ext(path: str):
  function run_command_factory (line 25) | def run_command_factory(args):
  class RunCommand (line 44) | class RunCommand(BaseTransformersCLICommand):
    method __init__ (line 45) | def __init__(self, nlp: Pipeline, reader: PipelineDataFormat):
    method register_subcommand (line 50) | def register_subcommand(parser: ArgumentParser):
    method run (line 81) | def run(self):

FILE: code/bert-base-count3/pretrain/transformers1/commands/serving.py
  function Body (line 21) | def Body(*x, **y):
  function serve_command_factory (line 30) | def serve_command_factory(args: Namespace):
  class ServeModelInfoResult (line 45) | class ServeModelInfoResult(BaseModel):
  class ServeTokenizeResult (line 53) | class ServeTokenizeResult(BaseModel):
  class ServeDeTokenizeResult (line 62) | class ServeDeTokenizeResult(BaseModel):
  class ServeForwardResult (line 70) | class ServeForwardResult(BaseModel):
  class ServeCommand (line 78) | class ServeCommand(BaseTransformersCLICommand):
    method register_subcommand (line 80) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 106) | def __init__(self, pipeline: Pipeline, host: str, port: int, workers: ...
    method run (line 156) | def run(self):
    method model_info (line 159) | def model_info(self):
    method tokenize (line 162) | def tokenize(self, text_input: str = Body(None, embed=True), return_id...
    method detokenize (line 180) | def detokenize(
    method forward (line 198) | async def forward(self, inputs=Body(None, embed=True)):

FILE: code/bert-base-count3/pretrain/transformers1/commands/train.py
  function train_command_factory (line 18) | def train_command_factory(args: Namespace):
  class TrainCommand (line 26) | class TrainCommand(BaseTransformersCLICommand):
    method register_subcommand (line 28) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 78) | def __init__(self, args: Namespace):
    method run (line 124) | def run(self):
    method run_torch (line 129) | def run_torch(self):
    method run_tf (line 132) | def run_tf(self):

FILE: code/bert-base-count3/pretrain/transformers1/commands/transformers_cli.py
  function main (line 12) | def main():

FILE: code/bert-base-count3/pretrain/transformers1/commands/user.py
  class UserCommands (line 16) | class UserCommands(BaseTransformersCLICommand):
    method register_subcommand (line 18) | def register_subcommand(parser: ArgumentParser):
  class ANSI (line 47) | class ANSI:
    method bold (line 57) | def bold(cls, s):
    method red (line 61) | def red(cls, s):
  class BaseUserCommand (line 65) | class BaseUserCommand:
    method __init__ (line 66) | def __init__(self, args):
  class LoginCommand (line 71) | class LoginCommand(BaseUserCommand):
    method run (line 72) | def run(self):
  class WhoamiCommand (line 98) | class WhoamiCommand(BaseUserCommand):
    method run (line 99) | def run(self):
  class LogoutCommand (line 115) | class LogoutCommand(BaseUserCommand):
    method run (line 116) | def run(self):
  class ListObjsCommand (line 126) | class ListObjsCommand(BaseUserCommand):
    method tabulate (line 127) | def tabulate(self, rows: List[List[Union[str, int]]], headers: List[st...
    method run (line 142) | def run(self):
  class DeleteObjCommand (line 160) | class DeleteObjCommand(BaseUserCommand):
    method run (line 161) | def run(self):
  class UploadCommand (line 175) | class UploadCommand(BaseUserCommand):
    method walk_dir (line 176) | def walk_dir(self, rel_path):
    method run (line 187) | def run(self):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_albert.py
  class AlbertConfig (line 33) | class AlbertConfig(PretrainedConfig):
    method __init__ (line 104) | def __init__(

FILE: code/bert-base-count3/pretrain/transformers1/configuration_auto.py
  class AutoConfig (line 98) | class AutoConfig:
    method __init__ (line 109) | def __init__(self):
    method for_model (line 116) | def for_model(cls, model_type: str, *args, **kwargs):
    method from_pretrained (line 127) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_bart.py
  class BartConfig (line 34) | class BartConfig(PretrainedConfig):
    method __init__ (line 40) | def __init__(
    method num_attention_heads (line 121) | def num_attention_heads(self) -> int:
    method hidden_size (line 125) | def hidden_size(self) -> int:
    method is_valid_mbart (line 128) | def is_valid_mbart(self) -> bool:

FILE: code/bert-base-count3/pretrain/transformers1/configuration_bert.py
  class BertConfig (line 53) | class BertConfig(PretrainedConfig):
    method __init__ (line 109) | def __init__(

FILE: code/bert-base-count3/pretrain/transformers1/configuration_camembert.py
  class CamembertConfig (line 33) | class CamembertConfig(RobertaConfig):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_ctrl.py
  class CTRLConfig (line 28) | class CTRLConfig(PretrainedConfig):
    method __init__ (line 83) | def __init__(
    method max_position_embeddings (line 125) | def max_position_embeddings(self):
    method hidden_size (line 129) | def hidden_size(self):
    method num_attention_heads (line 133) | def num_attention_heads(self):
    method num_hidden_layers (line 137) | def num_hidden_layers(self):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_distilbert.py
  class DistilBertConfig (line 36) | class DistilBertConfig(PretrainedConfig):
    method __init__ (line 96) | def __init__(
    method hidden_size (line 130) | def hidden_size(self):
    method num_attention_heads (line 134) | def num_attention_heads(self):
    method num_hidden_layers (line 138) | def num_hidden_layers(self):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_electra.py
  class ElectraConfig (line 36) | class ElectraConfig(PretrainedConfig):
    method __init__ (line 95) | def __init__(

FILE: code/bert-base-count3/pretrain/transformers1/configuration_encoder_decoder.py
  class EncoderDecoderConfig (line 26) | class EncoderDecoderConfig(PretrainedConfig):
    method __init__ (line 62) | def __init__(self, **kwargs):
    method from_encoder_decoder_configs (line 79) | def from_encoder_decoder_configs(
    method to_dict (line 90) | def to_dict(self):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_flaubert.py
  class FlaubertConfig (line 33) | class FlaubertConfig(XLMConfig):
    method __init__ (line 147) | def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_...

FILE: code/bert-base-count3/pretrain/transformers1/configuration_gpt2.py
  class GPT2Config (line 35) | class GPT2Config(PretrainedConfig):
    method __init__ (line 117) | def __init__(
    method max_position_embeddings (line 164) | def max_position_embeddings(self):
    method hidden_size (line 168) | def hidden_size(self):
    method num_attention_heads (line 172) | def num_attention_heads(self):
    method num_hidden_layers (line 176) | def num_hidden_layers(self):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_longformer.py
  class LongformerConfig (line 34) | class LongformerConfig(RobertaConfig):
    method __init__ (line 65) | def __init__(self, attention_window: Union[List[int], int] = 512, sep_...

FILE: code/bert-base-count3/pretrain/transformers1/configuration_marian.py
  class MarianConfig (line 25) | class MarianConfig(BartConfig):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_mmbt.py
  class MMBTConfig (line 25) | class MMBTConfig(object):
    method __init__ (line 38) | def __init__(self, config, num_labels=None, modal_hidden_size=2048):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_openai.py
  class OpenAIGPTConfig (line 31) | class OpenAIGPTConfig(PretrainedConfig):
    method __init__ (line 115) | def __init__(
    method max_position_embeddings (line 159) | def max_position_embeddings(self):
    method hidden_size (line 163) | def hidden_size(self):
    method num_attention_heads (line 167) | def num_attention_heads(self):
    method num_hidden_layers (line 171) | def num_hidden_layers(self):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_reformer.py
  class ReformerConfig (line 32) | class ReformerConfig(PretrainedConfig):
    method __init__ (line 141) | def __init__(

FILE: code/bert-base-count3/pretrain/transformers1/configuration_roberta.py
  class RobertaConfig (line 36) | class RobertaConfig(BertConfig):
    method __init__ (line 65) | def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **k...

FILE: code/bert-base-count3/pretrain/transformers1/configuration_t5.py
  class T5Config (line 34) | class T5Config(PretrainedConfig):
    method __init__ (line 64) | def __init__(
    method max_position_embeddings (line 98) | def max_position_embeddings(self):
    method hidden_size (line 102) | def hidden_size(self):
    method num_attention_heads (line 106) | def num_attention_heads(self):
    method num_hidden_layers (line 110) | def num_hidden_layers(self):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_transfo_xl.py
  class TransfoXLConfig (line 31) | class TransfoXLConfig(PretrainedConfig):
    method __init__ (line 117) | def __init__(
    method max_position_embeddings (line 186) | def max_position_embeddings(self):
    method n_token (line 190) | def n_token(self):  # Backward compatibility
    method n_token (line 194) | def n_token(self, value):  # Backward compatibility
    method hidden_size (line 198) | def hidden_size(self):
    method num_attention_heads (line 202) | def num_attention_heads(self):
    method num_hidden_layers (line 206) | def num_hidden_layers(self):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_utils.py
  class PretrainedConfig (line 31) | class PretrainedConfig(object):
    method __init__ (line 56) | def __init__(self, **kwargs):
    method num_labels (line 118) | def num_labels(self):
    method num_labels (line 122) | def num_labels(self, num_labels):
    method save_pretrained (line 126) | def save_pretrained(self, save_directory):
    method from_pretrained (line 146) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs) -> "...
    method get_config_dict (line 205) | def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs)...
    method from_dict (line 270) | def from_dict(cls, config_dict: Dict, **kwargs) -> "PretrainedConfig":
    method from_json_file (line 308) | def from_json_file(cls, json_file: str) -> "PretrainedConfig":
    method _dict_from_json_file (line 324) | def _dict_from_json_file(cls, json_file: str):
    method __eq__ (line 329) | def __eq__(self, other):
    method __repr__ (line 332) | def __repr__(self):
    method to_diff_dict (line 335) | def to_diff_dict(self):
    method to_dict (line 358) | def to_dict(self):
    method to_json_string (line 370) | def to_json_string(self, use_diff=True):
    method to_json_file (line 387) | def to_json_file(self, json_file_path, use_diff=True):
    method update (line 400) | def update(self, config_dict: Dict):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_xlm.py
  class XLMConfig (line 39) | class XLMConfig(PretrainedConfig):
    method __init__ (line 159) | def __init__(
    method n_words (line 235) | def n_words(self):  # For backward compatibility
    method n_words (line 239) | def n_words(self, value):  # For backward compatibility
    method hidden_size (line 243) | def hidden_size(self):
    method num_attention_heads (line 247) | def num_attention_heads(self):
    method num_hidden_layers (line 251) | def num_hidden_layers(self):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_xlm_roberta.py
  class XLMRobertaConfig (line 36) | class XLMRobertaConfig(RobertaConfig):

FILE: code/bert-base-count3/pretrain/transformers1/configuration_xlnet.py
  class XLNetConfig (line 32) | class XLNetConfig(PretrainedConfig):
    method __init__ (line 129) | def __init__(
    method max_position_embeddings (line 194) | def max_position_embeddings(self):
    method n_token (line 198) | def n_token(self):  # Backward compatibility
    method n_token (line 202) | def n_token(self, value):  # Backward compatibility
    method hidden_size (line 206) | def hidden_size(self):
    method num_attention_heads (line 210) | def num_attention_heads(self):
    method num_hidden_layers (line 214) | def num_hidden_layers(self):

FILE: code/bert-base-count3/pretrain/transformers1/convert_albert_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_f...

FILE: code/bert-base-count3/pretrain/transformers1/convert_bart_original_pytorch_checkpoint_to_pytorch.py
  function remove_ignore_keys_ (line 56) | def remove_ignore_keys_(state_dict):
  function rename_key (line 68) | def rename_key(dct, old, new):
  function load_xsum_checkpoint (line 73) | def load_xsum_checkpoint(checkpoint_path):
  function convert_checkpoint_from_disk (line 81) | def convert_checkpoint_from_disk(checkpoint_path, **config_kwargs):
  function convert_bart_checkpoint (line 95) | def convert_bart_checkpoint(checkpoint_path, pytorch_dump_folder_path, h...

FILE: code/bert-base-count3/pretrain/transformers1/convert_bert_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_fil...

FILE: code/bert-base-count3/pretrain/transformers1/convert_bert_pytorch_checkpoint_to_original_tf.py
  function convert_pytorch_checkpoint_to_tf (line 28) | def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, mo...
  function main (line 92) | def main(raw_args=None):

FILE: code/bert-base-count3/pretrain/transformers1/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
  function convert_dialogpt_checkpoint (line 15) | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folde...

FILE: code/bert-base-count3/pretrain/transformers1/convert_electra_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, py...

FILE: code/bert-base-count3/pretrain/transformers1/convert_gpt2_original_tf_checkpoint_to_pytorch.py
  function convert_gpt2_checkpoint_to_pytorch (line 29) | def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config...

FILE: code/bert-base-count3/pretrain/transformers1/convert_graph_to_onnx.py
  class OnnxConverterArgumentParser (line 11) | class OnnxConverterArgumentParser(ArgumentParser):
    method __init__ (line 16) | def __init__(self):
  function ensure_valid_input (line 28) | def ensure_valid_input(model, tokens, input_names):
  function infer_shapes (line 53) | def infer_shapes(nlp: Pipeline, framework: str) -> Tuple[List[str], List...
  function load_graph_from_args (line 100) | def load_graph_from_args(framework: str, model: str, tokenizer: Optional...
  function convert_pytorch (line 111) | def convert_pytorch(nlp: Pipeline, opset: int, output: str, use_external...
  function convert_tensorflow (line 138) | def convert_tensorflow(nlp: Pipeline, opset: int, output: str):
  function convert (line 166) | def convert(
  function verify (line 193) | def verify(path: str):

FILE: code/bert-base-count3/pretrain/transformers1/convert_longformer_original_pytorch_lightning_to_pytorch.py
  class LightningModel (line 26) | class LightningModel(pl.LightningModule):
    method __init__ (line 27) | def __init__(self, model):
    method forward (line 34) | def forward(self):
  function convert_longformer_qa_checkpoint_to_pytorch (line 38) | def convert_longformer_qa_checkpoint_to_pytorch(

FILE: code/bert-base-count3/pretrain/transformers1/convert_marian_to_pytorch.py
  function remove_prefix (line 18) | def remove_prefix(text: str, prefix: str):
  function convert_encoder_layer (line 24) | def convert_encoder_layer(opus_dict, layer_prefix: str, converter: dict):
  function load_layers_ (line 35) | def load_layers_(layer_lst: torch.nn.ModuleList, opus_state: dict, conve...
  function find_pretrained_model (line 42) | def find_pretrained_model(src_lang: str, tgt_lang: str) -> List[str]:
  function add_emb_entries (line 55) | def add_emb_entries(wemb, final_bias, n_special_tokens=1):
  function _cast_yaml_str (line 64) | def _cast_yaml_str(v):
  function cast_marian_config (line 76) | def cast_marian_config(raw_cfg: Dict[str, str]) -> Dict:
  function load_config_from_state_dict (line 83) | def load_config_from_state_dict(opus_dict):
  function find_model_file (line 91) | def find_model_file(dest_dir):  # this one better
  function convert_opus_name_to_hf_name (line 136) | def convert_opus_name_to_hf_name(x):
  function convert_hf_name_to_opus_name (line 142) | def convert_hf_name_to_opus_name(hf_model_name):
  function write_model_card (line 152) | def write_model_card(
  function get_clean_model_id_mapping (line 185) | def get_clean_model_id_mapping(multiling_model_ids):
  function make_registry (line 189) | def make_registry(repo_path="Opus-MT-train/models"):
  function convert_all_sentencepiece_models (line 206) | def convert_all_sentencepiece_models(model_list=None, repo_path=None):
  function lmap (line 222) | def lmap(f, x) -> List:
  function fetch_test_set (line 226) | def fetch_test_set(test_set_url):
  function convert_whole_dir (line 239) | def convert_whole_dir(path=Path("marian_ckpt/")):
  function _parse_readme (line 247) | def _parse_readme(lns):
  function save_tokenizer_config (line 270) | def save_tokenizer_config(dest_dir: Path):
  function add_to_vocab_ (line 276) | def add_to_vocab_(vocab: Dict[str, int], special_tokens: List[str]):
  function find_vocab_file (line 287) | def find_vocab_file(model_dir):
  function add_special_tokens_to_vocab (line 291) | def add_special_tokens_to_vocab(model_dir: Path) -> None:
  function save_tokenizer (line 300) | def save_tokenizer(self, save_directory):
  function check_equal (line 309) | def check_equal(marian_cfg, k1, k2):
  function check_marian_cfg_assumptions (line 314) | def check_marian_cfg_assumptions(marian_cfg):
  class OpusState (line 371) | class OpusState:
    method __init__ (line 372) | def __init__(self, source_dir):
    method _check_layer_entries (line 420) | def _check_layer_entries(self):
    method extra_keys (line 432) | def extra_keys(self):
    method sub_keys (line 445) | def sub_keys(self, layer_prefix):
    method load_marian_model (line 448) | def load_marian_model(self) -> MarianMTModel:
  function download_and_unzip (line 483) | def download_and_unzip(url, dest_dir):
  function convert (line 494) | def convert(source_dir: Path, dest_dir):
  function load_yaml (line 525) | def load_yaml(path):
  function save_json (line 532) | def save_json(content: Union[Dict, List], path: str) -> None:
  function unzip (line 537) | def unzip(zip_path: str, dest_dir: str) -> None:

FILE: code/bert-base-count3/pretrain/transformers1/convert_openai_original_tf_checkpoint_to_pytorch.py
  function convert_openai_checkpoint_to_pytorch (line 29) | def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, ...

FILE: code/bert-base-count3/pretrain/transformers1/convert_pytorch_checkpoint_to_tf2.py
  function convert_pt_checkpoint_to_tf (line 187) | def convert_pt_checkpoint_to_tf(
  function convert_all_pt_checkpoints_to_tf (line 233) | def convert_all_pt_checkpoints_to_tf(

FILE: code/bert-base-count3/pretrain/transformers1/convert_reformer_trax_checkpoint_to_pytorch.py
  function set_param (line 31) | def set_param(torch_layer, weight, bias=None):
  function set_layer_weights_in_torch_lsh (line 40) | def set_layer_weights_in_torch_lsh(weights, torch_layer, hidden_size):
  function set_layer_weights_in_torch_local (line 58) | def set_layer_weights_in_torch_local(weights, torch_layer, hidden_size):
  function set_block_weights_in_torch (line 79) | def set_block_weights_in_torch(weights, torch_block, hidden_size):
  function set_model_weights_in_torch (line 128) | def set_model_weights_in_torch(weights, torch_model, hidden_size):
  function convert_trax_checkpoint_to_pytorch (line 174) | def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file,...

FILE: code/bert-base-count3/pretrain/transformers1/convert_roberta_original_pytorch_checkpoint_to_pytorch.py
  function convert_roberta_checkpoint_to_pytorch (line 42) | def convert_roberta_checkpoint_to_pytorch(

FILE: code/bert-base-count3/pretrain/transformers1/convert_t5_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, py...

FILE: code/bert-base-count3/pretrain/transformers1/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
  function convert_transfo_xl_checkpoint_to_pytorch (line 47) | def convert_transfo_xl_checkpoint_to_pytorch(

FILE: code/bert-base-count3/pretrain/transformers1/convert_xlm_original_pytorch_checkpoint_to_pytorch.py
  function convert_xlm_checkpoint_to_pytorch (line 32) | def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_...

FILE: code/bert-base-count3/pretrain/transformers1/convert_xlnet_original_tf_checkpoint_to_pytorch.py
  function convert_xlnet_checkpoint_to_pytorch (line 51) | def convert_xlnet_checkpoint_to_pytorch(

FILE: code/bert-base-count3/pretrain/transformers1/data/data_collator.py
  class DataCollator (line 12) | class DataCollator(ABC):
    method collate_batch (line 19) | def collate_batch(self) -> Dict[str, torch.Tensor]:
  class DefaultDataCollator (line 33) | class DefaultDataCollator(DataCollator):
    method collate_batch (line 46) | def collate_batch(self, features: List[InputDataClass]) -> Dict[str, t...
  class DataCollatorForLanguageModeling (line 80) | class DataCollatorForLanguageModeling(DataCollator):
    method collate_batch (line 91) | def collate_batch(self, examples: List[torch.Tensor]) -> Dict[str, tor...
    method _tensorize_batch (line 99) | def _tensorize_batch(self, examples: List[torch.Tensor]) -> torch.Tensor:
    method mask_tokens (line 112) | def mask_tokens(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, tor...
    method mask_tokens2 (line 148) | def mask_tokens2(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens3 (line 192) | def mask_tokens3(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens4 (line 259) | def mask_tokens4(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens5 (line 342) | def mask_tokens5(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens6 (line 427) | def mask_tokens6(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens7 (line 507) | def mask_tokens7(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...

FILE: code/bert-base-count3/pretrain/transformers1/data/datasets/glue.py
  class GlueDataTrainingArguments (line 23) | class GlueDataTrainingArguments:
    method __post_init__ (line 47) | def __post_init__(self):
  class Split (line 51) | class Split(Enum):
  class GlueDataset (line 57) | class GlueDataset(Dataset):
    method __init__ (line 67) | def __init__(
    method __len__ (line 135) | def __len__(self):
    method __getitem__ (line 138) | def __getitem__(self, i) -> InputFeatures:
    method get_labels (line 141) | def get_labels(self):

FILE: code/bert-base-count3/pretrain/transformers1/data/datasets/language_modeling.py
  class TextDataset (line 16) | class TextDataset(Dataset):
    method __init__ (line 22) | def __init__(
    method __len__ (line 71) | def __len__(self):
    method __getitem__ (line 74) | def __getitem__(self, i) -> torch.Tensor:
  class LineByLineTextDataset (line 78) | class LineByLineTextDataset(Dataset):
    method __init__ (line 84) | def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, blo...
    method __len__ (line 97) | def __len__(self):
    method __getitem__ (line 100) | def __getitem__(self, i) -> torch.Tensor:

FILE: code/bert-base-count3/pretrain/transformers1/data/metrics/__init__.py
  function is_sklearn_available (line 26) | def is_sklearn_available():
  function simple_accuracy (line 32) | def simple_accuracy(preds, labels):
  function acc_and_f1 (line 35) | def acc_and_f1(preds, labels):
  function pearson_and_spearman (line 44) | def pearson_and_spearman(preds, labels):
  function glue_compute_metrics (line 53) | def glue_compute_metrics(task_name, preds, labels):
  function xnli_compute_metrics (line 80) | def xnli_compute_metrics(task_name, preds, labels):

FILE: code/bert-base-count3/pretrain/transformers1/data/metrics/squad_metrics.py
  function normalize_answer (line 24) | def normalize_answer(s):
  function get_tokens (line 44) | def get_tokens(s):
  function compute_exact (line 50) | def compute_exact(a_gold, a_pred):
  function compute_f1 (line 54) | def compute_f1(a_gold, a_pred):
  function get_raw_scores (line 70) | def get_raw_scores(examples, preds):
  function apply_no_ans_threshold (line 96) | def apply_no_ans_threshold(scores, na_probs, qid_to_has_ans, na_prob_thr...
  function make_eval_dict (line 107) | def make_eval_dict(exact_scores, f1_scores, qid_list=None):
  function merge_eval (line 128) | def merge_eval(main_eval, new_eval, prefix):
  function find_best_thresh_v2 (line 133) | def find_best_thresh_v2(preds, scores, na_probs, qid_to_has_ans):
  function find_all_best_thresh_v2 (line 167) | def find_all_best_thresh_v2(main_eval, preds, exact_raw, f1_raw, na_prob...
  function find_best_thresh (line 178) | def find_best_thresh(preds, scores, na_probs, qid_to_has_ans):
  function find_all_best_thresh (line 201) | def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, ...
  function squad_evaluate (line 211) | def squad_evaluate(examples, preds, no_answer_probs=None, no_answer_prob...
  function get_final_text (line 242) | def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=...
  function _get_best_indexes (line 336) | def _get_best_indexes(logits, n_best_size):
  function _compute_softmax (line 348) | def _compute_softmax(scores):
  function compute_predictions_logits (line 371) | def compute_predictions_logits(
  function compute_predictions_log_probs (line 576) | def compute_predictions_log_probs(

FILE: code/bert-base-count3/pretrain/transformers1/data/processors/glue.py
  function glue_convert_examples_to_features (line 34) | def glue_convert_examples_to_features(
  function _tf_glue_convert_examples_to_features (line 70) | def _tf_glue_convert_examples_to_features(
  function _glue_convert_examples_to_features (line 107) | def _glue_convert_examples_to_features(
  class OutputMode (line 159) | class OutputMode(Enum):
  class MrpcProcessor (line 164) | class MrpcProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 167) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 176) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 181) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 185) | def get_test_examples(self, data_dir):
    method get_labels (line 189) | def get_labels(self):
    method _create_examples (line 193) | def _create_examples(self, lines, set_type):
  class MnliProcessor (line 207) | class MnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 210) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 219) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 223) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 227) | def get_test_examples(self, data_dir):
    method get_labels (line 231) | def get_labels(self):
    method _create_examples (line 235) | def _create_examples(self, lines, set_type):
  class MnliMismatchedProcessor (line 249) | class MnliMismatchedProcessor(MnliProcessor):
    method get_dev_examples (line 252) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 256) | def get_test_examples(self, data_dir):
  class ColaProcessor (line 261) | class ColaProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 264) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 273) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 277) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 281) | def get_test_examples(self, data_dir):
    method get_labels (line 285) | def get_labels(self):
    method _create_examples (line 289) | def _create_examples(self, lines, set_type):
  class Sst2Processor (line 304) | class Sst2Processor(DataProcessor):
    method get_example_from_tensor_dict (line 307) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 316) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 320) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 324) | def get_test_examples(self, data_dir):
    method get_labels (line 328) | def get_labels(self):
    method _create_examples (line 332) | def _create_examples(self, lines, set_type):
  class StsbProcessor (line 346) | class StsbProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 349) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 358) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 362) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 366) | def get_test_examples(self, data_dir):
    method get_labels (line 370) | def get_labels(self):
    method _create_examples (line 374) | def _create_examples(self, lines, set_type):
  class QqpProcessor (line 388) | class QqpProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 391) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 400) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 404) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 408) | def get_test_examples(self, data_dir):
    method get_labels (line 412) | def get_labels(self):
    method _create_examples (line 416) | def _create_examples(self, lines, set_type):
  class QnliProcessor (line 436) | class QnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 439) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 448) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 452) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 456) | def get_test_examples(self, data_dir):
    method get_labels (line 460) | def get_labels(self):
    method _create_examples (line 464) | def _create_examples(self, lines, set_type):
  class RteProcessor (line 478) | class RteProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 481) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 490) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 494) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 498) | def get_test_examples(self, data_dir):
    method get_labels (line 502) | def get_labels(self):
    method _create_examples (line 506) | def _create_examples(self, lines, set_type):
  class WnliProcessor (line 520) | class WnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 523) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 532) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 536) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 540) | def get_test_examples(self, data_dir):
    method get_labels (line 544) | def get_labels(self):
    method _create_examples (line 548) | def _create_examples(self, lines, set_type):

FILE: code/bert-base-count3/pretrain/transformers1/data/processors/squad.py
  function _improve_answer_span (line 25) | def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, ...
  function _check_is_max_context (line 38) | def _check_is_max_context(doc_spans, cur_span_index, position):
  function _new_check_is_max_context (line 58) | def _new_check_is_max_context(doc_spans, cur_span_index, position):
  function _is_whitespace (line 80) | def _is_whitespace(c):
  function squad_convert_example_to_features (line 86) | def squad_convert_example_to_features(example, max_seq_length, doc_strid...
  function squad_convert_example_to_features_init (line 264) | def squad_convert_example_to_features_init(tokenizer_for_convert):
  function squad_convert_examples_to_features (line 269) | def squad_convert_examples_to_features(
  class SquadProcessor (line 445) | class SquadProcessor(DataProcessor):
    method _get_example_from_tensor_dict (line 454) | def _get_example_from_tensor_dict(self, tensor_dict, evaluate=False):
    method get_examples_from_dataset (line 478) | def get_examples_from_dataset(self, dataset, evaluate=False):
    method get_train_examples (line 509) | def get_train_examples(self, data_dir, filename=None):
    method get_dev_examples (line 531) | def get_dev_examples(self, data_dir, filename=None):
    method _create_examples (line 552) | def _create_examples(self, input_data, set_type):
  class SquadV1Processor (line 594) | class SquadV1Processor(SquadProcessor):
  class SquadV2Processor (line 599) | class SquadV2Processor(SquadProcessor):
  class SquadExample (line 604) | class SquadExample(object):
    method __init__ (line 619) | def __init__(
  class SquadFeatures (line 667) | class SquadFeatures(object):
    method __init__ (line 692) | def __init__(
  class SquadResult (line 729) | class SquadResult(object):
    method __init__ (line 739) | def __init__(self, unique_id, start_logits, end_logits, start_top_inde...

FILE: code/bert-base-count3/pretrain/transformers1/data/processors/utils.py
  class InputExample (line 31) | class InputExample:
    method to_json_string (line 50) | def to_json_string(self):
  class InputFeatures (line 56) | class InputFeatures:
    method to_json_string (line 77) | def to_json_string(self):
  class DataProcessor (line 82) | class DataProcessor:
    method get_example_from_tensor_dict (line 85) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 93) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 97) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 101) | def get_test_examples(self, data_dir):
    method get_labels (line 105) | def get_labels(self):
    method tfds_map (line 109) | def tfds_map(self, example):
    method _read_tsv (line 117) | def _read_tsv(cls, input_file, quotechar=None):
  class SingleSentenceClassificationProcessor (line 123) | class SingleSentenceClassificationProcessor(DataProcessor):
    method __init__ (line 126) | def __init__(self, labels=None, examples=None, mode="classification", ...
    method __len__ (line 132) | def __len__(self):
    method __getitem__ (line 135) | def __getitem__(self, idx):
    method create_from_csv (line 141) | def create_from_csv(
    method create_from_examples (line 158) | def create_from_examples(cls, texts_or_text_and_labels, labels=None, *...
    method add_examples_from_csv (line 163) | def add_examples_from_csv(
    method add_examples (line 193) | def add_examples(
    method get_features (line 226) | def get_features(

FILE: code/bert-base-count3/pretrain/transformers1/data/processors/xnli.py
  class XnliProcessor (line 28) | class XnliProcessor(DataProcessor):
    method __init__ (line 32) | def __init__(self, language, train_language=None):
    method get_train_examples (line 36) | def get_train_examples(self, data_dir):
    method get_test_examples (line 52) | def get_test_examples(self, data_dir):
    method get_labels (line 70) | def get_labels(self):

FILE: code/bert-base-count3/pretrain/transformers1/file_utils.py
  function is_torch_available (line 93) | def is_torch_available():
  function is_tf_available (line 97) | def is_tf_available():
  function add_start_docstrings (line 101) | def add_start_docstrings(*docstr):
  function add_start_docstrings_to_callable (line 109) | def add_start_docstrings_to_callable(*docstr):
  function add_end_docstrings (line 127) | def add_end_docstrings(*docstr):
  function is_remote_url (line 135) | def is_remote_url(url_or_filename):
  function hf_bucket_url (line 140) | def hf_bucket_url(model_id: str, filename: str, use_cdn=True) -> str:
  function url_to_filename (line 164) | def url_to_filename(url, etag=None):
  function filename_to_url (line 188) | def filename_to_url(filename, cache_dir=None):
  function cached_path (line 214) | def cached_path(
  function http_get (line 306) | def http_get(url, temp_file, proxies=None, resume_size=0, user_agent=None):
  function get_from_cache (line 339) | def get_from_cache(
  class cached_property (line 453) | class cached_property(property):
    method __get__ (line 462) | def __get__(self, obj, objtype=None):
  function torch_required (line 476) | def torch_required(func):
  function tf_required (line 488) | def tf_required(func):

FILE: code/bert-base-count3/pretrain/transformers1/hf_api.py
  class S3Obj (line 29) | class S3Obj:
    method __init__ (line 34) | def __init__(self, filename: str, LastModified: str, ETag: str, Size: ...
  class PresignedUrl (line 41) | class PresignedUrl:
    method __init__ (line 42) | def __init__(self, write: str, access: str, type: str, **kwargs):
  class S3Object (line 48) | class S3Object:
    method __init__ (line 53) | def __init__(
  class ModelInfo (line 69) | class ModelInfo:
    method __init__ (line 74) | def __init__(
  class HfApi (line 92) | class HfApi:
    method __init__ (line 93) | def __init__(self, endpoint=None):
    method login (line 96) | def login(self, username: str, password: str) -> str:
    method whoami (line 112) | def whoami(self, token: str) -> Tuple[str, List[str]]:
    method logout (line 122) | def logout(self, token: str) -> None:
    method presign (line 130) | def presign(self, token: str, filename: str, organization: Optional[st...
    method presign_and_upload (line 144) | def presign_and_upload(self, token: str, filename: str, filepath: str,...
    method list_objs (line 166) | def list_objs(self, token: str, organization: Optional[str] = None) ->...
    method delete_obj (line 177) | def delete_obj(self, token: str, filename: str, organization: Optional...
    method model_list (line 189) | def model_list(self) -> List[ModelInfo]:
  class TqdmProgressFileReader (line 200) | class TqdmProgressFileReader:
    method __init__ (line 209) | def __init__(self, f: io.BufferedReader):
    method _read (line 216) | def _read(self, n=-1):
    method close (line 220) | def close(self):
  class HfFolder (line 224) | class HfFolder:
    method save_token (line 228) | def save_token(cls, token):
    method get_token (line 237) | def get_token(cls):
    method delete_token (line 248) | def delete_token(cls):

FILE: code/bert-base-count3/pretrain/transformers1/hf_argparser.py
  class HfArgumentParser (line 14) | class HfArgumentParser(ArgumentParser):
    method __init__ (line 26) | def __init__(self, dataclass_types: Union[DataClassType, Iterable[Data...
    method _add_dataclass_arguments (line 42) | def _add_dataclass_arguments(self, dtype: DataClassType):
    method parse_args_into_dataclasses (line 88) | def parse_args_into_dataclasses(
    method parse_json_file (line 146) | def parse_json_file(self, json_file: str) -> Tuple[DataClass, ...]:

FILE: code/bert-base-count3/pretrain/transformers1/modelcard.py
  class ModelCard (line 38) | class ModelCard:
    method __init__ (line 55) | def __init__(self, **kwargs):
    method save_pretrained (line 75) | def save_pretrained(self, save_directory_or_file):
    method from_pretrained (line 88) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    method from_dict (line 186) | def from_dict(cls, json_object):
    method from_json_file (line 191) | def from_json_file(cls, json_file):
    method __eq__ (line 198) | def __eq__(self, other):
    method __repr__ (line 201) | def __repr__(self):
    method to_dict (line 204) | def to_dict(self):
    method to_json_string (line 209) | def to_json_string(self):
    method to_json_file (line 213) | def to_json_file(self, json_file_path):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_albert.py
  function load_tf_weights_in_albert (line 47) | def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
  class AlbertEmbeddings (line 171) | class AlbertEmbeddings(BertEmbeddings):
    method __init__ (line 176) | def __init__(self, config):
  class AlbertAttention (line 185) | class AlbertAttention(BertSelfAttention):
    method __init__ (line 186) | def __init__(self, config):
    method prune_heads (line 198) | def prune_heads(self, heads):
    method forward (line 221) | def forward(self, input_ids, attention_mask=None, head_mask=None):
  class AlbertLayer (line 266) | class AlbertLayer(nn.Module):
    method __init__ (line 267) | def __init__(self, config):
    method forward (line 277) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertLayerGroup (line 287) | class AlbertLayerGroup(nn.Module):
    method __init__ (line 288) | def __init__(self, config):
    method forward (line 295) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertTransformer (line 317) | class AlbertTransformer(nn.Module):
    method __init__ (line 318) | def __init__(self, config):
    method forward (line 327) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertPreTrainedModel (line 363) | class AlbertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 371) | def _init_weights(self, module):
  class AlbertModel (line 439) | class AlbertModel(AlbertPreTrainedModel):
    method __init__ (line 445) | def __init__(self, config):
    method get_input_embeddings (line 456) | def get_input_embeddings(self):
    method set_input_embeddings (line 459) | def set_input_embeddings(self, value):
    method _resize_token_embeddings (line 462) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 468) | def _prune_heads(self, heads_to_prune):
    method forward (line 487) | def forward(
  class AlbertForPreTraining (line 576) | class AlbertForPreTraining(AlbertPreTrainedModel):
    method __init__ (line 577) | def __init__(self, config):
    method tie_weights (line 587) | def tie_weights(self):
    method get_output_embeddings (line 590) | def get_output_embeddings(self):
    method forward (line 594) | def forward(
  class AlbertMLMHead (line 680) | class AlbertMLMHead(nn.Module):
    method __init__ (line 681) | def __init__(self, config):
    method forward (line 693) | def forward(self, hidden_states):
  class AlbertSOPHead (line 704) | class AlbertSOPHead(nn.Module):
    method __init__ (line 705) | def __init__(self, config):
    method forward (line 711) | def forward(self, pooled_output):
  class AlbertForMaskedLM (line 720) | class AlbertForMaskedLM(AlbertPreTrainedModel):
    method __init__ (line 721) | def __init__(self, config):
    method tie_weights (line 730) | def tie_weights(self):
    method get_output_embeddings (line 733) | def get_output_embeddings(self):
    method forward (line 737) | def forward(
  class AlbertForSequenceClassification (line 810) | class AlbertForSequenceClassification(AlbertPreTrainedModel):
    method __init__ (line 811) | def __init__(self, config):
    method forward (line 822) | def forward(
  class AlbertForTokenClassification (line 905) | class AlbertForTokenClassification(AlbertPreTrainedModel):
    method __init__ (line 906) | def __init__(self, config):
    method forward (line 917) | def forward(
  class AlbertForQuestionAnswering (line 1002) | class AlbertForQuestionAnswering(AlbertPreTrainedModel):
    method __init__ (line 1003) | def __init__(self, config):
    method forward (line 1013) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_auto.py
  class AutoModel (line 269) | class AutoModel:
    method __init__ (line 279) | def __init__(self):
    method from_config (line 287) | def from_config(cls, config):
    method from_pretrained (line 329) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForPreTraining (line 424) | class AutoModelForPreTraining:
    method __init__ (line 433) | def __init__(self):
    method from_config (line 441) | def from_config(cls, config):
    method from_pretrained (line 483) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelWithLMHead (line 570) | class AutoModelWithLMHead:
    method __init__ (line 580) | def __init__(self):
    method from_config (line 588) | def from_config(cls, config):
    method from_pretrained (line 630) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForSequenceClassification (line 718) | class AutoModelForSequenceClassification:
    method __init__ (line 728) | def __init__(self):
    method from_config (line 736) | def from_config(cls, config):
    method from_pretrained (line 778) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForQuestionAnswering (line 867) | class AutoModelForQuestionAnswering:
    method __init__ (line 877) | def __init__(self):
    method from_config (line 885) | def from_config(cls, config):
    method from_pretrained (line 924) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForTokenClassification (line 1009) | class AutoModelForTokenClassification:
    method __init__ (line 1019) | def __init__(self):
    method from_config (line 1027) | def from_config(cls, config):
    method from_pretrained (line 1069) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForMultipleChoice (line 1156) | class AutoModelForMultipleChoice:
    method __init__ (line 1166) | def __init__(self):
    method from_config (line 1174) | def from_config(cls, config):
    method from_pretrained (line 1189) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...

FILE: code/bert-base-count3/pretrain/transformers1/modeling_bart.py
  function invert_mask (line 94) | def invert_mask(attention_mask):
  function _prepare_bart_decoder_inputs (line 99) | def _prepare_bart_decoder_inputs(
  class PretrainedBartModel (line 120) | class PretrainedBartModel(PreTrainedModel):
    method _init_weights (line 124) | def _init_weights(self, module):
    method dummy_inputs (line 138) | def dummy_inputs(self):
  function _make_linear_from_emb (line 148) | def _make_linear_from_emb(emb):
  function _check_shapes (line 156) | def _check_shapes(shape_1, shape2):
  function shift_tokens_right (line 161) | def shift_tokens_right(input_ids, pad_token_id):
  function make_padding_mask (line 170) | def make_padding_mask(input_ids, padding_idx=1):
  class EncoderLayer (line 181) | class EncoderLayer(nn.Module):
    method __init__ (line 182) | def __init__(self, config: BartConfig):
    method forward (line 198) | def forward(self, x, encoder_padding_mask):
  class BartEncoder (line 234) | class BartEncoder(nn.Module):
    method __init__ (line 243) | def __init__(self, config: BartConfig, embed_tokens):
    method forward (line 270) | def forward(
  class DecoderLayer (line 327) | class DecoderLayer(nn.Module):
    method __init__ (line 328) | def __init__(self, config: BartConfig):
    method forward (line 352) | def forward(
  class BartDecoder (line 416) | class BartDecoder(nn.Module):
    method __init__ (line 425) | def __init__(self, config: BartConfig, embed_tokens: nn.Embedding):
    method forward (line 449) | def forward(
  function _reorder_buffer (line 542) | def _reorder_buffer(attn_cache, new_order):
  class SelfAttention (line 549) | class SelfAttention(nn.Module):
    method __init__ (line 552) | def __init__(
    method _shape (line 575) | def _shape(self, tensor, dim_0, bsz):
    method forward (line 578) | def forward(
    method _use_saved_state (line 663) | def _use_saved_state(self, k, v, saved_state, key_padding_mask, static...
    method _cat_prev_key_padding_mask (line 691) | def _cat_prev_key_padding_mask(
  class BartClassificationHead (line 718) | class BartClassificationHead(nn.Module):
    method __init__ (line 723) | def __init__(
    method forward (line 731) | def forward(self, x):
  class LearnedPositionalEmbedding (line 740) | class LearnedPositionalEmbedding(nn.Embedding):
    method __init__ (line 748) | def __init__(
    method forward (line 757) | def forward(self, input, use_cache=False):
  function LayerNorm (line 767) | def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True):
  function fill_with_neg_inf (line 778) | def fill_with_neg_inf(t):
  function _filter_out_falsey_values (line 783) | def _filter_out_falsey_values(tup) -> Tuple:
  function _get_shape (line 789) | def _get_shape(t):
  class BartModel (line 796) | class BartModel(PretrainedBartModel):
    method __init__ (line 797) | def __init__(self, config: BartConfig):
    method forward (line 811) | def forward(
    method get_input_embeddings (line 854) | def get_input_embeddings(self):
    method set_input_embeddings (line 857) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 862) | def get_output_embeddings(self):
  class BartForConditionalGeneration (line 870) | class BartForConditionalGeneration(PretrainedBartModel):
    method __init__ (line 873) | def __init__(self, config: BartConfig):
    method resize_token_embeddings (line 879) | def resize_token_embeddings(self, new_num_tokens: int) -> nn.Embedding:
    method _resize_final_logits_bias (line 886) | def _resize_final_logits_bias(self, new_num_tokens: int, old_num_token...
    method forward (line 895) | def forward(
    method prepare_inputs_for_generation (line 967) | def prepare_inputs_for_generation(self, decoder_input_ids, past, atten...
    method prepare_logits_for_generation (line 984) | def prepare_logits_for_generation(self, logits, cur_len, max_length):
    method _force_token_ids_generation (line 991) | def _force_token_ids_generation(self, scores, token_ids) -> None:
    method _reorder_cache (line 1004) | def _reorder_cache(past, beam_idx):
    method get_encoder (line 1020) | def get_encoder(self):
    method get_output_embeddings (line 1023) | def get_output_embeddings(self):
  class BartForSequenceClassification (line 1031) | class BartForSequenceClassification(PretrainedBartModel):
    method __init__ (line 1032) | def __init__(self, config: BartConfig, **kwargs):
    method forward (line 1042) | def forward(
  class SinusoidalPositionalEmbedding (line 1109) | class SinusoidalPositionalEmbedding(nn.Embedding):
    method __init__ (line 1112) | def __init__(self, num_positions, embedding_dim, padding_idx=None):
    method _init_weight (line 1119) | def _init_weight(out: nn.Parameter):
    method forward (line 1134) | def forward(self, input_ids, use_cache=False):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_beam_search.py
  class TransformerBeamSearch (line 29) | class TransformerBeamSearch(nn.Module):
    method __init__ (line 30) | def __init__(
    method step (line 80) | def step(self, log_probabilities):
    method forward (line 177) | def forward(self, encoder_input_ids, **kwargs):
    method remove_repeating_trigrams (line 224) | def remove_repeating_trigrams(self, log_probabilities, _B):
    method enforce_min_length (line 233) | def enforce_min_length(self):
    method enforce_max_length (line 237) | def enforce_max_length(self):
    method length_penalty (line 241) | def length_penalty(self):
  function tile (line 245) | def tile(x, count, dim=0):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_bert.py
  function load_tf_weights_in_bert (line 62) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
  function mish (line 134) | def mish(x):
  class BertEmbeddings (line 144) | class BertEmbeddings(nn.Module):
    method __init__ (line 148) | def __init__(self, config):
    method forward (line 159) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No...
  class BertSelfAttention (line 184) | class BertSelfAttention(nn.Module):
    method __init__ (line 185) | def __init__(self, config):
    method transpose_for_scores (line 204) | def transpose_for_scores(self, x):
    method forward (line 209) | def forward(
  class BertSelfOutput (line 262) | class BertSelfOutput(nn.Module):
    method __init__ (line 263) | def __init__(self, config):
    method forward (line 269) | def forward(self, hidden_states, input_tensor):
  class BertAttention (line 276) | class BertAttention(nn.Module):
    method __init__ (line 277) | def __init__(self, config):
    method prune_heads (line 283) | def prune_heads(self, heads):
    method forward (line 306) | def forward(
  class BertIntermediate (line 322) | class BertIntermediate(nn.Module):
    method __init__ (line 323) | def __init__(self, config):
    method forward (line 331) | def forward(self, hidden_states):
  class BertOutput (line 337) | class BertOutput(nn.Module):
    method __init__ (line 338) | def __init__(self, config):
    method forward (line 344) | def forward(self, hidden_states, input_tensor):
  class BertLayer (line 351) | class BertLayer(nn.Module):
    method __init__ (line 352) | def __init__(self, config):
    method forward (line 361) | def forward(
  class BertEncoder (line 386) | class BertEncoder(nn.Module):
    method __init__ (line 387) | def __init__(self, config):
    method forward (line 393) | def forward(
  class BertPooler (line 427) | class BertPooler(nn.Module):
    method __init__ (line 428) | def __init__(self, config):
    method forward (line 433) | def forward(self, hidden_states):
  class BertPredictionHeadTransform (line 442) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 443) | def __init__(self, config):
    method forward (line 452) | def forward(self, hidden_states):
  class BertLMPredictionHead (line 459) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 460) | def __init__(self, config):
    method forward (line 473) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 479) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 480) | def __init__(self, config):
    method forward (line 484) | def forward(self, sequence_output):
  class BertOnlyNSPHead (line 489) | class BertOnlyNSPHead(nn.Module):
    method __init__ (line 490) | def __init__(self, config):
    method forward (line 494) | def forward(self, pooled_output):
  class BertPreTrainingHeads (line 499) | class BertPreTrainingHeads(nn.Module):
    method __init__ (line 500) | def __init__(self, config):
    method forward (line 505) | def forward(self, sequence_output, pooled_output):
  class BertPreTrainedModel (line 511) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 520) | def _init_weights(self, module):
  class BertModel (line 594) | class BertModel(BertPreTrainedModel):
    method __init__ (line 611) | def __init__(self, config):
    method get_input_embeddings (line 621) | def get_input_embeddings(self):
    method set_input_embeddings (line 624) | def set_input_embeddings(self, value):
    method _prune_heads (line 627) | def _prune_heads(self, heads_to_prune):
    method forward (line 636) | def forward(
  class BertForPreTraining (line 750) | class BertForPreTraining(BertPreTrainedModel):
    method __init__ (line 751) | def __init__(self, config):
    method get_output_embeddings (line 759) | def get_output_embeddings(self):
    method forward (line 763) | def forward(
  class BertForMaskedLM (line 850) | class BertForMaskedLM(BertPreTrainedModel):
    method __init__ (line 851) | def __init__(self, config):
    method get_output_embeddings (line 859) | def get_output_embeddings(self):
    method forward (line 863) | def forward(
    method prepare_inputs_for_generation (line 960) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class BertForNextSentencePrediction (line 986) | class BertForNextSentencePrediction(BertPreTrainedModel):
    method __init__ (line 987) | def __init__(self, config):
    method forward (line 996) | def forward(
  class BertForSequenceClassification (line 1074) | class BertForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 1075) | def __init__(self, config):
    method forward (line 1086) | def forward(
  class BertForMultipleChoice (line 1171) | class BertForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1172) | def __init__(self, config):
    method forward (line 1182) | def forward(
  class BertForTokenClassification (line 1274) | class BertForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1275) | def __init__(self, config):
    method forward (line 1286) | def forward(
  class BertForQuestionAnswering (line 1372) | class BertForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 1373) | def __init__(self, config):
    method forward (line 1383) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_camembert.py
  class CamembertModel (line 59) | class CamembertModel(RobertaModel):
  class CamembertForMaskedLM (line 71) | class CamembertForMaskedLM(RobertaForMaskedLM):
  class CamembertForSequenceClassification (line 85) | class CamembertForSequenceClassification(RobertaForSequenceClassification):
  class CamembertForMultipleChoice (line 99) | class CamembertForMultipleChoice(RobertaForMultipleChoice):
  class CamembertForTokenClassification (line 113) | class CamembertForTokenClassification(RobertaForTokenClassification):
  class CamembertForQuestionAnswering (line 127) | class CamembertForQuestionAnswering(RobertaForQuestionAnswering):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_ctrl.py
  function angle_defn (line 39) | def angle_defn(pos, i, d_model_size):
  function positional_encoding (line 44) | def positional_encoding(position, d_model_size, dtype):
  function scaled_dot_product_attention (line 59) | def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, hea...
  class MultiHeadAttention (line 85) | class MultiHeadAttention(torch.nn.Module):
    method __init__ (line 86) | def __init__(self, d_model_size, num_heads, output_attentions=False):
    method split_into_heads (line 100) | def split_into_heads(self, x, batch_size):
    method forward (line 104) | def forward(self, v, k, q, mask, layer_past=None, attention_mask=None,...
  function point_wise_feed_forward_network (line 136) | def point_wise_feed_forward_network(d_model_size, dff):
  class EncoderLayer (line 140) | class EncoderLayer(torch.nn.Module):
    method __init__ (line 141) | def __init__(self, d_model_size, num_heads, dff, rate=0.1, output_atte...
    method forward (line 153) | def forward(self, x, mask, layer_past=None, attention_mask=None, head_...
  class CTRLPreTrainedModel (line 178) | class CTRLPreTrainedModel(PreTrainedModel):
    method _init_weights (line 186) | def _init_weights(self, module):
  class CTRLModel (line 263) | class CTRLModel(CTRLPreTrainedModel):
    method __init__ (line 264) | def __init__(self, config):
    method get_input_embeddings (line 287) | def get_input_embeddings(self):
    method set_input_embeddings (line 290) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 293) | def _prune_heads(self, heads_to_prune):
    method forward (line 301) | def forward(
  class CTRLLMHeadModel (line 458) | class CTRLLMHeadModel(CTRLPreTrainedModel):
    method __init__ (line 459) | def __init__(self, config):
    method get_output_embeddings (line 466) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 469) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 477) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_distilbert.py
  function create_sinusoidal_embeddings (line 54) | def create_sinusoidal_embeddings(n_pos, dim, out):
  class Embeddings (line 62) | class Embeddings(nn.Module):
    method __init__ (line 63) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids):
  class MultiHeadSelfAttention (line 100) | class MultiHeadSelfAttention(nn.Module):
    method __init__ (line 101) | def __init__(self, config):
    method prune_heads (line 118) | def prune_heads(self, heads):
    method forward (line 139) | def forward(self, query, key, value, mask, head_mask=None):
  class FFN (line 198) | class FFN(nn.Module):
    method __init__ (line 199) | def __init__(self, config):
    method forward (line 209) | def forward(self, input):
  class TransformerBlock (line 217) | class TransformerBlock(nn.Module):
    method __init__ (line 218) | def __init__(self, config):
    method forward (line 231) | def forward(self, x, attn_mask=None, head_mask=None):
  class Transformer (line 264) | class Transformer(nn.Module):
    method __init__ (line 265) | def __init__(self, config):
    method forward (line 274) | def forward(self, x, attn_mask=None, head_mask=None):
  class DistilBertPreTrainedModel (line 325) | class DistilBertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 334) | def _init_weights(self, module):
  class DistilBertModel (line 392) | class DistilBertModel(DistilBertPreTrainedModel):
    method __init__ (line 393) | def __init__(self, config):
    method get_input_embeddings (line 401) | def get_input_embeddings(self):
    method set_input_embeddings (line 404) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 407) | def _prune_heads(self, heads_to_prune):
    method forward (line 416) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForMaskedLM (line 477) | class DistilBertForMaskedLM(DistilBertPreTrainedModel):
    method __init__ (line 478) | def __init__(self, config):
    method get_output_embeddings (line 492) | def get_output_embeddings(self):
    method forward (line 496) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForSequenceClassification (line 558) | class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
    method __init__ (line 559) | def __init__(self, config):
    method forward (line 571) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForQuestionAnswering (line 638) | class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
    method __init__ (line 639) | def __init__(self, config):
    method forward (line 650) | def forward(
  class DistilBertForTokenClassification (line 740) | class DistilBertForTokenClassification(DistilBertPreTrainedModel):
    method __init__ (line 741) | def __init__(self, config):
    method forward (line 752) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...

FILE: code/bert-base-count3/pretrain/transformers1/modeling_electra.py
  function load_tf_weights_in_electra (line 28) | def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discri...
  class ElectraEmbeddings (line 109) | class ElectraEmbeddings(BertEmbeddings):
    method __init__ (line 112) | def __init__(self, config):
  class ElectraDiscriminatorPredictions (line 123) | class ElectraDiscriminatorPredictions(nn.Module):
    method __init__ (line 126) | def __init__(self, config):
    method forward (line 133) | def forward(self, discriminator_hidden_states, attention_mask):
  class ElectraGeneratorPredictions (line 141) | class ElectraGeneratorPredictions(nn.Module):
    method __init__ (line 144) | def __init__(self, config):
    method forward (line 150) | def forward(self, generator_hidden_states):
  class ElectraPreTrainedModel (line 158) | class ElectraPreTrainedModel(BertPreTrainedModel):
  class ElectraModel (line 233) | class ElectraModel(ElectraPreTrainedModel):
    method __init__ (line 237) | def __init__(self, config):
    method get_input_embeddings (line 248) | def get_input_embeddings(self):
    method set_input_embeddings (line 251) | def set_input_embeddings(self, value):
    method _prune_heads (line 254) | def _prune_heads(self, heads_to_prune):
    method forward (line 263) | def forward(
  class ElectraClassificationHead (line 334) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 337) | def __init__(self, config):
    method forward (line 343) | def forward(self, features, **kwargs):
  class ElectraForSequenceClassification (line 358) | class ElectraForSequenceClassification(ElectraPreTrainedModel):
    method __init__ (line 359) | def __init__(self, config):
    method forward (line 368) | def forward(
  class ElectraForPreTraining (line 448) | class ElectraForPreTraining(ElectraPreTrainedModel):
    method __init__ (line 449) | def __init__(self, config):
    method forward (line 457) | def forward(
  class ElectraForMaskedLM (line 542) | class ElectraForMaskedLM(ElectraPreTrainedModel):
    method __init__ (line 543) | def __init__(self, config):
    method get_output_embeddings (line 552) | def get_output_embeddings(self):
    method forward (line 556) | def forward(
  class ElectraForTokenClassification (line 634) | class ElectraForTokenClassification(ElectraPreTrainedModel):
    method __init__ (line 635) | def __init__(self, config):
    method forward (line 644) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_encoder_decoder.py
  class EncoderDecoderModel (line 29) | class EncoderDecoderModel(PreTrainedModel):
    method __init__ (line 40) | def __init__(
    method tie_weights (line 74) | def tie_weights(self):
    method get_encoder (line 78) | def get_encoder(self):
    method get_decoder (line 81) | def get_decoder(self):
    method get_input_embeddings (line 84) | def get_input_embeddings(self):
    method get_output_embeddings (line 87) | def get_output_embeddings(self):
    method from_encoder_decoder_pretrained (line 91) | def from_encoder_decoder_pretrained(
    method forward (line 183) | def forward(
    method prepare_inputs_for_generation (line 303) | def prepare_inputs_for_generation(self, input_ids, past, attention_mas...
    method _reorder_cache (line 321) | def _reorder_cache(self, past, beam_idx):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_flaubert.py
  class FlaubertModel (line 110) | class FlaubertModel(XLMModel):
    method __init__ (line 114) | def __init__(self, config):  # , dico, is_encoder, with_output):
    method forward (line 120) | def forward(
  class FlaubertWithLMHeadModel (line 300) | class FlaubertWithLMHeadModel(XLMWithLMHeadModel):
    method __init__ (line 308) | def __init__(self, config):
  class FlaubertForSequenceClassification (line 319) | class FlaubertForSequenceClassification(XLMForSequenceClassification):
    method __init__ (line 327) | def __init__(self, config):
  class FlaubertForQuestionAnsweringSimple (line 338) | class FlaubertForQuestionAnsweringSimple(XLMForQuestionAnsweringSimple):
    method __init__ (line 346) | def __init__(self, config):
  class FlaubertForQuestionAnswering (line 357) | class FlaubertForQuestionAnswering(XLMForQuestionAnswering):
    method __init__ (line 365) | def __init__(self, config):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_gpt2.py
  function load_tf_weights_in_gpt2 (line 44) | def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
  class Attention (line 99) | class Attention(nn.Module):
    method __init__ (line 100) | def __init__(self, nx, n_ctx, config, scale=False):
    method prune_heads (line 121) | def prune_heads(self, heads):
    method _attn (line 143) | def _attn(self, q, k, v, attention_mask=None, head_mask=None):
    method merge_heads (line 167) | def merge_heads(self, x):
    method split_heads (line 172) | def split_heads(self, x, k=False):
    method forward (line 180) | def forward(self, x, layer_past=None, attention_mask=None, head_mask=N...
  class MLP (line 207) | class MLP(nn.Module):
    method __init__ (line 208) | def __init__(self, n_state, config):  # in MLP: n_state=3072 (4 * n_embd)
    method forward (line 216) | def forward(self, x):
  class Block (line 222) | class Block(nn.Module):
    method __init__ (line 223) | def __init__(self, n_ctx, config, scale=False):
    method forward (line 231) | def forward(self, x, layer_past=None, attention_mask=None, head_mask=N...
  class GPT2PreTrainedModel (line 249) | class GPT2PreTrainedModel(PreTrainedModel):
    method __init__ (line 258) | def __init__(self, *inputs, **kwargs):
    method _init_weights (line 261) | def _init_weights(self, module):
  class GPT2Model (line 339) | class GPT2Model(GPT2PreTrainedModel):
    method __init__ (line 340) | def __init__(self, config):
    method get_input_embeddings (line 353) | def get_input_embeddings(self):
    method set_input_embeddings (line 356) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 359) | def _prune_heads(self, heads_to_prune):
    method forward (line 367) | def forward(
  class GPT2LMHeadModel (line 523) | class GPT2LMHeadModel(GPT2PreTrainedModel):
    method __init__ (line 524) | def __init__(self, config):
    method get_output_embeddings (line 531) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 534) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 542) | def forward(
  class GPT2DoubleHeadsModel (line 631) | class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
    method __init__ (line 632) | def __init__(self, config):
    method get_output_embeddings (line 641) | def get_output_embeddings(self):
    method forward (line 645) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_longformer.py
  function _get_question_end_index (line 43) | def _get_question_end_index(input_ids, sep_token_id):
  function _compute_global_attention_mask (line 59) | def _compute_global_attention_mask(input_ids, sep_token_id, before_sep_t...
  class LongformerSelfAttention (line 81) | class LongformerSelfAttention(nn.Module):
    method __init__ (line 82) | def __init__(self, config, layer_id):
    method _skew (line 117) | def _skew(x, direction):
    method _skew2 (line 124) | def _skew2(x):
    method _chunk (line 136) | def _chunk(x, w):
    method _mask_invalid_locations (line 150) | def _mask_invalid_locations(self, input_tensor, w) -> torch.Tensor:
    method _sliding_chunks_matmul_qk (line 163) | def _sliding_chunks_matmul_qk(self, q: torch.Tensor, k: torch.Tensor, ...
    method _sliding_chunks_matmul_pv (line 210) | def _sliding_chunks_matmul_pv(self, prob: torch.Tensor, v: torch.Tenso...
    method forward (line 238) | def forward(
  class LongformerModel (line 498) | class LongformerModel(RobertaModel):
    method __init__ (line 519) | def __init__(self, config):
    method _pad_to_window_size (line 538) | def _pad_to_window_size(
    method forward (line 582) | def forward(
  class LongformerForMaskedLM (line 686) | class LongformerForMaskedLM(BertPreTrainedModel):
    method __init__ (line 690) | def __init__(self, config):
    method forward (line 699) | def forward(
  class LongformerForSequenceClassification (line 776) | class LongformerForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 780) | def __init__(self, config):
    method forward (line 788) | def forward(
  class LongformerClassificationHead (line 868) | class LongformerClassificationHead(nn.Module):
    method __init__ (line 871) | def __init__(self, config):
    method forward (line 877) | def forward(self, hidden_states, **kwargs):
  class LongformerForQuestionAnswering (line 892) | class LongformerForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 896) | def __init__(self, config):
    method forward (line 906) | def forward(
  class LongformerForTokenClassification (line 1016) | class LongformerForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1020) | def __init__(self, config):
    method forward (line 1031) | def forward(
  class LongformerForMultipleChoice (line 1116) | class LongformerForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1120) | def __init__(self, config):
    method forward (line 1130) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_marian.py
  class MarianMTModel (line 26) | class MarianMTModel(BartForConditionalGeneration):
    method prepare_logits_for_generation (line 49) | def prepare_logits_for_generation(self, logits, cur_len, max_length):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_mmbt.py
  class ModalEmbeddings (line 32) | class ModalEmbeddings(nn.Module):
    method __init__ (line 36) | def __init__(self, config, encoder, embeddings):
    method forward (line 47) | def forward(self, input_modal, start_token=None, end_token=None, posit...
  class MMBTModel (line 152) | class MMBTModel(nn.Module, ModuleUtilsMixin):
    method __init__ (line 180) | def __init__(self, config, transformer, encoder):
    method forward (line 186) | def forward(
    method get_input_embeddings (line 268) | def get_input_embeddings(self):
    method set_input_embeddings (line 271) | def set_input_embeddings(self, value):
  class MMBTForClassification (line 281) | class MMBTForClassification(nn.Module):
    method __init__ (line 312) | def __init__(self, config, transformer, encoder):
    method forward (line 320) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_openai.py
  function load_tf_weights_in_openai_gpt (line 42) | def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folde...
  class Attention (line 122) | class Attention(nn.Module):
    method __init__ (line 123) | def __init__(self, nx, n_ctx, config, scale=False):
    method prune_heads (line 141) | def prune_heads(self, heads):
    method _attn (line 160) | def _attn(self, q, k, v, attention_mask=None, head_mask=None):
    method merge_heads (line 185) | def merge_heads(self, x):
    method split_heads (line 190) | def split_heads(self, x, k=False):
    method forward (line 198) | def forward(self, x, attention_mask=None, head_mask=None):
  class MLP (line 216) | class MLP(nn.Module):
    method __init__ (line 217) | def __init__(self, n_state, config):  # in MLP: n_state=3072 (4 * n_embd)
    method forward (line 225) | def forward(self, x):
  class Block (line 231) | class Block(nn.Module):
    method __init__ (line 232) | def __init__(self, n_ctx, config, scale=False):
    method forward (line 240) | def forward(self, x, attention_mask=None, head_mask=None):
  class OpenAIGPTPreTrainedModel (line 252) | class OpenAIGPTPreTrainedModel(PreTrainedModel):
    method _init_weights (line 261) | def _init_weights(self, module):
  class OpenAIGPTModel (line 329) | class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 330) | def __init__(self, config):
    method get_input_embeddings (line 342) | def get_input_embeddings(self):
    method set_input_embeddings (line 345) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 348) | def _prune_heads(self, heads_to_prune):
    method forward (line 356) | def forward(
  class OpenAIGPTLMHeadModel (line 471) | class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 472) | def __init__(self, config):
    method get_output_embeddings (line 479) | def get_output_embeddings(self):
    method forward (line 483) | def forward(
  class OpenAIGPTDoubleHeadsModel (line 567) | class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 568) | def __init__(self, config):
    method get_output_embeddings (line 578) | def get_output_embeddings(self):
    method forward (line 582) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_reformer.py
  function mish (line 45) | def mish(x):
  function _get_least_common_mult_chunk_len (line 70) | def _get_least_common_mult_chunk_len(config):
  class AxialPositionEmbeddings (line 87) | class AxialPositionEmbeddings(nn.Module):
    method __init__ (line 92) | def __init__(self, config):
    method forward (line 117) | def forward(self, position_ids):
  class PositionEmbeddings (line 166) | class PositionEmbeddings(nn.Module):
    method __init__ (line 170) | def __init__(self, config):
    method forward (line 175) | def forward(self, position_ids):
  class ReformerEmbeddings (line 181) | class ReformerEmbeddings(nn.Module):
    method __init__ (line 185) | def __init__(self, config):
    method forward (line 195) | def forward(self, input_ids=None, position_ids=None, inputs_embeds=None):
  class EfficientAttentionMixin (line 226) | class EfficientAttentionMixin:
    method _look_adjacent (line 231) | def _look_adjacent(self, vectors, num_chunks_before, num_chunks_after):
    method _split_hidden_size_dim (line 254) | def _split_hidden_size_dim(self, x, num_attn_heads, attn_head_size):
    method _merge_hidden_size_dims (line 262) | def _merge_hidden_size_dims(self, x, num_attn_heads, attn_head_size):
    method _split_seq_length_dim_to (line 269) | def _split_seq_length_dim_to(self, vectors, dim_factor_1, dim_factor_2...
  class LSHSelfAttention (line 284) | class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
    method __init__ (line 285) | def __init__(self, config):
    method forward (line 315) | def forward(
    method _hash_vectors (line 441) | def _hash_vectors(self, vectors, num_hashes):
    method _get_sorted_bucket_idx_and_undo_sorted_bucket_idx (line 506) | def _get_sorted_bucket_idx_and_undo_sorted_bucket_idx(self, sequence_l...
    method _set_num_buckets (line 537) | def _set_num_buckets(self, sequence_length):
    method _attend (line 556) | def _attend(
    method _compute_attn_mask (line 635) | def _compute_attn_mask(self, query_indices, key_indices, attention_mask):
    method _len_and_dim_norm (line 663) | def _len_and_dim_norm(self, vectors):
    method _len_norm (line 673) | def _len_norm(self, x, epsilon=1e-6):
    method _gather_by_expansion (line 681) | def _gather_by_expansion(self, vectors, idxs, num_hashes):
  class ReverseSort (line 690) | class ReverseSort(Function):
    method forward (line 700) | def forward(ctx, out_vectors, logits, sorted_bucket_idx, undo_sorted_b...
    method backward (line 713) | def backward(ctx, grad_out_vectors, grad_logits):
  class LocalSelfAttention (line 747) | class LocalSelfAttention(nn.Module, EfficientAttentionMixin):
    method __init__ (line 748) | def __init__(self, config):
    method forward (line 773) | def forward(self, hidden_states, attention_mask=None, head_mask=None, ...
    method _compute_attn_mask (line 888) | def _compute_attn_mask(self, query_indices, key_indices, attention_mas...
  class ReformerSelfOutput (line 913) | class ReformerSelfOutput(nn.Module):
    method __init__ (line 914) | def __init__(self, config):
    method forward (line 921) | def forward(self, hidden_states):
  class ReformerAttention (line 927) | class ReformerAttention(nn.Module):
    method __init__ (line 928) | def __init__(self, config, layer_id=0):
    method forward (line 953) | def forward(
  class ReformerFeedForwardDense (line 986) | class ReformerFeedForwardDense(nn.Module):
    method __init__ (line 987) | def __init__(self, config):
    method forward (line 998) | def forward(self, hidden_states):
  class ReformerFeedForwardOutput (line 1005) | class ReformerFeedForwardOutput(nn.Module):
    method __init__ (line 1006) | def __init__(self, config):
    method forward (line 1012) | def forward(self, hidden_states):
  class ChunkReformerFeedForward (line 1018) | class ChunkReformerFeedForward(nn.Module):
    method __init__ (line 1019) | def __init__(self, config):
    method forward (line 1028) | def forward(self, attention_output):
    method forward_chunk (line 1033) | def forward_chunk(self, hidden_states):
  class ReformerLayer (line 1039) | class ReformerLayer(nn.Module):
    method __init__ (line 1040) | def __init__(self, config, layer_id=0):
    method _init_attention_seed (line 1050) | def _init_attention_seed(self):
    method _init_feed_forward_seed (line 1070) | def _init_feed_forward_seed(self):
    method forward (line 1090) | def forward(
    method backward_pass (line 1134) | def backward_pass(
  class _ReversibleFunction (line 1195) | class _ReversibleFunction(Function):
    method forward (line 1205) | def forward(
    method backward (line 1256) | def backward(ctx, grad_hidden_states):
  class ReformerEncoder (line 1302) | class ReformerEncoder(nn.Module):
    method __init__ (line 1303) | def __init__(self, config):
    method forward (line 1312) | def forward(
  class ReformerOnlyLMHead (line 1350) | class ReformerOnlyLMHead(nn.Module):
    method __init__ (line 1351) | def __init__(self, config):
    method forward (line 1363) | def forward(self, hidden_states):
    method forward_chunk (line 1366) | def forward_chunk(self, hidden_states):
  class ReformerPreTrainedModel (line 1371) | class ReformerPreTrainedModel(PreTrainedModel):
    method dummy_inputs (line 1380) | def dummy_inputs(self):
    method _init_weights (line 1389) | def _init_weights(self, module):
  class ReformerModel (line 1470) | class ReformerModel(ReformerPreTrainedModel):
    method __init__ (line 1471) | def __init__(self, config):
    method get_input_embeddings (line 1483) | def get_input_embeddings(self):
    method set_input_embeddings (line 1486) | def set_input_embeddings(self, value):
    method _prune_heads (line 1489) | def _prune_heads(self, heads_to_prune):
    method forward (line 1498) | def forward(
    method _pad_to_mult_of_chunk_length (line 1615) | def _pad_to_mult_of_chunk_length(
  class ReformerModelWithLMHead (line 1674) | class ReformerModelWithLMHead(ReformerPreTrainedModel):
    method __init__ (line 1675) | def __init__(self, config):
    method get_output_embeddings (line 1682) | def get_output_embeddings(self):
    method tie_weights (line 1685) | def tie_weights(self):
    method forward (line 1690) | def forward(
    method prepare_inputs_for_generation (line 1766) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_roberta.py
  class RobertaEmbeddings (line 44) | class RobertaEmbeddings(BertEmbeddings):
    method __init__ (line 49) | def __init__(self, config):
    method forward (line 57) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No...
    method create_position_ids_from_inputs_embeds (line 69) | def create_position_ids_from_inputs_embeds(self, inputs_embeds):
  class RobertaModel (line 139) | class RobertaModel(BertModel):
    method __init__ (line 148) | def __init__(self, config):
    method get_input_embeddings (line 154) | def get_input_embeddings(self):
    method set_input_embeddings (line 157) | def set_input_embeddings(self, value):
  class RobertaForMaskedLM (line 162) | class RobertaForMaskedLM(BertPreTrainedModel):
    method __init__ (line 166) | def __init__(self, config):
    method get_output_embeddings (line 174) | def get_output_embeddings(self):
    method forward (line 178) | def forward(
  class RobertaLMHead (line 246) | class RobertaLMHead(nn.Module):
    method __init__ (line 249) | def __init__(self, config):
    method forward (line 260) | def forward(self, features, **kwargs):
  class RobertaForSequenceClassification (line 276) | class RobertaForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 280) | def __init__(self, config):
    method forward (line 288) | def forward(
  class RobertaForMultipleChoice (line 366) | class RobertaForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 370) | def __init__(self, config):
    method forward (line 380) | def forward(
  class RobertaForTokenClassification (line 464) | class RobertaForTokenClassification(BertPreTrainedModel):
    method __init__ (line 468) | def __init__(self, config):
    method forward (line 479) | def forward(
  class RobertaClassificationHead (line 559) | class RobertaClassificationHead(nn.Module):
    method __init__ (line 562) | def __init__(self, config):
    method forward (line 568) | def forward(self, features, **kwargs):
  class RobertaForQuestionAnswering (line 583) | class RobertaForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 597) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_t5.py
  function load_tf_weights_in_t5 (line 53) | def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
  class T5LayerNorm (line 143) | class T5LayerNorm(nn.Module):
    method __init__ (line 144) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 152) | def forward(self, x):
  class T5DenseReluDense (line 162) | class T5DenseReluDense(nn.Module):
    method __init__ (line 163) | def __init__(self, config):
    method forward (line 169) | def forward(self, hidden_states):
  class T5LayerFF (line 177) | class T5LayerFF(nn.Module):
    method __init__ (line 178) | def __init__(self, config):
    method forward (line 184) | def forward(self, hidden_states):
  class T5Attention (line 191) | class T5Attention(nn.Module):
    method __init__ (line 192) | def __init__(self, config: T5Config, has_relative_attention_bias=False):
    method prune_heads (line 215) | def prune_heads(self, heads):
    method _relative_position_bucket (line 236) | def _relative_position_bucket(relative_position, bidirectional=True, n...
    method compute_bias (line 283) | def compute_bias(self, qlen, klen):
    method forward (line 298) | def forward(
  class T5LayerSelfAttention (line 401) | class T5LayerSelfAttention(nn.Module):
    method __init__ (line 402) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 408) | def forward(
  class T5LayerCrossAttention (line 432) | class T5LayerCrossAttention(nn.Module):
    method __init__ (line 433) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 439) | def forward(
  class T5Block (line 467) | class T5Block(nn.Module):
    method __init__ (line 468) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 478) | def forward(
  class T5PreTrainedModel (line 553) | class T5PreTrainedModel(PreTrainedModel):
    method dummy_inputs (line 563) | def dummy_inputs(self):
    method _init_weights (line 573) | def _init_weights(self, module):
    method _shift_right (line 605) | def _shift_right(self, input_ids):
  class T5Stack (line 627) | class T5Stack(T5PreTrainedModel):
    method __init__ (line 628) | def __init__(self, config, embed_tokens=None):
    method get_input_embeddings (line 644) | def get_input_embeddings(self):
    method get_output_embeddings (line 647) | def get_output_embeddings(self):
    method set_input_embeddings (line 650) | def set_input_embeddings(self, new_embeddings):
    method forward (line 653) | def forward(
  class T5Model (line 846) | class T5Model(T5PreTrainedModel):
    method __init__ (line 847) | def __init__(self, config):
    method get_input_embeddings (line 860) | def get_input_embeddings(self):
    method set_input_embeddings (line 863) | def set_input_embeddings(self, new_embeddings):
    method get_encoder (line 868) | def get_encoder(self):
    method get_decoder (line 871) | def get_decoder(self):
    method _prune_heads (line 874) | def _prune_heads(self, heads_to_prune):
    method forward (line 883) | def forward(
  class T5ForConditionalGeneration (line 966) | class T5ForConditionalGeneration(T5PreTrainedModel):
    method __init__ (line 967) | def __init__(self, config):
    method get_input_embeddings (line 984) | def get_input_embeddings(self):
    method set_input_embeddings (line 987) | def set_input_embeddings(self, new_embeddings):
    method get_output_embeddings (line 992) | def get_output_embeddings(self):
    method get_encoder (line 995) | def get_encoder(self):
    method get_decoder (line 998) | def get_decoder(self):
    method forward (line 1002) | def forward(
    method prepare_inputs_for_generation (line 1114) | def prepare_inputs_for_generation(self, input_ids, past, attention_mas...
    method _reorder_cache (line 1131) | def _reorder_cache(self, past, beam_idx):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_albert.py
  class TFAlbertEmbeddings (line 45) | class TFAlbertEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 49) | def __init__(self, config, **kwargs):
    method build (line 71) | def build(self, input_shape):
    method call (line 83) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 105) | def _embedding(self, inputs, training=False):
    method _linear (line 130) | def _linear(self, inputs):
  class TFAlbertSelfAttention (line 144) | class TFAlbertSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 145) | def __init__(self, config, **kwargs):
    method transpose_for_scores (line 171) | def transpose_for_scores(self, x, batch_size):
    method call (line 175) | def call(self, inputs, training=False):
  class TFAlbertSelfOutput (line 220) | class TFAlbertSelfOutput(tf.keras.layers.Layer):
    method __init__ (line 221) | def __init__(self, config, **kwargs):
    method call (line 229) | def call(self, inputs, training=False):
  class TFAlbertAttention (line 238) | class TFAlbertAttention(TFBertSelfAttention):
    method __init__ (line 239) | def __init__(self, config, **kwargs):
    method prune_heads (line 249) | def prune_heads(self, heads):
    method call (line 252) | def call(self, inputs, training=False):
  class TFAlbertLayer (line 306) | class TFAlbertLayer(tf.keras.layers.Layer):
    method __init__ (line 307) | def __init__(self, config, **kwargs):
    method call (line 328) | def call(self, inputs, training=False):
  class TFAlbertLayerGroup (line 344) | class TFAlbertLayerGroup(tf.keras.layers.Layer):
    method __init__ (line 345) | def __init__(self, config, **kwargs):
    method call (line 354) | def call(self, inputs, training=False):
  class TFAlbertTransformer (line 379) | class TFAlbertTransformer(tf.keras.layers.Layer):
    method __init__ (line 380) | def __init__(self, config, **kwargs):
    method call (line 396) | def call(self, inputs, training=False):
  class TFAlbertPreTrainedModel (line 438) | class TFAlbertPreTrainedModel(TFPreTrainedModel):
  class TFAlbertMLMHead (line 447) | class TFAlbertMLMHead(tf.keras.layers.Layer):
    method __init__ (line 448) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 466) | def build(self, input_shape):
    method call (line 473) | def call(self, hidden_states):
  class TFAlbertMainLayer (line 482) | class TFAlbertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 485) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 498) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 501) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 504) | def _prune_heads(self, heads_to_prune):
    method call (line 511) | def call(
  class TFAlbertModel (line 674) | class TFAlbertModel(TFAlbertPreTrainedModel):
    method __init__ (line 675) | def __init__(self, config, *inputs, **kwargs):
    method call (line 680) | def call(self, inputs, **kwargs):
  class TFAlbertForPreTraining (line 725) | class TFAlbertForPreTraining(TFAlbertPreTrainedModel):
    method __init__ (line 726) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 734) | def get_output_embeddings(self):
    method call (line 738) | def call(self, inputs, **kwargs):
  class TFAlbertSOPHead (line 772) | class TFAlbertSOPHead(tf.keras.layers.Layer):
    method __init__ (line 773) | def __init__(self, config, **kwargs):
    method call (line 781) | def call(self, pooled_output, training: bool):
  class TFAlbertForMaskedLM (line 788) | class TFAlbertForMaskedLM(TFAlbertPreTrainedModel):
    method __init__ (line 789) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 795) | def get_output_embeddings(self):
    method call (line 799) | def call(self, inputs, **kwargs):
  class TFAlbertForSequenceClassification (line 844) | class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel):
    method __init__ (line 845) | def __init__(self, config, *inputs, **kwargs):
    method call (line 856) | def call(self, inputs, **kwargs):
  class TFAlbertForQuestionAnswering (line 901) | class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel):
    method __init__ (line 902) | def __init__(self, config, *inputs, **kwargs):
    method call (line 912) | def call(self, inputs, **kwargs):
  class TFAlbertForMultipleChoice (line 967) | class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel):
    method __init__ (line 968) | def __init__(self, config, *inputs, **kwargs):
    method dummy_inputs (line 978) | def dummy_inputs(self):
    method call (line 987) | def call(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_auto.py
  class TFAutoModel (line 174) | class TFAutoModel(object):
    method __init__ (line 198) | def __init__(self):
    method from_config (line 206) | def from_config(cls, config):
    method from_pretrained (line 244) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForPreTraining (line 336) | class TFAutoModelForPreTraining(object):
    method __init__ (line 345) | def __init__(self):
    method from_config (line 353) | def from_config(cls, config):
    method from_pretrained (line 392) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelWithLMHead (line 486) | class TFAutoModelWithLMHead(object):
    method __init__ (line 510) | def __init__(self):
    method from_config (line 518) | def from_config(cls, config):
    method from_pretrained (line 556) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForMultipleChoice (line 649) | class TFAutoModelForMultipleChoice:
    method __init__ (line 665) | def __init__(self):
    method from_config (line 673) | def from_config(cls, config):
    method from_pretrained (line 706) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForSequenceClassification (line 796) | class TFAutoModelForSequenceClassification(object):
    method __init__ (line 815) | def __init__(self):
    method from_config (line 823) | def from_config(cls, config):
    method from_pretrained (line 859) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForQuestionAnswering (line 952) | class TFAutoModelForQuestionAnswering(object):
    method __init__ (line 972) | def __init__(self):
    method from_config (line 980) | def from_config(cls, config):
    method from_pretrained (line 1017) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForTokenClassification (line 1111) | class TFAutoModelForTokenClassification:
    method __init__ (line 1112) | def __init__(self):
    method from_config (line 1120) | def from_config(cls, config):
    method from_pretrained (line 1155) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_bert.py
  function gelu (line 58) | def gelu(x):
  function gelu_new (line 69) | def gelu_new(x):
  function swish (line 82) | def swish(x):
  class TFBertEmbeddings (line 94) | class TFBertEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 98) | def __init__(self, config, **kwargs):
    method build (line 122) | def build(self, input_shape):
    method call (line 134) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 156) | def _embedding(self, inputs, training=False):
    method _linear (line 181) | def _linear(self, inputs):
  class TFBertSelfAttention (line 197) | class TFBertSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 198) | def __init__(self, config, **kwargs):
    method transpose_for_scores (line 224) | def transpose_for_scores(self, x, batch_size):
    method call (line 228) | def call(self, inputs, training=False):
  class TFBertSelfOutput (line 273) | class TFBertSelfOutput(tf.keras.layers.Layer):
    method __init__ (line 274) | def __init__(self, config, **kwargs):
    method call (line 282) | def call(self, inputs, training=False):
  class TFBertAttention (line 291) | class TFBertAttention(tf.keras.layers.Layer):
    method __init__ (line 292) | def __init__(self, config, **kwargs):
    method prune_heads (line 297) | def prune_heads(self, heads):
    method call (line 300) | def call(self, inputs, training=False):
  class TFBertIntermediate (line 309) | class TFBertIntermediate(tf.keras.layers.Layer):
    method __init__ (line 310) | def __init__(self, config, **kwargs):
    method call (line 320) | def call(self, hidden_states):
  class TFBertOutput (line 326) | class TFBertOutput(tf.keras.layers.Layer):
    method __init__ (line 327) | def __init__(self, config, **kwargs):
    method call (line 335) | def call(self, inputs, training=False):
  class TFBertLayer (line 344) | class TFBertLayer(tf.keras.layers.Layer):
    method __init__ (line 345) | def __init__(self, config, **kwargs):
    method call (line 351) | def call(self, inputs, training=False):
  class TFBertEncoder (line 362) | class TFBertEncoder(tf.keras.layers.Layer):
    method __init__ (line 363) | def __init__(self, config, **kwargs):
    method call (line 369) | def call(self, inputs, training=False):
  class TFBertPooler (line 396) | class TFBertPooler(tf.keras.layers.Layer):
    method __init__ (line 397) | def __init__(self, config, **kwargs):
    method call (line 406) | def call(self, hidden_states):
  class TFBertPredictionHeadTransform (line 414) | class TFBertPredictionHeadTransform(tf.keras.layers.Layer):
    method __init__ (line 415) | def __init__(self, config, **kwargs):
    method call (line 426) | def call(self, hidden_states):
  class TFBertLMPredictionHead (line 433) | class TFBertLMPredictionHead(tf.keras.layers.Layer):
    method __init__ (line 434) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 443) | def build(self, input_shape):
    method call (line 447) | def call(self, hidden_states):
  class TFBertMLMHead (line 454) | class TFBertMLMHead(tf.keras.layers.Layer):
    method __init__ (line 455) | def __init__(self, config, input_embeddings, **kwargs):
    method call (line 459) | def call(self, sequence_output):
  class TFBertNSPHead (line 464) | class TFBertNSPHead(tf.keras.layers.Layer):
    method __init__ (line 465) | def __init__(self, config, **kwargs):
    method call (line 471) | def call(self, pooled_output):
  class TFBertMainLayer (line 477) | class TFBertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 480) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 488) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 491) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 494) | def _prune_heads(self, heads_to_prune):
    method call (line 501) | def call(
  class TFBertPreTrainedModel (line 583) | class TFBertPreTrainedModel(TFPreTrainedModel):
  class TFBertModel (line 667) | class TFBertModel(TFBertPreTrainedModel):
    method __init__ (line 668) | def __init__(self, config, *inputs, **kwargs):
    method call (line 673) | def call(self, inputs, **kwargs):
  class TFBertForPreTraining (line 718) | class TFBertForPreTraining(TFBertPreTrainedModel):
    method __init__ (line 719) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 726) | def get_output_embeddings(self):
    method call (line 730) | def call(self, inputs, **kwargs):
  class TFBertForMaskedLM (line 775) | class TFBertForMaskedLM(TFBertPreTrainedModel):
    method __init__ (line 776) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 782) | def get_output_embeddings(self):
    method call (line 786) | def call(self, inputs, **kwargs):
  class TFBertForNextSentencePrediction (line 828) | class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
    method __init__ (line 829) | def __init__(self, config, *inputs, **kwargs):
    method call (line 836) | def call(self, inputs, **kwargs):
  class TFBertForSequenceClassification (line 883) | class TFBertForSequenceClassification(TFBertPreTrainedModel):
    method __init__ (line 884) | def __init__(self, config, *inputs, **kwargs):
    method call (line 895) | def call(self, inputs, **kwargs):
  class TFBertForMultipleChoice (line 941) | class TFBertForMultipleChoice(TFBertPreTrainedModel):
    method __init__ (line 942) | def __init__(self, config, *inputs, **kwargs):
    method dummy_inputs (line 952) | def dummy_inputs(self):
    method call (line 961) | def call(
  class TFBertForTokenClassification (line 1064) | class TFBertForTokenClassification(TFBertPreTrainedModel):
    method __init__ (line 1065) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1076) | def call(self, inputs, **kwargs):
  class TFBertForQuestionAnswering (line 1122) | class TFBertForQuestionAnswering(TFBertPreTrainedModel):
    method __init__ (line 1123) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1133) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_camembert.py
  class TFCamembertModel (line 70) | class TFCamembertModel(TFRobertaModel):
  class TFCamembertForMaskedLM (line 82) | class TFCamembertForMaskedLM(TFRobertaForMaskedLM):
  class TFCamembertForSequenceClassification (line 96) | class TFCamembertForSequenceClassification(TFRobertaForSequenceClassific...
  class TFCamembertForTokenClassification (line 110) | class TFCamembertForTokenClassification(TFRobertaForTokenClassification):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_ctrl.py
  function angle_defn (line 38) | def angle_defn(pos, i, d_model_size):
  function positional_encoding (line 43) | def positional_encoding(position, d_model_size):
  function scaled_dot_product_attention (line 55) | def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, hea...
  class TFMultiHeadAttention (line 80) | class TFMultiHeadAttention(tf.keras.layers.Layer):
    method __init__ (line 81) | def __init__(self, d_model_size, num_heads, output_attentions=False, *...
    method split_into_heads (line 95) | def split_into_heads(self, x, batch_size):
    method call (line 99) | def call(self, inputs, training=False):
  function point_wise_feed_forward_network (line 142) | def point_wise_feed_forward_network(d_model_size, dff, name=""):
  class TFEncoderLayer (line 149) | class TFEncoderLayer(tf.keras.layers.Layer):
    method __init__ (line 150) | def __init__(
    method call (line 166) | def call(self, inputs, training=False):
  class TFCTRLMainLayer (line 186) | class TFCTRLMainLayer(tf.keras.layers.Layer):
    method __init__ (line 189) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 218) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 221) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 224) | def _prune_heads(self, heads_to_prune):
    method call (line 230) | def call(
  class TFCTRLPreTrainedModel (line 379) | class TFCTRLPreTrainedModel(TFPreTrainedModel):
  class TFCTRLModel (line 471) | class TFCTRLModel(TFCTRLPreTrainedModel):
    method __init__ (line 472) | def __init__(self, config, *inputs, **kwargs):
    method call (line 477) | def call(self, inputs, **kwargs):
  class TFCTRLLMHead (line 515) | class TFCTRLLMHead(tf.keras.layers.Layer):
    method __init__ (line 516) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 524) | def build(self, input_shape):
    method call (line 528) | def call(self, hidden_states):
  class TFCTRLLMHeadModel (line 539) | class TFCTRLLMHeadModel(TFCTRLPreTrainedModel):
    method __init__ (line 540) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 546) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 549) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 557) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_distilbert.py
  function gelu (line 46) | def gelu(x):
  function gelu_new (line 57) | def gelu_new(x):
  class TFEmbeddings (line 70) | class TFEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 71) | def __init__(self, config, **kwargs):
    method build (line 89) | def build(self, input_shape):
    method call (line 99) | def call(self, inputs, inputs_embeds=None, mode="embedding", training=...
    method _embedding (line 121) | def _embedding(self, inputs, inputs_embeds=None, training=False):
    method _linear (line 156) | def _linear(self, inputs):
  class TFMultiHeadSelfAttention (line 172) | class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 173) | def __init__(self, config, **kwargs):
    method prune_heads (line 198) | def prune_heads(self, heads):
    method call (line 201) | def call(self, inputs, training=False):
  class TFFFN (line 262) | class TFFFN(tf.keras.layers.Layer):
    method __init__ (line 263) | def __init__(self, config, **kwargs):
    method call (line 279) | def call(self, input, training=False):
  class TFTransformerBlock (line 287) | class TFTransformerBlock(tf.keras.layers.Layer):
    method __init__ (line 288) | def __init__(self, config, **kwargs):
    method call (line 306) | def call(self, inputs, training=False):  # removed: src_enc=None, src_...
  class TFTransformer (line 341) | class TFTransformer(tf.keras.layers.Layer):
    method __init__ (line 342) | def __init__(self, config, **kwargs):
    method call (line 350) | def call(self, inputs, training=False):
  class TFDistilBertMainLayer (line 402) | class TFDistilBertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 403) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 410) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 413) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 416) | def _prune_heads(self, heads_to_prune):
    method call (line 419) | def call(self, inputs, attention_mask=None, head_mask=None, inputs_emb...
  class TFDistilBertPreTrainedModel (line 465) | class TFDistilBertPreTrainedModel(TFPreTrainedModel):
  class TFDistilBertModel (line 539) | class TFDistilBertModel(TFDistilBertPreTrainedModel):
    method __init__ (line 540) | def __init__(self, config, *inputs, **kwargs):
    method call (line 545) | def call(self, inputs, **kwargs):
  class TFDistilBertLMHead (line 577) | class TFDistilBertLMHead(tf.keras.layers.Layer):
    method __init__ (line 578) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 586) | def build(self, input_shape):
    method call (line 590) | def call(self, hidden_states):
  class TFDistilBertForMaskedLM (line 599) | class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel):
    method __init__ (line 600) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 614) | def get_output_embeddings(self):
    method call (line 618) | def call(self, inputs, **kwargs):
  class TFDistilBertForSequenceClassification (line 665) | class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel):
    method __init__ (line 666) | def __init__(self, config, *inputs, **kwargs):
    method call (line 683) | def call(self, inputs, **kwargs):
  class TFDistilBertForTokenClassification (line 729) | class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel):
    method __init__ (line 730) | def __init__(self, config, *inputs, **kwargs):
    method call (line 741) | def call(self, inputs, **kwargs):
  class TFDistilBertForQuestionAnswering (line 786) | class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel):
    method __init__ (line 787) | def __init__(self, config, *inputs, **kwargs):
    method call (line 798) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_electra.py
  class TFElectraEmbeddings (line 27) | class TFElectraEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 31) | def __init__(self, config, **kwargs):
    method build (line 55) | def build(self, input_shape):
    method call (line 67) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 89) | def _embedding(self, inputs, training=False):
    method _linear (line 114) | def _linear(self, inputs):
  class TFElectraDiscriminatorPredictions (line 130) | class TFElectraDiscriminatorPredictions(tf.keras.layers.Layer):
    method __init__ (line 131) | def __init__(self, config, **kwargs):
    method call (line 138) | def call(self, discriminator_hidden_states, training=False):
  class TFElectraGeneratorPredictions (line 146) | class TFElectraGeneratorPredictions(tf.keras.layers.Layer):
    method __init__ (line 147) | def __init__(self, config, **kwargs):
    method call (line 153) | def call(self, generator_hidden_states, training=False):
  class TFElectraPreTrainedModel (line 161) | class TFElectraPreTrainedModel(TFBertPreTrainedModel):
    method get_extended_attention_mask (line 166) | def get_extended_attention_mask(self, attention_mask, input_shape):
    method get_head_mask (line 188) | def get_head_mask(self, head_mask):
  class TFElectraMainLayer (line 197) | class TFElectraMainLayer(TFElectraPreTrainedModel):
    method __init__ (line 201) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 210) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 213) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 216) | def _prune_heads(self, heads_to_prune):
    method call (line 223) | def call(
  class TFElectraModel (line 348) | class TFElectraModel(TFElectraPreTrainedModel):
    method __init__ (line 349) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 353) | def get_input_embeddings(self):
    method call (line 357) | def call(self, inputs, **kwargs):
  class TFElectraForPreTraining (line 398) | class TFElectraForPreTraining(TFElectraPreTrainedModel):
    method __init__ (line 399) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 405) | def get_input_embeddings(self):
    method call (line 409) | def call(
  class TFElectraMaskedLMHead (line 458) | class TFElectraMaskedLMHead(tf.keras.layers.Layer):
    method __init__ (line 459) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 464) | def build(self, input_shape):
    method call (line 468) | def call(self, hidden_states, training=False):
  class TFElectraForMaskedLM (line 482) | class TFElectraForMaskedLM(TFElectraPreTrainedModel):
    method __init__ (line 483) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 495) | def get_input_embeddings(self):
    method get_output_embeddings (line 498) | def get_output_embeddings(self):
    method call (line 502) | def call(
  class TFElectraForTokenClassification (line 560) | class TFElectraForTokenClassification(TFElectraPreTrainedModel):
    method __init__ (line 561) | def __init__(self, config, **kwargs):
    method call (line 569) | def call(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_flaubert.py
  class TFFlaubertModel (line 107) | class TFFlaubertModel(TFXLMModel):
    method __init__ (line 110) | def __init__(self, config, *inputs, **kwargs):
  class TFFlaubertMainLayer (line 115) | class TFFlaubertMainLayer(TFXLMMainLayer):
    method __init__ (line 116) | def __init__(self, config, *inputs, **kwargs):
    method call (line 121) | def call(
  class TFFlaubertWithLMHeadModel (line 311) | class TFFlaubertWithLMHeadModel(TFXLMWithLMHeadModel):
    method __init__ (line 314) | def __init__(self, config, *inputs, **kwargs):
  class TFFlaubertForSequenceClassification (line 324) | class TFFlaubertForSequenceClassification(TFXLMForSequenceClassification):
    method __init__ (line 327) | def __init__(self, config, *inputs, **kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_gpt2.py
  function gelu (line 50) | def gelu(x):
  class TFAttention (line 63) | class TFAttention(tf.keras.layers.Layer):
    method __init__ (line 64) | def __init__(self, nx, n_ctx, config, scale=False, **kwargs):
    method prune_heads (line 82) | def prune_heads(self, heads):
    method causal_attention_mask (line 86) | def causal_attention_mask(nd, ns, dtype):
    method _attn (line 95) | def _attn(self, inputs, training=False):
    method merge_heads (line 125) | def merge_heads(self, x):
    method split_heads (line 131) | def split_heads(self, x):
    method call (line 137) | def call(self, inputs, training=False):
  class TFMLP (line 175) | class TFMLP(tf.keras.layers.Layer):
    method __init__ (line 176) | def __init__(self, n_state, config, **kwargs):
    method call (line 184) | def call(self, x, training=False):
  class TFBlock (line 191) | class TFBlock(tf.keras.layers.Layer):
    method __init__ (line 192) | def __init__(self, n_ctx, config, scale=False, **kwargs):
    method call (line 200) | def call(self, inputs, training=False):
  class TFGPT2MainLayer (line 217) | class TFGPT2MainLayer(tf.keras.layers.Layer):
    method __init__ (line 220) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 241) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 244) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 247) | def _prune_heads(self, heads_to_prune):
    method call (line 253) | def call(
  class TFGPT2PreTrainedModel (line 387) | class TFGPT2PreTrainedModel(TFPreTrainedModel):
  class TFGPT2Model (line 475) | class TFGPT2Model(TFGPT2PreTrainedModel):
    method __init__ (line 476) | def __init__(self, config, *inputs, **kwargs):
    method call (line 481) | def call(self, inputs, **kwargs):
  class TFGPT2LMHeadModel (line 524) | class TFGPT2LMHeadModel(TFGPT2PreTrainedModel):
    method __init__ (line 525) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 529) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 532) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 540) | def call(self, inputs, **kwargs):
  class TFGPT2DoubleHeadsModel (line 593) | class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
    method __init__ (line 594) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 602) | def get_output_embeddings(self):
    method call (line 606) | def call(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_openai.py
  function gelu (line 45) | def gelu(x):
  function swish (line 58) | def swish(x):
  class TFAttention (line 69) | class TFAttention(tf.keras.layers.Layer):
    method __init__ (line 70) | def __init__(self, nx, n_ctx, config, scale=False, **kwargs):
    method prune_heads (line 88) | def prune_heads(self, heads):
    method causal_attention_mask (line 92) | def causal_attention_mask(nd, ns, dtype):
    method _attn (line 101) | def _attn(self, inputs, training=False):
    method merge_heads (line 131) | def merge_heads(self, x):
    method split_heads (line 137) | def split_heads(self, x):
    method call (line 143) | def call(self, inputs, training=False):
  class TFMLP (line 163) | class TFMLP(tf.keras.layers.Layer):
    method __init__ (line 164) | def __init__(self, n_state, config, **kwargs):
    method call (line 172) | def call(self, x, training=False):
  class TFBlock (line 179) | class TFBlock(tf.keras.layers.Layer):
    method __init__ (line 180) | def __init__(self, n_ctx, config, scale=False, **kwargs):
    method call (line 188) | def call(self, inputs, training=False):
  class TFOpenAIGPTMainLayer (line 202) | class TFOpenAIGPTMainLayer(tf.keras.layers.Layer):
    method __init__ (line 203) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 223) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 226) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 229) | def _prune_heads(self, heads_to_prune):
    method call (line 235) | def call(
  class TFOpenAIGPTPreTrainedModel (line 349) | class TFOpenAIGPTPreTrainedModel(TFPreTrainedModel):
  class TFOpenAIGPTModel (line 430) | class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 431) | def __init__(self, config, *inputs, **kwargs):
    method call (line 436) | def call(self, inputs, **kwargs):
  class TFOpenAIGPTLMHeadModel (line 475) | class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 476) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 480) | def get_output_embeddings(self):
    method call (line 484) | def call(self, inputs, **kwargs):
  class TFOpenAIGPTDoubleHeadsModel (line 532) | class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 533) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 541) | def get_output_embeddings(self):
    method call (line 545) | def call(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_pytorch_utils.py
  function convert_tf_weight_name_to_pt_weight_name (line 29) | def convert_tf_weight_name_to_pt_weight_name(tf_name, start_prefix_to_re...
  function load_pytorch_checkpoint_in_tf2_model (line 73) | def load_pytorch_checkpoint_in_tf2_model(tf_model, pytorch_checkpoint_pa...
  function load_pytorch_model_in_tf2_model (line 97) | def load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=None, ...
  function load_pytorch_weights_in_tf2_model (line 107) | def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs...
  function load_tf2_checkpoint_in_pytorch_model (line 205) | def load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path, t...
  function load_tf2_model_in_pytorch_model (line 240) | def load_tf2_model_in_pytorch_model(pt_model, tf_model, allow_missing_ke...
  function load_tf2_weights_in_pytorch_model (line 248) | def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missin...

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_roberta.py
  class TFRobertaEmbeddings (line 40) | class TFRobertaEmbeddings(TFBertEmbeddings):
    method __init__ (line 45) | def __init__(self, config, **kwargs):
    method create_position_ids_from_input_ids (line 49) | def create_position_ids_from_input_ids(self, x):
    method create_position_ids_from_inputs_embeds (line 60) | def create_position_ids_from_inputs_embeds(self, inputs_embeds):
    method _embedding (line 71) | def _embedding(self, inputs, training=False):
  class TFRobertaMainLayer (line 85) | class TFRobertaMainLayer(TFBertMainLayer):
    method __init__ (line 90) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 94) | def get_input_embeddings(self):
  class TFRobertaPreTrainedModel (line 98) | class TFRobertaPreTrainedModel(TFPreTrainedModel):
  class TFRobertaModel (line 182) | class TFRobertaModel(TFRobertaPreTrainedModel):
    method __init__ (line 183) | def __init__(self, config, *inputs, **kwargs):
    method call (line 188) | def call(self, inputs, **kwargs):
  class TFRobertaLMHead (line 228) | class TFRobertaLMHead(tf.keras.layers.Layer):
    method __init__ (line 231) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 244) | def build(self, input_shape):
    method call (line 248) | def call(self, features):
  class TFRobertaForMaskedLM (line 260) | class TFRobertaForMaskedLM(TFRobertaPreTrainedModel):
    method __init__ (line 261) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 267) | def get_output_embeddings(self):
    method call (line 271) | def call(self, inputs, **kwargs):
  class TFRobertaClassificationHead (line 310) | class TFRobertaClassificationHead(tf.keras.layers.Layer):
    method __init__ (line 313) | def __init__(self, config, **kwargs):
    method call (line 326) | def call(self, features, training=False):
  class TFRobertaForSequenceClassification (line 340) | class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel):
    method __init__ (line 341) | def __init__(self, config, *inputs, **kwargs):
    method call (line 349) | def call(self, inputs, **kwargs):
  class TFRobertaForTokenClassification (line 394) | class TFRobertaForTokenClassification(TFRobertaPreTrainedModel):
    method __init__ (line 395) | def __init__(self, config, *inputs, **kwargs):
    method call (line 406) | def call(self, inputs, **kwargs):
  class TFRobertaForQuestionAnswering (line 451) | class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel):
    method __init__ (line 452) | def __init__(self, config, *inputs, **kwargs):
    method call (line 462) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_t5.py
  class TFT5LayerNorm (line 49) | class TFT5LayerNorm(tf.keras.layers.Layer):
    method __init__ (line 50) | def __init__(self, epsilon=1e-6, **kwargs):
    method build (line 57) | def build(self, input_shape):
    method call (line 62) | def call(self, x):
  class TFT5DenseReluDense (line 68) | class TFT5DenseReluDense(tf.keras.layers.Layer):
    method __init__ (line 69) | def __init__(self, config, **kwargs):
    method call (line 76) | def call(self, hidden_states, training=False):
  class TFT5LayerFF (line 84) | class TFT5LayerFF(tf.keras.layers.Layer):
    method __init__ (line 85) | def __init__(self, config, **kwargs):
    method call (line 91) | def call(self, hidden_states, training=False):
  class TFT5Attention (line 98) | class TFT5Attention(tf.keras.layers.Layer):
    method __init__ (line 101) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method prune_heads (line 127) | def prune_heads(self, heads):
    method _relative_position_bucket (line 131) | def _relative_position_bucket(relative_position, bidirectional=True, n...
    method compute_bias (line 176) | def compute_bias(self, qlen, klen):
    method call (line 188) | def call(
  class TFT5LayerSelfAttention (line 302) | class TFT5LayerSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 303) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 311) | def call(
  class TFT5LayerCrossAttention (line 337) | class TFT5LayerCrossAttention(tf.keras.layers.Layer):
    method __init__ (line 338) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 346) | def call(
  class TFT5Block (line 376) | class TFT5Block(tf.keras.layers.Layer):
    method __init__ (line 377) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 393) | def call(
  class _NoLayerEmbedTokens (line 471) | class _NoLayerEmbedTokens(object):
    method __init__ (line 478) | def __init__(self, layer, abs_scope_name=None):
    method call (line 482) | def call(self, inputs, mode="embedding"):
    method __call__ (line 491) | def __call__(self, inputs, mode="embedding"):
  class TFT5MainLayer (line 505) | class TFT5MainLayer(tf.keras.layers.Layer):
    method __init__ (line 506) | def __init__(self, config, embed_tokens=None, **kwargs):
    method get_input_embeddings (line 524) | def get_input_embeddings(self):
    method get_output_embeddings (line 527) | def get_output_embeddings(self):
    method set_embed_tokens (line 530) | def set_embed_tokens(self, embed_tokens):
    method _resize_token_embeddings (line 533) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 536) | def _prune_heads(self, heads_to_prune):
    method call (line 539) | def call(
  class TFT5PreTrainedModel (line 718) | class TFT5PreTrainedModel(TFPreTrainedModel):
    method dummy_inputs (line 727) | def dummy_inputs(self):
  class TFT5Model (line 828) | class TFT5Model(TFT5PreTrainedModel):
    method __init__ (line 829) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 846) | def get_input_embeddings(self):
    method get_output_embeddings (line 849) | def get_output_embeddings(self):
    method get_encoder (line 852) | def get_encoder(self):
    method get_decoder (line 855) | def get_decoder(self):
    method call (line 859) | def call(self, inputs, **kwargs):
  class TFT5ForConditionalGeneration (line 947) | class TFT5ForConditionalGeneration(TFT5PreTrainedModel):
    method __init__ (line 948) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 967) | def get_input_embeddings(self):
    method get_output_embeddings (line 970) | def get_output_embeddings(self):
    method get_encoder (line 973) | def get_encoder(self):
    method get_decoder (line 976) | def get_decoder(self):
    method call (line 980) | def call(self, inputs, **kwargs):
    method prepare_inputs_for_generation (line 1079) | def prepare_inputs_for_generation(self, inputs, past, attention_mask, ...
    method _reorder_cache (line 1097) | def _reorder_cache(self, past, beam_idx):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_transfo_xl.py
  class TFPositionalEmbedding (line 39) | class TFPositionalEmbedding(tf.keras.layers.Layer):
    method __init__ (line 40) | def __init__(self, demb, **kwargs):
    method call (line 45) | def call(self, pos_seq, bsz=None):
  class TFPositionwiseFF (line 55) | class TFPositionwiseFF(tf.keras.layers.Layer):
    method __init__ (line 56) | def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, layer_n...
    method call (line 74) | def call(self, inp, training=False):
  class TFRelPartialLearnableMultiHeadAttn (line 98) | class TFRelPartialLearnableMultiHeadAttn(tf.keras.layers.Layer):
    method __init__ (line 99) | def __init__(
    method build (line 152) | def build(self, input_shape):
    method _rel_shift (line 162) | def _rel_shift(self, x):
    method call (line 172) | def call(self, inputs, training=False):
  class TFRelPartialLearnableDecoderLayer (line 252) | class TFRelPartialLearnableDecoderLayer(tf.keras.layers.Layer):
    method __init__ (line 253) | def __init__(
    method call (line 301) | def call(self, inputs, training=False):
  class TFAdaptiveEmbedding (line 311) | class TFAdaptiveEmbedding(tf.keras.layers.Layer):
    method __init__ (line 312) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, init_...
    method build (line 344) | def build(self, input_shape):
    method call (line 357) | def call(self, inp):
  class TFTransfoXLMainLayer (line 384) | class TFTransfoXLMainLayer(tf.keras.layers.Layer):
    method __init__ (line 387) | def __init__(self, config, **kwargs):
    method build (line 455) | def build(self, input_shape):
    method get_input_embeddings (line 465) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 468) | def _resize_token_embeddings(self, new_num_tokens):
    method backward_compatible (line 471) | def backward_compatible(self):
    method reset_length (line 474) | def reset_length(self, tgt_len, ext_len, mem_len):
    method _prune_heads (line 479) | def _prune_heads(self, heads):
    method init_mems (line 482) | def init_mems(self, bsz):
    method _update_mems (line 493) | def _update_mems(self, hids, mems, mlen, qlen):
    method call (line 517) | def call(self, inputs, mems=None, head_mask=None, inputs_embeds=None, ...
  class TFTransfoXLPreTrainedModel (line 628) | class TFTransfoXLPreTrainedModel(TFPreTrainedModel):
  class TFTransfoXLModel (line 693) | class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
    method __init__ (line 694) | def __init__(self, config, *inputs, **kwargs):
    method call (line 699) | def call(self, inputs, **kwargs):
  class TFTransfoXLLMHead (line 737) | class TFTransfoXLLMHead(tf.keras.layers.Layer):
    method __init__ (line 738) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 746) | def build(self, input_shape):
    method call (line 750) | def call(self, hidden_states):
  class TFTransfoXLLMHeadModel (line 761) | class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
    method __init__ (line 762) | def __init__(self, config):
    method get_output_embeddings (line 774) | def get_output_embeddings(self):
    method reset_length (line 781) | def reset_length(self, tgt_len, ext_len, mem_len):
    method init_mems (line 784) | def init_mems(self, bsz):
    method call (line 788) | def call(self, inputs, mems=None, head_mask=None, inputs_embeds=None, ...
    method prepare_inputs_for_generation (line 855) | def prepare_inputs_for_generation(self, inputs, past, **model_kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_transfo_xl_utilities.py
  class TFAdaptiveSoftmaxMask (line 25) | class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
    method __init__ (line 26) | def __init__(self, vocab_size, d_embed, d_proj, cutoffs, div_val=1, ke...
    method build (line 45) | def build(self, input_shape):
    method _logit (line 104) | def _logit(x, W, b, proj=None):
    method _gather_logprob (line 111) | def _gather_logprob(logprob, target):
    method call (line 117) | def call(self, inputs, return_mean=True, training=False):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_utils.py
  class TFModelUtilsMixin (line 34) | class TFModelUtilsMixin:
    method num_parameters (line 39) | def num_parameters(self, only_trainable: bool = False) -> int:
  function keras_serializable (line 49) | def keras_serializable(cls):
  class TFPreTrainedModel (line 107) | class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
    method dummy_inputs (line 127) | def dummy_inputs(self):
    method __init__ (line 135) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 148) | def get_input_embeddings(self):
    method get_output_embeddings (line 162) | def get_output_embeddings(self):
    method _get_resized_embeddings (line 172) | def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None):
    method resize_token_embeddings (line 206) | def resize_token_embeddings(self, new_num_tokens=None):
    method prune_heads (line 221) | def prune_heads(self, heads_to_prune):
    method save_pretrained (line 230) | def save_pretrained(self, save_directory):
    method from_pretrained (line 247) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
    method prepare_inputs_for_generation (line 438) | def prepare_inputs_for_generation(self, inputs, **kwargs):
    method _use_cache (line 441) | def _use_cache(self, outputs, use_cache):
    method generate (line 449) | def generate(
    method _generate_no_beam_search (line 810) | def _generate_no_beam_search(
    method _generate_beam_search (line 973) | def _generate_beam_search(
    method _reorder_cache (line 1294) | def _reorder_cache(past, beam_idx):
  function _create_next_token_logits_penalties (line 1298) | def _create_next_token_logits_penalties(input_ids, logits, repetition_pe...
  function calc_banned_ngram_tokens (line 1312) | def calc_banned_ngram_tokens(prev_input_ids, num_hypos, no_repeat_ngram_...
  function calc_banned_bad_words_ids (line 1335) | def calc_banned_bad_words_ids(prev_input_ids, bad_words_ids):
  function tf_top_k_top_p_filtering (line 1371) | def tf_top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-f...
  function scatter_values_on_batch_indices (line 1421) | def scatter_values_on_batch_indices(values, batch_indices):
  function set_tensor_by_indices_to_value (line 1431) | def set_tensor_by_indices_to_value(tensor, indices, value):
  class BeamHypotheses (line 1437) | class BeamHypotheses(object):
    method __init__ (line 1438) | def __init__(self, num_beams, max_length, length_penalty, early_stoppi...
    method __len__ (line 1449) | def __len__(self):
    method add (line 1455) | def add(self, hyp, sum_logprobs):
    method is_done (line 1469) | def is_done(self, best_sum_logprobs, cur_len=None):
  class TFConv1D (line 1487) | class TFConv1D(tf.keras.layers.Layer):
    method __init__ (line 1488) | def __init__(self, nf, nx, initializer_range=0.02, **kwargs):
    method build (line 1497) | def build(self, input_shape):
    method call (line 1503) | def call(self, x):
  class TFSharedEmbeddings (line 1514) | class TFSharedEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 1518) | def __init__(self, vocab_size, hidden_size, initializer_range=None, **...
    method build (line 1524) | def build(self, input_shape):
    method call (line 1534) | def call(self, inputs, mode="embedding"):
    method _embedding (line 1556) | def _embedding(self, input_ids):
    method _linear (line 1560) | def _linear(self, inputs):
  class TFSequenceSummary (line 1575) | class TFSequenceSummary(tf.keras.layers.Layer):
    method __init__ (line 1591) | def __init__(self, config, initializer_range=0.02, **kwargs):
    method call (line 1623) | def call(self, inputs, training=False):
  function shape_list (line 1682) | def shape_list(x):
  function get_initializer (line 1689) | def get_initializer(initializer_range=0.02):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_xlm.py
  function create_sinusoidal_embeddings (line 49) | def create_sinusoidal_embeddings(n_pos, dim, out):
  function gelu (line 55) | def gelu(x):
  function get_masks (line 66) | def get_masks(slen, lengths, causal, padding_mask=None, dtype=tf.float32):
  class TFMultiHeadAttention (line 97) | class TFMultiHeadAttention(tf.keras.layers.Layer):
    method __init__ (line 101) | def __init__(self, n_heads, dim, config, **kwargs):
    method prune_heads (line 116) | def prune_heads(self, heads):
    method call (line 119) | def call(self, inputs, training=False):
  class TFTransformerFFN (line 185) | class TFTransformerFFN(tf.keras.layers.Layer):
    method __init__ (line 186) | def __init__(self, in_dim, dim_hidden, out_dim, config, **kwargs):
    method call (line 193) | def call(self, input, training=False):
  class TFXLMMainLayer (line 201) | class TFXLMMainLayer(tf.keras.layers.Layer):
    method __init__ (line 202) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 292) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 295) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 298) | def _prune_heads(self, heads_to_prune):
    method call (line 305) | def call(
  class TFXLMPreTrainedModel (line 468) | class TFXLMPreTrainedModel(TFPreTrainedModel):
    method dummy_inputs (line 477) | def dummy_inputs(self):
  class TFXLMModel (line 574) | class TFXLMModel(TFXLMPreTrainedModel):
    method __init__ (line 575) | def __init__(self, config, *inputs, **kwargs):
    method call (line 580) | def call(self, inputs, **kwargs):
  class TFXLMPredLayer (line 614) | class TFXLMPredLayer(tf.keras.layers.Layer):
    method __init__ (line 619) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 636) | def build(self, input_shape):
    method call (line 641) | def call(self, hidden_states):
  class TFXLMWithLMHeadModel (line 652) | class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
    method __init__ (line 653) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 658) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 661) | def prepare_inputs_for_generation(self, inputs, **kwargs):
    method call (line 676) | def call(self, inputs, **kwargs):
  class TFXLMForSequenceClassification (line 720) | class TFXLMForSequenceClassification(TFXLMPreTrainedModel):
    method __init__ (line 721) | def __init__(self, config, *inputs, **kwargs):
    method call (line 729) | def call(self, inputs, **kwargs):
  class TFXLMForQuestionAnsweringSimple (line 774) | class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel):
    method __init__ (line 775) | def __init__(self, config, *inputs, **kwargs):
    method call (line 783) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_xlm_roberta.py
  class TFXLMRobertaModel (line 70) | class TFXLMRobertaModel(TFRobertaModel):
  class TFXLMRobertaForMaskedLM (line 82) | class TFXLMRobertaForMaskedLM(TFRobertaForMaskedLM):
  class TFXLMRobertaForSequenceClassification (line 96) | class TFXLMRobertaForSequenceClassification(TFRobertaForSequenceClassifi...
  class TFXLMRobertaForTokenClassification (line 110) | class TFXLMRobertaForTokenClassification(TFRobertaForTokenClassification):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_tf_xlnet.py
  function gelu (line 47) | def gelu(x):
  function swish (line 56) | def swish(x):
  class TFXLNetRelativeAttention (line 67) | class TFXLNetRelativeAttention(tf.keras.layers.Layer):
    method __init__ (line 68) | def __init__(self, config, **kwargs):
    method build (line 87) | def build(self, input_shape):
    method prune_heads (line 118) | def prune_heads(self, heads):
    method rel_shift (line 121) | def rel_shift(self, x, klen=-1):
    method rel_attn_core (line 133) | def rel_attn_core(self, inputs, training=False):
    method post_attention (line 178) | def post_attention(self, inputs, residual=True, training=False):
    method call (line 193) | def call(self, inputs, training=False):
  class TFXLNetFeedForward (line 290) | class TFXLNetFeedForward(tf.keras.layers.Layer):
    method __init__ (line 291) | def __init__(self, config, **kwargs):
    method call (line 306) | def call(self, inp, training=False):
  class TFXLNetLayer (line 317) | class TFXLNetLayer(tf.keras.layers.Layer):
    method __init__ (line 318) | def __init__(self, config, **kwargs):
    method call (line 324) | def call(self, inputs, training=False):
  class TFXLNetLMHead (line 336) | class TFXLNetLMHead(tf.keras.layers.Layer):
    method __init__ (line 337) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 344) | def build(self, input_shape):
    method call (line 348) | def call(self, hidden_states):
  class TFXLNetMainLayer (line 355) | class TFXLNetMainLayer(tf.keras.layers.Layer):
    method __init__ (line 358) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 380) | def get_input_embeddings(self):
    method build (line 383) | def build(self, input_shape):
    method _resize_token_embeddings (line 389) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 392) | def _prune_heads(self, heads_to_prune):
    method create_mask (line 395) | def create_mask(self, qlen, mlen, dtype=tf.float32):
    method cache_mem (line 424) | def cache_mem(self, curr_out, prev_mem):
    method positional_embedding (line 437) | def positional_embedding(pos_seq, inv_freq, bsz=None):
    method relative_positional_encoding (line 447) | def relative_positional_encoding(self, qlen, klen, bsz=None, dtype=None):
    method call (line 495) | def call(
  class TFXLNetPreTrainedModel (line 699) | class TFXLNetPreTrainedModel(TFPreTrainedModel):
  class TFXLNetModel (line 795) | class TFXLNetModel(TFXLNetPreTrainedModel):
    method __init__ (line 796) | def __init__(self, config, *inputs, **kwargs):
    method call (line 801) | def call(self, inputs, **kwargs):
  class TFXLNetLMHeadModel (line 844) | class TFXLNetLMHeadModel(TFXLNetPreTrainedModel):
    method __init__ (line 845) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 850) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 853) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 885) | def call(self, inputs, **kwargs):
  class TFXLNetForSequenceClassification (line 941) | class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel):
    method __init__ (line 942) | def __init__(self, config, *inputs, **kwargs):
    method call (line 955) | def call(self, inputs, **kwargs):
  class TFXLNetForTokenClassification (line 1005) | class TFXLNetForTokenClassification(TFXLNetPreTrainedModel):
    method __init__ (line 1006) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1015) | def call(self, inputs, **kwargs):
  class TFXLNetForQuestionAnsweringSimple (line 1064) | class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel):
    method __init__ (line 1065) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1073) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_transfo_xl.py
  function build_tf_to_pytorch_map (line 42) | def build_tf_to_pytorch_map(model, config):
  function load_tf_weights_in_transfo_xl (line 109) | def load_tf_weights_in_transfo_xl(model, config, tf_path):
  class PositionalEmbedding (line 167) | class PositionalEmbedding(nn.Module):
    method __init__ (line 168) | def __init__(self, demb):
    method forward (line 176) | def forward(self, pos_seq, bsz=None):
  class PositionwiseFF (line 186) | class PositionwiseFF(nn.Module):
    method __init__ (line 187) | def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, layer_n...
    method forward (line 206) | def forward(self, inp):
  class RelPartialLearnableMultiHeadAttn (line 223) | class RelPartialLearnableMultiHeadAttn(nn.Module):
    method __init__ (line 224) | def __init__(
    method _rel_shift (line 269) | def _rel_shift(self, x):
    method forward (line 281) | def forward(self, w, r, attn_mask=None, mems=None, head_mask=None):
  class RelPartialLearnableDecoderLayer (line 370) | class RelPartialLearnableDecoderLayer(nn.Module):
    method __init__ (line 371) | def __init__(self, n_head, d_model, d_head, d_inner, dropout, layer_no...
    method forward (line 381) | def forward(self, dec_inp, r, dec_attn_mask=None, mems=None, head_mask...
  class AdaptiveEmbedding (line 391) | class AdaptiveEmbedding(nn.Module):
    method __init__ (line 392) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, sampl...
    method forward (line 419) | def forward(self, inp):
  class TransfoXLPreTrainedModel (line 451) | class TransfoXLPreTrainedModel(PreTrainedModel):
    method _init_weight (line 460) | def _init_weight(self, weight):
    method _init_bias (line 466) | def _init_bias(self, bias):
    method _init_weights (line 469) | def _init_weights(self, m):
  class TransfoXLModel (line 552) | class TransfoXLModel(TransfoXLPreTrainedModel):
    method __init__ (line 553) | def __init__(self, config):
    method get_input_embeddings (line 618) | def get_input_embeddings(self):
    method set_input_embeddings (line 621) | def set_input_embeddings(self, new_embeddings):
    method backward_compatible (line 624) | def backward_compatible(self):
    method reset_length (line 627) | def reset_length(self, tgt_len, ext_len, mem_len):
    method _prune_heads (line 632) | def _prune_heads(self, heads):
    method init_mems (line 636) | def init_mems(self, bsz):
    method _update_mems (line 648) | def _update_mems(self, hids, mems, mlen, qlen):
    method forward (line 673) | def forward(self, input_ids=None, mems=None, head_mask=None, inputs_em...
  class TransfoXLLMHeadModel (line 807) | class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
    method __init__ (line 808) | def __init__(self, config):
    method tie_weights (line 823) | def tie_weights(self):
    method reset_length (line 844) | def reset_length(self, tgt_len, ext_len, mem_len):
    method init_mems (line 847) | def init_mems(self, bsz):
    method forward (line 851) | def forward(self, input_ids=None, mems=None, head_mask=None, inputs_em...
    method get_output_embeddings (line 917) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 925) | def prepare_inputs_for_generation(self, input_ids, past, **model_kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_transfo_xl_utilities.py
  class ProjectedAdaptiveLogSoftmax (line 30) | class ProjectedAdaptiveLogSoftmax(nn.Module):
    method __init__ (line 31) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, keep_...
    method _compute_logit (line 72) | def _compute_logit(self, hidden, weight, bias, proj):
    method forward (line 86) | def forward(self, hidden, labels=None, keep_order=False):
    method log_prob (line 193) | def log_prob(self, hidden):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_utils.py
  class Identity (line 47) | class Identity(nn.Module):
    method __init__ (line 51) | def __init__(self, *args, **kwargs):
    method forward (line 54) | def forward(self, input):
  class ModuleUtilsMixin (line 58) | class ModuleUtilsMixin:
    method num_parameters (line 63) | def num_parameters(self, only_trainable: bool = False) -> int:
    method _hook_rss_memory_pre_forward (line 71) | def _hook_rss_memory_pre_forward(module, *args, **kwargs):
    method _hook_rss_memory_post_forward (line 83) | def _hook_rss_memory_post_forward(module, *args, **kwargs):
    method add_memory_hooks (line 96) | def add_memory_hooks(self):
    method reset_memory_hooks_state (line 105) | def reset_memory_hooks_state(self):
    method device (line 112) | def device(self) -> device:
    method dtype (line 130) | def dtype(self) -> dtype:
    method invert_attention_mask (line 147) | def invert_attention_mask(self, encoder_attention_mask: Tensor) -> Ten...
    method get_extended_attention_mask (line 173) | def get_extended_attention_mask(self, attention_mask: Tensor, input_sh...
    method get_head_mask (line 217) | def get_head_mask(self, head_mask: Tensor, num_hidden_layers: int, is_...
    method _convert_head_mask_to_5d (line 238) | def _convert_head_mask_to_5d(self, head_mask, num_hidden_layers):
  class PreTrainedModel (line 250) | class PreTrainedModel(nn.Module, ModuleUtilsMixin):
    method dummy_inputs (line 270) | def dummy_inputs(self):
    method __init__ (line 278) | def __init__(self, config, *inputs, **kwargs):
    method base_model (line 292) | def base_model(self):
    method get_input_embeddings (line 295) | def get_input_embeddings(self):
    method set_input_embeddings (line 309) | def set_input_embeddings(self, value: nn.Module):
    method get_output_embeddings (line 323) | def get_output_embeddings(self):
    method tie_weights (line 333) | def tie_weights(self):
    method _tie_or_clone_weights (line 343) | def _tie_or_clone_weights(self, output_embeddings, input_embeddings):
    method resize_token_embeddings (line 361) | def resize_token_embeddings(self, new_num_tokens: Optional[int] = None):
    method _resize_token_embeddings (line 388) | def _resize_token_embeddings(self, new_num_tokens):
    method _get_resized_embeddings (line 394) | def _get_resized_embeddings(
    method init_weights (line 432) | def init_weights(self):
    method prune_heads (line 444) | def prune_heads(self, heads_to_prune: Dict):
    method save_pretrained (line 459) | def save_pretrained(self, save_directory):
    method from_pretrained (line 494) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
    method prepare_inputs_for_generation (line 777) | def prepare_inputs_for_generation(self, input_ids, **kwargs):
    method prepare_logits_for_generation (line 780) | def prepare_logits_for_generation(self, logits, **kwargs):
    method _use_cache (line 783) | def _use_cache(self, outputs, use_cache):
    method enforce_repetition_penalty_ (line 791) | def enforce_repetition_penalty_(self, lprobs, batch_size, num_beams, p...
    method generate (line 802) | def generate(
    method _generate_no_beam_search (line 1186) | def _generate_no_beam_search(
    method _generate_beam_search (line 1307) | def _generate_beam_search(
    method _reorder_cache (line 1582) | def _reorder_cache(past: Tuple, beam_idx: Tensor) -> Tuple[Tensor]:
  function calc_banned_ngram_tokens (line 1586) | def calc_banned_ngram_tokens(prev_input_ids: Tensor, num_hypos: int, no_...
  function calc_banned_bad_words_ids (line 1609) | def calc_banned_bad_words_ids(prev_input_ids: Iterable[int], bad_words_i...
  function top_k_top_p_filtering (line 1645) | def top_k_top_p_filtering(
  class BeamHypotheses (line 1686) | class BeamHypotheses(object):
    method __init__ (line 1687) | def __init__(self, num_beams, max_length, length_penalty, early_stoppi...
    method __len__ (line 1698) | def __len__(self):
    method add (line 1704) | def add(self, hyp, sum_logprobs):
    method is_done (line 1718) | def is_done(self, best_sum_logprobs, cur_len=None):
  class Conv1D (line 1736) | class Conv1D(nn.Module):
    method __init__ (line 1737) | def __init__(self, nf, nx):
    method forward (line 1748) | def forward(self, x):
  class PoolerStartLogits (line 1755) | class PoolerStartLogits(nn.Module):
    method __init__ (line 1758) | def __init__(self, config):
    method forward (line 1762) | def forward(self, hidden_states, p_mask=None):
  class PoolerEndLogits (line 1779) | class PoolerEndLogits(nn.Module):
    method __init__ (line 1783) | def __init__(self, config):
    method forward (line 1790) | def forward(self, hidden_states, start_states=None, start_positions=No...
  class PoolerAnswerClass (line 1826) | class PoolerAnswerClass(nn.Module):
    method __init__ (line 1829) | def __init__(self, config):
    method forward (line 1835) | def forward(self, hidden_states, start_states=None, start_positions=No...
  class SQuADHead (line 1873) | class SQuADHead(nn.Module):
    method __init__ (line 1914) | def __init__(self, config):
    method forward (line 1923) | def forward(
  class SequenceSummary (line 1990) | class SequenceSummary(nn.Module):
    method __init__ (line 2006) | def __init__(self, config: PretrainedConfig):
    method forward (line 2035) | def forward(self, hidden_states, cls_index=None):
  function create_position_ids_from_input_ids (line 2067) | def create_position_ids_from_input_ids(input_ids, padding_idx):
  function prune_linear_layer (line 2081) | def prune_linear_layer(layer, index, dim=0):
  function prune_conv1d_layer (line 2106) | def prune_conv1d_layer(layer, index, dim=1):
  function prune_layer (line 2130) | def prune_layer(layer, index, dim=None):
  function apply_chunking_to_forward (line 2143) | def apply_chunking_to_forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_xlm.py
  function create_sinusoidal_embeddings (line 52) | def create_sinusoidal_embeddings(n_pos, dim, out):
  function get_masks (line 60) | def get_masks(slen, lengths, causal, padding_mask=None):
  class MultiHeadAttention (line 85) | class MultiHeadAttention(nn.Module):
    method __init__ (line 89) | def __init__(self, n_heads, dim, config):
    method prune_heads (line 104) | def prune_heads(self, heads):
    method forward (line 125) | def forward(self, input, mask, kv=None, cache=None, head_mask=None):
  class TransformerFFN (line 189) | class TransformerFFN(nn.Module):
    method __init__ (line 190) | def __init__(self, in_dim, dim_hidden, out_dim, config):
    method forward (line 197) | def forward(self, input):
  class XLMPreTrainedModel (line 205) | class XLMPreTrainedModel(PreTrainedModel):
    method __init__ (line 214) | def __init__(self, *inputs, **kwargs):
    method dummy_inputs (line 218) | def dummy_inputs(self):
    method _init_weights (line 227) | def _init_weights(self, module):
  class XLMModel (line 313) | class XLMModel(XLMPreTrainedModel):
    method __init__ (line 314) | def __init__(self, config):  # , dico, is_encoder, with_output):
    method get_input_embeddings (line 384) | def get_input_embeddings(self):
    method set_input_embeddings (line 387) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 390) | def _prune_heads(self, heads_to_prune):
    method forward (line 399) | def forward(
  class XLMPredLayer (line 554) | class XLMPredLayer(nn.Module):
    method __init__ (line 559) | def __init__(self, config):
    method forward (line 577) | def forward(self, x, y=None):
  class XLMWithLMHeadModel (line 602) | class XLMWithLMHeadModel(XLMPreTrainedModel):
    method __init__ (line 603) | def __init__(self, config):
    method get_output_embeddings (line 610) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 613) | def prepare_inputs_for_generation(self, input_ids, **kwargs):
    method forward (line 627) | def forward(
  class XLMForSequenceClassification (line 702) | class XLMForSequenceClassification(XLMPreTrainedModel):
    method __init__ (line 703) | def __init__(self, config):
    method forward (line 713) | def forward(
  class XLMForQuestionAnsweringSimple (line 799) | class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
    method __init__ (line 800) | def __init__(self, config):
    method forward (line 809) | def forward(
  class XLMForQuestionAnswering (line 917) | class XLMForQuestionAnswering(XLMPreTrainedModel):
    method __init__ (line 918) | def __init__(self, config):
    method forward (line 927) | def forward(
  class XLMForTokenClassification (line 1034) | class XLMForTokenClassification(XLMPreTrainedModel):
    method __init__ (line 1035) | def __init__(self, config):
    method forward (line 1046) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/modeling_xlm_roberta.py
  class XLMRobertaModel (line 62) | class XLMRobertaModel(RobertaModel):
  class XLMRobertaForMaskedLM (line 74) | class XLMRobertaForMaskedLM(RobertaForMaskedLM):
  class XLMRobertaForSequenceClassification (line 88) | class XLMRobertaForSequenceClassification(RobertaForSequenceClassificati...
  class XLMRobertaForMultipleChoice (line 102) | class XLMRobertaForMultipleChoice(RobertaForMultipleChoice):
  class XLMRobertaForTokenClassification (line 116) | class XLMRobertaForTokenClassification(RobertaForTokenClassification):

FILE: code/bert-base-count3/pretrain/transformers1/modeling_xlnet.py
  function build_tf_xlnet_to_pytorch_map (line 42) | def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None):
  function load_tf_weights_in_xlnet (line 125) | def load_tf_weights_in_xlnet(model, config, tf_path):
  class XLNetRelativeAttention (line 193) | class XLNetRelativeAttention(nn.Module):
    method __init__ (line 194) | def __init__(self, config):
    method prune_heads (line 223) | def prune_heads(self, heads):
    method rel_shift (line 227) | def rel_shift(x, klen=-1):
    method rel_shift_bnij (line 240) | def rel_shift_bnij(x, klen=-1):
    method rel_attn_core (line 254) | def rel_attn_core(self, q_head, k_head_h, v_head_h, k_head_r, seg_mat=...
    method post_attention (line 296) | def post_attention(self, h, attn_vec, residual=True):
    method forward (line 308) | def forward(self, h, g, attn_mask_h, attn_mask_g, r, seg_mat, mems=Non...
  class XLNetFeedForward (line 403) | class XLNetFeedForward(nn.Module):
    method __init__ (line 404) | def __init__(self, config):
    method forward (line 415) | def forward(self, inp):
  class XLNetLayer (line 426) | class XLNetLayer(nn.Module):
    method __init__ (line 427) | def __init__(self, config):
    method forward (line 433) | def forward(
  class XLNetPreTrainedModel (line 457) | class XLNetPreTrainedModel(PreTrainedModel):
    method _init_weights (line 466) | def _init_weights(self, module):
  class XLNetModel (line 568) | class XLNetModel(XLNetPreTrainedModel):
    method __init__ (line 569) | def __init__(self, config):
    method get_input_embeddings (line 590) | def get_input_embeddings(self):
    method set_input_embeddings (line 593) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 596) | def _prune_heads(self, heads_to_prune):
    method create_mask (line 599) | def create_mask(self, qlen, mlen):
    method cache_mem (line 629) | def cache_mem(self, curr_out, prev_mem):
    method positional_embedding (line 642) | def positional_embedding(pos_seq, inv_freq, bsz=None):
    method relative_positional_encoding (line 652) | def relative_positional_encoding(self, qlen, klen, bsz=None):
    method forward (line 692) | def forward(
  class XLNetLMHeadModel (line 927) | class XLNetLMHeadModel(XLNetPreTrainedModel):
    method __init__ (line 928) | def __init__(self, config):
    method get_output_embeddings (line 938) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 941) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 975) | def forward(
  class XLNetForSequenceClassification (line 1083) | class XLNetForSequenceClassification(XLNetPreTrainedModel):
    method __init__ (line 1084) | def __init__(self, config):
    method forward (line 1095) | def forward(
  class XLNetForTokenClassification (line 1189) | class XLNetForTokenClassification(XLNetPreTrainedModel):
    method __init__ (line 1190) | def __init__(self, config):
    method forward (line 1200) | def forward(
  class XLNetForMultipleChoice (line 1298) | class XLNetForMultipleChoice(XLNetPreTrainedModel):
    method __init__ (line 1299) | def __init__(self, config):
    method forward (line 1309) | def forward(
  class XLNetForQuestionAnsweringSimple (line 1411) | class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
    method __init__ (line 1412) | def __init__(self, config):
    method forward (line 1422) | def forward(
  class XLNetForQuestionAnswering (line 1534) | class XLNetForQuestionAnswering(XLNetPreTrainedModel):
    method __init__ (line 1535) | def __init__(self, config):
    method forward (line 1548) | def forward(

FILE: code/bert-base-count3/pretrain/transformers1/optimization.py
  function get_constant_schedule (line 28) | def get_constant_schedule(optimizer, last_epoch=-1):
  function get_constant_schedule_with_warmup (line 34) | def get_constant_schedule_with_warmup(optimizer, num_warmup_steps, last_...
  function get_linear_schedule_with_warmup (line 47) | def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_tra...
  function get_cosine_schedule_with_warmup (line 62) | def get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_tra...
  function get_cosine_with_hard_restarts_schedule_with_warmup (line 77) | def get_cosine_with_hard_restarts_schedule_with_warmup(
  class AdamW (line 96) | class AdamW(Optimizer):
    method __init__ (line 107) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-6, weig...
    method step (line 119) | def step(self, closure=None):

FILE: code/bert-base-count3/pretrain/transformers1/optimization_tf.py
  class WarmUp (line 23) | class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
    method __init__ (line 26) | def __init__(
    method __call__ (line 36) | def __call__(self, step):
    method get_config (line 51) | def get_config(self):
  function create_optimizer (line 61) | def create_optimizer(init_lr, num_train_steps, num_warmup_steps, end_lr=...
  class AdamWeightDecay (line 84) | class AdamWeightDecay(tf.keras.optimizers.Adam):
    method __init__ (line 94) | def __init__(
    method from_config (line 113) | def from_config(cls, config):
    method _prepare_local (line 118) | def _prepare_local(self, var_device, var_dtype, apply_state):
    method _decay_weights_op (line 124) | def _decay_weights_op(self, var, learning_rate, apply_state):
    method apply_gradients (line 133) | def apply_gradients(self, grads_and_vars, name=None):
    method _get_lr (line 137) | def _get_lr(self, var_device, var_dtype, apply_state):
    method _resource_apply_dense (line 150) | def _resource_apply_dense(self, grad, var, apply_state=None):
    method _resource_apply_sparse (line 156) | def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
    method get_config (line 162) | def get_config(self):
    method _do_use_weight_decay (line 167) | def _do_use_weight_decay(self, param_name):
  class GradientAccumulator (line 185) | class GradientAccumulator(object):
    method __init__ (line 197) | def __init__(self):
    method step (line 203) | def step(self):
    method gradients (line 216) | def gradients(self):
    method __call__ (line 222) | def __call__(self, gradients):
    method reset (line 248) | def reset(self):

FILE: code/bert-base-count3/pretrain/transformers1/pipelines.py
  function get_framework (line 69) | def get_framework(model=None):
  class ArgumentHandler (line 89) | class ArgumentHandler(ABC):
    method __call__ (line 95) | def __call__(self, *args, **kwargs):
  class DefaultArgumentHandler (line 99) | class DefaultArgumentHandler(ArgumentHandler):
    method handle_kwargs (line 105) | def handle_kwargs(kwargs: Dict) -> List:
    method handle_args (line 114) | def handle_args(args: Sequence[Any]) -> List[str]:
    method __call__ (line 140) | def __call__(self, *args, **kwargs):
  class PipelineDataFormat (line 150) | class PipelineDataFormat:
    method __init__ (line 164) | def __init__(
    method __iter__ (line 184) | def __iter__(self):
    method save (line 188) | def save(self, data: dict):
    method save_binary (line 196) | def save_binary(self, data: Union[dict, List[dict]]) -> str:
    method from_str (line 211) | def from_str(
  class CsvPipelineDataFormat (line 224) | class CsvPipelineDataFormat(PipelineDataFormat):
    method __init__ (line 225) | def __init__(
    method __iter__ (line 230) | def __iter__(self):
    method save (line 239) | def save(self, data: List[dict]):
  class JsonPipelineDataFormat (line 247) | class JsonPipelineDataFormat(PipelineDataFormat):
    method __init__ (line 248) | def __init__(
    method __iter__ (line 256) | def __iter__(self):
    method save (line 263) | def save(self, data: dict):
  class PipedPipelineDataFormat (line 268) | class PipedPipelineDataFormat(PipelineDataFormat):
    method __iter__ (line 276) | def __iter__(self):
    method save (line 292) | def save(self, data: dict):
    method save_binary (line 295) | def save_binary(self, data: Union[dict, List[dict]]) -> str:
  class _ScikitCompat (line 305) | class _ScikitCompat(ABC):
    method transform (line 311) | def transform(self, X):
    method predict (line 315) | def predict(self, X):
  class Pipeline (line 319) | class Pipeline(_ScikitCompat):
    method __init__ (line 370) | def __init__(
    method save_pretrained (line 402) | def save_pretrained(self, save_directory):
    method transform (line 415) | def transform(self, X):
    method predict (line 421) | def predict(self, X):
    method device_placement (line 428) | def device_placement(self):
    method ensure_tensor_on_device (line 449) | def ensure_tensor_on_device(self, **inputs):
    method _parse_and_tokenize (line 457) | def _parse_and_tokenize(self, *args, pad_to_max_length=True, add_speci...
    method __call__ (line 472) | def __call__(self, *args, **kwargs):
    method _forward (line 476) | def _forward(self, inputs, return_tensors=False):
  class FeatureExtractionPipeline (line 501) | class FeatureExtractionPipeline(Pipeline):
    method __init__ (line 537) | def __init__(
    method __call__ (line 558) | def __call__(self, *args, **kwargs):
  class TextGenerationPipeline (line 562) | class TextGenerationPipeline(Pipeline):
    method __call__ (line 606) | def __call__(
  class TextClassificationPipeline (line 683) | class TextClassificationPipeline(Pipeline):
    method __call__ (line 720) | def __call__(self, *args, **kwargs):
  class FillMaskPipeline (line 726) | class FillMaskPipeline(Pipeline):
    method __init__ (line 764) | def __init__(
    method __call__ (line 788) | def __call__(self, *args, **kwargs):
  class NerPipeline (line 826) | class NerPipeline(Pipeline):
    method __init__ (line 865) | def __init__(
    method __call__ (line 893) | def __call__(self, *args, **kwargs):
    method group_entities (line 973) | def group_entities(self, entities):
  class QuestionAnsweringArgumentHandler (line 993) | class QuestionAnsweringArgumentHandler(ArgumentHandler):
    method __call__ (line 1002) | def __call__(self, *args, **kwargs):
  class QuestionAnsweringPipeline (line 1055) | class QuestionAnsweringPipeline(Pipeline):
    method __init__ (line 1094) | def __init__(
    method create_sample (line 1116) | def create_sample(
    method __call__ (line 1135) | def __call__(self, *args, **kwargs):
    method decode (line 1240) | def decode(self, start: np.ndarray, end: np.ndarray, topk: int, max_an...
    method span_to_answer (line 1280) | def span_to_answer(self, text: str, start: int, end: int):
  class SummarizationPipeline (line 1325) | class SummarizationPipeline(Pipeline):
    method __call__ (line 1373) | def __call__(
  class TranslationPipeline (line 1462) | class TranslationPipeline(Pipeline):
    method __call__ (line 1501) | def __call__(
  function pipeline (line 1677) | def pipeline(

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_albert.py
  class AlbertTokenizer (line 57) | class AlbertTokenizer(PreTrainedTokenizer):
    method __init__ (line 114) | def __init__(
    method vocab_size (line 158) | def vocab_size(self):
    method get_vocab (line 161) | def get_vocab(self):
    method __getstate__ (line 166) | def __getstate__(self):
    method __setstate__ (line 171) | def __setstate__(self, d):
    method preprocess_text (line 184) | def preprocess_text(self, inputs):
    method _tokenize (line 199) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 223) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 227) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 231) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 235) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 261) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 292) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 323) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_auto.py
  class AutoTokenizer (line 94) | class AutoTokenizer:
    method __init__ (line 122) | def __init__(self):
    method from_pretrained (line 129) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa...

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_bart.py
  class BartTokenizer (line 36) | class BartTokenizer(RobertaTokenizer):
  class MBartTokenizer (line 49) | class MBartTokenizer(XLMRobertaTokenizer):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_bert.py
  function load_vocab (line 99) | def load_vocab(vocab_file):
  function whitespace_tokenize (line 110) | def whitespace_tokenize(text):
  class BertTokenizer (line 119) | class BertTokenizer(PreTrainedTokenizer):
    method __init__ (line 163) | def __init__(
    method vocab_size (line 201) | def vocab_size(self):
    method get_vocab (line 204) | def get_vocab(self):
    method _tokenize (line 207) | def _tokenize(self, text):
    method _convert_token_to_id (line 217) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 221) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 225) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 230) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 256) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 287) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 317) | def save_vocabulary(self, vocab_path):
  class BasicTokenizer (line 346) | class BasicTokenizer(object):
    method __init__ (line 349) | def __init__(self, do_lower_case=True, never_split=None, tokenize_chin...
    method tokenize (line 369) | def tokenize(self, text, never_split=None):
    method _run_strip_accents (line 400) | def _run_strip_accents(self, text):
    method _run_split_on_punc (line 411) | def _run_split_on_punc(self, text, never_split=None):
    method _tokenize_chinese_chars (line 433) | def _tokenize_chinese_chars(self, text):
    method _is_chinese_char (line 446) | def _is_chinese_char(self, cp):
    method _clean_text (line 470) | def _clean_text(self, text):
  class WordpieceTokenizer (line 484) | class WordpieceTokenizer(object):
    method __init__ (line 487) | def __init__(self, vocab, unk_token, max_input_chars_per_word=100):
    method tokenize (line 492) | def tokenize(self, text):
  function _is_whitespace (line 544) | def _is_whitespace(char):
  function _is_control (line 556) | def _is_control(char):
  function _is_punctuation (line 568) | def _is_punctuation(char):
  class BertTokenizerFast (line 583) | class BertTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 631) | def __init__(
    method build_inputs_with_special_tokens (line 668) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method create_token_type_ids_from_sequences (line 676) | def create_token_type_ids_from_sequences(

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_bert_japanese.py
  class BertJapaneseTokenizer (line 71) | class BertJapaneseTokenizer(BertTokenizer):
    method __init__ (line 79) | def __init__(
    method _tokenize (line 153) | def _tokenize(self, text):
  class MecabTokenizer (line 167) | class MecabTokenizer:
    method __init__ (line 170) | def __init__(self, do_lower_case=False, never_split=None, normalize_te...
    method tokenize (line 192) | def tokenize(self, text, never_split=None, **kwargs):
  class CharacterTokenizer (line 219) | class CharacterTokenizer(object):
    method __init__ (line 222) | def __init__(self, vocab, unk_token, normalize_text=True):
    method tokenize (line 237) | def tokenize(self, text):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_camembert.py
  class CamembertTokenizer (line 51) | class CamembertTokenizer(PreTrainedTokenizer):
    method __init__ (line 107) | def __init__(
    method build_inputs_with_special_tokens (line 142) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 169) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 199) | def create_token_type_ids_from_sequences(
    method vocab_size (line 224) | def vocab_size(self):
    method _tokenize (line 227) | def _tokenize(self, text):
    method _convert_token_to_id (line 230) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 239) | def _convert_id_to_token(self, index):
    method __getstate__ (line 245) | def __getstate__(self):
    method __setstate__ (line 250) | def __setstate__(self, d):
    method convert_tokens_to_string (line 263) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 268) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_ctrl.py
  function get_pairs (line 102) | def get_pairs(word):
  class CTRLTokenizer (line 117) | class CTRLTokenizer(PreTrainedTokenizer):
    method __init__ (line 141) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
    method vocab_size (line 154) | def vocab_size(self):
    method get_vocab (line 157) | def get_vocab(self):
    method bpe (line 160) | def bpe(self, token):
    method _tokenize (line 204) | def _tokenize(self, text):
    method _convert_token_to_id (line 215) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 219) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 223) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 228) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_distilbert.py
  class DistilBertTokenizer (line 58) | class DistilBertTokenizer(BertTokenizer):
  class DistilBertTokenizerFast (line 76) | class DistilBertTokenizerFast(BertTokenizerFast):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_electra.py
  class ElectraTokenizer (line 52) | class ElectraTokenizer(BertTokenizer):
  class ElectraTokenizerFast (line 68) | class ElectraTokenizerFast(BertTokenizerFast):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_flaubert.py
  function convert_to_unicode (line 63) | def convert_to_unicode(text):
  class FlaubertTokenizer (line 79) | class FlaubertTokenizer(XLMTokenizer):
    method __init__ (line 98) | def __init__(self, do_lowercase=False, **kwargs):
    method preprocess_text (line 103) | def preprocess_text(self, text):
    method _tokenize (line 113) | def _tokenize(self, text, bypass_tokenizer=False):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_gpt2.py
  function bytes_to_unicode (line 63) | def bytes_to_unicode():
  function get_pairs (line 88) | def get_pairs(word):
  class GPT2Tokenizer (line 101) | class GPT2Tokenizer(PreTrainedTokenizer):
    method __init__ (line 139) | def __init__(
    method vocab_size (line 167) | def vocab_size(self):
    method get_vocab (line 170) | def get_vocab(self):
    method bpe (line 173) | def bpe(self, token):
    method _tokenize (line 215) | def _tokenize(self, text):
    method _convert_token_to_id (line 225) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 229) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 233) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 239) | def save_vocabulary(self, save_directory):
    method prepare_for_tokenization (line 274) | def prepare_for_tokenization(self, text, **kwargs):
  class GPT2TokenizerFast (line 280) | class GPT2TokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 326) | def __init__(

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_longformer.py
  class LongformerTokenizer (line 45) | class LongformerTokenizer(RobertaTokenizer):
  class LongformerTokenizerFast (line 54) | class LongformerTokenizerFast(RobertaTokenizerFast):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_marian.py
  class MarianTokenizer (line 28) | class MarianTokenizer(PreTrainedTokenizer):
    method __init__ (line 49) | def __init__(
    method _setup_normalizer (line 91) | def _setup_normalizer(self):
    method normalize (line 100) | def normalize(self, x: str) -> str:
    method _convert_token_to_id (line 104) | def _convert_token_to_id(self, token):
    method remove_language_code (line 107) | def remove_language_code(self, text: str):
    method _tokenize (line 113) | def _tokenize(self, text: str) -> List[str]:
    method _convert_id_to_token (line 118) | def _convert_id_to_token(self, index: int) -> str:
    method convert_tokens_to_string (line 122) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method build_inputs_with_special_tokens (line 126) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method prepare_translation_batch (line 133) | def prepare_translation_batch(
    method vocab_size (line 182) | def vocab_size(self) -> int:
    method save_vocabulary (line 185) | def save_vocabulary(self, save_directory: str) -> Tuple[str]:
    method get_vocab (line 197) | def get_vocab(self) -> Dict:
    method __getstate__ (line 202) | def __getstate__(self) -> Dict:
    method __setstate__ (line 207) | def __setstate__(self, d: Dict) -> None:
    method num_special_tokens_to_add (line 213) | def num_special_tokens_to_add(self, **unused):
    method _special_token_mask (line 217) | def _special_token_mask(self, seq):
    method get_special_tokens_mask (line 222) | def get_special_tokens_mask(
  function load_spm (line 234) | def load_spm(path: str) -> sentencepiece.SentencePieceProcessor:
  function save_json (line 240) | def save_json(data, path: str) -> None:
  function load_json (line 245) | def load_json(path: str) -> Union[Dict, List]:

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_openai.py
  function get_pairs (line 46) | def get_pairs(word):
  function text_standardize (line 59) | def text_standardize(text):
  class OpenAIGPTTokenizer (line 75) | class OpenAIGPTTokenizer(PreTrainedTokenizer):
    method __init__ (line 99) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
    method vocab_size (line 124) | def vocab_size(self):
    method get_vocab (line 127) | def get_vocab(self):
    method bpe (line 130) | def bpe(self, token):
    method _tokenize (line 174) | def _tokenize(self, text):
    method _convert_token_to_id (line 189) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 193) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 197) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 202) | def save_vocabulary(self, save_directory):
  class OpenAIGPTTokenizerFast (line 238) | class OpenAIGPTTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 264) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_reformer.py
  class ReformerTokenizer (line 54) | class ReformerTokenizer(PreTrainedTokenizer):
    method __init__ (line 85) | def __init__(
    method vocab_size (line 117) | def vocab_size(self):
    method get_vocab (line 120) | def get_vocab(self):
    method __getstate__ (line 125) | def __getstate__(self):
    method __setstate__ (line 130) | def __setstate__(self, d):
    method _tokenize (line 143) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 152) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 156) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 162) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 167) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_roberta.py
  class RobertaTokenizer (line 64) | class RobertaTokenizer(GPT2Tokenizer):
    method __init__ (line 126) | def __init__(
    method build_inputs_with_special_tokens (line 154) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 180) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 210) | def create_token_type_ids_from_sequences(
    method prepare_for_tokenization (line 234) | def prepare_for_tokenization(self, text, add_special_tokens=False, **k...
  class RobertaTokenizerFast (line 244) | class RobertaTokenizerFast(GPT2TokenizerFast):
    method __init__ (line 291) | def __init__(
    method mask_token (line 333) | def mask_token(self, value):
    method build_inputs_with_special_tokens (line 340) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method create_token_type_ids_from_sequences (line 347) | def create_token_type_ids_from_sequences(

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_t5.py
  class T5Tokenizer (line 62) | class T5Tokenizer(PreTrainedTokenizer):
    method __init__ (line 98) | def __init__(
    method vocab_size (line 139) | def vocab_size(self):
    method get_vocab (line 142) | def get_vocab(self):
    method __getstate__ (line 147) | def __getstate__(self):
    method __setstate__ (line 152) | def __setstate__(self, d):
    method _tokenize (line 165) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 174) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 182) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 190) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 195) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_transfo_xl.py
  class TransfoXLTokenizer (line 72) | class TransfoXLTokenizer(PreTrainedTokenizer):
    method __init__ (line 85) | def __init__(
    method _compile_space_around_punctuation_pattern (line 141) | def _compile_space_around_punctuation_pattern(self):
    method count_file (line 146) | def count_file(self, path, verbose=False, add_eos=False):
    method count_sents (line 162) | def count_sents(self, sents, verbose=False):
    method _build_from_file (line 173) | def _build_from_file(self, vocab_file):
    method save_vocabulary (line 188) | def save_vocabulary(self, vocab_path):
    method build_vocab (line 212) | def build_vocab(self):
    method encode_file (line 232) | def encode_file(self, path, ordered=False, verbose=False, add_eos=True...
    method encode_sents (line 249) | def encode_sents(self, sents, ordered=False, verbose=False):
    method add_special (line 263) | def add_special(self, sym):
    method add_symbol (line 269) | def add_symbol(self, sym):
    method _convert_id_to_token (line 274) | def _convert_id_to_token(self, idx):
    method _convert_token_to_id (line 279) | def _convert_token_to_id(self, sym):
    method convert_tokens_to_string (line 296) | def convert_tokens_to_string(self, tokens):
    method convert_to_tensor (line 301) | def convert_to_tensor(self, symbols):
    method vocab_size (line 305) | def vocab_size(self):
    method get_vocab (line 308) | def get_vocab(self):
    method _tokenize (line 311) | def _tokenize(self, line, add_eos=False, add_double_eos=False):
    method prepare_for_tokenization (line 330) | def prepare_for_tokenization(self, text, **kwargs):
  class _TransfoXLDelimiterLookupTokenizer (line 344) | class _TransfoXLDelimiterLookupTokenizer(BaseTokenizer):
    method __init__ (line 345) | def __init__(
  class TransfoXLTokenizerFast (line 405) | class TransfoXLTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 422) | def __init__(
    method save_pretrained (line 458) | def save_pretrained(self, save_directory):
  class LMOrderedIterator (line 467) | class LMOrderedIterator(object):
    method __init__ (line 468) | def __init__(self, data, bsz, bptt, device="cpu", ext_len=None):
    method get_batch (line 490) | def get_batch(self, i, bptt=None):
    method get_fixlen_iter (line 506) | def get_fixlen_iter(self, start=0):
    method get_varlen_iter (line 510) | def get_varlen_iter(self, start=0, std=5, min_len=5, max_deviation=3):
    method __iter__ (line 522) | def __iter__(self):
  class LMShuffledIterator (line 526) | class LMShuffledIterator(object):
    method __init__ (line 527) | def __init__(self, data, bsz, bptt, device="cpu", ext_len=None, shuffl...
    method get_sent_stream (line 540) | def get_sent_stream(self):
    method stream_iterator (line 548) | def stream_iterator(self, sent_stream):
    method __iter__ (line 595) | def __iter__(self):
  class LMMultiFileIterator (line 603) | class LMMultiFileIterator(LMShuffledIterator):
    method __init__ (line 604) | def __init__(self, paths, vocab, bsz, bptt, device="cpu", ext_len=None...
    method get_sent_stream (line 616) | def get_sent_stream(self, path):
    method __iter__ (line 624) | def __iter__(self):
  class TransfoXLCorpus (line 635) | class TransfoXLCorpus(object):
    method from_pretrained (line 637) | def from_pretrained(cls, pretrained_model_name_or_path, cache_dir=None...
    method __init__ (line 680) | def __init__(self, *args, **kwargs):
    method build_corpus (line 687) | def build_corpus(self, path, dataset):
    method get_iterator (line 721) | def get_iterator(self, split, *args, **kwargs):
  function get_lm_corpus (line 738) | def get_lm_corpus(datadir, dataset):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_utils.py
  class CharSpan (line 61) | class CharSpan(NamedTuple):
  class TokenSpan (line 73) | class TokenSpan(NamedTuple):
  function flatten (line 85) | def flatten(x: Sequence):
  function truncate_and_pad (line 100) | def truncate_and_pad(
  class BatchEncoding (line 164) | class BatchEncoding(UserDict):
    method __init__ (line 177) | def __init__(
    method __getitem__ (line 189) | def __getitem__(self, item: Union[int, str]) -> EncodingFast:
    method __getattr__ (line 203) | def __getattr__(self, item: str):
    method keys (line 206) | def keys(self):
    method values (line 209) | def values(self):
    method items (line 212) | def items(self):
    method encodings (line 220) | def encodings(self) -> Optional[List[EncodingFast]]:
    method tokens (line 228) | def tokens(self, batch_index: int = 0) -> List[int]:
    method words (line 233) | def words(self, batch_index: int = 0) -> List[Optional[int]]:
    method token_to_word (line 238) | def token_to_word(self, batch_or_token_index: int, token_index: Option...
    method word_to_tokens (line 277) | def word_to_tokens(self, batch_or_word_index: int, word_index: Optiona...
    method token_to_chars (line 322) | def token_to_chars(self, batch_or_token_index: int, token_index: Optio...
    method char_to_token (line 359) | def char_to_token(self, batch_or_char_index: int, char_index: Optional...
    method word_to_chars (line 394) | def word_to_chars(self, batch_or_word_index: int, word_index: Optional...
    method char_to_word (line 431) | def char_to_word(self, batch_or_char_index: int, char_index: Optional[...
    method to (line 467) | def to(self, device: str):
  class SpecialTokensMixin (line 473) | class SpecialTokensMixin:
    method __init__ (line 491) | def __init__(self, **kwargs):
    method bos_token (line 517) | def bos_token(self):
    method eos_token (line 524) | def eos_token(self):
    method unk_token (line 531) | def unk_token(self):
    method sep_token (line 538) | def sep_token(self):
    method pad_token (line 545) | def pad_token(self):
    method cls_token (line 552) | def cls_token(self):
    method mask_token (line 559) | def mask_token(self):
    method additional_special_tokens (line 566) | def additional_special_tokens(self):
    method _maybe_update_backend (line 572) | def _maybe_update_backend(self, value):
    method bos_token (line 577) | def bos_token(self, value):
    method eos_token (line 582) | def eos_token(self, value):
    method unk_token (line 587) | def unk_token(self, value):
    method sep_token (line 592) | def sep_token(self, value):
    method pad_token (line 597) | def pad_token(self, value):
    method cls_token (line 602) | def cls_token(self, value):
    method mask_token (line 607) | def mask_token(self, value):
    method additional_special_tokens (line 612) | def additional_special_tokens(self, value):
    method bos_token_id (line 617) | def bos_token_id(self):
    method eos_token_id (line 622) | def eos_token_id(self):
    method unk_token_id (line 627) | def unk_token_id(self):
    method sep_token_id (line 632) | def sep_token_id(self):
    method pad_token_id (line 637) | def pad_token_id(self):
    method pad_token_type_id (line 642) | def pad_token_type_id(self):
    method cls_token_id (line 647) | def cls_token_id(self):
    method mask_token_id (line 652) | def mask_token_id(self):
    method additional_special_tokens_ids (line 657) | def additional_special_tokens_ids(self):
    method special_tokens_map (line 662) | def special_tokens_map(self):
    method all_special_tokens (line 674) | def all_special_tokens(self):
    method all_special_ids (line 686) | def all_special_ids(self):
  class PreTrainedTokenizer (line 695) | class PreTrainedTokenizer(SpecialTokensMixin):
    method vocab_size (line 771) | def vocab_size(self) -> int:
    method is_fast (line 776) | def is_fast(self) -> bool:
    method max_len (line 780) | def max_len(self) -> int:
    method max_len_single_sentence (line 787) | def max_len_single_sentence(self) -> int:
    method max_len_sentences_pair (line 791) | def max_len_sentences_pair(self) -> int:
    method max_len_single_sentence (line 795) | def max_len_single_sentence(self, value) -> int:
    method max_len_sentences_pair (line 807) | def max_len_sentences_pair(self, value) -> int:
    method get_vocab (line 818) | def get_vocab(self):
    method __init__ (line 822) | def __init__(self, model_max_length=None, **kwargs):
    method __len__ (line 854) | def __len__(self):
    method from_pretrained (line 859) | def from_pretrained(cls, *inputs, **kwargs):
    method _from_pretrained (line 914) | def _from_pretrained(cls, pretrained_model_name_or_path, *init_inputs,...
    method save_pretrained (line 1087) | def save_pretrained(self, save_directory):
    method save_vocabulary (line 1128) | def save_vocabulary(self, save_directory) -> Tuple[str]:
    method add_tokens (line 1138) | def add_tokens(self, new_tokens: Union[str, List[str]]) -> int:
    method num_special_tokens_to_add (line 1187) | def num_special_tokens_to_add(self, pair=False):
    method add_special_tokens (line 1206) | def add_special_tokens(self, special_tokens_dict):
    method tokenize (line 1260) | def tokenize(self, text: TextInput, **kwargs):
    method _tokenize (line 1332) | def _tokenize(self, text, **kwargs):
    method convert_tokens_to_ids (line 1341) | def convert_tokens_to_ids(self, tokens):
    method _convert_token_to_id_with_added_voc (line 1356) | def _convert_token_to_id_with_added_voc(self, token):
    method _convert_token_to_id (line 1364) | def _convert_token_to_id(self, token):
    method encode (line 1367) | def encode(
    method encode_plus (line 1439) | def encode_plus(
    method batch_encode_plus (line 1594) | def batch_encode_plus(
    method convert_to_tensors_ (line 1789) | def convert_to_tensors_(self, batch_outputs: dict, return_tensors: str...
    method prepare_for_model (line 1818) | def prepare_for_model(
    method prepare_for_tokenization (line 2018) | def prepare_for_tokenization(self, text: str, **kwargs) -> str:
    method truncate_sequences (line 2022) | def truncate_sequences(
    method create_token_type_ids_from_sequences (line 2082) | def create_token_type_ids_from_sequences(self, token_ids_0: List, toke...
    method build_inputs_with_special_tokens (line 2087) | def build_inputs_with_special_tokens(self, token_ids_0: List, token_id...
    method get_special_tokens_mask (line 2096) | def get_special_tokens_mask(
    method convert_ids_to_tokens (line 2115) | def convert_ids_to_tokens(
    method _convert_id_to_token (line 2140) | def _convert_id_to_token(self, index: int) -> str:
    method convert_tokens_to_string (line 2143) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method decode (line 2150) | def decode(
    method batch_decode (line 2190) | def batch_decode(self, sequences: List[List[int]], **kwargs) -> List[s...
    method clean_up_tokenization (line 2194) | def clean_up_tokenization(out_string: str) -> str:
  class PreTrainedTokenizerFast (line 2212) | class PreTrainedTokenizerFast(PreTrainedTokenizer):
    method __init__ (line 2270) | def __init__(self, tokenizer: BaseTokenizerFast, **kwargs):
    method backend_tokenizer (line 2281) | def backend_tokenizer(self) -> BaseTokenizerFast:
    method decoder (line 2285) | def decoder(self) -> DecoderFast:
    method is_fast (line 2289) | def is_fast(self) -> bool:
    method vocab_size (line 2293) | def vocab_size(self) -> int:
    method __len__ (line 2296) | def __len__(self) -> int:
    method _maybe_update_backend (line 2299) | def _maybe_update_backend(self, value):
    method _convert_encoding (line 2304) | def _convert_encoding(
    method _convert_token_to_id_with_added_voc (line 2360) | def _convert_token_to_id_with_added_voc(self, token: int) -> str:
    method _convert_id_to_token (line 2366) | def _convert_id_to_token(self, index: int) -> Optional[str]:
    method get_vocab (line 2369) | def get_vocab(self):
    method convert_tokens_to_string (line 2372) | def convert_tokens_to_string(self, tokens: List[int], skip_special_tok...
    method add_tokens (line 2375) | def add_tokens(self, new_tokens: List[Union[str, AddedTokenFast]]) -> ...
    method add_special_tokens (line 2402) | def add_special_tokens(self, special_tokens_dict: dict) -> int:
    method num_special_tokens_to_add (line 2421) | def num_special_tokens_to_add(self, pair: bool = False) -> int:
    method tokenize (line 2424) | def tokenize(
    method batch_encode_plus (line 2429) | def batch_encode_plus(
    method encode_plus (line 2567) | def encode_plus(
    method decode (line 2659) | def decode(
    method save_vocabulary (line 2670) | def save_vocabulary(self, save_directory: str) -> Tuple[str]:
  function trim_batch (line 2680) | def trim_batch(

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_xlm.py
  function get_pairs (line 430) | def get_pairs(word):
  function lowercase_and_remove_accent (line 443) | def lowercase_and_remove_accent(text):
  function replace_unicode_punct (line 460) | def replace_unicode_punct(text):
  function remove_non_printing_char (line 503) | def remove_non_printing_char(text):
  function romanian_preprocessing (line 516) | def romanian_preprocessing(text):
  class XLMTokenizer (line 530) | class XLMTokenizer(PreTrainedTokenizer):
    method __init__ (line 594) | def __init__(
    method moses_punct_norm (line 656) | def moses_punct_norm(self, text, lang):
    method moses_tokenize (line 664) | def moses_tokenize(self, text, lang):
    method moses_pipeline (line 672) | def moses_pipeline(self, text, lang):
    method ja_tokenize (line 678) | def ja_tokenize(self, text):
    method vocab_size (line 699) | def vocab_size(self):
    method get_vocab (line 702) | def get_vocab(self):
    method bpe (line 705) | def bpe(self, token):
    method _tokenize (line 749) | def _tokenize(self, text, lang="en", bypass_tokenizer=False):
    method _convert_token_to_id (line 839) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 843) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 847) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 852) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 880) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 911) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 941) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_xlm_roberta.py
  class XLMRobertaTokenizer (line 52) | class XLMRobertaTokenizer(PreTrainedTokenizer):
    method __init__ (line 108) | def __init__(
    method __getstate__ (line 159) | def __getstate__(self):
    method __setstate__ (line 164) | def __setstate__(self, d):
    method build_inputs_with_special_tokens (line 177) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 204) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 235) | def create_token_type_ids_from_sequences(
    method vocab_size (line 261) | def vocab_size(self):
    method get_vocab (line 264) | def get_vocab(self):
    method _tokenize (line 269) | def _tokenize(self, text):
    method _convert_token_to_id (line 272) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 281) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 287) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 292) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count3/pretrain/transformers1/tokenization_xlnet.py
  class XLNetTokenizer (line 53) | class XLNetTokenizer(PreTrainedTokenizer):
    method __init__ (line 113) | def __init__(
    method vocab_size (line 161) | def vocab_size(self):
    method get_vocab (line 164) | def get_vocab(self):
    method __getstate__ (line 169) | def __getstate__(self):
    method __setstate__ (line 174) | def __setstate__(self, d):
    method preprocess_text (line 187) | def preprocess_text(self, inputs):
    method _tokenize (line 202) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 226) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 230) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 234) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 239) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 265) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 296) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 324) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count3/pretrain/transformers1/trainer.py
  function is_apex_available (line 38) | def is_apex_available():
  function is_tensorboard_available (line 60) | def is_tensorboard_available():
  function is_wandb_available (line 77) | def is_wandb_available():
  function set_seed (line 84) | def set_seed(seed: int):
  function torch_distributed_zero_first (line 93) | def torch_distributed_zero_first(local_rank: int):
  class SequentialDistributedSampler (line 104) | class SequentialDistributedSampler(Sampler):
    method __init__ (line 116) | def __init__(self, dataset, num_replicas=None, rank=None):
    method __iter__ (line 131) | def __iter__(self):
    method __len__ (line 144) | def __len__(self):
  function get_tpu_sampler (line 148) | def get_tpu_sampler(dataset: Dataset):
  class Trainer (line 154) | class Trainer:
    method __init__ (line 171) | def __init__(
    method get_test_dataloader (line 222) | def get_test_dataloader(self, test_dataset: Dataset) -> DataLoader:
    method get_optimizers (line 242) | def get_optimizers(
    method _setup_wandb (line 273) | def _setup_wandb(self):
    method num_examples (line 297) | def num_examples(self, dataloader: DataLoader) -> int:
    method train (line 303) | def train(self, model_path: Optional[str] = None):
    method _log (line 510) | def _log(self, logs: Dict[str, float], iterator: Optional[tqdm] = None...
    method _training_step (line 524) | def _training_step(
    method is_local_master (line 547) | def is_local_master(self) -> bool:
    method is_world_master (line 553) | def is_world_master(self) -> bool:
    method save_model (line 563) | def save_model(self, output_dir: Optional[str] = None):
    method _save_tpu (line 576) | def _save_tpu(self, output_dir: Optional[str] = None):
    method _save (line 592) | def _save(self, output_dir: Optional[str] = None):
    method _sorted_checkpoints (line 605) | def _sorted_checkpoints(self, checkpoint_prefix=PREFIX_CHECKPOINT_DIR,...
    method _rotate_checkpoints (line 622) | def _rotate_checkpoints(self, use_mtime=False) -> None:
    method evaluate (line 641) | def evaluate(
    method predict (line 670) | def predict(self, test_dataset: Dataset) -> PredictionOutput:
    method _prediction_loop (line 681) | def _prediction_loop(
    method distributed_concat (line 771) | def distributed_concat(self, tensor: torch.Tensor, num_total_examples:...

FILE: code/bert-base-count3/pretrain/transformers1/trainer_tf.py
  class TFTrainer (line 20) | class TFTrainer:
    method __init__ (line 31) | def __init__(
    method _setup_training (line 50) | def _setup_training(self) -> None:
    method _set_loss_and_metric (line 67) | def _set_loss_and_metric(self) -> None:
    method _create_summary_writer (line 84) | def _create_summary_writer(self) -> None:
    method _prepare_dataset (line 90) | def _prepare_dataset(self) -> None:
    method _create_optimizer (line 122) | def _create_optimizer(self) -> None:
    method _create_checkpoint_manager (line 146) | def _create_checkpoint_manager(self, max_to_keep: int = 5, load_model:...
    method _evaluate_steps (line 162) | def _evaluate_steps(self, per_replica_features, per_replica_labels):
    method _prediction_loop (line 182) | def _prediction_loop(
    method evaluate (line 237) | def evaluate(
    method train (line 250) | def train(self) -> None:
    method _training_steps (line 317) | def _training_steps(self):
    method _apply_gradients (line 327) | def _apply_gradients(self):
    method _step (line 331) | def _step(self):
    method _accumulate_next_gradients (line 342) | def _accumulate_next_gradients(self):
    method _accumulate_gradients (line 358) | def _accumulate_gradients(self, per_replica_features, per_replica_labe...
    method _forward (line 371) | def _forward(self, features, labels):
    method _run_model (line 383) | def _run_model(self, features, labels, training):
    method predict (line 412) | def predict(self, test_dataset: tf.data.Dataset) -> PredictionOutput:
    method save_model (line 426) | def save_model(self) -> None:

FILE: code/bert-base-count3/pretrain/transformers1/trainer_utils.py
  class EvalPrediction (line 6) | class EvalPrediction(NamedTuple):
  class PredictionOutput (line 16) | class PredictionOutput(NamedTuple):
  class TrainOutput (line 22) | class TrainOutput(NamedTuple):

FILE: code/bert-base-count3/pretrain/transformers1/training_args.py
  function is_tpu_available (line 23) | def is_tpu_available():
  class TrainingArguments (line 31) | class TrainingArguments:
    method train_batch_size (line 138) | def train_batch_size(self) -> int:
    method eval_batch_size (line 148) | def eval_batch_size(self) -> int:
    method _setup_devices (line 159) | def _setup_devices(self) -> Tuple["torch.device", int]:
    method device (line 182) | def device(self) -> "torch.device":
    method n_gpu (line 187) | def n_gpu(self):
    method to_json_string (line 190) | def to_json_string(self):
    method to_sanitized_dict (line 196) | def to_sanitized_dict(self) -> Dict[str, Any]:

FILE: code/bert-base-count3/pretrain/transformers1/training_args_tf.py
  class TFTrainingArguments (line 16) | class TFTrainingArguments(TrainingArguments):
    method _setup_strategy (line 46) | def _setup_strategy(self) -> Tuple["tf.distribute.Strategy", int]:
    method strategy (line 80) | def strategy(self) -> "tf.distribute.Strategy":
    method n_gpu (line 85) | def n_gpu(self) -> int:

FILE: code/bert-base-count3/pretrain/transformers1/utils_encoder_decoder.py
  function prepare_encoder_decoder_model_kwargs (line 18) | def prepare_encoder_decoder_model_kwargs(**kwargs):

FILE: code/bert-base-count5-len32/finetuning/NEZHA/configuration_nezha.py
  class NeZhaConfig (line 6) | class NeZhaConfig(PretrainedConfig):
    method __init__ (line 82) | def __init__(

FILE: code/bert-base-count5-len32/finetuning/NEZHA/modeling_nezha.py
  function load_tf_weights_in_nezha (line 33) | def load_tf_weights_in_nezha(model, config, tf_checkpoint_path):
  class NeZhaEmbeddings (line 108) | class NeZhaEmbeddings(nn.Module):
    method __init__ (line 113) | def __init__(self, config):
    method forward (line 123) | def forward(self, input_ids=None, token_type_ids=None, inputs_embeds=N...
  function relative_position_encoding (line 140) | def relative_position_encoding(depth, max_length=512, max_relative_posit...
  class NeZhaSelfAttention (line 165) | class NeZhaSelfAttention(nn.Module):
    method __init__ (line 166) | def __init__(self, config):
    method transpose_for_scores (line 188) | def transpose_for_scores(self, x):
    method forward (line 193) | def forward(
  class NeZhaAttention (line 270) | class NeZhaAttention(nn.Module):
    method __init__ (line 271) | def __init__(self, config):
    method prune_heads (line 277) | def prune_heads(self, heads):
    method forward (line 298) | def forward(
  class NeZhaLayer (line 314) | class NeZhaLayer(nn.Module):
    method __init__ (line 315) | def __init__(self, config):
    method forward (line 324) | def forward(
  class NeZhaEncoder (line 349) | class NeZhaEncoder(nn.Module):
    method __init__ (line 350) | def __init__(self, config):
    method forward (line 357) | def forward(
  class NeZhaPreTrainedModel (line 388) | class NeZhaPreTrainedModel(PreTrainedModel):
    method _init_weights (line 397) | def _init_weights(self, module):
  class NeZhaModel (line 414) | class NeZhaModel(NeZhaPreTrainedModel):
    method __init__ (line 430) | def __init__(self, config):
    method get_input_embeddings (line 438) | def get_input_embeddings(self):
    method set_input_embeddings (line 441) | def set_input_embeddings(self, value):
    method _prune_heads (line 444) | def _prune_heads(self, heads_to_prune):
    method forward (line 453) | def forward(
  class NeZhaForPreTraining (line 569) | class NeZhaForPreTraining(NeZhaPreTrainedModel):
    method __init__ (line 570) | def __init__(self, config):
    method get_output_embeddings (line 576) | def get_output_embeddings(self):
    method forward (line 580) | def forward(
  class NeZhaForMaskedLM (line 664) | class NeZhaForMaskedLM(NeZhaPreTrainedModel):
    method __init__ (line 665) | def __init__(self, config):
    method get_output_embeddings (line 671) | def get_output_embeddings(self):
    method forward (line 675) | def forward(
    method prepare_inputs_for_generation (line 760) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class NeZhaForNextSentencePrediction (line 786) | class NeZhaForNextSentencePrediction(NeZhaPreTrainedModel):
    method __init__ (line 787) | def __init__(self, config):
    method forward (line 794) | def forward(
  class NeZhaForSequenceClassification (line 868) | class NeZhaForSequenceClassification(NeZhaPreTrainedModel):
    method __init__ (line 869) | def __init__(self, config):
    method forward (line 878) | def forward(
  class NeZhaForMultipleChoice (line 962) | class NeZhaForMultipleChoice(NeZhaPreTrainedModel):
    method __init__ (line 963) | def __init__(self, config):
    method forward (line 971) | def forward(
  class NeZhaForTokenClassification (line 1058) | class NeZhaForTokenClassification(NeZhaPreTrainedModel):
    method __init__ (line 1059) | def __init__(self, config):
    method forward (line 1068) | def forward(
  class NeZhaForQuestionAnswering (line 1153) | class NeZhaForQuestionAnswering(NeZhaPreTrainedModel):
    method __init__ (line 1154) | def __init__(self, config):
    method forward (line 1162) | def forward(

FILE: code/bert-base-count5-len32/finetuning/model.py
  class BertForClass (line 11) | class BertForClass(nn.Module):
    method __init__ (line 12) | def __init__(self, config):
    method forward (line 24) | def forward(self, input_ids, input_masks, segment_ids):
  class BertForClass_MultiDropout (line 37) | class BertForClass_MultiDropout(nn.Module):
    method __init__ (line 38) | def __init__(self, config):
    method forward (line 50) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoCls (line 63) | class BertLastTwoCls(nn.Module):
    method __init__ (line 64) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastCls (line 83) | class BertLastCls(nn.Module):
    method __init__ (line 84) | def __init__(self, config):
    method forward (line 95) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoClsPooler (line 108) | class BertLastTwoClsPooler(nn.Module):
    method __init__ (line 109) | def __init__(self, config):
    method forward (line 120) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddings (line 132) | class BertLastTwoEmbeddings(nn.Module):
    method __init__ (line 133) | def __init__(self, config):
    method forward (line 144) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddingsPooler (line 160) | class BertLastTwoEmbeddingsPooler(nn.Module):
    method __init__ (line 161) | def __init__(self, config):
    method forward (line 172) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourCls (line 187) | class BertLastFourCls(nn.Module):
    method __init__ (line 188) | def __init__(self, config):
    method forward (line 199) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourClsPooler (line 215) | class BertLastFourClsPooler(nn.Module):
    method __init__ (line 216) | def __init__(self, config):
    method forward (line 227) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddings (line 239) | class BertLastFourEmbeddings(nn.Module):
    method __init__ (line 240) | def __init__(self, config):
    method forward (line 251) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddingsPooler (line 268) | class BertLastFourEmbeddingsPooler(nn.Module):
    method __init__ (line 269) | def __init__(self, config):
    method forward (line 280) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynCls (line 296) | class BertDynCls(nn.Module):
    method __init__ (line 297) | def __init__(self, config):
    method forward (line 311) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynEmbeddings (line 343) | class BertDynEmbeddings(nn.Module):
    method __init__ (line 344) | def __init__(self, config):
    method forward (line 358) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRNN (line 392) | class BertRNN(nn.Module):
    method __init__ (line 394) | def __init__(self, config):
    method forward (line 434) | def forward(self, input_ids, input_masks, segment_ids):
  class BertCNN (line 459) | class BertCNN(nn.Module):
    method __init__ (line 461) | def __init__(self, config):
    method conv_and_pool (line 480) | def conv_and_pool(self, x, conv):
    method forward (line 485) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRCNN (line 497) | class BertRCNN(nn.Module):
    method __init__ (line 498) | def __init__(self, config):
    method forward (line 540) | def forward(self, input_ids, input_masks, segment_ids):
  class XLNet (line 564) | class XLNet(nn.Module):
    method __init__ (line 566) | def __init__(self, config):
    method forward (line 574) | def forward(self, input_ids, input_masks, segment_ids):
  class ElectraClassificationHead (line 584) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 593) | def forward(self, features, **kwargs):
  class Electra (line 602) | class Electra(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, input_ids, input_masks, segment_ids):
  class NEZHA (line 621) | class NEZHA(nn.Module):
    method __init__ (line 622) | def __init__(self, config):
    method forward (line 637) | def forward(self, input_ids, input_masks, segment_ids):

FILE: code/bert-base-count5-len32/finetuning/multi_gpu_QA.py
  class Config (line 46) | class Config:
    method __init__ (line 47) | def __init__(self):

FILE: code/bert-base-count5-len32/finetuning/utils.py
  function paddingList (line 12) | def paddingList(ls:list,val,returnTensor=False):
  function fastTokenizer (line 19) | def fastTokenizer(a:str,b:str,maxLen,tk):
  class data_generator (line 39) | class data_generator:
    method __init__ (line 40) | def __init__(self, data, config, shuffle=False):
    method __len__ (line 53) | def __len__(self):
    method __iter__ (line 56) | def __iter__(self):
  class PGD (line 95) | class PGD():
    method __init__ (line 96) | def __init__(self, model):
    method attack (line 101) | def attack(self, epsilon=0.3, alpha=0.1, emb_name='word_embeddings', i...
    method restore (line 113) | def restore(self, emb_name='word_embeddings'):
    method project (line 121) | def project(self, param_name, param_data, epsilon):
    method backup_grad (line 127) | def backup_grad(self):
    method restore_grad (line 132) | def restore_grad(self):
  class FGM (line 139) | class FGM():
    method __init__ (line 140) | def __init__(self, model):
    method attack (line 144) | def attack(self, epsilon=0.25, emb_name='word_embeddings'):
    method restore (line 154) | def restore(self, emb_name='word_embeddings'):
  class FocalLoss (line 164) | class FocalLoss(nn.Module):
    method __init__ (line 180) | def __init__(self, num_class, alpha=None, gamma=2,
    method forward (line 201) | def forward(self, input, target):
  function f1_match (line 244) | def f1_match(y_true,y_pred):

FILE: code/bert-base-count5/finetuning/NEZHA/configuration_nezha.py
  class NeZhaConfig (line 6) | class NeZhaConfig(PretrainedConfig):
    method __init__ (line 82) | def __init__(

FILE: code/bert-base-count5/finetuning/NEZHA/modeling_nezha.py
  function load_tf_weights_in_nezha (line 33) | def load_tf_weights_in_nezha(model, config, tf_checkpoint_path):
  class NeZhaEmbeddings (line 108) | class NeZhaEmbeddings(nn.Module):
    method __init__ (line 113) | def __init__(self, config):
    method forward (line 123) | def forward(self, input_ids=None, token_type_ids=None, inputs_embeds=N...
  function relative_position_encoding (line 140) | def relative_position_encoding(depth, max_length=512, max_relative_posit...
  class NeZhaSelfAttention (line 165) | class NeZhaSelfAttention(nn.Module):
    method __init__ (line 166) | def __init__(self, config):
    method transpose_for_scores (line 188) | def transpose_for_scores(self, x):
    method forward (line 193) | def forward(
  class NeZhaAttention (line 270) | class NeZhaAttention(nn.Module):
    method __init__ (line 271) | def __init__(self, config):
    method prune_heads (line 277) | def prune_heads(self, heads):
    method forward (line 298) | def forward(
  class NeZhaLayer (line 314) | class NeZhaLayer(nn.Module):
    method __init__ (line 315) | def __init__(self, config):
    method forward (line 324) | def forward(
  class NeZhaEncoder (line 349) | class NeZhaEncoder(nn.Module):
    method __init__ (line 350) | def __init__(self, config):
    method forward (line 357) | def forward(
  class NeZhaPreTrainedModel (line 388) | class NeZhaPreTrainedModel(PreTrainedModel):
    method _init_weights (line 397) | def _init_weights(self, module):
  class NeZhaModel (line 414) | class NeZhaModel(NeZhaPreTrainedModel):
    method __init__ (line 430) | def __init__(self, config):
    method get_input_embeddings (line 438) | def get_input_embeddings(self):
    method set_input_embeddings (line 441) | def set_input_embeddings(self, value):
    method _prune_heads (line 444) | def _prune_heads(self, heads_to_prune):
    method forward (line 453) | def forward(
  class NeZhaForPreTraining (line 569) | class NeZhaForPreTraining(NeZhaPreTrainedModel):
    method __init__ (line 570) | def __init__(self, config):
    method get_output_embeddings (line 576) | def get_output_embeddings(self):
    method forward (line 580) | def forward(
  class NeZhaForMaskedLM (line 664) | class NeZhaForMaskedLM(NeZhaPreTrainedModel):
    method __init__ (line 665) | def __init__(self, config):
    method get_output_embeddings (line 671) | def get_output_embeddings(self):
    method forward (line 675) | def forward(
    method prepare_inputs_for_generation (line 760) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class NeZhaForNextSentencePrediction (line 786) | class NeZhaForNextSentencePrediction(NeZhaPreTrainedModel):
    method __init__ (line 787) | def __init__(self, config):
    method forward (line 794) | def forward(
  class NeZhaForSequenceClassification (line 868) | class NeZhaForSequenceClassification(NeZhaPreTrainedModel):
    method __init__ (line 869) | def __init__(self, config):
    method forward (line 878) | def forward(
  class NeZhaForMultipleChoice (line 962) | class NeZhaForMultipleChoice(NeZhaPreTrainedModel):
    method __init__ (line 963) | def __init__(self, config):
    method forward (line 971) | def forward(
  class NeZhaForTokenClassification (line 1058) | class NeZhaForTokenClassification(NeZhaPreTrainedModel):
    method __init__ (line 1059) | def __init__(self, config):
    method forward (line 1068) | def forward(
  class NeZhaForQuestionAnswering (line 1153) | class NeZhaForQuestionAnswering(NeZhaPreTrainedModel):
    method __init__ (line 1154) | def __init__(self, config):
    method forward (line 1162) | def forward(

FILE: code/bert-base-count5/finetuning/model.py
  class BertForClass (line 11) | class BertForClass(nn.Module):
    method __init__ (line 12) | def __init__(self, config):
    method forward (line 24) | def forward(self, input_ids, input_masks, segment_ids):
  class BertForClass_MultiDropout (line 37) | class BertForClass_MultiDropout(nn.Module):
    method __init__ (line 38) | def __init__(self, config):
    method forward (line 50) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoCls (line 63) | class BertLastTwoCls(nn.Module):
    method __init__ (line 64) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastCls (line 83) | class BertLastCls(nn.Module):
    method __init__ (line 84) | def __init__(self, config):
    method forward (line 95) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoClsPooler (line 108) | class BertLastTwoClsPooler(nn.Module):
    method __init__ (line 109) | def __init__(self, config):
    method forward (line 120) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddings (line 132) | class BertLastTwoEmbeddings(nn.Module):
    method __init__ (line 133) | def __init__(self, config):
    method forward (line 144) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddingsPooler (line 160) | class BertLastTwoEmbeddingsPooler(nn.Module):
    method __init__ (line 161) | def __init__(self, config):
    method forward (line 172) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourCls (line 187) | class BertLastFourCls(nn.Module):
    method __init__ (line 188) | def __init__(self, config):
    method forward (line 199) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourClsPooler (line 215) | class BertLastFourClsPooler(nn.Module):
    method __init__ (line 216) | def __init__(self, config):
    method forward (line 227) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddings (line 239) | class BertLastFourEmbeddings(nn.Module):
    method __init__ (line 240) | def __init__(self, config):
    method forward (line 251) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddingsPooler (line 268) | class BertLastFourEmbeddingsPooler(nn.Module):
    method __init__ (line 269) | def __init__(self, config):
    method forward (line 280) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynCls (line 296) | class BertDynCls(nn.Module):
    method __init__ (line 297) | def __init__(self, config):
    method forward (line 311) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynEmbeddings (line 343) | class BertDynEmbeddings(nn.Module):
    method __init__ (line 344) | def __init__(self, config):
    method forward (line 358) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRNN (line 392) | class BertRNN(nn.Module):
    method __init__ (line 394) | def __init__(self, config):
    method forward (line 434) | def forward(self, input_ids, input_masks, segment_ids):
  class BertCNN (line 459) | class BertCNN(nn.Module):
    method __init__ (line 461) | def __init__(self, config):
    method conv_and_pool (line 480) | def conv_and_pool(self, x, conv):
    method forward (line 485) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRCNN (line 497) | class BertRCNN(nn.Module):
    method __init__ (line 498) | def __init__(self, config):
    method forward (line 540) | def forward(self, input_ids, input_masks, segment_ids):
  class XLNet (line 564) | class XLNet(nn.Module):
    method __init__ (line 566) | def __init__(self, config):
    method forward (line 574) | def forward(self, input_ids, input_masks, segment_ids):
  class ElectraClassificationHead (line 584) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 593) | def forward(self, features, **kwargs):
  class Electra (line 602) | class Electra(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, input_ids, input_masks, segment_ids):
  class NEZHA (line 621) | class NEZHA(nn.Module):
    method __init__ (line 622) | def __init__(self, config):
    method forward (line 637) | def forward(self, input_ids, input_masks, segment_ids):

FILE: code/bert-base-count5/finetuning/multi_gpu_QA.py
  class Config (line 46) | class Config:
    method __init__ (line 47) | def __init__(self):

FILE: code/bert-base-count5/finetuning/utils.py
  function paddingList (line 12) | def paddingList(ls:list,val,returnTensor=False):
  function fastTokenizer (line 19) | def fastTokenizer(a:str,b:str,maxLen,tk):
  class data_generator (line 39) | class data_generator:
    method __init__ (line 40) | def __init__(self, data, config, shuffle=False):
    method __len__ (line 53) | def __len__(self):
    method __iter__ (line 56) | def __iter__(self):
  class PGD (line 95) | class PGD():
    method __init__ (line 96) | def __init__(self, model):
    method attack (line 101) | def attack(self, epsilon=0.3, alpha=0.1, emb_name='word_embeddings', i...
    method restore (line 113) | def restore(self, emb_name='word_embeddings'):
    method project (line 121) | def project(self, param_name, param_data, epsilon):
    method backup_grad (line 127) | def backup_grad(self):
    method restore_grad (line 132) | def restore_grad(self):
  class FGM (line 139) | class FGM():
    method __init__ (line 140) | def __init__(self, model):
    method attack (line 144) | def attack(self, epsilon=0.25, emb_name='word_embeddings'):
    method restore (line 154) | def restore(self, emb_name='word_embeddings'):
  class FocalLoss (line 164) | class FocalLoss(nn.Module):
    method __init__ (line 180) | def __init__(self, num_class, alpha=None, gamma=2,
    method forward (line 201) | def forward(self, input, target):
  function f1_match (line 244) | def f1_match(y_true,y_pred):

FILE: code/bert-base-count5/pretrain/NLP_Utils.py
  function writeToJsonFile (line 10) | def writeToJsonFile(path: str, obj):
  function readFromJsonFile (line 13) | def readFromJsonFile(path: str):
  function loadData (line 17) | def loadData(path):
  function calNegPos (line 35) | def calNegPos(ls):#计算正负比例
  function paddingList (line 54) | def paddingList(ls:list,val,returnTensor=False):
  function truncate (line 61) | def truncate(a:list,b:list,maxLen):
  class MLM_Data (line 77) | class MLM_Data(Dataset):
    method __init__ (line 79) | def __init__(self,textLs:list,maxLen:int,tk:BertTokenizer):
    method __len__ (line 87) | def __len__(self):
    method random_mask (line 90) | def random_mask(self,text_ids):
    method __getitem__ (line 128) | def __getitem__(self, item):
    method collate (line 143) | def collate(cls,batch):
  function blockShuffle (line 163) | def blockShuffle(data:list,bs:int,sortBsNum,key):
  class blockShuffleDataLoader (line 179) | class blockShuffleDataLoader(DataLoader):
    method __init__ (line 180) | def __init__(self, dataset: Dataset,sortBsNum,key,**kwargs):
    method __iter__ (line 186) | def __iter__(self):

FILE: code/bert-base-count5/pretrain/transformers1/__main__.py
  function main (line 2) | def main():

FILE: code/bert-base-count5/pretrain/transformers1/activations.py
  function swish (line 11) | def swish(x):
  function _gelu_python (line 15) | def _gelu_python(x):
  function gelu_new (line 25) | def gelu_new(x):
  function gelu_fast (line 38) | def gelu_fast(x):
  function get_activation (line 52) | def get_activation(activation_string):

FILE: code/bert-base-count5/pretrain/transformers1/benchmark/benchmark.py
  class PyTorchBenchmark (line 38) | class PyTorchBenchmark(Benchmark):
    method framework_version (line 45) | def framework_version(self):
    method train (line 48) | def train(self, model_name, batch_size, sequence_length, trace_memory=...
    method inference (line 100) | def inference(self, model_name, batch_size, sequence_length, trace_mem...

FILE: code/bert-base-count5/pretrain/transformers1/benchmark/benchmark_args.py
  function is_tpu_available (line 37) | def is_tpu_available():
  class PyTorchBenchmarkArguments (line 45) | class PyTorchBenchmarkArguments(BenchmarkArguments):
    method _setup_devices (line 52) | def _setup_devices(self) -> Tuple["torch.device", int]:
    method device_idx (line 67) | def device_idx(self) -> int:
    method device (line 72) | def device(self) -> "torch.device":
    method n_gpu (line 77) | def n_gpu(self):

FILE: code/bert-base-count5/pretrain/transformers1/benchmark/benchmark_args_utils.py
  function list_field (line 24) | def list_field(default=None, metadata=None):
  class BenchmarkArguments (line 29) | class BenchmarkArguments:
    method to_json_string (line 90) | def to_json_string(self):
    method model_names (line 97) | def model_names(self):

FILE: code/bert-base-count5/pretrain/transformers1/benchmark/benchmark_utils.py
  function is_memory_tracing_enabled (line 43) | def is_memory_tracing_enabled():
  class Frame (line 48) | class Frame(NamedTuple):
  class UsedMemoryState (line 65) | class UsedMemoryState(NamedTuple):
  class Memory (line 77) | class Memory(NamedTuple):
    method __repr__ (line 85) | def __repr__(self) -> str:
  class MemoryState (line 89) | class MemoryState(NamedTuple):
  class MemorySummary (line 103) | class MemorySummary(NamedTuple):
  function start_memory_tracing (line 123) | def start_memory_tracing(
  function stop_memory_tracing (line 273) | def stop_memory_tracing(
  function bytes_to_mega_bytes (line 370) | def bytes_to_mega_bytes(memory_amount: int) -> int:
  class Benchmark (line 376) | class Benchmark(ABC):
    method __init__ (line 386) | def __init__(self, args: BenchmarkArguments = None, configs: Pretraine...
    method print_fn (line 401) | def print_fn(self):
    method is_gpu (line 421) | def is_gpu(self):
    method framework_version (line 426) | def framework_version(self):
    method train (line 430) | def train(self, model_name, batch_size, sequence_length):
    method inference (line 434) | def inference(self, model_name, batch_size, sequence_length):
    method run (line 437) | def run(self):
    method environment_info (line 512) | def environment_info(self):
    method print_results (line 572) | def print_results(self, result_dict):
    method print_memory_trace_statistics (line 585) | def print_memory_trace_statistics(self, summary: MemorySummary):
    method save_to_csv (line 609) | def save_to_csv(self, result_dict, filename):

FILE: code/bert-base-count5/pretrain/transformers1/benchmark_utils.py
  function is_memory_tracing_enabled (line 29) | def is_memory_tracing_enabled():
  class Frame (line 34) | class Frame(NamedTuple):
  class UsedMemoryState (line 51) | class UsedMemoryState(NamedTuple):
  class Memory (line 63) | class Memory(NamedTuple):
    method __repr__ (line 71) | def __repr__(self) -> str:
  class MemoryState (line 75) | class MemoryState(NamedTuple):
  class MemorySummary (line 89) | class MemorySummary(NamedTuple):
  function start_memory_tracing (line 108) | def start_memory_tracing(
  function stop_memory_tracing (line 256) | def stop_memory_tracing(
  function bytes_to_human_readable (line 334) | def bytes_to_human_readable(memory_amount: int) -> str:

FILE: code/bert-base-count5/pretrain/transformers1/commands/__init__.py
  class BaseTransformersCLICommand (line 5) | class BaseTransformersCLICommand(ABC):
    method register_subcommand (line 8) | def register_subcommand(parser: ArgumentParser):
    method run (line 12) | def run(self):

FILE: code/bert-base-count5/pretrain/transformers1/commands/convert.py
  function convert_command_factory (line 7) | def convert_command_factory(args: Namespace):
  class ConvertCommand (line 17) | class ConvertCommand(BaseTransformersCLICommand):
    method register_subcommand (line 19) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 46) | def __init__(
    method run (line 64) | def run(self):

FILE: code/bert-base-count5/pretrain/transformers1/commands/download.py
  function download_command_factory (line 6) | def download_command_factory(args):
  class DownloadCommand (line 10) | class DownloadCommand(BaseTransformersCLICommand):
    method register_subcommand (line 12) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 23) | def __init__(self, model: str, cache: str, force: bool):
    method run (line 28) | def run(self):

FILE: code/bert-base-count5/pretrain/transformers1/commands/env.py
  function info_command_factory (line 9) | def info_command_factory(_):
  class EnvironmentCommand (line 13) | class EnvironmentCommand(BaseTransformersCLICommand):
    method register_subcommand (line 15) | def register_subcommand(parser: ArgumentParser):
    method run (line 19) | def run(self):
    method format_dict (line 57) | def format_dict(d):

FILE: code/bert-base-count5/pretrain/transformers1/commands/run.py
  function try_infer_format_from_ext (line 11) | def try_infer_format_from_ext(path: str):
  function run_command_factory (line 25) | def run_command_factory(args):
  class RunCommand (line 44) | class RunCommand(BaseTransformersCLICommand):
    method __init__ (line 45) | def __init__(self, nlp: Pipeline, reader: PipelineDataFormat):
    method register_subcommand (line 50) | def register_subcommand(parser: ArgumentParser):
    method run (line 81) | def run(self):

FILE: code/bert-base-count5/pretrain/transformers1/commands/serving.py
  function Body (line 21) | def Body(*x, **y):
  function serve_command_factory (line 30) | def serve_command_factory(args: Namespace):
  class ServeModelInfoResult (line 45) | class ServeModelInfoResult(BaseModel):
  class ServeTokenizeResult (line 53) | class ServeTokenizeResult(BaseModel):
  class ServeDeTokenizeResult (line 62) | class ServeDeTokenizeResult(BaseModel):
  class ServeForwardResult (line 70) | class ServeForwardResult(BaseModel):
  class ServeCommand (line 78) | class ServeCommand(BaseTransformersCLICommand):
    method register_subcommand (line 80) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 106) | def __init__(self, pipeline: Pipeline, host: str, port: int, workers: ...
    method run (line 156) | def run(self):
    method model_info (line 159) | def model_info(self):
    method tokenize (line 162) | def tokenize(self, text_input: str = Body(None, embed=True), return_id...
    method detokenize (line 180) | def detokenize(
    method forward (line 198) | async def forward(self, inputs=Body(None, embed=True)):

FILE: code/bert-base-count5/pretrain/transformers1/commands/train.py
  function train_command_factory (line 18) | def train_command_factory(args: Namespace):
  class TrainCommand (line 26) | class TrainCommand(BaseTransformersCLICommand):
    method register_subcommand (line 28) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 78) | def __init__(self, args: Namespace):
    method run (line 124) | def run(self):
    method run_torch (line 129) | def run_torch(self):
    method run_tf (line 132) | def run_tf(self):

FILE: code/bert-base-count5/pretrain/transformers1/commands/transformers_cli.py
  function main (line 12) | def main():

FILE: code/bert-base-count5/pretrain/transformers1/commands/user.py
  class UserCommands (line 16) | class UserCommands(BaseTransformersCLICommand):
    method register_subcommand (line 18) | def register_subcommand(parser: ArgumentParser):
  class ANSI (line 47) | class ANSI:
    method bold (line 57) | def bold(cls, s):
    method red (line 61) | def red(cls, s):
  class BaseUserCommand (line 65) | class BaseUserCommand:
    method __init__ (line 66) | def __init__(self, args):
  class LoginCommand (line 71) | class LoginCommand(BaseUserCommand):
    method run (line 72) | def run(self):
  class WhoamiCommand (line 98) | class WhoamiCommand(BaseUserCommand):
    method run (line 99) | def run(self):
  class LogoutCommand (line 115) | class LogoutCommand(BaseUserCommand):
    method run (line 116) | def run(self):
  class ListObjsCommand (line 126) | class ListObjsCommand(BaseUserCommand):
    method tabulate (line 127) | def tabulate(self, rows: List[List[Union[str, int]]], headers: List[st...
    method run (line 142) | def run(self):
  class DeleteObjCommand (line 160) | class DeleteObjCommand(BaseUserCommand):
    method run (line 161) | def run(self):
  class UploadCommand (line 175) | class UploadCommand(BaseUserCommand):
    method walk_dir (line 176) | def walk_dir(self, rel_path):
    method run (line 187) | def run(self):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_albert.py
  class AlbertConfig (line 33) | class AlbertConfig(PretrainedConfig):
    method __init__ (line 104) | def __init__(

FILE: code/bert-base-count5/pretrain/transformers1/configuration_auto.py
  class AutoConfig (line 98) | class AutoConfig:
    method __init__ (line 109) | def __init__(self):
    method for_model (line 116) | def for_model(cls, model_type: str, *args, **kwargs):
    method from_pretrained (line 127) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_bart.py
  class BartConfig (line 34) | class BartConfig(PretrainedConfig):
    method __init__ (line 40) | def __init__(
    method num_attention_heads (line 121) | def num_attention_heads(self) -> int:
    method hidden_size (line 125) | def hidden_size(self) -> int:
    method is_valid_mbart (line 128) | def is_valid_mbart(self) -> bool:

FILE: code/bert-base-count5/pretrain/transformers1/configuration_bert.py
  class BertConfig (line 53) | class BertConfig(PretrainedConfig):
    method __init__ (line 109) | def __init__(

FILE: code/bert-base-count5/pretrain/transformers1/configuration_camembert.py
  class CamembertConfig (line 33) | class CamembertConfig(RobertaConfig):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_ctrl.py
  class CTRLConfig (line 28) | class CTRLConfig(PretrainedConfig):
    method __init__ (line 83) | def __init__(
    method max_position_embeddings (line 125) | def max_position_embeddings(self):
    method hidden_size (line 129) | def hidden_size(self):
    method num_attention_heads (line 133) | def num_attention_heads(self):
    method num_hidden_layers (line 137) | def num_hidden_layers(self):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_distilbert.py
  class DistilBertConfig (line 36) | class DistilBertConfig(PretrainedConfig):
    method __init__ (line 96) | def __init__(
    method hidden_size (line 130) | def hidden_size(self):
    method num_attention_heads (line 134) | def num_attention_heads(self):
    method num_hidden_layers (line 138) | def num_hidden_layers(self):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_electra.py
  class ElectraConfig (line 36) | class ElectraConfig(PretrainedConfig):
    method __init__ (line 95) | def __init__(

FILE: code/bert-base-count5/pretrain/transformers1/configuration_encoder_decoder.py
  class EncoderDecoderConfig (line 26) | class EncoderDecoderConfig(PretrainedConfig):
    method __init__ (line 62) | def __init__(self, **kwargs):
    method from_encoder_decoder_configs (line 79) | def from_encoder_decoder_configs(
    method to_dict (line 90) | def to_dict(self):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_flaubert.py
  class FlaubertConfig (line 33) | class FlaubertConfig(XLMConfig):
    method __init__ (line 147) | def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_...

FILE: code/bert-base-count5/pretrain/transformers1/configuration_gpt2.py
  class GPT2Config (line 35) | class GPT2Config(PretrainedConfig):
    method __init__ (line 117) | def __init__(
    method max_position_embeddings (line 164) | def max_position_embeddings(self):
    method hidden_size (line 168) | def hidden_size(self):
    method num_attention_heads (line 172) | def num_attention_heads(self):
    method num_hidden_layers (line 176) | def num_hidden_layers(self):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_longformer.py
  class LongformerConfig (line 34) | class LongformerConfig(RobertaConfig):
    method __init__ (line 65) | def __init__(self, attention_window: Union[List[int], int] = 512, sep_...

FILE: code/bert-base-count5/pretrain/transformers1/configuration_marian.py
  class MarianConfig (line 25) | class MarianConfig(BartConfig):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_mmbt.py
  class MMBTConfig (line 25) | class MMBTConfig(object):
    method __init__ (line 38) | def __init__(self, config, num_labels=None, modal_hidden_size=2048):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_openai.py
  class OpenAIGPTConfig (line 31) | class OpenAIGPTConfig(PretrainedConfig):
    method __init__ (line 115) | def __init__(
    method max_position_embeddings (line 159) | def max_position_embeddings(self):
    method hidden_size (line 163) | def hidden_size(self):
    method num_attention_heads (line 167) | def num_attention_heads(self):
    method num_hidden_layers (line 171) | def num_hidden_layers(self):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_reformer.py
  class ReformerConfig (line 32) | class ReformerConfig(PretrainedConfig):
    method __init__ (line 141) | def __init__(

FILE: code/bert-base-count5/pretrain/transformers1/configuration_roberta.py
  class RobertaConfig (line 36) | class RobertaConfig(BertConfig):
    method __init__ (line 65) | def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **k...

FILE: code/bert-base-count5/pretrain/transformers1/configuration_t5.py
  class T5Config (line 34) | class T5Config(PretrainedConfig):
    method __init__ (line 64) | def __init__(
    method max_position_embeddings (line 98) | def max_position_embeddings(self):
    method hidden_size (line 102) | def hidden_size(self):
    method num_attention_heads (line 106) | def num_attention_heads(self):
    method num_hidden_layers (line 110) | def num_hidden_layers(self):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_transfo_xl.py
  class TransfoXLConfig (line 31) | class TransfoXLConfig(PretrainedConfig):
    method __init__ (line 117) | def __init__(
    method max_position_embeddings (line 186) | def max_position_embeddings(self):
    method n_token (line 190) | def n_token(self):  # Backward compatibility
    method n_token (line 194) | def n_token(self, value):  # Backward compatibility
    method hidden_size (line 198) | def hidden_size(self):
    method num_attention_heads (line 202) | def num_attention_heads(self):
    method num_hidden_layers (line 206) | def num_hidden_layers(self):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_utils.py
  class PretrainedConfig (line 31) | class PretrainedConfig(object):
    method __init__ (line 56) | def __init__(self, **kwargs):
    method num_labels (line 118) | def num_labels(self):
    method num_labels (line 122) | def num_labels(self, num_labels):
    method save_pretrained (line 126) | def save_pretrained(self, save_directory):
    method from_pretrained (line 146) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs) -> "...
    method get_config_dict (line 205) | def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs)...
    method from_dict (line 270) | def from_dict(cls, config_dict: Dict, **kwargs) -> "PretrainedConfig":
    method from_json_file (line 308) | def from_json_file(cls, json_file: str) -> "PretrainedConfig":
    method _dict_from_json_file (line 324) | def _dict_from_json_file(cls, json_file: str):
    method __eq__ (line 329) | def __eq__(self, other):
    method __repr__ (line 332) | def __repr__(self):
    method to_diff_dict (line 335) | def to_diff_dict(self):
    method to_dict (line 358) | def to_dict(self):
    method to_json_string (line 370) | def to_json_string(self, use_diff=True):
    method to_json_file (line 387) | def to_json_file(self, json_file_path, use_diff=True):
    method update (line 400) | def update(self, config_dict: Dict):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_xlm.py
  class XLMConfig (line 39) | class XLMConfig(PretrainedConfig):
    method __init__ (line 159) | def __init__(
    method n_words (line 235) | def n_words(self):  # For backward compatibility
    method n_words (line 239) | def n_words(self, value):  # For backward compatibility
    method hidden_size (line 243) | def hidden_size(self):
    method num_attention_heads (line 247) | def num_attention_heads(self):
    method num_hidden_layers (line 251) | def num_hidden_layers(self):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_xlm_roberta.py
  class XLMRobertaConfig (line 36) | class XLMRobertaConfig(RobertaConfig):

FILE: code/bert-base-count5/pretrain/transformers1/configuration_xlnet.py
  class XLNetConfig (line 32) | class XLNetConfig(PretrainedConfig):
    method __init__ (line 129) | def __init__(
    method max_position_embeddings (line 194) | def max_position_embeddings(self):
    method n_token (line 198) | def n_token(self):  # Backward compatibility
    method n_token (line 202) | def n_token(self, value):  # Backward compatibility
    method hidden_size (line 206) | def hidden_size(self):
    method num_attention_heads (line 210) | def num_attention_heads(self):
    method num_hidden_layers (line 214) | def num_hidden_layers(self):

FILE: code/bert-base-count5/pretrain/transformers1/convert_albert_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_f...

FILE: code/bert-base-count5/pretrain/transformers1/convert_bart_original_pytorch_checkpoint_to_pytorch.py
  function remove_ignore_keys_ (line 56) | def remove_ignore_keys_(state_dict):
  function rename_key (line 68) | def rename_key(dct, old, new):
  function load_xsum_checkpoint (line 73) | def load_xsum_checkpoint(checkpoint_path):
  function convert_checkpoint_from_disk (line 81) | def convert_checkpoint_from_disk(checkpoint_path, **config_kwargs):
  function convert_bart_checkpoint (line 95) | def convert_bart_checkpoint(checkpoint_path, pytorch_dump_folder_path, h...

FILE: code/bert-base-count5/pretrain/transformers1/convert_bert_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_fil...

FILE: code/bert-base-count5/pretrain/transformers1/convert_bert_pytorch_checkpoint_to_original_tf.py
  function convert_pytorch_checkpoint_to_tf (line 28) | def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, mo...
  function main (line 92) | def main(raw_args=None):

FILE: code/bert-base-count5/pretrain/transformers1/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
  function convert_dialogpt_checkpoint (line 15) | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folde...

FILE: code/bert-base-count5/pretrain/transformers1/convert_electra_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, py...

FILE: code/bert-base-count5/pretrain/transformers1/convert_gpt2_original_tf_checkpoint_to_pytorch.py
  function convert_gpt2_checkpoint_to_pytorch (line 29) | def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config...

FILE: code/bert-base-count5/pretrain/transformers1/convert_graph_to_onnx.py
  class OnnxConverterArgumentParser (line 11) | class OnnxConverterArgumentParser(ArgumentParser):
    method __init__ (line 16) | def __init__(self):
  function ensure_valid_input (line 28) | def ensure_valid_input(model, tokens, input_names):
  function infer_shapes (line 53) | def infer_shapes(nlp: Pipeline, framework: str) -> Tuple[List[str], List...
  function load_graph_from_args (line 100) | def load_graph_from_args(framework: str, model: str, tokenizer: Optional...
  function convert_pytorch (line 111) | def convert_pytorch(nlp: Pipeline, opset: int, output: str, use_external...
  function convert_tensorflow (line 138) | def convert_tensorflow(nlp: Pipeline, opset: int, output: str):
  function convert (line 166) | def convert(
  function verify (line 193) | def verify(path: str):

FILE: code/bert-base-count5/pretrain/transformers1/convert_longformer_original_pytorch_lightning_to_pytorch.py
  class LightningModel (line 26) | class LightningModel(pl.LightningModule):
    method __init__ (line 27) | def __init__(self, model):
    method forward (line 34) | def forward(self):
  function convert_longformer_qa_checkpoint_to_pytorch (line 38) | def convert_longformer_qa_checkpoint_to_pytorch(

FILE: code/bert-base-count5/pretrain/transformers1/convert_marian_to_pytorch.py
  function remove_prefix (line 18) | def remove_prefix(text: str, prefix: str):
  function convert_encoder_layer (line 24) | def convert_encoder_layer(opus_dict, layer_prefix: str, converter: dict):
  function load_layers_ (line 35) | def load_layers_(layer_lst: torch.nn.ModuleList, opus_state: dict, conve...
  function find_pretrained_model (line 42) | def find_pretrained_model(src_lang: str, tgt_lang: str) -> List[str]:
  function add_emb_entries (line 55) | def add_emb_entries(wemb, final_bias, n_special_tokens=1):
  function _cast_yaml_str (line 64) | def _cast_yaml_str(v):
  function cast_marian_config (line 76) | def cast_marian_config(raw_cfg: Dict[str, str]) -> Dict:
  function load_config_from_state_dict (line 83) | def load_config_from_state_dict(opus_dict):
  function find_model_file (line 91) | def find_model_file(dest_dir):  # this one better
  function convert_opus_name_to_hf_name (line 136) | def convert_opus_name_to_hf_name(x):
  function convert_hf_name_to_opus_name (line 142) | def convert_hf_name_to_opus_name(hf_model_name):
  function write_model_card (line 152) | def write_model_card(
  function get_clean_model_id_mapping (line 185) | def get_clean_model_id_mapping(multiling_model_ids):
  function make_registry (line 189) | def make_registry(repo_path="Opus-MT-train/models"):
  function convert_all_sentencepiece_models (line 206) | def convert_all_sentencepiece_models(model_list=None, repo_path=None):
  function lmap (line 222) | def lmap(f, x) -> List:
  function fetch_test_set (line 226) | def fetch_test_set(test_set_url):
  function convert_whole_dir (line 239) | def convert_whole_dir(path=Path("marian_ckpt/")):
  function _parse_readme (line 247) | def _parse_readme(lns):
  function save_tokenizer_config (line 270) | def save_tokenizer_config(dest_dir: Path):
  function add_to_vocab_ (line 276) | def add_to_vocab_(vocab: Dict[str, int], special_tokens: List[str]):
  function find_vocab_file (line 287) | def find_vocab_file(model_dir):
  function add_special_tokens_to_vocab (line 291) | def add_special_tokens_to_vocab(model_dir: Path) -> None:
  function save_tokenizer (line 300) | def save_tokenizer(self, save_directory):
  function check_equal (line 309) | def check_equal(marian_cfg, k1, k2):
  function check_marian_cfg_assumptions (line 314) | def check_marian_cfg_assumptions(marian_cfg):
  class OpusState (line 371) | class OpusState:
    method __init__ (line 372) | def __init__(self, source_dir):
    method _check_layer_entries (line 420) | def _check_layer_entries(self):
    method extra_keys (line 432) | def extra_keys(self):
    method sub_keys (line 445) | def sub_keys(self, layer_prefix):
    method load_marian_model (line 448) | def load_marian_model(self) -> MarianMTModel:
  function download_and_unzip (line 483) | def download_and_unzip(url, dest_dir):
  function convert (line 494) | def convert(source_dir: Path, dest_dir):
  function load_yaml (line 525) | def load_yaml(path):
  function save_json (line 532) | def save_json(content: Union[Dict, List], path: str) -> None:
  function unzip (line 537) | def unzip(zip_path: str, dest_dir: str) -> None:

FILE: code/bert-base-count5/pretrain/transformers1/convert_openai_original_tf_checkpoint_to_pytorch.py
  function convert_openai_checkpoint_to_pytorch (line 29) | def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, ...

FILE: code/bert-base-count5/pretrain/transformers1/convert_pytorch_checkpoint_to_tf2.py
  function convert_pt_checkpoint_to_tf (line 187) | def convert_pt_checkpoint_to_tf(
  function convert_all_pt_checkpoints_to_tf (line 233) | def convert_all_pt_checkpoints_to_tf(

FILE: code/bert-base-count5/pretrain/transformers1/convert_reformer_trax_checkpoint_to_pytorch.py
  function set_param (line 31) | def set_param(torch_layer, weight, bias=None):
  function set_layer_weights_in_torch_lsh (line 40) | def set_layer_weights_in_torch_lsh(weights, torch_layer, hidden_size):
  function set_layer_weights_in_torch_local (line 58) | def set_layer_weights_in_torch_local(weights, torch_layer, hidden_size):
  function set_block_weights_in_torch (line 79) | def set_block_weights_in_torch(weights, torch_block, hidden_size):
  function set_model_weights_in_torch (line 128) | def set_model_weights_in_torch(weights, torch_model, hidden_size):
  function convert_trax_checkpoint_to_pytorch (line 174) | def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file,...

FILE: code/bert-base-count5/pretrain/transformers1/convert_roberta_original_pytorch_checkpoint_to_pytorch.py
  function convert_roberta_checkpoint_to_pytorch (line 42) | def convert_roberta_checkpoint_to_pytorch(

FILE: code/bert-base-count5/pretrain/transformers1/convert_t5_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, py...

FILE: code/bert-base-count5/pretrain/transformers1/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
  function convert_transfo_xl_checkpoint_to_pytorch (line 47) | def convert_transfo_xl_checkpoint_to_pytorch(

FILE: code/bert-base-count5/pretrain/transformers1/convert_xlm_original_pytorch_checkpoint_to_pytorch.py
  function convert_xlm_checkpoint_to_pytorch (line 32) | def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_...

FILE: code/bert-base-count5/pretrain/transformers1/convert_xlnet_original_tf_checkpoint_to_pytorch.py
  function convert_xlnet_checkpoint_to_pytorch (line 51) | def convert_xlnet_checkpoint_to_pytorch(

FILE: code/bert-base-count5/pretrain/transformers1/data/data_collator.py
  class DataCollator (line 12) | class DataCollator(ABC):
    method collate_batch (line 19) | def collate_batch(self) -> Dict[str, torch.Tensor]:
  class DefaultDataCollator (line 33) | class DefaultDataCollator(DataCollator):
    method collate_batch (line 46) | def collate_batch(self, features: List[InputDataClass]) -> Dict[str, t...
  class DataCollatorForLanguageModeling (line 80) | class DataCollatorForLanguageModeling(DataCollator):
    method collate_batch (line 91) | def collate_batch(self, examples: List[torch.Tensor]) -> Dict[str, tor...
    method _tensorize_batch (line 99) | def _tensorize_batch(self, examples: List[torch.Tensor]) -> torch.Tensor:
    method mask_tokens (line 112) | def mask_tokens(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, tor...
    method mask_tokens2 (line 148) | def mask_tokens2(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens3 (line 192) | def mask_tokens3(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens4 (line 259) | def mask_tokens4(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens5 (line 342) | def mask_tokens5(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens6 (line 427) | def mask_tokens6(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens7 (line 507) | def mask_tokens7(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...

FILE: code/bert-base-count5/pretrain/transformers1/data/datasets/glue.py
  class GlueDataTrainingArguments (line 23) | class GlueDataTrainingArguments:
    method __post_init__ (line 47) | def __post_init__(self):
  class Split (line 51) | class Split(Enum):
  class GlueDataset (line 57) | class GlueDataset(Dataset):
    method __init__ (line 67) | def __init__(
    method __len__ (line 135) | def __len__(self):
    method __getitem__ (line 138) | def __getitem__(self, i) -> InputFeatures:
    method get_labels (line 141) | def get_labels(self):

FILE: code/bert-base-count5/pretrain/transformers1/data/datasets/language_modeling.py
  class TextDataset (line 16) | class TextDataset(Dataset):
    method __init__ (line 22) | def __init__(
    method __len__ (line 71) | def __len__(self):
    method __getitem__ (line 74) | def __getitem__(self, i) -> torch.Tensor:
  class LineByLineTextDataset (line 78) | class LineByLineTextDataset(Dataset):
    method __init__ (line 84) | def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, blo...
    method __len__ (line 97) | def __len__(self):
    method __getitem__ (line 100) | def __getitem__(self, i) -> torch.Tensor:

FILE: code/bert-base-count5/pretrain/transformers1/data/metrics/__init__.py
  function is_sklearn_available (line 26) | def is_sklearn_available():
  function simple_accuracy (line 32) | def simple_accuracy(preds, labels):
  function acc_and_f1 (line 35) | def acc_and_f1(preds, labels):
  function pearson_and_spearman (line 44) | def pearson_and_spearman(preds, labels):
  function glue_compute_metrics (line 53) | def glue_compute_metrics(task_name, preds, labels):
  function xnli_compute_metrics (line 80) | def xnli_compute_metrics(task_name, preds, labels):

FILE: code/bert-base-count5/pretrain/transformers1/data/metrics/squad_metrics.py
  function normalize_answer (line 24) | def normalize_answer(s):
  function get_tokens (line 44) | def get_tokens(s):
  function compute_exact (line 50) | def compute_exact(a_gold, a_pred):
  function compute_f1 (line 54) | def compute_f1(a_gold, a_pred):
  function get_raw_scores (line 70) | def get_raw_scores(examples, preds):
  function apply_no_ans_threshold (line 96) | def apply_no_ans_threshold(scores, na_probs, qid_to_has_ans, na_prob_thr...
  function make_eval_dict (line 107) | def make_eval_dict(exact_scores, f1_scores, qid_list=None):
  function merge_eval (line 128) | def merge_eval(main_eval, new_eval, prefix):
  function find_best_thresh_v2 (line 133) | def find_best_thresh_v2(preds, scores, na_probs, qid_to_has_ans):
  function find_all_best_thresh_v2 (line 167) | def find_all_best_thresh_v2(main_eval, preds, exact_raw, f1_raw, na_prob...
  function find_best_thresh (line 178) | def find_best_thresh(preds, scores, na_probs, qid_to_has_ans):
  function find_all_best_thresh (line 201) | def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, ...
  function squad_evaluate (line 211) | def squad_evaluate(examples, preds, no_answer_probs=None, no_answer_prob...
  function get_final_text (line 242) | def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=...
  function _get_best_indexes (line 336) | def _get_best_indexes(logits, n_best_size):
  function _compute_softmax (line 348) | def _compute_softmax(scores):
  function compute_predictions_logits (line 371) | def compute_predictions_logits(
  function compute_predictions_log_probs (line 576) | def compute_predictions_log_probs(

FILE: code/bert-base-count5/pretrain/transformers1/data/processors/glue.py
  function glue_convert_examples_to_features (line 34) | def glue_convert_examples_to_features(
  function _tf_glue_convert_examples_to_features (line 70) | def _tf_glue_convert_examples_to_features(
  function _glue_convert_examples_to_features (line 107) | def _glue_convert_examples_to_features(
  class OutputMode (line 159) | class OutputMode(Enum):
  class MrpcProcessor (line 164) | class MrpcProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 167) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 176) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 181) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 185) | def get_test_examples(self, data_dir):
    method get_labels (line 189) | def get_labels(self):
    method _create_examples (line 193) | def _create_examples(self, lines, set_type):
  class MnliProcessor (line 207) | class MnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 210) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 219) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 223) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 227) | def get_test_examples(self, data_dir):
    method get_labels (line 231) | def get_labels(self):
    method _create_examples (line 235) | def _create_examples(self, lines, set_type):
  class MnliMismatchedProcessor (line 249) | class MnliMismatchedProcessor(MnliProcessor):
    method get_dev_examples (line 252) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 256) | def get_test_examples(self, data_dir):
  class ColaProcessor (line 261) | class ColaProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 264) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 273) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 277) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 281) | def get_test_examples(self, data_dir):
    method get_labels (line 285) | def get_labels(self):
    method _create_examples (line 289) | def _create_examples(self, lines, set_type):
  class Sst2Processor (line 304) | class Sst2Processor(DataProcessor):
    method get_example_from_tensor_dict (line 307) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 316) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 320) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 324) | def get_test_examples(self, data_dir):
    method get_labels (line 328) | def get_labels(self):
    method _create_examples (line 332) | def _create_examples(self, lines, set_type):
  class StsbProcessor (line 346) | class StsbProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 349) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 358) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 362) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 366) | def get_test_examples(self, data_dir):
    method get_labels (line 370) | def get_labels(self):
    method _create_examples (line 374) | def _create_examples(self, lines, set_type):
  class QqpProcessor (line 388) | class QqpProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 391) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 400) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 404) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 408) | def get_test_examples(self, data_dir):
    method get_labels (line 412) | def get_labels(self):
    method _create_examples (line 416) | def _create_examples(self, lines, set_type):
  class QnliProcessor (line 436) | class QnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 439) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 448) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 452) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 456) | def get_test_examples(self, data_dir):
    method get_labels (line 460) | def get_labels(self):
    method _create_examples (line 464) | def _create_examples(self, lines, set_type):
  class RteProcessor (line 478) | class RteProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 481) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 490) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 494) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 498) | def get_test_examples(self, data_dir):
    method get_labels (line 502) | def get_labels(self):
    method _create_examples (line 506) | def _create_examples(self, lines, set_type):
  class WnliProcessor (line 520) | class WnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 523) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 532) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 536) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 540) | def get_test_examples(self, data_dir):
    method get_labels (line 544) | def get_labels(self):
    method _create_examples (line 548) | def _create_examples(self, lines, set_type):

FILE: code/bert-base-count5/pretrain/transformers1/data/processors/squad.py
  function _improve_answer_span (line 25) | def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, ...
  function _check_is_max_context (line 38) | def _check_is_max_context(doc_spans, cur_span_index, position):
  function _new_check_is_max_context (line 58) | def _new_check_is_max_context(doc_spans, cur_span_index, position):
  function _is_whitespace (line 80) | def _is_whitespace(c):
  function squad_convert_example_to_features (line 86) | def squad_convert_example_to_features(example, max_seq_length, doc_strid...
  function squad_convert_example_to_features_init (line 264) | def squad_convert_example_to_features_init(tokenizer_for_convert):
  function squad_convert_examples_to_features (line 269) | def squad_convert_examples_to_features(
  class SquadProcessor (line 445) | class SquadProcessor(DataProcessor):
    method _get_example_from_tensor_dict (line 454) | def _get_example_from_tensor_dict(self, tensor_dict, evaluate=False):
    method get_examples_from_dataset (line 478) | def get_examples_from_dataset(self, dataset, evaluate=False):
    method get_train_examples (line 509) | def get_train_examples(self, data_dir, filename=None):
    method get_dev_examples (line 531) | def get_dev_examples(self, data_dir, filename=None):
    method _create_examples (line 552) | def _create_examples(self, input_data, set_type):
  class SquadV1Processor (line 594) | class SquadV1Processor(SquadProcessor):
  class SquadV2Processor (line 599) | class SquadV2Processor(SquadProcessor):
  class SquadExample (line 604) | class SquadExample(object):
    method __init__ (line 619) | def __init__(
  class SquadFeatures (line 667) | class SquadFeatures(object):
    method __init__ (line 692) | def __init__(
  class SquadResult (line 729) | class SquadResult(object):
    method __init__ (line 739) | def __init__(self, unique_id, start_logits, end_logits, start_top_inde...

FILE: code/bert-base-count5/pretrain/transformers1/data/processors/utils.py
  class InputExample (line 31) | class InputExample:
    method to_json_string (line 50) | def to_json_string(self):
  class InputFeatures (line 56) | class InputFeatures:
    method to_json_string (line 77) | def to_json_string(self):
  class DataProcessor (line 82) | class DataProcessor:
    method get_example_from_tensor_dict (line 85) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 93) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 97) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 101) | def get_test_examples(self, data_dir):
    method get_labels (line 105) | def get_labels(self):
    method tfds_map (line 109) | def tfds_map(self, example):
    method _read_tsv (line 117) | def _read_tsv(cls, input_file, quotechar=None):
  class SingleSentenceClassificationProcessor (line 123) | class SingleSentenceClassificationProcessor(DataProcessor):
    method __init__ (line 126) | def __init__(self, labels=None, examples=None, mode="classification", ...
    method __len__ (line 132) | def __len__(self):
    method __getitem__ (line 135) | def __getitem__(self, idx):
    method create_from_csv (line 141) | def create_from_csv(
    method create_from_examples (line 158) | def create_from_examples(cls, texts_or_text_and_labels, labels=None, *...
    method add_examples_from_csv (line 163) | def add_examples_from_csv(
    method add_examples (line 193) | def add_examples(
    method get_features (line 226) | def get_features(

FILE: code/bert-base-count5/pretrain/transformers1/data/processors/xnli.py
  class XnliProcessor (line 28) | class XnliProcessor(DataProcessor):
    method __init__ (line 32) | def __init__(self, language, train_language=None):
    method get_train_examples (line 36) | def get_train_examples(self, data_dir):
    method get_test_examples (line 52) | def get_test_examples(self, data_dir):
    method get_labels (line 70) | def get_labels(self):

FILE: code/bert-base-count5/pretrain/transformers1/file_utils.py
  function is_torch_available (line 93) | def is_torch_available():
  function is_tf_available (line 97) | def is_tf_available():
  function add_start_docstrings (line 101) | def add_start_docstrings(*docstr):
  function add_start_docstrings_to_callable (line 109) | def add_start_docstrings_to_callable(*docstr):
  function add_end_docstrings (line 127) | def add_end_docstrings(*docstr):
  function is_remote_url (line 135) | def is_remote_url(url_or_filename):
  function hf_bucket_url (line 140) | def hf_bucket_url(model_id: str, filename: str, use_cdn=True) -> str:
  function url_to_filename (line 164) | def url_to_filename(url, etag=None):
  function filename_to_url (line 188) | def filename_to_url(filename, cache_dir=None):
  function cached_path (line 214) | def cached_path(
  function http_get (line 306) | def http_get(url, temp_file, proxies=None, resume_size=0, user_agent=None):
  function get_from_cache (line 339) | def get_from_cache(
  class cached_property (line 453) | class cached_property(property):
    method __get__ (line 462) | def __get__(self, obj, objtype=None):
  function torch_required (line 476) | def torch_required(func):
  function tf_required (line 488) | def tf_required(func):

FILE: code/bert-base-count5/pretrain/transformers1/hf_api.py
  class S3Obj (line 29) | class S3Obj:
    method __init__ (line 34) | def __init__(self, filename: str, LastModified: str, ETag: str, Size: ...
  class PresignedUrl (line 41) | class PresignedUrl:
    method __init__ (line 42) | def __init__(self, write: str, access: str, type: str, **kwargs):
  class S3Object (line 48) | class S3Object:
    method __init__ (line 53) | def __init__(
  class ModelInfo (line 69) | class ModelInfo:
    method __init__ (line 74) | def __init__(
  class HfApi (line 92) | class HfApi:
    method __init__ (line 93) | def __init__(self, endpoint=None):
    method login (line 96) | def login(self, username: str, password: str) -> str:
    method whoami (line 112) | def whoami(self, token: str) -> Tuple[str, List[str]]:
    method logout (line 122) | def logout(self, token: str) -> None:
    method presign (line 130) | def presign(self, token: str, filename: str, organization: Optional[st...
    method presign_and_upload (line 144) | def presign_and_upload(self, token: str, filename: str, filepath: str,...
    method list_objs (line 166) | def list_objs(self, token: str, organization: Optional[str] = None) ->...
    method delete_obj (line 177) | def delete_obj(self, token: str, filename: str, organization: Optional...
    method model_list (line 189) | def model_list(self) -> List[ModelInfo]:
  class TqdmProgressFileReader (line 200) | class TqdmProgressFileReader:
    method __init__ (line 209) | def __init__(self, f: io.BufferedReader):
    method _read (line 216) | def _read(self, n=-1):
    method close (line 220) | def close(self):
  class HfFolder (line 224) | class HfFolder:
    method save_token (line 228) | def save_token(cls, token):
    method get_token (line 237) | def get_token(cls):
    method delete_token (line 248) | def delete_token(cls):

FILE: code/bert-base-count5/pretrain/transformers1/hf_argparser.py
  class HfArgumentParser (line 14) | class HfArgumentParser(ArgumentParser):
    method __init__ (line 26) | def __init__(self, dataclass_types: Union[DataClassType, Iterable[Data...
    method _add_dataclass_arguments (line 42) | def _add_dataclass_arguments(self, dtype: DataClassType):
    method parse_args_into_dataclasses (line 88) | def parse_args_into_dataclasses(
    method parse_json_file (line 146) | def parse_json_file(self, json_file: str) -> Tuple[DataClass, ...]:

FILE: code/bert-base-count5/pretrain/transformers1/modelcard.py
  class ModelCard (line 38) | class ModelCard:
    method __init__ (line 55) | def __init__(self, **kwargs):
    method save_pretrained (line 75) | def save_pretrained(self, save_directory_or_file):
    method from_pretrained (line 88) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    method from_dict (line 186) | def from_dict(cls, json_object):
    method from_json_file (line 191) | def from_json_file(cls, json_file):
    method __eq__ (line 198) | def __eq__(self, other):
    method __repr__ (line 201) | def __repr__(self):
    method to_dict (line 204) | def to_dict(self):
    method to_json_string (line 209) | def to_json_string(self):
    method to_json_file (line 213) | def to_json_file(self, json_file_path):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_albert.py
  function load_tf_weights_in_albert (line 47) | def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
  class AlbertEmbeddings (line 171) | class AlbertEmbeddings(BertEmbeddings):
    method __init__ (line 176) | def __init__(self, config):
  class AlbertAttention (line 185) | class AlbertAttention(BertSelfAttention):
    method __init__ (line 186) | def __init__(self, config):
    method prune_heads (line 198) | def prune_heads(self, heads):
    method forward (line 221) | def forward(self, input_ids, attention_mask=None, head_mask=None):
  class AlbertLayer (line 266) | class AlbertLayer(nn.Module):
    method __init__ (line 267) | def __init__(self, config):
    method forward (line 277) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertLayerGroup (line 287) | class AlbertLayerGroup(nn.Module):
    method __init__ (line 288) | def __init__(self, config):
    method forward (line 295) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertTransformer (line 317) | class AlbertTransformer(nn.Module):
    method __init__ (line 318) | def __init__(self, config):
    method forward (line 327) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertPreTrainedModel (line 363) | class AlbertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 371) | def _init_weights(self, module):
  class AlbertModel (line 439) | class AlbertModel(AlbertPreTrainedModel):
    method __init__ (line 445) | def __init__(self, config):
    method get_input_embeddings (line 456) | def get_input_embeddings(self):
    method set_input_embeddings (line 459) | def set_input_embeddings(self, value):
    method _resize_token_embeddings (line 462) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 468) | def _prune_heads(self, heads_to_prune):
    method forward (line 487) | def forward(
  class AlbertForPreTraining (line 576) | class AlbertForPreTraining(AlbertPreTrainedModel):
    method __init__ (line 577) | def __init__(self, config):
    method tie_weights (line 587) | def tie_weights(self):
    method get_output_embeddings (line 590) | def get_output_embeddings(self):
    method forward (line 594) | def forward(
  class AlbertMLMHead (line 680) | class AlbertMLMHead(nn.Module):
    method __init__ (line 681) | def __init__(self, config):
    method forward (line 693) | def forward(self, hidden_states):
  class AlbertSOPHead (line 704) | class AlbertSOPHead(nn.Module):
    method __init__ (line 705) | def __init__(self, config):
    method forward (line 711) | def forward(self, pooled_output):
  class AlbertForMaskedLM (line 720) | class AlbertForMaskedLM(AlbertPreTrainedModel):
    method __init__ (line 721) | def __init__(self, config):
    method tie_weights (line 730) | def tie_weights(self):
    method get_output_embeddings (line 733) | def get_output_embeddings(self):
    method forward (line 737) | def forward(
  class AlbertForSequenceClassification (line 810) | class AlbertForSequenceClassification(AlbertPreTrainedModel):
    method __init__ (line 811) | def __init__(self, config):
    method forward (line 822) | def forward(
  class AlbertForTokenClassification (line 905) | class AlbertForTokenClassification(AlbertPreTrainedModel):
    method __init__ (line 906) | def __init__(self, config):
    method forward (line 917) | def forward(
  class AlbertForQuestionAnswering (line 1002) | class AlbertForQuestionAnswering(AlbertPreTrainedModel):
    method __init__ (line 1003) | def __init__(self, config):
    method forward (line 1013) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_auto.py
  class AutoModel (line 269) | class AutoModel:
    method __init__ (line 279) | def __init__(self):
    method from_config (line 287) | def from_config(cls, config):
    method from_pretrained (line 329) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForPreTraining (line 424) | class AutoModelForPreTraining:
    method __init__ (line 433) | def __init__(self):
    method from_config (line 441) | def from_config(cls, config):
    method from_pretrained (line 483) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelWithLMHead (line 570) | class AutoModelWithLMHead:
    method __init__ (line 580) | def __init__(self):
    method from_config (line 588) | def from_config(cls, config):
    method from_pretrained (line 630) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForSequenceClassification (line 718) | class AutoModelForSequenceClassification:
    method __init__ (line 728) | def __init__(self):
    method from_config (line 736) | def from_config(cls, config):
    method from_pretrained (line 778) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForQuestionAnswering (line 867) | class AutoModelForQuestionAnswering:
    method __init__ (line 877) | def __init__(self):
    method from_config (line 885) | def from_config(cls, config):
    method from_pretrained (line 924) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForTokenClassification (line 1009) | class AutoModelForTokenClassification:
    method __init__ (line 1019) | def __init__(self):
    method from_config (line 1027) | def from_config(cls, config):
    method from_pretrained (line 1069) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForMultipleChoice (line 1156) | class AutoModelForMultipleChoice:
    method __init__ (line 1166) | def __init__(self):
    method from_config (line 1174) | def from_config(cls, config):
    method from_pretrained (line 1189) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...

FILE: code/bert-base-count5/pretrain/transformers1/modeling_bart.py
  function invert_mask (line 94) | def invert_mask(attention_mask):
  function _prepare_bart_decoder_inputs (line 99) | def _prepare_bart_decoder_inputs(
  class PretrainedBartModel (line 120) | class PretrainedBartModel(PreTrainedModel):
    method _init_weights (line 124) | def _init_weights(self, module):
    method dummy_inputs (line 138) | def dummy_inputs(self):
  function _make_linear_from_emb (line 148) | def _make_linear_from_emb(emb):
  function _check_shapes (line 156) | def _check_shapes(shape_1, shape2):
  function shift_tokens_right (line 161) | def shift_tokens_right(input_ids, pad_token_id):
  function make_padding_mask (line 170) | def make_padding_mask(input_ids, padding_idx=1):
  class EncoderLayer (line 181) | class EncoderLayer(nn.Module):
    method __init__ (line 182) | def __init__(self, config: BartConfig):
    method forward (line 198) | def forward(self, x, encoder_padding_mask):
  class BartEncoder (line 234) | class BartEncoder(nn.Module):
    method __init__ (line 243) | def __init__(self, config: BartConfig, embed_tokens):
    method forward (line 270) | def forward(
  class DecoderLayer (line 327) | class DecoderLayer(nn.Module):
    method __init__ (line 328) | def __init__(self, config: BartConfig):
    method forward (line 352) | def forward(
  class BartDecoder (line 416) | class BartDecoder(nn.Module):
    method __init__ (line 425) | def __init__(self, config: BartConfig, embed_tokens: nn.Embedding):
    method forward (line 449) | def forward(
  function _reorder_buffer (line 542) | def _reorder_buffer(attn_cache, new_order):
  class SelfAttention (line 549) | class SelfAttention(nn.Module):
    method __init__ (line 552) | def __init__(
    method _shape (line 575) | def _shape(self, tensor, dim_0, bsz):
    method forward (line 578) | def forward(
    method _use_saved_state (line 663) | def _use_saved_state(self, k, v, saved_state, key_padding_mask, static...
    method _cat_prev_key_padding_mask (line 691) | def _cat_prev_key_padding_mask(
  class BartClassificationHead (line 718) | class BartClassificationHead(nn.Module):
    method __init__ (line 723) | def __init__(
    method forward (line 731) | def forward(self, x):
  class LearnedPositionalEmbedding (line 740) | class LearnedPositionalEmbedding(nn.Embedding):
    method __init__ (line 748) | def __init__(
    method forward (line 757) | def forward(self, input, use_cache=False):
  function LayerNorm (line 767) | def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True):
  function fill_with_neg_inf (line 778) | def fill_with_neg_inf(t):
  function _filter_out_falsey_values (line 783) | def _filter_out_falsey_values(tup) -> Tuple:
  function _get_shape (line 789) | def _get_shape(t):
  class BartModel (line 796) | class BartModel(PretrainedBartModel):
    method __init__ (line 797) | def __init__(self, config: BartConfig):
    method forward (line 811) | def forward(
    method get_input_embeddings (line 854) | def get_input_embeddings(self):
    method set_input_embeddings (line 857) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 862) | def get_output_embeddings(self):
  class BartForConditionalGeneration (line 870) | class BartForConditionalGeneration(PretrainedBartModel):
    method __init__ (line 873) | def __init__(self, config: BartConfig):
    method resize_token_embeddings (line 879) | def resize_token_embeddings(self, new_num_tokens: int) -> nn.Embedding:
    method _resize_final_logits_bias (line 886) | def _resize_final_logits_bias(self, new_num_tokens: int, old_num_token...
    method forward (line 895) | def forward(
    method prepare_inputs_for_generation (line 967) | def prepare_inputs_for_generation(self, decoder_input_ids, past, atten...
    method prepare_logits_for_generation (line 984) | def prepare_logits_for_generation(self, logits, cur_len, max_length):
    method _force_token_ids_generation (line 991) | def _force_token_ids_generation(self, scores, token_ids) -> None:
    method _reorder_cache (line 1004) | def _reorder_cache(past, beam_idx):
    method get_encoder (line 1020) | def get_encoder(self):
    method get_output_embeddings (line 1023) | def get_output_embeddings(self):
  class BartForSequenceClassification (line 1031) | class BartForSequenceClassification(PretrainedBartModel):
    method __init__ (line 1032) | def __init__(self, config: BartConfig, **kwargs):
    method forward (line 1042) | def forward(
  class SinusoidalPositionalEmbedding (line 1109) | class SinusoidalPositionalEmbedding(nn.Embedding):
    method __init__ (line 1112) | def __init__(self, num_positions, embedding_dim, padding_idx=None):
    method _init_weight (line 1119) | def _init_weight(out: nn.Parameter):
    method forward (line 1134) | def forward(self, input_ids, use_cache=False):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_beam_search.py
  class TransformerBeamSearch (line 29) | class TransformerBeamSearch(nn.Module):
    method __init__ (line 30) | def __init__(
    method step (line 80) | def step(self, log_probabilities):
    method forward (line 177) | def forward(self, encoder_input_ids, **kwargs):
    method remove_repeating_trigrams (line 224) | def remove_repeating_trigrams(self, log_probabilities, _B):
    method enforce_min_length (line 233) | def enforce_min_length(self):
    method enforce_max_length (line 237) | def enforce_max_length(self):
    method length_penalty (line 241) | def length_penalty(self):
  function tile (line 245) | def tile(x, count, dim=0):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_bert.py
  function load_tf_weights_in_bert (line 62) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
  function mish (line 134) | def mish(x):
  class BertEmbeddings (line 144) | class BertEmbeddings(nn.Module):
    method __init__ (line 148) | def __init__(self, config):
    method forward (line 159) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No...
  class BertSelfAttention (line 184) | class BertSelfAttention(nn.Module):
    method __init__ (line 185) | def __init__(self, config):
    method transpose_for_scores (line 204) | def transpose_for_scores(self, x):
    method forward (line 209) | def forward(
  class BertSelfOutput (line 262) | class BertSelfOutput(nn.Module):
    method __init__ (line 263) | def __init__(self, config):
    method forward (line 269) | def forward(self, hidden_states, input_tensor):
  class BertAttention (line 276) | class BertAttention(nn.Module):
    method __init__ (line 277) | def __init__(self, config):
    method prune_heads (line 283) | def prune_heads(self, heads):
    method forward (line 306) | def forward(
  class BertIntermediate (line 322) | class BertIntermediate(nn.Module):
    method __init__ (line 323) | def __init__(self, config):
    method forward (line 331) | def forward(self, hidden_states):
  class BertOutput (line 337) | class BertOutput(nn.Module):
    method __init__ (line 338) | def __init__(self, config):
    method forward (line 344) | def forward(self, hidden_states, input_tensor):
  class BertLayer (line 351) | class BertLayer(nn.Module):
    method __init__ (line 352) | def __init__(self, config):
    method forward (line 361) | def forward(
  class BertEncoder (line 386) | class BertEncoder(nn.Module):
    method __init__ (line 387) | def __init__(self, config):
    method forward (line 393) | def forward(
  class BertPooler (line 427) | class BertPooler(nn.Module):
    method __init__ (line 428) | def __init__(self, config):
    method forward (line 433) | def forward(self, hidden_states):
  class BertPredictionHeadTransform (line 442) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 443) | def __init__(self, config):
    method forward (line 452) | def forward(self, hidden_states):
  class BertLMPredictionHead (line 459) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 460) | def __init__(self, config):
    method forward (line 473) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 479) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 480) | def __init__(self, config):
    method forward (line 484) | def forward(self, sequence_output):
  class BertOnlyNSPHead (line 489) | class BertOnlyNSPHead(nn.Module):
    method __init__ (line 490) | def __init__(self, config):
    method forward (line 494) | def forward(self, pooled_output):
  class BertPreTrainingHeads (line 499) | class BertPreTrainingHeads(nn.Module):
    method __init__ (line 500) | def __init__(self, config):
    method forward (line 505) | def forward(self, sequence_output, pooled_output):
  class BertPreTrainedModel (line 511) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 520) | def _init_weights(self, module):
  class BertModel (line 594) | class BertModel(BertPreTrainedModel):
    method __init__ (line 611) | def __init__(self, config):
    method get_input_embeddings (line 621) | def get_input_embeddings(self):
    method set_input_embeddings (line 624) | def set_input_embeddings(self, value):
    method _prune_heads (line 627) | def _prune_heads(self, heads_to_prune):
    method forward (line 636) | def forward(
  class BertForPreTraining (line 750) | class BertForPreTraining(BertPreTrainedModel):
    method __init__ (line 751) | def __init__(self, config):
    method get_output_embeddings (line 759) | def get_output_embeddings(self):
    method forward (line 763) | def forward(
  class BertForMaskedLM (line 850) | class BertForMaskedLM(BertPreTrainedModel):
    method __init__ (line 851) | def __init__(self, config):
    method get_output_embeddings (line 859) | def get_output_embeddings(self):
    method forward (line 863) | def forward(
    method prepare_inputs_for_generation (line 960) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class BertForNextSentencePrediction (line 986) | class BertForNextSentencePrediction(BertPreTrainedModel):
    method __init__ (line 987) | def __init__(self, config):
    method forward (line 996) | def forward(
  class BertForSequenceClassification (line 1074) | class BertForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 1075) | def __init__(self, config):
    method forward (line 1086) | def forward(
  class BertForMultipleChoice (line 1171) | class BertForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1172) | def __init__(self, config):
    method forward (line 1182) | def forward(
  class BertForTokenClassification (line 1274) | class BertForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1275) | def __init__(self, config):
    method forward (line 1286) | def forward(
  class BertForQuestionAnswering (line 1372) | class BertForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 1373) | def __init__(self, config):
    method forward (line 1383) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_camembert.py
  class CamembertModel (line 59) | class CamembertModel(RobertaModel):
  class CamembertForMaskedLM (line 71) | class CamembertForMaskedLM(RobertaForMaskedLM):
  class CamembertForSequenceClassification (line 85) | class CamembertForSequenceClassification(RobertaForSequenceClassification):
  class CamembertForMultipleChoice (line 99) | class CamembertForMultipleChoice(RobertaForMultipleChoice):
  class CamembertForTokenClassification (line 113) | class CamembertForTokenClassification(RobertaForTokenClassification):
  class CamembertForQuestionAnswering (line 127) | class CamembertForQuestionAnswering(RobertaForQuestionAnswering):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_ctrl.py
  function angle_defn (line 39) | def angle_defn(pos, i, d_model_size):
  function positional_encoding (line 44) | def positional_encoding(position, d_model_size, dtype):
  function scaled_dot_product_attention (line 59) | def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, hea...
  class MultiHeadAttention (line 85) | class MultiHeadAttention(torch.nn.Module):
    method __init__ (line 86) | def __init__(self, d_model_size, num_heads, output_attentions=False):
    method split_into_heads (line 100) | def split_into_heads(self, x, batch_size):
    method forward (line 104) | def forward(self, v, k, q, mask, layer_past=None, attention_mask=None,...
  function point_wise_feed_forward_network (line 136) | def point_wise_feed_forward_network(d_model_size, dff):
  class EncoderLayer (line 140) | class EncoderLayer(torch.nn.Module):
    method __init__ (line 141) | def __init__(self, d_model_size, num_heads, dff, rate=0.1, output_atte...
    method forward (line 153) | def forward(self, x, mask, layer_past=None, attention_mask=None, head_...
  class CTRLPreTrainedModel (line 178) | class CTRLPreTrainedModel(PreTrainedModel):
    method _init_weights (line 186) | def _init_weights(self, module):
  class CTRLModel (line 263) | class CTRLModel(CTRLPreTrainedModel):
    method __init__ (line 264) | def __init__(self, config):
    method get_input_embeddings (line 287) | def get_input_embeddings(self):
    method set_input_embeddings (line 290) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 293) | def _prune_heads(self, heads_to_prune):
    method forward (line 301) | def forward(
  class CTRLLMHeadModel (line 458) | class CTRLLMHeadModel(CTRLPreTrainedModel):
    method __init__ (line 459) | def __init__(self, config):
    method get_output_embeddings (line 466) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 469) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 477) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_distilbert.py
  function create_sinusoidal_embeddings (line 54) | def create_sinusoidal_embeddings(n_pos, dim, out):
  class Embeddings (line 62) | class Embeddings(nn.Module):
    method __init__ (line 63) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids):
  class MultiHeadSelfAttention (line 100) | class MultiHeadSelfAttention(nn.Module):
    method __init__ (line 101) | def __init__(self, config):
    method prune_heads (line 118) | def prune_heads(self, heads):
    method forward (line 139) | def forward(self, query, key, value, mask, head_mask=None):
  class FFN (line 198) | class FFN(nn.Module):
    method __init__ (line 199) | def __init__(self, config):
    method forward (line 209) | def forward(self, input):
  class TransformerBlock (line 217) | class TransformerBlock(nn.Module):
    method __init__ (line 218) | def __init__(self, config):
    method forward (line 231) | def forward(self, x, attn_mask=None, head_mask=None):
  class Transformer (line 264) | class Transformer(nn.Module):
    method __init__ (line 265) | def __init__(self, config):
    method forward (line 274) | def forward(self, x, attn_mask=None, head_mask=None):
  class DistilBertPreTrainedModel (line 325) | class DistilBertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 334) | def _init_weights(self, module):
  class DistilBertModel (line 392) | class DistilBertModel(DistilBertPreTrainedModel):
    method __init__ (line 393) | def __init__(self, config):
    method get_input_embeddings (line 401) | def get_input_embeddings(self):
    method set_input_embeddings (line 404) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 407) | def _prune_heads(self, heads_to_prune):
    method forward (line 416) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForMaskedLM (line 477) | class DistilBertForMaskedLM(DistilBertPreTrainedModel):
    method __init__ (line 478) | def __init__(self, config):
    method get_output_embeddings (line 492) | def get_output_embeddings(self):
    method forward (line 496) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForSequenceClassification (line 558) | class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
    method __init__ (line 559) | def __init__(self, config):
    method forward (line 571) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForQuestionAnswering (line 638) | class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
    method __init__ (line 639) | def __init__(self, config):
    method forward (line 650) | def forward(
  class DistilBertForTokenClassification (line 740) | class DistilBertForTokenClassification(DistilBertPreTrainedModel):
    method __init__ (line 741) | def __init__(self, config):
    method forward (line 752) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...

FILE: code/bert-base-count5/pretrain/transformers1/modeling_electra.py
  function load_tf_weights_in_electra (line 28) | def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discri...
  class ElectraEmbeddings (line 109) | class ElectraEmbeddings(BertEmbeddings):
    method __init__ (line 112) | def __init__(self, config):
  class ElectraDiscriminatorPredictions (line 123) | class ElectraDiscriminatorPredictions(nn.Module):
    method __init__ (line 126) | def __init__(self, config):
    method forward (line 133) | def forward(self, discriminator_hidden_states, attention_mask):
  class ElectraGeneratorPredictions (line 141) | class ElectraGeneratorPredictions(nn.Module):
    method __init__ (line 144) | def __init__(self, config):
    method forward (line 150) | def forward(self, generator_hidden_states):
  class ElectraPreTrainedModel (line 158) | class ElectraPreTrainedModel(BertPreTrainedModel):
  class ElectraModel (line 233) | class ElectraModel(ElectraPreTrainedModel):
    method __init__ (line 237) | def __init__(self, config):
    method get_input_embeddings (line 248) | def get_input_embeddings(self):
    method set_input_embeddings (line 251) | def set_input_embeddings(self, value):
    method _prune_heads (line 254) | def _prune_heads(self, heads_to_prune):
    method forward (line 263) | def forward(
  class ElectraClassificationHead (line 334) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 337) | def __init__(self, config):
    method forward (line 343) | def forward(self, features, **kwargs):
  class ElectraForSequenceClassification (line 358) | class ElectraForSequenceClassification(ElectraPreTrainedModel):
    method __init__ (line 359) | def __init__(self, config):
    method forward (line 368) | def forward(
  class ElectraForPreTraining (line 448) | class ElectraForPreTraining(ElectraPreTrainedModel):
    method __init__ (line 449) | def __init__(self, config):
    method forward (line 457) | def forward(
  class ElectraForMaskedLM (line 542) | class ElectraForMaskedLM(ElectraPreTrainedModel):
    method __init__ (line 543) | def __init__(self, config):
    method get_output_embeddings (line 552) | def get_output_embeddings(self):
    method forward (line 556) | def forward(
  class ElectraForTokenClassification (line 634) | class ElectraForTokenClassification(ElectraPreTrainedModel):
    method __init__ (line 635) | def __init__(self, config):
    method forward (line 644) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_encoder_decoder.py
  class EncoderDecoderModel (line 29) | class EncoderDecoderModel(PreTrainedModel):
    method __init__ (line 40) | def __init__(
    method tie_weights (line 74) | def tie_weights(self):
    method get_encoder (line 78) | def get_encoder(self):
    method get_decoder (line 81) | def get_decoder(self):
    method get_input_embeddings (line 84) | def get_input_embeddings(self):
    method get_output_embeddings (line 87) | def get_output_embeddings(self):
    method from_encoder_decoder_pretrained (line 91) | def from_encoder_decoder_pretrained(
    method forward (line 183) | def forward(
    method prepare_inputs_for_generation (line 303) | def prepare_inputs_for_generation(self, input_ids, past, attention_mas...
    method _reorder_cache (line 321) | def _reorder_cache(self, past, beam_idx):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_flaubert.py
  class FlaubertModel (line 110) | class FlaubertModel(XLMModel):
    method __init__ (line 114) | def __init__(self, config):  # , dico, is_encoder, with_output):
    method forward (line 120) | def forward(
  class FlaubertWithLMHeadModel (line 300) | class FlaubertWithLMHeadModel(XLMWithLMHeadModel):
    method __init__ (line 308) | def __init__(self, config):
  class FlaubertForSequenceClassification (line 319) | class FlaubertForSequenceClassification(XLMForSequenceClassification):
    method __init__ (line 327) | def __init__(self, config):
  class FlaubertForQuestionAnsweringSimple (line 338) | class FlaubertForQuestionAnsweringSimple(XLMForQuestionAnsweringSimple):
    method __init__ (line 346) | def __init__(self, config):
  class FlaubertForQuestionAnswering (line 357) | class FlaubertForQuestionAnswering(XLMForQuestionAnswering):
    method __init__ (line 365) | def __init__(self, config):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_gpt2.py
  function load_tf_weights_in_gpt2 (line 44) | def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
  class Attention (line 99) | class Attention(nn.Module):
    method __init__ (line 100) | def __init__(self, nx, n_ctx, config, scale=False):
    method prune_heads (line 121) | def prune_heads(self, heads):
    method _attn (line 143) | def _attn(self, q, k, v, attention_mask=None, head_mask=None):
    method merge_heads (line 167) | def merge_heads(self, x):
    method split_heads (line 172) | def split_heads(self, x, k=False):
    method forward (line 180) | def forward(self, x, layer_past=None, attention_mask=None, head_mask=N...
  class MLP (line 207) | class MLP(nn.Module):
    method __init__ (line 208) | def __init__(self, n_state, config):  # in MLP: n_state=3072 (4 * n_embd)
    method forward (line 216) | def forward(self, x):
  class Block (line 222) | class Block(nn.Module):
    method __init__ (line 223) | def __init__(self, n_ctx, config, scale=False):
    method forward (line 231) | def forward(self, x, layer_past=None, attention_mask=None, head_mask=N...
  class GPT2PreTrainedModel (line 249) | class GPT2PreTrainedModel(PreTrainedModel):
    method __init__ (line 258) | def __init__(self, *inputs, **kwargs):
    method _init_weights (line 261) | def _init_weights(self, module):
  class GPT2Model (line 339) | class GPT2Model(GPT2PreTrainedModel):
    method __init__ (line 340) | def __init__(self, config):
    method get_input_embeddings (line 353) | def get_input_embeddings(self):
    method set_input_embeddings (line 356) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 359) | def _prune_heads(self, heads_to_prune):
    method forward (line 367) | def forward(
  class GPT2LMHeadModel (line 523) | class GPT2LMHeadModel(GPT2PreTrainedModel):
    method __init__ (line 524) | def __init__(self, config):
    method get_output_embeddings (line 531) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 534) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 542) | def forward(
  class GPT2DoubleHeadsModel (line 631) | class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
    method __init__ (line 632) | def __init__(self, config):
    method get_output_embeddings (line 641) | def get_output_embeddings(self):
    method forward (line 645) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_longformer.py
  function _get_question_end_index (line 43) | def _get_question_end_index(input_ids, sep_token_id):
  function _compute_global_attention_mask (line 59) | def _compute_global_attention_mask(input_ids, sep_token_id, before_sep_t...
  class LongformerSelfAttention (line 81) | class LongformerSelfAttention(nn.Module):
    method __init__ (line 82) | def __init__(self, config, layer_id):
    method _skew (line 117) | def _skew(x, direction):
    method _skew2 (line 124) | def _skew2(x):
    method _chunk (line 136) | def _chunk(x, w):
    method _mask_invalid_locations (line 150) | def _mask_invalid_locations(self, input_tensor, w) -> torch.Tensor:
    method _sliding_chunks_matmul_qk (line 163) | def _sliding_chunks_matmul_qk(self, q: torch.Tensor, k: torch.Tensor, ...
    method _sliding_chunks_matmul_pv (line 210) | def _sliding_chunks_matmul_pv(self, prob: torch.Tensor, v: torch.Tenso...
    method forward (line 238) | def forward(
  class LongformerModel (line 498) | class LongformerModel(RobertaModel):
    method __init__ (line 519) | def __init__(self, config):
    method _pad_to_window_size (line 538) | def _pad_to_window_size(
    method forward (line 582) | def forward(
  class LongformerForMaskedLM (line 686) | class LongformerForMaskedLM(BertPreTrainedModel):
    method __init__ (line 690) | def __init__(self, config):
    method forward (line 699) | def forward(
  class LongformerForSequenceClassification (line 776) | class LongformerForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 780) | def __init__(self, config):
    method forward (line 788) | def forward(
  class LongformerClassificationHead (line 868) | class LongformerClassificationHead(nn.Module):
    method __init__ (line 871) | def __init__(self, config):
    method forward (line 877) | def forward(self, hidden_states, **kwargs):
  class LongformerForQuestionAnswering (line 892) | class LongformerForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 896) | def __init__(self, config):
    method forward (line 906) | def forward(
  class LongformerForTokenClassification (line 1016) | class LongformerForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1020) | def __init__(self, config):
    method forward (line 1031) | def forward(
  class LongformerForMultipleChoice (line 1116) | class LongformerForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1120) | def __init__(self, config):
    method forward (line 1130) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_marian.py
  class MarianMTModel (line 26) | class MarianMTModel(BartForConditionalGeneration):
    method prepare_logits_for_generation (line 49) | def prepare_logits_for_generation(self, logits, cur_len, max_length):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_mmbt.py
  class ModalEmbeddings (line 32) | class ModalEmbeddings(nn.Module):
    method __init__ (line 36) | def __init__(self, config, encoder, embeddings):
    method forward (line 47) | def forward(self, input_modal, start_token=None, end_token=None, posit...
  class MMBTModel (line 152) | class MMBTModel(nn.Module, ModuleUtilsMixin):
    method __init__ (line 180) | def __init__(self, config, transformer, encoder):
    method forward (line 186) | def forward(
    method get_input_embeddings (line 268) | def get_input_embeddings(self):
    method set_input_embeddings (line 271) | def set_input_embeddings(self, value):
  class MMBTForClassification (line 281) | class MMBTForClassification(nn.Module):
    method __init__ (line 312) | def __init__(self, config, transformer, encoder):
    method forward (line 320) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_openai.py
  function load_tf_weights_in_openai_gpt (line 42) | def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folde...
  class Attention (line 122) | class Attention(nn.Module):
    method __init__ (line 123) | def __init__(self, nx, n_ctx, config, scale=False):
    method prune_heads (line 141) | def prune_heads(self, heads):
    method _attn (line 160) | def _attn(self, q, k, v, attention_mask=None, head_mask=None):
    method merge_heads (line 185) | def merge_heads(self, x):
    method split_heads (line 190) | def split_heads(self, x, k=False):
    method forward (line 198) | def forward(self, x, attention_mask=None, head_mask=None):
  class MLP (line 216) | class MLP(nn.Module):
    method __init__ (line 217) | def __init__(self, n_state, config):  # in MLP: n_state=3072 (4 * n_embd)
    method forward (line 225) | def forward(self, x):
  class Block (line 231) | class Block(nn.Module):
    method __init__ (line 232) | def __init__(self, n_ctx, config, scale=False):
    method forward (line 240) | def forward(self, x, attention_mask=None, head_mask=None):
  class OpenAIGPTPreTrainedModel (line 252) | class OpenAIGPTPreTrainedModel(PreTrainedModel):
    method _init_weights (line 261) | def _init_weights(self, module):
  class OpenAIGPTModel (line 329) | class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 330) | def __init__(self, config):
    method get_input_embeddings (line 342) | def get_input_embeddings(self):
    method set_input_embeddings (line 345) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 348) | def _prune_heads(self, heads_to_prune):
    method forward (line 356) | def forward(
  class OpenAIGPTLMHeadModel (line 471) | class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 472) | def __init__(self, config):
    method get_output_embeddings (line 479) | def get_output_embeddings(self):
    method forward (line 483) | def forward(
  class OpenAIGPTDoubleHeadsModel (line 567) | class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 568) | def __init__(self, config):
    method get_output_embeddings (line 578) | def get_output_embeddings(self):
    method forward (line 582) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_reformer.py
  function mish (line 45) | def mish(x):
  function _get_least_common_mult_chunk_len (line 70) | def _get_least_common_mult_chunk_len(config):
  class AxialPositionEmbeddings (line 87) | class AxialPositionEmbeddings(nn.Module):
    method __init__ (line 92) | def __init__(self, config):
    method forward (line 117) | def forward(self, position_ids):
  class PositionEmbeddings (line 166) | class PositionEmbeddings(nn.Module):
    method __init__ (line 170) | def __init__(self, config):
    method forward (line 175) | def forward(self, position_ids):
  class ReformerEmbeddings (line 181) | class ReformerEmbeddings(nn.Module):
    method __init__ (line 185) | def __init__(self, config):
    method forward (line 195) | def forward(self, input_ids=None, position_ids=None, inputs_embeds=None):
  class EfficientAttentionMixin (line 226) | class EfficientAttentionMixin:
    method _look_adjacent (line 231) | def _look_adjacent(self, vectors, num_chunks_before, num_chunks_after):
    method _split_hidden_size_dim (line 254) | def _split_hidden_size_dim(self, x, num_attn_heads, attn_head_size):
    method _merge_hidden_size_dims (line 262) | def _merge_hidden_size_dims(self, x, num_attn_heads, attn_head_size):
    method _split_seq_length_dim_to (line 269) | def _split_seq_length_dim_to(self, vectors, dim_factor_1, dim_factor_2...
  class LSHSelfAttention (line 284) | class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
    method __init__ (line 285) | def __init__(self, config):
    method forward (line 315) | def forward(
    method _hash_vectors (line 441) | def _hash_vectors(self, vectors, num_hashes):
    method _get_sorted_bucket_idx_and_undo_sorted_bucket_idx (line 506) | def _get_sorted_bucket_idx_and_undo_sorted_bucket_idx(self, sequence_l...
    method _set_num_buckets (line 537) | def _set_num_buckets(self, sequence_length):
    method _attend (line 556) | def _attend(
    method _compute_attn_mask (line 635) | def _compute_attn_mask(self, query_indices, key_indices, attention_mask):
    method _len_and_dim_norm (line 663) | def _len_and_dim_norm(self, vectors):
    method _len_norm (line 673) | def _len_norm(self, x, epsilon=1e-6):
    method _gather_by_expansion (line 681) | def _gather_by_expansion(self, vectors, idxs, num_hashes):
  class ReverseSort (line 690) | class ReverseSort(Function):
    method forward (line 700) | def forward(ctx, out_vectors, logits, sorted_bucket_idx, undo_sorted_b...
    method backward (line 713) | def backward(ctx, grad_out_vectors, grad_logits):
  class LocalSelfAttention (line 747) | class LocalSelfAttention(nn.Module, EfficientAttentionMixin):
    method __init__ (line 748) | def __init__(self, config):
    method forward (line 773) | def forward(self, hidden_states, attention_mask=None, head_mask=None, ...
    method _compute_attn_mask (line 888) | def _compute_attn_mask(self, query_indices, key_indices, attention_mas...
  class ReformerSelfOutput (line 913) | class ReformerSelfOutput(nn.Module):
    method __init__ (line 914) | def __init__(self, config):
    method forward (line 921) | def forward(self, hidden_states):
  class ReformerAttention (line 927) | class ReformerAttention(nn.Module):
    method __init__ (line 928) | def __init__(self, config, layer_id=0):
    method forward (line 953) | def forward(
  class ReformerFeedForwardDense (line 986) | class ReformerFeedForwardDense(nn.Module):
    method __init__ (line 987) | def __init__(self, config):
    method forward (line 998) | def forward(self, hidden_states):
  class ReformerFeedForwardOutput (line 1005) | class ReformerFeedForwardOutput(nn.Module):
    method __init__ (line 1006) | def __init__(self, config):
    method forward (line 1012) | def forward(self, hidden_states):
  class ChunkReformerFeedForward (line 1018) | class ChunkReformerFeedForward(nn.Module):
    method __init__ (line 1019) | def __init__(self, config):
    method forward (line 1028) | def forward(self, attention_output):
    method forward_chunk (line 1033) | def forward_chunk(self, hidden_states):
  class ReformerLayer (line 1039) | class ReformerLayer(nn.Module):
    method __init__ (line 1040) | def __init__(self, config, layer_id=0):
    method _init_attention_seed (line 1050) | def _init_attention_seed(self):
    method _init_feed_forward_seed (line 1070) | def _init_feed_forward_seed(self):
    method forward (line 1090) | def forward(
    method backward_pass (line 1134) | def backward_pass(
  class _ReversibleFunction (line 1195) | class _ReversibleFunction(Function):
    method forward (line 1205) | def forward(
    method backward (line 1256) | def backward(ctx, grad_hidden_states):
  class ReformerEncoder (line 1302) | class ReformerEncoder(nn.Module):
    method __init__ (line 1303) | def __init__(self, config):
    method forward (line 1312) | def forward(
  class ReformerOnlyLMHead (line 1350) | class ReformerOnlyLMHead(nn.Module):
    method __init__ (line 1351) | def __init__(self, config):
    method forward (line 1363) | def forward(self, hidden_states):
    method forward_chunk (line 1366) | def forward_chunk(self, hidden_states):
  class ReformerPreTrainedModel (line 1371) | class ReformerPreTrainedModel(PreTrainedModel):
    method dummy_inputs (line 1380) | def dummy_inputs(self):
    method _init_weights (line 1389) | def _init_weights(self, module):
  class ReformerModel (line 1470) | class ReformerModel(ReformerPreTrainedModel):
    method __init__ (line 1471) | def __init__(self, config):
    method get_input_embeddings (line 1483) | def get_input_embeddings(self):
    method set_input_embeddings (line 1486) | def set_input_embeddings(self, value):
    method _prune_heads (line 1489) | def _prune_heads(self, heads_to_prune):
    method forward (line 1498) | def forward(
    method _pad_to_mult_of_chunk_length (line 1615) | def _pad_to_mult_of_chunk_length(
  class ReformerModelWithLMHead (line 1674) | class ReformerModelWithLMHead(ReformerPreTrainedModel):
    method __init__ (line 1675) | def __init__(self, config):
    method get_output_embeddings (line 1682) | def get_output_embeddings(self):
    method tie_weights (line 1685) | def tie_weights(self):
    method forward (line 1690) | def forward(
    method prepare_inputs_for_generation (line 1766) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_roberta.py
  class RobertaEmbeddings (line 44) | class RobertaEmbeddings(BertEmbeddings):
    method __init__ (line 49) | def __init__(self, config):
    method forward (line 57) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No...
    method create_position_ids_from_inputs_embeds (line 69) | def create_position_ids_from_inputs_embeds(self, inputs_embeds):
  class RobertaModel (line 139) | class RobertaModel(BertModel):
    method __init__ (line 148) | def __init__(self, config):
    method get_input_embeddings (line 154) | def get_input_embeddings(self):
    method set_input_embeddings (line 157) | def set_input_embeddings(self, value):
  class RobertaForMaskedLM (line 162) | class RobertaForMaskedLM(BertPreTrainedModel):
    method __init__ (line 166) | def __init__(self, config):
    method get_output_embeddings (line 174) | def get_output_embeddings(self):
    method forward (line 178) | def forward(
  class RobertaLMHead (line 246) | class RobertaLMHead(nn.Module):
    method __init__ (line 249) | def __init__(self, config):
    method forward (line 260) | def forward(self, features, **kwargs):
  class RobertaForSequenceClassification (line 276) | class RobertaForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 280) | def __init__(self, config):
    method forward (line 288) | def forward(
  class RobertaForMultipleChoice (line 366) | class RobertaForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 370) | def __init__(self, config):
    method forward (line 380) | def forward(
  class RobertaForTokenClassification (line 464) | class RobertaForTokenClassification(BertPreTrainedModel):
    method __init__ (line 468) | def __init__(self, config):
    method forward (line 479) | def forward(
  class RobertaClassificationHead (line 559) | class RobertaClassificationHead(nn.Module):
    method __init__ (line 562) | def __init__(self, config):
    method forward (line 568) | def forward(self, features, **kwargs):
  class RobertaForQuestionAnswering (line 583) | class RobertaForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 597) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_t5.py
  function load_tf_weights_in_t5 (line 53) | def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
  class T5LayerNorm (line 143) | class T5LayerNorm(nn.Module):
    method __init__ (line 144) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 152) | def forward(self, x):
  class T5DenseReluDense (line 162) | class T5DenseReluDense(nn.Module):
    method __init__ (line 163) | def __init__(self, config):
    method forward (line 169) | def forward(self, hidden_states):
  class T5LayerFF (line 177) | class T5LayerFF(nn.Module):
    method __init__ (line 178) | def __init__(self, config):
    method forward (line 184) | def forward(self, hidden_states):
  class T5Attention (line 191) | class T5Attention(nn.Module):
    method __init__ (line 192) | def __init__(self, config: T5Config, has_relative_attention_bias=False):
    method prune_heads (line 215) | def prune_heads(self, heads):
    method _relative_position_bucket (line 236) | def _relative_position_bucket(relative_position, bidirectional=True, n...
    method compute_bias (line 283) | def compute_bias(self, qlen, klen):
    method forward (line 298) | def forward(
  class T5LayerSelfAttention (line 401) | class T5LayerSelfAttention(nn.Module):
    method __init__ (line 402) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 408) | def forward(
  class T5LayerCrossAttention (line 432) | class T5LayerCrossAttention(nn.Module):
    method __init__ (line 433) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 439) | def forward(
  class T5Block (line 467) | class T5Block(nn.Module):
    method __init__ (line 468) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 478) | def forward(
  class T5PreTrainedModel (line 553) | class T5PreTrainedModel(PreTrainedModel):
    method dummy_inputs (line 563) | def dummy_inputs(self):
    method _init_weights (line 573) | def _init_weights(self, module):
    method _shift_right (line 605) | def _shift_right(self, input_ids):
  class T5Stack (line 627) | class T5Stack(T5PreTrainedModel):
    method __init__ (line 628) | def __init__(self, config, embed_tokens=None):
    method get_input_embeddings (line 644) | def get_input_embeddings(self):
    method get_output_embeddings (line 647) | def get_output_embeddings(self):
    method set_input_embeddings (line 650) | def set_input_embeddings(self, new_embeddings):
    method forward (line 653) | def forward(
  class T5Model (line 846) | class T5Model(T5PreTrainedModel):
    method __init__ (line 847) | def __init__(self, config):
    method get_input_embeddings (line 860) | def get_input_embeddings(self):
    method set_input_embeddings (line 863) | def set_input_embeddings(self, new_embeddings):
    method get_encoder (line 868) | def get_encoder(self):
    method get_decoder (line 871) | def get_decoder(self):
    method _prune_heads (line 874) | def _prune_heads(self, heads_to_prune):
    method forward (line 883) | def forward(
  class T5ForConditionalGeneration (line 966) | class T5ForConditionalGeneration(T5PreTrainedModel):
    method __init__ (line 967) | def __init__(self, config):
    method get_input_embeddings (line 984) | def get_input_embeddings(self):
    method set_input_embeddings (line 987) | def set_input_embeddings(self, new_embeddings):
    method get_output_embeddings (line 992) | def get_output_embeddings(self):
    method get_encoder (line 995) | def get_encoder(self):
    method get_decoder (line 998) | def get_decoder(self):
    method forward (line 1002) | def forward(
    method prepare_inputs_for_generation (line 1114) | def prepare_inputs_for_generation(self, input_ids, past, attention_mas...
    method _reorder_cache (line 1131) | def _reorder_cache(self, past, beam_idx):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_albert.py
  class TFAlbertEmbeddings (line 45) | class TFAlbertEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 49) | def __init__(self, config, **kwargs):
    method build (line 71) | def build(self, input_shape):
    method call (line 83) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 105) | def _embedding(self, inputs, training=False):
    method _linear (line 130) | def _linear(self, inputs):
  class TFAlbertSelfAttention (line 144) | class TFAlbertSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 145) | def __init__(self, config, **kwargs):
    method transpose_for_scores (line 171) | def transpose_for_scores(self, x, batch_size):
    method call (line 175) | def call(self, inputs, training=False):
  class TFAlbertSelfOutput (line 220) | class TFAlbertSelfOutput(tf.keras.layers.Layer):
    method __init__ (line 221) | def __init__(self, config, **kwargs):
    method call (line 229) | def call(self, inputs, training=False):
  class TFAlbertAttention (line 238) | class TFAlbertAttention(TFBertSelfAttention):
    method __init__ (line 239) | def __init__(self, config, **kwargs):
    method prune_heads (line 249) | def prune_heads(self, heads):
    method call (line 252) | def call(self, inputs, training=False):
  class TFAlbertLayer (line 306) | class TFAlbertLayer(tf.keras.layers.Layer):
    method __init__ (line 307) | def __init__(self, config, **kwargs):
    method call (line 328) | def call(self, inputs, training=False):
  class TFAlbertLayerGroup (line 344) | class TFAlbertLayerGroup(tf.keras.layers.Layer):
    method __init__ (line 345) | def __init__(self, config, **kwargs):
    method call (line 354) | def call(self, inputs, training=False):
  class TFAlbertTransformer (line 379) | class TFAlbertTransformer(tf.keras.layers.Layer):
    method __init__ (line 380) | def __init__(self, config, **kwargs):
    method call (line 396) | def call(self, inputs, training=False):
  class TFAlbertPreTrainedModel (line 438) | class TFAlbertPreTrainedModel(TFPreTrainedModel):
  class TFAlbertMLMHead (line 447) | class TFAlbertMLMHead(tf.keras.layers.Layer):
    method __init__ (line 448) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 466) | def build(self, input_shape):
    method call (line 473) | def call(self, hidden_states):
  class TFAlbertMainLayer (line 482) | class TFAlbertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 485) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 498) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 501) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 504) | def _prune_heads(self, heads_to_prune):
    method call (line 511) | def call(
  class TFAlbertModel (line 674) | class TFAlbertModel(TFAlbertPreTrainedModel):
    method __init__ (line 675) | def __init__(self, config, *inputs, **kwargs):
    method call (line 680) | def call(self, inputs, **kwargs):
  class TFAlbertForPreTraining (line 725) | class TFAlbertForPreTraining(TFAlbertPreTrainedModel):
    method __init__ (line 726) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 734) | def get_output_embeddings(self):
    method call (line 738) | def call(self, inputs, **kwargs):
  class TFAlbertSOPHead (line 772) | class TFAlbertSOPHead(tf.keras.layers.Layer):
    method __init__ (line 773) | def __init__(self, config, **kwargs):
    method call (line 781) | def call(self, pooled_output, training: bool):
  class TFAlbertForMaskedLM (line 788) | class TFAlbertForMaskedLM(TFAlbertPreTrainedModel):
    method __init__ (line 789) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 795) | def get_output_embeddings(self):
    method call (line 799) | def call(self, inputs, **kwargs):
  class TFAlbertForSequenceClassification (line 844) | class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel):
    method __init__ (line 845) | def __init__(self, config, *inputs, **kwargs):
    method call (line 856) | def call(self, inputs, **kwargs):
  class TFAlbertForQuestionAnswering (line 901) | class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel):
    method __init__ (line 902) | def __init__(self, config, *inputs, **kwargs):
    method call (line 912) | def call(self, inputs, **kwargs):
  class TFAlbertForMultipleChoice (line 967) | class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel):
    method __init__ (line 968) | def __init__(self, config, *inputs, **kwargs):
    method dummy_inputs (line 978) | def dummy_inputs(self):
    method call (line 987) | def call(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_auto.py
  class TFAutoModel (line 174) | class TFAutoModel(object):
    method __init__ (line 198) | def __init__(self):
    method from_config (line 206) | def from_config(cls, config):
    method from_pretrained (line 244) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForPreTraining (line 336) | class TFAutoModelForPreTraining(object):
    method __init__ (line 345) | def __init__(self):
    method from_config (line 353) | def from_config(cls, config):
    method from_pretrained (line 392) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelWithLMHead (line 486) | class TFAutoModelWithLMHead(object):
    method __init__ (line 510) | def __init__(self):
    method from_config (line 518) | def from_config(cls, config):
    method from_pretrained (line 556) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForMultipleChoice (line 649) | class TFAutoModelForMultipleChoice:
    method __init__ (line 665) | def __init__(self):
    method from_config (line 673) | def from_config(cls, config):
    method from_pretrained (line 706) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForSequenceClassification (line 796) | class TFAutoModelForSequenceClassification(object):
    method __init__ (line 815) | def __init__(self):
    method from_config (line 823) | def from_config(cls, config):
    method from_pretrained (line 859) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForQuestionAnswering (line 952) | class TFAutoModelForQuestionAnswering(object):
    method __init__ (line 972) | def __init__(self):
    method from_config (line 980) | def from_config(cls, config):
    method from_pretrained (line 1017) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForTokenClassification (line 1111) | class TFAutoModelForTokenClassification:
    method __init__ (line 1112) | def __init__(self):
    method from_config (line 1120) | def from_config(cls, config):
    method from_pretrained (line 1155) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_bert.py
  function gelu (line 58) | def gelu(x):
  function gelu_new (line 69) | def gelu_new(x):
  function swish (line 82) | def swish(x):
  class TFBertEmbeddings (line 94) | class TFBertEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 98) | def __init__(self, config, **kwargs):
    method build (line 122) | def build(self, input_shape):
    method call (line 134) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 156) | def _embedding(self, inputs, training=False):
    method _linear (line 181) | def _linear(self, inputs):
  class TFBertSelfAttention (line 197) | class TFBertSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 198) | def __init__(self, config, **kwargs):
    method transpose_for_scores (line 224) | def transpose_for_scores(self, x, batch_size):
    method call (line 228) | def call(self, inputs, training=False):
  class TFBertSelfOutput (line 273) | class TFBertSelfOutput(tf.keras.layers.Layer):
    method __init__ (line 274) | def __init__(self, config, **kwargs):
    method call (line 282) | def call(self, inputs, training=False):
  class TFBertAttention (line 291) | class TFBertAttention(tf.keras.layers.Layer):
    method __init__ (line 292) | def __init__(self, config, **kwargs):
    method prune_heads (line 297) | def prune_heads(self, heads):
    method call (line 300) | def call(self, inputs, training=False):
  class TFBertIntermediate (line 309) | class TFBertIntermediate(tf.keras.layers.Layer):
    method __init__ (line 310) | def __init__(self, config, **kwargs):
    method call (line 320) | def call(self, hidden_states):
  class TFBertOutput (line 326) | class TFBertOutput(tf.keras.layers.Layer):
    method __init__ (line 327) | def __init__(self, config, **kwargs):
    method call (line 335) | def call(self, inputs, training=False):
  class TFBertLayer (line 344) | class TFBertLayer(tf.keras.layers.Layer):
    method __init__ (line 345) | def __init__(self, config, **kwargs):
    method call (line 351) | def call(self, inputs, training=False):
  class TFBertEncoder (line 362) | class TFBertEncoder(tf.keras.layers.Layer):
    method __init__ (line 363) | def __init__(self, config, **kwargs):
    method call (line 369) | def call(self, inputs, training=False):
  class TFBertPooler (line 396) | class TFBertPooler(tf.keras.layers.Layer):
    method __init__ (line 397) | def __init__(self, config, **kwargs):
    method call (line 406) | def call(self, hidden_states):
  class TFBertPredictionHeadTransform (line 414) | class TFBertPredictionHeadTransform(tf.keras.layers.Layer):
    method __init__ (line 415) | def __init__(self, config, **kwargs):
    method call (line 426) | def call(self, hidden_states):
  class TFBertLMPredictionHead (line 433) | class TFBertLMPredictionHead(tf.keras.layers.Layer):
    method __init__ (line 434) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 443) | def build(self, input_shape):
    method call (line 447) | def call(self, hidden_states):
  class TFBertMLMHead (line 454) | class TFBertMLMHead(tf.keras.layers.Layer):
    method __init__ (line 455) | def __init__(self, config, input_embeddings, **kwargs):
    method call (line 459) | def call(self, sequence_output):
  class TFBertNSPHead (line 464) | class TFBertNSPHead(tf.keras.layers.Layer):
    method __init__ (line 465) | def __init__(self, config, **kwargs):
    method call (line 471) | def call(self, pooled_output):
  class TFBertMainLayer (line 477) | class TFBertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 480) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 488) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 491) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 494) | def _prune_heads(self, heads_to_prune):
    method call (line 501) | def call(
  class TFBertPreTrainedModel (line 583) | class TFBertPreTrainedModel(TFPreTrainedModel):
  class TFBertModel (line 667) | class TFBertModel(TFBertPreTrainedModel):
    method __init__ (line 668) | def __init__(self, config, *inputs, **kwargs):
    method call (line 673) | def call(self, inputs, **kwargs):
  class TFBertForPreTraining (line 718) | class TFBertForPreTraining(TFBertPreTrainedModel):
    method __init__ (line 719) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 726) | def get_output_embeddings(self):
    method call (line 730) | def call(self, inputs, **kwargs):
  class TFBertForMaskedLM (line 775) | class TFBertForMaskedLM(TFBertPreTrainedModel):
    method __init__ (line 776) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 782) | def get_output_embeddings(self):
    method call (line 786) | def call(self, inputs, **kwargs):
  class TFBertForNextSentencePrediction (line 828) | class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
    method __init__ (line 829) | def __init__(self, config, *inputs, **kwargs):
    method call (line 836) | def call(self, inputs, **kwargs):
  class TFBertForSequenceClassification (line 883) | class TFBertForSequenceClassification(TFBertPreTrainedModel):
    method __init__ (line 884) | def __init__(self, config, *inputs, **kwargs):
    method call (line 895) | def call(self, inputs, **kwargs):
  class TFBertForMultipleChoice (line 941) | class TFBertForMultipleChoice(TFBertPreTrainedModel):
    method __init__ (line 942) | def __init__(self, config, *inputs, **kwargs):
    method dummy_inputs (line 952) | def dummy_inputs(self):
    method call (line 961) | def call(
  class TFBertForTokenClassification (line 1064) | class TFBertForTokenClassification(TFBertPreTrainedModel):
    method __init__ (line 1065) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1076) | def call(self, inputs, **kwargs):
  class TFBertForQuestionAnswering (line 1122) | class TFBertForQuestionAnswering(TFBertPreTrainedModel):
    method __init__ (line 1123) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1133) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_camembert.py
  class TFCamembertModel (line 70) | class TFCamembertModel(TFRobertaModel):
  class TFCamembertForMaskedLM (line 82) | class TFCamembertForMaskedLM(TFRobertaForMaskedLM):
  class TFCamembertForSequenceClassification (line 96) | class TFCamembertForSequenceClassification(TFRobertaForSequenceClassific...
  class TFCamembertForTokenClassification (line 110) | class TFCamembertForTokenClassification(TFRobertaForTokenClassification):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_ctrl.py
  function angle_defn (line 38) | def angle_defn(pos, i, d_model_size):
  function positional_encoding (line 43) | def positional_encoding(position, d_model_size):
  function scaled_dot_product_attention (line 55) | def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, hea...
  class TFMultiHeadAttention (line 80) | class TFMultiHeadAttention(tf.keras.layers.Layer):
    method __init__ (line 81) | def __init__(self, d_model_size, num_heads, output_attentions=False, *...
    method split_into_heads (line 95) | def split_into_heads(self, x, batch_size):
    method call (line 99) | def call(self, inputs, training=False):
  function point_wise_feed_forward_network (line 142) | def point_wise_feed_forward_network(d_model_size, dff, name=""):
  class TFEncoderLayer (line 149) | class TFEncoderLayer(tf.keras.layers.Layer):
    method __init__ (line 150) | def __init__(
    method call (line 166) | def call(self, inputs, training=False):
  class TFCTRLMainLayer (line 186) | class TFCTRLMainLayer(tf.keras.layers.Layer):
    method __init__ (line 189) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 218) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 221) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 224) | def _prune_heads(self, heads_to_prune):
    method call (line 230) | def call(
  class TFCTRLPreTrainedModel (line 379) | class TFCTRLPreTrainedModel(TFPreTrainedModel):
  class TFCTRLModel (line 471) | class TFCTRLModel(TFCTRLPreTrainedModel):
    method __init__ (line 472) | def __init__(self, config, *inputs, **kwargs):
    method call (line 477) | def call(self, inputs, **kwargs):
  class TFCTRLLMHead (line 515) | class TFCTRLLMHead(tf.keras.layers.Layer):
    method __init__ (line 516) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 524) | def build(self, input_shape):
    method call (line 528) | def call(self, hidden_states):
  class TFCTRLLMHeadModel (line 539) | class TFCTRLLMHeadModel(TFCTRLPreTrainedModel):
    method __init__ (line 540) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 546) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 549) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 557) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_distilbert.py
  function gelu (line 46) | def gelu(x):
  function gelu_new (line 57) | def gelu_new(x):
  class TFEmbeddings (line 70) | class TFEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 71) | def __init__(self, config, **kwargs):
    method build (line 89) | def build(self, input_shape):
    method call (line 99) | def call(self, inputs, inputs_embeds=None, mode="embedding", training=...
    method _embedding (line 121) | def _embedding(self, inputs, inputs_embeds=None, training=False):
    method _linear (line 156) | def _linear(self, inputs):
  class TFMultiHeadSelfAttention (line 172) | class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 173) | def __init__(self, config, **kwargs):
    method prune_heads (line 198) | def prune_heads(self, heads):
    method call (line 201) | def call(self, inputs, training=False):
  class TFFFN (line 262) | class TFFFN(tf.keras.layers.Layer):
    method __init__ (line 263) | def __init__(self, config, **kwargs):
    method call (line 279) | def call(self, input, training=False):
  class TFTransformerBlock (line 287) | class TFTransformerBlock(tf.keras.layers.Layer):
    method __init__ (line 288) | def __init__(self, config, **kwargs):
    method call (line 306) | def call(self, inputs, training=False):  # removed: src_enc=None, src_...
  class TFTransformer (line 341) | class TFTransformer(tf.keras.layers.Layer):
    method __init__ (line 342) | def __init__(self, config, **kwargs):
    method call (line 350) | def call(self, inputs, training=False):
  class TFDistilBertMainLayer (line 402) | class TFDistilBertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 403) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 410) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 413) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 416) | def _prune_heads(self, heads_to_prune):
    method call (line 419) | def call(self, inputs, attention_mask=None, head_mask=None, inputs_emb...
  class TFDistilBertPreTrainedModel (line 465) | class TFDistilBertPreTrainedModel(TFPreTrainedModel):
  class TFDistilBertModel (line 539) | class TFDistilBertModel(TFDistilBertPreTrainedModel):
    method __init__ (line 540) | def __init__(self, config, *inputs, **kwargs):
    method call (line 545) | def call(self, inputs, **kwargs):
  class TFDistilBertLMHead (line 577) | class TFDistilBertLMHead(tf.keras.layers.Layer):
    method __init__ (line 578) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 586) | def build(self, input_shape):
    method call (line 590) | def call(self, hidden_states):
  class TFDistilBertForMaskedLM (line 599) | class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel):
    method __init__ (line 600) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 614) | def get_output_embeddings(self):
    method call (line 618) | def call(self, inputs, **kwargs):
  class TFDistilBertForSequenceClassification (line 665) | class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel):
    method __init__ (line 666) | def __init__(self, config, *inputs, **kwargs):
    method call (line 683) | def call(self, inputs, **kwargs):
  class TFDistilBertForTokenClassification (line 729) | class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel):
    method __init__ (line 730) | def __init__(self, config, *inputs, **kwargs):
    method call (line 741) | def call(self, inputs, **kwargs):
  class TFDistilBertForQuestionAnswering (line 786) | class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel):
    method __init__ (line 787) | def __init__(self, config, *inputs, **kwargs):
    method call (line 798) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_electra.py
  class TFElectraEmbeddings (line 27) | class TFElectraEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 31) | def __init__(self, config, **kwargs):
    method build (line 55) | def build(self, input_shape):
    method call (line 67) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 89) | def _embedding(self, inputs, training=False):
    method _linear (line 114) | def _linear(self, inputs):
  class TFElectraDiscriminatorPredictions (line 130) | class TFElectraDiscriminatorPredictions(tf.keras.layers.Layer):
    method __init__ (line 131) | def __init__(self, config, **kwargs):
    method call (line 138) | def call(self, discriminator_hidden_states, training=False):
  class TFElectraGeneratorPredictions (line 146) | class TFElectraGeneratorPredictions(tf.keras.layers.Layer):
    method __init__ (line 147) | def __init__(self, config, **kwargs):
    method call (line 153) | def call(self, generator_hidden_states, training=False):
  class TFElectraPreTrainedModel (line 161) | class TFElectraPreTrainedModel(TFBertPreTrainedModel):
    method get_extended_attention_mask (line 166) | def get_extended_attention_mask(self, attention_mask, input_shape):
    method get_head_mask (line 188) | def get_head_mask(self, head_mask):
  class TFElectraMainLayer (line 197) | class TFElectraMainLayer(TFElectraPreTrainedModel):
    method __init__ (line 201) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 210) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 213) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 216) | def _prune_heads(self, heads_to_prune):
    method call (line 223) | def call(
  class TFElectraModel (line 348) | class TFElectraModel(TFElectraPreTrainedModel):
    method __init__ (line 349) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 353) | def get_input_embeddings(self):
    method call (line 357) | def call(self, inputs, **kwargs):
  class TFElectraForPreTraining (line 398) | class TFElectraForPreTraining(TFElectraPreTrainedModel):
    method __init__ (line 399) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 405) | def get_input_embeddings(self):
    method call (line 409) | def call(
  class TFElectraMaskedLMHead (line 458) | class TFElectraMaskedLMHead(tf.keras.layers.Layer):
    method __init__ (line 459) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 464) | def build(self, input_shape):
    method call (line 468) | def call(self, hidden_states, training=False):
  class TFElectraForMaskedLM (line 482) | class TFElectraForMaskedLM(TFElectraPreTrainedModel):
    method __init__ (line 483) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 495) | def get_input_embeddings(self):
    method get_output_embeddings (line 498) | def get_output_embeddings(self):
    method call (line 502) | def call(
  class TFElectraForTokenClassification (line 560) | class TFElectraForTokenClassification(TFElectraPreTrainedModel):
    method __init__ (line 561) | def __init__(self, config, **kwargs):
    method call (line 569) | def call(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_flaubert.py
  class TFFlaubertModel (line 107) | class TFFlaubertModel(TFXLMModel):
    method __init__ (line 110) | def __init__(self, config, *inputs, **kwargs):
  class TFFlaubertMainLayer (line 115) | class TFFlaubertMainLayer(TFXLMMainLayer):
    method __init__ (line 116) | def __init__(self, config, *inputs, **kwargs):
    method call (line 121) | def call(
  class TFFlaubertWithLMHeadModel (line 311) | class TFFlaubertWithLMHeadModel(TFXLMWithLMHeadModel):
    method __init__ (line 314) | def __init__(self, config, *inputs, **kwargs):
  class TFFlaubertForSequenceClassification (line 324) | class TFFlaubertForSequenceClassification(TFXLMForSequenceClassification):
    method __init__ (line 327) | def __init__(self, config, *inputs, **kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_gpt2.py
  function gelu (line 50) | def gelu(x):
  class TFAttention (line 63) | class TFAttention(tf.keras.layers.Layer):
    method __init__ (line 64) | def __init__(self, nx, n_ctx, config, scale=False, **kwargs):
    method prune_heads (line 82) | def prune_heads(self, heads):
    method causal_attention_mask (line 86) | def causal_attention_mask(nd, ns, dtype):
    method _attn (line 95) | def _attn(self, inputs, training=False):
    method merge_heads (line 125) | def merge_heads(self, x):
    method split_heads (line 131) | def split_heads(self, x):
    method call (line 137) | def call(self, inputs, training=False):
  class TFMLP (line 175) | class TFMLP(tf.keras.layers.Layer):
    method __init__ (line 176) | def __init__(self, n_state, config, **kwargs):
    method call (line 184) | def call(self, x, training=False):
  class TFBlock (line 191) | class TFBlock(tf.keras.layers.Layer):
    method __init__ (line 192) | def __init__(self, n_ctx, config, scale=False, **kwargs):
    method call (line 200) | def call(self, inputs, training=False):
  class TFGPT2MainLayer (line 217) | class TFGPT2MainLayer(tf.keras.layers.Layer):
    method __init__ (line 220) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 241) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 244) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 247) | def _prune_heads(self, heads_to_prune):
    method call (line 253) | def call(
  class TFGPT2PreTrainedModel (line 387) | class TFGPT2PreTrainedModel(TFPreTrainedModel):
  class TFGPT2Model (line 475) | class TFGPT2Model(TFGPT2PreTrainedModel):
    method __init__ (line 476) | def __init__(self, config, *inputs, **kwargs):
    method call (line 481) | def call(self, inputs, **kwargs):
  class TFGPT2LMHeadModel (line 524) | class TFGPT2LMHeadModel(TFGPT2PreTrainedModel):
    method __init__ (line 525) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 529) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 532) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 540) | def call(self, inputs, **kwargs):
  class TFGPT2DoubleHeadsModel (line 593) | class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
    method __init__ (line 594) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 602) | def get_output_embeddings(self):
    method call (line 606) | def call(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_openai.py
  function gelu (line 45) | def gelu(x):
  function swish (line 58) | def swish(x):
  class TFAttention (line 69) | class TFAttention(tf.keras.layers.Layer):
    method __init__ (line 70) | def __init__(self, nx, n_ctx, config, scale=False, **kwargs):
    method prune_heads (line 88) | def prune_heads(self, heads):
    method causal_attention_mask (line 92) | def causal_attention_mask(nd, ns, dtype):
    method _attn (line 101) | def _attn(self, inputs, training=False):
    method merge_heads (line 131) | def merge_heads(self, x):
    method split_heads (line 137) | def split_heads(self, x):
    method call (line 143) | def call(self, inputs, training=False):
  class TFMLP (line 163) | class TFMLP(tf.keras.layers.Layer):
    method __init__ (line 164) | def __init__(self, n_state, config, **kwargs):
    method call (line 172) | def call(self, x, training=False):
  class TFBlock (line 179) | class TFBlock(tf.keras.layers.Layer):
    method __init__ (line 180) | def __init__(self, n_ctx, config, scale=False, **kwargs):
    method call (line 188) | def call(self, inputs, training=False):
  class TFOpenAIGPTMainLayer (line 202) | class TFOpenAIGPTMainLayer(tf.keras.layers.Layer):
    method __init__ (line 203) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 223) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 226) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 229) | def _prune_heads(self, heads_to_prune):
    method call (line 235) | def call(
  class TFOpenAIGPTPreTrainedModel (line 349) | class TFOpenAIGPTPreTrainedModel(TFPreTrainedModel):
  class TFOpenAIGPTModel (line 430) | class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 431) | def __init__(self, config, *inputs, **kwargs):
    method call (line 436) | def call(self, inputs, **kwargs):
  class TFOpenAIGPTLMHeadModel (line 475) | class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 476) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 480) | def get_output_embeddings(self):
    method call (line 484) | def call(self, inputs, **kwargs):
  class TFOpenAIGPTDoubleHeadsModel (line 532) | class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 533) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 541) | def get_output_embeddings(self):
    method call (line 545) | def call(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_pytorch_utils.py
  function convert_tf_weight_name_to_pt_weight_name (line 29) | def convert_tf_weight_name_to_pt_weight_name(tf_name, start_prefix_to_re...
  function load_pytorch_checkpoint_in_tf2_model (line 73) | def load_pytorch_checkpoint_in_tf2_model(tf_model, pytorch_checkpoint_pa...
  function load_pytorch_model_in_tf2_model (line 97) | def load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=None, ...
  function load_pytorch_weights_in_tf2_model (line 107) | def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs...
  function load_tf2_checkpoint_in_pytorch_model (line 205) | def load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path, t...
  function load_tf2_model_in_pytorch_model (line 240) | def load_tf2_model_in_pytorch_model(pt_model, tf_model, allow_missing_ke...
  function load_tf2_weights_in_pytorch_model (line 248) | def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missin...

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_roberta.py
  class TFRobertaEmbeddings (line 40) | class TFRobertaEmbeddings(TFBertEmbeddings):
    method __init__ (line 45) | def __init__(self, config, **kwargs):
    method create_position_ids_from_input_ids (line 49) | def create_position_ids_from_input_ids(self, x):
    method create_position_ids_from_inputs_embeds (line 60) | def create_position_ids_from_inputs_embeds(self, inputs_embeds):
    method _embedding (line 71) | def _embedding(self, inputs, training=False):
  class TFRobertaMainLayer (line 85) | class TFRobertaMainLayer(TFBertMainLayer):
    method __init__ (line 90) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 94) | def get_input_embeddings(self):
  class TFRobertaPreTrainedModel (line 98) | class TFRobertaPreTrainedModel(TFPreTrainedModel):
  class TFRobertaModel (line 182) | class TFRobertaModel(TFRobertaPreTrainedModel):
    method __init__ (line 183) | def __init__(self, config, *inputs, **kwargs):
    method call (line 188) | def call(self, inputs, **kwargs):
  class TFRobertaLMHead (line 228) | class TFRobertaLMHead(tf.keras.layers.Layer):
    method __init__ (line 231) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 244) | def build(self, input_shape):
    method call (line 248) | def call(self, features):
  class TFRobertaForMaskedLM (line 260) | class TFRobertaForMaskedLM(TFRobertaPreTrainedModel):
    method __init__ (line 261) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 267) | def get_output_embeddings(self):
    method call (line 271) | def call(self, inputs, **kwargs):
  class TFRobertaClassificationHead (line 310) | class TFRobertaClassificationHead(tf.keras.layers.Layer):
    method __init__ (line 313) | def __init__(self, config, **kwargs):
    method call (line 326) | def call(self, features, training=False):
  class TFRobertaForSequenceClassification (line 340) | class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel):
    method __init__ (line 341) | def __init__(self, config, *inputs, **kwargs):
    method call (line 349) | def call(self, inputs, **kwargs):
  class TFRobertaForTokenClassification (line 394) | class TFRobertaForTokenClassification(TFRobertaPreTrainedModel):
    method __init__ (line 395) | def __init__(self, config, *inputs, **kwargs):
    method call (line 406) | def call(self, inputs, **kwargs):
  class TFRobertaForQuestionAnswering (line 451) | class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel):
    method __init__ (line 452) | def __init__(self, config, *inputs, **kwargs):
    method call (line 462) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_t5.py
  class TFT5LayerNorm (line 49) | class TFT5LayerNorm(tf.keras.layers.Layer):
    method __init__ (line 50) | def __init__(self, epsilon=1e-6, **kwargs):
    method build (line 57) | def build(self, input_shape):
    method call (line 62) | def call(self, x):
  class TFT5DenseReluDense (line 68) | class TFT5DenseReluDense(tf.keras.layers.Layer):
    method __init__ (line 69) | def __init__(self, config, **kwargs):
    method call (line 76) | def call(self, hidden_states, training=False):
  class TFT5LayerFF (line 84) | class TFT5LayerFF(tf.keras.layers.Layer):
    method __init__ (line 85) | def __init__(self, config, **kwargs):
    method call (line 91) | def call(self, hidden_states, training=False):
  class TFT5Attention (line 98) | class TFT5Attention(tf.keras.layers.Layer):
    method __init__ (line 101) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method prune_heads (line 127) | def prune_heads(self, heads):
    method _relative_position_bucket (line 131) | def _relative_position_bucket(relative_position, bidirectional=True, n...
    method compute_bias (line 176) | def compute_bias(self, qlen, klen):
    method call (line 188) | def call(
  class TFT5LayerSelfAttention (line 302) | class TFT5LayerSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 303) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 311) | def call(
  class TFT5LayerCrossAttention (line 337) | class TFT5LayerCrossAttention(tf.keras.layers.Layer):
    method __init__ (line 338) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 346) | def call(
  class TFT5Block (line 376) | class TFT5Block(tf.keras.layers.Layer):
    method __init__ (line 377) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 393) | def call(
  class _NoLayerEmbedTokens (line 471) | class _NoLayerEmbedTokens(object):
    method __init__ (line 478) | def __init__(self, layer, abs_scope_name=None):
    method call (line 482) | def call(self, inputs, mode="embedding"):
    method __call__ (line 491) | def __call__(self, inputs, mode="embedding"):
  class TFT5MainLayer (line 505) | class TFT5MainLayer(tf.keras.layers.Layer):
    method __init__ (line 506) | def __init__(self, config, embed_tokens=None, **kwargs):
    method get_input_embeddings (line 524) | def get_input_embeddings(self):
    method get_output_embeddings (line 527) | def get_output_embeddings(self):
    method set_embed_tokens (line 530) | def set_embed_tokens(self, embed_tokens):
    method _resize_token_embeddings (line 533) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 536) | def _prune_heads(self, heads_to_prune):
    method call (line 539) | def call(
  class TFT5PreTrainedModel (line 718) | class TFT5PreTrainedModel(TFPreTrainedModel):
    method dummy_inputs (line 727) | def dummy_inputs(self):
  class TFT5Model (line 828) | class TFT5Model(TFT5PreTrainedModel):
    method __init__ (line 829) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 846) | def get_input_embeddings(self):
    method get_output_embeddings (line 849) | def get_output_embeddings(self):
    method get_encoder (line 852) | def get_encoder(self):
    method get_decoder (line 855) | def get_decoder(self):
    method call (line 859) | def call(self, inputs, **kwargs):
  class TFT5ForConditionalGeneration (line 947) | class TFT5ForConditionalGeneration(TFT5PreTrainedModel):
    method __init__ (line 948) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 967) | def get_input_embeddings(self):
    method get_output_embeddings (line 970) | def get_output_embeddings(self):
    method get_encoder (line 973) | def get_encoder(self):
    method get_decoder (line 976) | def get_decoder(self):
    method call (line 980) | def call(self, inputs, **kwargs):
    method prepare_inputs_for_generation (line 1079) | def prepare_inputs_for_generation(self, inputs, past, attention_mask, ...
    method _reorder_cache (line 1097) | def _reorder_cache(self, past, beam_idx):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_transfo_xl.py
  class TFPositionalEmbedding (line 39) | class TFPositionalEmbedding(tf.keras.layers.Layer):
    method __init__ (line 40) | def __init__(self, demb, **kwargs):
    method call (line 45) | def call(self, pos_seq, bsz=None):
  class TFPositionwiseFF (line 55) | class TFPositionwiseFF(tf.keras.layers.Layer):
    method __init__ (line 56) | def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, layer_n...
    method call (line 74) | def call(self, inp, training=False):
  class TFRelPartialLearnableMultiHeadAttn (line 98) | class TFRelPartialLearnableMultiHeadAttn(tf.keras.layers.Layer):
    method __init__ (line 99) | def __init__(
    method build (line 152) | def build(self, input_shape):
    method _rel_shift (line 162) | def _rel_shift(self, x):
    method call (line 172) | def call(self, inputs, training=False):
  class TFRelPartialLearnableDecoderLayer (line 252) | class TFRelPartialLearnableDecoderLayer(tf.keras.layers.Layer):
    method __init__ (line 253) | def __init__(
    method call (line 301) | def call(self, inputs, training=False):
  class TFAdaptiveEmbedding (line 311) | class TFAdaptiveEmbedding(tf.keras.layers.Layer):
    method __init__ (line 312) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, init_...
    method build (line 344) | def build(self, input_shape):
    method call (line 357) | def call(self, inp):
  class TFTransfoXLMainLayer (line 384) | class TFTransfoXLMainLayer(tf.keras.layers.Layer):
    method __init__ (line 387) | def __init__(self, config, **kwargs):
    method build (line 455) | def build(self, input_shape):
    method get_input_embeddings (line 465) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 468) | def _resize_token_embeddings(self, new_num_tokens):
    method backward_compatible (line 471) | def backward_compatible(self):
    method reset_length (line 474) | def reset_length(self, tgt_len, ext_len, mem_len):
    method _prune_heads (line 479) | def _prune_heads(self, heads):
    method init_mems (line 482) | def init_mems(self, bsz):
    method _update_mems (line 493) | def _update_mems(self, hids, mems, mlen, qlen):
    method call (line 517) | def call(self, inputs, mems=None, head_mask=None, inputs_embeds=None, ...
  class TFTransfoXLPreTrainedModel (line 628) | class TFTransfoXLPreTrainedModel(TFPreTrainedModel):
  class TFTransfoXLModel (line 693) | class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
    method __init__ (line 694) | def __init__(self, config, *inputs, **kwargs):
    method call (line 699) | def call(self, inputs, **kwargs):
  class TFTransfoXLLMHead (line 737) | class TFTransfoXLLMHead(tf.keras.layers.Layer):
    method __init__ (line 738) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 746) | def build(self, input_shape):
    method call (line 750) | def call(self, hidden_states):
  class TFTransfoXLLMHeadModel (line 761) | class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
    method __init__ (line 762) | def __init__(self, config):
    method get_output_embeddings (line 774) | def get_output_embeddings(self):
    method reset_length (line 781) | def reset_length(self, tgt_len, ext_len, mem_len):
    method init_mems (line 784) | def init_mems(self, bsz):
    method call (line 788) | def call(self, inputs, mems=None, head_mask=None, inputs_embeds=None, ...
    method prepare_inputs_for_generation (line 855) | def prepare_inputs_for_generation(self, inputs, past, **model_kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_transfo_xl_utilities.py
  class TFAdaptiveSoftmaxMask (line 25) | class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
    method __init__ (line 26) | def __init__(self, vocab_size, d_embed, d_proj, cutoffs, div_val=1, ke...
    method build (line 45) | def build(self, input_shape):
    method _logit (line 104) | def _logit(x, W, b, proj=None):
    method _gather_logprob (line 111) | def _gather_logprob(logprob, target):
    method call (line 117) | def call(self, inputs, return_mean=True, training=False):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_utils.py
  class TFModelUtilsMixin (line 34) | class TFModelUtilsMixin:
    method num_parameters (line 39) | def num_parameters(self, only_trainable: bool = False) -> int:
  function keras_serializable (line 49) | def keras_serializable(cls):
  class TFPreTrainedModel (line 107) | class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
    method dummy_inputs (line 127) | def dummy_inputs(self):
    method __init__ (line 135) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 148) | def get_input_embeddings(self):
    method get_output_embeddings (line 162) | def get_output_embeddings(self):
    method _get_resized_embeddings (line 172) | def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None):
    method resize_token_embeddings (line 206) | def resize_token_embeddings(self, new_num_tokens=None):
    method prune_heads (line 221) | def prune_heads(self, heads_to_prune):
    method save_pretrained (line 230) | def save_pretrained(self, save_directory):
    method from_pretrained (line 247) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
    method prepare_inputs_for_generation (line 438) | def prepare_inputs_for_generation(self, inputs, **kwargs):
    method _use_cache (line 441) | def _use_cache(self, outputs, use_cache):
    method generate (line 449) | def generate(
    method _generate_no_beam_search (line 810) | def _generate_no_beam_search(
    method _generate_beam_search (line 973) | def _generate_beam_search(
    method _reorder_cache (line 1294) | def _reorder_cache(past, beam_idx):
  function _create_next_token_logits_penalties (line 1298) | def _create_next_token_logits_penalties(input_ids, logits, repetition_pe...
  function calc_banned_ngram_tokens (line 1312) | def calc_banned_ngram_tokens(prev_input_ids, num_hypos, no_repeat_ngram_...
  function calc_banned_bad_words_ids (line 1335) | def calc_banned_bad_words_ids(prev_input_ids, bad_words_ids):
  function tf_top_k_top_p_filtering (line 1371) | def tf_top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-f...
  function scatter_values_on_batch_indices (line 1421) | def scatter_values_on_batch_indices(values, batch_indices):
  function set_tensor_by_indices_to_value (line 1431) | def set_tensor_by_indices_to_value(tensor, indices, value):
  class BeamHypotheses (line 1437) | class BeamHypotheses(object):
    method __init__ (line 1438) | def __init__(self, num_beams, max_length, length_penalty, early_stoppi...
    method __len__ (line 1449) | def __len__(self):
    method add (line 1455) | def add(self, hyp, sum_logprobs):
    method is_done (line 1469) | def is_done(self, best_sum_logprobs, cur_len=None):
  class TFConv1D (line 1487) | class TFConv1D(tf.keras.layers.Layer):
    method __init__ (line 1488) | def __init__(self, nf, nx, initializer_range=0.02, **kwargs):
    method build (line 1497) | def build(self, input_shape):
    method call (line 1503) | def call(self, x):
  class TFSharedEmbeddings (line 1514) | class TFSharedEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 1518) | def __init__(self, vocab_size, hidden_size, initializer_range=None, **...
    method build (line 1524) | def build(self, input_shape):
    method call (line 1534) | def call(self, inputs, mode="embedding"):
    method _embedding (line 1556) | def _embedding(self, input_ids):
    method _linear (line 1560) | def _linear(self, inputs):
  class TFSequenceSummary (line 1575) | class TFSequenceSummary(tf.keras.layers.Layer):
    method __init__ (line 1591) | def __init__(self, config, initializer_range=0.02, **kwargs):
    method call (line 1623) | def call(self, inputs, training=False):
  function shape_list (line 1682) | def shape_list(x):
  function get_initializer (line 1689) | def get_initializer(initializer_range=0.02):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_xlm.py
  function create_sinusoidal_embeddings (line 49) | def create_sinusoidal_embeddings(n_pos, dim, out):
  function gelu (line 55) | def gelu(x):
  function get_masks (line 66) | def get_masks(slen, lengths, causal, padding_mask=None, dtype=tf.float32):
  class TFMultiHeadAttention (line 97) | class TFMultiHeadAttention(tf.keras.layers.Layer):
    method __init__ (line 101) | def __init__(self, n_heads, dim, config, **kwargs):
    method prune_heads (line 116) | def prune_heads(self, heads):
    method call (line 119) | def call(self, inputs, training=False):
  class TFTransformerFFN (line 185) | class TFTransformerFFN(tf.keras.layers.Layer):
    method __init__ (line 186) | def __init__(self, in_dim, dim_hidden, out_dim, config, **kwargs):
    method call (line 193) | def call(self, input, training=False):
  class TFXLMMainLayer (line 201) | class TFXLMMainLayer(tf.keras.layers.Layer):
    method __init__ (line 202) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 292) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 295) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 298) | def _prune_heads(self, heads_to_prune):
    method call (line 305) | def call(
  class TFXLMPreTrainedModel (line 468) | class TFXLMPreTrainedModel(TFPreTrainedModel):
    method dummy_inputs (line 477) | def dummy_inputs(self):
  class TFXLMModel (line 574) | class TFXLMModel(TFXLMPreTrainedModel):
    method __init__ (line 575) | def __init__(self, config, *inputs, **kwargs):
    method call (line 580) | def call(self, inputs, **kwargs):
  class TFXLMPredLayer (line 614) | class TFXLMPredLayer(tf.keras.layers.Layer):
    method __init__ (line 619) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 636) | def build(self, input_shape):
    method call (line 641) | def call(self, hidden_states):
  class TFXLMWithLMHeadModel (line 652) | class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
    method __init__ (line 653) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 658) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 661) | def prepare_inputs_for_generation(self, inputs, **kwargs):
    method call (line 676) | def call(self, inputs, **kwargs):
  class TFXLMForSequenceClassification (line 720) | class TFXLMForSequenceClassification(TFXLMPreTrainedModel):
    method __init__ (line 721) | def __init__(self, config, *inputs, **kwargs):
    method call (line 729) | def call(self, inputs, **kwargs):
  class TFXLMForQuestionAnsweringSimple (line 774) | class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel):
    method __init__ (line 775) | def __init__(self, config, *inputs, **kwargs):
    method call (line 783) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_xlm_roberta.py
  class TFXLMRobertaModel (line 70) | class TFXLMRobertaModel(TFRobertaModel):
  class TFXLMRobertaForMaskedLM (line 82) | class TFXLMRobertaForMaskedLM(TFRobertaForMaskedLM):
  class TFXLMRobertaForSequenceClassification (line 96) | class TFXLMRobertaForSequenceClassification(TFRobertaForSequenceClassifi...
  class TFXLMRobertaForTokenClassification (line 110) | class TFXLMRobertaForTokenClassification(TFRobertaForTokenClassification):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_tf_xlnet.py
  function gelu (line 47) | def gelu(x):
  function swish (line 56) | def swish(x):
  class TFXLNetRelativeAttention (line 67) | class TFXLNetRelativeAttention(tf.keras.layers.Layer):
    method __init__ (line 68) | def __init__(self, config, **kwargs):
    method build (line 87) | def build(self, input_shape):
    method prune_heads (line 118) | def prune_heads(self, heads):
    method rel_shift (line 121) | def rel_shift(self, x, klen=-1):
    method rel_attn_core (line 133) | def rel_attn_core(self, inputs, training=False):
    method post_attention (line 178) | def post_attention(self, inputs, residual=True, training=False):
    method call (line 193) | def call(self, inputs, training=False):
  class TFXLNetFeedForward (line 290) | class TFXLNetFeedForward(tf.keras.layers.Layer):
    method __init__ (line 291) | def __init__(self, config, **kwargs):
    method call (line 306) | def call(self, inp, training=False):
  class TFXLNetLayer (line 317) | class TFXLNetLayer(tf.keras.layers.Layer):
    method __init__ (line 318) | def __init__(self, config, **kwargs):
    method call (line 324) | def call(self, inputs, training=False):
  class TFXLNetLMHead (line 336) | class TFXLNetLMHead(tf.keras.layers.Layer):
    method __init__ (line 337) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 344) | def build(self, input_shape):
    method call (line 348) | def call(self, hidden_states):
  class TFXLNetMainLayer (line 355) | class TFXLNetMainLayer(tf.keras.layers.Layer):
    method __init__ (line 358) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 380) | def get_input_embeddings(self):
    method build (line 383) | def build(self, input_shape):
    method _resize_token_embeddings (line 389) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 392) | def _prune_heads(self, heads_to_prune):
    method create_mask (line 395) | def create_mask(self, qlen, mlen, dtype=tf.float32):
    method cache_mem (line 424) | def cache_mem(self, curr_out, prev_mem):
    method positional_embedding (line 437) | def positional_embedding(pos_seq, inv_freq, bsz=None):
    method relative_positional_encoding (line 447) | def relative_positional_encoding(self, qlen, klen, bsz=None, dtype=None):
    method call (line 495) | def call(
  class TFXLNetPreTrainedModel (line 699) | class TFXLNetPreTrainedModel(TFPreTrainedModel):
  class TFXLNetModel (line 795) | class TFXLNetModel(TFXLNetPreTrainedModel):
    method __init__ (line 796) | def __init__(self, config, *inputs, **kwargs):
    method call (line 801) | def call(self, inputs, **kwargs):
  class TFXLNetLMHeadModel (line 844) | class TFXLNetLMHeadModel(TFXLNetPreTrainedModel):
    method __init__ (line 845) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 850) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 853) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 885) | def call(self, inputs, **kwargs):
  class TFXLNetForSequenceClassification (line 941) | class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel):
    method __init__ (line 942) | def __init__(self, config, *inputs, **kwargs):
    method call (line 955) | def call(self, inputs, **kwargs):
  class TFXLNetForTokenClassification (line 1005) | class TFXLNetForTokenClassification(TFXLNetPreTrainedModel):
    method __init__ (line 1006) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1015) | def call(self, inputs, **kwargs):
  class TFXLNetForQuestionAnsweringSimple (line 1064) | class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel):
    method __init__ (line 1065) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1073) | def call(self, inputs, **kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_transfo_xl.py
  function build_tf_to_pytorch_map (line 42) | def build_tf_to_pytorch_map(model, config):
  function load_tf_weights_in_transfo_xl (line 109) | def load_tf_weights_in_transfo_xl(model, config, tf_path):
  class PositionalEmbedding (line 167) | class PositionalEmbedding(nn.Module):
    method __init__ (line 168) | def __init__(self, demb):
    method forward (line 176) | def forward(self, pos_seq, bsz=None):
  class PositionwiseFF (line 186) | class PositionwiseFF(nn.Module):
    method __init__ (line 187) | def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, layer_n...
    method forward (line 206) | def forward(self, inp):
  class RelPartialLearnableMultiHeadAttn (line 223) | class RelPartialLearnableMultiHeadAttn(nn.Module):
    method __init__ (line 224) | def __init__(
    method _rel_shift (line 269) | def _rel_shift(self, x):
    method forward (line 281) | def forward(self, w, r, attn_mask=None, mems=None, head_mask=None):
  class RelPartialLearnableDecoderLayer (line 370) | class RelPartialLearnableDecoderLayer(nn.Module):
    method __init__ (line 371) | def __init__(self, n_head, d_model, d_head, d_inner, dropout, layer_no...
    method forward (line 381) | def forward(self, dec_inp, r, dec_attn_mask=None, mems=None, head_mask...
  class AdaptiveEmbedding (line 391) | class AdaptiveEmbedding(nn.Module):
    method __init__ (line 392) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, sampl...
    method forward (line 419) | def forward(self, inp):
  class TransfoXLPreTrainedModel (line 451) | class TransfoXLPreTrainedModel(PreTrainedModel):
    method _init_weight (line 460) | def _init_weight(self, weight):
    method _init_bias (line 466) | def _init_bias(self, bias):
    method _init_weights (line 469) | def _init_weights(self, m):
  class TransfoXLModel (line 552) | class TransfoXLModel(TransfoXLPreTrainedModel):
    method __init__ (line 553) | def __init__(self, config):
    method get_input_embeddings (line 618) | def get_input_embeddings(self):
    method set_input_embeddings (line 621) | def set_input_embeddings(self, new_embeddings):
    method backward_compatible (line 624) | def backward_compatible(self):
    method reset_length (line 627) | def reset_length(self, tgt_len, ext_len, mem_len):
    method _prune_heads (line 632) | def _prune_heads(self, heads):
    method init_mems (line 636) | def init_mems(self, bsz):
    method _update_mems (line 648) | def _update_mems(self, hids, mems, mlen, qlen):
    method forward (line 673) | def forward(self, input_ids=None, mems=None, head_mask=None, inputs_em...
  class TransfoXLLMHeadModel (line 807) | class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
    method __init__ (line 808) | def __init__(self, config):
    method tie_weights (line 823) | def tie_weights(self):
    method reset_length (line 844) | def reset_length(self, tgt_len, ext_len, mem_len):
    method init_mems (line 847) | def init_mems(self, bsz):
    method forward (line 851) | def forward(self, input_ids=None, mems=None, head_mask=None, inputs_em...
    method get_output_embeddings (line 917) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 925) | def prepare_inputs_for_generation(self, input_ids, past, **model_kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_transfo_xl_utilities.py
  class ProjectedAdaptiveLogSoftmax (line 30) | class ProjectedAdaptiveLogSoftmax(nn.Module):
    method __init__ (line 31) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, keep_...
    method _compute_logit (line 72) | def _compute_logit(self, hidden, weight, bias, proj):
    method forward (line 86) | def forward(self, hidden, labels=None, keep_order=False):
    method log_prob (line 193) | def log_prob(self, hidden):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_utils.py
  class Identity (line 47) | class Identity(nn.Module):
    method __init__ (line 51) | def __init__(self, *args, **kwargs):
    method forward (line 54) | def forward(self, input):
  class ModuleUtilsMixin (line 58) | class ModuleUtilsMixin:
    method num_parameters (line 63) | def num_parameters(self, only_trainable: bool = False) -> int:
    method _hook_rss_memory_pre_forward (line 71) | def _hook_rss_memory_pre_forward(module, *args, **kwargs):
    method _hook_rss_memory_post_forward (line 83) | def _hook_rss_memory_post_forward(module, *args, **kwargs):
    method add_memory_hooks (line 96) | def add_memory_hooks(self):
    method reset_memory_hooks_state (line 105) | def reset_memory_hooks_state(self):
    method device (line 112) | def device(self) -> device:
    method dtype (line 130) | def dtype(self) -> dtype:
    method invert_attention_mask (line 147) | def invert_attention_mask(self, encoder_attention_mask: Tensor) -> Ten...
    method get_extended_attention_mask (line 173) | def get_extended_attention_mask(self, attention_mask: Tensor, input_sh...
    method get_head_mask (line 217) | def get_head_mask(self, head_mask: Tensor, num_hidden_layers: int, is_...
    method _convert_head_mask_to_5d (line 238) | def _convert_head_mask_to_5d(self, head_mask, num_hidden_layers):
  class PreTrainedModel (line 250) | class PreTrainedModel(nn.Module, ModuleUtilsMixin):
    method dummy_inputs (line 270) | def dummy_inputs(self):
    method __init__ (line 278) | def __init__(self, config, *inputs, **kwargs):
    method base_model (line 292) | def base_model(self):
    method get_input_embeddings (line 295) | def get_input_embeddings(self):
    method set_input_embeddings (line 309) | def set_input_embeddings(self, value: nn.Module):
    method get_output_embeddings (line 323) | def get_output_embeddings(self):
    method tie_weights (line 333) | def tie_weights(self):
    method _tie_or_clone_weights (line 343) | def _tie_or_clone_weights(self, output_embeddings, input_embeddings):
    method resize_token_embeddings (line 361) | def resize_token_embeddings(self, new_num_tokens: Optional[int] = None):
    method _resize_token_embeddings (line 388) | def _resize_token_embeddings(self, new_num_tokens):
    method _get_resized_embeddings (line 394) | def _get_resized_embeddings(
    method init_weights (line 432) | def init_weights(self):
    method prune_heads (line 444) | def prune_heads(self, heads_to_prune: Dict):
    method save_pretrained (line 459) | def save_pretrained(self, save_directory):
    method from_pretrained (line 494) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
    method prepare_inputs_for_generation (line 777) | def prepare_inputs_for_generation(self, input_ids, **kwargs):
    method prepare_logits_for_generation (line 780) | def prepare_logits_for_generation(self, logits, **kwargs):
    method _use_cache (line 783) | def _use_cache(self, outputs, use_cache):
    method enforce_repetition_penalty_ (line 791) | def enforce_repetition_penalty_(self, lprobs, batch_size, num_beams, p...
    method generate (line 802) | def generate(
    method _generate_no_beam_search (line 1186) | def _generate_no_beam_search(
    method _generate_beam_search (line 1307) | def _generate_beam_search(
    method _reorder_cache (line 1582) | def _reorder_cache(past: Tuple, beam_idx: Tensor) -> Tuple[Tensor]:
  function calc_banned_ngram_tokens (line 1586) | def calc_banned_ngram_tokens(prev_input_ids: Tensor, num_hypos: int, no_...
  function calc_banned_bad_words_ids (line 1609) | def calc_banned_bad_words_ids(prev_input_ids: Iterable[int], bad_words_i...
  function top_k_top_p_filtering (line 1645) | def top_k_top_p_filtering(
  class BeamHypotheses (line 1686) | class BeamHypotheses(object):
    method __init__ (line 1687) | def __init__(self, num_beams, max_length, length_penalty, early_stoppi...
    method __len__ (line 1698) | def __len__(self):
    method add (line 1704) | def add(self, hyp, sum_logprobs):
    method is_done (line 1718) | def is_done(self, best_sum_logprobs, cur_len=None):
  class Conv1D (line 1736) | class Conv1D(nn.Module):
    method __init__ (line 1737) | def __init__(self, nf, nx):
    method forward (line 1748) | def forward(self, x):
  class PoolerStartLogits (line 1755) | class PoolerStartLogits(nn.Module):
    method __init__ (line 1758) | def __init__(self, config):
    method forward (line 1762) | def forward(self, hidden_states, p_mask=None):
  class PoolerEndLogits (line 1779) | class PoolerEndLogits(nn.Module):
    method __init__ (line 1783) | def __init__(self, config):
    method forward (line 1790) | def forward(self, hidden_states, start_states=None, start_positions=No...
  class PoolerAnswerClass (line 1826) | class PoolerAnswerClass(nn.Module):
    method __init__ (line 1829) | def __init__(self, config):
    method forward (line 1835) | def forward(self, hidden_states, start_states=None, start_positions=No...
  class SQuADHead (line 1873) | class SQuADHead(nn.Module):
    method __init__ (line 1914) | def __init__(self, config):
    method forward (line 1923) | def forward(
  class SequenceSummary (line 1990) | class SequenceSummary(nn.Module):
    method __init__ (line 2006) | def __init__(self, config: PretrainedConfig):
    method forward (line 2035) | def forward(self, hidden_states, cls_index=None):
  function create_position_ids_from_input_ids (line 2067) | def create_position_ids_from_input_ids(input_ids, padding_idx):
  function prune_linear_layer (line 2081) | def prune_linear_layer(layer, index, dim=0):
  function prune_conv1d_layer (line 2106) | def prune_conv1d_layer(layer, index, dim=1):
  function prune_layer (line 2130) | def prune_layer(layer, index, dim=None):
  function apply_chunking_to_forward (line 2143) | def apply_chunking_to_forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_xlm.py
  function create_sinusoidal_embeddings (line 52) | def create_sinusoidal_embeddings(n_pos, dim, out):
  function get_masks (line 60) | def get_masks(slen, lengths, causal, padding_mask=None):
  class MultiHeadAttention (line 85) | class MultiHeadAttention(nn.Module):
    method __init__ (line 89) | def __init__(self, n_heads, dim, config):
    method prune_heads (line 104) | def prune_heads(self, heads):
    method forward (line 125) | def forward(self, input, mask, kv=None, cache=None, head_mask=None):
  class TransformerFFN (line 189) | class TransformerFFN(nn.Module):
    method __init__ (line 190) | def __init__(self, in_dim, dim_hidden, out_dim, config):
    method forward (line 197) | def forward(self, input):
  class XLMPreTrainedModel (line 205) | class XLMPreTrainedModel(PreTrainedModel):
    method __init__ (line 214) | def __init__(self, *inputs, **kwargs):
    method dummy_inputs (line 218) | def dummy_inputs(self):
    method _init_weights (line 227) | def _init_weights(self, module):
  class XLMModel (line 313) | class XLMModel(XLMPreTrainedModel):
    method __init__ (line 314) | def __init__(self, config):  # , dico, is_encoder, with_output):
    method get_input_embeddings (line 384) | def get_input_embeddings(self):
    method set_input_embeddings (line 387) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 390) | def _prune_heads(self, heads_to_prune):
    method forward (line 399) | def forward(
  class XLMPredLayer (line 554) | class XLMPredLayer(nn.Module):
    method __init__ (line 559) | def __init__(self, config):
    method forward (line 577) | def forward(self, x, y=None):
  class XLMWithLMHeadModel (line 602) | class XLMWithLMHeadModel(XLMPreTrainedModel):
    method __init__ (line 603) | def __init__(self, config):
    method get_output_embeddings (line 610) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 613) | def prepare_inputs_for_generation(self, input_ids, **kwargs):
    method forward (line 627) | def forward(
  class XLMForSequenceClassification (line 702) | class XLMForSequenceClassification(XLMPreTrainedModel):
    method __init__ (line 703) | def __init__(self, config):
    method forward (line 713) | def forward(
  class XLMForQuestionAnsweringSimple (line 799) | class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
    method __init__ (line 800) | def __init__(self, config):
    method forward (line 809) | def forward(
  class XLMForQuestionAnswering (line 917) | class XLMForQuestionAnswering(XLMPreTrainedModel):
    method __init__ (line 918) | def __init__(self, config):
    method forward (line 927) | def forward(
  class XLMForTokenClassification (line 1034) | class XLMForTokenClassification(XLMPreTrainedModel):
    method __init__ (line 1035) | def __init__(self, config):
    method forward (line 1046) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/modeling_xlm_roberta.py
  class XLMRobertaModel (line 62) | class XLMRobertaModel(RobertaModel):
  class XLMRobertaForMaskedLM (line 74) | class XLMRobertaForMaskedLM(RobertaForMaskedLM):
  class XLMRobertaForSequenceClassification (line 88) | class XLMRobertaForSequenceClassification(RobertaForSequenceClassificati...
  class XLMRobertaForMultipleChoice (line 102) | class XLMRobertaForMultipleChoice(RobertaForMultipleChoice):
  class XLMRobertaForTokenClassification (line 116) | class XLMRobertaForTokenClassification(RobertaForTokenClassification):

FILE: code/bert-base-count5/pretrain/transformers1/modeling_xlnet.py
  function build_tf_xlnet_to_pytorch_map (line 42) | def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None):
  function load_tf_weights_in_xlnet (line 125) | def load_tf_weights_in_xlnet(model, config, tf_path):
  class XLNetRelativeAttention (line 193) | class XLNetRelativeAttention(nn.Module):
    method __init__ (line 194) | def __init__(self, config):
    method prune_heads (line 223) | def prune_heads(self, heads):
    method rel_shift (line 227) | def rel_shift(x, klen=-1):
    method rel_shift_bnij (line 240) | def rel_shift_bnij(x, klen=-1):
    method rel_attn_core (line 254) | def rel_attn_core(self, q_head, k_head_h, v_head_h, k_head_r, seg_mat=...
    method post_attention (line 296) | def post_attention(self, h, attn_vec, residual=True):
    method forward (line 308) | def forward(self, h, g, attn_mask_h, attn_mask_g, r, seg_mat, mems=Non...
  class XLNetFeedForward (line 403) | class XLNetFeedForward(nn.Module):
    method __init__ (line 404) | def __init__(self, config):
    method forward (line 415) | def forward(self, inp):
  class XLNetLayer (line 426) | class XLNetLayer(nn.Module):
    method __init__ (line 427) | def __init__(self, config):
    method forward (line 433) | def forward(
  class XLNetPreTrainedModel (line 457) | class XLNetPreTrainedModel(PreTrainedModel):
    method _init_weights (line 466) | def _init_weights(self, module):
  class XLNetModel (line 568) | class XLNetModel(XLNetPreTrainedModel):
    method __init__ (line 569) | def __init__(self, config):
    method get_input_embeddings (line 590) | def get_input_embeddings(self):
    method set_input_embeddings (line 593) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 596) | def _prune_heads(self, heads_to_prune):
    method create_mask (line 599) | def create_mask(self, qlen, mlen):
    method cache_mem (line 629) | def cache_mem(self, curr_out, prev_mem):
    method positional_embedding (line 642) | def positional_embedding(pos_seq, inv_freq, bsz=None):
    method relative_positional_encoding (line 652) | def relative_positional_encoding(self, qlen, klen, bsz=None):
    method forward (line 692) | def forward(
  class XLNetLMHeadModel (line 927) | class XLNetLMHeadModel(XLNetPreTrainedModel):
    method __init__ (line 928) | def __init__(self, config):
    method get_output_embeddings (line 938) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 941) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 975) | def forward(
  class XLNetForSequenceClassification (line 1083) | class XLNetForSequenceClassification(XLNetPreTrainedModel):
    method __init__ (line 1084) | def __init__(self, config):
    method forward (line 1095) | def forward(
  class XLNetForTokenClassification (line 1189) | class XLNetForTokenClassification(XLNetPreTrainedModel):
    method __init__ (line 1190) | def __init__(self, config):
    method forward (line 1200) | def forward(
  class XLNetForMultipleChoice (line 1298) | class XLNetForMultipleChoice(XLNetPreTrainedModel):
    method __init__ (line 1299) | def __init__(self, config):
    method forward (line 1309) | def forward(
  class XLNetForQuestionAnsweringSimple (line 1411) | class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
    method __init__ (line 1412) | def __init__(self, config):
    method forward (line 1422) | def forward(
  class XLNetForQuestionAnswering (line 1534) | class XLNetForQuestionAnswering(XLNetPreTrainedModel):
    method __init__ (line 1535) | def __init__(self, config):
    method forward (line 1548) | def forward(

FILE: code/bert-base-count5/pretrain/transformers1/optimization.py
  function get_constant_schedule (line 28) | def get_constant_schedule(optimizer, last_epoch=-1):
  function get_constant_schedule_with_warmup (line 34) | def get_constant_schedule_with_warmup(optimizer, num_warmup_steps, last_...
  function get_linear_schedule_with_warmup (line 47) | def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_tra...
  function get_cosine_schedule_with_warmup (line 62) | def get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_tra...
  function get_cosine_with_hard_restarts_schedule_with_warmup (line 77) | def get_cosine_with_hard_restarts_schedule_with_warmup(
  class AdamW (line 96) | class AdamW(Optimizer):
    method __init__ (line 107) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-6, weig...
    method step (line 119) | def step(self, closure=None):

FILE: code/bert-base-count5/pretrain/transformers1/optimization_tf.py
  class WarmUp (line 23) | class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
    method __init__ (line 26) | def __init__(
    method __call__ (line 36) | def __call__(self, step):
    method get_config (line 51) | def get_config(self):
  function create_optimizer (line 61) | def create_optimizer(init_lr, num_train_steps, num_warmup_steps, end_lr=...
  class AdamWeightDecay (line 84) | class AdamWeightDecay(tf.keras.optimizers.Adam):
    method __init__ (line 94) | def __init__(
    method from_config (line 113) | def from_config(cls, config):
    method _prepare_local (line 118) | def _prepare_local(self, var_device, var_dtype, apply_state):
    method _decay_weights_op (line 124) | def _decay_weights_op(self, var, learning_rate, apply_state):
    method apply_gradients (line 133) | def apply_gradients(self, grads_and_vars, name=None):
    method _get_lr (line 137) | def _get_lr(self, var_device, var_dtype, apply_state):
    method _resource_apply_dense (line 150) | def _resource_apply_dense(self, grad, var, apply_state=None):
    method _resource_apply_sparse (line 156) | def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
    method get_config (line 162) | def get_config(self):
    method _do_use_weight_decay (line 167) | def _do_use_weight_decay(self, param_name):
  class GradientAccumulator (line 185) | class GradientAccumulator(object):
    method __init__ (line 197) | def __init__(self):
    method step (line 203) | def step(self):
    method gradients (line 216) | def gradients(self):
    method __call__ (line 222) | def __call__(self, gradients):
    method reset (line 248) | def reset(self):

FILE: code/bert-base-count5/pretrain/transformers1/pipelines.py
  function get_framework (line 69) | def get_framework(model=None):
  class ArgumentHandler (line 89) | class ArgumentHandler(ABC):
    method __call__ (line 95) | def __call__(self, *args, **kwargs):
  class DefaultArgumentHandler (line 99) | class DefaultArgumentHandler(ArgumentHandler):
    method handle_kwargs (line 105) | def handle_kwargs(kwargs: Dict) -> List:
    method handle_args (line 114) | def handle_args(args: Sequence[Any]) -> List[str]:
    method __call__ (line 140) | def __call__(self, *args, **kwargs):
  class PipelineDataFormat (line 150) | class PipelineDataFormat:
    method __init__ (line 164) | def __init__(
    method __iter__ (line 184) | def __iter__(self):
    method save (line 188) | def save(self, data: dict):
    method save_binary (line 196) | def save_binary(self, data: Union[dict, List[dict]]) -> str:
    method from_str (line 211) | def from_str(
  class CsvPipelineDataFormat (line 224) | class CsvPipelineDataFormat(PipelineDataFormat):
    method __init__ (line 225) | def __init__(
    method __iter__ (line 230) | def __iter__(self):
    method save (line 239) | def save(self, data: List[dict]):
  class JsonPipelineDataFormat (line 247) | class JsonPipelineDataFormat(PipelineDataFormat):
    method __init__ (line 248) | def __init__(
    method __iter__ (line 256) | def __iter__(self):
    method save (line 263) | def save(self, data: dict):
  class PipedPipelineDataFormat (line 268) | class PipedPipelineDataFormat(PipelineDataFormat):
    method __iter__ (line 276) | def __iter__(self):
    method save (line 292) | def save(self, data: dict):
    method save_binary (line 295) | def save_binary(self, data: Union[dict, List[dict]]) -> str:
  class _ScikitCompat (line 305) | class _ScikitCompat(ABC):
    method transform (line 311) | def transform(self, X):
    method predict (line 315) | def predict(self, X):
  class Pipeline (line 319) | class Pipeline(_ScikitCompat):
    method __init__ (line 370) | def __init__(
    method save_pretrained (line 402) | def save_pretrained(self, save_directory):
    method transform (line 415) | def transform(self, X):
    method predict (line 421) | def predict(self, X):
    method device_placement (line 428) | def device_placement(self):
    method ensure_tensor_on_device (line 449) | def ensure_tensor_on_device(self, **inputs):
    method _parse_and_tokenize (line 457) | def _parse_and_tokenize(self, *args, pad_to_max_length=True, add_speci...
    method __call__ (line 472) | def __call__(self, *args, **kwargs):
    method _forward (line 476) | def _forward(self, inputs, return_tensors=False):
  class FeatureExtractionPipeline (line 501) | class FeatureExtractionPipeline(Pipeline):
    method __init__ (line 537) | def __init__(
    method __call__ (line 558) | def __call__(self, *args, **kwargs):
  class TextGenerationPipeline (line 562) | class TextGenerationPipeline(Pipeline):
    method __call__ (line 606) | def __call__(
  class TextClassificationPipeline (line 683) | class TextClassificationPipeline(Pipeline):
    method __call__ (line 720) | def __call__(self, *args, **kwargs):
  class FillMaskPipeline (line 726) | class FillMaskPipeline(Pipeline):
    method __init__ (line 764) | def __init__(
    method __call__ (line 788) | def __call__(self, *args, **kwargs):
  class NerPipeline (line 826) | class NerPipeline(Pipeline):
    method __init__ (line 865) | def __init__(
    method __call__ (line 893) | def __call__(self, *args, **kwargs):
    method group_entities (line 973) | def group_entities(self, entities):
  class QuestionAnsweringArgumentHandler (line 993) | class QuestionAnsweringArgumentHandler(ArgumentHandler):
    method __call__ (line 1002) | def __call__(self, *args, **kwargs):
  class QuestionAnsweringPipeline (line 1055) | class QuestionAnsweringPipeline(Pipeline):
    method __init__ (line 1094) | def __init__(
    method create_sample (line 1116) | def create_sample(
    method __call__ (line 1135) | def __call__(self, *args, **kwargs):
    method decode (line 1240) | def decode(self, start: np.ndarray, end: np.ndarray, topk: int, max_an...
    method span_to_answer (line 1280) | def span_to_answer(self, text: str, start: int, end: int):
  class SummarizationPipeline (line 1325) | class SummarizationPipeline(Pipeline):
    method __call__ (line 1373) | def __call__(
  class TranslationPipeline (line 1462) | class TranslationPipeline(Pipeline):
    method __call__ (line 1501) | def __call__(
  function pipeline (line 1677) | def pipeline(

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_albert.py
  class AlbertTokenizer (line 57) | class AlbertTokenizer(PreTrainedTokenizer):
    method __init__ (line 114) | def __init__(
    method vocab_size (line 158) | def vocab_size(self):
    method get_vocab (line 161) | def get_vocab(self):
    method __getstate__ (line 166) | def __getstate__(self):
    method __setstate__ (line 171) | def __setstate__(self, d):
    method preprocess_text (line 184) | def preprocess_text(self, inputs):
    method _tokenize (line 199) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 223) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 227) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 231) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 235) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 261) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 292) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 323) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_auto.py
  class AutoTokenizer (line 94) | class AutoTokenizer:
    method __init__ (line 122) | def __init__(self):
    method from_pretrained (line 129) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa...

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_bart.py
  class BartTokenizer (line 36) | class BartTokenizer(RobertaTokenizer):
  class MBartTokenizer (line 49) | class MBartTokenizer(XLMRobertaTokenizer):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_bert.py
  function load_vocab (line 99) | def load_vocab(vocab_file):
  function whitespace_tokenize (line 110) | def whitespace_tokenize(text):
  class BertTokenizer (line 119) | class BertTokenizer(PreTrainedTokenizer):
    method __init__ (line 163) | def __init__(
    method vocab_size (line 201) | def vocab_size(self):
    method get_vocab (line 204) | def get_vocab(self):
    method _tokenize (line 207) | def _tokenize(self, text):
    method _convert_token_to_id (line 217) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 221) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 225) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 230) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 256) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 287) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 317) | def save_vocabulary(self, vocab_path):
  class BasicTokenizer (line 346) | class BasicTokenizer(object):
    method __init__ (line 349) | def __init__(self, do_lower_case=True, never_split=None, tokenize_chin...
    method tokenize (line 369) | def tokenize(self, text, never_split=None):
    method _run_strip_accents (line 400) | def _run_strip_accents(self, text):
    method _run_split_on_punc (line 411) | def _run_split_on_punc(self, text, never_split=None):
    method _tokenize_chinese_chars (line 433) | def _tokenize_chinese_chars(self, text):
    method _is_chinese_char (line 446) | def _is_chinese_char(self, cp):
    method _clean_text (line 470) | def _clean_text(self, text):
  class WordpieceTokenizer (line 484) | class WordpieceTokenizer(object):
    method __init__ (line 487) | def __init__(self, vocab, unk_token, max_input_chars_per_word=100):
    method tokenize (line 492) | def tokenize(self, text):
  function _is_whitespace (line 544) | def _is_whitespace(char):
  function _is_control (line 556) | def _is_control(char):
  function _is_punctuation (line 568) | def _is_punctuation(char):
  class BertTokenizerFast (line 583) | class BertTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 631) | def __init__(
    method build_inputs_with_special_tokens (line 668) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method create_token_type_ids_from_sequences (line 676) | def create_token_type_ids_from_sequences(

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_bert_japanese.py
  class BertJapaneseTokenizer (line 71) | class BertJapaneseTokenizer(BertTokenizer):
    method __init__ (line 79) | def __init__(
    method _tokenize (line 153) | def _tokenize(self, text):
  class MecabTokenizer (line 167) | class MecabTokenizer:
    method __init__ (line 170) | def __init__(self, do_lower_case=False, never_split=None, normalize_te...
    method tokenize (line 192) | def tokenize(self, text, never_split=None, **kwargs):
  class CharacterTokenizer (line 219) | class CharacterTokenizer(object):
    method __init__ (line 222) | def __init__(self, vocab, unk_token, normalize_text=True):
    method tokenize (line 237) | def tokenize(self, text):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_camembert.py
  class CamembertTokenizer (line 51) | class CamembertTokenizer(PreTrainedTokenizer):
    method __init__ (line 107) | def __init__(
    method build_inputs_with_special_tokens (line 142) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 169) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 199) | def create_token_type_ids_from_sequences(
    method vocab_size (line 224) | def vocab_size(self):
    method _tokenize (line 227) | def _tokenize(self, text):
    method _convert_token_to_id (line 230) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 239) | def _convert_id_to_token(self, index):
    method __getstate__ (line 245) | def __getstate__(self):
    method __setstate__ (line 250) | def __setstate__(self, d):
    method convert_tokens_to_string (line 263) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 268) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_ctrl.py
  function get_pairs (line 102) | def get_pairs(word):
  class CTRLTokenizer (line 117) | class CTRLTokenizer(PreTrainedTokenizer):
    method __init__ (line 141) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
    method vocab_size (line 154) | def vocab_size(self):
    method get_vocab (line 157) | def get_vocab(self):
    method bpe (line 160) | def bpe(self, token):
    method _tokenize (line 204) | def _tokenize(self, text):
    method _convert_token_to_id (line 215) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 219) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 223) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 228) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_distilbert.py
  class DistilBertTokenizer (line 58) | class DistilBertTokenizer(BertTokenizer):
  class DistilBertTokenizerFast (line 76) | class DistilBertTokenizerFast(BertTokenizerFast):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_electra.py
  class ElectraTokenizer (line 52) | class ElectraTokenizer(BertTokenizer):
  class ElectraTokenizerFast (line 68) | class ElectraTokenizerFast(BertTokenizerFast):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_flaubert.py
  function convert_to_unicode (line 63) | def convert_to_unicode(text):
  class FlaubertTokenizer (line 79) | class FlaubertTokenizer(XLMTokenizer):
    method __init__ (line 98) | def __init__(self, do_lowercase=False, **kwargs):
    method preprocess_text (line 103) | def preprocess_text(self, text):
    method _tokenize (line 113) | def _tokenize(self, text, bypass_tokenizer=False):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_gpt2.py
  function bytes_to_unicode (line 63) | def bytes_to_unicode():
  function get_pairs (line 88) | def get_pairs(word):
  class GPT2Tokenizer (line 101) | class GPT2Tokenizer(PreTrainedTokenizer):
    method __init__ (line 139) | def __init__(
    method vocab_size (line 167) | def vocab_size(self):
    method get_vocab (line 170) | def get_vocab(self):
    method bpe (line 173) | def bpe(self, token):
    method _tokenize (line 215) | def _tokenize(self, text):
    method _convert_token_to_id (line 225) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 229) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 233) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 239) | def save_vocabulary(self, save_directory):
    method prepare_for_tokenization (line 274) | def prepare_for_tokenization(self, text, **kwargs):
  class GPT2TokenizerFast (line 280) | class GPT2TokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 326) | def __init__(

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_longformer.py
  class LongformerTokenizer (line 45) | class LongformerTokenizer(RobertaTokenizer):
  class LongformerTokenizerFast (line 54) | class LongformerTokenizerFast(RobertaTokenizerFast):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_marian.py
  class MarianTokenizer (line 28) | class MarianTokenizer(PreTrainedTokenizer):
    method __init__ (line 49) | def __init__(
    method _setup_normalizer (line 91) | def _setup_normalizer(self):
    method normalize (line 100) | def normalize(self, x: str) -> str:
    method _convert_token_to_id (line 104) | def _convert_token_to_id(self, token):
    method remove_language_code (line 107) | def remove_language_code(self, text: str):
    method _tokenize (line 113) | def _tokenize(self, text: str) -> List[str]:
    method _convert_id_to_token (line 118) | def _convert_id_to_token(self, index: int) -> str:
    method convert_tokens_to_string (line 122) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method build_inputs_with_special_tokens (line 126) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method prepare_translation_batch (line 133) | def prepare_translation_batch(
    method vocab_size (line 182) | def vocab_size(self) -> int:
    method save_vocabulary (line 185) | def save_vocabulary(self, save_directory: str) -> Tuple[str]:
    method get_vocab (line 197) | def get_vocab(self) -> Dict:
    method __getstate__ (line 202) | def __getstate__(self) -> Dict:
    method __setstate__ (line 207) | def __setstate__(self, d: Dict) -> None:
    method num_special_tokens_to_add (line 213) | def num_special_tokens_to_add(self, **unused):
    method _special_token_mask (line 217) | def _special_token_mask(self, seq):
    method get_special_tokens_mask (line 222) | def get_special_tokens_mask(
  function load_spm (line 234) | def load_spm(path: str) -> sentencepiece.SentencePieceProcessor:
  function save_json (line 240) | def save_json(data, path: str) -> None:
  function load_json (line 245) | def load_json(path: str) -> Union[Dict, List]:

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_openai.py
  function get_pairs (line 46) | def get_pairs(word):
  function text_standardize (line 59) | def text_standardize(text):
  class OpenAIGPTTokenizer (line 75) | class OpenAIGPTTokenizer(PreTrainedTokenizer):
    method __init__ (line 99) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
    method vocab_size (line 124) | def vocab_size(self):
    method get_vocab (line 127) | def get_vocab(self):
    method bpe (line 130) | def bpe(self, token):
    method _tokenize (line 174) | def _tokenize(self, text):
    method _convert_token_to_id (line 189) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 193) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 197) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 202) | def save_vocabulary(self, save_directory):
  class OpenAIGPTTokenizerFast (line 238) | class OpenAIGPTTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 264) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_reformer.py
  class ReformerTokenizer (line 54) | class ReformerTokenizer(PreTrainedTokenizer):
    method __init__ (line 85) | def __init__(
    method vocab_size (line 117) | def vocab_size(self):
    method get_vocab (line 120) | def get_vocab(self):
    method __getstate__ (line 125) | def __getstate__(self):
    method __setstate__ (line 130) | def __setstate__(self, d):
    method _tokenize (line 143) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 152) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 156) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 162) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 167) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_roberta.py
  class RobertaTokenizer (line 64) | class RobertaTokenizer(GPT2Tokenizer):
    method __init__ (line 126) | def __init__(
    method build_inputs_with_special_tokens (line 154) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 180) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 210) | def create_token_type_ids_from_sequences(
    method prepare_for_tokenization (line 234) | def prepare_for_tokenization(self, text, add_special_tokens=False, **k...
  class RobertaTokenizerFast (line 244) | class RobertaTokenizerFast(GPT2TokenizerFast):
    method __init__ (line 291) | def __init__(
    method mask_token (line 333) | def mask_token(self, value):
    method build_inputs_with_special_tokens (line 340) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method create_token_type_ids_from_sequences (line 347) | def create_token_type_ids_from_sequences(

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_t5.py
  class T5Tokenizer (line 62) | class T5Tokenizer(PreTrainedTokenizer):
    method __init__ (line 98) | def __init__(
    method vocab_size (line 139) | def vocab_size(self):
    method get_vocab (line 142) | def get_vocab(self):
    method __getstate__ (line 147) | def __getstate__(self):
    method __setstate__ (line 152) | def __setstate__(self, d):
    method _tokenize (line 165) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 174) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 182) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 190) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 195) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_transfo_xl.py
  class TransfoXLTokenizer (line 72) | class TransfoXLTokenizer(PreTrainedTokenizer):
    method __init__ (line 85) | def __init__(
    method _compile_space_around_punctuation_pattern (line 141) | def _compile_space_around_punctuation_pattern(self):
    method count_file (line 146) | def count_file(self, path, verbose=False, add_eos=False):
    method count_sents (line 162) | def count_sents(self, sents, verbose=False):
    method _build_from_file (line 173) | def _build_from_file(self, vocab_file):
    method save_vocabulary (line 188) | def save_vocabulary(self, vocab_path):
    method build_vocab (line 212) | def build_vocab(self):
    method encode_file (line 232) | def encode_file(self, path, ordered=False, verbose=False, add_eos=True...
    method encode_sents (line 249) | def encode_sents(self, sents, ordered=False, verbose=False):
    method add_special (line 263) | def add_special(self, sym):
    method add_symbol (line 269) | def add_symbol(self, sym):
    method _convert_id_to_token (line 274) | def _convert_id_to_token(self, idx):
    method _convert_token_to_id (line 279) | def _convert_token_to_id(self, sym):
    method convert_tokens_to_string (line 296) | def convert_tokens_to_string(self, tokens):
    method convert_to_tensor (line 301) | def convert_to_tensor(self, symbols):
    method vocab_size (line 305) | def vocab_size(self):
    method get_vocab (line 308) | def get_vocab(self):
    method _tokenize (line 311) | def _tokenize(self, line, add_eos=False, add_double_eos=False):
    method prepare_for_tokenization (line 330) | def prepare_for_tokenization(self, text, **kwargs):
  class _TransfoXLDelimiterLookupTokenizer (line 344) | class _TransfoXLDelimiterLookupTokenizer(BaseTokenizer):
    method __init__ (line 345) | def __init__(
  class TransfoXLTokenizerFast (line 405) | class TransfoXLTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 422) | def __init__(
    method save_pretrained (line 458) | def save_pretrained(self, save_directory):
  class LMOrderedIterator (line 467) | class LMOrderedIterator(object):
    method __init__ (line 468) | def __init__(self, data, bsz, bptt, device="cpu", ext_len=None):
    method get_batch (line 490) | def get_batch(self, i, bptt=None):
    method get_fixlen_iter (line 506) | def get_fixlen_iter(self, start=0):
    method get_varlen_iter (line 510) | def get_varlen_iter(self, start=0, std=5, min_len=5, max_deviation=3):
    method __iter__ (line 522) | def __iter__(self):
  class LMShuffledIterator (line 526) | class LMShuffledIterator(object):
    method __init__ (line 527) | def __init__(self, data, bsz, bptt, device="cpu", ext_len=None, shuffl...
    method get_sent_stream (line 540) | def get_sent_stream(self):
    method stream_iterator (line 548) | def stream_iterator(self, sent_stream):
    method __iter__ (line 595) | def __iter__(self):
  class LMMultiFileIterator (line 603) | class LMMultiFileIterator(LMShuffledIterator):
    method __init__ (line 604) | def __init__(self, paths, vocab, bsz, bptt, device="cpu", ext_len=None...
    method get_sent_stream (line 616) | def get_sent_stream(self, path):
    method __iter__ (line 624) | def __iter__(self):
  class TransfoXLCorpus (line 635) | class TransfoXLCorpus(object):
    method from_pretrained (line 637) | def from_pretrained(cls, pretrained_model_name_or_path, cache_dir=None...
    method __init__ (line 680) | def __init__(self, *args, **kwargs):
    method build_corpus (line 687) | def build_corpus(self, path, dataset):
    method get_iterator (line 721) | def get_iterator(self, split, *args, **kwargs):
  function get_lm_corpus (line 738) | def get_lm_corpus(datadir, dataset):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_utils.py
  class CharSpan (line 61) | class CharSpan(NamedTuple):
  class TokenSpan (line 73) | class TokenSpan(NamedTuple):
  function flatten (line 85) | def flatten(x: Sequence):
  function truncate_and_pad (line 100) | def truncate_and_pad(
  class BatchEncoding (line 164) | class BatchEncoding(UserDict):
    method __init__ (line 177) | def __init__(
    method __getitem__ (line 189) | def __getitem__(self, item: Union[int, str]) -> EncodingFast:
    method __getattr__ (line 203) | def __getattr__(self, item: str):
    method keys (line 206) | def keys(self):
    method values (line 209) | def values(self):
    method items (line 212) | def items(self):
    method encodings (line 220) | def encodings(self) -> Optional[List[EncodingFast]]:
    method tokens (line 228) | def tokens(self, batch_index: int = 0) -> List[int]:
    method words (line 233) | def words(self, batch_index: int = 0) -> List[Optional[int]]:
    method token_to_word (line 238) | def token_to_word(self, batch_or_token_index: int, token_index: Option...
    method word_to_tokens (line 277) | def word_to_tokens(self, batch_or_word_index: int, word_index: Optiona...
    method token_to_chars (line 322) | def token_to_chars(self, batch_or_token_index: int, token_index: Optio...
    method char_to_token (line 359) | def char_to_token(self, batch_or_char_index: int, char_index: Optional...
    method word_to_chars (line 394) | def word_to_chars(self, batch_or_word_index: int, word_index: Optional...
    method char_to_word (line 431) | def char_to_word(self, batch_or_char_index: int, char_index: Optional[...
    method to (line 467) | def to(self, device: str):
  class SpecialTokensMixin (line 473) | class SpecialTokensMixin:
    method __init__ (line 491) | def __init__(self, **kwargs):
    method bos_token (line 517) | def bos_token(self):
    method eos_token (line 524) | def eos_token(self):
    method unk_token (line 531) | def unk_token(self):
    method sep_token (line 538) | def sep_token(self):
    method pad_token (line 545) | def pad_token(self):
    method cls_token (line 552) | def cls_token(self):
    method mask_token (line 559) | def mask_token(self):
    method additional_special_tokens (line 566) | def additional_special_tokens(self):
    method _maybe_update_backend (line 572) | def _maybe_update_backend(self, value):
    method bos_token (line 577) | def bos_token(self, value):
    method eos_token (line 582) | def eos_token(self, value):
    method unk_token (line 587) | def unk_token(self, value):
    method sep_token (line 592) | def sep_token(self, value):
    method pad_token (line 597) | def pad_token(self, value):
    method cls_token (line 602) | def cls_token(self, value):
    method mask_token (line 607) | def mask_token(self, value):
    method additional_special_tokens (line 612) | def additional_special_tokens(self, value):
    method bos_token_id (line 617) | def bos_token_id(self):
    method eos_token_id (line 622) | def eos_token_id(self):
    method unk_token_id (line 627) | def unk_token_id(self):
    method sep_token_id (line 632) | def sep_token_id(self):
    method pad_token_id (line 637) | def pad_token_id(self):
    method pad_token_type_id (line 642) | def pad_token_type_id(self):
    method cls_token_id (line 647) | def cls_token_id(self):
    method mask_token_id (line 652) | def mask_token_id(self):
    method additional_special_tokens_ids (line 657) | def additional_special_tokens_ids(self):
    method special_tokens_map (line 662) | def special_tokens_map(self):
    method all_special_tokens (line 674) | def all_special_tokens(self):
    method all_special_ids (line 686) | def all_special_ids(self):
  class PreTrainedTokenizer (line 695) | class PreTrainedTokenizer(SpecialTokensMixin):
    method vocab_size (line 771) | def vocab_size(self) -> int:
    method is_fast (line 776) | def is_fast(self) -> bool:
    method max_len (line 780) | def max_len(self) -> int:
    method max_len_single_sentence (line 787) | def max_len_single_sentence(self) -> int:
    method max_len_sentences_pair (line 791) | def max_len_sentences_pair(self) -> int:
    method max_len_single_sentence (line 795) | def max_len_single_sentence(self, value) -> int:
    method max_len_sentences_pair (line 807) | def max_len_sentences_pair(self, value) -> int:
    method get_vocab (line 818) | def get_vocab(self):
    method __init__ (line 822) | def __init__(self, model_max_length=None, **kwargs):
    method __len__ (line 854) | def __len__(self):
    method from_pretrained (line 859) | def from_pretrained(cls, *inputs, **kwargs):
    method _from_pretrained (line 914) | def _from_pretrained(cls, pretrained_model_name_or_path, *init_inputs,...
    method save_pretrained (line 1087) | def save_pretrained(self, save_directory):
    method save_vocabulary (line 1128) | def save_vocabulary(self, save_directory) -> Tuple[str]:
    method add_tokens (line 1138) | def add_tokens(self, new_tokens: Union[str, List[str]]) -> int:
    method num_special_tokens_to_add (line 1187) | def num_special_tokens_to_add(self, pair=False):
    method add_special_tokens (line 1206) | def add_special_tokens(self, special_tokens_dict):
    method tokenize (line 1260) | def tokenize(self, text: TextInput, **kwargs):
    method _tokenize (line 1332) | def _tokenize(self, text, **kwargs):
    method convert_tokens_to_ids (line 1341) | def convert_tokens_to_ids(self, tokens):
    method _convert_token_to_id_with_added_voc (line 1356) | def _convert_token_to_id_with_added_voc(self, token):
    method _convert_token_to_id (line 1364) | def _convert_token_to_id(self, token):
    method encode (line 1367) | def encode(
    method encode_plus (line 1439) | def encode_plus(
    method batch_encode_plus (line 1594) | def batch_encode_plus(
    method convert_to_tensors_ (line 1789) | def convert_to_tensors_(self, batch_outputs: dict, return_tensors: str...
    method prepare_for_model (line 1818) | def prepare_for_model(
    method prepare_for_tokenization (line 2018) | def prepare_for_tokenization(self, text: str, **kwargs) -> str:
    method truncate_sequences (line 2022) | def truncate_sequences(
    method create_token_type_ids_from_sequences (line 2082) | def create_token_type_ids_from_sequences(self, token_ids_0: List, toke...
    method build_inputs_with_special_tokens (line 2087) | def build_inputs_with_special_tokens(self, token_ids_0: List, token_id...
    method get_special_tokens_mask (line 2096) | def get_special_tokens_mask(
    method convert_ids_to_tokens (line 2115) | def convert_ids_to_tokens(
    method _convert_id_to_token (line 2140) | def _convert_id_to_token(self, index: int) -> str:
    method convert_tokens_to_string (line 2143) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method decode (line 2150) | def decode(
    method batch_decode (line 2190) | def batch_decode(self, sequences: List[List[int]], **kwargs) -> List[s...
    method clean_up_tokenization (line 2194) | def clean_up_tokenization(out_string: str) -> str:
  class PreTrainedTokenizerFast (line 2212) | class PreTrainedTokenizerFast(PreTrainedTokenizer):
    method __init__ (line 2270) | def __init__(self, tokenizer: BaseTokenizerFast, **kwargs):
    method backend_tokenizer (line 2281) | def backend_tokenizer(self) -> BaseTokenizerFast:
    method decoder (line 2285) | def decoder(self) -> DecoderFast:
    method is_fast (line 2289) | def is_fast(self) -> bool:
    method vocab_size (line 2293) | def vocab_size(self) -> int:
    method __len__ (line 2296) | def __len__(self) -> int:
    method _maybe_update_backend (line 2299) | def _maybe_update_backend(self, value):
    method _convert_encoding (line 2304) | def _convert_encoding(
    method _convert_token_to_id_with_added_voc (line 2360) | def _convert_token_to_id_with_added_voc(self, token: int) -> str:
    method _convert_id_to_token (line 2366) | def _convert_id_to_token(self, index: int) -> Optional[str]:
    method get_vocab (line 2369) | def get_vocab(self):
    method convert_tokens_to_string (line 2372) | def convert_tokens_to_string(self, tokens: List[int], skip_special_tok...
    method add_tokens (line 2375) | def add_tokens(self, new_tokens: List[Union[str, AddedTokenFast]]) -> ...
    method add_special_tokens (line 2402) | def add_special_tokens(self, special_tokens_dict: dict) -> int:
    method num_special_tokens_to_add (line 2421) | def num_special_tokens_to_add(self, pair: bool = False) -> int:
    method tokenize (line 2424) | def tokenize(
    method batch_encode_plus (line 2429) | def batch_encode_plus(
    method encode_plus (line 2567) | def encode_plus(
    method decode (line 2659) | def decode(
    method save_vocabulary (line 2670) | def save_vocabulary(self, save_directory: str) -> Tuple[str]:
  function trim_batch (line 2680) | def trim_batch(

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_xlm.py
  function get_pairs (line 430) | def get_pairs(word):
  function lowercase_and_remove_accent (line 443) | def lowercase_and_remove_accent(text):
  function replace_unicode_punct (line 460) | def replace_unicode_punct(text):
  function remove_non_printing_char (line 503) | def remove_non_printing_char(text):
  function romanian_preprocessing (line 516) | def romanian_preprocessing(text):
  class XLMTokenizer (line 530) | class XLMTokenizer(PreTrainedTokenizer):
    method __init__ (line 594) | def __init__(
    method moses_punct_norm (line 656) | def moses_punct_norm(self, text, lang):
    method moses_tokenize (line 664) | def moses_tokenize(self, text, lang):
    method moses_pipeline (line 672) | def moses_pipeline(self, text, lang):
    method ja_tokenize (line 678) | def ja_tokenize(self, text):
    method vocab_size (line 699) | def vocab_size(self):
    method get_vocab (line 702) | def get_vocab(self):
    method bpe (line 705) | def bpe(self, token):
    method _tokenize (line 749) | def _tokenize(self, text, lang="en", bypass_tokenizer=False):
    method _convert_token_to_id (line 839) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 843) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 847) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 852) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 880) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 911) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 941) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_xlm_roberta.py
  class XLMRobertaTokenizer (line 52) | class XLMRobertaTokenizer(PreTrainedTokenizer):
    method __init__ (line 108) | def __init__(
    method __getstate__ (line 159) | def __getstate__(self):
    method __setstate__ (line 164) | def __setstate__(self, d):
    method build_inputs_with_special_tokens (line 177) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 204) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 235) | def create_token_type_ids_from_sequences(
    method vocab_size (line 261) | def vocab_size(self):
    method get_vocab (line 264) | def get_vocab(self):
    method _tokenize (line 269) | def _tokenize(self, text):
    method _convert_token_to_id (line 272) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 281) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 287) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 292) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count5/pretrain/transformers1/tokenization_xlnet.py
  class XLNetTokenizer (line 53) | class XLNetTokenizer(PreTrainedTokenizer):
    method __init__ (line 113) | def __init__(
    method vocab_size (line 161) | def vocab_size(self):
    method get_vocab (line 164) | def get_vocab(self):
    method __getstate__ (line 169) | def __getstate__(self):
    method __setstate__ (line 174) | def __setstate__(self, d):
    method preprocess_text (line 187) | def preprocess_text(self, inputs):
    method _tokenize (line 202) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 226) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 230) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 234) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 239) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 265) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 296) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 324) | def save_vocabulary(self, save_directory):

FILE: code/bert-base-count5/pretrain/transformers1/trainer.py
  function is_apex_available (line 38) | def is_apex_available():
  function is_tensorboard_available (line 60) | def is_tensorboard_available():
  function is_wandb_available (line 77) | def is_wandb_available():
  function set_seed (line 84) | def set_seed(seed: int):
  function torch_distributed_zero_first (line 93) | def torch_distributed_zero_first(local_rank: int):
  class SequentialDistributedSampler (line 104) | class SequentialDistributedSampler(Sampler):
    method __init__ (line 116) | def __init__(self, dataset, num_replicas=None, rank=None):
    method __iter__ (line 131) | def __iter__(self):
    method __len__ (line 144) | def __len__(self):
  function get_tpu_sampler (line 148) | def get_tpu_sampler(dataset: Dataset):
  class Trainer (line 154) | class Trainer:
    method __init__ (line 171) | def __init__(
    method get_test_dataloader (line 222) | def get_test_dataloader(self, test_dataset: Dataset) -> DataLoader:
    method get_optimizers (line 242) | def get_optimizers(
    method _setup_wandb (line 273) | def _setup_wandb(self):
    method num_examples (line 297) | def num_examples(self, dataloader: DataLoader) -> int:
    method train (line 303) | def train(self, model_path: Optional[str] = None):
    method _log (line 510) | def _log(self, logs: Dict[str, float], iterator: Optional[tqdm] = None...
    method _training_step (line 524) | def _training_step(
    method is_local_master (line 547) | def is_local_master(self) -> bool:
    method is_world_master (line 553) | def is_world_master(self) -> bool:
    method save_model (line 563) | def save_model(self, output_dir: Optional[str] = None):
    method _save_tpu (line 576) | def _save_tpu(self, output_dir: Optional[str] = None):
    method _save (line 592) | def _save(self, output_dir: Optional[str] = None):
    method _sorted_checkpoints (line 605) | def _sorted_checkpoints(self, checkpoint_prefix=PREFIX_CHECKPOINT_DIR,...
    method _rotate_checkpoints (line 622) | def _rotate_checkpoints(self, use_mtime=False) -> None:
    method evaluate (line 641) | def evaluate(
    method predict (line 670) | def predict(self, test_dataset: Dataset) -> PredictionOutput:
    method _prediction_loop (line 681) | def _prediction_loop(
    method distributed_concat (line 771) | def distributed_concat(self, tensor: torch.Tensor, num_total_examples:...

FILE: code/bert-base-count5/pretrain/transformers1/trainer_tf.py
  class TFTrainer (line 20) | class TFTrainer:
    method __init__ (line 31) | def __init__(
    method _setup_training (line 50) | def _setup_training(self) -> None:
    method _set_loss_and_metric (line 67) | def _set_loss_and_metric(self) -> None:
    method _create_summary_writer (line 84) | def _create_summary_writer(self) -> None:
    method _prepare_dataset (line 90) | def _prepare_dataset(self) -> None:
    method _create_optimizer (line 122) | def _create_optimizer(self) -> None:
    method _create_checkpoint_manager (line 146) | def _create_checkpoint_manager(self, max_to_keep: int = 5, load_model:...
    method _evaluate_steps (line 162) | def _evaluate_steps(self, per_replica_features, per_replica_labels):
    method _prediction_loop (line 182) | def _prediction_loop(
    method evaluate (line 237) | def evaluate(
    method train (line 250) | def train(self) -> None:
    method _training_steps (line 317) | def _training_steps(self):
    method _apply_gradients (line 327) | def _apply_gradients(self):
    method _step (line 331) | def _step(self):
    method _accumulate_next_gradients (line 342) | def _accumulate_next_gradients(self):
    method _accumulate_gradients (line 358) | def _accumulate_gradients(self, per_replica_features, per_replica_labe...
    method _forward (line 371) | def _forward(self, features, labels):
    method _run_model (line 383) | def _run_model(self, features, labels, training):
    method predict (line 412) | def predict(self, test_dataset: tf.data.Dataset) -> PredictionOutput:
    method save_model (line 426) | def save_model(self) -> None:

FILE: code/bert-base-count5/pretrain/transformers1/trainer_utils.py
  class EvalPrediction (line 6) | class EvalPrediction(NamedTuple):
  class PredictionOutput (line 16) | class PredictionOutput(NamedTuple):
  class TrainOutput (line 22) | class TrainOutput(NamedTuple):

FILE: code/bert-base-count5/pretrain/transformers1/training_args.py
  function is_tpu_available (line 23) | def is_tpu_available():
  class TrainingArguments (line 31) | class TrainingArguments:
    method train_batch_size (line 138) | def train_batch_size(self) -> int:
    method eval_batch_size (line 148) | def eval_batch_size(self) -> int:
    method _setup_devices (line 159) | def _setup_devices(self) -> Tuple["torch.device", int]:
    method device (line 182) | def device(self) -> "torch.device":
    method n_gpu (line 187) | def n_gpu(self):
    method to_json_string (line 190) | def to_json_string(self):
    method to_sanitized_dict (line 196) | def to_sanitized_dict(self) -> Dict[str, Any]:

FILE: code/bert-base-count5/pretrain/transformers1/training_args_tf.py
  class TFTrainingArguments (line 16) | class TFTrainingArguments(TrainingArguments):
    method _setup_strategy (line 46) | def _setup_strategy(self) -> Tuple["tf.distribute.Strategy", int]:
    method strategy (line 80) | def strategy(self) -> "tf.distribute.Strategy":
    method n_gpu (line 85) | def n_gpu(self) -> int:

FILE: code/bert-base-count5/pretrain/transformers1/utils_encoder_decoder.py
  function prepare_encoder_decoder_model_kwargs (line 18) | def prepare_encoder_decoder_model_kwargs(**kwargs):

FILE: code/build_vocab.py
  function loadData (line 2) | def loadData(path):

FILE: code/main_fusion_thread.py
  function init_model (line 9) | def init_model(model_path, export_model_path, optimized_model_path, leng...
  function infer (line 92) | def infer(session,config,inp:Queue,res:Queue):
  function softmax (line 105) | def softmax(x, axis=1):
  class Config (line 120) | class Config:
    method __init__ (line 121) | def __init__(self):
  function tccapi (line 147) | def tccapi():

FILE: code/model.py
  class BertForClass (line 11) | class BertForClass(nn.Module):
    method __init__ (line 12) | def __init__(self, config):
    method forward (line 24) | def forward(self, input_ids, input_masks, segment_ids):
  class BertForClass_MultiDropout (line 37) | class BertForClass_MultiDropout(nn.Module):
    method __init__ (line 38) | def __init__(self, config):
    method forward (line 50) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoCls (line 63) | class BertLastTwoCls(nn.Module):
    method __init__ (line 64) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastCls (line 83) | class BertLastCls(nn.Module):
    method __init__ (line 84) | def __init__(self, config):
    method forward (line 95) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoClsPooler (line 108) | class BertLastTwoClsPooler(nn.Module):
    method __init__ (line 109) | def __init__(self, config):
    method forward (line 120) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddings (line 132) | class BertLastTwoEmbeddings(nn.Module):
    method __init__ (line 133) | def __init__(self, config):
    method forward (line 144) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddingsPooler (line 160) | class BertLastTwoEmbeddingsPooler(nn.Module):
    method __init__ (line 161) | def __init__(self, config):
    method forward (line 172) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourCls (line 187) | class BertLastFourCls(nn.Module):
    method __init__ (line 188) | def __init__(self, config):
    method forward (line 199) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourClsPooler (line 215) | class BertLastFourClsPooler(nn.Module):
    method __init__ (line 216) | def __init__(self, config):
    method forward (line 227) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddings (line 239) | class BertLastFourEmbeddings(nn.Module):
    method __init__ (line 240) | def __init__(self, config):
    method forward (line 251) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddingsPooler (line 268) | class BertLastFourEmbeddingsPooler(nn.Module):
    method __init__ (line 269) | def __init__(self, config):
    method forward (line 280) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynCls (line 296) | class BertDynCls(nn.Module):
    method __init__ (line 297) | def __init__(self, config):
    method forward (line 311) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynEmbeddings (line 343) | class BertDynEmbeddings(nn.Module):
    method __init__ (line 344) | def __init__(self, config):
    method forward (line 358) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRNN (line 392) | class BertRNN(nn.Module):
    method __init__ (line 394) | def __init__(self, config):
    method forward (line 434) | def forward(self, input_ids, input_masks, segment_ids):
  class BertCNN (line 459) | class BertCNN(nn.Module):
    method __init__ (line 461) | def __init__(self, config):
    method conv_and_pool (line 480) | def conv_and_pool(self, x, conv):
    method forward (line 485) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRCNN (line 497) | class BertRCNN(nn.Module):
    method __init__ (line 498) | def __init__(self, config):
    method forward (line 540) | def forward(self, input_ids, input_masks, segment_ids):
  class XLNet (line 564) | class XLNet(nn.Module):
    method __init__ (line 566) | def __init__(self, config):
    method forward (line 574) | def forward(self, input_ids, input_masks, segment_ids):
  class ElectraClassificationHead (line 584) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 593) | def forward(self, features, **kwargs):
  class Electra (line 602) | class Electra(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, input_ids, input_masks, segment_ids):
  class NEZHA (line 621) | class NEZHA(nn.Module):
    method __init__ (line 622) | def __init__(self, config):
    method forward (line 635) | def forward(self, input_ids, input_masks, segment_ids):

FILE: code/nezha-base-count3/finetuning/NEZHA/configuration_nezha.py
  class NeZhaConfig (line 6) | class NeZhaConfig(PretrainedConfig):
    method __init__ (line 82) | def __init__(

FILE: code/nezha-base-count3/finetuning/NEZHA/modeling_nezha.py
  function load_tf_weights_in_bert (line 48) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
  class BertEmbeddings (line 122) | class BertEmbeddings(nn.Module):
    method __init__ (line 125) | def __init__(self, config):
    method forward (line 134) | def forward(self, input_ids=None, token_type_ids=None, inputs_embeds=N...
  function relative_position_encoding (line 151) | def relative_position_encoding(depth, max_length=512, max_relative_posit...
  class BertSelfAttention (line 175) | class BertSelfAttention(nn.Module):
    method __init__ (line 176) | def __init__(self, config):
    method transpose_for_scores (line 200) | def transpose_for_scores(self, x):
    method forward (line 205) | def forward(
  class BertSelfOutput (line 308) | class BertSelfOutput(nn.Module):
    method __init__ (line 309) | def __init__(self, config):
    method forward (line 315) | def forward(self, hidden_states, input_tensor):
  class BertAttention (line 322) | class BertAttention(nn.Module):
    method __init__ (line 323) | def __init__(self, config):
    method prune_heads (line 329) | def prune_heads(self, heads):
    method forward (line 347) | def forward(
  class BertIntermediate (line 373) | class BertIntermediate(nn.Module):
    method __init__ (line 374) | def __init__(self, config):
    method forward (line 382) | def forward(self, hidden_states):
  class BertOutput (line 388) | class BertOutput(nn.Module):
    method __init__ (line 389) | def __init__(self, config):
    method forward (line 395) | def forward(self, hidden_states, input_tensor):
  class BertLayer (line 402) | class BertLayer(nn.Module):
    method __init__ (line 403) | def __init__(self, config):
    method forward (line 416) | def forward(
    method feed_forward_chunk (line 481) | def feed_forward_chunk(self, attention_output):
  class NeZhaEncoder (line 487) | class NeZhaEncoder(nn.Module):
    method __init__ (line 488) | def __init__(self, config):
    method forward (line 495) | def forward(
  class BertPooler (line 588) | class BertPooler(nn.Module):
    method __init__ (line 589) | def __init__(self, config):
    method forward (line 594) | def forward(self, hidden_states):
  class BertPredictionHeadTransform (line 603) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, hidden_states):
  class BertLMPredictionHead (line 620) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 621) | def __init__(self, config):
    method forward (line 634) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 640) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 641) | def __init__(self, config):
    method forward (line 645) | def forward(self, sequence_output):
  class BertOnlyNSPHead (line 650) | class BertOnlyNSPHead(nn.Module):
    method __init__ (line 651) | def __init__(self, config):
    method forward (line 655) | def forward(self, pooled_output):
  class BertPreTrainingHeads (line 660) | class BertPreTrainingHeads(nn.Module):
    method __init__ (line 661) | def __init__(self, config):
    method forward (line 666) | def forward(self, sequence_output, pooled_output):
  class BertPreTrainedModel (line 672) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 682) | def _init_weights(self, module):
  class BertForPreTrainingOutput (line 700) | class BertForPreTrainingOutput(ModelOutput):
  class NeZhaModel (line 805) | class NeZhaModel(BertPreTrainedModel):
    method __init__ (line 819) | def __init__(self, config, add_pooling_layer=True):
    method get_input_embeddings (line 830) | def get_input_embeddings(self):
    method set_input_embeddings (line 833) | def set_input_embeddings(self, value):
    method _prune_heads (line 836) | def _prune_heads(self, heads_to_prune):
    method forward (line 851) | def forward(
  class BertForPreTraining (line 982) | class BertForPreTraining(BertPreTrainedModel):
    method __init__ (line 983) | def __init__(self, config):
    method get_output_embeddings (line 991) | def get_output_embeddings(self):
    method set_output_embeddings (line 994) | def set_output_embeddings(self, new_embeddings):
    method forward (line 999) | def forward(
  class BertLMHeadModel (line 1083) | class BertLMHeadModel(BertPreTrainedModel):
    method __init__ (line 1088) | def __init__(self, config):
    method get_output_embeddings (line 1099) | def get_output_embeddings(self):
    method set_output_embeddings (line 1102) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1107) | def forward(
    method prepare_inputs_for_generation (line 1209) | def prepare_inputs_for_generation(self, input_ids, past=None, attentio...
    method _reorder_cache (line 1221) | def _reorder_cache(self, past, beam_idx):
  class NeZhaForMaskedLM (line 1229) | class NeZhaForMaskedLM(BertPreTrainedModel):
    method __init__ (line 1234) | def __init__(self, config):
    method get_output_embeddings (line 1248) | def get_output_embeddings(self):
    method set_output_embeddings (line 1251) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1261) | def forward(
    method prepare_inputs_for_generation (line 1318) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class BertForNextSentencePrediction (line 1337) | class BertForNextSentencePrediction(BertPreTrainedModel):
    method __init__ (line 1338) | def __init__(self, config):
    method forward (line 1348) | def forward(
  class BertForSequenceClassification (line 1438) | class BertForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 1439) | def __init__(self, config):
    method forward (line 1456) | def forward(
  class BertForMultipleChoice (line 1523) | class BertForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1524) | def __init__(self, config):
    method forward (line 1540) | def forward(
  class BertForTokenClassification (line 1613) | class BertForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1617) | def __init__(self, config):
    method forward (line 1634) | def forward(
  class BertForQuestionAnswering (line 1704) | class BertForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 1708) | def __init__(self, config):
    method forward (line 1724) | def forward(

FILE: code/nezha-base-count3/finetuning/model.py
  class BertForClass (line 11) | class BertForClass(nn.Module):
    method __init__ (line 12) | def __init__(self, config):
    method forward (line 24) | def forward(self, input_ids, input_masks, segment_ids):
  class BertForClass_MultiDropout (line 37) | class BertForClass_MultiDropout(nn.Module):
    method __init__ (line 38) | def __init__(self, config):
    method forward (line 50) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoCls (line 63) | class BertLastTwoCls(nn.Module):
    method __init__ (line 64) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastCls (line 83) | class BertLastCls(nn.Module):
    method __init__ (line 84) | def __init__(self, config):
    method forward (line 95) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoClsPooler (line 108) | class BertLastTwoClsPooler(nn.Module):
    method __init__ (line 109) | def __init__(self, config):
    method forward (line 120) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddings (line 132) | class BertLastTwoEmbeddings(nn.Module):
    method __init__ (line 133) | def __init__(self, config):
    method forward (line 144) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddingsPooler (line 160) | class BertLastTwoEmbeddingsPooler(nn.Module):
    method __init__ (line 161) | def __init__(self, config):
    method forward (line 172) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourCls (line 187) | class BertLastFourCls(nn.Module):
    method __init__ (line 188) | def __init__(self, config):
    method forward (line 199) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourClsPooler (line 215) | class BertLastFourClsPooler(nn.Module):
    method __init__ (line 216) | def __init__(self, config):
    method forward (line 227) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddings (line 239) | class BertLastFourEmbeddings(nn.Module):
    method __init__ (line 240) | def __init__(self, config):
    method forward (line 251) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddingsPooler (line 268) | class BertLastFourEmbeddingsPooler(nn.Module):
    method __init__ (line 269) | def __init__(self, config):
    method forward (line 280) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynCls (line 296) | class BertDynCls(nn.Module):
    method __init__ (line 297) | def __init__(self, config):
    method forward (line 311) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynEmbeddings (line 343) | class BertDynEmbeddings(nn.Module):
    method __init__ (line 344) | def __init__(self, config):
    method forward (line 358) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRNN (line 392) | class BertRNN(nn.Module):
    method __init__ (line 394) | def __init__(self, config):
    method forward (line 434) | def forward(self, input_ids, input_masks, segment_ids):
  class BertCNN (line 459) | class BertCNN(nn.Module):
    method __init__ (line 461) | def __init__(self, config):
    method conv_and_pool (line 480) | def conv_and_pool(self, x, conv):
    method forward (line 485) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRCNN (line 497) | class BertRCNN(nn.Module):
    method __init__ (line 498) | def __init__(self, config):
    method forward (line 540) | def forward(self, input_ids, input_masks, segment_ids):
  class XLNet (line 564) | class XLNet(nn.Module):
    method __init__ (line 566) | def __init__(self, config):
    method forward (line 574) | def forward(self, input_ids, input_masks, segment_ids):
  class ElectraClassificationHead (line 584) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 593) | def forward(self, features, **kwargs):
  class Electra (line 602) | class Electra(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, input_ids, input_masks, segment_ids):
  class NEZHA (line 621) | class NEZHA(nn.Module):
    method __init__ (line 622) | def __init__(self, config):
    method forward (line 637) | def forward(self, input_ids, input_masks, segment_ids):

FILE: code/nezha-base-count3/finetuning/multi_gpu_QA.py
  class Config (line 46) | class Config:
    method __init__ (line 47) | def __init__(self):

FILE: code/nezha-base-count3/finetuning/utils.py
  function paddingList (line 12) | def paddingList(ls:list,val,returnTensor=False):
  function fastTokenizer (line 19) | def fastTokenizer(a:str,b:str,maxLen,tk):
  class data_generator (line 39) | class data_generator:
    method __init__ (line 40) | def __init__(self, data, config, shuffle=False):
    method __len__ (line 53) | def __len__(self):
    method __iter__ (line 56) | def __iter__(self):
  class PGD (line 95) | class PGD():
    method __init__ (line 96) | def __init__(self, model):
    method attack (line 101) | def attack(self, epsilon=0.3, alpha=0.1, emb_name='word_embeddings', i...
    method restore (line 113) | def restore(self, emb_name='word_embeddings'):
    method project (line 121) | def project(self, param_name, param_data, epsilon):
    method backup_grad (line 127) | def backup_grad(self):
    method restore_grad (line 132) | def restore_grad(self):
  class FGM (line 139) | class FGM():
    method __init__ (line 140) | def __init__(self, model):
    method attack (line 144) | def attack(self, epsilon=0.25, emb_name='word_embeddings'):
    method restore (line 154) | def restore(self, emb_name='word_embeddings'):
  class FocalLoss (line 164) | class FocalLoss(nn.Module):
    method __init__ (line 180) | def __init__(self, num_class, alpha=None, gamma=2,
    method forward (line 201) | def forward(self, input, target):
  function f1_match (line 244) | def f1_match(y_true,y_pred):

FILE: code/nezha-base-count3/pretrain/NEZHA/configuration_nezha.py
  class NeZhaConfig (line 6) | class NeZhaConfig(PretrainedConfig):
    method __init__ (line 82) | def __init__(

FILE: code/nezha-base-count3/pretrain/NEZHA/modeling_nezha.py
  function load_tf_weights_in_bert (line 48) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
  class BertEmbeddings (line 122) | class BertEmbeddings(nn.Module):
    method __init__ (line 125) | def __init__(self, config):
    method forward (line 134) | def forward(self, input_ids=None, token_type_ids=None, inputs_embeds=N...
  function relative_position_encoding (line 151) | def relative_position_encoding(depth, max_length=512, max_relative_posit...
  class BertSelfAttention (line 175) | class BertSelfAttention(nn.Module):
    method __init__ (line 176) | def __init__(self, config):
    method transpose_for_scores (line 200) | def transpose_for_scores(self, x):
    method forward (line 205) | def forward(
  class BertSelfOutput (line 308) | class BertSelfOutput(nn.Module):
    method __init__ (line 309) | def __init__(self, config):
    method forward (line 315) | def forward(self, hidden_states, input_tensor):
  class BertAttention (line 322) | class BertAttention(nn.Module):
    method __init__ (line 323) | def __init__(self, config):
    method prune_heads (line 329) | def prune_heads(self, heads):
    method forward (line 347) | def forward(
  class BertIntermediate (line 373) | class BertIntermediate(nn.Module):
    method __init__ (line 374) | def __init__(self, config):
    method forward (line 382) | def forward(self, hidden_states):
  class BertOutput (line 388) | class BertOutput(nn.Module):
    method __init__ (line 389) | def __init__(self, config):
    method forward (line 395) | def forward(self, hidden_states, input_tensor):
  class BertLayer (line 402) | class BertLayer(nn.Module):
    method __init__ (line 403) | def __init__(self, config):
    method forward (line 416) | def forward(
    method feed_forward_chunk (line 481) | def feed_forward_chunk(self, attention_output):
  class NeZhaEncoder (line 487) | class NeZhaEncoder(nn.Module):
    method __init__ (line 488) | def __init__(self, config):
    method forward (line 495) | def forward(
  class BertPooler (line 588) | class BertPooler(nn.Module):
    method __init__ (line 589) | def __init__(self, config):
    method forward (line 594) | def forward(self, hidden_states):
  class BertPredictionHeadTransform (line 603) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, hidden_states):
  class BertLMPredictionHead (line 620) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 621) | def __init__(self, config):
    method forward (line 634) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 640) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 641) | def __init__(self, config):
    method forward (line 645) | def forward(self, sequence_output):
  class BertOnlyNSPHead (line 650) | class BertOnlyNSPHead(nn.Module):
    method __init__ (line 651) | def __init__(self, config):
    method forward (line 655) | def forward(self, pooled_output):
  class BertPreTrainingHeads (line 660) | class BertPreTrainingHeads(nn.Module):
    method __init__ (line 661) | def __init__(self, config):
    method forward (line 666) | def forward(self, sequence_output, pooled_output):
  class BertPreTrainedModel (line 672) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 682) | def _init_weights(self, module):
  class BertForPreTrainingOutput (line 700) | class BertForPreTrainingOutput(ModelOutput):
  class NeZhaModel (line 805) | class NeZhaModel(BertPreTrainedModel):
    method __init__ (line 819) | def __init__(self, config, add_pooling_layer=True):
    method get_input_embeddings (line 830) | def get_input_embeddings(self):
    method set_input_embeddings (line 833) | def set_input_embeddings(self, value):
    method _prune_heads (line 836) | def _prune_heads(self, heads_to_prune):
    method forward (line 851) | def forward(
  class BertForPreTraining (line 982) | class BertForPreTraining(BertPreTrainedModel):
    method __init__ (line 983) | def __init__(self, config):
    method get_output_embeddings (line 991) | def get_output_embeddings(self):
    method set_output_embeddings (line 994) | def set_output_embeddings(self, new_embeddings):
    method forward (line 999) | def forward(
  class BertLMHeadModel (line 1083) | class BertLMHeadModel(BertPreTrainedModel):
    method __init__ (line 1088) | def __init__(self, config):
    method get_output_embeddings (line 1099) | def get_output_embeddings(self):
    method set_output_embeddings (line 1102) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1107) | def forward(
    method prepare_inputs_for_generation (line 1209) | def prepare_inputs_for_generation(self, input_ids, past=None, attentio...
    method _reorder_cache (line 1221) | def _reorder_cache(self, past, beam_idx):
  class NeZhaForMaskedLM (line 1229) | class NeZhaForMaskedLM(BertPreTrainedModel):
    method __init__ (line 1234) | def __init__(self, config):
    method get_output_embeddings (line 1248) | def get_output_embeddings(self):
    method set_output_embeddings (line 1251) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1261) | def forward(
    method prepare_inputs_for_generation (line 1318) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class BertForNextSentencePrediction (line 1337) | class BertForNextSentencePrediction(BertPreTrainedModel):
    method __init__ (line 1338) | def __init__(self, config):
    method forward (line 1348) | def forward(
  class BertForSequenceClassification (line 1438) | class BertForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 1439) | def __init__(self, config):
    method forward (line 1456) | def forward(
  class BertForMultipleChoice (line 1523) | class BertForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1524) | def __init__(self, config):
    method forward (line 1540) | def forward(
  class BertForTokenClassification (line 1613) | class BertForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1617) | def __init__(self, config):
    method forward (line 1634) | def forward(
  class BertForQuestionAnswering (line 1704) | class BertForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 1708) | def __init__(self, config):
    method forward (line 1724) | def forward(

FILE: code/nezha-base-count3/pretrain/NLP_Utils.py
  function writeToJsonFile (line 10) | def writeToJsonFile(path: str, obj):
  function readFromJsonFile (line 13) | def readFromJsonFile(path: str):
  function loadData (line 17) | def loadData(path):
  function calNegPos (line 35) | def calNegPos(ls):#计算正负比例
  function paddingList (line 54) | def paddingList(ls:list,val,returnTensor=False):
  function truncate (line 61) | def truncate(a:list,b:list,maxLen):
  class MLM_Data (line 77) | class MLM_Data(Dataset):
    method __init__ (line 79) | def __init__(self,textLs:list,maxLen:int,tk:BertTokenizer):
    method __len__ (line 87) | def __len__(self):
    method random_mask (line 90) | def random_mask(self,text_ids):
    method __getitem__ (line 128) | def __getitem__(self, item):
    method collate (line 143) | def collate(cls,batch):
  function blockShuffle (line 163) | def blockShuffle(data:list,bs:int,sortBsNum,key):
  class blockShuffleDataLoader (line 179) | class blockShuffleDataLoader(DataLoader):
    method __init__ (line 180) | def __init__(self, dataset: Dataset,sortBsNum,key,**kwargs):
    method __iter__ (line 186) | def __iter__(self):

FILE: code/nezha-base-count3/pretrain/transformers1/__main__.py
  function main (line 2) | def main():

FILE: code/nezha-base-count3/pretrain/transformers1/activations.py
  function swish (line 11) | def swish(x):
  function _gelu_python (line 15) | def _gelu_python(x):
  function gelu_new (line 25) | def gelu_new(x):
  function gelu_fast (line 38) | def gelu_fast(x):
  function get_activation (line 52) | def get_activation(activation_string):

FILE: code/nezha-base-count3/pretrain/transformers1/benchmark/benchmark.py
  class PyTorchBenchmark (line 38) | class PyTorchBenchmark(Benchmark):
    method framework_version (line 45) | def framework_version(self):
    method train (line 48) | def train(self, model_name, batch_size, sequence_length, trace_memory=...
    method inference (line 100) | def inference(self, model_name, batch_size, sequence_length, trace_mem...

FILE: code/nezha-base-count3/pretrain/transformers1/benchmark/benchmark_args.py
  function is_tpu_available (line 37) | def is_tpu_available():
  class PyTorchBenchmarkArguments (line 45) | class PyTorchBenchmarkArguments(BenchmarkArguments):
    method _setup_devices (line 52) | def _setup_devices(self) -> Tuple["torch.device", int]:
    method device_idx (line 67) | def device_idx(self) -> int:
    method device (line 72) | def device(self) -> "torch.device":
    method n_gpu (line 77) | def n_gpu(self):

FILE: code/nezha-base-count3/pretrain/transformers1/benchmark/benchmark_args_utils.py
  function list_field (line 24) | def list_field(default=None, metadata=None):
  class BenchmarkArguments (line 29) | class BenchmarkArguments:
    method to_json_string (line 90) | def to_json_string(self):
    method model_names (line 97) | def model_names(self):

FILE: code/nezha-base-count3/pretrain/transformers1/benchmark/benchmark_utils.py
  function is_memory_tracing_enabled (line 43) | def is_memory_tracing_enabled():
  class Frame (line 48) | class Frame(NamedTuple):
  class UsedMemoryState (line 65) | class UsedMemoryState(NamedTuple):
  class Memory (line 77) | class Memory(NamedTuple):
    method __repr__ (line 85) | def __repr__(self) -> str:
  class MemoryState (line 89) | class MemoryState(NamedTuple):
  class MemorySummary (line 103) | class MemorySummary(NamedTuple):
  function start_memory_tracing (line 123) | def start_memory_tracing(
  function stop_memory_tracing (line 273) | def stop_memory_tracing(
  function bytes_to_mega_bytes (line 370) | def bytes_to_mega_bytes(memory_amount: int) -> int:
  class Benchmark (line 376) | class Benchmark(ABC):
    method __init__ (line 386) | def __init__(self, args: BenchmarkArguments = None, configs: Pretraine...
    method print_fn (line 401) | def print_fn(self):
    method is_gpu (line 421) | def is_gpu(self):
    method framework_version (line 426) | def framework_version(self):
    method train (line 430) | def train(self, model_name, batch_size, sequence_length):
    method inference (line 434) | def inference(self, model_name, batch_size, sequence_length):
    method run (line 437) | def run(self):
    method environment_info (line 512) | def environment_info(self):
    method print_results (line 572) | def print_results(self, result_dict):
    method print_memory_trace_statistics (line 585) | def print_memory_trace_statistics(self, summary: MemorySummary):
    method save_to_csv (line 609) | def save_to_csv(self, result_dict, filename):

FILE: code/nezha-base-count3/pretrain/transformers1/benchmark_utils.py
  function is_memory_tracing_enabled (line 29) | def is_memory_tracing_enabled():
  class Frame (line 34) | class Frame(NamedTuple):
  class UsedMemoryState (line 51) | class UsedMemoryState(NamedTuple):
  class Memory (line 63) | class Memory(NamedTuple):
    method __repr__ (line 71) | def __repr__(self) -> str:
  class MemoryState (line 75) | class MemoryState(NamedTuple):
  class MemorySummary (line 89) | class MemorySummary(NamedTuple):
  function start_memory_tracing (line 108) | def start_memory_tracing(
  function stop_memory_tracing (line 256) | def stop_memory_tracing(
  function bytes_to_human_readable (line 334) | def bytes_to_human_readable(memory_amount: int) -> str:

FILE: code/nezha-base-count3/pretrain/transformers1/commands/__init__.py
  class BaseTransformersCLICommand (line 5) | class BaseTransformersCLICommand(ABC):
    method register_subcommand (line 8) | def register_subcommand(parser: ArgumentParser):
    method run (line 12) | def run(self):

FILE: code/nezha-base-count3/pretrain/transformers1/commands/convert.py
  function convert_command_factory (line 7) | def convert_command_factory(args: Namespace):
  class ConvertCommand (line 17) | class ConvertCommand(BaseTransformersCLICommand):
    method register_subcommand (line 19) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 46) | def __init__(
    method run (line 64) | def run(self):

FILE: code/nezha-base-count3/pretrain/transformers1/commands/download.py
  function download_command_factory (line 6) | def download_command_factory(args):
  class DownloadCommand (line 10) | class DownloadCommand(BaseTransformersCLICommand):
    method register_subcommand (line 12) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 23) | def __init__(self, model: str, cache: str, force: bool):
    method run (line 28) | def run(self):

FILE: code/nezha-base-count3/pretrain/transformers1/commands/env.py
  function info_command_factory (line 9) | def info_command_factory(_):
  class EnvironmentCommand (line 13) | class EnvironmentCommand(BaseTransformersCLICommand):
    method register_subcommand (line 15) | def register_subcommand(parser: ArgumentParser):
    method run (line 19) | def run(self):
    method format_dict (line 57) | def format_dict(d):

FILE: code/nezha-base-count3/pretrain/transformers1/commands/run.py
  function try_infer_format_from_ext (line 11) | def try_infer_format_from_ext(path: str):
  function run_command_factory (line 25) | def run_command_factory(args):
  class RunCommand (line 44) | class RunCommand(BaseTransformersCLICommand):
    method __init__ (line 45) | def __init__(self, nlp: Pipeline, reader: PipelineDataFormat):
    method register_subcommand (line 50) | def register_subcommand(parser: ArgumentParser):
    method run (line 81) | def run(self):

FILE: code/nezha-base-count3/pretrain/transformers1/commands/serving.py
  function Body (line 21) | def Body(*x, **y):
  function serve_command_factory (line 30) | def serve_command_factory(args: Namespace):
  class ServeModelInfoResult (line 45) | class ServeModelInfoResult(BaseModel):
  class ServeTokenizeResult (line 53) | class ServeTokenizeResult(BaseModel):
  class ServeDeTokenizeResult (line 62) | class ServeDeTokenizeResult(BaseModel):
  class ServeForwardResult (line 70) | class ServeForwardResult(BaseModel):
  class ServeCommand (line 78) | class ServeCommand(BaseTransformersCLICommand):
    method register_subcommand (line 80) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 106) | def __init__(self, pipeline: Pipeline, host: str, port: int, workers: ...
    method run (line 156) | def run(self):
    method model_info (line 159) | def model_info(self):
    method tokenize (line 162) | def tokenize(self, text_input: str = Body(None, embed=True), return_id...
    method detokenize (line 180) | def detokenize(
    method forward (line 198) | async def forward(self, inputs=Body(None, embed=True)):

FILE: code/nezha-base-count3/pretrain/transformers1/commands/train.py
  function train_command_factory (line 18) | def train_command_factory(args: Namespace):
  class TrainCommand (line 26) | class TrainCommand(BaseTransformersCLICommand):
    method register_subcommand (line 28) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 78) | def __init__(self, args: Namespace):
    method run (line 124) | def run(self):
    method run_torch (line 129) | def run_torch(self):
    method run_tf (line 132) | def run_tf(self):

FILE: code/nezha-base-count3/pretrain/transformers1/commands/transformers_cli.py
  function main (line 12) | def main():

FILE: code/nezha-base-count3/pretrain/transformers1/commands/user.py
  class UserCommands (line 16) | class UserCommands(BaseTransformersCLICommand):
    method register_subcommand (line 18) | def register_subcommand(parser: ArgumentParser):
  class ANSI (line 47) | class ANSI:
    method bold (line 57) | def bold(cls, s):
    method red (line 61) | def red(cls, s):
  class BaseUserCommand (line 65) | class BaseUserCommand:
    method __init__ (line 66) | def __init__(self, args):
  class LoginCommand (line 71) | class LoginCommand(BaseUserCommand):
    method run (line 72) | def run(self):
  class WhoamiCommand (line 98) | class WhoamiCommand(BaseUserCommand):
    method run (line 99) | def run(self):
  class LogoutCommand (line 115) | class LogoutCommand(BaseUserCommand):
    method run (line 116) | def run(self):
  class ListObjsCommand (line 126) | class ListObjsCommand(BaseUserCommand):
    method tabulate (line 127) | def tabulate(self, rows: List[List[Union[str, int]]], headers: List[st...
    method run (line 142) | def run(self):
  class DeleteObjCommand (line 160) | class DeleteObjCommand(BaseUserCommand):
    method run (line 161) | def run(self):
  class UploadCommand (line 175) | class UploadCommand(BaseUserCommand):
    method walk_dir (line 176) | def walk_dir(self, rel_path):
    method run (line 187) | def run(self):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_albert.py
  class AlbertConfig (line 33) | class AlbertConfig(PretrainedConfig):
    method __init__ (line 104) | def __init__(

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_auto.py
  class AutoConfig (line 98) | class AutoConfig:
    method __init__ (line 109) | def __init__(self):
    method for_model (line 116) | def for_model(cls, model_type: str, *args, **kwargs):
    method from_pretrained (line 127) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_bart.py
  class BartConfig (line 34) | class BartConfig(PretrainedConfig):
    method __init__ (line 40) | def __init__(
    method num_attention_heads (line 121) | def num_attention_heads(self) -> int:
    method hidden_size (line 125) | def hidden_size(self) -> int:
    method is_valid_mbart (line 128) | def is_valid_mbart(self) -> bool:

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_bert.py
  class BertConfig (line 53) | class BertConfig(PretrainedConfig):
    method __init__ (line 109) | def __init__(

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_camembert.py
  class CamembertConfig (line 33) | class CamembertConfig(RobertaConfig):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_ctrl.py
  class CTRLConfig (line 28) | class CTRLConfig(PretrainedConfig):
    method __init__ (line 83) | def __init__(
    method max_position_embeddings (line 125) | def max_position_embeddings(self):
    method hidden_size (line 129) | def hidden_size(self):
    method num_attention_heads (line 133) | def num_attention_heads(self):
    method num_hidden_layers (line 137) | def num_hidden_layers(self):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_distilbert.py
  class DistilBertConfig (line 36) | class DistilBertConfig(PretrainedConfig):
    method __init__ (line 96) | def __init__(
    method hidden_size (line 130) | def hidden_size(self):
    method num_attention_heads (line 134) | def num_attention_heads(self):
    method num_hidden_layers (line 138) | def num_hidden_layers(self):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_electra.py
  class ElectraConfig (line 36) | class ElectraConfig(PretrainedConfig):
    method __init__ (line 95) | def __init__(

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_encoder_decoder.py
  class EncoderDecoderConfig (line 26) | class EncoderDecoderConfig(PretrainedConfig):
    method __init__ (line 62) | def __init__(self, **kwargs):
    method from_encoder_decoder_configs (line 79) | def from_encoder_decoder_configs(
    method to_dict (line 90) | def to_dict(self):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_flaubert.py
  class FlaubertConfig (line 33) | class FlaubertConfig(XLMConfig):
    method __init__ (line 147) | def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_...

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_gpt2.py
  class GPT2Config (line 35) | class GPT2Config(PretrainedConfig):
    method __init__ (line 117) | def __init__(
    method max_position_embeddings (line 164) | def max_position_embeddings(self):
    method hidden_size (line 168) | def hidden_size(self):
    method num_attention_heads (line 172) | def num_attention_heads(self):
    method num_hidden_layers (line 176) | def num_hidden_layers(self):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_longformer.py
  class LongformerConfig (line 34) | class LongformerConfig(RobertaConfig):
    method __init__ (line 65) | def __init__(self, attention_window: Union[List[int], int] = 512, sep_...

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_marian.py
  class MarianConfig (line 25) | class MarianConfig(BartConfig):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_mmbt.py
  class MMBTConfig (line 25) | class MMBTConfig(object):
    method __init__ (line 38) | def __init__(self, config, num_labels=None, modal_hidden_size=2048):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_openai.py
  class OpenAIGPTConfig (line 31) | class OpenAIGPTConfig(PretrainedConfig):
    method __init__ (line 115) | def __init__(
    method max_position_embeddings (line 159) | def max_position_embeddings(self):
    method hidden_size (line 163) | def hidden_size(self):
    method num_attention_heads (line 167) | def num_attention_heads(self):
    method num_hidden_layers (line 171) | def num_hidden_layers(self):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_reformer.py
  class ReformerConfig (line 32) | class ReformerConfig(PretrainedConfig):
    method __init__ (line 141) | def __init__(

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_roberta.py
  class RobertaConfig (line 36) | class RobertaConfig(BertConfig):
    method __init__ (line 65) | def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **k...

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_t5.py
  class T5Config (line 34) | class T5Config(PretrainedConfig):
    method __init__ (line 64) | def __init__(
    method max_position_embeddings (line 98) | def max_position_embeddings(self):
    method hidden_size (line 102) | def hidden_size(self):
    method num_attention_heads (line 106) | def num_attention_heads(self):
    method num_hidden_layers (line 110) | def num_hidden_layers(self):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_transfo_xl.py
  class TransfoXLConfig (line 31) | class TransfoXLConfig(PretrainedConfig):
    method __init__ (line 117) | def __init__(
    method max_position_embeddings (line 186) | def max_position_embeddings(self):
    method n_token (line 190) | def n_token(self):  # Backward compatibility
    method n_token (line 194) | def n_token(self, value):  # Backward compatibility
    method hidden_size (line 198) | def hidden_size(self):
    method num_attention_heads (line 202) | def num_attention_heads(self):
    method num_hidden_layers (line 206) | def num_hidden_layers(self):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_utils.py
  class PretrainedConfig (line 31) | class PretrainedConfig(object):
    method __init__ (line 56) | def __init__(self, **kwargs):
    method num_labels (line 118) | def num_labels(self):
    method num_labels (line 122) | def num_labels(self, num_labels):
    method save_pretrained (line 126) | def save_pretrained(self, save_directory):
    method from_pretrained (line 146) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs) -> "...
    method get_config_dict (line 205) | def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs)...
    method from_dict (line 270) | def from_dict(cls, config_dict: Dict, **kwargs) -> "PretrainedConfig":
    method from_json_file (line 308) | def from_json_file(cls, json_file: str) -> "PretrainedConfig":
    method _dict_from_json_file (line 324) | def _dict_from_json_file(cls, json_file: str):
    method __eq__ (line 329) | def __eq__(self, other):
    method __repr__ (line 332) | def __repr__(self):
    method to_diff_dict (line 335) | def to_diff_dict(self):
    method to_dict (line 358) | def to_dict(self):
    method to_json_string (line 370) | def to_json_string(self, use_diff=True):
    method to_json_file (line 387) | def to_json_file(self, json_file_path, use_diff=True):
    method update (line 400) | def update(self, config_dict: Dict):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_xlm.py
  class XLMConfig (line 39) | class XLMConfig(PretrainedConfig):
    method __init__ (line 159) | def __init__(
    method n_words (line 235) | def n_words(self):  # For backward compatibility
    method n_words (line 239) | def n_words(self, value):  # For backward compatibility
    method hidden_size (line 243) | def hidden_size(self):
    method num_attention_heads (line 247) | def num_attention_heads(self):
    method num_hidden_layers (line 251) | def num_hidden_layers(self):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_xlm_roberta.py
  class XLMRobertaConfig (line 36) | class XLMRobertaConfig(RobertaConfig):

FILE: code/nezha-base-count3/pretrain/transformers1/configuration_xlnet.py
  class XLNetConfig (line 32) | class XLNetConfig(PretrainedConfig):
    method __init__ (line 129) | def __init__(
    method max_position_embeddings (line 194) | def max_position_embeddings(self):
    method n_token (line 198) | def n_token(self):  # Backward compatibility
    method n_token (line 202) | def n_token(self, value):  # Backward compatibility
    method hidden_size (line 206) | def hidden_size(self):
    method num_attention_heads (line 210) | def num_attention_heads(self):
    method num_hidden_layers (line 214) | def num_hidden_layers(self):

FILE: code/nezha-base-count3/pretrain/transformers1/convert_albert_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_f...

FILE: code/nezha-base-count3/pretrain/transformers1/convert_bart_original_pytorch_checkpoint_to_pytorch.py
  function remove_ignore_keys_ (line 56) | def remove_ignore_keys_(state_dict):
  function rename_key (line 68) | def rename_key(dct, old, new):
  function load_xsum_checkpoint (line 73) | def load_xsum_checkpoint(checkpoint_path):
  function convert_checkpoint_from_disk (line 81) | def convert_checkpoint_from_disk(checkpoint_path, **config_kwargs):
  function convert_bart_checkpoint (line 95) | def convert_bart_checkpoint(checkpoint_path, pytorch_dump_folder_path, h...

FILE: code/nezha-base-count3/pretrain/transformers1/convert_bert_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_fil...

FILE: code/nezha-base-count3/pretrain/transformers1/convert_bert_pytorch_checkpoint_to_original_tf.py
  function convert_pytorch_checkpoint_to_tf (line 28) | def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, mo...
  function main (line 92) | def main(raw_args=None):

FILE: code/nezha-base-count3/pretrain/transformers1/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
  function convert_dialogpt_checkpoint (line 15) | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folde...

FILE: code/nezha-base-count3/pretrain/transformers1/convert_electra_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, py...

FILE: code/nezha-base-count3/pretrain/transformers1/convert_gpt2_original_tf_checkpoint_to_pytorch.py
  function convert_gpt2_checkpoint_to_pytorch (line 29) | def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config...

FILE: code/nezha-base-count3/pretrain/transformers1/convert_graph_to_onnx.py
  class OnnxConverterArgumentParser (line 11) | class OnnxConverterArgumentParser(ArgumentParser):
    method __init__ (line 16) | def __init__(self):
  function ensure_valid_input (line 28) | def ensure_valid_input(model, tokens, input_names):
  function infer_shapes (line 53) | def infer_shapes(nlp: Pipeline, framework: str) -> Tuple[List[str], List...
  function load_graph_from_args (line 100) | def load_graph_from_args(framework: str, model: str, tokenizer: Optional...
  function convert_pytorch (line 111) | def convert_pytorch(nlp: Pipeline, opset: int, output: str, use_external...
  function convert_tensorflow (line 138) | def convert_tensorflow(nlp: Pipeline, opset: int, output: str):
  function convert (line 166) | def convert(
  function verify (line 193) | def verify(path: str):

FILE: code/nezha-base-count3/pretrain/transformers1/convert_longformer_original_pytorch_lightning_to_pytorch.py
  class LightningModel (line 26) | class LightningModel(pl.LightningModule):
    method __init__ (line 27) | def __init__(self, model):
    method forward (line 34) | def forward(self):
  function convert_longformer_qa_checkpoint_to_pytorch (line 38) | def convert_longformer_qa_checkpoint_to_pytorch(

FILE: code/nezha-base-count3/pretrain/transformers1/convert_marian_to_pytorch.py
  function remove_prefix (line 18) | def remove_prefix(text: str, prefix: str):
  function convert_encoder_layer (line 24) | def convert_encoder_layer(opus_dict, layer_prefix: str, converter: dict):
  function load_layers_ (line 35) | def load_layers_(layer_lst: torch.nn.ModuleList, opus_state: dict, conve...
  function find_pretrained_model (line 42) | def find_pretrained_model(src_lang: str, tgt_lang: str) -> List[str]:
  function add_emb_entries (line 55) | def add_emb_entries(wemb, final_bias, n_special_tokens=1):
  function _cast_yaml_str (line 64) | def _cast_yaml_str(v):
  function cast_marian_config (line 76) | def cast_marian_config(raw_cfg: Dict[str, str]) -> Dict:
  function load_config_from_state_dict (line 83) | def load_config_from_state_dict(opus_dict):
  function find_model_file (line 91) | def find_model_file(dest_dir):  # this one better
  function convert_opus_name_to_hf_name (line 136) | def convert_opus_name_to_hf_name(x):
  function convert_hf_name_to_opus_name (line 142) | def convert_hf_name_to_opus_name(hf_model_name):
  function write_model_card (line 152) | def write_model_card(
  function get_clean_model_id_mapping (line 185) | def get_clean_model_id_mapping(multiling_model_ids):
  function make_registry (line 189) | def make_registry(repo_path="Opus-MT-train/models"):
  function convert_all_sentencepiece_models (line 206) | def convert_all_sentencepiece_models(model_list=None, repo_path=None):
  function lmap (line 222) | def lmap(f, x) -> List:
  function fetch_test_set (line 226) | def fetch_test_set(test_set_url):
  function convert_whole_dir (line 239) | def convert_whole_dir(path=Path("marian_ckpt/")):
  function _parse_readme (line 247) | def _parse_readme(lns):
  function save_tokenizer_config (line 270) | def save_tokenizer_config(dest_dir: Path):
  function add_to_vocab_ (line 276) | def add_to_vocab_(vocab: Dict[str, int], special_tokens: List[str]):
  function find_vocab_file (line 287) | def find_vocab_file(model_dir):
  function add_special_tokens_to_vocab (line 291) | def add_special_tokens_to_vocab(model_dir: Path) -> None:
  function save_tokenizer (line 300) | def save_tokenizer(self, save_directory):
  function check_equal (line 309) | def check_equal(marian_cfg, k1, k2):
  function check_marian_cfg_assumptions (line 314) | def check_marian_cfg_assumptions(marian_cfg):
  class OpusState (line 371) | class OpusState:
    method __init__ (line 372) | def __init__(self, source_dir):
    method _check_layer_entries (line 420) | def _check_layer_entries(self):
    method extra_keys (line 432) | def extra_keys(self):
    method sub_keys (line 445) | def sub_keys(self, layer_prefix):
    method load_marian_model (line 448) | def load_marian_model(self) -> MarianMTModel:
  function download_and_unzip (line 483) | def download_and_unzip(url, dest_dir):
  function convert (line 494) | def convert(source_dir: Path, dest_dir):
  function load_yaml (line 525) | def load_yaml(path):
  function save_json (line 532) | def save_json(content: Union[Dict, List], path: str) -> None:
  function unzip (line 537) | def unzip(zip_path: str, dest_dir: str) -> None:

FILE: code/nezha-base-count3/pretrain/transformers1/convert_openai_original_tf_checkpoint_to_pytorch.py
  function convert_openai_checkpoint_to_pytorch (line 29) | def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, ...

FILE: code/nezha-base-count3/pretrain/transformers1/convert_pytorch_checkpoint_to_tf2.py
  function convert_pt_checkpoint_to_tf (line 187) | def convert_pt_checkpoint_to_tf(
  function convert_all_pt_checkpoints_to_tf (line 233) | def convert_all_pt_checkpoints_to_tf(

FILE: code/nezha-base-count3/pretrain/transformers1/convert_reformer_trax_checkpoint_to_pytorch.py
  function set_param (line 31) | def set_param(torch_layer, weight, bias=None):
  function set_layer_weights_in_torch_lsh (line 40) | def set_layer_weights_in_torch_lsh(weights, torch_layer, hidden_size):
  function set_layer_weights_in_torch_local (line 58) | def set_layer_weights_in_torch_local(weights, torch_layer, hidden_size):
  function set_block_weights_in_torch (line 79) | def set_block_weights_in_torch(weights, torch_block, hidden_size):
  function set_model_weights_in_torch (line 128) | def set_model_weights_in_torch(weights, torch_model, hidden_size):
  function convert_trax_checkpoint_to_pytorch (line 174) | def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file,...

FILE: code/nezha-base-count3/pretrain/transformers1/convert_roberta_original_pytorch_checkpoint_to_pytorch.py
  function convert_roberta_checkpoint_to_pytorch (line 42) | def convert_roberta_checkpoint_to_pytorch(

FILE: code/nezha-base-count3/pretrain/transformers1/convert_t5_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, py...

FILE: code/nezha-base-count3/pretrain/transformers1/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
  function convert_transfo_xl_checkpoint_to_pytorch (line 47) | def convert_transfo_xl_checkpoint_to_pytorch(

FILE: code/nezha-base-count3/pretrain/transformers1/convert_xlm_original_pytorch_checkpoint_to_pytorch.py
  function convert_xlm_checkpoint_to_pytorch (line 32) | def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_...

FILE: code/nezha-base-count3/pretrain/transformers1/convert_xlnet_original_tf_checkpoint_to_pytorch.py
  function convert_xlnet_checkpoint_to_pytorch (line 51) | def convert_xlnet_checkpoint_to_pytorch(

FILE: code/nezha-base-count3/pretrain/transformers1/data/data_collator.py
  class DataCollator (line 12) | class DataCollator(ABC):
    method collate_batch (line 19) | def collate_batch(self) -> Dict[str, torch.Tensor]:
  class DefaultDataCollator (line 33) | class DefaultDataCollator(DataCollator):
    method collate_batch (line 46) | def collate_batch(self, features: List[InputDataClass]) -> Dict[str, t...
  class DataCollatorForLanguageModeling (line 80) | class DataCollatorForLanguageModeling(DataCollator):
    method collate_batch (line 91) | def collate_batch(self, examples: List[torch.Tensor]) -> Dict[str, tor...
    method _tensorize_batch (line 99) | def _tensorize_batch(self, examples: List[torch.Tensor]) -> torch.Tensor:
    method mask_tokens (line 112) | def mask_tokens(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, tor...
    method mask_tokens2 (line 148) | def mask_tokens2(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens3 (line 192) | def mask_tokens3(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens4 (line 259) | def mask_tokens4(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens5 (line 342) | def mask_tokens5(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens6 (line 427) | def mask_tokens6(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens7 (line 507) | def mask_tokens7(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...

FILE: code/nezha-base-count3/pretrain/transformers1/data/datasets/glue.py
  class GlueDataTrainingArguments (line 23) | class GlueDataTrainingArguments:
    method __post_init__ (line 47) | def __post_init__(self):
  class Split (line 51) | class Split(Enum):
  class GlueDataset (line 57) | class GlueDataset(Dataset):
    method __init__ (line 67) | def __init__(
    method __len__ (line 135) | def __len__(self):
    method __getitem__ (line 138) | def __getitem__(self, i) -> InputFeatures:
    method get_labels (line 141) | def get_labels(self):

FILE: code/nezha-base-count3/pretrain/transformers1/data/datasets/language_modeling.py
  class TextDataset (line 16) | class TextDataset(Dataset):
    method __init__ (line 22) | def __init__(
    method __len__ (line 71) | def __len__(self):
    method __getitem__ (line 74) | def __getitem__(self, i) -> torch.Tensor:
  class LineByLineTextDataset (line 78) | class LineByLineTextDataset(Dataset):
    method __init__ (line 84) | def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, blo...
    method __len__ (line 97) | def __len__(self):
    method __getitem__ (line 100) | def __getitem__(self, i) -> torch.Tensor:

FILE: code/nezha-base-count3/pretrain/transformers1/data/metrics/__init__.py
  function is_sklearn_available (line 26) | def is_sklearn_available():
  function simple_accuracy (line 32) | def simple_accuracy(preds, labels):
  function acc_and_f1 (line 35) | def acc_and_f1(preds, labels):
  function pearson_and_spearman (line 44) | def pearson_and_spearman(preds, labels):
  function glue_compute_metrics (line 53) | def glue_compute_metrics(task_name, preds, labels):
  function xnli_compute_metrics (line 80) | def xnli_compute_metrics(task_name, preds, labels):

FILE: code/nezha-base-count3/pretrain/transformers1/data/metrics/squad_metrics.py
  function normalize_answer (line 24) | def normalize_answer(s):
  function get_tokens (line 44) | def get_tokens(s):
  function compute_exact (line 50) | def compute_exact(a_gold, a_pred):
  function compute_f1 (line 54) | def compute_f1(a_gold, a_pred):
  function get_raw_scores (line 70) | def get_raw_scores(examples, preds):
  function apply_no_ans_threshold (line 96) | def apply_no_ans_threshold(scores, na_probs, qid_to_has_ans, na_prob_thr...
  function make_eval_dict (line 107) | def make_eval_dict(exact_scores, f1_scores, qid_list=None):
  function merge_eval (line 128) | def merge_eval(main_eval, new_eval, prefix):
  function find_best_thresh_v2 (line 133) | def find_best_thresh_v2(preds, scores, na_probs, qid_to_has_ans):
  function find_all_best_thresh_v2 (line 167) | def find_all_best_thresh_v2(main_eval, preds, exact_raw, f1_raw, na_prob...
  function find_best_thresh (line 178) | def find_best_thresh(preds, scores, na_probs, qid_to_has_ans):
  function find_all_best_thresh (line 201) | def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, ...
  function squad_evaluate (line 211) | def squad_evaluate(examples, preds, no_answer_probs=None, no_answer_prob...
  function get_final_text (line 242) | def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=...
  function _get_best_indexes (line 336) | def _get_best_indexes(logits, n_best_size):
  function _compute_softmax (line 348) | def _compute_softmax(scores):
  function compute_predictions_logits (line 371) | def compute_predictions_logits(
  function compute_predictions_log_probs (line 576) | def compute_predictions_log_probs(

FILE: code/nezha-base-count3/pretrain/transformers1/data/processors/glue.py
  function glue_convert_examples_to_features (line 34) | def glue_convert_examples_to_features(
  function _tf_glue_convert_examples_to_features (line 70) | def _tf_glue_convert_examples_to_features(
  function _glue_convert_examples_to_features (line 107) | def _glue_convert_examples_to_features(
  class OutputMode (line 159) | class OutputMode(Enum):
  class MrpcProcessor (line 164) | class MrpcProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 167) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 176) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 181) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 185) | def get_test_examples(self, data_dir):
    method get_labels (line 189) | def get_labels(self):
    method _create_examples (line 193) | def _create_examples(self, lines, set_type):
  class MnliProcessor (line 207) | class MnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 210) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 219) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 223) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 227) | def get_test_examples(self, data_dir):
    method get_labels (line 231) | def get_labels(self):
    method _create_examples (line 235) | def _create_examples(self, lines, set_type):
  class MnliMismatchedProcessor (line 249) | class MnliMismatchedProcessor(MnliProcessor):
    method get_dev_examples (line 252) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 256) | def get_test_examples(self, data_dir):
  class ColaProcessor (line 261) | class ColaProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 264) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 273) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 277) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 281) | def get_test_examples(self, data_dir):
    method get_labels (line 285) | def get_labels(self):
    method _create_examples (line 289) | def _create_examples(self, lines, set_type):
  class Sst2Processor (line 304) | class Sst2Processor(DataProcessor):
    method get_example_from_tensor_dict (line 307) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 316) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 320) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 324) | def get_test_examples(self, data_dir):
    method get_labels (line 328) | def get_labels(self):
    method _create_examples (line 332) | def _create_examples(self, lines, set_type):
  class StsbProcessor (line 346) | class StsbProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 349) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 358) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 362) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 366) | def get_test_examples(self, data_dir):
    method get_labels (line 370) | def get_labels(self):
    method _create_examples (line 374) | def _create_examples(self, lines, set_type):
  class QqpProcessor (line 388) | class QqpProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 391) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 400) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 404) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 408) | def get_test_examples(self, data_dir):
    method get_labels (line 412) | def get_labels(self):
    method _create_examples (line 416) | def _create_examples(self, lines, set_type):
  class QnliProcessor (line 436) | class QnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 439) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 448) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 452) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 456) | def get_test_examples(self, data_dir):
    method get_labels (line 460) | def get_labels(self):
    method _create_examples (line 464) | def _create_examples(self, lines, set_type):
  class RteProcessor (line 478) | class RteProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 481) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 490) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 494) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 498) | def get_test_examples(self, data_dir):
    method get_labels (line 502) | def get_labels(self):
    method _create_examples (line 506) | def _create_examples(self, lines, set_type):
  class WnliProcessor (line 520) | class WnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 523) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 532) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 536) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 540) | def get_test_examples(self, data_dir):
    method get_labels (line 544) | def get_labels(self):
    method _create_examples (line 548) | def _create_examples(self, lines, set_type):

FILE: code/nezha-base-count3/pretrain/transformers1/data/processors/squad.py
  function _improve_answer_span (line 25) | def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, ...
  function _check_is_max_context (line 38) | def _check_is_max_context(doc_spans, cur_span_index, position):
  function _new_check_is_max_context (line 58) | def _new_check_is_max_context(doc_spans, cur_span_index, position):
  function _is_whitespace (line 80) | def _is_whitespace(c):
  function squad_convert_example_to_features (line 86) | def squad_convert_example_to_features(example, max_seq_length, doc_strid...
  function squad_convert_example_to_features_init (line 264) | def squad_convert_example_to_features_init(tokenizer_for_convert):
  function squad_convert_examples_to_features (line 269) | def squad_convert_examples_to_features(
  class SquadProcessor (line 445) | class SquadProcessor(DataProcessor):
    method _get_example_from_tensor_dict (line 454) | def _get_example_from_tensor_dict(self, tensor_dict, evaluate=False):
    method get_examples_from_dataset (line 478) | def get_examples_from_dataset(self, dataset, evaluate=False):
    method get_train_examples (line 509) | def get_train_examples(self, data_dir, filename=None):
    method get_dev_examples (line 531) | def get_dev_examples(self, data_dir, filename=None):
    method _create_examples (line 552) | def _create_examples(self, input_data, set_type):
  class SquadV1Processor (line 594) | class SquadV1Processor(SquadProcessor):
  class SquadV2Processor (line 599) | class SquadV2Processor(SquadProcessor):
  class SquadExample (line 604) | class SquadExample(object):
    method __init__ (line 619) | def __init__(
  class SquadFeatures (line 667) | class SquadFeatures(object):
    method __init__ (line 692) | def __init__(
  class SquadResult (line 729) | class SquadResult(object):
    method __init__ (line 739) | def __init__(self, unique_id, start_logits, end_logits, start_top_inde...

FILE: code/nezha-base-count3/pretrain/transformers1/data/processors/utils.py
  class InputExample (line 31) | class InputExample:
    method to_json_string (line 50) | def to_json_string(self):
  class InputFeatures (line 56) | class InputFeatures:
    method to_json_string (line 77) | def to_json_string(self):
  class DataProcessor (line 82) | class DataProcessor:
    method get_example_from_tensor_dict (line 85) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 93) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 97) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 101) | def get_test_examples(self, data_dir):
    method get_labels (line 105) | def get_labels(self):
    method tfds_map (line 109) | def tfds_map(self, example):
    method _read_tsv (line 117) | def _read_tsv(cls, input_file, quotechar=None):
  class SingleSentenceClassificationProcessor (line 123) | class SingleSentenceClassificationProcessor(DataProcessor):
    method __init__ (line 126) | def __init__(self, labels=None, examples=None, mode="classification", ...
    method __len__ (line 132) | def __len__(self):
    method __getitem__ (line 135) | def __getitem__(self, idx):
    method create_from_csv (line 141) | def create_from_csv(
    method create_from_examples (line 158) | def create_from_examples(cls, texts_or_text_and_labels, labels=None, *...
    method add_examples_from_csv (line 163) | def add_examples_from_csv(
    method add_examples (line 193) | def add_examples(
    method get_features (line 226) | def get_features(

FILE: code/nezha-base-count3/pretrain/transformers1/data/processors/xnli.py
  class XnliProcessor (line 28) | class XnliProcessor(DataProcessor):
    method __init__ (line 32) | def __init__(self, language, train_language=None):
    method get_train_examples (line 36) | def get_train_examples(self, data_dir):
    method get_test_examples (line 52) | def get_test_examples(self, data_dir):
    method get_labels (line 70) | def get_labels(self):

FILE: code/nezha-base-count3/pretrain/transformers1/file_utils.py
  function is_torch_available (line 93) | def is_torch_available():
  function is_tf_available (line 97) | def is_tf_available():
  function add_start_docstrings (line 101) | def add_start_docstrings(*docstr):
  function add_start_docstrings_to_callable (line 109) | def add_start_docstrings_to_callable(*docstr):
  function add_end_docstrings (line 127) | def add_end_docstrings(*docstr):
  function is_remote_url (line 135) | def is_remote_url(url_or_filename):
  function hf_bucket_url (line 140) | def hf_bucket_url(model_id: str, filename: str, use_cdn=True) -> str:
  function url_to_filename (line 164) | def url_to_filename(url, etag=None):
  function filename_to_url (line 188) | def filename_to_url(filename, cache_dir=None):
  function cached_path (line 214) | def cached_path(
  function http_get (line 306) | def http_get(url, temp_file, proxies=None, resume_size=0, user_agent=None):
  function get_from_cache (line 339) | def get_from_cache(
  class cached_property (line 453) | class cached_property(property):
    method __get__ (line 462) | def __get__(self, obj, objtype=None):
  function torch_required (line 476) | def torch_required(func):
  function tf_required (line 488) | def tf_required(func):

FILE: code/nezha-base-count3/pretrain/transformers1/hf_api.py
  class S3Obj (line 29) | class S3Obj:
    method __init__ (line 34) | def __init__(self, filename: str, LastModified: str, ETag: str, Size: ...
  class PresignedUrl (line 41) | class PresignedUrl:
    method __init__ (line 42) | def __init__(self, write: str, access: str, type: str, **kwargs):
  class S3Object (line 48) | class S3Object:
    method __init__ (line 53) | def __init__(
  class ModelInfo (line 69) | class ModelInfo:
    method __init__ (line 74) | def __init__(
  class HfApi (line 92) | class HfApi:
    method __init__ (line 93) | def __init__(self, endpoint=None):
    method login (line 96) | def login(self, username: str, password: str) -> str:
    method whoami (line 112) | def whoami(self, token: str) -> Tuple[str, List[str]]:
    method logout (line 122) | def logout(self, token: str) -> None:
    method presign (line 130) | def presign(self, token: str, filename: str, organization: Optional[st...
    method presign_and_upload (line 144) | def presign_and_upload(self, token: str, filename: str, filepath: str,...
    method list_objs (line 166) | def list_objs(self, token: str, organization: Optional[str] = None) ->...
    method delete_obj (line 177) | def delete_obj(self, token: str, filename: str, organization: Optional...
    method model_list (line 189) | def model_list(self) -> List[ModelInfo]:
  class TqdmProgressFileReader (line 200) | class TqdmProgressFileReader:
    method __init__ (line 209) | def __init__(self, f: io.BufferedReader):
    method _read (line 216) | def _read(self, n=-1):
    method close (line 220) | def close(self):
  class HfFolder (line 224) | class HfFolder:
    method save_token (line 228) | def save_token(cls, token):
    method get_token (line 237) | def get_token(cls):
    method delete_token (line 248) | def delete_token(cls):

FILE: code/nezha-base-count3/pretrain/transformers1/hf_argparser.py
  class HfArgumentParser (line 14) | class HfArgumentParser(ArgumentParser):
    method __init__ (line 26) | def __init__(self, dataclass_types: Union[DataClassType, Iterable[Data...
    method _add_dataclass_arguments (line 42) | def _add_dataclass_arguments(self, dtype: DataClassType):
    method parse_args_into_dataclasses (line 88) | def parse_args_into_dataclasses(
    method parse_json_file (line 146) | def parse_json_file(self, json_file: str) -> Tuple[DataClass, ...]:

FILE: code/nezha-base-count3/pretrain/transformers1/modelcard.py
  class ModelCard (line 38) | class ModelCard:
    method __init__ (line 55) | def __init__(self, **kwargs):
    method save_pretrained (line 75) | def save_pretrained(self, save_directory_or_file):
    method from_pretrained (line 88) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    method from_dict (line 186) | def from_dict(cls, json_object):
    method from_json_file (line 191) | def from_json_file(cls, json_file):
    method __eq__ (line 198) | def __eq__(self, other):
    method __repr__ (line 201) | def __repr__(self):
    method to_dict (line 204) | def to_dict(self):
    method to_json_string (line 209) | def to_json_string(self):
    method to_json_file (line 213) | def to_json_file(self, json_file_path):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_albert.py
  function load_tf_weights_in_albert (line 47) | def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
  class AlbertEmbeddings (line 171) | class AlbertEmbeddings(BertEmbeddings):
    method __init__ (line 176) | def __init__(self, config):
  class AlbertAttention (line 185) | class AlbertAttention(BertSelfAttention):
    method __init__ (line 186) | def __init__(self, config):
    method prune_heads (line 198) | def prune_heads(self, heads):
    method forward (line 221) | def forward(self, input_ids, attention_mask=None, head_mask=None):
  class AlbertLayer (line 266) | class AlbertLayer(nn.Module):
    method __init__ (line 267) | def __init__(self, config):
    method forward (line 277) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertLayerGroup (line 287) | class AlbertLayerGroup(nn.Module):
    method __init__ (line 288) | def __init__(self, config):
    method forward (line 295) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertTransformer (line 317) | class AlbertTransformer(nn.Module):
    method __init__ (line 318) | def __init__(self, config):
    method forward (line 327) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertPreTrainedModel (line 363) | class AlbertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 371) | def _init_weights(self, module):
  class AlbertModel (line 439) | class AlbertModel(AlbertPreTrainedModel):
    method __init__ (line 445) | def __init__(self, config):
    method get_input_embeddings (line 456) | def get_input_embeddings(self):
    method set_input_embeddings (line 459) | def set_input_embeddings(self, value):
    method _resize_token_embeddings (line 462) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 468) | def _prune_heads(self, heads_to_prune):
    method forward (line 487) | def forward(
  class AlbertForPreTraining (line 576) | class AlbertForPreTraining(AlbertPreTrainedModel):
    method __init__ (line 577) | def __init__(self, config):
    method tie_weights (line 587) | def tie_weights(self):
    method get_output_embeddings (line 590) | def get_output_embeddings(self):
    method forward (line 594) | def forward(
  class AlbertMLMHead (line 680) | class AlbertMLMHead(nn.Module):
    method __init__ (line 681) | def __init__(self, config):
    method forward (line 693) | def forward(self, hidden_states):
  class AlbertSOPHead (line 704) | class AlbertSOPHead(nn.Module):
    method __init__ (line 705) | def __init__(self, config):
    method forward (line 711) | def forward(self, pooled_output):
  class AlbertForMaskedLM (line 720) | class AlbertForMaskedLM(AlbertPreTrainedModel):
    method __init__ (line 721) | def __init__(self, config):
    method tie_weights (line 730) | def tie_weights(self):
    method get_output_embeddings (line 733) | def get_output_embeddings(self):
    method forward (line 737) | def forward(
  class AlbertForSequenceClassification (line 810) | class AlbertForSequenceClassification(AlbertPreTrainedModel):
    method __init__ (line 811) | def __init__(self, config):
    method forward (line 822) | def forward(
  class AlbertForTokenClassification (line 905) | class AlbertForTokenClassification(AlbertPreTrainedModel):
    method __init__ (line 906) | def __init__(self, config):
    method forward (line 917) | def forward(
  class AlbertForQuestionAnswering (line 1002) | class AlbertForQuestionAnswering(AlbertPreTrainedModel):
    method __init__ (line 1003) | def __init__(self, config):
    method forward (line 1013) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_auto.py
  class AutoModel (line 269) | class AutoModel:
    method __init__ (line 279) | def __init__(self):
    method from_config (line 287) | def from_config(cls, config):
    method from_pretrained (line 329) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForPreTraining (line 424) | class AutoModelForPreTraining:
    method __init__ (line 433) | def __init__(self):
    method from_config (line 441) | def from_config(cls, config):
    method from_pretrained (line 483) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelWithLMHead (line 570) | class AutoModelWithLMHead:
    method __init__ (line 580) | def __init__(self):
    method from_config (line 588) | def from_config(cls, config):
    method from_pretrained (line 630) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForSequenceClassification (line 718) | class AutoModelForSequenceClassification:
    method __init__ (line 728) | def __init__(self):
    method from_config (line 736) | def from_config(cls, config):
    method from_pretrained (line 778) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForQuestionAnswering (line 867) | class AutoModelForQuestionAnswering:
    method __init__ (line 877) | def __init__(self):
    method from_config (line 885) | def from_config(cls, config):
    method from_pretrained (line 924) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForTokenClassification (line 1009) | class AutoModelForTokenClassification:
    method __init__ (line 1019) | def __init__(self):
    method from_config (line 1027) | def from_config(cls, config):
    method from_pretrained (line 1069) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForMultipleChoice (line 1156) | class AutoModelForMultipleChoice:
    method __init__ (line 1166) | def __init__(self):
    method from_config (line 1174) | def from_config(cls, config):
    method from_pretrained (line 1189) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_bart.py
  function invert_mask (line 94) | def invert_mask(attention_mask):
  function _prepare_bart_decoder_inputs (line 99) | def _prepare_bart_decoder_inputs(
  class PretrainedBartModel (line 120) | class PretrainedBartModel(PreTrainedModel):
    method _init_weights (line 124) | def _init_weights(self, module):
    method dummy_inputs (line 138) | def dummy_inputs(self):
  function _make_linear_from_emb (line 148) | def _make_linear_from_emb(emb):
  function _check_shapes (line 156) | def _check_shapes(shape_1, shape2):
  function shift_tokens_right (line 161) | def shift_tokens_right(input_ids, pad_token_id):
  function make_padding_mask (line 170) | def make_padding_mask(input_ids, padding_idx=1):
  class EncoderLayer (line 181) | class EncoderLayer(nn.Module):
    method __init__ (line 182) | def __init__(self, config: BartConfig):
    method forward (line 198) | def forward(self, x, encoder_padding_mask):
  class BartEncoder (line 234) | class BartEncoder(nn.Module):
    method __init__ (line 243) | def __init__(self, config: BartConfig, embed_tokens):
    method forward (line 270) | def forward(
  class DecoderLayer (line 327) | class DecoderLayer(nn.Module):
    method __init__ (line 328) | def __init__(self, config: BartConfig):
    method forward (line 352) | def forward(
  class BartDecoder (line 416) | class BartDecoder(nn.Module):
    method __init__ (line 425) | def __init__(self, config: BartConfig, embed_tokens: nn.Embedding):
    method forward (line 449) | def forward(
  function _reorder_buffer (line 542) | def _reorder_buffer(attn_cache, new_order):
  class SelfAttention (line 549) | class SelfAttention(nn.Module):
    method __init__ (line 552) | def __init__(
    method _shape (line 575) | def _shape(self, tensor, dim_0, bsz):
    method forward (line 578) | def forward(
    method _use_saved_state (line 663) | def _use_saved_state(self, k, v, saved_state, key_padding_mask, static...
    method _cat_prev_key_padding_mask (line 691) | def _cat_prev_key_padding_mask(
  class BartClassificationHead (line 718) | class BartClassificationHead(nn.Module):
    method __init__ (line 723) | def __init__(
    method forward (line 731) | def forward(self, x):
  class LearnedPositionalEmbedding (line 740) | class LearnedPositionalEmbedding(nn.Embedding):
    method __init__ (line 748) | def __init__(
    method forward (line 757) | def forward(self, input, use_cache=False):
  function LayerNorm (line 767) | def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True):
  function fill_with_neg_inf (line 778) | def fill_with_neg_inf(t):
  function _filter_out_falsey_values (line 783) | def _filter_out_falsey_values(tup) -> Tuple:
  function _get_shape (line 789) | def _get_shape(t):
  class BartModel (line 796) | class BartModel(PretrainedBartModel):
    method __init__ (line 797) | def __init__(self, config: BartConfig):
    method forward (line 811) | def forward(
    method get_input_embeddings (line 854) | def get_input_embeddings(self):
    method set_input_embeddings (line 857) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 862) | def get_output_embeddings(self):
  class BartForConditionalGeneration (line 870) | class BartForConditionalGeneration(PretrainedBartModel):
    method __init__ (line 873) | def __init__(self, config: BartConfig):
    method resize_token_embeddings (line 879) | def resize_token_embeddings(self, new_num_tokens: int) -> nn.Embedding:
    method _resize_final_logits_bias (line 886) | def _resize_final_logits_bias(self, new_num_tokens: int, old_num_token...
    method forward (line 895) | def forward(
    method prepare_inputs_for_generation (line 967) | def prepare_inputs_for_generation(self, decoder_input_ids, past, atten...
    method prepare_logits_for_generation (line 984) | def prepare_logits_for_generation(self, logits, cur_len, max_length):
    method _force_token_ids_generation (line 991) | def _force_token_ids_generation(self, scores, token_ids) -> None:
    method _reorder_cache (line 1004) | def _reorder_cache(past, beam_idx):
    method get_encoder (line 1020) | def get_encoder(self):
    method get_output_embeddings (line 1023) | def get_output_embeddings(self):
  class BartForSequenceClassification (line 1031) | class BartForSequenceClassification(PretrainedBartModel):
    method __init__ (line 1032) | def __init__(self, config: BartConfig, **kwargs):
    method forward (line 1042) | def forward(
  class SinusoidalPositionalEmbedding (line 1109) | class SinusoidalPositionalEmbedding(nn.Embedding):
    method __init__ (line 1112) | def __init__(self, num_positions, embedding_dim, padding_idx=None):
    method _init_weight (line 1119) | def _init_weight(out: nn.Parameter):
    method forward (line 1134) | def forward(self, input_ids, use_cache=False):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_beam_search.py
  class TransformerBeamSearch (line 29) | class TransformerBeamSearch(nn.Module):
    method __init__ (line 30) | def __init__(
    method step (line 80) | def step(self, log_probabilities):
    method forward (line 177) | def forward(self, encoder_input_ids, **kwargs):
    method remove_repeating_trigrams (line 224) | def remove_repeating_trigrams(self, log_probabilities, _B):
    method enforce_min_length (line 233) | def enforce_min_length(self):
    method enforce_max_length (line 237) | def enforce_max_length(self):
    method length_penalty (line 241) | def length_penalty(self):
  function tile (line 245) | def tile(x, count, dim=0):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_bert.py
  function load_tf_weights_in_bert (line 62) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
  function mish (line 134) | def mish(x):
  class BertEmbeddings (line 144) | class BertEmbeddings(nn.Module):
    method __init__ (line 148) | def __init__(self, config):
    method forward (line 159) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No...
  class BertSelfAttention (line 184) | class BertSelfAttention(nn.Module):
    method __init__ (line 185) | def __init__(self, config):
    method transpose_for_scores (line 204) | def transpose_for_scores(self, x):
    method forward (line 209) | def forward(
  class BertSelfOutput (line 262) | class BertSelfOutput(nn.Module):
    method __init__ (line 263) | def __init__(self, config):
    method forward (line 269) | def forward(self, hidden_states, input_tensor):
  class BertAttention (line 276) | class BertAttention(nn.Module):
    method __init__ (line 277) | def __init__(self, config):
    method prune_heads (line 283) | def prune_heads(self, heads):
    method forward (line 306) | def forward(
  class BertIntermediate (line 322) | class BertIntermediate(nn.Module):
    method __init__ (line 323) | def __init__(self, config):
    method forward (line 331) | def forward(self, hidden_states):
  class BertOutput (line 337) | class BertOutput(nn.Module):
    method __init__ (line 338) | def __init__(self, config):
    method forward (line 344) | def forward(self, hidden_states, input_tensor):
  class BertLayer (line 351) | class BertLayer(nn.Module):
    method __init__ (line 352) | def __init__(self, config):
    method forward (line 361) | def forward(
  class BertEncoder (line 386) | class BertEncoder(nn.Module):
    method __init__ (line 387) | def __init__(self, config):
    method forward (line 393) | def forward(
  class BertPooler (line 427) | class BertPooler(nn.Module):
    method __init__ (line 428) | def __init__(self, config):
    method forward (line 433) | def forward(self, hidden_states):
  class BertPredictionHeadTransform (line 442) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 443) | def __init__(self, config):
    method forward (line 452) | def forward(self, hidden_states):
  class BertLMPredictionHead (line 459) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 460) | def __init__(self, config):
    method forward (line 473) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 479) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 480) | def __init__(self, config):
    method forward (line 484) | def forward(self, sequence_output):
  class BertOnlyNSPHead (line 489) | class BertOnlyNSPHead(nn.Module):
    method __init__ (line 490) | def __init__(self, config):
    method forward (line 494) | def forward(self, pooled_output):
  class BertPreTrainingHeads (line 499) | class BertPreTrainingHeads(nn.Module):
    method __init__ (line 500) | def __init__(self, config):
    method forward (line 505) | def forward(self, sequence_output, pooled_output):
  class BertPreTrainedModel (line 511) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 520) | def _init_weights(self, module):
  class BertModel (line 594) | class BertModel(BertPreTrainedModel):
    method __init__ (line 611) | def __init__(self, config):
    method get_input_embeddings (line 621) | def get_input_embeddings(self):
    method set_input_embeddings (line 624) | def set_input_embeddings(self, value):
    method _prune_heads (line 627) | def _prune_heads(self, heads_to_prune):
    method forward (line 636) | def forward(
  class BertForPreTraining (line 750) | class BertForPreTraining(BertPreTrainedModel):
    method __init__ (line 751) | def __init__(self, config):
    method get_output_embeddings (line 759) | def get_output_embeddings(self):
    method forward (line 763) | def forward(
  class BertForMaskedLM (line 850) | class BertForMaskedLM(BertPreTrainedModel):
    method __init__ (line 851) | def __init__(self, config):
    method get_output_embeddings (line 859) | def get_output_embeddings(self):
    method forward (line 863) | def forward(
    method prepare_inputs_for_generation (line 960) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class BertForNextSentencePrediction (line 986) | class BertForNextSentencePrediction(BertPreTrainedModel):
    method __init__ (line 987) | def __init__(self, config):
    method forward (line 996) | def forward(
  class BertForSequenceClassification (line 1074) | class BertForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 1075) | def __init__(self, config):
    method forward (line 1086) | def forward(
  class BertForMultipleChoice (line 1171) | class BertForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1172) | def __init__(self, config):
    method forward (line 1182) | def forward(
  class BertForTokenClassification (line 1274) | class BertForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1275) | def __init__(self, config):
    method forward (line 1286) | def forward(
  class BertForQuestionAnswering (line 1372) | class BertForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 1373) | def __init__(self, config):
    method forward (line 1383) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_camembert.py
  class CamembertModel (line 59) | class CamembertModel(RobertaModel):
  class CamembertForMaskedLM (line 71) | class CamembertForMaskedLM(RobertaForMaskedLM):
  class CamembertForSequenceClassification (line 85) | class CamembertForSequenceClassification(RobertaForSequenceClassification):
  class CamembertForMultipleChoice (line 99) | class CamembertForMultipleChoice(RobertaForMultipleChoice):
  class CamembertForTokenClassification (line 113) | class CamembertForTokenClassification(RobertaForTokenClassification):
  class CamembertForQuestionAnswering (line 127) | class CamembertForQuestionAnswering(RobertaForQuestionAnswering):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_ctrl.py
  function angle_defn (line 39) | def angle_defn(pos, i, d_model_size):
  function positional_encoding (line 44) | def positional_encoding(position, d_model_size, dtype):
  function scaled_dot_product_attention (line 59) | def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, hea...
  class MultiHeadAttention (line 85) | class MultiHeadAttention(torch.nn.Module):
    method __init__ (line 86) | def __init__(self, d_model_size, num_heads, output_attentions=False):
    method split_into_heads (line 100) | def split_into_heads(self, x, batch_size):
    method forward (line 104) | def forward(self, v, k, q, mask, layer_past=None, attention_mask=None,...
  function point_wise_feed_forward_network (line 136) | def point_wise_feed_forward_network(d_model_size, dff):
  class EncoderLayer (line 140) | class EncoderLayer(torch.nn.Module):
    method __init__ (line 141) | def __init__(self, d_model_size, num_heads, dff, rate=0.1, output_atte...
    method forward (line 153) | def forward(self, x, mask, layer_past=None, attention_mask=None, head_...
  class CTRLPreTrainedModel (line 178) | class CTRLPreTrainedModel(PreTrainedModel):
    method _init_weights (line 186) | def _init_weights(self, module):
  class CTRLModel (line 263) | class CTRLModel(CTRLPreTrainedModel):
    method __init__ (line 264) | def __init__(self, config):
    method get_input_embeddings (line 287) | def get_input_embeddings(self):
    method set_input_embeddings (line 290) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 293) | def _prune_heads(self, heads_to_prune):
    method forward (line 301) | def forward(
  class CTRLLMHeadModel (line 458) | class CTRLLMHeadModel(CTRLPreTrainedModel):
    method __init__ (line 459) | def __init__(self, config):
    method get_output_embeddings (line 466) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 469) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 477) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_distilbert.py
  function create_sinusoidal_embeddings (line 54) | def create_sinusoidal_embeddings(n_pos, dim, out):
  class Embeddings (line 62) | class Embeddings(nn.Module):
    method __init__ (line 63) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids):
  class MultiHeadSelfAttention (line 100) | class MultiHeadSelfAttention(nn.Module):
    method __init__ (line 101) | def __init__(self, config):
    method prune_heads (line 118) | def prune_heads(self, heads):
    method forward (line 139) | def forward(self, query, key, value, mask, head_mask=None):
  class FFN (line 198) | class FFN(nn.Module):
    method __init__ (line 199) | def __init__(self, config):
    method forward (line 209) | def forward(self, input):
  class TransformerBlock (line 217) | class TransformerBlock(nn.Module):
    method __init__ (line 218) | def __init__(self, config):
    method forward (line 231) | def forward(self, x, attn_mask=None, head_mask=None):
  class Transformer (line 264) | class Transformer(nn.Module):
    method __init__ (line 265) | def __init__(self, config):
    method forward (line 274) | def forward(self, x, attn_mask=None, head_mask=None):
  class DistilBertPreTrainedModel (line 325) | class DistilBertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 334) | def _init_weights(self, module):
  class DistilBertModel (line 392) | class DistilBertModel(DistilBertPreTrainedModel):
    method __init__ (line 393) | def __init__(self, config):
    method get_input_embeddings (line 401) | def get_input_embeddings(self):
    method set_input_embeddings (line 404) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 407) | def _prune_heads(self, heads_to_prune):
    method forward (line 416) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForMaskedLM (line 477) | class DistilBertForMaskedLM(DistilBertPreTrainedModel):
    method __init__ (line 478) | def __init__(self, config):
    method get_output_embeddings (line 492) | def get_output_embeddings(self):
    method forward (line 496) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForSequenceClassification (line 558) | class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
    method __init__ (line 559) | def __init__(self, config):
    method forward (line 571) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForQuestionAnswering (line 638) | class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
    method __init__ (line 639) | def __init__(self, config):
    method forward (line 650) | def forward(
  class DistilBertForTokenClassification (line 740) | class DistilBertForTokenClassification(DistilBertPreTrainedModel):
    method __init__ (line 741) | def __init__(self, config):
    method forward (line 752) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_electra.py
  function load_tf_weights_in_electra (line 28) | def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discri...
  class ElectraEmbeddings (line 109) | class ElectraEmbeddings(BertEmbeddings):
    method __init__ (line 112) | def __init__(self, config):
  class ElectraDiscriminatorPredictions (line 123) | class ElectraDiscriminatorPredictions(nn.Module):
    method __init__ (line 126) | def __init__(self, config):
    method forward (line 133) | def forward(self, discriminator_hidden_states, attention_mask):
  class ElectraGeneratorPredictions (line 141) | class ElectraGeneratorPredictions(nn.Module):
    method __init__ (line 144) | def __init__(self, config):
    method forward (line 150) | def forward(self, generator_hidden_states):
  class ElectraPreTrainedModel (line 158) | class ElectraPreTrainedModel(BertPreTrainedModel):
  class ElectraModel (line 233) | class ElectraModel(ElectraPreTrainedModel):
    method __init__ (line 237) | def __init__(self, config):
    method get_input_embeddings (line 248) | def get_input_embeddings(self):
    method set_input_embeddings (line 251) | def set_input_embeddings(self, value):
    method _prune_heads (line 254) | def _prune_heads(self, heads_to_prune):
    method forward (line 263) | def forward(
  class ElectraClassificationHead (line 334) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 337) | def __init__(self, config):
    method forward (line 343) | def forward(self, features, **kwargs):
  class ElectraForSequenceClassification (line 358) | class ElectraForSequenceClassification(ElectraPreTrainedModel):
    method __init__ (line 359) | def __init__(self, config):
    method forward (line 368) | def forward(
  class ElectraForPreTraining (line 448) | class ElectraForPreTraining(ElectraPreTrainedModel):
    method __init__ (line 449) | def __init__(self, config):
    method forward (line 457) | def forward(
  class ElectraForMaskedLM (line 542) | class ElectraForMaskedLM(ElectraPreTrainedModel):
    method __init__ (line 543) | def __init__(self, config):
    method get_output_embeddings (line 552) | def get_output_embeddings(self):
    method forward (line 556) | def forward(
  class ElectraForTokenClassification (line 634) | class ElectraForTokenClassification(ElectraPreTrainedModel):
    method __init__ (line 635) | def __init__(self, config):
    method forward (line 644) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_encoder_decoder.py
  class EncoderDecoderModel (line 29) | class EncoderDecoderModel(PreTrainedModel):
    method __init__ (line 40) | def __init__(
    method tie_weights (line 74) | def tie_weights(self):
    method get_encoder (line 78) | def get_encoder(self):
    method get_decoder (line 81) | def get_decoder(self):
    method get_input_embeddings (line 84) | def get_input_embeddings(self):
    method get_output_embeddings (line 87) | def get_output_embeddings(self):
    method from_encoder_decoder_pretrained (line 91) | def from_encoder_decoder_pretrained(
    method forward (line 183) | def forward(
    method prepare_inputs_for_generation (line 303) | def prepare_inputs_for_generation(self, input_ids, past, attention_mas...
    method _reorder_cache (line 321) | def _reorder_cache(self, past, beam_idx):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_flaubert.py
  class FlaubertModel (line 110) | class FlaubertModel(XLMModel):
    method __init__ (line 114) | def __init__(self, config):  # , dico, is_encoder, with_output):
    method forward (line 120) | def forward(
  class FlaubertWithLMHeadModel (line 300) | class FlaubertWithLMHeadModel(XLMWithLMHeadModel):
    method __init__ (line 308) | def __init__(self, config):
  class FlaubertForSequenceClassification (line 319) | class FlaubertForSequenceClassification(XLMForSequenceClassification):
    method __init__ (line 327) | def __init__(self, config):
  class FlaubertForQuestionAnsweringSimple (line 338) | class FlaubertForQuestionAnsweringSimple(XLMForQuestionAnsweringSimple):
    method __init__ (line 346) | def __init__(self, config):
  class FlaubertForQuestionAnswering (line 357) | class FlaubertForQuestionAnswering(XLMForQuestionAnswering):
    method __init__ (line 365) | def __init__(self, config):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_gpt2.py
  function load_tf_weights_in_gpt2 (line 44) | def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
  class Attention (line 99) | class Attention(nn.Module):
    method __init__ (line 100) | def __init__(self, nx, n_ctx, config, scale=False):
    method prune_heads (line 121) | def prune_heads(self, heads):
    method _attn (line 143) | def _attn(self, q, k, v, attention_mask=None, head_mask=None):
    method merge_heads (line 167) | def merge_heads(self, x):
    method split_heads (line 172) | def split_heads(self, x, k=False):
    method forward (line 180) | def forward(self, x, layer_past=None, attention_mask=None, head_mask=N...
  class MLP (line 207) | class MLP(nn.Module):
    method __init__ (line 208) | def __init__(self, n_state, config):  # in MLP: n_state=3072 (4 * n_embd)
    method forward (line 216) | def forward(self, x):
  class Block (line 222) | class Block(nn.Module):
    method __init__ (line 223) | def __init__(self, n_ctx, config, scale=False):
    method forward (line 231) | def forward(self, x, layer_past=None, attention_mask=None, head_mask=N...
  class GPT2PreTrainedModel (line 249) | class GPT2PreTrainedModel(PreTrainedModel):
    method __init__ (line 258) | def __init__(self, *inputs, **kwargs):
    method _init_weights (line 261) | def _init_weights(self, module):
  class GPT2Model (line 339) | class GPT2Model(GPT2PreTrainedModel):
    method __init__ (line 340) | def __init__(self, config):
    method get_input_embeddings (line 353) | def get_input_embeddings(self):
    method set_input_embeddings (line 356) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 359) | def _prune_heads(self, heads_to_prune):
    method forward (line 367) | def forward(
  class GPT2LMHeadModel (line 523) | class GPT2LMHeadModel(GPT2PreTrainedModel):
    method __init__ (line 524) | def __init__(self, config):
    method get_output_embeddings (line 531) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 534) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 542) | def forward(
  class GPT2DoubleHeadsModel (line 631) | class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
    method __init__ (line 632) | def __init__(self, config):
    method get_output_embeddings (line 641) | def get_output_embeddings(self):
    method forward (line 645) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_longformer.py
  function _get_question_end_index (line 43) | def _get_question_end_index(input_ids, sep_token_id):
  function _compute_global_attention_mask (line 59) | def _compute_global_attention_mask(input_ids, sep_token_id, before_sep_t...
  class LongformerSelfAttention (line 81) | class LongformerSelfAttention(nn.Module):
    method __init__ (line 82) | def __init__(self, config, layer_id):
    method _skew (line 117) | def _skew(x, direction):
    method _skew2 (line 124) | def _skew2(x):
    method _chunk (line 136) | def _chunk(x, w):
    method _mask_invalid_locations (line 150) | def _mask_invalid_locations(self, input_tensor, w) -> torch.Tensor:
    method _sliding_chunks_matmul_qk (line 163) | def _sliding_chunks_matmul_qk(self, q: torch.Tensor, k: torch.Tensor, ...
    method _sliding_chunks_matmul_pv (line 210) | def _sliding_chunks_matmul_pv(self, prob: torch.Tensor, v: torch.Tenso...
    method forward (line 238) | def forward(
  class LongformerModel (line 498) | class LongformerModel(RobertaModel):
    method __init__ (line 519) | def __init__(self, config):
    method _pad_to_window_size (line 538) | def _pad_to_window_size(
    method forward (line 582) | def forward(
  class LongformerForMaskedLM (line 686) | class LongformerForMaskedLM(BertPreTrainedModel):
    method __init__ (line 690) | def __init__(self, config):
    method forward (line 699) | def forward(
  class LongformerForSequenceClassification (line 776) | class LongformerForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 780) | def __init__(self, config):
    method forward (line 788) | def forward(
  class LongformerClassificationHead (line 868) | class LongformerClassificationHead(nn.Module):
    method __init__ (line 871) | def __init__(self, config):
    method forward (line 877) | def forward(self, hidden_states, **kwargs):
  class LongformerForQuestionAnswering (line 892) | class LongformerForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 896) | def __init__(self, config):
    method forward (line 906) | def forward(
  class LongformerForTokenClassification (line 1016) | class LongformerForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1020) | def __init__(self, config):
    method forward (line 1031) | def forward(
  class LongformerForMultipleChoice (line 1116) | class LongformerForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1120) | def __init__(self, config):
    method forward (line 1130) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_marian.py
  class MarianMTModel (line 26) | class MarianMTModel(BartForConditionalGeneration):
    method prepare_logits_for_generation (line 49) | def prepare_logits_for_generation(self, logits, cur_len, max_length):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_mmbt.py
  class ModalEmbeddings (line 32) | class ModalEmbeddings(nn.Module):
    method __init__ (line 36) | def __init__(self, config, encoder, embeddings):
    method forward (line 47) | def forward(self, input_modal, start_token=None, end_token=None, posit...
  class MMBTModel (line 152) | class MMBTModel(nn.Module, ModuleUtilsMixin):
    method __init__ (line 180) | def __init__(self, config, transformer, encoder):
    method forward (line 186) | def forward(
    method get_input_embeddings (line 268) | def get_input_embeddings(self):
    method set_input_embeddings (line 271) | def set_input_embeddings(self, value):
  class MMBTForClassification (line 281) | class MMBTForClassification(nn.Module):
    method __init__ (line 312) | def __init__(self, config, transformer, encoder):
    method forward (line 320) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_openai.py
  function load_tf_weights_in_openai_gpt (line 42) | def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folde...
  class Attention (line 122) | class Attention(nn.Module):
    method __init__ (line 123) | def __init__(self, nx, n_ctx, config, scale=False):
    method prune_heads (line 141) | def prune_heads(self, heads):
    method _attn (line 160) | def _attn(self, q, k, v, attention_mask=None, head_mask=None):
    method merge_heads (line 185) | def merge_heads(self, x):
    method split_heads (line 190) | def split_heads(self, x, k=False):
    method forward (line 198) | def forward(self, x, attention_mask=None, head_mask=None):
  class MLP (line 216) | class MLP(nn.Module):
    method __init__ (line 217) | def __init__(self, n_state, config):  # in MLP: n_state=3072 (4 * n_embd)
    method forward (line 225) | def forward(self, x):
  class Block (line 231) | class Block(nn.Module):
    method __init__ (line 232) | def __init__(self, n_ctx, config, scale=False):
    method forward (line 240) | def forward(self, x, attention_mask=None, head_mask=None):
  class OpenAIGPTPreTrainedModel (line 252) | class OpenAIGPTPreTrainedModel(PreTrainedModel):
    method _init_weights (line 261) | def _init_weights(self, module):
  class OpenAIGPTModel (line 329) | class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 330) | def __init__(self, config):
    method get_input_embeddings (line 342) | def get_input_embeddings(self):
    method set_input_embeddings (line 345) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 348) | def _prune_heads(self, heads_to_prune):
    method forward (line 356) | def forward(
  class OpenAIGPTLMHeadModel (line 471) | class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 472) | def __init__(self, config):
    method get_output_embeddings (line 479) | def get_output_embeddings(self):
    method forward (line 483) | def forward(
  class OpenAIGPTDoubleHeadsModel (line 567) | class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 568) | def __init__(self, config):
    method get_output_embeddings (line 578) | def get_output_embeddings(self):
    method forward (line 582) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_reformer.py
  function mish (line 45) | def mish(x):
  function _get_least_common_mult_chunk_len (line 70) | def _get_least_common_mult_chunk_len(config):
  class AxialPositionEmbeddings (line 87) | class AxialPositionEmbeddings(nn.Module):
    method __init__ (line 92) | def __init__(self, config):
    method forward (line 117) | def forward(self, position_ids):
  class PositionEmbeddings (line 166) | class PositionEmbeddings(nn.Module):
    method __init__ (line 170) | def __init__(self, config):
    method forward (line 175) | def forward(self, position_ids):
  class ReformerEmbeddings (line 181) | class ReformerEmbeddings(nn.Module):
    method __init__ (line 185) | def __init__(self, config):
    method forward (line 195) | def forward(self, input_ids=None, position_ids=None, inputs_embeds=None):
  class EfficientAttentionMixin (line 226) | class EfficientAttentionMixin:
    method _look_adjacent (line 231) | def _look_adjacent(self, vectors, num_chunks_before, num_chunks_after):
    method _split_hidden_size_dim (line 254) | def _split_hidden_size_dim(self, x, num_attn_heads, attn_head_size):
    method _merge_hidden_size_dims (line 262) | def _merge_hidden_size_dims(self, x, num_attn_heads, attn_head_size):
    method _split_seq_length_dim_to (line 269) | def _split_seq_length_dim_to(self, vectors, dim_factor_1, dim_factor_2...
  class LSHSelfAttention (line 284) | class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
    method __init__ (line 285) | def __init__(self, config):
    method forward (line 315) | def forward(
    method _hash_vectors (line 441) | def _hash_vectors(self, vectors, num_hashes):
    method _get_sorted_bucket_idx_and_undo_sorted_bucket_idx (line 506) | def _get_sorted_bucket_idx_and_undo_sorted_bucket_idx(self, sequence_l...
    method _set_num_buckets (line 537) | def _set_num_buckets(self, sequence_length):
    method _attend (line 556) | def _attend(
    method _compute_attn_mask (line 635) | def _compute_attn_mask(self, query_indices, key_indices, attention_mask):
    method _len_and_dim_norm (line 663) | def _len_and_dim_norm(self, vectors):
    method _len_norm (line 673) | def _len_norm(self, x, epsilon=1e-6):
    method _gather_by_expansion (line 681) | def _gather_by_expansion(self, vectors, idxs, num_hashes):
  class ReverseSort (line 690) | class ReverseSort(Function):
    method forward (line 700) | def forward(ctx, out_vectors, logits, sorted_bucket_idx, undo_sorted_b...
    method backward (line 713) | def backward(ctx, grad_out_vectors, grad_logits):
  class LocalSelfAttention (line 747) | class LocalSelfAttention(nn.Module, EfficientAttentionMixin):
    method __init__ (line 748) | def __init__(self, config):
    method forward (line 773) | def forward(self, hidden_states, attention_mask=None, head_mask=None, ...
    method _compute_attn_mask (line 888) | def _compute_attn_mask(self, query_indices, key_indices, attention_mas...
  class ReformerSelfOutput (line 913) | class ReformerSelfOutput(nn.Module):
    method __init__ (line 914) | def __init__(self, config):
    method forward (line 921) | def forward(self, hidden_states):
  class ReformerAttention (line 927) | class ReformerAttention(nn.Module):
    method __init__ (line 928) | def __init__(self, config, layer_id=0):
    method forward (line 953) | def forward(
  class ReformerFeedForwardDense (line 986) | class ReformerFeedForwardDense(nn.Module):
    method __init__ (line 987) | def __init__(self, config):
    method forward (line 998) | def forward(self, hidden_states):
  class ReformerFeedForwardOutput (line 1005) | class ReformerFeedForwardOutput(nn.Module):
    method __init__ (line 1006) | def __init__(self, config):
    method forward (line 1012) | def forward(self, hidden_states):
  class ChunkReformerFeedForward (line 1018) | class ChunkReformerFeedForward(nn.Module):
    method __init__ (line 1019) | def __init__(self, config):
    method forward (line 1028) | def forward(self, attention_output):
    method forward_chunk (line 1033) | def forward_chunk(self, hidden_states):
  class ReformerLayer (line 1039) | class ReformerLayer(nn.Module):
    method __init__ (line 1040) | def __init__(self, config, layer_id=0):
    method _init_attention_seed (line 1050) | def _init_attention_seed(self):
    method _init_feed_forward_seed (line 1070) | def _init_feed_forward_seed(self):
    method forward (line 1090) | def forward(
    method backward_pass (line 1134) | def backward_pass(
  class _ReversibleFunction (line 1195) | class _ReversibleFunction(Function):
    method forward (line 1205) | def forward(
    method backward (line 1256) | def backward(ctx, grad_hidden_states):
  class ReformerEncoder (line 1302) | class ReformerEncoder(nn.Module):
    method __init__ (line 1303) | def __init__(self, config):
    method forward (line 1312) | def forward(
  class ReformerOnlyLMHead (line 1350) | class ReformerOnlyLMHead(nn.Module):
    method __init__ (line 1351) | def __init__(self, config):
    method forward (line 1363) | def forward(self, hidden_states):
    method forward_chunk (line 1366) | def forward_chunk(self, hidden_states):
  class ReformerPreTrainedModel (line 1371) | class ReformerPreTrainedModel(PreTrainedModel):
    method dummy_inputs (line 1380) | def dummy_inputs(self):
    method _init_weights (line 1389) | def _init_weights(self, module):
  class ReformerModel (line 1470) | class ReformerModel(ReformerPreTrainedModel):
    method __init__ (line 1471) | def __init__(self, config):
    method get_input_embeddings (line 1483) | def get_input_embeddings(self):
    method set_input_embeddings (line 1486) | def set_input_embeddings(self, value):
    method _prune_heads (line 1489) | def _prune_heads(self, heads_to_prune):
    method forward (line 1498) | def forward(
    method _pad_to_mult_of_chunk_length (line 1615) | def _pad_to_mult_of_chunk_length(
  class ReformerModelWithLMHead (line 1674) | class ReformerModelWithLMHead(ReformerPreTrainedModel):
    method __init__ (line 1675) | def __init__(self, config):
    method get_output_embeddings (line 1682) | def get_output_embeddings(self):
    method tie_weights (line 1685) | def tie_weights(self):
    method forward (line 1690) | def forward(
    method prepare_inputs_for_generation (line 1766) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_roberta.py
  class RobertaEmbeddings (line 44) | class RobertaEmbeddings(BertEmbeddings):
    method __init__ (line 49) | def __init__(self, config):
    method forward (line 57) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No...
    method create_position_ids_from_inputs_embeds (line 69) | def create_position_ids_from_inputs_embeds(self, inputs_embeds):
  class RobertaModel (line 139) | class RobertaModel(BertModel):
    method __init__ (line 148) | def __init__(self, config):
    method get_input_embeddings (line 154) | def get_input_embeddings(self):
    method set_input_embeddings (line 157) | def set_input_embeddings(self, value):
  class RobertaForMaskedLM (line 162) | class RobertaForMaskedLM(BertPreTrainedModel):
    method __init__ (line 166) | def __init__(self, config):
    method get_output_embeddings (line 174) | def get_output_embeddings(self):
    method forward (line 178) | def forward(
  class RobertaLMHead (line 246) | class RobertaLMHead(nn.Module):
    method __init__ (line 249) | def __init__(self, config):
    method forward (line 260) | def forward(self, features, **kwargs):
  class RobertaForSequenceClassification (line 276) | class RobertaForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 280) | def __init__(self, config):
    method forward (line 288) | def forward(
  class RobertaForMultipleChoice (line 366) | class RobertaForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 370) | def __init__(self, config):
    method forward (line 380) | def forward(
  class RobertaForTokenClassification (line 464) | class RobertaForTokenClassification(BertPreTrainedModel):
    method __init__ (line 468) | def __init__(self, config):
    method forward (line 479) | def forward(
  class RobertaClassificationHead (line 559) | class RobertaClassificationHead(nn.Module):
    method __init__ (line 562) | def __init__(self, config):
    method forward (line 568) | def forward(self, features, **kwargs):
  class RobertaForQuestionAnswering (line 583) | class RobertaForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 597) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_t5.py
  function load_tf_weights_in_t5 (line 53) | def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
  class T5LayerNorm (line 143) | class T5LayerNorm(nn.Module):
    method __init__ (line 144) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 152) | def forward(self, x):
  class T5DenseReluDense (line 162) | class T5DenseReluDense(nn.Module):
    method __init__ (line 163) | def __init__(self, config):
    method forward (line 169) | def forward(self, hidden_states):
  class T5LayerFF (line 177) | class T5LayerFF(nn.Module):
    method __init__ (line 178) | def __init__(self, config):
    method forward (line 184) | def forward(self, hidden_states):
  class T5Attention (line 191) | class T5Attention(nn.Module):
    method __init__ (line 192) | def __init__(self, config: T5Config, has_relative_attention_bias=False):
    method prune_heads (line 215) | def prune_heads(self, heads):
    method _relative_position_bucket (line 236) | def _relative_position_bucket(relative_position, bidirectional=True, n...
    method compute_bias (line 283) | def compute_bias(self, qlen, klen):
    method forward (line 298) | def forward(
  class T5LayerSelfAttention (line 401) | class T5LayerSelfAttention(nn.Module):
    method __init__ (line 402) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 408) | def forward(
  class T5LayerCrossAttention (line 432) | class T5LayerCrossAttention(nn.Module):
    method __init__ (line 433) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 439) | def forward(
  class T5Block (line 467) | class T5Block(nn.Module):
    method __init__ (line 468) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 478) | def forward(
  class T5PreTrainedModel (line 553) | class T5PreTrainedModel(PreTrainedModel):
    method dummy_inputs (line 563) | def dummy_inputs(self):
    method _init_weights (line 573) | def _init_weights(self, module):
    method _shift_right (line 605) | def _shift_right(self, input_ids):
  class T5Stack (line 627) | class T5Stack(T5PreTrainedModel):
    method __init__ (line 628) | def __init__(self, config, embed_tokens=None):
    method get_input_embeddings (line 644) | def get_input_embeddings(self):
    method get_output_embeddings (line 647) | def get_output_embeddings(self):
    method set_input_embeddings (line 650) | def set_input_embeddings(self, new_embeddings):
    method forward (line 653) | def forward(
  class T5Model (line 846) | class T5Model(T5PreTrainedModel):
    method __init__ (line 847) | def __init__(self, config):
    method get_input_embeddings (line 860) | def get_input_embeddings(self):
    method set_input_embeddings (line 863) | def set_input_embeddings(self, new_embeddings):
    method get_encoder (line 868) | def get_encoder(self):
    method get_decoder (line 871) | def get_decoder(self):
    method _prune_heads (line 874) | def _prune_heads(self, heads_to_prune):
    method forward (line 883) | def forward(
  class T5ForConditionalGeneration (line 966) | class T5ForConditionalGeneration(T5PreTrainedModel):
    method __init__ (line 967) | def __init__(self, config):
    method get_input_embeddings (line 984) | def get_input_embeddings(self):
    method set_input_embeddings (line 987) | def set_input_embeddings(self, new_embeddings):
    method get_output_embeddings (line 992) | def get_output_embeddings(self):
    method get_encoder (line 995) | def get_encoder(self):
    method get_decoder (line 998) | def get_decoder(self):
    method forward (line 1002) | def forward(
    method prepare_inputs_for_generation (line 1114) | def prepare_inputs_for_generation(self, input_ids, past, attention_mas...
    method _reorder_cache (line 1131) | def _reorder_cache(self, past, beam_idx):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_albert.py
  class TFAlbertEmbeddings (line 45) | class TFAlbertEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 49) | def __init__(self, config, **kwargs):
    method build (line 71) | def build(self, input_shape):
    method call (line 83) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 105) | def _embedding(self, inputs, training=False):
    method _linear (line 130) | def _linear(self, inputs):
  class TFAlbertSelfAttention (line 144) | class TFAlbertSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 145) | def __init__(self, config, **kwargs):
    method transpose_for_scores (line 171) | def transpose_for_scores(self, x, batch_size):
    method call (line 175) | def call(self, inputs, training=False):
  class TFAlbertSelfOutput (line 220) | class TFAlbertSelfOutput(tf.keras.layers.Layer):
    method __init__ (line 221) | def __init__(self, config, **kwargs):
    method call (line 229) | def call(self, inputs, training=False):
  class TFAlbertAttention (line 238) | class TFAlbertAttention(TFBertSelfAttention):
    method __init__ (line 239) | def __init__(self, config, **kwargs):
    method prune_heads (line 249) | def prune_heads(self, heads):
    method call (line 252) | def call(self, inputs, training=False):
  class TFAlbertLayer (line 306) | class TFAlbertLayer(tf.keras.layers.Layer):
    method __init__ (line 307) | def __init__(self, config, **kwargs):
    method call (line 328) | def call(self, inputs, training=False):
  class TFAlbertLayerGroup (line 344) | class TFAlbertLayerGroup(tf.keras.layers.Layer):
    method __init__ (line 345) | def __init__(self, config, **kwargs):
    method call (line 354) | def call(self, inputs, training=False):
  class TFAlbertTransformer (line 379) | class TFAlbertTransformer(tf.keras.layers.Layer):
    method __init__ (line 380) | def __init__(self, config, **kwargs):
    method call (line 396) | def call(self, inputs, training=False):
  class TFAlbertPreTrainedModel (line 438) | class TFAlbertPreTrainedModel(TFPreTrainedModel):
  class TFAlbertMLMHead (line 447) | class TFAlbertMLMHead(tf.keras.layers.Layer):
    method __init__ (line 448) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 466) | def build(self, input_shape):
    method call (line 473) | def call(self, hidden_states):
  class TFAlbertMainLayer (line 482) | class TFAlbertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 485) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 498) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 501) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 504) | def _prune_heads(self, heads_to_prune):
    method call (line 511) | def call(
  class TFAlbertModel (line 674) | class TFAlbertModel(TFAlbertPreTrainedModel):
    method __init__ (line 675) | def __init__(self, config, *inputs, **kwargs):
    method call (line 680) | def call(self, inputs, **kwargs):
  class TFAlbertForPreTraining (line 725) | class TFAlbertForPreTraining(TFAlbertPreTrainedModel):
    method __init__ (line 726) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 734) | def get_output_embeddings(self):
    method call (line 738) | def call(self, inputs, **kwargs):
  class TFAlbertSOPHead (line 772) | class TFAlbertSOPHead(tf.keras.layers.Layer):
    method __init__ (line 773) | def __init__(self, config, **kwargs):
    method call (line 781) | def call(self, pooled_output, training: bool):
  class TFAlbertForMaskedLM (line 788) | class TFAlbertForMaskedLM(TFAlbertPreTrainedModel):
    method __init__ (line 789) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 795) | def get_output_embeddings(self):
    method call (line 799) | def call(self, inputs, **kwargs):
  class TFAlbertForSequenceClassification (line 844) | class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel):
    method __init__ (line 845) | def __init__(self, config, *inputs, **kwargs):
    method call (line 856) | def call(self, inputs, **kwargs):
  class TFAlbertForQuestionAnswering (line 901) | class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel):
    method __init__ (line 902) | def __init__(self, config, *inputs, **kwargs):
    method call (line 912) | def call(self, inputs, **kwargs):
  class TFAlbertForMultipleChoice (line 967) | class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel):
    method __init__ (line 968) | def __init__(self, config, *inputs, **kwargs):
    method dummy_inputs (line 978) | def dummy_inputs(self):
    method call (line 987) | def call(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_auto.py
  class TFAutoModel (line 174) | class TFAutoModel(object):
    method __init__ (line 198) | def __init__(self):
    method from_config (line 206) | def from_config(cls, config):
    method from_pretrained (line 244) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForPreTraining (line 336) | class TFAutoModelForPreTraining(object):
    method __init__ (line 345) | def __init__(self):
    method from_config (line 353) | def from_config(cls, config):
    method from_pretrained (line 392) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelWithLMHead (line 486) | class TFAutoModelWithLMHead(object):
    method __init__ (line 510) | def __init__(self):
    method from_config (line 518) | def from_config(cls, config):
    method from_pretrained (line 556) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForMultipleChoice (line 649) | class TFAutoModelForMultipleChoice:
    method __init__ (line 665) | def __init__(self):
    method from_config (line 673) | def from_config(cls, config):
    method from_pretrained (line 706) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForSequenceClassification (line 796) | class TFAutoModelForSequenceClassification(object):
    method __init__ (line 815) | def __init__(self):
    method from_config (line 823) | def from_config(cls, config):
    method from_pretrained (line 859) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForQuestionAnswering (line 952) | class TFAutoModelForQuestionAnswering(object):
    method __init__ (line 972) | def __init__(self):
    method from_config (line 980) | def from_config(cls, config):
    method from_pretrained (line 1017) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForTokenClassification (line 1111) | class TFAutoModelForTokenClassification:
    method __init__ (line 1112) | def __init__(self):
    method from_config (line 1120) | def from_config(cls, config):
    method from_pretrained (line 1155) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_bert.py
  function gelu (line 58) | def gelu(x):
  function gelu_new (line 69) | def gelu_new(x):
  function swish (line 82) | def swish(x):
  class TFBertEmbeddings (line 94) | class TFBertEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 98) | def __init__(self, config, **kwargs):
    method build (line 122) | def build(self, input_shape):
    method call (line 134) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 156) | def _embedding(self, inputs, training=False):
    method _linear (line 181) | def _linear(self, inputs):
  class TFBertSelfAttention (line 197) | class TFBertSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 198) | def __init__(self, config, **kwargs):
    method transpose_for_scores (line 224) | def transpose_for_scores(self, x, batch_size):
    method call (line 228) | def call(self, inputs, training=False):
  class TFBertSelfOutput (line 273) | class TFBertSelfOutput(tf.keras.layers.Layer):
    method __init__ (line 274) | def __init__(self, config, **kwargs):
    method call (line 282) | def call(self, inputs, training=False):
  class TFBertAttention (line 291) | class TFBertAttention(tf.keras.layers.Layer):
    method __init__ (line 292) | def __init__(self, config, **kwargs):
    method prune_heads (line 297) | def prune_heads(self, heads):
    method call (line 300) | def call(self, inputs, training=False):
  class TFBertIntermediate (line 309) | class TFBertIntermediate(tf.keras.layers.Layer):
    method __init__ (line 310) | def __init__(self, config, **kwargs):
    method call (line 320) | def call(self, hidden_states):
  class TFBertOutput (line 326) | class TFBertOutput(tf.keras.layers.Layer):
    method __init__ (line 327) | def __init__(self, config, **kwargs):
    method call (line 335) | def call(self, inputs, training=False):
  class TFBertLayer (line 344) | class TFBertLayer(tf.keras.layers.Layer):
    method __init__ (line 345) | def __init__(self, config, **kwargs):
    method call (line 351) | def call(self, inputs, training=False):
  class TFBertEncoder (line 362) | class TFBertEncoder(tf.keras.layers.Layer):
    method __init__ (line 363) | def __init__(self, config, **kwargs):
    method call (line 369) | def call(self, inputs, training=False):
  class TFBertPooler (line 396) | class TFBertPooler(tf.keras.layers.Layer):
    method __init__ (line 397) | def __init__(self, config, **kwargs):
    method call (line 406) | def call(self, hidden_states):
  class TFBertPredictionHeadTransform (line 414) | class TFBertPredictionHeadTransform(tf.keras.layers.Layer):
    method __init__ (line 415) | def __init__(self, config, **kwargs):
    method call (line 426) | def call(self, hidden_states):
  class TFBertLMPredictionHead (line 433) | class TFBertLMPredictionHead(tf.keras.layers.Layer):
    method __init__ (line 434) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 443) | def build(self, input_shape):
    method call (line 447) | def call(self, hidden_states):
  class TFBertMLMHead (line 454) | class TFBertMLMHead(tf.keras.layers.Layer):
    method __init__ (line 455) | def __init__(self, config, input_embeddings, **kwargs):
    method call (line 459) | def call(self, sequence_output):
  class TFBertNSPHead (line 464) | class TFBertNSPHead(tf.keras.layers.Layer):
    method __init__ (line 465) | def __init__(self, config, **kwargs):
    method call (line 471) | def call(self, pooled_output):
  class TFBertMainLayer (line 477) | class TFBertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 480) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 488) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 491) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 494) | def _prune_heads(self, heads_to_prune):
    method call (line 501) | def call(
  class TFBertPreTrainedModel (line 583) | class TFBertPreTrainedModel(TFPreTrainedModel):
  class TFBertModel (line 667) | class TFBertModel(TFBertPreTrainedModel):
    method __init__ (line 668) | def __init__(self, config, *inputs, **kwargs):
    method call (line 673) | def call(self, inputs, **kwargs):
  class TFBertForPreTraining (line 718) | class TFBertForPreTraining(TFBertPreTrainedModel):
    method __init__ (line 719) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 726) | def get_output_embeddings(self):
    method call (line 730) | def call(self, inputs, **kwargs):
  class TFBertForMaskedLM (line 775) | class TFBertForMaskedLM(TFBertPreTrainedModel):
    method __init__ (line 776) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 782) | def get_output_embeddings(self):
    method call (line 786) | def call(self, inputs, **kwargs):
  class TFBertForNextSentencePrediction (line 828) | class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
    method __init__ (line 829) | def __init__(self, config, *inputs, **kwargs):
    method call (line 836) | def call(self, inputs, **kwargs):
  class TFBertForSequenceClassification (line 883) | class TFBertForSequenceClassification(TFBertPreTrainedModel):
    method __init__ (line 884) | def __init__(self, config, *inputs, **kwargs):
    method call (line 895) | def call(self, inputs, **kwargs):
  class TFBertForMultipleChoice (line 941) | class TFBertForMultipleChoice(TFBertPreTrainedModel):
    method __init__ (line 942) | def __init__(self, config, *inputs, **kwargs):
    method dummy_inputs (line 952) | def dummy_inputs(self):
    method call (line 961) | def call(
  class TFBertForTokenClassification (line 1064) | class TFBertForTokenClassification(TFBertPreTrainedModel):
    method __init__ (line 1065) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1076) | def call(self, inputs, **kwargs):
  class TFBertForQuestionAnswering (line 1122) | class TFBertForQuestionAnswering(TFBertPreTrainedModel):
    method __init__ (line 1123) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1133) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_camembert.py
  class TFCamembertModel (line 70) | class TFCamembertModel(TFRobertaModel):
  class TFCamembertForMaskedLM (line 82) | class TFCamembertForMaskedLM(TFRobertaForMaskedLM):
  class TFCamembertForSequenceClassification (line 96) | class TFCamembertForSequenceClassification(TFRobertaForSequenceClassific...
  class TFCamembertForTokenClassification (line 110) | class TFCamembertForTokenClassification(TFRobertaForTokenClassification):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_ctrl.py
  function angle_defn (line 38) | def angle_defn(pos, i, d_model_size):
  function positional_encoding (line 43) | def positional_encoding(position, d_model_size):
  function scaled_dot_product_attention (line 55) | def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, hea...
  class TFMultiHeadAttention (line 80) | class TFMultiHeadAttention(tf.keras.layers.Layer):
    method __init__ (line 81) | def __init__(self, d_model_size, num_heads, output_attentions=False, *...
    method split_into_heads (line 95) | def split_into_heads(self, x, batch_size):
    method call (line 99) | def call(self, inputs, training=False):
  function point_wise_feed_forward_network (line 142) | def point_wise_feed_forward_network(d_model_size, dff, name=""):
  class TFEncoderLayer (line 149) | class TFEncoderLayer(tf.keras.layers.Layer):
    method __init__ (line 150) | def __init__(
    method call (line 166) | def call(self, inputs, training=False):
  class TFCTRLMainLayer (line 186) | class TFCTRLMainLayer(tf.keras.layers.Layer):
    method __init__ (line 189) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 218) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 221) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 224) | def _prune_heads(self, heads_to_prune):
    method call (line 230) | def call(
  class TFCTRLPreTrainedModel (line 379) | class TFCTRLPreTrainedModel(TFPreTrainedModel):
  class TFCTRLModel (line 471) | class TFCTRLModel(TFCTRLPreTrainedModel):
    method __init__ (line 472) | def __init__(self, config, *inputs, **kwargs):
    method call (line 477) | def call(self, inputs, **kwargs):
  class TFCTRLLMHead (line 515) | class TFCTRLLMHead(tf.keras.layers.Layer):
    method __init__ (line 516) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 524) | def build(self, input_shape):
    method call (line 528) | def call(self, hidden_states):
  class TFCTRLLMHeadModel (line 539) | class TFCTRLLMHeadModel(TFCTRLPreTrainedModel):
    method __init__ (line 540) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 546) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 549) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 557) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_distilbert.py
  function gelu (line 46) | def gelu(x):
  function gelu_new (line 57) | def gelu_new(x):
  class TFEmbeddings (line 70) | class TFEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 71) | def __init__(self, config, **kwargs):
    method build (line 89) | def build(self, input_shape):
    method call (line 99) | def call(self, inputs, inputs_embeds=None, mode="embedding", training=...
    method _embedding (line 121) | def _embedding(self, inputs, inputs_embeds=None, training=False):
    method _linear (line 156) | def _linear(self, inputs):
  class TFMultiHeadSelfAttention (line 172) | class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 173) | def __init__(self, config, **kwargs):
    method prune_heads (line 198) | def prune_heads(self, heads):
    method call (line 201) | def call(self, inputs, training=False):
  class TFFFN (line 262) | class TFFFN(tf.keras.layers.Layer):
    method __init__ (line 263) | def __init__(self, config, **kwargs):
    method call (line 279) | def call(self, input, training=False):
  class TFTransformerBlock (line 287) | class TFTransformerBlock(tf.keras.layers.Layer):
    method __init__ (line 288) | def __init__(self, config, **kwargs):
    method call (line 306) | def call(self, inputs, training=False):  # removed: src_enc=None, src_...
  class TFTransformer (line 341) | class TFTransformer(tf.keras.layers.Layer):
    method __init__ (line 342) | def __init__(self, config, **kwargs):
    method call (line 350) | def call(self, inputs, training=False):
  class TFDistilBertMainLayer (line 402) | class TFDistilBertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 403) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 410) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 413) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 416) | def _prune_heads(self, heads_to_prune):
    method call (line 419) | def call(self, inputs, attention_mask=None, head_mask=None, inputs_emb...
  class TFDistilBertPreTrainedModel (line 465) | class TFDistilBertPreTrainedModel(TFPreTrainedModel):
  class TFDistilBertModel (line 539) | class TFDistilBertModel(TFDistilBertPreTrainedModel):
    method __init__ (line 540) | def __init__(self, config, *inputs, **kwargs):
    method call (line 545) | def call(self, inputs, **kwargs):
  class TFDistilBertLMHead (line 577) | class TFDistilBertLMHead(tf.keras.layers.Layer):
    method __init__ (line 578) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 586) | def build(self, input_shape):
    method call (line 590) | def call(self, hidden_states):
  class TFDistilBertForMaskedLM (line 599) | class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel):
    method __init__ (line 600) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 614) | def get_output_embeddings(self):
    method call (line 618) | def call(self, inputs, **kwargs):
  class TFDistilBertForSequenceClassification (line 665) | class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel):
    method __init__ (line 666) | def __init__(self, config, *inputs, **kwargs):
    method call (line 683) | def call(self, inputs, **kwargs):
  class TFDistilBertForTokenClassification (line 729) | class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel):
    method __init__ (line 730) | def __init__(self, config, *inputs, **kwargs):
    method call (line 741) | def call(self, inputs, **kwargs):
  class TFDistilBertForQuestionAnswering (line 786) | class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel):
    method __init__ (line 787) | def __init__(self, config, *inputs, **kwargs):
    method call (line 798) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_electra.py
  class TFElectraEmbeddings (line 27) | class TFElectraEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 31) | def __init__(self, config, **kwargs):
    method build (line 55) | def build(self, input_shape):
    method call (line 67) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 89) | def _embedding(self, inputs, training=False):
    method _linear (line 114) | def _linear(self, inputs):
  class TFElectraDiscriminatorPredictions (line 130) | class TFElectraDiscriminatorPredictions(tf.keras.layers.Layer):
    method __init__ (line 131) | def __init__(self, config, **kwargs):
    method call (line 138) | def call(self, discriminator_hidden_states, training=False):
  class TFElectraGeneratorPredictions (line 146) | class TFElectraGeneratorPredictions(tf.keras.layers.Layer):
    method __init__ (line 147) | def __init__(self, config, **kwargs):
    method call (line 153) | def call(self, generator_hidden_states, training=False):
  class TFElectraPreTrainedModel (line 161) | class TFElectraPreTrainedModel(TFBertPreTrainedModel):
    method get_extended_attention_mask (line 166) | def get_extended_attention_mask(self, attention_mask, input_shape):
    method get_head_mask (line 188) | def get_head_mask(self, head_mask):
  class TFElectraMainLayer (line 197) | class TFElectraMainLayer(TFElectraPreTrainedModel):
    method __init__ (line 201) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 210) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 213) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 216) | def _prune_heads(self, heads_to_prune):
    method call (line 223) | def call(
  class TFElectraModel (line 348) | class TFElectraModel(TFElectraPreTrainedModel):
    method __init__ (line 349) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 353) | def get_input_embeddings(self):
    method call (line 357) | def call(self, inputs, **kwargs):
  class TFElectraForPreTraining (line 398) | class TFElectraForPreTraining(TFElectraPreTrainedModel):
    method __init__ (line 399) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 405) | def get_input_embeddings(self):
    method call (line 409) | def call(
  class TFElectraMaskedLMHead (line 458) | class TFElectraMaskedLMHead(tf.keras.layers.Layer):
    method __init__ (line 459) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 464) | def build(self, input_shape):
    method call (line 468) | def call(self, hidden_states, training=False):
  class TFElectraForMaskedLM (line 482) | class TFElectraForMaskedLM(TFElectraPreTrainedModel):
    method __init__ (line 483) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 495) | def get_input_embeddings(self):
    method get_output_embeddings (line 498) | def get_output_embeddings(self):
    method call (line 502) | def call(
  class TFElectraForTokenClassification (line 560) | class TFElectraForTokenClassification(TFElectraPreTrainedModel):
    method __init__ (line 561) | def __init__(self, config, **kwargs):
    method call (line 569) | def call(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_flaubert.py
  class TFFlaubertModel (line 107) | class TFFlaubertModel(TFXLMModel):
    method __init__ (line 110) | def __init__(self, config, *inputs, **kwargs):
  class TFFlaubertMainLayer (line 115) | class TFFlaubertMainLayer(TFXLMMainLayer):
    method __init__ (line 116) | def __init__(self, config, *inputs, **kwargs):
    method call (line 121) | def call(
  class TFFlaubertWithLMHeadModel (line 311) | class TFFlaubertWithLMHeadModel(TFXLMWithLMHeadModel):
    method __init__ (line 314) | def __init__(self, config, *inputs, **kwargs):
  class TFFlaubertForSequenceClassification (line 324) | class TFFlaubertForSequenceClassification(TFXLMForSequenceClassification):
    method __init__ (line 327) | def __init__(self, config, *inputs, **kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_gpt2.py
  function gelu (line 50) | def gelu(x):
  class TFAttention (line 63) | class TFAttention(tf.keras.layers.Layer):
    method __init__ (line 64) | def __init__(self, nx, n_ctx, config, scale=False, **kwargs):
    method prune_heads (line 82) | def prune_heads(self, heads):
    method causal_attention_mask (line 86) | def causal_attention_mask(nd, ns, dtype):
    method _attn (line 95) | def _attn(self, inputs, training=False):
    method merge_heads (line 125) | def merge_heads(self, x):
    method split_heads (line 131) | def split_heads(self, x):
    method call (line 137) | def call(self, inputs, training=False):
  class TFMLP (line 175) | class TFMLP(tf.keras.layers.Layer):
    method __init__ (line 176) | def __init__(self, n_state, config, **kwargs):
    method call (line 184) | def call(self, x, training=False):
  class TFBlock (line 191) | class TFBlock(tf.keras.layers.Layer):
    method __init__ (line 192) | def __init__(self, n_ctx, config, scale=False, **kwargs):
    method call (line 200) | def call(self, inputs, training=False):
  class TFGPT2MainLayer (line 217) | class TFGPT2MainLayer(tf.keras.layers.Layer):
    method __init__ (line 220) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 241) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 244) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 247) | def _prune_heads(self, heads_to_prune):
    method call (line 253) | def call(
  class TFGPT2PreTrainedModel (line 387) | class TFGPT2PreTrainedModel(TFPreTrainedModel):
  class TFGPT2Model (line 475) | class TFGPT2Model(TFGPT2PreTrainedModel):
    method __init__ (line 476) | def __init__(self, config, *inputs, **kwargs):
    method call (line 481) | def call(self, inputs, **kwargs):
  class TFGPT2LMHeadModel (line 524) | class TFGPT2LMHeadModel(TFGPT2PreTrainedModel):
    method __init__ (line 525) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 529) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 532) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 540) | def call(self, inputs, **kwargs):
  class TFGPT2DoubleHeadsModel (line 593) | class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
    method __init__ (line 594) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 602) | def get_output_embeddings(self):
    method call (line 606) | def call(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_openai.py
  function gelu (line 45) | def gelu(x):
  function swish (line 58) | def swish(x):
  class TFAttention (line 69) | class TFAttention(tf.keras.layers.Layer):
    method __init__ (line 70) | def __init__(self, nx, n_ctx, config, scale=False, **kwargs):
    method prune_heads (line 88) | def prune_heads(self, heads):
    method causal_attention_mask (line 92) | def causal_attention_mask(nd, ns, dtype):
    method _attn (line 101) | def _attn(self, inputs, training=False):
    method merge_heads (line 131) | def merge_heads(self, x):
    method split_heads (line 137) | def split_heads(self, x):
    method call (line 143) | def call(self, inputs, training=False):
  class TFMLP (line 163) | class TFMLP(tf.keras.layers.Layer):
    method __init__ (line 164) | def __init__(self, n_state, config, **kwargs):
    method call (line 172) | def call(self, x, training=False):
  class TFBlock (line 179) | class TFBlock(tf.keras.layers.Layer):
    method __init__ (line 180) | def __init__(self, n_ctx, config, scale=False, **kwargs):
    method call (line 188) | def call(self, inputs, training=False):
  class TFOpenAIGPTMainLayer (line 202) | class TFOpenAIGPTMainLayer(tf.keras.layers.Layer):
    method __init__ (line 203) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 223) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 226) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 229) | def _prune_heads(self, heads_to_prune):
    method call (line 235) | def call(
  class TFOpenAIGPTPreTrainedModel (line 349) | class TFOpenAIGPTPreTrainedModel(TFPreTrainedModel):
  class TFOpenAIGPTModel (line 430) | class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 431) | def __init__(self, config, *inputs, **kwargs):
    method call (line 436) | def call(self, inputs, **kwargs):
  class TFOpenAIGPTLMHeadModel (line 475) | class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 476) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 480) | def get_output_embeddings(self):
    method call (line 484) | def call(self, inputs, **kwargs):
  class TFOpenAIGPTDoubleHeadsModel (line 532) | class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 533) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 541) | def get_output_embeddings(self):
    method call (line 545) | def call(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_pytorch_utils.py
  function convert_tf_weight_name_to_pt_weight_name (line 29) | def convert_tf_weight_name_to_pt_weight_name(tf_name, start_prefix_to_re...
  function load_pytorch_checkpoint_in_tf2_model (line 73) | def load_pytorch_checkpoint_in_tf2_model(tf_model, pytorch_checkpoint_pa...
  function load_pytorch_model_in_tf2_model (line 97) | def load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=None, ...
  function load_pytorch_weights_in_tf2_model (line 107) | def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs...
  function load_tf2_checkpoint_in_pytorch_model (line 205) | def load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path, t...
  function load_tf2_model_in_pytorch_model (line 240) | def load_tf2_model_in_pytorch_model(pt_model, tf_model, allow_missing_ke...
  function load_tf2_weights_in_pytorch_model (line 248) | def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missin...

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_roberta.py
  class TFRobertaEmbeddings (line 40) | class TFRobertaEmbeddings(TFBertEmbeddings):
    method __init__ (line 45) | def __init__(self, config, **kwargs):
    method create_position_ids_from_input_ids (line 49) | def create_position_ids_from_input_ids(self, x):
    method create_position_ids_from_inputs_embeds (line 60) | def create_position_ids_from_inputs_embeds(self, inputs_embeds):
    method _embedding (line 71) | def _embedding(self, inputs, training=False):
  class TFRobertaMainLayer (line 85) | class TFRobertaMainLayer(TFBertMainLayer):
    method __init__ (line 90) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 94) | def get_input_embeddings(self):
  class TFRobertaPreTrainedModel (line 98) | class TFRobertaPreTrainedModel(TFPreTrainedModel):
  class TFRobertaModel (line 182) | class TFRobertaModel(TFRobertaPreTrainedModel):
    method __init__ (line 183) | def __init__(self, config, *inputs, **kwargs):
    method call (line 188) | def call(self, inputs, **kwargs):
  class TFRobertaLMHead (line 228) | class TFRobertaLMHead(tf.keras.layers.Layer):
    method __init__ (line 231) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 244) | def build(self, input_shape):
    method call (line 248) | def call(self, features):
  class TFRobertaForMaskedLM (line 260) | class TFRobertaForMaskedLM(TFRobertaPreTrainedModel):
    method __init__ (line 261) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 267) | def get_output_embeddings(self):
    method call (line 271) | def call(self, inputs, **kwargs):
  class TFRobertaClassificationHead (line 310) | class TFRobertaClassificationHead(tf.keras.layers.Layer):
    method __init__ (line 313) | def __init__(self, config, **kwargs):
    method call (line 326) | def call(self, features, training=False):
  class TFRobertaForSequenceClassification (line 340) | class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel):
    method __init__ (line 341) | def __init__(self, config, *inputs, **kwargs):
    method call (line 349) | def call(self, inputs, **kwargs):
  class TFRobertaForTokenClassification (line 394) | class TFRobertaForTokenClassification(TFRobertaPreTrainedModel):
    method __init__ (line 395) | def __init__(self, config, *inputs, **kwargs):
    method call (line 406) | def call(self, inputs, **kwargs):
  class TFRobertaForQuestionAnswering (line 451) | class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel):
    method __init__ (line 452) | def __init__(self, config, *inputs, **kwargs):
    method call (line 462) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_t5.py
  class TFT5LayerNorm (line 49) | class TFT5LayerNorm(tf.keras.layers.Layer):
    method __init__ (line 50) | def __init__(self, epsilon=1e-6, **kwargs):
    method build (line 57) | def build(self, input_shape):
    method call (line 62) | def call(self, x):
  class TFT5DenseReluDense (line 68) | class TFT5DenseReluDense(tf.keras.layers.Layer):
    method __init__ (line 69) | def __init__(self, config, **kwargs):
    method call (line 76) | def call(self, hidden_states, training=False):
  class TFT5LayerFF (line 84) | class TFT5LayerFF(tf.keras.layers.Layer):
    method __init__ (line 85) | def __init__(self, config, **kwargs):
    method call (line 91) | def call(self, hidden_states, training=False):
  class TFT5Attention (line 98) | class TFT5Attention(tf.keras.layers.Layer):
    method __init__ (line 101) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method prune_heads (line 127) | def prune_heads(self, heads):
    method _relative_position_bucket (line 131) | def _relative_position_bucket(relative_position, bidirectional=True, n...
    method compute_bias (line 176) | def compute_bias(self, qlen, klen):
    method call (line 188) | def call(
  class TFT5LayerSelfAttention (line 302) | class TFT5LayerSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 303) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 311) | def call(
  class TFT5LayerCrossAttention (line 337) | class TFT5LayerCrossAttention(tf.keras.layers.Layer):
    method __init__ (line 338) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 346) | def call(
  class TFT5Block (line 376) | class TFT5Block(tf.keras.layers.Layer):
    method __init__ (line 377) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 393) | def call(
  class _NoLayerEmbedTokens (line 471) | class _NoLayerEmbedTokens(object):
    method __init__ (line 478) | def __init__(self, layer, abs_scope_name=None):
    method call (line 482) | def call(self, inputs, mode="embedding"):
    method __call__ (line 491) | def __call__(self, inputs, mode="embedding"):
  class TFT5MainLayer (line 505) | class TFT5MainLayer(tf.keras.layers.Layer):
    method __init__ (line 506) | def __init__(self, config, embed_tokens=None, **kwargs):
    method get_input_embeddings (line 524) | def get_input_embeddings(self):
    method get_output_embeddings (line 527) | def get_output_embeddings(self):
    method set_embed_tokens (line 530) | def set_embed_tokens(self, embed_tokens):
    method _resize_token_embeddings (line 533) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 536) | def _prune_heads(self, heads_to_prune):
    method call (line 539) | def call(
  class TFT5PreTrainedModel (line 718) | class TFT5PreTrainedModel(TFPreTrainedModel):
    method dummy_inputs (line 727) | def dummy_inputs(self):
  class TFT5Model (line 828) | class TFT5Model(TFT5PreTrainedModel):
    method __init__ (line 829) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 846) | def get_input_embeddings(self):
    method get_output_embeddings (line 849) | def get_output_embeddings(self):
    method get_encoder (line 852) | def get_encoder(self):
    method get_decoder (line 855) | def get_decoder(self):
    method call (line 859) | def call(self, inputs, **kwargs):
  class TFT5ForConditionalGeneration (line 947) | class TFT5ForConditionalGeneration(TFT5PreTrainedModel):
    method __init__ (line 948) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 967) | def get_input_embeddings(self):
    method get_output_embeddings (line 970) | def get_output_embeddings(self):
    method get_encoder (line 973) | def get_encoder(self):
    method get_decoder (line 976) | def get_decoder(self):
    method call (line 980) | def call(self, inputs, **kwargs):
    method prepare_inputs_for_generation (line 1079) | def prepare_inputs_for_generation(self, inputs, past, attention_mask, ...
    method _reorder_cache (line 1097) | def _reorder_cache(self, past, beam_idx):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_transfo_xl.py
  class TFPositionalEmbedding (line 39) | class TFPositionalEmbedding(tf.keras.layers.Layer):
    method __init__ (line 40) | def __init__(self, demb, **kwargs):
    method call (line 45) | def call(self, pos_seq, bsz=None):
  class TFPositionwiseFF (line 55) | class TFPositionwiseFF(tf.keras.layers.Layer):
    method __init__ (line 56) | def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, layer_n...
    method call (line 74) | def call(self, inp, training=False):
  class TFRelPartialLearnableMultiHeadAttn (line 98) | class TFRelPartialLearnableMultiHeadAttn(tf.keras.layers.Layer):
    method __init__ (line 99) | def __init__(
    method build (line 152) | def build(self, input_shape):
    method _rel_shift (line 162) | def _rel_shift(self, x):
    method call (line 172) | def call(self, inputs, training=False):
  class TFRelPartialLearnableDecoderLayer (line 252) | class TFRelPartialLearnableDecoderLayer(tf.keras.layers.Layer):
    method __init__ (line 253) | def __init__(
    method call (line 301) | def call(self, inputs, training=False):
  class TFAdaptiveEmbedding (line 311) | class TFAdaptiveEmbedding(tf.keras.layers.Layer):
    method __init__ (line 312) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, init_...
    method build (line 344) | def build(self, input_shape):
    method call (line 357) | def call(self, inp):
  class TFTransfoXLMainLayer (line 384) | class TFTransfoXLMainLayer(tf.keras.layers.Layer):
    method __init__ (line 387) | def __init__(self, config, **kwargs):
    method build (line 455) | def build(self, input_shape):
    method get_input_embeddings (line 465) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 468) | def _resize_token_embeddings(self, new_num_tokens):
    method backward_compatible (line 471) | def backward_compatible(self):
    method reset_length (line 474) | def reset_length(self, tgt_len, ext_len, mem_len):
    method _prune_heads (line 479) | def _prune_heads(self, heads):
    method init_mems (line 482) | def init_mems(self, bsz):
    method _update_mems (line 493) | def _update_mems(self, hids, mems, mlen, qlen):
    method call (line 517) | def call(self, inputs, mems=None, head_mask=None, inputs_embeds=None, ...
  class TFTransfoXLPreTrainedModel (line 628) | class TFTransfoXLPreTrainedModel(TFPreTrainedModel):
  class TFTransfoXLModel (line 693) | class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
    method __init__ (line 694) | def __init__(self, config, *inputs, **kwargs):
    method call (line 699) | def call(self, inputs, **kwargs):
  class TFTransfoXLLMHead (line 737) | class TFTransfoXLLMHead(tf.keras.layers.Layer):
    method __init__ (line 738) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 746) | def build(self, input_shape):
    method call (line 750) | def call(self, hidden_states):
  class TFTransfoXLLMHeadModel (line 761) | class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
    method __init__ (line 762) | def __init__(self, config):
    method get_output_embeddings (line 774) | def get_output_embeddings(self):
    method reset_length (line 781) | def reset_length(self, tgt_len, ext_len, mem_len):
    method init_mems (line 784) | def init_mems(self, bsz):
    method call (line 788) | def call(self, inputs, mems=None, head_mask=None, inputs_embeds=None, ...
    method prepare_inputs_for_generation (line 855) | def prepare_inputs_for_generation(self, inputs, past, **model_kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_transfo_xl_utilities.py
  class TFAdaptiveSoftmaxMask (line 25) | class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
    method __init__ (line 26) | def __init__(self, vocab_size, d_embed, d_proj, cutoffs, div_val=1, ke...
    method build (line 45) | def build(self, input_shape):
    method _logit (line 104) | def _logit(x, W, b, proj=None):
    method _gather_logprob (line 111) | def _gather_logprob(logprob, target):
    method call (line 117) | def call(self, inputs, return_mean=True, training=False):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_utils.py
  class TFModelUtilsMixin (line 34) | class TFModelUtilsMixin:
    method num_parameters (line 39) | def num_parameters(self, only_trainable: bool = False) -> int:
  function keras_serializable (line 49) | def keras_serializable(cls):
  class TFPreTrainedModel (line 107) | class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
    method dummy_inputs (line 127) | def dummy_inputs(self):
    method __init__ (line 135) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 148) | def get_input_embeddings(self):
    method get_output_embeddings (line 162) | def get_output_embeddings(self):
    method _get_resized_embeddings (line 172) | def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None):
    method resize_token_embeddings (line 206) | def resize_token_embeddings(self, new_num_tokens=None):
    method prune_heads (line 221) | def prune_heads(self, heads_to_prune):
    method save_pretrained (line 230) | def save_pretrained(self, save_directory):
    method from_pretrained (line 247) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
    method prepare_inputs_for_generation (line 438) | def prepare_inputs_for_generation(self, inputs, **kwargs):
    method _use_cache (line 441) | def _use_cache(self, outputs, use_cache):
    method generate (line 449) | def generate(
    method _generate_no_beam_search (line 810) | def _generate_no_beam_search(
    method _generate_beam_search (line 973) | def _generate_beam_search(
    method _reorder_cache (line 1294) | def _reorder_cache(past, beam_idx):
  function _create_next_token_logits_penalties (line 1298) | def _create_next_token_logits_penalties(input_ids, logits, repetition_pe...
  function calc_banned_ngram_tokens (line 1312) | def calc_banned_ngram_tokens(prev_input_ids, num_hypos, no_repeat_ngram_...
  function calc_banned_bad_words_ids (line 1335) | def calc_banned_bad_words_ids(prev_input_ids, bad_words_ids):
  function tf_top_k_top_p_filtering (line 1371) | def tf_top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-f...
  function scatter_values_on_batch_indices (line 1421) | def scatter_values_on_batch_indices(values, batch_indices):
  function set_tensor_by_indices_to_value (line 1431) | def set_tensor_by_indices_to_value(tensor, indices, value):
  class BeamHypotheses (line 1437) | class BeamHypotheses(object):
    method __init__ (line 1438) | def __init__(self, num_beams, max_length, length_penalty, early_stoppi...
    method __len__ (line 1449) | def __len__(self):
    method add (line 1455) | def add(self, hyp, sum_logprobs):
    method is_done (line 1469) | def is_done(self, best_sum_logprobs, cur_len=None):
  class TFConv1D (line 1487) | class TFConv1D(tf.keras.layers.Layer):
    method __init__ (line 1488) | def __init__(self, nf, nx, initializer_range=0.02, **kwargs):
    method build (line 1497) | def build(self, input_shape):
    method call (line 1503) | def call(self, x):
  class TFSharedEmbeddings (line 1514) | class TFSharedEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 1518) | def __init__(self, vocab_size, hidden_size, initializer_range=None, **...
    method build (line 1524) | def build(self, input_shape):
    method call (line 1534) | def call(self, inputs, mode="embedding"):
    method _embedding (line 1556) | def _embedding(self, input_ids):
    method _linear (line 1560) | def _linear(self, inputs):
  class TFSequenceSummary (line 1575) | class TFSequenceSummary(tf.keras.layers.Layer):
    method __init__ (line 1591) | def __init__(self, config, initializer_range=0.02, **kwargs):
    method call (line 1623) | def call(self, inputs, training=False):
  function shape_list (line 1682) | def shape_list(x):
  function get_initializer (line 1689) | def get_initializer(initializer_range=0.02):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_xlm.py
  function create_sinusoidal_embeddings (line 49) | def create_sinusoidal_embeddings(n_pos, dim, out):
  function gelu (line 55) | def gelu(x):
  function get_masks (line 66) | def get_masks(slen, lengths, causal, padding_mask=None, dtype=tf.float32):
  class TFMultiHeadAttention (line 97) | class TFMultiHeadAttention(tf.keras.layers.Layer):
    method __init__ (line 101) | def __init__(self, n_heads, dim, config, **kwargs):
    method prune_heads (line 116) | def prune_heads(self, heads):
    method call (line 119) | def call(self, inputs, training=False):
  class TFTransformerFFN (line 185) | class TFTransformerFFN(tf.keras.layers.Layer):
    method __init__ (line 186) | def __init__(self, in_dim, dim_hidden, out_dim, config, **kwargs):
    method call (line 193) | def call(self, input, training=False):
  class TFXLMMainLayer (line 201) | class TFXLMMainLayer(tf.keras.layers.Layer):
    method __init__ (line 202) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 292) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 295) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 298) | def _prune_heads(self, heads_to_prune):
    method call (line 305) | def call(
  class TFXLMPreTrainedModel (line 468) | class TFXLMPreTrainedModel(TFPreTrainedModel):
    method dummy_inputs (line 477) | def dummy_inputs(self):
  class TFXLMModel (line 574) | class TFXLMModel(TFXLMPreTrainedModel):
    method __init__ (line 575) | def __init__(self, config, *inputs, **kwargs):
    method call (line 580) | def call(self, inputs, **kwargs):
  class TFXLMPredLayer (line 614) | class TFXLMPredLayer(tf.keras.layers.Layer):
    method __init__ (line 619) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 636) | def build(self, input_shape):
    method call (line 641) | def call(self, hidden_states):
  class TFXLMWithLMHeadModel (line 652) | class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
    method __init__ (line 653) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 658) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 661) | def prepare_inputs_for_generation(self, inputs, **kwargs):
    method call (line 676) | def call(self, inputs, **kwargs):
  class TFXLMForSequenceClassification (line 720) | class TFXLMForSequenceClassification(TFXLMPreTrainedModel):
    method __init__ (line 721) | def __init__(self, config, *inputs, **kwargs):
    method call (line 729) | def call(self, inputs, **kwargs):
  class TFXLMForQuestionAnsweringSimple (line 774) | class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel):
    method __init__ (line 775) | def __init__(self, config, *inputs, **kwargs):
    method call (line 783) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_xlm_roberta.py
  class TFXLMRobertaModel (line 70) | class TFXLMRobertaModel(TFRobertaModel):
  class TFXLMRobertaForMaskedLM (line 82) | class TFXLMRobertaForMaskedLM(TFRobertaForMaskedLM):
  class TFXLMRobertaForSequenceClassification (line 96) | class TFXLMRobertaForSequenceClassification(TFRobertaForSequenceClassifi...
  class TFXLMRobertaForTokenClassification (line 110) | class TFXLMRobertaForTokenClassification(TFRobertaForTokenClassification):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_tf_xlnet.py
  function gelu (line 47) | def gelu(x):
  function swish (line 56) | def swish(x):
  class TFXLNetRelativeAttention (line 67) | class TFXLNetRelativeAttention(tf.keras.layers.Layer):
    method __init__ (line 68) | def __init__(self, config, **kwargs):
    method build (line 87) | def build(self, input_shape):
    method prune_heads (line 118) | def prune_heads(self, heads):
    method rel_shift (line 121) | def rel_shift(self, x, klen=-1):
    method rel_attn_core (line 133) | def rel_attn_core(self, inputs, training=False):
    method post_attention (line 178) | def post_attention(self, inputs, residual=True, training=False):
    method call (line 193) | def call(self, inputs, training=False):
  class TFXLNetFeedForward (line 290) | class TFXLNetFeedForward(tf.keras.layers.Layer):
    method __init__ (line 291) | def __init__(self, config, **kwargs):
    method call (line 306) | def call(self, inp, training=False):
  class TFXLNetLayer (line 317) | class TFXLNetLayer(tf.keras.layers.Layer):
    method __init__ (line 318) | def __init__(self, config, **kwargs):
    method call (line 324) | def call(self, inputs, training=False):
  class TFXLNetLMHead (line 336) | class TFXLNetLMHead(tf.keras.layers.Layer):
    method __init__ (line 337) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 344) | def build(self, input_shape):
    method call (line 348) | def call(self, hidden_states):
  class TFXLNetMainLayer (line 355) | class TFXLNetMainLayer(tf.keras.layers.Layer):
    method __init__ (line 358) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 380) | def get_input_embeddings(self):
    method build (line 383) | def build(self, input_shape):
    method _resize_token_embeddings (line 389) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 392) | def _prune_heads(self, heads_to_prune):
    method create_mask (line 395) | def create_mask(self, qlen, mlen, dtype=tf.float32):
    method cache_mem (line 424) | def cache_mem(self, curr_out, prev_mem):
    method positional_embedding (line 437) | def positional_embedding(pos_seq, inv_freq, bsz=None):
    method relative_positional_encoding (line 447) | def relative_positional_encoding(self, qlen, klen, bsz=None, dtype=None):
    method call (line 495) | def call(
  class TFXLNetPreTrainedModel (line 699) | class TFXLNetPreTrainedModel(TFPreTrainedModel):
  class TFXLNetModel (line 795) | class TFXLNetModel(TFXLNetPreTrainedModel):
    method __init__ (line 796) | def __init__(self, config, *inputs, **kwargs):
    method call (line 801) | def call(self, inputs, **kwargs):
  class TFXLNetLMHeadModel (line 844) | class TFXLNetLMHeadModel(TFXLNetPreTrainedModel):
    method __init__ (line 845) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 850) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 853) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 885) | def call(self, inputs, **kwargs):
  class TFXLNetForSequenceClassification (line 941) | class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel):
    method __init__ (line 942) | def __init__(self, config, *inputs, **kwargs):
    method call (line 955) | def call(self, inputs, **kwargs):
  class TFXLNetForTokenClassification (line 1005) | class TFXLNetForTokenClassification(TFXLNetPreTrainedModel):
    method __init__ (line 1006) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1015) | def call(self, inputs, **kwargs):
  class TFXLNetForQuestionAnsweringSimple (line 1064) | class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel):
    method __init__ (line 1065) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1073) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_transfo_xl.py
  function build_tf_to_pytorch_map (line 42) | def build_tf_to_pytorch_map(model, config):
  function load_tf_weights_in_transfo_xl (line 109) | def load_tf_weights_in_transfo_xl(model, config, tf_path):
  class PositionalEmbedding (line 167) | class PositionalEmbedding(nn.Module):
    method __init__ (line 168) | def __init__(self, demb):
    method forward (line 176) | def forward(self, pos_seq, bsz=None):
  class PositionwiseFF (line 186) | class PositionwiseFF(nn.Module):
    method __init__ (line 187) | def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, layer_n...
    method forward (line 206) | def forward(self, inp):
  class RelPartialLearnableMultiHeadAttn (line 223) | class RelPartialLearnableMultiHeadAttn(nn.Module):
    method __init__ (line 224) | def __init__(
    method _rel_shift (line 269) | def _rel_shift(self, x):
    method forward (line 281) | def forward(self, w, r, attn_mask=None, mems=None, head_mask=None):
  class RelPartialLearnableDecoderLayer (line 370) | class RelPartialLearnableDecoderLayer(nn.Module):
    method __init__ (line 371) | def __init__(self, n_head, d_model, d_head, d_inner, dropout, layer_no...
    method forward (line 381) | def forward(self, dec_inp, r, dec_attn_mask=None, mems=None, head_mask...
  class AdaptiveEmbedding (line 391) | class AdaptiveEmbedding(nn.Module):
    method __init__ (line 392) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, sampl...
    method forward (line 419) | def forward(self, inp):
  class TransfoXLPreTrainedModel (line 451) | class TransfoXLPreTrainedModel(PreTrainedModel):
    method _init_weight (line 460) | def _init_weight(self, weight):
    method _init_bias (line 466) | def _init_bias(self, bias):
    method _init_weights (line 469) | def _init_weights(self, m):
  class TransfoXLModel (line 552) | class TransfoXLModel(TransfoXLPreTrainedModel):
    method __init__ (line 553) | def __init__(self, config):
    method get_input_embeddings (line 618) | def get_input_embeddings(self):
    method set_input_embeddings (line 621) | def set_input_embeddings(self, new_embeddings):
    method backward_compatible (line 624) | def backward_compatible(self):
    method reset_length (line 627) | def reset_length(self, tgt_len, ext_len, mem_len):
    method _prune_heads (line 632) | def _prune_heads(self, heads):
    method init_mems (line 636) | def init_mems(self, bsz):
    method _update_mems (line 648) | def _update_mems(self, hids, mems, mlen, qlen):
    method forward (line 673) | def forward(self, input_ids=None, mems=None, head_mask=None, inputs_em...
  class TransfoXLLMHeadModel (line 807) | class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
    method __init__ (line 808) | def __init__(self, config):
    method tie_weights (line 823) | def tie_weights(self):
    method reset_length (line 844) | def reset_length(self, tgt_len, ext_len, mem_len):
    method init_mems (line 847) | def init_mems(self, bsz):
    method forward (line 851) | def forward(self, input_ids=None, mems=None, head_mask=None, inputs_em...
    method get_output_embeddings (line 917) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 925) | def prepare_inputs_for_generation(self, input_ids, past, **model_kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_transfo_xl_utilities.py
  class ProjectedAdaptiveLogSoftmax (line 30) | class ProjectedAdaptiveLogSoftmax(nn.Module):
    method __init__ (line 31) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, keep_...
    method _compute_logit (line 72) | def _compute_logit(self, hidden, weight, bias, proj):
    method forward (line 86) | def forward(self, hidden, labels=None, keep_order=False):
    method log_prob (line 193) | def log_prob(self, hidden):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_utils.py
  class Identity (line 47) | class Identity(nn.Module):
    method __init__ (line 51) | def __init__(self, *args, **kwargs):
    method forward (line 54) | def forward(self, input):
  class ModuleUtilsMixin (line 58) | class ModuleUtilsMixin:
    method num_parameters (line 63) | def num_parameters(self, only_trainable: bool = False) -> int:
    method _hook_rss_memory_pre_forward (line 71) | def _hook_rss_memory_pre_forward(module, *args, **kwargs):
    method _hook_rss_memory_post_forward (line 83) | def _hook_rss_memory_post_forward(module, *args, **kwargs):
    method add_memory_hooks (line 96) | def add_memory_hooks(self):
    method reset_memory_hooks_state (line 105) | def reset_memory_hooks_state(self):
    method device (line 112) | def device(self) -> device:
    method dtype (line 130) | def dtype(self) -> dtype:
    method invert_attention_mask (line 147) | def invert_attention_mask(self, encoder_attention_mask: Tensor) -> Ten...
    method get_extended_attention_mask (line 173) | def get_extended_attention_mask(self, attention_mask: Tensor, input_sh...
    method get_head_mask (line 217) | def get_head_mask(self, head_mask: Tensor, num_hidden_layers: int, is_...
    method _convert_head_mask_to_5d (line 238) | def _convert_head_mask_to_5d(self, head_mask, num_hidden_layers):
  class PreTrainedModel (line 250) | class PreTrainedModel(nn.Module, ModuleUtilsMixin):
    method dummy_inputs (line 270) | def dummy_inputs(self):
    method __init__ (line 278) | def __init__(self, config, *inputs, **kwargs):
    method base_model (line 292) | def base_model(self):
    method get_input_embeddings (line 295) | def get_input_embeddings(self):
    method set_input_embeddings (line 309) | def set_input_embeddings(self, value: nn.Module):
    method get_output_embeddings (line 323) | def get_output_embeddings(self):
    method tie_weights (line 333) | def tie_weights(self):
    method _tie_or_clone_weights (line 343) | def _tie_or_clone_weights(self, output_embeddings, input_embeddings):
    method resize_token_embeddings (line 361) | def resize_token_embeddings(self, new_num_tokens: Optional[int] = None):
    method _resize_token_embeddings (line 388) | def _resize_token_embeddings(self, new_num_tokens):
    method _get_resized_embeddings (line 394) | def _get_resized_embeddings(
    method init_weights (line 432) | def init_weights(self):
    method prune_heads (line 444) | def prune_heads(self, heads_to_prune: Dict):
    method save_pretrained (line 459) | def save_pretrained(self, save_directory):
    method from_pretrained (line 494) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
    method prepare_inputs_for_generation (line 777) | def prepare_inputs_for_generation(self, input_ids, **kwargs):
    method prepare_logits_for_generation (line 780) | def prepare_logits_for_generation(self, logits, **kwargs):
    method _use_cache (line 783) | def _use_cache(self, outputs, use_cache):
    method enforce_repetition_penalty_ (line 791) | def enforce_repetition_penalty_(self, lprobs, batch_size, num_beams, p...
    method generate (line 802) | def generate(
    method _generate_no_beam_search (line 1186) | def _generate_no_beam_search(
    method _generate_beam_search (line 1307) | def _generate_beam_search(
    method _reorder_cache (line 1582) | def _reorder_cache(past: Tuple, beam_idx: Tensor) -> Tuple[Tensor]:
  function calc_banned_ngram_tokens (line 1586) | def calc_banned_ngram_tokens(prev_input_ids: Tensor, num_hypos: int, no_...
  function calc_banned_bad_words_ids (line 1609) | def calc_banned_bad_words_ids(prev_input_ids: Iterable[int], bad_words_i...
  function top_k_top_p_filtering (line 1645) | def top_k_top_p_filtering(
  class BeamHypotheses (line 1686) | class BeamHypotheses(object):
    method __init__ (line 1687) | def __init__(self, num_beams, max_length, length_penalty, early_stoppi...
    method __len__ (line 1698) | def __len__(self):
    method add (line 1704) | def add(self, hyp, sum_logprobs):
    method is_done (line 1718) | def is_done(self, best_sum_logprobs, cur_len=None):
  class Conv1D (line 1736) | class Conv1D(nn.Module):
    method __init__ (line 1737) | def __init__(self, nf, nx):
    method forward (line 1748) | def forward(self, x):
  class PoolerStartLogits (line 1755) | class PoolerStartLogits(nn.Module):
    method __init__ (line 1758) | def __init__(self, config):
    method forward (line 1762) | def forward(self, hidden_states, p_mask=None):
  class PoolerEndLogits (line 1779) | class PoolerEndLogits(nn.Module):
    method __init__ (line 1783) | def __init__(self, config):
    method forward (line 1790) | def forward(self, hidden_states, start_states=None, start_positions=No...
  class PoolerAnswerClass (line 1826) | class PoolerAnswerClass(nn.Module):
    method __init__ (line 1829) | def __init__(self, config):
    method forward (line 1835) | def forward(self, hidden_states, start_states=None, start_positions=No...
  class SQuADHead (line 1873) | class SQuADHead(nn.Module):
    method __init__ (line 1914) | def __init__(self, config):
    method forward (line 1923) | def forward(
  class SequenceSummary (line 1990) | class SequenceSummary(nn.Module):
    method __init__ (line 2006) | def __init__(self, config: PretrainedConfig):
    method forward (line 2035) | def forward(self, hidden_states, cls_index=None):
  function create_position_ids_from_input_ids (line 2067) | def create_position_ids_from_input_ids(input_ids, padding_idx):
  function prune_linear_layer (line 2081) | def prune_linear_layer(layer, index, dim=0):
  function prune_conv1d_layer (line 2106) | def prune_conv1d_layer(layer, index, dim=1):
  function prune_layer (line 2130) | def prune_layer(layer, index, dim=None):
  function apply_chunking_to_forward (line 2143) | def apply_chunking_to_forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_xlm.py
  function create_sinusoidal_embeddings (line 52) | def create_sinusoidal_embeddings(n_pos, dim, out):
  function get_masks (line 60) | def get_masks(slen, lengths, causal, padding_mask=None):
  class MultiHeadAttention (line 85) | class MultiHeadAttention(nn.Module):
    method __init__ (line 89) | def __init__(self, n_heads, dim, config):
    method prune_heads (line 104) | def prune_heads(self, heads):
    method forward (line 125) | def forward(self, input, mask, kv=None, cache=None, head_mask=None):
  class TransformerFFN (line 189) | class TransformerFFN(nn.Module):
    method __init__ (line 190) | def __init__(self, in_dim, dim_hidden, out_dim, config):
    method forward (line 197) | def forward(self, input):
  class XLMPreTrainedModel (line 205) | class XLMPreTrainedModel(PreTrainedModel):
    method __init__ (line 214) | def __init__(self, *inputs, **kwargs):
    method dummy_inputs (line 218) | def dummy_inputs(self):
    method _init_weights (line 227) | def _init_weights(self, module):
  class XLMModel (line 313) | class XLMModel(XLMPreTrainedModel):
    method __init__ (line 314) | def __init__(self, config):  # , dico, is_encoder, with_output):
    method get_input_embeddings (line 384) | def get_input_embeddings(self):
    method set_input_embeddings (line 387) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 390) | def _prune_heads(self, heads_to_prune):
    method forward (line 399) | def forward(
  class XLMPredLayer (line 554) | class XLMPredLayer(nn.Module):
    method __init__ (line 559) | def __init__(self, config):
    method forward (line 577) | def forward(self, x, y=None):
  class XLMWithLMHeadModel (line 602) | class XLMWithLMHeadModel(XLMPreTrainedModel):
    method __init__ (line 603) | def __init__(self, config):
    method get_output_embeddings (line 610) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 613) | def prepare_inputs_for_generation(self, input_ids, **kwargs):
    method forward (line 627) | def forward(
  class XLMForSequenceClassification (line 702) | class XLMForSequenceClassification(XLMPreTrainedModel):
    method __init__ (line 703) | def __init__(self, config):
    method forward (line 713) | def forward(
  class XLMForQuestionAnsweringSimple (line 799) | class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
    method __init__ (line 800) | def __init__(self, config):
    method forward (line 809) | def forward(
  class XLMForQuestionAnswering (line 917) | class XLMForQuestionAnswering(XLMPreTrainedModel):
    method __init__ (line 918) | def __init__(self, config):
    method forward (line 927) | def forward(
  class XLMForTokenClassification (line 1034) | class XLMForTokenClassification(XLMPreTrainedModel):
    method __init__ (line 1035) | def __init__(self, config):
    method forward (line 1046) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_xlm_roberta.py
  class XLMRobertaModel (line 62) | class XLMRobertaModel(RobertaModel):
  class XLMRobertaForMaskedLM (line 74) | class XLMRobertaForMaskedLM(RobertaForMaskedLM):
  class XLMRobertaForSequenceClassification (line 88) | class XLMRobertaForSequenceClassification(RobertaForSequenceClassificati...
  class XLMRobertaForMultipleChoice (line 102) | class XLMRobertaForMultipleChoice(RobertaForMultipleChoice):
  class XLMRobertaForTokenClassification (line 116) | class XLMRobertaForTokenClassification(RobertaForTokenClassification):

FILE: code/nezha-base-count3/pretrain/transformers1/modeling_xlnet.py
  function build_tf_xlnet_to_pytorch_map (line 42) | def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None):
  function load_tf_weights_in_xlnet (line 125) | def load_tf_weights_in_xlnet(model, config, tf_path):
  class XLNetRelativeAttention (line 193) | class XLNetRelativeAttention(nn.Module):
    method __init__ (line 194) | def __init__(self, config):
    method prune_heads (line 223) | def prune_heads(self, heads):
    method rel_shift (line 227) | def rel_shift(x, klen=-1):
    method rel_shift_bnij (line 240) | def rel_shift_bnij(x, klen=-1):
    method rel_attn_core (line 254) | def rel_attn_core(self, q_head, k_head_h, v_head_h, k_head_r, seg_mat=...
    method post_attention (line 296) | def post_attention(self, h, attn_vec, residual=True):
    method forward (line 308) | def forward(self, h, g, attn_mask_h, attn_mask_g, r, seg_mat, mems=Non...
  class XLNetFeedForward (line 403) | class XLNetFeedForward(nn.Module):
    method __init__ (line 404) | def __init__(self, config):
    method forward (line 415) | def forward(self, inp):
  class XLNetLayer (line 426) | class XLNetLayer(nn.Module):
    method __init__ (line 427) | def __init__(self, config):
    method forward (line 433) | def forward(
  class XLNetPreTrainedModel (line 457) | class XLNetPreTrainedModel(PreTrainedModel):
    method _init_weights (line 466) | def _init_weights(self, module):
  class XLNetModel (line 568) | class XLNetModel(XLNetPreTrainedModel):
    method __init__ (line 569) | def __init__(self, config):
    method get_input_embeddings (line 590) | def get_input_embeddings(self):
    method set_input_embeddings (line 593) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 596) | def _prune_heads(self, heads_to_prune):
    method create_mask (line 599) | def create_mask(self, qlen, mlen):
    method cache_mem (line 629) | def cache_mem(self, curr_out, prev_mem):
    method positional_embedding (line 642) | def positional_embedding(pos_seq, inv_freq, bsz=None):
    method relative_positional_encoding (line 652) | def relative_positional_encoding(self, qlen, klen, bsz=None):
    method forward (line 692) | def forward(
  class XLNetLMHeadModel (line 927) | class XLNetLMHeadModel(XLNetPreTrainedModel):
    method __init__ (line 928) | def __init__(self, config):
    method get_output_embeddings (line 938) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 941) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 975) | def forward(
  class XLNetForSequenceClassification (line 1083) | class XLNetForSequenceClassification(XLNetPreTrainedModel):
    method __init__ (line 1084) | def __init__(self, config):
    method forward (line 1095) | def forward(
  class XLNetForTokenClassification (line 1189) | class XLNetForTokenClassification(XLNetPreTrainedModel):
    method __init__ (line 1190) | def __init__(self, config):
    method forward (line 1200) | def forward(
  class XLNetForMultipleChoice (line 1298) | class XLNetForMultipleChoice(XLNetPreTrainedModel):
    method __init__ (line 1299) | def __init__(self, config):
    method forward (line 1309) | def forward(
  class XLNetForQuestionAnsweringSimple (line 1411) | class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
    method __init__ (line 1412) | def __init__(self, config):
    method forward (line 1422) | def forward(
  class XLNetForQuestionAnswering (line 1534) | class XLNetForQuestionAnswering(XLNetPreTrainedModel):
    method __init__ (line 1535) | def __init__(self, config):
    method forward (line 1548) | def forward(

FILE: code/nezha-base-count3/pretrain/transformers1/optimization.py
  function get_constant_schedule (line 28) | def get_constant_schedule(optimizer, last_epoch=-1):
  function get_constant_schedule_with_warmup (line 34) | def get_constant_schedule_with_warmup(optimizer, num_warmup_steps, last_...
  function get_linear_schedule_with_warmup (line 47) | def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_tra...
  function get_cosine_schedule_with_warmup (line 62) | def get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_tra...
  function get_cosine_with_hard_restarts_schedule_with_warmup (line 77) | def get_cosine_with_hard_restarts_schedule_with_warmup(
  class AdamW (line 96) | class AdamW(Optimizer):
    method __init__ (line 107) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-6, weig...
    method step (line 119) | def step(self, closure=None):

FILE: code/nezha-base-count3/pretrain/transformers1/optimization_tf.py
  class WarmUp (line 23) | class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
    method __init__ (line 26) | def __init__(
    method __call__ (line 36) | def __call__(self, step):
    method get_config (line 51) | def get_config(self):
  function create_optimizer (line 61) | def create_optimizer(init_lr, num_train_steps, num_warmup_steps, end_lr=...
  class AdamWeightDecay (line 84) | class AdamWeightDecay(tf.keras.optimizers.Adam):
    method __init__ (line 94) | def __init__(
    method from_config (line 113) | def from_config(cls, config):
    method _prepare_local (line 118) | def _prepare_local(self, var_device, var_dtype, apply_state):
    method _decay_weights_op (line 124) | def _decay_weights_op(self, var, learning_rate, apply_state):
    method apply_gradients (line 133) | def apply_gradients(self, grads_and_vars, name=None):
    method _get_lr (line 137) | def _get_lr(self, var_device, var_dtype, apply_state):
    method _resource_apply_dense (line 150) | def _resource_apply_dense(self, grad, var, apply_state=None):
    method _resource_apply_sparse (line 156) | def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
    method get_config (line 162) | def get_config(self):
    method _do_use_weight_decay (line 167) | def _do_use_weight_decay(self, param_name):
  class GradientAccumulator (line 185) | class GradientAccumulator(object):
    method __init__ (line 197) | def __init__(self):
    method step (line 203) | def step(self):
    method gradients (line 216) | def gradients(self):
    method __call__ (line 222) | def __call__(self, gradients):
    method reset (line 248) | def reset(self):

FILE: code/nezha-base-count3/pretrain/transformers1/pipelines.py
  function get_framework (line 69) | def get_framework(model=None):
  class ArgumentHandler (line 89) | class ArgumentHandler(ABC):
    method __call__ (line 95) | def __call__(self, *args, **kwargs):
  class DefaultArgumentHandler (line 99) | class DefaultArgumentHandler(ArgumentHandler):
    method handle_kwargs (line 105) | def handle_kwargs(kwargs: Dict) -> List:
    method handle_args (line 114) | def handle_args(args: Sequence[Any]) -> List[str]:
    method __call__ (line 140) | def __call__(self, *args, **kwargs):
  class PipelineDataFormat (line 150) | class PipelineDataFormat:
    method __init__ (line 164) | def __init__(
    method __iter__ (line 184) | def __iter__(self):
    method save (line 188) | def save(self, data: dict):
    method save_binary (line 196) | def save_binary(self, data: Union[dict, List[dict]]) -> str:
    method from_str (line 211) | def from_str(
  class CsvPipelineDataFormat (line 224) | class CsvPipelineDataFormat(PipelineDataFormat):
    method __init__ (line 225) | def __init__(
    method __iter__ (line 230) | def __iter__(self):
    method save (line 239) | def save(self, data: List[dict]):
  class JsonPipelineDataFormat (line 247) | class JsonPipelineDataFormat(PipelineDataFormat):
    method __init__ (line 248) | def __init__(
    method __iter__ (line 256) | def __iter__(self):
    method save (line 263) | def save(self, data: dict):
  class PipedPipelineDataFormat (line 268) | class PipedPipelineDataFormat(PipelineDataFormat):
    method __iter__ (line 276) | def __iter__(self):
    method save (line 292) | def save(self, data: dict):
    method save_binary (line 295) | def save_binary(self, data: Union[dict, List[dict]]) -> str:
  class _ScikitCompat (line 305) | class _ScikitCompat(ABC):
    method transform (line 311) | def transform(self, X):
    method predict (line 315) | def predict(self, X):
  class Pipeline (line 319) | class Pipeline(_ScikitCompat):
    method __init__ (line 370) | def __init__(
    method save_pretrained (line 402) | def save_pretrained(self, save_directory):
    method transform (line 415) | def transform(self, X):
    method predict (line 421) | def predict(self, X):
    method device_placement (line 428) | def device_placement(self):
    method ensure_tensor_on_device (line 449) | def ensure_tensor_on_device(self, **inputs):
    method _parse_and_tokenize (line 457) | def _parse_and_tokenize(self, *args, pad_to_max_length=True, add_speci...
    method __call__ (line 472) | def __call__(self, *args, **kwargs):
    method _forward (line 476) | def _forward(self, inputs, return_tensors=False):
  class FeatureExtractionPipeline (line 501) | class FeatureExtractionPipeline(Pipeline):
    method __init__ (line 537) | def __init__(
    method __call__ (line 558) | def __call__(self, *args, **kwargs):
  class TextGenerationPipeline (line 562) | class TextGenerationPipeline(Pipeline):
    method __call__ (line 606) | def __call__(
  class TextClassificationPipeline (line 683) | class TextClassificationPipeline(Pipeline):
    method __call__ (line 720) | def __call__(self, *args, **kwargs):
  class FillMaskPipeline (line 726) | class FillMaskPipeline(Pipeline):
    method __init__ (line 764) | def __init__(
    method __call__ (line 788) | def __call__(self, *args, **kwargs):
  class NerPipeline (line 826) | class NerPipeline(Pipeline):
    method __init__ (line 865) | def __init__(
    method __call__ (line 893) | def __call__(self, *args, **kwargs):
    method group_entities (line 973) | def group_entities(self, entities):
  class QuestionAnsweringArgumentHandler (line 993) | class QuestionAnsweringArgumentHandler(ArgumentHandler):
    method __call__ (line 1002) | def __call__(self, *args, **kwargs):
  class QuestionAnsweringPipeline (line 1055) | class QuestionAnsweringPipeline(Pipeline):
    method __init__ (line 1094) | def __init__(
    method create_sample (line 1116) | def create_sample(
    method __call__ (line 1135) | def __call__(self, *args, **kwargs):
    method decode (line 1240) | def decode(self, start: np.ndarray, end: np.ndarray, topk: int, max_an...
    method span_to_answer (line 1280) | def span_to_answer(self, text: str, start: int, end: int):
  class SummarizationPipeline (line 1325) | class SummarizationPipeline(Pipeline):
    method __call__ (line 1373) | def __call__(
  class TranslationPipeline (line 1462) | class TranslationPipeline(Pipeline):
    method __call__ (line 1501) | def __call__(
  function pipeline (line 1677) | def pipeline(

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_albert.py
  class AlbertTokenizer (line 57) | class AlbertTokenizer(PreTrainedTokenizer):
    method __init__ (line 114) | def __init__(
    method vocab_size (line 158) | def vocab_size(self):
    method get_vocab (line 161) | def get_vocab(self):
    method __getstate__ (line 166) | def __getstate__(self):
    method __setstate__ (line 171) | def __setstate__(self, d):
    method preprocess_text (line 184) | def preprocess_text(self, inputs):
    method _tokenize (line 199) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 223) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 227) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 231) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 235) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 261) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 292) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 323) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_auto.py
  class AutoTokenizer (line 94) | class AutoTokenizer:
    method __init__ (line 122) | def __init__(self):
    method from_pretrained (line 129) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa...

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_bart.py
  class BartTokenizer (line 36) | class BartTokenizer(RobertaTokenizer):
  class MBartTokenizer (line 49) | class MBartTokenizer(XLMRobertaTokenizer):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_bert.py
  function load_vocab (line 99) | def load_vocab(vocab_file):
  function whitespace_tokenize (line 110) | def whitespace_tokenize(text):
  class BertTokenizer (line 119) | class BertTokenizer(PreTrainedTokenizer):
    method __init__ (line 163) | def __init__(
    method vocab_size (line 201) | def vocab_size(self):
    method get_vocab (line 204) | def get_vocab(self):
    method _tokenize (line 207) | def _tokenize(self, text):
    method _convert_token_to_id (line 217) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 221) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 225) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 230) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 256) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 287) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 317) | def save_vocabulary(self, vocab_path):
  class BasicTokenizer (line 346) | class BasicTokenizer(object):
    method __init__ (line 349) | def __init__(self, do_lower_case=True, never_split=None, tokenize_chin...
    method tokenize (line 369) | def tokenize(self, text, never_split=None):
    method _run_strip_accents (line 400) | def _run_strip_accents(self, text):
    method _run_split_on_punc (line 411) | def _run_split_on_punc(self, text, never_split=None):
    method _tokenize_chinese_chars (line 433) | def _tokenize_chinese_chars(self, text):
    method _is_chinese_char (line 446) | def _is_chinese_char(self, cp):
    method _clean_text (line 470) | def _clean_text(self, text):
  class WordpieceTokenizer (line 484) | class WordpieceTokenizer(object):
    method __init__ (line 487) | def __init__(self, vocab, unk_token, max_input_chars_per_word=100):
    method tokenize (line 492) | def tokenize(self, text):
  function _is_whitespace (line 544) | def _is_whitespace(char):
  function _is_control (line 556) | def _is_control(char):
  function _is_punctuation (line 568) | def _is_punctuation(char):
  class BertTokenizerFast (line 583) | class BertTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 631) | def __init__(
    method build_inputs_with_special_tokens (line 668) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method create_token_type_ids_from_sequences (line 676) | def create_token_type_ids_from_sequences(

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_bert_japanese.py
  class BertJapaneseTokenizer (line 71) | class BertJapaneseTokenizer(BertTokenizer):
    method __init__ (line 79) | def __init__(
    method _tokenize (line 153) | def _tokenize(self, text):
  class MecabTokenizer (line 167) | class MecabTokenizer:
    method __init__ (line 170) | def __init__(self, do_lower_case=False, never_split=None, normalize_te...
    method tokenize (line 192) | def tokenize(self, text, never_split=None, **kwargs):
  class CharacterTokenizer (line 219) | class CharacterTokenizer(object):
    method __init__ (line 222) | def __init__(self, vocab, unk_token, normalize_text=True):
    method tokenize (line 237) | def tokenize(self, text):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_camembert.py
  class CamembertTokenizer (line 51) | class CamembertTokenizer(PreTrainedTokenizer):
    method __init__ (line 107) | def __init__(
    method build_inputs_with_special_tokens (line 142) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 169) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 199) | def create_token_type_ids_from_sequences(
    method vocab_size (line 224) | def vocab_size(self):
    method _tokenize (line 227) | def _tokenize(self, text):
    method _convert_token_to_id (line 230) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 239) | def _convert_id_to_token(self, index):
    method __getstate__ (line 245) | def __getstate__(self):
    method __setstate__ (line 250) | def __setstate__(self, d):
    method convert_tokens_to_string (line 263) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 268) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_ctrl.py
  function get_pairs (line 102) | def get_pairs(word):
  class CTRLTokenizer (line 117) | class CTRLTokenizer(PreTrainedTokenizer):
    method __init__ (line 141) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
    method vocab_size (line 154) | def vocab_size(self):
    method get_vocab (line 157) | def get_vocab(self):
    method bpe (line 160) | def bpe(self, token):
    method _tokenize (line 204) | def _tokenize(self, text):
    method _convert_token_to_id (line 215) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 219) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 223) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 228) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_distilbert.py
  class DistilBertTokenizer (line 58) | class DistilBertTokenizer(BertTokenizer):
  class DistilBertTokenizerFast (line 76) | class DistilBertTokenizerFast(BertTokenizerFast):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_electra.py
  class ElectraTokenizer (line 52) | class ElectraTokenizer(BertTokenizer):
  class ElectraTokenizerFast (line 68) | class ElectraTokenizerFast(BertTokenizerFast):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_flaubert.py
  function convert_to_unicode (line 63) | def convert_to_unicode(text):
  class FlaubertTokenizer (line 79) | class FlaubertTokenizer(XLMTokenizer):
    method __init__ (line 98) | def __init__(self, do_lowercase=False, **kwargs):
    method preprocess_text (line 103) | def preprocess_text(self, text):
    method _tokenize (line 113) | def _tokenize(self, text, bypass_tokenizer=False):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_gpt2.py
  function bytes_to_unicode (line 63) | def bytes_to_unicode():
  function get_pairs (line 88) | def get_pairs(word):
  class GPT2Tokenizer (line 101) | class GPT2Tokenizer(PreTrainedTokenizer):
    method __init__ (line 139) | def __init__(
    method vocab_size (line 167) | def vocab_size(self):
    method get_vocab (line 170) | def get_vocab(self):
    method bpe (line 173) | def bpe(self, token):
    method _tokenize (line 215) | def _tokenize(self, text):
    method _convert_token_to_id (line 225) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 229) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 233) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 239) | def save_vocabulary(self, save_directory):
    method prepare_for_tokenization (line 274) | def prepare_for_tokenization(self, text, **kwargs):
  class GPT2TokenizerFast (line 280) | class GPT2TokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 326) | def __init__(

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_longformer.py
  class LongformerTokenizer (line 45) | class LongformerTokenizer(RobertaTokenizer):
  class LongformerTokenizerFast (line 54) | class LongformerTokenizerFast(RobertaTokenizerFast):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_marian.py
  class MarianTokenizer (line 28) | class MarianTokenizer(PreTrainedTokenizer):
    method __init__ (line 49) | def __init__(
    method _setup_normalizer (line 91) | def _setup_normalizer(self):
    method normalize (line 100) | def normalize(self, x: str) -> str:
    method _convert_token_to_id (line 104) | def _convert_token_to_id(self, token):
    method remove_language_code (line 107) | def remove_language_code(self, text: str):
    method _tokenize (line 113) | def _tokenize(self, text: str) -> List[str]:
    method _convert_id_to_token (line 118) | def _convert_id_to_token(self, index: int) -> str:
    method convert_tokens_to_string (line 122) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method build_inputs_with_special_tokens (line 126) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method prepare_translation_batch (line 133) | def prepare_translation_batch(
    method vocab_size (line 182) | def vocab_size(self) -> int:
    method save_vocabulary (line 185) | def save_vocabulary(self, save_directory: str) -> Tuple[str]:
    method get_vocab (line 197) | def get_vocab(self) -> Dict:
    method __getstate__ (line 202) | def __getstate__(self) -> Dict:
    method __setstate__ (line 207) | def __setstate__(self, d: Dict) -> None:
    method num_special_tokens_to_add (line 213) | def num_special_tokens_to_add(self, **unused):
    method _special_token_mask (line 217) | def _special_token_mask(self, seq):
    method get_special_tokens_mask (line 222) | def get_special_tokens_mask(
  function load_spm (line 234) | def load_spm(path: str) -> sentencepiece.SentencePieceProcessor:
  function save_json (line 240) | def save_json(data, path: str) -> None:
  function load_json (line 245) | def load_json(path: str) -> Union[Dict, List]:

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_openai.py
  function get_pairs (line 46) | def get_pairs(word):
  function text_standardize (line 59) | def text_standardize(text):
  class OpenAIGPTTokenizer (line 75) | class OpenAIGPTTokenizer(PreTrainedTokenizer):
    method __init__ (line 99) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
    method vocab_size (line 124) | def vocab_size(self):
    method get_vocab (line 127) | def get_vocab(self):
    method bpe (line 130) | def bpe(self, token):
    method _tokenize (line 174) | def _tokenize(self, text):
    method _convert_token_to_id (line 189) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 193) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 197) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 202) | def save_vocabulary(self, save_directory):
  class OpenAIGPTTokenizerFast (line 238) | class OpenAIGPTTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 264) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_reformer.py
  class ReformerTokenizer (line 54) | class ReformerTokenizer(PreTrainedTokenizer):
    method __init__ (line 85) | def __init__(
    method vocab_size (line 117) | def vocab_size(self):
    method get_vocab (line 120) | def get_vocab(self):
    method __getstate__ (line 125) | def __getstate__(self):
    method __setstate__ (line 130) | def __setstate__(self, d):
    method _tokenize (line 143) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 152) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 156) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 162) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 167) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_roberta.py
  class RobertaTokenizer (line 64) | class RobertaTokenizer(GPT2Tokenizer):
    method __init__ (line 126) | def __init__(
    method build_inputs_with_special_tokens (line 154) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 180) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 210) | def create_token_type_ids_from_sequences(
    method prepare_for_tokenization (line 234) | def prepare_for_tokenization(self, text, add_special_tokens=False, **k...
  class RobertaTokenizerFast (line 244) | class RobertaTokenizerFast(GPT2TokenizerFast):
    method __init__ (line 291) | def __init__(
    method mask_token (line 333) | def mask_token(self, value):
    method build_inputs_with_special_tokens (line 340) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method create_token_type_ids_from_sequences (line 347) | def create_token_type_ids_from_sequences(

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_t5.py
  class T5Tokenizer (line 62) | class T5Tokenizer(PreTrainedTokenizer):
    method __init__ (line 98) | def __init__(
    method vocab_size (line 139) | def vocab_size(self):
    method get_vocab (line 142) | def get_vocab(self):
    method __getstate__ (line 147) | def __getstate__(self):
    method __setstate__ (line 152) | def __setstate__(self, d):
    method _tokenize (line 165) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 174) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 182) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 190) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 195) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_transfo_xl.py
  class TransfoXLTokenizer (line 72) | class TransfoXLTokenizer(PreTrainedTokenizer):
    method __init__ (line 85) | def __init__(
    method _compile_space_around_punctuation_pattern (line 141) | def _compile_space_around_punctuation_pattern(self):
    method count_file (line 146) | def count_file(self, path, verbose=False, add_eos=False):
    method count_sents (line 162) | def count_sents(self, sents, verbose=False):
    method _build_from_file (line 173) | def _build_from_file(self, vocab_file):
    method save_vocabulary (line 188) | def save_vocabulary(self, vocab_path):
    method build_vocab (line 212) | def build_vocab(self):
    method encode_file (line 232) | def encode_file(self, path, ordered=False, verbose=False, add_eos=True...
    method encode_sents (line 249) | def encode_sents(self, sents, ordered=False, verbose=False):
    method add_special (line 263) | def add_special(self, sym):
    method add_symbol (line 269) | def add_symbol(self, sym):
    method _convert_id_to_token (line 274) | def _convert_id_to_token(self, idx):
    method _convert_token_to_id (line 279) | def _convert_token_to_id(self, sym):
    method convert_tokens_to_string (line 296) | def convert_tokens_to_string(self, tokens):
    method convert_to_tensor (line 301) | def convert_to_tensor(self, symbols):
    method vocab_size (line 305) | def vocab_size(self):
    method get_vocab (line 308) | def get_vocab(self):
    method _tokenize (line 311) | def _tokenize(self, line, add_eos=False, add_double_eos=False):
    method prepare_for_tokenization (line 330) | def prepare_for_tokenization(self, text, **kwargs):
  class _TransfoXLDelimiterLookupTokenizer (line 344) | class _TransfoXLDelimiterLookupTokenizer(BaseTokenizer):
    method __init__ (line 345) | def __init__(
  class TransfoXLTokenizerFast (line 405) | class TransfoXLTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 422) | def __init__(
    method save_pretrained (line 458) | def save_pretrained(self, save_directory):
  class LMOrderedIterator (line 467) | class LMOrderedIterator(object):
    method __init__ (line 468) | def __init__(self, data, bsz, bptt, device="cpu", ext_len=None):
    method get_batch (line 490) | def get_batch(self, i, bptt=None):
    method get_fixlen_iter (line 506) | def get_fixlen_iter(self, start=0):
    method get_varlen_iter (line 510) | def get_varlen_iter(self, start=0, std=5, min_len=5, max_deviation=3):
    method __iter__ (line 522) | def __iter__(self):
  class LMShuffledIterator (line 526) | class LMShuffledIterator(object):
    method __init__ (line 527) | def __init__(self, data, bsz, bptt, device="cpu", ext_len=None, shuffl...
    method get_sent_stream (line 540) | def get_sent_stream(self):
    method stream_iterator (line 548) | def stream_iterator(self, sent_stream):
    method __iter__ (line 595) | def __iter__(self):
  class LMMultiFileIterator (line 603) | class LMMultiFileIterator(LMShuffledIterator):
    method __init__ (line 604) | def __init__(self, paths, vocab, bsz, bptt, device="cpu", ext_len=None...
    method get_sent_stream (line 616) | def get_sent_stream(self, path):
    method __iter__ (line 624) | def __iter__(self):
  class TransfoXLCorpus (line 635) | class TransfoXLCorpus(object):
    method from_pretrained (line 637) | def from_pretrained(cls, pretrained_model_name_or_path, cache_dir=None...
    method __init__ (line 680) | def __init__(self, *args, **kwargs):
    method build_corpus (line 687) | def build_corpus(self, path, dataset):
    method get_iterator (line 721) | def get_iterator(self, split, *args, **kwargs):
  function get_lm_corpus (line 738) | def get_lm_corpus(datadir, dataset):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_utils.py
  class CharSpan (line 61) | class CharSpan(NamedTuple):
  class TokenSpan (line 73) | class TokenSpan(NamedTuple):
  function flatten (line 85) | def flatten(x: Sequence):
  function truncate_and_pad (line 100) | def truncate_and_pad(
  class BatchEncoding (line 164) | class BatchEncoding(UserDict):
    method __init__ (line 177) | def __init__(
    method __getitem__ (line 189) | def __getitem__(self, item: Union[int, str]) -> EncodingFast:
    method __getattr__ (line 203) | def __getattr__(self, item: str):
    method keys (line 206) | def keys(self):
    method values (line 209) | def values(self):
    method items (line 212) | def items(self):
    method encodings (line 220) | def encodings(self) -> Optional[List[EncodingFast]]:
    method tokens (line 228) | def tokens(self, batch_index: int = 0) -> List[int]:
    method words (line 233) | def words(self, batch_index: int = 0) -> List[Optional[int]]:
    method token_to_word (line 238) | def token_to_word(self, batch_or_token_index: int, token_index: Option...
    method word_to_tokens (line 277) | def word_to_tokens(self, batch_or_word_index: int, word_index: Optiona...
    method token_to_chars (line 322) | def token_to_chars(self, batch_or_token_index: int, token_index: Optio...
    method char_to_token (line 359) | def char_to_token(self, batch_or_char_index: int, char_index: Optional...
    method word_to_chars (line 394) | def word_to_chars(self, batch_or_word_index: int, word_index: Optional...
    method char_to_word (line 431) | def char_to_word(self, batch_or_char_index: int, char_index: Optional[...
    method to (line 467) | def to(self, device: str):
  class SpecialTokensMixin (line 473) | class SpecialTokensMixin:
    method __init__ (line 491) | def __init__(self, **kwargs):
    method bos_token (line 517) | def bos_token(self):
    method eos_token (line 524) | def eos_token(self):
    method unk_token (line 531) | def unk_token(self):
    method sep_token (line 538) | def sep_token(self):
    method pad_token (line 545) | def pad_token(self):
    method cls_token (line 552) | def cls_token(self):
    method mask_token (line 559) | def mask_token(self):
    method additional_special_tokens (line 566) | def additional_special_tokens(self):
    method _maybe_update_backend (line 572) | def _maybe_update_backend(self, value):
    method bos_token (line 577) | def bos_token(self, value):
    method eos_token (line 582) | def eos_token(self, value):
    method unk_token (line 587) | def unk_token(self, value):
    method sep_token (line 592) | def sep_token(self, value):
    method pad_token (line 597) | def pad_token(self, value):
    method cls_token (line 602) | def cls_token(self, value):
    method mask_token (line 607) | def mask_token(self, value):
    method additional_special_tokens (line 612) | def additional_special_tokens(self, value):
    method bos_token_id (line 617) | def bos_token_id(self):
    method eos_token_id (line 622) | def eos_token_id(self):
    method unk_token_id (line 627) | def unk_token_id(self):
    method sep_token_id (line 632) | def sep_token_id(self):
    method pad_token_id (line 637) | def pad_token_id(self):
    method pad_token_type_id (line 642) | def pad_token_type_id(self):
    method cls_token_id (line 647) | def cls_token_id(self):
    method mask_token_id (line 652) | def mask_token_id(self):
    method additional_special_tokens_ids (line 657) | def additional_special_tokens_ids(self):
    method special_tokens_map (line 662) | def special_tokens_map(self):
    method all_special_tokens (line 674) | def all_special_tokens(self):
    method all_special_ids (line 686) | def all_special_ids(self):
  class PreTrainedTokenizer (line 695) | class PreTrainedTokenizer(SpecialTokensMixin):
    method vocab_size (line 771) | def vocab_size(self) -> int:
    method is_fast (line 776) | def is_fast(self) -> bool:
    method max_len (line 780) | def max_len(self) -> int:
    method max_len_single_sentence (line 787) | def max_len_single_sentence(self) -> int:
    method max_len_sentences_pair (line 791) | def max_len_sentences_pair(self) -> int:
    method max_len_single_sentence (line 795) | def max_len_single_sentence(self, value) -> int:
    method max_len_sentences_pair (line 807) | def max_len_sentences_pair(self, value) -> int:
    method get_vocab (line 818) | def get_vocab(self):
    method __init__ (line 822) | def __init__(self, model_max_length=None, **kwargs):
    method __len__ (line 854) | def __len__(self):
    method from_pretrained (line 859) | def from_pretrained(cls, *inputs, **kwargs):
    method _from_pretrained (line 914) | def _from_pretrained(cls, pretrained_model_name_or_path, *init_inputs,...
    method save_pretrained (line 1087) | def save_pretrained(self, save_directory):
    method save_vocabulary (line 1128) | def save_vocabulary(self, save_directory) -> Tuple[str]:
    method add_tokens (line 1138) | def add_tokens(self, new_tokens: Union[str, List[str]]) -> int:
    method num_special_tokens_to_add (line 1187) | def num_special_tokens_to_add(self, pair=False):
    method add_special_tokens (line 1206) | def add_special_tokens(self, special_tokens_dict):
    method tokenize (line 1260) | def tokenize(self, text: TextInput, **kwargs):
    method _tokenize (line 1332) | def _tokenize(self, text, **kwargs):
    method convert_tokens_to_ids (line 1341) | def convert_tokens_to_ids(self, tokens):
    method _convert_token_to_id_with_added_voc (line 1356) | def _convert_token_to_id_with_added_voc(self, token):
    method _convert_token_to_id (line 1364) | def _convert_token_to_id(self, token):
    method encode (line 1367) | def encode(
    method encode_plus (line 1439) | def encode_plus(
    method batch_encode_plus (line 1594) | def batch_encode_plus(
    method convert_to_tensors_ (line 1789) | def convert_to_tensors_(self, batch_outputs: dict, return_tensors: str...
    method prepare_for_model (line 1818) | def prepare_for_model(
    method prepare_for_tokenization (line 2018) | def prepare_for_tokenization(self, text: str, **kwargs) -> str:
    method truncate_sequences (line 2022) | def truncate_sequences(
    method create_token_type_ids_from_sequences (line 2082) | def create_token_type_ids_from_sequences(self, token_ids_0: List, toke...
    method build_inputs_with_special_tokens (line 2087) | def build_inputs_with_special_tokens(self, token_ids_0: List, token_id...
    method get_special_tokens_mask (line 2096) | def get_special_tokens_mask(
    method convert_ids_to_tokens (line 2115) | def convert_ids_to_tokens(
    method _convert_id_to_token (line 2140) | def _convert_id_to_token(self, index: int) -> str:
    method convert_tokens_to_string (line 2143) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method decode (line 2150) | def decode(
    method batch_decode (line 2190) | def batch_decode(self, sequences: List[List[int]], **kwargs) -> List[s...
    method clean_up_tokenization (line 2194) | def clean_up_tokenization(out_string: str) -> str:
  class PreTrainedTokenizerFast (line 2212) | class PreTrainedTokenizerFast(PreTrainedTokenizer):
    method __init__ (line 2270) | def __init__(self, tokenizer: BaseTokenizerFast, **kwargs):
    method backend_tokenizer (line 2281) | def backend_tokenizer(self) -> BaseTokenizerFast:
    method decoder (line 2285) | def decoder(self) -> DecoderFast:
    method is_fast (line 2289) | def is_fast(self) -> bool:
    method vocab_size (line 2293) | def vocab_size(self) -> int:
    method __len__ (line 2296) | def __len__(self) -> int:
    method _maybe_update_backend (line 2299) | def _maybe_update_backend(self, value):
    method _convert_encoding (line 2304) | def _convert_encoding(
    method _convert_token_to_id_with_added_voc (line 2360) | def _convert_token_to_id_with_added_voc(self, token: int) -> str:
    method _convert_id_to_token (line 2366) | def _convert_id_to_token(self, index: int) -> Optional[str]:
    method get_vocab (line 2369) | def get_vocab(self):
    method convert_tokens_to_string (line 2372) | def convert_tokens_to_string(self, tokens: List[int], skip_special_tok...
    method add_tokens (line 2375) | def add_tokens(self, new_tokens: List[Union[str, AddedTokenFast]]) -> ...
    method add_special_tokens (line 2402) | def add_special_tokens(self, special_tokens_dict: dict) -> int:
    method num_special_tokens_to_add (line 2421) | def num_special_tokens_to_add(self, pair: bool = False) -> int:
    method tokenize (line 2424) | def tokenize(
    method batch_encode_plus (line 2429) | def batch_encode_plus(
    method encode_plus (line 2567) | def encode_plus(
    method decode (line 2659) | def decode(
    method save_vocabulary (line 2670) | def save_vocabulary(self, save_directory: str) -> Tuple[str]:
  function trim_batch (line 2680) | def trim_batch(

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_xlm.py
  function get_pairs (line 430) | def get_pairs(word):
  function lowercase_and_remove_accent (line 443) | def lowercase_and_remove_accent(text):
  function replace_unicode_punct (line 460) | def replace_unicode_punct(text):
  function remove_non_printing_char (line 503) | def remove_non_printing_char(text):
  function romanian_preprocessing (line 516) | def romanian_preprocessing(text):
  class XLMTokenizer (line 530) | class XLMTokenizer(PreTrainedTokenizer):
    method __init__ (line 594) | def __init__(
    method moses_punct_norm (line 656) | def moses_punct_norm(self, text, lang):
    method moses_tokenize (line 664) | def moses_tokenize(self, text, lang):
    method moses_pipeline (line 672) | def moses_pipeline(self, text, lang):
    method ja_tokenize (line 678) | def ja_tokenize(self, text):
    method vocab_size (line 699) | def vocab_size(self):
    method get_vocab (line 702) | def get_vocab(self):
    method bpe (line 705) | def bpe(self, token):
    method _tokenize (line 749) | def _tokenize(self, text, lang="en", bypass_tokenizer=False):
    method _convert_token_to_id (line 839) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 843) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 847) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 852) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 880) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 911) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 941) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_xlm_roberta.py
  class XLMRobertaTokenizer (line 52) | class XLMRobertaTokenizer(PreTrainedTokenizer):
    method __init__ (line 108) | def __init__(
    method __getstate__ (line 159) | def __getstate__(self):
    method __setstate__ (line 164) | def __setstate__(self, d):
    method build_inputs_with_special_tokens (line 177) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 204) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 235) | def create_token_type_ids_from_sequences(
    method vocab_size (line 261) | def vocab_size(self):
    method get_vocab (line 264) | def get_vocab(self):
    method _tokenize (line 269) | def _tokenize(self, text):
    method _convert_token_to_id (line 272) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 281) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 287) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 292) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count3/pretrain/transformers1/tokenization_xlnet.py
  class XLNetTokenizer (line 53) | class XLNetTokenizer(PreTrainedTokenizer):
    method __init__ (line 113) | def __init__(
    method vocab_size (line 161) | def vocab_size(self):
    method get_vocab (line 164) | def get_vocab(self):
    method __getstate__ (line 169) | def __getstate__(self):
    method __setstate__ (line 174) | def __setstate__(self, d):
    method preprocess_text (line 187) | def preprocess_text(self, inputs):
    method _tokenize (line 202) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 226) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 230) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 234) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 239) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 265) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 296) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 324) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count3/pretrain/transformers1/trainer.py
  function is_apex_available (line 38) | def is_apex_available():
  function is_tensorboard_available (line 60) | def is_tensorboard_available():
  function is_wandb_available (line 77) | def is_wandb_available():
  function set_seed (line 84) | def set_seed(seed: int):
  function torch_distributed_zero_first (line 93) | def torch_distributed_zero_first(local_rank: int):
  class SequentialDistributedSampler (line 104) | class SequentialDistributedSampler(Sampler):
    method __init__ (line 116) | def __init__(self, dataset, num_replicas=None, rank=None):
    method __iter__ (line 131) | def __iter__(self):
    method __len__ (line 144) | def __len__(self):
  function get_tpu_sampler (line 148) | def get_tpu_sampler(dataset: Dataset):
  class Trainer (line 154) | class Trainer:
    method __init__ (line 171) | def __init__(
    method get_test_dataloader (line 222) | def get_test_dataloader(self, test_dataset: Dataset) -> DataLoader:
    method get_optimizers (line 242) | def get_optimizers(
    method _setup_wandb (line 273) | def _setup_wandb(self):
    method num_examples (line 297) | def num_examples(self, dataloader: DataLoader) -> int:
    method train (line 303) | def train(self, model_path: Optional[str] = None):
    method _log (line 510) | def _log(self, logs: Dict[str, float], iterator: Optional[tqdm] = None...
    method _training_step (line 524) | def _training_step(
    method is_local_master (line 547) | def is_local_master(self) -> bool:
    method is_world_master (line 553) | def is_world_master(self) -> bool:
    method save_model (line 563) | def save_model(self, output_dir: Optional[str] = None):
    method _save_tpu (line 576) | def _save_tpu(self, output_dir: Optional[str] = None):
    method _save (line 592) | def _save(self, output_dir: Optional[str] = None):
    method _sorted_checkpoints (line 605) | def _sorted_checkpoints(self, checkpoint_prefix=PREFIX_CHECKPOINT_DIR,...
    method _rotate_checkpoints (line 622) | def _rotate_checkpoints(self, use_mtime=False) -> None:
    method evaluate (line 641) | def evaluate(
    method predict (line 670) | def predict(self, test_dataset: Dataset) -> PredictionOutput:
    method _prediction_loop (line 681) | def _prediction_loop(
    method distributed_concat (line 771) | def distributed_concat(self, tensor: torch.Tensor, num_total_examples:...

FILE: code/nezha-base-count3/pretrain/transformers1/trainer_tf.py
  class TFTrainer (line 20) | class TFTrainer:
    method __init__ (line 31) | def __init__(
    method _setup_training (line 50) | def _setup_training(self) -> None:
    method _set_loss_and_metric (line 67) | def _set_loss_and_metric(self) -> None:
    method _create_summary_writer (line 84) | def _create_summary_writer(self) -> None:
    method _prepare_dataset (line 90) | def _prepare_dataset(self) -> None:
    method _create_optimizer (line 122) | def _create_optimizer(self) -> None:
    method _create_checkpoint_manager (line 146) | def _create_checkpoint_manager(self, max_to_keep: int = 5, load_model:...
    method _evaluate_steps (line 162) | def _evaluate_steps(self, per_replica_features, per_replica_labels):
    method _prediction_loop (line 182) | def _prediction_loop(
    method evaluate (line 237) | def evaluate(
    method train (line 250) | def train(self) -> None:
    method _training_steps (line 317) | def _training_steps(self):
    method _apply_gradients (line 327) | def _apply_gradients(self):
    method _step (line 331) | def _step(self):
    method _accumulate_next_gradients (line 342) | def _accumulate_next_gradients(self):
    method _accumulate_gradients (line 358) | def _accumulate_gradients(self, per_replica_features, per_replica_labe...
    method _forward (line 371) | def _forward(self, features, labels):
    method _run_model (line 383) | def _run_model(self, features, labels, training):
    method predict (line 412) | def predict(self, test_dataset: tf.data.Dataset) -> PredictionOutput:
    method save_model (line 426) | def save_model(self) -> None:

FILE: code/nezha-base-count3/pretrain/transformers1/trainer_utils.py
  class EvalPrediction (line 6) | class EvalPrediction(NamedTuple):
  class PredictionOutput (line 16) | class PredictionOutput(NamedTuple):
  class TrainOutput (line 22) | class TrainOutput(NamedTuple):

FILE: code/nezha-base-count3/pretrain/transformers1/training_args.py
  function is_tpu_available (line 23) | def is_tpu_available():
  class TrainingArguments (line 31) | class TrainingArguments:
    method train_batch_size (line 138) | def train_batch_size(self) -> int:
    method eval_batch_size (line 148) | def eval_batch_size(self) -> int:
    method _setup_devices (line 159) | def _setup_devices(self) -> Tuple["torch.device", int]:
    method device (line 182) | def device(self) -> "torch.device":
    method n_gpu (line 187) | def n_gpu(self):
    method to_json_string (line 190) | def to_json_string(self):
    method to_sanitized_dict (line 196) | def to_sanitized_dict(self) -> Dict[str, Any]:

FILE: code/nezha-base-count3/pretrain/transformers1/training_args_tf.py
  class TFTrainingArguments (line 16) | class TFTrainingArguments(TrainingArguments):
    method _setup_strategy (line 46) | def _setup_strategy(self) -> Tuple["tf.distribute.Strategy", int]:
    method strategy (line 80) | def strategy(self) -> "tf.distribute.Strategy":
    method n_gpu (line 85) | def n_gpu(self) -> int:

FILE: code/nezha-base-count3/pretrain/transformers1/utils_encoder_decoder.py
  function prepare_encoder_decoder_model_kwargs (line 18) | def prepare_encoder_decoder_model_kwargs(**kwargs):

FILE: code/nezha-base-count5/finetuning/NEZHA/configuration_nezha.py
  class NeZhaConfig (line 6) | class NeZhaConfig(PretrainedConfig):
    method __init__ (line 82) | def __init__(

FILE: code/nezha-base-count5/finetuning/NEZHA/modeling_nezha.py
  function load_tf_weights_in_bert (line 48) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
  class BertEmbeddings (line 122) | class BertEmbeddings(nn.Module):
    method __init__ (line 125) | def __init__(self, config):
    method forward (line 134) | def forward(self, input_ids=None, token_type_ids=None, inputs_embeds=N...
  function relative_position_encoding (line 151) | def relative_position_encoding(depth, max_length=512, max_relative_posit...
  class BertSelfAttention (line 175) | class BertSelfAttention(nn.Module):
    method __init__ (line 176) | def __init__(self, config):
    method transpose_for_scores (line 200) | def transpose_for_scores(self, x):
    method forward (line 205) | def forward(
  class BertSelfOutput (line 308) | class BertSelfOutput(nn.Module):
    method __init__ (line 309) | def __init__(self, config):
    method forward (line 315) | def forward(self, hidden_states, input_tensor):
  class BertAttention (line 322) | class BertAttention(nn.Module):
    method __init__ (line 323) | def __init__(self, config):
    method prune_heads (line 329) | def prune_heads(self, heads):
    method forward (line 347) | def forward(
  class BertIntermediate (line 373) | class BertIntermediate(nn.Module):
    method __init__ (line 374) | def __init__(self, config):
    method forward (line 382) | def forward(self, hidden_states):
  class BertOutput (line 388) | class BertOutput(nn.Module):
    method __init__ (line 389) | def __init__(self, config):
    method forward (line 395) | def forward(self, hidden_states, input_tensor):
  class BertLayer (line 402) | class BertLayer(nn.Module):
    method __init__ (line 403) | def __init__(self, config):
    method forward (line 416) | def forward(
    method feed_forward_chunk (line 481) | def feed_forward_chunk(self, attention_output):
  class NeZhaEncoder (line 487) | class NeZhaEncoder(nn.Module):
    method __init__ (line 488) | def __init__(self, config):
    method forward (line 495) | def forward(
  class BertPooler (line 588) | class BertPooler(nn.Module):
    method __init__ (line 589) | def __init__(self, config):
    method forward (line 594) | def forward(self, hidden_states):
  class BertPredictionHeadTransform (line 603) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, hidden_states):
  class BertLMPredictionHead (line 620) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 621) | def __init__(self, config):
    method forward (line 634) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 640) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 641) | def __init__(self, config):
    method forward (line 645) | def forward(self, sequence_output):
  class BertOnlyNSPHead (line 650) | class BertOnlyNSPHead(nn.Module):
    method __init__ (line 651) | def __init__(self, config):
    method forward (line 655) | def forward(self, pooled_output):
  class BertPreTrainingHeads (line 660) | class BertPreTrainingHeads(nn.Module):
    method __init__ (line 661) | def __init__(self, config):
    method forward (line 666) | def forward(self, sequence_output, pooled_output):
  class BertPreTrainedModel (line 672) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 682) | def _init_weights(self, module):
  class BertForPreTrainingOutput (line 700) | class BertForPreTrainingOutput(ModelOutput):
  class NeZhaModel (line 805) | class NeZhaModel(BertPreTrainedModel):
    method __init__ (line 819) | def __init__(self, config, add_pooling_layer=True):
    method get_input_embeddings (line 830) | def get_input_embeddings(self):
    method set_input_embeddings (line 833) | def set_input_embeddings(self, value):
    method _prune_heads (line 836) | def _prune_heads(self, heads_to_prune):
    method forward (line 851) | def forward(
  class BertForPreTraining (line 982) | class BertForPreTraining(BertPreTrainedModel):
    method __init__ (line 983) | def __init__(self, config):
    method get_output_embeddings (line 991) | def get_output_embeddings(self):
    method set_output_embeddings (line 994) | def set_output_embeddings(self, new_embeddings):
    method forward (line 999) | def forward(
  class BertLMHeadModel (line 1083) | class BertLMHeadModel(BertPreTrainedModel):
    method __init__ (line 1088) | def __init__(self, config):
    method get_output_embeddings (line 1099) | def get_output_embeddings(self):
    method set_output_embeddings (line 1102) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1107) | def forward(
    method prepare_inputs_for_generation (line 1209) | def prepare_inputs_for_generation(self, input_ids, past=None, attentio...
    method _reorder_cache (line 1221) | def _reorder_cache(self, past, beam_idx):
  class NeZhaForMaskedLM (line 1229) | class NeZhaForMaskedLM(BertPreTrainedModel):
    method __init__ (line 1234) | def __init__(self, config):
    method get_output_embeddings (line 1248) | def get_output_embeddings(self):
    method set_output_embeddings (line 1251) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1261) | def forward(
    method prepare_inputs_for_generation (line 1318) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class BertForNextSentencePrediction (line 1337) | class BertForNextSentencePrediction(BertPreTrainedModel):
    method __init__ (line 1338) | def __init__(self, config):
    method forward (line 1348) | def forward(
  class BertForSequenceClassification (line 1438) | class BertForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 1439) | def __init__(self, config):
    method forward (line 1456) | def forward(
  class BertForMultipleChoice (line 1523) | class BertForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1524) | def __init__(self, config):
    method forward (line 1540) | def forward(
  class BertForTokenClassification (line 1613) | class BertForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1617) | def __init__(self, config):
    method forward (line 1634) | def forward(
  class BertForQuestionAnswering (line 1704) | class BertForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 1708) | def __init__(self, config):
    method forward (line 1724) | def forward(

FILE: code/nezha-base-count5/finetuning/model.py
  class BertForClass (line 11) | class BertForClass(nn.Module):
    method __init__ (line 12) | def __init__(self, config):
    method forward (line 24) | def forward(self, input_ids, input_masks, segment_ids):
  class BertForClass_MultiDropout (line 37) | class BertForClass_MultiDropout(nn.Module):
    method __init__ (line 38) | def __init__(self, config):
    method forward (line 50) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoCls (line 63) | class BertLastTwoCls(nn.Module):
    method __init__ (line 64) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastCls (line 83) | class BertLastCls(nn.Module):
    method __init__ (line 84) | def __init__(self, config):
    method forward (line 95) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoClsPooler (line 108) | class BertLastTwoClsPooler(nn.Module):
    method __init__ (line 109) | def __init__(self, config):
    method forward (line 120) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddings (line 132) | class BertLastTwoEmbeddings(nn.Module):
    method __init__ (line 133) | def __init__(self, config):
    method forward (line 144) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastTwoEmbeddingsPooler (line 160) | class BertLastTwoEmbeddingsPooler(nn.Module):
    method __init__ (line 161) | def __init__(self, config):
    method forward (line 172) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourCls (line 187) | class BertLastFourCls(nn.Module):
    method __init__ (line 188) | def __init__(self, config):
    method forward (line 199) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourClsPooler (line 215) | class BertLastFourClsPooler(nn.Module):
    method __init__ (line 216) | def __init__(self, config):
    method forward (line 227) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddings (line 239) | class BertLastFourEmbeddings(nn.Module):
    method __init__ (line 240) | def __init__(self, config):
    method forward (line 251) | def forward(self, input_ids, input_masks, segment_ids):
  class BertLastFourEmbeddingsPooler (line 268) | class BertLastFourEmbeddingsPooler(nn.Module):
    method __init__ (line 269) | def __init__(self, config):
    method forward (line 280) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynCls (line 296) | class BertDynCls(nn.Module):
    method __init__ (line 297) | def __init__(self, config):
    method forward (line 311) | def forward(self, input_ids, input_masks, segment_ids):
  class BertDynEmbeddings (line 343) | class BertDynEmbeddings(nn.Module):
    method __init__ (line 344) | def __init__(self, config):
    method forward (line 358) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRNN (line 392) | class BertRNN(nn.Module):
    method __init__ (line 394) | def __init__(self, config):
    method forward (line 434) | def forward(self, input_ids, input_masks, segment_ids):
  class BertCNN (line 459) | class BertCNN(nn.Module):
    method __init__ (line 461) | def __init__(self, config):
    method conv_and_pool (line 480) | def conv_and_pool(self, x, conv):
    method forward (line 485) | def forward(self, input_ids, input_masks, segment_ids):
  class BertRCNN (line 497) | class BertRCNN(nn.Module):
    method __init__ (line 498) | def __init__(self, config):
    method forward (line 540) | def forward(self, input_ids, input_masks, segment_ids):
  class XLNet (line 564) | class XLNet(nn.Module):
    method __init__ (line 566) | def __init__(self, config):
    method forward (line 574) | def forward(self, input_ids, input_masks, segment_ids):
  class ElectraClassificationHead (line 584) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 593) | def forward(self, features, **kwargs):
  class Electra (line 602) | class Electra(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, input_ids, input_masks, segment_ids):
  class NEZHA (line 621) | class NEZHA(nn.Module):
    method __init__ (line 622) | def __init__(self, config):
    method forward (line 637) | def forward(self, input_ids, input_masks, segment_ids):

FILE: code/nezha-base-count5/finetuning/multi_gpu_QA.py
  class Config (line 47) | class Config:
    method __init__ (line 48) | def __init__(self):

FILE: code/nezha-base-count5/finetuning/utils.py
  function paddingList (line 12) | def paddingList(ls:list,val,returnTensor=False):
  function fastTokenizer (line 19) | def fastTokenizer(a:str,b:str,maxLen,tk):
  class data_generator (line 39) | class data_generator:
    method __init__ (line 40) | def __init__(self, data, config, shuffle=False):
    method __len__ (line 53) | def __len__(self):
    method __iter__ (line 56) | def __iter__(self):
  class PGD (line 95) | class PGD():
    method __init__ (line 96) | def __init__(self, model):
    method attack (line 101) | def attack(self, epsilon=0.3, alpha=0.1, emb_name='word_embeddings', i...
    method restore (line 113) | def restore(self, emb_name='word_embeddings'):
    method project (line 121) | def project(self, param_name, param_data, epsilon):
    method backup_grad (line 127) | def backup_grad(self):
    method restore_grad (line 132) | def restore_grad(self):
  class FGM (line 139) | class FGM():
    method __init__ (line 140) | def __init__(self, model):
    method attack (line 144) | def attack(self, epsilon=0.25, emb_name='word_embeddings'):
    method restore (line 154) | def restore(self, emb_name='word_embeddings'):
  class FocalLoss (line 164) | class FocalLoss(nn.Module):
    method __init__ (line 180) | def __init__(self, num_class, alpha=None, gamma=2,
    method forward (line 201) | def forward(self, input, target):
  function f1_match (line 244) | def f1_match(y_true,y_pred):

FILE: code/nezha-base-count5/pretrain/NEZHA/configuration_nezha.py
  class NeZhaConfig (line 6) | class NeZhaConfig(PretrainedConfig):
    method __init__ (line 82) | def __init__(

FILE: code/nezha-base-count5/pretrain/NEZHA/modeling_nezha.py
  function load_tf_weights_in_bert (line 48) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
  class BertEmbeddings (line 122) | class BertEmbeddings(nn.Module):
    method __init__ (line 125) | def __init__(self, config):
    method forward (line 134) | def forward(self, input_ids=None, token_type_ids=None, inputs_embeds=N...
  function relative_position_encoding (line 151) | def relative_position_encoding(depth, max_length=512, max_relative_posit...
  class BertSelfAttention (line 175) | class BertSelfAttention(nn.Module):
    method __init__ (line 176) | def __init__(self, config):
    method transpose_for_scores (line 200) | def transpose_for_scores(self, x):
    method forward (line 205) | def forward(
  class BertSelfOutput (line 308) | class BertSelfOutput(nn.Module):
    method __init__ (line 309) | def __init__(self, config):
    method forward (line 315) | def forward(self, hidden_states, input_tensor):
  class BertAttention (line 322) | class BertAttention(nn.Module):
    method __init__ (line 323) | def __init__(self, config):
    method prune_heads (line 329) | def prune_heads(self, heads):
    method forward (line 347) | def forward(
  class BertIntermediate (line 373) | class BertIntermediate(nn.Module):
    method __init__ (line 374) | def __init__(self, config):
    method forward (line 382) | def forward(self, hidden_states):
  class BertOutput (line 388) | class BertOutput(nn.Module):
    method __init__ (line 389) | def __init__(self, config):
    method forward (line 395) | def forward(self, hidden_states, input_tensor):
  class BertLayer (line 402) | class BertLayer(nn.Module):
    method __init__ (line 403) | def __init__(self, config):
    method forward (line 416) | def forward(
    method feed_forward_chunk (line 481) | def feed_forward_chunk(self, attention_output):
  class NeZhaEncoder (line 487) | class NeZhaEncoder(nn.Module):
    method __init__ (line 488) | def __init__(self, config):
    method forward (line 495) | def forward(
  class BertPooler (line 588) | class BertPooler(nn.Module):
    method __init__ (line 589) | def __init__(self, config):
    method forward (line 594) | def forward(self, hidden_states):
  class BertPredictionHeadTransform (line 603) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 604) | def __init__(self, config):
    method forward (line 613) | def forward(self, hidden_states):
  class BertLMPredictionHead (line 620) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 621) | def __init__(self, config):
    method forward (line 634) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 640) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 641) | def __init__(self, config):
    method forward (line 645) | def forward(self, sequence_output):
  class BertOnlyNSPHead (line 650) | class BertOnlyNSPHead(nn.Module):
    method __init__ (line 651) | def __init__(self, config):
    method forward (line 655) | def forward(self, pooled_output):
  class BertPreTrainingHeads (line 660) | class BertPreTrainingHeads(nn.Module):
    method __init__ (line 661) | def __init__(self, config):
    method forward (line 666) | def forward(self, sequence_output, pooled_output):
  class BertPreTrainedModel (line 672) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 682) | def _init_weights(self, module):
  class BertForPreTrainingOutput (line 700) | class BertForPreTrainingOutput(ModelOutput):
  class NeZhaModel (line 805) | class NeZhaModel(BertPreTrainedModel):
    method __init__ (line 819) | def __init__(self, config, add_pooling_layer=True):
    method get_input_embeddings (line 830) | def get_input_embeddings(self):
    method set_input_embeddings (line 833) | def set_input_embeddings(self, value):
    method _prune_heads (line 836) | def _prune_heads(self, heads_to_prune):
    method forward (line 851) | def forward(
  class BertForPreTraining (line 982) | class BertForPreTraining(BertPreTrainedModel):
    method __init__ (line 983) | def __init__(self, config):
    method get_output_embeddings (line 991) | def get_output_embeddings(self):
    method set_output_embeddings (line 994) | def set_output_embeddings(self, new_embeddings):
    method forward (line 999) | def forward(
  class BertLMHeadModel (line 1083) | class BertLMHeadModel(BertPreTrainedModel):
    method __init__ (line 1088) | def __init__(self, config):
    method get_output_embeddings (line 1099) | def get_output_embeddings(self):
    method set_output_embeddings (line 1102) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1107) | def forward(
    method prepare_inputs_for_generation (line 1209) | def prepare_inputs_for_generation(self, input_ids, past=None, attentio...
    method _reorder_cache (line 1221) | def _reorder_cache(self, past, beam_idx):
  class NeZhaForMaskedLM (line 1229) | class NeZhaForMaskedLM(BertPreTrainedModel):
    method __init__ (line 1234) | def __init__(self, config):
    method get_output_embeddings (line 1248) | def get_output_embeddings(self):
    method set_output_embeddings (line 1251) | def set_output_embeddings(self, new_embeddings):
    method forward (line 1261) | def forward(
    method prepare_inputs_for_generation (line 1318) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class BertForNextSentencePrediction (line 1337) | class BertForNextSentencePrediction(BertPreTrainedModel):
    method __init__ (line 1338) | def __init__(self, config):
    method forward (line 1348) | def forward(
  class BertForSequenceClassification (line 1438) | class BertForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 1439) | def __init__(self, config):
    method forward (line 1456) | def forward(
  class BertForMultipleChoice (line 1523) | class BertForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1524) | def __init__(self, config):
    method forward (line 1540) | def forward(
  class BertForTokenClassification (line 1613) | class BertForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1617) | def __init__(self, config):
    method forward (line 1634) | def forward(
  class BertForQuestionAnswering (line 1704) | class BertForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 1708) | def __init__(self, config):
    method forward (line 1724) | def forward(

FILE: code/nezha-base-count5/pretrain/NLP_Utils.py
  function writeToJsonFile (line 10) | def writeToJsonFile(path: str, obj):
  function readFromJsonFile (line 13) | def readFromJsonFile(path: str):
  function loadData (line 17) | def loadData(path):
  function calNegPos (line 35) | def calNegPos(ls):#计算正负比例
  function paddingList (line 54) | def paddingList(ls:list,val,returnTensor=False):
  function truncate (line 61) | def truncate(a:list,b:list,maxLen):
  class MLM_Data (line 77) | class MLM_Data(Dataset):
    method __init__ (line 79) | def __init__(self,textLs:list,maxLen:int,tk:BertTokenizer):
    method __len__ (line 87) | def __len__(self):
    method random_mask (line 90) | def random_mask(self,text_ids):
    method __getitem__ (line 128) | def __getitem__(self, item):
    method collate (line 143) | def collate(cls,batch):
  function blockShuffle (line 163) | def blockShuffle(data:list,bs:int,sortBsNum,key):
  class blockShuffleDataLoader (line 179) | class blockShuffleDataLoader(DataLoader):
    method __init__ (line 180) | def __init__(self, dataset: Dataset,sortBsNum,key,**kwargs):
    method __iter__ (line 186) | def __iter__(self):

FILE: code/nezha-base-count5/pretrain/transformers1/__main__.py
  function main (line 2) | def main():

FILE: code/nezha-base-count5/pretrain/transformers1/activations.py
  function swish (line 11) | def swish(x):
  function _gelu_python (line 15) | def _gelu_python(x):
  function gelu_new (line 25) | def gelu_new(x):
  function gelu_fast (line 38) | def gelu_fast(x):
  function get_activation (line 52) | def get_activation(activation_string):

FILE: code/nezha-base-count5/pretrain/transformers1/benchmark/benchmark.py
  class PyTorchBenchmark (line 38) | class PyTorchBenchmark(Benchmark):
    method framework_version (line 45) | def framework_version(self):
    method train (line 48) | def train(self, model_name, batch_size, sequence_length, trace_memory=...
    method inference (line 100) | def inference(self, model_name, batch_size, sequence_length, trace_mem...

FILE: code/nezha-base-count5/pretrain/transformers1/benchmark/benchmark_args.py
  function is_tpu_available (line 37) | def is_tpu_available():
  class PyTorchBenchmarkArguments (line 45) | class PyTorchBenchmarkArguments(BenchmarkArguments):
    method _setup_devices (line 52) | def _setup_devices(self) -> Tuple["torch.device", int]:
    method device_idx (line 67) | def device_idx(self) -> int:
    method device (line 72) | def device(self) -> "torch.device":
    method n_gpu (line 77) | def n_gpu(self):

FILE: code/nezha-base-count5/pretrain/transformers1/benchmark/benchmark_args_utils.py
  function list_field (line 24) | def list_field(default=None, metadata=None):
  class BenchmarkArguments (line 29) | class BenchmarkArguments:
    method to_json_string (line 90) | def to_json_string(self):
    method model_names (line 97) | def model_names(self):

FILE: code/nezha-base-count5/pretrain/transformers1/benchmark/benchmark_utils.py
  function is_memory_tracing_enabled (line 43) | def is_memory_tracing_enabled():
  class Frame (line 48) | class Frame(NamedTuple):
  class UsedMemoryState (line 65) | class UsedMemoryState(NamedTuple):
  class Memory (line 77) | class Memory(NamedTuple):
    method __repr__ (line 85) | def __repr__(self) -> str:
  class MemoryState (line 89) | class MemoryState(NamedTuple):
  class MemorySummary (line 103) | class MemorySummary(NamedTuple):
  function start_memory_tracing (line 123) | def start_memory_tracing(
  function stop_memory_tracing (line 273) | def stop_memory_tracing(
  function bytes_to_mega_bytes (line 370) | def bytes_to_mega_bytes(memory_amount: int) -> int:
  class Benchmark (line 376) | class Benchmark(ABC):
    method __init__ (line 386) | def __init__(self, args: BenchmarkArguments = None, configs: Pretraine...
    method print_fn (line 401) | def print_fn(self):
    method is_gpu (line 421) | def is_gpu(self):
    method framework_version (line 426) | def framework_version(self):
    method train (line 430) | def train(self, model_name, batch_size, sequence_length):
    method inference (line 434) | def inference(self, model_name, batch_size, sequence_length):
    method run (line 437) | def run(self):
    method environment_info (line 512) | def environment_info(self):
    method print_results (line 572) | def print_results(self, result_dict):
    method print_memory_trace_statistics (line 585) | def print_memory_trace_statistics(self, summary: MemorySummary):
    method save_to_csv (line 609) | def save_to_csv(self, result_dict, filename):

FILE: code/nezha-base-count5/pretrain/transformers1/benchmark_utils.py
  function is_memory_tracing_enabled (line 29) | def is_memory_tracing_enabled():
  class Frame (line 34) | class Frame(NamedTuple):
  class UsedMemoryState (line 51) | class UsedMemoryState(NamedTuple):
  class Memory (line 63) | class Memory(NamedTuple):
    method __repr__ (line 71) | def __repr__(self) -> str:
  class MemoryState (line 75) | class MemoryState(NamedTuple):
  class MemorySummary (line 89) | class MemorySummary(NamedTuple):
  function start_memory_tracing (line 108) | def start_memory_tracing(
  function stop_memory_tracing (line 256) | def stop_memory_tracing(
  function bytes_to_human_readable (line 334) | def bytes_to_human_readable(memory_amount: int) -> str:

FILE: code/nezha-base-count5/pretrain/transformers1/commands/__init__.py
  class BaseTransformersCLICommand (line 5) | class BaseTransformersCLICommand(ABC):
    method register_subcommand (line 8) | def register_subcommand(parser: ArgumentParser):
    method run (line 12) | def run(self):

FILE: code/nezha-base-count5/pretrain/transformers1/commands/convert.py
  function convert_command_factory (line 7) | def convert_command_factory(args: Namespace):
  class ConvertCommand (line 17) | class ConvertCommand(BaseTransformersCLICommand):
    method register_subcommand (line 19) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 46) | def __init__(
    method run (line 64) | def run(self):

FILE: code/nezha-base-count5/pretrain/transformers1/commands/download.py
  function download_command_factory (line 6) | def download_command_factory(args):
  class DownloadCommand (line 10) | class DownloadCommand(BaseTransformersCLICommand):
    method register_subcommand (line 12) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 23) | def __init__(self, model: str, cache: str, force: bool):
    method run (line 28) | def run(self):

FILE: code/nezha-base-count5/pretrain/transformers1/commands/env.py
  function info_command_factory (line 9) | def info_command_factory(_):
  class EnvironmentCommand (line 13) | class EnvironmentCommand(BaseTransformersCLICommand):
    method register_subcommand (line 15) | def register_subcommand(parser: ArgumentParser):
    method run (line 19) | def run(self):
    method format_dict (line 57) | def format_dict(d):

FILE: code/nezha-base-count5/pretrain/transformers1/commands/run.py
  function try_infer_format_from_ext (line 11) | def try_infer_format_from_ext(path: str):
  function run_command_factory (line 25) | def run_command_factory(args):
  class RunCommand (line 44) | class RunCommand(BaseTransformersCLICommand):
    method __init__ (line 45) | def __init__(self, nlp: Pipeline, reader: PipelineDataFormat):
    method register_subcommand (line 50) | def register_subcommand(parser: ArgumentParser):
    method run (line 81) | def run(self):

FILE: code/nezha-base-count5/pretrain/transformers1/commands/serving.py
  function Body (line 21) | def Body(*x, **y):
  function serve_command_factory (line 30) | def serve_command_factory(args: Namespace):
  class ServeModelInfoResult (line 45) | class ServeModelInfoResult(BaseModel):
  class ServeTokenizeResult (line 53) | class ServeTokenizeResult(BaseModel):
  class ServeDeTokenizeResult (line 62) | class ServeDeTokenizeResult(BaseModel):
  class ServeForwardResult (line 70) | class ServeForwardResult(BaseModel):
  class ServeCommand (line 78) | class ServeCommand(BaseTransformersCLICommand):
    method register_subcommand (line 80) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 106) | def __init__(self, pipeline: Pipeline, host: str, port: int, workers: ...
    method run (line 156) | def run(self):
    method model_info (line 159) | def model_info(self):
    method tokenize (line 162) | def tokenize(self, text_input: str = Body(None, embed=True), return_id...
    method detokenize (line 180) | def detokenize(
    method forward (line 198) | async def forward(self, inputs=Body(None, embed=True)):

FILE: code/nezha-base-count5/pretrain/transformers1/commands/train.py
  function train_command_factory (line 18) | def train_command_factory(args: Namespace):
  class TrainCommand (line 26) | class TrainCommand(BaseTransformersCLICommand):
    method register_subcommand (line 28) | def register_subcommand(parser: ArgumentParser):
    method __init__ (line 78) | def __init__(self, args: Namespace):
    method run (line 124) | def run(self):
    method run_torch (line 129) | def run_torch(self):
    method run_tf (line 132) | def run_tf(self):

FILE: code/nezha-base-count5/pretrain/transformers1/commands/transformers_cli.py
  function main (line 12) | def main():

FILE: code/nezha-base-count5/pretrain/transformers1/commands/user.py
  class UserCommands (line 16) | class UserCommands(BaseTransformersCLICommand):
    method register_subcommand (line 18) | def register_subcommand(parser: ArgumentParser):
  class ANSI (line 47) | class ANSI:
    method bold (line 57) | def bold(cls, s):
    method red (line 61) | def red(cls, s):
  class BaseUserCommand (line 65) | class BaseUserCommand:
    method __init__ (line 66) | def __init__(self, args):
  class LoginCommand (line 71) | class LoginCommand(BaseUserCommand):
    method run (line 72) | def run(self):
  class WhoamiCommand (line 98) | class WhoamiCommand(BaseUserCommand):
    method run (line 99) | def run(self):
  class LogoutCommand (line 115) | class LogoutCommand(BaseUserCommand):
    method run (line 116) | def run(self):
  class ListObjsCommand (line 126) | class ListObjsCommand(BaseUserCommand):
    method tabulate (line 127) | def tabulate(self, rows: List[List[Union[str, int]]], headers: List[st...
    method run (line 142) | def run(self):
  class DeleteObjCommand (line 160) | class DeleteObjCommand(BaseUserCommand):
    method run (line 161) | def run(self):
  class UploadCommand (line 175) | class UploadCommand(BaseUserCommand):
    method walk_dir (line 176) | def walk_dir(self, rel_path):
    method run (line 187) | def run(self):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_albert.py
  class AlbertConfig (line 33) | class AlbertConfig(PretrainedConfig):
    method __init__ (line 104) | def __init__(

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_auto.py
  class AutoConfig (line 98) | class AutoConfig:
    method __init__ (line 109) | def __init__(self):
    method for_model (line 116) | def for_model(cls, model_type: str, *args, **kwargs):
    method from_pretrained (line 127) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_bart.py
  class BartConfig (line 34) | class BartConfig(PretrainedConfig):
    method __init__ (line 40) | def __init__(
    method num_attention_heads (line 121) | def num_attention_heads(self) -> int:
    method hidden_size (line 125) | def hidden_size(self) -> int:
    method is_valid_mbart (line 128) | def is_valid_mbart(self) -> bool:

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_bert.py
  class BertConfig (line 53) | class BertConfig(PretrainedConfig):
    method __init__ (line 109) | def __init__(

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_camembert.py
  class CamembertConfig (line 33) | class CamembertConfig(RobertaConfig):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_ctrl.py
  class CTRLConfig (line 28) | class CTRLConfig(PretrainedConfig):
    method __init__ (line 83) | def __init__(
    method max_position_embeddings (line 125) | def max_position_embeddings(self):
    method hidden_size (line 129) | def hidden_size(self):
    method num_attention_heads (line 133) | def num_attention_heads(self):
    method num_hidden_layers (line 137) | def num_hidden_layers(self):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_distilbert.py
  class DistilBertConfig (line 36) | class DistilBertConfig(PretrainedConfig):
    method __init__ (line 96) | def __init__(
    method hidden_size (line 130) | def hidden_size(self):
    method num_attention_heads (line 134) | def num_attention_heads(self):
    method num_hidden_layers (line 138) | def num_hidden_layers(self):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_electra.py
  class ElectraConfig (line 36) | class ElectraConfig(PretrainedConfig):
    method __init__ (line 95) | def __init__(

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_encoder_decoder.py
  class EncoderDecoderConfig (line 26) | class EncoderDecoderConfig(PretrainedConfig):
    method __init__ (line 62) | def __init__(self, **kwargs):
    method from_encoder_decoder_configs (line 79) | def from_encoder_decoder_configs(
    method to_dict (line 90) | def to_dict(self):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_flaubert.py
  class FlaubertConfig (line 33) | class FlaubertConfig(XLMConfig):
    method __init__ (line 147) | def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_...

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_gpt2.py
  class GPT2Config (line 35) | class GPT2Config(PretrainedConfig):
    method __init__ (line 117) | def __init__(
    method max_position_embeddings (line 164) | def max_position_embeddings(self):
    method hidden_size (line 168) | def hidden_size(self):
    method num_attention_heads (line 172) | def num_attention_heads(self):
    method num_hidden_layers (line 176) | def num_hidden_layers(self):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_longformer.py
  class LongformerConfig (line 34) | class LongformerConfig(RobertaConfig):
    method __init__ (line 65) | def __init__(self, attention_window: Union[List[int], int] = 512, sep_...

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_marian.py
  class MarianConfig (line 25) | class MarianConfig(BartConfig):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_mmbt.py
  class MMBTConfig (line 25) | class MMBTConfig(object):
    method __init__ (line 38) | def __init__(self, config, num_labels=None, modal_hidden_size=2048):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_openai.py
  class OpenAIGPTConfig (line 31) | class OpenAIGPTConfig(PretrainedConfig):
    method __init__ (line 115) | def __init__(
    method max_position_embeddings (line 159) | def max_position_embeddings(self):
    method hidden_size (line 163) | def hidden_size(self):
    method num_attention_heads (line 167) | def num_attention_heads(self):
    method num_hidden_layers (line 171) | def num_hidden_layers(self):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_reformer.py
  class ReformerConfig (line 32) | class ReformerConfig(PretrainedConfig):
    method __init__ (line 141) | def __init__(

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_roberta.py
  class RobertaConfig (line 36) | class RobertaConfig(BertConfig):
    method __init__ (line 65) | def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **k...

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_t5.py
  class T5Config (line 34) | class T5Config(PretrainedConfig):
    method __init__ (line 64) | def __init__(
    method max_position_embeddings (line 98) | def max_position_embeddings(self):
    method hidden_size (line 102) | def hidden_size(self):
    method num_attention_heads (line 106) | def num_attention_heads(self):
    method num_hidden_layers (line 110) | def num_hidden_layers(self):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_transfo_xl.py
  class TransfoXLConfig (line 31) | class TransfoXLConfig(PretrainedConfig):
    method __init__ (line 117) | def __init__(
    method max_position_embeddings (line 186) | def max_position_embeddings(self):
    method n_token (line 190) | def n_token(self):  # Backward compatibility
    method n_token (line 194) | def n_token(self, value):  # Backward compatibility
    method hidden_size (line 198) | def hidden_size(self):
    method num_attention_heads (line 202) | def num_attention_heads(self):
    method num_hidden_layers (line 206) | def num_hidden_layers(self):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_utils.py
  class PretrainedConfig (line 31) | class PretrainedConfig(object):
    method __init__ (line 56) | def __init__(self, **kwargs):
    method num_labels (line 118) | def num_labels(self):
    method num_labels (line 122) | def num_labels(self, num_labels):
    method save_pretrained (line 126) | def save_pretrained(self, save_directory):
    method from_pretrained (line 146) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs) -> "...
    method get_config_dict (line 205) | def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs)...
    method from_dict (line 270) | def from_dict(cls, config_dict: Dict, **kwargs) -> "PretrainedConfig":
    method from_json_file (line 308) | def from_json_file(cls, json_file: str) -> "PretrainedConfig":
    method _dict_from_json_file (line 324) | def _dict_from_json_file(cls, json_file: str):
    method __eq__ (line 329) | def __eq__(self, other):
    method __repr__ (line 332) | def __repr__(self):
    method to_diff_dict (line 335) | def to_diff_dict(self):
    method to_dict (line 358) | def to_dict(self):
    method to_json_string (line 370) | def to_json_string(self, use_diff=True):
    method to_json_file (line 387) | def to_json_file(self, json_file_path, use_diff=True):
    method update (line 400) | def update(self, config_dict: Dict):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_xlm.py
  class XLMConfig (line 39) | class XLMConfig(PretrainedConfig):
    method __init__ (line 159) | def __init__(
    method n_words (line 235) | def n_words(self):  # For backward compatibility
    method n_words (line 239) | def n_words(self, value):  # For backward compatibility
    method hidden_size (line 243) | def hidden_size(self):
    method num_attention_heads (line 247) | def num_attention_heads(self):
    method num_hidden_layers (line 251) | def num_hidden_layers(self):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_xlm_roberta.py
  class XLMRobertaConfig (line 36) | class XLMRobertaConfig(RobertaConfig):

FILE: code/nezha-base-count5/pretrain/transformers1/configuration_xlnet.py
  class XLNetConfig (line 32) | class XLNetConfig(PretrainedConfig):
    method __init__ (line 129) | def __init__(
    method max_position_embeddings (line 194) | def max_position_embeddings(self):
    method n_token (line 198) | def n_token(self):  # Backward compatibility
    method n_token (line 202) | def n_token(self, value):  # Backward compatibility
    method hidden_size (line 206) | def hidden_size(self):
    method num_attention_heads (line 210) | def num_attention_heads(self):
    method num_hidden_layers (line 214) | def num_hidden_layers(self):

FILE: code/nezha-base-count5/pretrain/transformers1/convert_albert_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_f...

FILE: code/nezha-base-count5/pretrain/transformers1/convert_bart_original_pytorch_checkpoint_to_pytorch.py
  function remove_ignore_keys_ (line 56) | def remove_ignore_keys_(state_dict):
  function rename_key (line 68) | def rename_key(dct, old, new):
  function load_xsum_checkpoint (line 73) | def load_xsum_checkpoint(checkpoint_path):
  function convert_checkpoint_from_disk (line 81) | def convert_checkpoint_from_disk(checkpoint_path, **config_kwargs):
  function convert_bart_checkpoint (line 95) | def convert_bart_checkpoint(checkpoint_path, pytorch_dump_folder_path, h...

FILE: code/nezha-base-count5/pretrain/transformers1/convert_bert_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_fil...

FILE: code/nezha-base-count5/pretrain/transformers1/convert_bert_pytorch_checkpoint_to_original_tf.py
  function convert_pytorch_checkpoint_to_tf (line 28) | def convert_pytorch_checkpoint_to_tf(model: BertModel, ckpt_dir: str, mo...
  function main (line 92) | def main(raw_args=None):

FILE: code/nezha-base-count5/pretrain/transformers1/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
  function convert_dialogpt_checkpoint (line 15) | def convert_dialogpt_checkpoint(checkpoint_path: str, pytorch_dump_folde...

FILE: code/nezha-base-count5/pretrain/transformers1/convert_electra_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, py...

FILE: code/nezha-base-count5/pretrain/transformers1/convert_gpt2_original_tf_checkpoint_to_pytorch.py
  function convert_gpt2_checkpoint_to_pytorch (line 29) | def convert_gpt2_checkpoint_to_pytorch(gpt2_checkpoint_path, gpt2_config...

FILE: code/nezha-base-count5/pretrain/transformers1/convert_graph_to_onnx.py
  class OnnxConverterArgumentParser (line 11) | class OnnxConverterArgumentParser(ArgumentParser):
    method __init__ (line 16) | def __init__(self):
  function ensure_valid_input (line 28) | def ensure_valid_input(model, tokens, input_names):
  function infer_shapes (line 53) | def infer_shapes(nlp: Pipeline, framework: str) -> Tuple[List[str], List...
  function load_graph_from_args (line 100) | def load_graph_from_args(framework: str, model: str, tokenizer: Optional...
  function convert_pytorch (line 111) | def convert_pytorch(nlp: Pipeline, opset: int, output: str, use_external...
  function convert_tensorflow (line 138) | def convert_tensorflow(nlp: Pipeline, opset: int, output: str):
  function convert (line 166) | def convert(
  function verify (line 193) | def verify(path: str):

FILE: code/nezha-base-count5/pretrain/transformers1/convert_longformer_original_pytorch_lightning_to_pytorch.py
  class LightningModel (line 26) | class LightningModel(pl.LightningModule):
    method __init__ (line 27) | def __init__(self, model):
    method forward (line 34) | def forward(self):
  function convert_longformer_qa_checkpoint_to_pytorch (line 38) | def convert_longformer_qa_checkpoint_to_pytorch(

FILE: code/nezha-base-count5/pretrain/transformers1/convert_marian_to_pytorch.py
  function remove_prefix (line 18) | def remove_prefix(text: str, prefix: str):
  function convert_encoder_layer (line 24) | def convert_encoder_layer(opus_dict, layer_prefix: str, converter: dict):
  function load_layers_ (line 35) | def load_layers_(layer_lst: torch.nn.ModuleList, opus_state: dict, conve...
  function find_pretrained_model (line 42) | def find_pretrained_model(src_lang: str, tgt_lang: str) -> List[str]:
  function add_emb_entries (line 55) | def add_emb_entries(wemb, final_bias, n_special_tokens=1):
  function _cast_yaml_str (line 64) | def _cast_yaml_str(v):
  function cast_marian_config (line 76) | def cast_marian_config(raw_cfg: Dict[str, str]) -> Dict:
  function load_config_from_state_dict (line 83) | def load_config_from_state_dict(opus_dict):
  function find_model_file (line 91) | def find_model_file(dest_dir):  # this one better
  function convert_opus_name_to_hf_name (line 136) | def convert_opus_name_to_hf_name(x):
  function convert_hf_name_to_opus_name (line 142) | def convert_hf_name_to_opus_name(hf_model_name):
  function write_model_card (line 152) | def write_model_card(
  function get_clean_model_id_mapping (line 185) | def get_clean_model_id_mapping(multiling_model_ids):
  function make_registry (line 189) | def make_registry(repo_path="Opus-MT-train/models"):
  function convert_all_sentencepiece_models (line 206) | def convert_all_sentencepiece_models(model_list=None, repo_path=None):
  function lmap (line 222) | def lmap(f, x) -> List:
  function fetch_test_set (line 226) | def fetch_test_set(test_set_url):
  function convert_whole_dir (line 239) | def convert_whole_dir(path=Path("marian_ckpt/")):
  function _parse_readme (line 247) | def _parse_readme(lns):
  function save_tokenizer_config (line 270) | def save_tokenizer_config(dest_dir: Path):
  function add_to_vocab_ (line 276) | def add_to_vocab_(vocab: Dict[str, int], special_tokens: List[str]):
  function find_vocab_file (line 287) | def find_vocab_file(model_dir):
  function add_special_tokens_to_vocab (line 291) | def add_special_tokens_to_vocab(model_dir: Path) -> None:
  function save_tokenizer (line 300) | def save_tokenizer(self, save_directory):
  function check_equal (line 309) | def check_equal(marian_cfg, k1, k2):
  function check_marian_cfg_assumptions (line 314) | def check_marian_cfg_assumptions(marian_cfg):
  class OpusState (line 371) | class OpusState:
    method __init__ (line 372) | def __init__(self, source_dir):
    method _check_layer_entries (line 420) | def _check_layer_entries(self):
    method extra_keys (line 432) | def extra_keys(self):
    method sub_keys (line 445) | def sub_keys(self, layer_prefix):
    method load_marian_model (line 448) | def load_marian_model(self) -> MarianMTModel:
  function download_and_unzip (line 483) | def download_and_unzip(url, dest_dir):
  function convert (line 494) | def convert(source_dir: Path, dest_dir):
  function load_yaml (line 525) | def load_yaml(path):
  function save_json (line 532) | def save_json(content: Union[Dict, List], path: str) -> None:
  function unzip (line 537) | def unzip(zip_path: str, dest_dir: str) -> None:

FILE: code/nezha-base-count5/pretrain/transformers1/convert_openai_original_tf_checkpoint_to_pytorch.py
  function convert_openai_checkpoint_to_pytorch (line 29) | def convert_openai_checkpoint_to_pytorch(openai_checkpoint_folder_path, ...

FILE: code/nezha-base-count5/pretrain/transformers1/convert_pytorch_checkpoint_to_tf2.py
  function convert_pt_checkpoint_to_tf (line 187) | def convert_pt_checkpoint_to_tf(
  function convert_all_pt_checkpoints_to_tf (line 233) | def convert_all_pt_checkpoints_to_tf(

FILE: code/nezha-base-count5/pretrain/transformers1/convert_reformer_trax_checkpoint_to_pytorch.py
  function set_param (line 31) | def set_param(torch_layer, weight, bias=None):
  function set_layer_weights_in_torch_lsh (line 40) | def set_layer_weights_in_torch_lsh(weights, torch_layer, hidden_size):
  function set_layer_weights_in_torch_local (line 58) | def set_layer_weights_in_torch_local(weights, torch_layer, hidden_size):
  function set_block_weights_in_torch (line 79) | def set_block_weights_in_torch(weights, torch_block, hidden_size):
  function set_model_weights_in_torch (line 128) | def set_model_weights_in_torch(weights, torch_model, hidden_size):
  function convert_trax_checkpoint_to_pytorch (line 174) | def convert_trax_checkpoint_to_pytorch(trax_model_pkl_path, config_file,...

FILE: code/nezha-base-count5/pretrain/transformers1/convert_roberta_original_pytorch_checkpoint_to_pytorch.py
  function convert_roberta_checkpoint_to_pytorch (line 42) | def convert_roberta_checkpoint_to_pytorch(

FILE: code/nezha-base-count5/pretrain/transformers1/convert_t5_original_tf_checkpoint_to_pytorch.py
  function convert_tf_checkpoint_to_pytorch (line 29) | def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, py...

FILE: code/nezha-base-count5/pretrain/transformers1/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
  function convert_transfo_xl_checkpoint_to_pytorch (line 47) | def convert_transfo_xl_checkpoint_to_pytorch(

FILE: code/nezha-base-count5/pretrain/transformers1/convert_xlm_original_pytorch_checkpoint_to_pytorch.py
  function convert_xlm_checkpoint_to_pytorch (line 32) | def convert_xlm_checkpoint_to_pytorch(xlm_checkpoint_path, pytorch_dump_...

FILE: code/nezha-base-count5/pretrain/transformers1/convert_xlnet_original_tf_checkpoint_to_pytorch.py
  function convert_xlnet_checkpoint_to_pytorch (line 51) | def convert_xlnet_checkpoint_to_pytorch(

FILE: code/nezha-base-count5/pretrain/transformers1/data/data_collator.py
  class DataCollator (line 12) | class DataCollator(ABC):
    method collate_batch (line 19) | def collate_batch(self) -> Dict[str, torch.Tensor]:
  class DefaultDataCollator (line 33) | class DefaultDataCollator(DataCollator):
    method collate_batch (line 46) | def collate_batch(self, features: List[InputDataClass]) -> Dict[str, t...
  class DataCollatorForLanguageModeling (line 80) | class DataCollatorForLanguageModeling(DataCollator):
    method collate_batch (line 91) | def collate_batch(self, examples: List[torch.Tensor]) -> Dict[str, tor...
    method _tensorize_batch (line 99) | def _tensorize_batch(self, examples: List[torch.Tensor]) -> torch.Tensor:
    method mask_tokens (line 112) | def mask_tokens(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, tor...
    method mask_tokens2 (line 148) | def mask_tokens2(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens3 (line 192) | def mask_tokens3(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens4 (line 259) | def mask_tokens4(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens5 (line 342) | def mask_tokens5(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens6 (line 427) | def mask_tokens6(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...
    method mask_tokens7 (line 507) | def mask_tokens7(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, to...

FILE: code/nezha-base-count5/pretrain/transformers1/data/datasets/glue.py
  class GlueDataTrainingArguments (line 23) | class GlueDataTrainingArguments:
    method __post_init__ (line 47) | def __post_init__(self):
  class Split (line 51) | class Split(Enum):
  class GlueDataset (line 57) | class GlueDataset(Dataset):
    method __init__ (line 67) | def __init__(
    method __len__ (line 135) | def __len__(self):
    method __getitem__ (line 138) | def __getitem__(self, i) -> InputFeatures:
    method get_labels (line 141) | def get_labels(self):

FILE: code/nezha-base-count5/pretrain/transformers1/data/datasets/language_modeling.py
  class TextDataset (line 16) | class TextDataset(Dataset):
    method __init__ (line 22) | def __init__(
    method __len__ (line 71) | def __len__(self):
    method __getitem__ (line 74) | def __getitem__(self, i) -> torch.Tensor:
  class LineByLineTextDataset (line 78) | class LineByLineTextDataset(Dataset):
    method __init__ (line 84) | def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, blo...
    method __len__ (line 97) | def __len__(self):
    method __getitem__ (line 100) | def __getitem__(self, i) -> torch.Tensor:

FILE: code/nezha-base-count5/pretrain/transformers1/data/metrics/__init__.py
  function is_sklearn_available (line 26) | def is_sklearn_available():
  function simple_accuracy (line 32) | def simple_accuracy(preds, labels):
  function acc_and_f1 (line 35) | def acc_and_f1(preds, labels):
  function pearson_and_spearman (line 44) | def pearson_and_spearman(preds, labels):
  function glue_compute_metrics (line 53) | def glue_compute_metrics(task_name, preds, labels):
  function xnli_compute_metrics (line 80) | def xnli_compute_metrics(task_name, preds, labels):

FILE: code/nezha-base-count5/pretrain/transformers1/data/metrics/squad_metrics.py
  function normalize_answer (line 24) | def normalize_answer(s):
  function get_tokens (line 44) | def get_tokens(s):
  function compute_exact (line 50) | def compute_exact(a_gold, a_pred):
  function compute_f1 (line 54) | def compute_f1(a_gold, a_pred):
  function get_raw_scores (line 70) | def get_raw_scores(examples, preds):
  function apply_no_ans_threshold (line 96) | def apply_no_ans_threshold(scores, na_probs, qid_to_has_ans, na_prob_thr...
  function make_eval_dict (line 107) | def make_eval_dict(exact_scores, f1_scores, qid_list=None):
  function merge_eval (line 128) | def merge_eval(main_eval, new_eval, prefix):
  function find_best_thresh_v2 (line 133) | def find_best_thresh_v2(preds, scores, na_probs, qid_to_has_ans):
  function find_all_best_thresh_v2 (line 167) | def find_all_best_thresh_v2(main_eval, preds, exact_raw, f1_raw, na_prob...
  function find_best_thresh (line 178) | def find_best_thresh(preds, scores, na_probs, qid_to_has_ans):
  function find_all_best_thresh (line 201) | def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs, ...
  function squad_evaluate (line 211) | def squad_evaluate(examples, preds, no_answer_probs=None, no_answer_prob...
  function get_final_text (line 242) | def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=...
  function _get_best_indexes (line 336) | def _get_best_indexes(logits, n_best_size):
  function _compute_softmax (line 348) | def _compute_softmax(scores):
  function compute_predictions_logits (line 371) | def compute_predictions_logits(
  function compute_predictions_log_probs (line 576) | def compute_predictions_log_probs(

FILE: code/nezha-base-count5/pretrain/transformers1/data/processors/glue.py
  function glue_convert_examples_to_features (line 34) | def glue_convert_examples_to_features(
  function _tf_glue_convert_examples_to_features (line 70) | def _tf_glue_convert_examples_to_features(
  function _glue_convert_examples_to_features (line 107) | def _glue_convert_examples_to_features(
  class OutputMode (line 159) | class OutputMode(Enum):
  class MrpcProcessor (line 164) | class MrpcProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 167) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 176) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 181) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 185) | def get_test_examples(self, data_dir):
    method get_labels (line 189) | def get_labels(self):
    method _create_examples (line 193) | def _create_examples(self, lines, set_type):
  class MnliProcessor (line 207) | class MnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 210) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 219) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 223) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 227) | def get_test_examples(self, data_dir):
    method get_labels (line 231) | def get_labels(self):
    method _create_examples (line 235) | def _create_examples(self, lines, set_type):
  class MnliMismatchedProcessor (line 249) | class MnliMismatchedProcessor(MnliProcessor):
    method get_dev_examples (line 252) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 256) | def get_test_examples(self, data_dir):
  class ColaProcessor (line 261) | class ColaProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 264) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 273) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 277) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 281) | def get_test_examples(self, data_dir):
    method get_labels (line 285) | def get_labels(self):
    method _create_examples (line 289) | def _create_examples(self, lines, set_type):
  class Sst2Processor (line 304) | class Sst2Processor(DataProcessor):
    method get_example_from_tensor_dict (line 307) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 316) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 320) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 324) | def get_test_examples(self, data_dir):
    method get_labels (line 328) | def get_labels(self):
    method _create_examples (line 332) | def _create_examples(self, lines, set_type):
  class StsbProcessor (line 346) | class StsbProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 349) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 358) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 362) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 366) | def get_test_examples(self, data_dir):
    method get_labels (line 370) | def get_labels(self):
    method _create_examples (line 374) | def _create_examples(self, lines, set_type):
  class QqpProcessor (line 388) | class QqpProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 391) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 400) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 404) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 408) | def get_test_examples(self, data_dir):
    method get_labels (line 412) | def get_labels(self):
    method _create_examples (line 416) | def _create_examples(self, lines, set_type):
  class QnliProcessor (line 436) | class QnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 439) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 448) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 452) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 456) | def get_test_examples(self, data_dir):
    method get_labels (line 460) | def get_labels(self):
    method _create_examples (line 464) | def _create_examples(self, lines, set_type):
  class RteProcessor (line 478) | class RteProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 481) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 490) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 494) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 498) | def get_test_examples(self, data_dir):
    method get_labels (line 502) | def get_labels(self):
    method _create_examples (line 506) | def _create_examples(self, lines, set_type):
  class WnliProcessor (line 520) | class WnliProcessor(DataProcessor):
    method get_example_from_tensor_dict (line 523) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 532) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 536) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 540) | def get_test_examples(self, data_dir):
    method get_labels (line 544) | def get_labels(self):
    method _create_examples (line 548) | def _create_examples(self, lines, set_type):

FILE: code/nezha-base-count5/pretrain/transformers1/data/processors/squad.py
  function _improve_answer_span (line 25) | def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, ...
  function _check_is_max_context (line 38) | def _check_is_max_context(doc_spans, cur_span_index, position):
  function _new_check_is_max_context (line 58) | def _new_check_is_max_context(doc_spans, cur_span_index, position):
  function _is_whitespace (line 80) | def _is_whitespace(c):
  function squad_convert_example_to_features (line 86) | def squad_convert_example_to_features(example, max_seq_length, doc_strid...
  function squad_convert_example_to_features_init (line 264) | def squad_convert_example_to_features_init(tokenizer_for_convert):
  function squad_convert_examples_to_features (line 269) | def squad_convert_examples_to_features(
  class SquadProcessor (line 445) | class SquadProcessor(DataProcessor):
    method _get_example_from_tensor_dict (line 454) | def _get_example_from_tensor_dict(self, tensor_dict, evaluate=False):
    method get_examples_from_dataset (line 478) | def get_examples_from_dataset(self, dataset, evaluate=False):
    method get_train_examples (line 509) | def get_train_examples(self, data_dir, filename=None):
    method get_dev_examples (line 531) | def get_dev_examples(self, data_dir, filename=None):
    method _create_examples (line 552) | def _create_examples(self, input_data, set_type):
  class SquadV1Processor (line 594) | class SquadV1Processor(SquadProcessor):
  class SquadV2Processor (line 599) | class SquadV2Processor(SquadProcessor):
  class SquadExample (line 604) | class SquadExample(object):
    method __init__ (line 619) | def __init__(
  class SquadFeatures (line 667) | class SquadFeatures(object):
    method __init__ (line 692) | def __init__(
  class SquadResult (line 729) | class SquadResult(object):
    method __init__ (line 739) | def __init__(self, unique_id, start_logits, end_logits, start_top_inde...

FILE: code/nezha-base-count5/pretrain/transformers1/data/processors/utils.py
  class InputExample (line 31) | class InputExample:
    method to_json_string (line 50) | def to_json_string(self):
  class InputFeatures (line 56) | class InputFeatures:
    method to_json_string (line 77) | def to_json_string(self):
  class DataProcessor (line 82) | class DataProcessor:
    method get_example_from_tensor_dict (line 85) | def get_example_from_tensor_dict(self, tensor_dict):
    method get_train_examples (line 93) | def get_train_examples(self, data_dir):
    method get_dev_examples (line 97) | def get_dev_examples(self, data_dir):
    method get_test_examples (line 101) | def get_test_examples(self, data_dir):
    method get_labels (line 105) | def get_labels(self):
    method tfds_map (line 109) | def tfds_map(self, example):
    method _read_tsv (line 117) | def _read_tsv(cls, input_file, quotechar=None):
  class SingleSentenceClassificationProcessor (line 123) | class SingleSentenceClassificationProcessor(DataProcessor):
    method __init__ (line 126) | def __init__(self, labels=None, examples=None, mode="classification", ...
    method __len__ (line 132) | def __len__(self):
    method __getitem__ (line 135) | def __getitem__(self, idx):
    method create_from_csv (line 141) | def create_from_csv(
    method create_from_examples (line 158) | def create_from_examples(cls, texts_or_text_and_labels, labels=None, *...
    method add_examples_from_csv (line 163) | def add_examples_from_csv(
    method add_examples (line 193) | def add_examples(
    method get_features (line 226) | def get_features(

FILE: code/nezha-base-count5/pretrain/transformers1/data/processors/xnli.py
  class XnliProcessor (line 28) | class XnliProcessor(DataProcessor):
    method __init__ (line 32) | def __init__(self, language, train_language=None):
    method get_train_examples (line 36) | def get_train_examples(self, data_dir):
    method get_test_examples (line 52) | def get_test_examples(self, data_dir):
    method get_labels (line 70) | def get_labels(self):

FILE: code/nezha-base-count5/pretrain/transformers1/file_utils.py
  function is_torch_available (line 93) | def is_torch_available():
  function is_tf_available (line 97) | def is_tf_available():
  function add_start_docstrings (line 101) | def add_start_docstrings(*docstr):
  function add_start_docstrings_to_callable (line 109) | def add_start_docstrings_to_callable(*docstr):
  function add_end_docstrings (line 127) | def add_end_docstrings(*docstr):
  function is_remote_url (line 135) | def is_remote_url(url_or_filename):
  function hf_bucket_url (line 140) | def hf_bucket_url(model_id: str, filename: str, use_cdn=True) -> str:
  function url_to_filename (line 164) | def url_to_filename(url, etag=None):
  function filename_to_url (line 188) | def filename_to_url(filename, cache_dir=None):
  function cached_path (line 214) | def cached_path(
  function http_get (line 306) | def http_get(url, temp_file, proxies=None, resume_size=0, user_agent=None):
  function get_from_cache (line 339) | def get_from_cache(
  class cached_property (line 453) | class cached_property(property):
    method __get__ (line 462) | def __get__(self, obj, objtype=None):
  function torch_required (line 476) | def torch_required(func):
  function tf_required (line 488) | def tf_required(func):

FILE: code/nezha-base-count5/pretrain/transformers1/hf_api.py
  class S3Obj (line 29) | class S3Obj:
    method __init__ (line 34) | def __init__(self, filename: str, LastModified: str, ETag: str, Size: ...
  class PresignedUrl (line 41) | class PresignedUrl:
    method __init__ (line 42) | def __init__(self, write: str, access: str, type: str, **kwargs):
  class S3Object (line 48) | class S3Object:
    method __init__ (line 53) | def __init__(
  class ModelInfo (line 69) | class ModelInfo:
    method __init__ (line 74) | def __init__(
  class HfApi (line 92) | class HfApi:
    method __init__ (line 93) | def __init__(self, endpoint=None):
    method login (line 96) | def login(self, username: str, password: str) -> str:
    method whoami (line 112) | def whoami(self, token: str) -> Tuple[str, List[str]]:
    method logout (line 122) | def logout(self, token: str) -> None:
    method presign (line 130) | def presign(self, token: str, filename: str, organization: Optional[st...
    method presign_and_upload (line 144) | def presign_and_upload(self, token: str, filename: str, filepath: str,...
    method list_objs (line 166) | def list_objs(self, token: str, organization: Optional[str] = None) ->...
    method delete_obj (line 177) | def delete_obj(self, token: str, filename: str, organization: Optional...
    method model_list (line 189) | def model_list(self) -> List[ModelInfo]:
  class TqdmProgressFileReader (line 200) | class TqdmProgressFileReader:
    method __init__ (line 209) | def __init__(self, f: io.BufferedReader):
    method _read (line 216) | def _read(self, n=-1):
    method close (line 220) | def close(self):
  class HfFolder (line 224) | class HfFolder:
    method save_token (line 228) | def save_token(cls, token):
    method get_token (line 237) | def get_token(cls):
    method delete_token (line 248) | def delete_token(cls):

FILE: code/nezha-base-count5/pretrain/transformers1/hf_argparser.py
  class HfArgumentParser (line 14) | class HfArgumentParser(ArgumentParser):
    method __init__ (line 26) | def __init__(self, dataclass_types: Union[DataClassType, Iterable[Data...
    method _add_dataclass_arguments (line 42) | def _add_dataclass_arguments(self, dtype: DataClassType):
    method parse_args_into_dataclasses (line 88) | def parse_args_into_dataclasses(
    method parse_json_file (line 146) | def parse_json_file(self, json_file: str) -> Tuple[DataClass, ...]:

FILE: code/nezha-base-count5/pretrain/transformers1/modelcard.py
  class ModelCard (line 38) | class ModelCard:
    method __init__ (line 55) | def __init__(self, **kwargs):
    method save_pretrained (line 75) | def save_pretrained(self, save_directory_or_file):
    method from_pretrained (line 88) | def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    method from_dict (line 186) | def from_dict(cls, json_object):
    method from_json_file (line 191) | def from_json_file(cls, json_file):
    method __eq__ (line 198) | def __eq__(self, other):
    method __repr__ (line 201) | def __repr__(self):
    method to_dict (line 204) | def to_dict(self):
    method to_json_string (line 209) | def to_json_string(self):
    method to_json_file (line 213) | def to_json_file(self, json_file_path):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_albert.py
  function load_tf_weights_in_albert (line 47) | def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
  class AlbertEmbeddings (line 171) | class AlbertEmbeddings(BertEmbeddings):
    method __init__ (line 176) | def __init__(self, config):
  class AlbertAttention (line 185) | class AlbertAttention(BertSelfAttention):
    method __init__ (line 186) | def __init__(self, config):
    method prune_heads (line 198) | def prune_heads(self, heads):
    method forward (line 221) | def forward(self, input_ids, attention_mask=None, head_mask=None):
  class AlbertLayer (line 266) | class AlbertLayer(nn.Module):
    method __init__ (line 267) | def __init__(self, config):
    method forward (line 277) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertLayerGroup (line 287) | class AlbertLayerGroup(nn.Module):
    method __init__ (line 288) | def __init__(self, config):
    method forward (line 295) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertTransformer (line 317) | class AlbertTransformer(nn.Module):
    method __init__ (line 318) | def __init__(self, config):
    method forward (line 327) | def forward(self, hidden_states, attention_mask=None, head_mask=None):
  class AlbertPreTrainedModel (line 363) | class AlbertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 371) | def _init_weights(self, module):
  class AlbertModel (line 439) | class AlbertModel(AlbertPreTrainedModel):
    method __init__ (line 445) | def __init__(self, config):
    method get_input_embeddings (line 456) | def get_input_embeddings(self):
    method set_input_embeddings (line 459) | def set_input_embeddings(self, value):
    method _resize_token_embeddings (line 462) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 468) | def _prune_heads(self, heads_to_prune):
    method forward (line 487) | def forward(
  class AlbertForPreTraining (line 576) | class AlbertForPreTraining(AlbertPreTrainedModel):
    method __init__ (line 577) | def __init__(self, config):
    method tie_weights (line 587) | def tie_weights(self):
    method get_output_embeddings (line 590) | def get_output_embeddings(self):
    method forward (line 594) | def forward(
  class AlbertMLMHead (line 680) | class AlbertMLMHead(nn.Module):
    method __init__ (line 681) | def __init__(self, config):
    method forward (line 693) | def forward(self, hidden_states):
  class AlbertSOPHead (line 704) | class AlbertSOPHead(nn.Module):
    method __init__ (line 705) | def __init__(self, config):
    method forward (line 711) | def forward(self, pooled_output):
  class AlbertForMaskedLM (line 720) | class AlbertForMaskedLM(AlbertPreTrainedModel):
    method __init__ (line 721) | def __init__(self, config):
    method tie_weights (line 730) | def tie_weights(self):
    method get_output_embeddings (line 733) | def get_output_embeddings(self):
    method forward (line 737) | def forward(
  class AlbertForSequenceClassification (line 810) | class AlbertForSequenceClassification(AlbertPreTrainedModel):
    method __init__ (line 811) | def __init__(self, config):
    method forward (line 822) | def forward(
  class AlbertForTokenClassification (line 905) | class AlbertForTokenClassification(AlbertPreTrainedModel):
    method __init__ (line 906) | def __init__(self, config):
    method forward (line 917) | def forward(
  class AlbertForQuestionAnswering (line 1002) | class AlbertForQuestionAnswering(AlbertPreTrainedModel):
    method __init__ (line 1003) | def __init__(self, config):
    method forward (line 1013) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_auto.py
  class AutoModel (line 269) | class AutoModel:
    method __init__ (line 279) | def __init__(self):
    method from_config (line 287) | def from_config(cls, config):
    method from_pretrained (line 329) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForPreTraining (line 424) | class AutoModelForPreTraining:
    method __init__ (line 433) | def __init__(self):
    method from_config (line 441) | def from_config(cls, config):
    method from_pretrained (line 483) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelWithLMHead (line 570) | class AutoModelWithLMHead:
    method __init__ (line 580) | def __init__(self):
    method from_config (line 588) | def from_config(cls, config):
    method from_pretrained (line 630) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForSequenceClassification (line 718) | class AutoModelForSequenceClassification:
    method __init__ (line 728) | def __init__(self):
    method from_config (line 736) | def from_config(cls, config):
    method from_pretrained (line 778) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForQuestionAnswering (line 867) | class AutoModelForQuestionAnswering:
    method __init__ (line 877) | def __init__(self):
    method from_config (line 885) | def from_config(cls, config):
    method from_pretrained (line 924) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForTokenClassification (line 1009) | class AutoModelForTokenClassification:
    method __init__ (line 1019) | def __init__(self):
    method from_config (line 1027) | def from_config(cls, config):
    method from_pretrained (line 1069) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class AutoModelForMultipleChoice (line 1156) | class AutoModelForMultipleChoice:
    method __init__ (line 1166) | def __init__(self):
    method from_config (line 1174) | def from_config(cls, config):
    method from_pretrained (line 1189) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_bart.py
  function invert_mask (line 94) | def invert_mask(attention_mask):
  function _prepare_bart_decoder_inputs (line 99) | def _prepare_bart_decoder_inputs(
  class PretrainedBartModel (line 120) | class PretrainedBartModel(PreTrainedModel):
    method _init_weights (line 124) | def _init_weights(self, module):
    method dummy_inputs (line 138) | def dummy_inputs(self):
  function _make_linear_from_emb (line 148) | def _make_linear_from_emb(emb):
  function _check_shapes (line 156) | def _check_shapes(shape_1, shape2):
  function shift_tokens_right (line 161) | def shift_tokens_right(input_ids, pad_token_id):
  function make_padding_mask (line 170) | def make_padding_mask(input_ids, padding_idx=1):
  class EncoderLayer (line 181) | class EncoderLayer(nn.Module):
    method __init__ (line 182) | def __init__(self, config: BartConfig):
    method forward (line 198) | def forward(self, x, encoder_padding_mask):
  class BartEncoder (line 234) | class BartEncoder(nn.Module):
    method __init__ (line 243) | def __init__(self, config: BartConfig, embed_tokens):
    method forward (line 270) | def forward(
  class DecoderLayer (line 327) | class DecoderLayer(nn.Module):
    method __init__ (line 328) | def __init__(self, config: BartConfig):
    method forward (line 352) | def forward(
  class BartDecoder (line 416) | class BartDecoder(nn.Module):
    method __init__ (line 425) | def __init__(self, config: BartConfig, embed_tokens: nn.Embedding):
    method forward (line 449) | def forward(
  function _reorder_buffer (line 542) | def _reorder_buffer(attn_cache, new_order):
  class SelfAttention (line 549) | class SelfAttention(nn.Module):
    method __init__ (line 552) | def __init__(
    method _shape (line 575) | def _shape(self, tensor, dim_0, bsz):
    method forward (line 578) | def forward(
    method _use_saved_state (line 663) | def _use_saved_state(self, k, v, saved_state, key_padding_mask, static...
    method _cat_prev_key_padding_mask (line 691) | def _cat_prev_key_padding_mask(
  class BartClassificationHead (line 718) | class BartClassificationHead(nn.Module):
    method __init__ (line 723) | def __init__(
    method forward (line 731) | def forward(self, x):
  class LearnedPositionalEmbedding (line 740) | class LearnedPositionalEmbedding(nn.Embedding):
    method __init__ (line 748) | def __init__(
    method forward (line 757) | def forward(self, input, use_cache=False):
  function LayerNorm (line 767) | def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True):
  function fill_with_neg_inf (line 778) | def fill_with_neg_inf(t):
  function _filter_out_falsey_values (line 783) | def _filter_out_falsey_values(tup) -> Tuple:
  function _get_shape (line 789) | def _get_shape(t):
  class BartModel (line 796) | class BartModel(PretrainedBartModel):
    method __init__ (line 797) | def __init__(self, config: BartConfig):
    method forward (line 811) | def forward(
    method get_input_embeddings (line 854) | def get_input_embeddings(self):
    method set_input_embeddings (line 857) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 862) | def get_output_embeddings(self):
  class BartForConditionalGeneration (line 870) | class BartForConditionalGeneration(PretrainedBartModel):
    method __init__ (line 873) | def __init__(self, config: BartConfig):
    method resize_token_embeddings (line 879) | def resize_token_embeddings(self, new_num_tokens: int) -> nn.Embedding:
    method _resize_final_logits_bias (line 886) | def _resize_final_logits_bias(self, new_num_tokens: int, old_num_token...
    method forward (line 895) | def forward(
    method prepare_inputs_for_generation (line 967) | def prepare_inputs_for_generation(self, decoder_input_ids, past, atten...
    method prepare_logits_for_generation (line 984) | def prepare_logits_for_generation(self, logits, cur_len, max_length):
    method _force_token_ids_generation (line 991) | def _force_token_ids_generation(self, scores, token_ids) -> None:
    method _reorder_cache (line 1004) | def _reorder_cache(past, beam_idx):
    method get_encoder (line 1020) | def get_encoder(self):
    method get_output_embeddings (line 1023) | def get_output_embeddings(self):
  class BartForSequenceClassification (line 1031) | class BartForSequenceClassification(PretrainedBartModel):
    method __init__ (line 1032) | def __init__(self, config: BartConfig, **kwargs):
    method forward (line 1042) | def forward(
  class SinusoidalPositionalEmbedding (line 1109) | class SinusoidalPositionalEmbedding(nn.Embedding):
    method __init__ (line 1112) | def __init__(self, num_positions, embedding_dim, padding_idx=None):
    method _init_weight (line 1119) | def _init_weight(out: nn.Parameter):
    method forward (line 1134) | def forward(self, input_ids, use_cache=False):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_beam_search.py
  class TransformerBeamSearch (line 29) | class TransformerBeamSearch(nn.Module):
    method __init__ (line 30) | def __init__(
    method step (line 80) | def step(self, log_probabilities):
    method forward (line 177) | def forward(self, encoder_input_ids, **kwargs):
    method remove_repeating_trigrams (line 224) | def remove_repeating_trigrams(self, log_probabilities, _B):
    method enforce_min_length (line 233) | def enforce_min_length(self):
    method enforce_max_length (line 237) | def enforce_max_length(self):
    method length_penalty (line 241) | def length_penalty(self):
  function tile (line 245) | def tile(x, count, dim=0):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_bert.py
  function load_tf_weights_in_bert (line 62) | def load_tf_weights_in_bert(model, config, tf_checkpoint_path):
  function mish (line 134) | def mish(x):
  class BertEmbeddings (line 144) | class BertEmbeddings(nn.Module):
    method __init__ (line 148) | def __init__(self, config):
    method forward (line 159) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No...
  class BertSelfAttention (line 184) | class BertSelfAttention(nn.Module):
    method __init__ (line 185) | def __init__(self, config):
    method transpose_for_scores (line 204) | def transpose_for_scores(self, x):
    method forward (line 209) | def forward(
  class BertSelfOutput (line 262) | class BertSelfOutput(nn.Module):
    method __init__ (line 263) | def __init__(self, config):
    method forward (line 269) | def forward(self, hidden_states, input_tensor):
  class BertAttention (line 276) | class BertAttention(nn.Module):
    method __init__ (line 277) | def __init__(self, config):
    method prune_heads (line 283) | def prune_heads(self, heads):
    method forward (line 306) | def forward(
  class BertIntermediate (line 322) | class BertIntermediate(nn.Module):
    method __init__ (line 323) | def __init__(self, config):
    method forward (line 331) | def forward(self, hidden_states):
  class BertOutput (line 337) | class BertOutput(nn.Module):
    method __init__ (line 338) | def __init__(self, config):
    method forward (line 344) | def forward(self, hidden_states, input_tensor):
  class BertLayer (line 351) | class BertLayer(nn.Module):
    method __init__ (line 352) | def __init__(self, config):
    method forward (line 361) | def forward(
  class BertEncoder (line 386) | class BertEncoder(nn.Module):
    method __init__ (line 387) | def __init__(self, config):
    method forward (line 393) | def forward(
  class BertPooler (line 427) | class BertPooler(nn.Module):
    method __init__ (line 428) | def __init__(self, config):
    method forward (line 433) | def forward(self, hidden_states):
  class BertPredictionHeadTransform (line 442) | class BertPredictionHeadTransform(nn.Module):
    method __init__ (line 443) | def __init__(self, config):
    method forward (line 452) | def forward(self, hidden_states):
  class BertLMPredictionHead (line 459) | class BertLMPredictionHead(nn.Module):
    method __init__ (line 460) | def __init__(self, config):
    method forward (line 473) | def forward(self, hidden_states):
  class BertOnlyMLMHead (line 479) | class BertOnlyMLMHead(nn.Module):
    method __init__ (line 480) | def __init__(self, config):
    method forward (line 484) | def forward(self, sequence_output):
  class BertOnlyNSPHead (line 489) | class BertOnlyNSPHead(nn.Module):
    method __init__ (line 490) | def __init__(self, config):
    method forward (line 494) | def forward(self, pooled_output):
  class BertPreTrainingHeads (line 499) | class BertPreTrainingHeads(nn.Module):
    method __init__ (line 500) | def __init__(self, config):
    method forward (line 505) | def forward(self, sequence_output, pooled_output):
  class BertPreTrainedModel (line 511) | class BertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 520) | def _init_weights(self, module):
  class BertModel (line 594) | class BertModel(BertPreTrainedModel):
    method __init__ (line 611) | def __init__(self, config):
    method get_input_embeddings (line 621) | def get_input_embeddings(self):
    method set_input_embeddings (line 624) | def set_input_embeddings(self, value):
    method _prune_heads (line 627) | def _prune_heads(self, heads_to_prune):
    method forward (line 636) | def forward(
  class BertForPreTraining (line 750) | class BertForPreTraining(BertPreTrainedModel):
    method __init__ (line 751) | def __init__(self, config):
    method get_output_embeddings (line 759) | def get_output_embeddings(self):
    method forward (line 763) | def forward(
  class BertForMaskedLM (line 850) | class BertForMaskedLM(BertPreTrainedModel):
    method __init__ (line 851) | def __init__(self, config):
    method get_output_embeddings (line 859) | def get_output_embeddings(self):
    method forward (line 863) | def forward(
    method prepare_inputs_for_generation (line 960) | def prepare_inputs_for_generation(self, input_ids, attention_mask=None...
  class BertForNextSentencePrediction (line 986) | class BertForNextSentencePrediction(BertPreTrainedModel):
    method __init__ (line 987) | def __init__(self, config):
    method forward (line 996) | def forward(
  class BertForSequenceClassification (line 1074) | class BertForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 1075) | def __init__(self, config):
    method forward (line 1086) | def forward(
  class BertForMultipleChoice (line 1171) | class BertForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1172) | def __init__(self, config):
    method forward (line 1182) | def forward(
  class BertForTokenClassification (line 1274) | class BertForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1275) | def __init__(self, config):
    method forward (line 1286) | def forward(
  class BertForQuestionAnswering (line 1372) | class BertForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 1373) | def __init__(self, config):
    method forward (line 1383) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_camembert.py
  class CamembertModel (line 59) | class CamembertModel(RobertaModel):
  class CamembertForMaskedLM (line 71) | class CamembertForMaskedLM(RobertaForMaskedLM):
  class CamembertForSequenceClassification (line 85) | class CamembertForSequenceClassification(RobertaForSequenceClassification):
  class CamembertForMultipleChoice (line 99) | class CamembertForMultipleChoice(RobertaForMultipleChoice):
  class CamembertForTokenClassification (line 113) | class CamembertForTokenClassification(RobertaForTokenClassification):
  class CamembertForQuestionAnswering (line 127) | class CamembertForQuestionAnswering(RobertaForQuestionAnswering):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_ctrl.py
  function angle_defn (line 39) | def angle_defn(pos, i, d_model_size):
  function positional_encoding (line 44) | def positional_encoding(position, d_model_size, dtype):
  function scaled_dot_product_attention (line 59) | def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, hea...
  class MultiHeadAttention (line 85) | class MultiHeadAttention(torch.nn.Module):
    method __init__ (line 86) | def __init__(self, d_model_size, num_heads, output_attentions=False):
    method split_into_heads (line 100) | def split_into_heads(self, x, batch_size):
    method forward (line 104) | def forward(self, v, k, q, mask, layer_past=None, attention_mask=None,...
  function point_wise_feed_forward_network (line 136) | def point_wise_feed_forward_network(d_model_size, dff):
  class EncoderLayer (line 140) | class EncoderLayer(torch.nn.Module):
    method __init__ (line 141) | def __init__(self, d_model_size, num_heads, dff, rate=0.1, output_atte...
    method forward (line 153) | def forward(self, x, mask, layer_past=None, attention_mask=None, head_...
  class CTRLPreTrainedModel (line 178) | class CTRLPreTrainedModel(PreTrainedModel):
    method _init_weights (line 186) | def _init_weights(self, module):
  class CTRLModel (line 263) | class CTRLModel(CTRLPreTrainedModel):
    method __init__ (line 264) | def __init__(self, config):
    method get_input_embeddings (line 287) | def get_input_embeddings(self):
    method set_input_embeddings (line 290) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 293) | def _prune_heads(self, heads_to_prune):
    method forward (line 301) | def forward(
  class CTRLLMHeadModel (line 458) | class CTRLLMHeadModel(CTRLPreTrainedModel):
    method __init__ (line 459) | def __init__(self, config):
    method get_output_embeddings (line 466) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 469) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 477) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_distilbert.py
  function create_sinusoidal_embeddings (line 54) | def create_sinusoidal_embeddings(n_pos, dim, out):
  class Embeddings (line 62) | class Embeddings(nn.Module):
    method __init__ (line 63) | def __init__(self, config):
    method forward (line 75) | def forward(self, input_ids):
  class MultiHeadSelfAttention (line 100) | class MultiHeadSelfAttention(nn.Module):
    method __init__ (line 101) | def __init__(self, config):
    method prune_heads (line 118) | def prune_heads(self, heads):
    method forward (line 139) | def forward(self, query, key, value, mask, head_mask=None):
  class FFN (line 198) | class FFN(nn.Module):
    method __init__ (line 199) | def __init__(self, config):
    method forward (line 209) | def forward(self, input):
  class TransformerBlock (line 217) | class TransformerBlock(nn.Module):
    method __init__ (line 218) | def __init__(self, config):
    method forward (line 231) | def forward(self, x, attn_mask=None, head_mask=None):
  class Transformer (line 264) | class Transformer(nn.Module):
    method __init__ (line 265) | def __init__(self, config):
    method forward (line 274) | def forward(self, x, attn_mask=None, head_mask=None):
  class DistilBertPreTrainedModel (line 325) | class DistilBertPreTrainedModel(PreTrainedModel):
    method _init_weights (line 334) | def _init_weights(self, module):
  class DistilBertModel (line 392) | class DistilBertModel(DistilBertPreTrainedModel):
    method __init__ (line 393) | def __init__(self, config):
    method get_input_embeddings (line 401) | def get_input_embeddings(self):
    method set_input_embeddings (line 404) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 407) | def _prune_heads(self, heads_to_prune):
    method forward (line 416) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForMaskedLM (line 477) | class DistilBertForMaskedLM(DistilBertPreTrainedModel):
    method __init__ (line 478) | def __init__(self, config):
    method get_output_embeddings (line 492) | def get_output_embeddings(self):
    method forward (line 496) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForSequenceClassification (line 558) | class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
    method __init__ (line 559) | def __init__(self, config):
    method forward (line 571) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...
  class DistilBertForQuestionAnswering (line 638) | class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
    method __init__ (line 639) | def __init__(self, config):
    method forward (line 650) | def forward(
  class DistilBertForTokenClassification (line 740) | class DistilBertForTokenClassification(DistilBertPreTrainedModel):
    method __init__ (line 741) | def __init__(self, config):
    method forward (line 752) | def forward(self, input_ids=None, attention_mask=None, head_mask=None,...

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_electra.py
  function load_tf_weights_in_electra (line 28) | def load_tf_weights_in_electra(model, config, tf_checkpoint_path, discri...
  class ElectraEmbeddings (line 109) | class ElectraEmbeddings(BertEmbeddings):
    method __init__ (line 112) | def __init__(self, config):
  class ElectraDiscriminatorPredictions (line 123) | class ElectraDiscriminatorPredictions(nn.Module):
    method __init__ (line 126) | def __init__(self, config):
    method forward (line 133) | def forward(self, discriminator_hidden_states, attention_mask):
  class ElectraGeneratorPredictions (line 141) | class ElectraGeneratorPredictions(nn.Module):
    method __init__ (line 144) | def __init__(self, config):
    method forward (line 150) | def forward(self, generator_hidden_states):
  class ElectraPreTrainedModel (line 158) | class ElectraPreTrainedModel(BertPreTrainedModel):
  class ElectraModel (line 233) | class ElectraModel(ElectraPreTrainedModel):
    method __init__ (line 237) | def __init__(self, config):
    method get_input_embeddings (line 248) | def get_input_embeddings(self):
    method set_input_embeddings (line 251) | def set_input_embeddings(self, value):
    method _prune_heads (line 254) | def _prune_heads(self, heads_to_prune):
    method forward (line 263) | def forward(
  class ElectraClassificationHead (line 334) | class ElectraClassificationHead(nn.Module):
    method __init__ (line 337) | def __init__(self, config):
    method forward (line 343) | def forward(self, features, **kwargs):
  class ElectraForSequenceClassification (line 358) | class ElectraForSequenceClassification(ElectraPreTrainedModel):
    method __init__ (line 359) | def __init__(self, config):
    method forward (line 368) | def forward(
  class ElectraForPreTraining (line 448) | class ElectraForPreTraining(ElectraPreTrainedModel):
    method __init__ (line 449) | def __init__(self, config):
    method forward (line 457) | def forward(
  class ElectraForMaskedLM (line 542) | class ElectraForMaskedLM(ElectraPreTrainedModel):
    method __init__ (line 543) | def __init__(self, config):
    method get_output_embeddings (line 552) | def get_output_embeddings(self):
    method forward (line 556) | def forward(
  class ElectraForTokenClassification (line 634) | class ElectraForTokenClassification(ElectraPreTrainedModel):
    method __init__ (line 635) | def __init__(self, config):
    method forward (line 644) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_encoder_decoder.py
  class EncoderDecoderModel (line 29) | class EncoderDecoderModel(PreTrainedModel):
    method __init__ (line 40) | def __init__(
    method tie_weights (line 74) | def tie_weights(self):
    method get_encoder (line 78) | def get_encoder(self):
    method get_decoder (line 81) | def get_decoder(self):
    method get_input_embeddings (line 84) | def get_input_embeddings(self):
    method get_output_embeddings (line 87) | def get_output_embeddings(self):
    method from_encoder_decoder_pretrained (line 91) | def from_encoder_decoder_pretrained(
    method forward (line 183) | def forward(
    method prepare_inputs_for_generation (line 303) | def prepare_inputs_for_generation(self, input_ids, past, attention_mas...
    method _reorder_cache (line 321) | def _reorder_cache(self, past, beam_idx):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_flaubert.py
  class FlaubertModel (line 110) | class FlaubertModel(XLMModel):
    method __init__ (line 114) | def __init__(self, config):  # , dico, is_encoder, with_output):
    method forward (line 120) | def forward(
  class FlaubertWithLMHeadModel (line 300) | class FlaubertWithLMHeadModel(XLMWithLMHeadModel):
    method __init__ (line 308) | def __init__(self, config):
  class FlaubertForSequenceClassification (line 319) | class FlaubertForSequenceClassification(XLMForSequenceClassification):
    method __init__ (line 327) | def __init__(self, config):
  class FlaubertForQuestionAnsweringSimple (line 338) | class FlaubertForQuestionAnsweringSimple(XLMForQuestionAnsweringSimple):
    method __init__ (line 346) | def __init__(self, config):
  class FlaubertForQuestionAnswering (line 357) | class FlaubertForQuestionAnswering(XLMForQuestionAnswering):
    method __init__ (line 365) | def __init__(self, config):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_gpt2.py
  function load_tf_weights_in_gpt2 (line 44) | def load_tf_weights_in_gpt2(model, config, gpt2_checkpoint_path):
  class Attention (line 99) | class Attention(nn.Module):
    method __init__ (line 100) | def __init__(self, nx, n_ctx, config, scale=False):
    method prune_heads (line 121) | def prune_heads(self, heads):
    method _attn (line 143) | def _attn(self, q, k, v, attention_mask=None, head_mask=None):
    method merge_heads (line 167) | def merge_heads(self, x):
    method split_heads (line 172) | def split_heads(self, x, k=False):
    method forward (line 180) | def forward(self, x, layer_past=None, attention_mask=None, head_mask=N...
  class MLP (line 207) | class MLP(nn.Module):
    method __init__ (line 208) | def __init__(self, n_state, config):  # in MLP: n_state=3072 (4 * n_embd)
    method forward (line 216) | def forward(self, x):
  class Block (line 222) | class Block(nn.Module):
    method __init__ (line 223) | def __init__(self, n_ctx, config, scale=False):
    method forward (line 231) | def forward(self, x, layer_past=None, attention_mask=None, head_mask=N...
  class GPT2PreTrainedModel (line 249) | class GPT2PreTrainedModel(PreTrainedModel):
    method __init__ (line 258) | def __init__(self, *inputs, **kwargs):
    method _init_weights (line 261) | def _init_weights(self, module):
  class GPT2Model (line 339) | class GPT2Model(GPT2PreTrainedModel):
    method __init__ (line 340) | def __init__(self, config):
    method get_input_embeddings (line 353) | def get_input_embeddings(self):
    method set_input_embeddings (line 356) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 359) | def _prune_heads(self, heads_to_prune):
    method forward (line 367) | def forward(
  class GPT2LMHeadModel (line 523) | class GPT2LMHeadModel(GPT2PreTrainedModel):
    method __init__ (line 524) | def __init__(self, config):
    method get_output_embeddings (line 531) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 534) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 542) | def forward(
  class GPT2DoubleHeadsModel (line 631) | class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
    method __init__ (line 632) | def __init__(self, config):
    method get_output_embeddings (line 641) | def get_output_embeddings(self):
    method forward (line 645) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_longformer.py
  function _get_question_end_index (line 43) | def _get_question_end_index(input_ids, sep_token_id):
  function _compute_global_attention_mask (line 59) | def _compute_global_attention_mask(input_ids, sep_token_id, before_sep_t...
  class LongformerSelfAttention (line 81) | class LongformerSelfAttention(nn.Module):
    method __init__ (line 82) | def __init__(self, config, layer_id):
    method _skew (line 117) | def _skew(x, direction):
    method _skew2 (line 124) | def _skew2(x):
    method _chunk (line 136) | def _chunk(x, w):
    method _mask_invalid_locations (line 150) | def _mask_invalid_locations(self, input_tensor, w) -> torch.Tensor:
    method _sliding_chunks_matmul_qk (line 163) | def _sliding_chunks_matmul_qk(self, q: torch.Tensor, k: torch.Tensor, ...
    method _sliding_chunks_matmul_pv (line 210) | def _sliding_chunks_matmul_pv(self, prob: torch.Tensor, v: torch.Tenso...
    method forward (line 238) | def forward(
  class LongformerModel (line 498) | class LongformerModel(RobertaModel):
    method __init__ (line 519) | def __init__(self, config):
    method _pad_to_window_size (line 538) | def _pad_to_window_size(
    method forward (line 582) | def forward(
  class LongformerForMaskedLM (line 686) | class LongformerForMaskedLM(BertPreTrainedModel):
    method __init__ (line 690) | def __init__(self, config):
    method forward (line 699) | def forward(
  class LongformerForSequenceClassification (line 776) | class LongformerForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 780) | def __init__(self, config):
    method forward (line 788) | def forward(
  class LongformerClassificationHead (line 868) | class LongformerClassificationHead(nn.Module):
    method __init__ (line 871) | def __init__(self, config):
    method forward (line 877) | def forward(self, hidden_states, **kwargs):
  class LongformerForQuestionAnswering (line 892) | class LongformerForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 896) | def __init__(self, config):
    method forward (line 906) | def forward(
  class LongformerForTokenClassification (line 1016) | class LongformerForTokenClassification(BertPreTrainedModel):
    method __init__ (line 1020) | def __init__(self, config):
    method forward (line 1031) | def forward(
  class LongformerForMultipleChoice (line 1116) | class LongformerForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 1120) | def __init__(self, config):
    method forward (line 1130) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_marian.py
  class MarianMTModel (line 26) | class MarianMTModel(BartForConditionalGeneration):
    method prepare_logits_for_generation (line 49) | def prepare_logits_for_generation(self, logits, cur_len, max_length):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_mmbt.py
  class ModalEmbeddings (line 32) | class ModalEmbeddings(nn.Module):
    method __init__ (line 36) | def __init__(self, config, encoder, embeddings):
    method forward (line 47) | def forward(self, input_modal, start_token=None, end_token=None, posit...
  class MMBTModel (line 152) | class MMBTModel(nn.Module, ModuleUtilsMixin):
    method __init__ (line 180) | def __init__(self, config, transformer, encoder):
    method forward (line 186) | def forward(
    method get_input_embeddings (line 268) | def get_input_embeddings(self):
    method set_input_embeddings (line 271) | def set_input_embeddings(self, value):
  class MMBTForClassification (line 281) | class MMBTForClassification(nn.Module):
    method __init__ (line 312) | def __init__(self, config, transformer, encoder):
    method forward (line 320) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_openai.py
  function load_tf_weights_in_openai_gpt (line 42) | def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folde...
  class Attention (line 122) | class Attention(nn.Module):
    method __init__ (line 123) | def __init__(self, nx, n_ctx, config, scale=False):
    method prune_heads (line 141) | def prune_heads(self, heads):
    method _attn (line 160) | def _attn(self, q, k, v, attention_mask=None, head_mask=None):
    method merge_heads (line 185) | def merge_heads(self, x):
    method split_heads (line 190) | def split_heads(self, x, k=False):
    method forward (line 198) | def forward(self, x, attention_mask=None, head_mask=None):
  class MLP (line 216) | class MLP(nn.Module):
    method __init__ (line 217) | def __init__(self, n_state, config):  # in MLP: n_state=3072 (4 * n_embd)
    method forward (line 225) | def forward(self, x):
  class Block (line 231) | class Block(nn.Module):
    method __init__ (line 232) | def __init__(self, n_ctx, config, scale=False):
    method forward (line 240) | def forward(self, x, attention_mask=None, head_mask=None):
  class OpenAIGPTPreTrainedModel (line 252) | class OpenAIGPTPreTrainedModel(PreTrainedModel):
    method _init_weights (line 261) | def _init_weights(self, module):
  class OpenAIGPTModel (line 329) | class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 330) | def __init__(self, config):
    method get_input_embeddings (line 342) | def get_input_embeddings(self):
    method set_input_embeddings (line 345) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 348) | def _prune_heads(self, heads_to_prune):
    method forward (line 356) | def forward(
  class OpenAIGPTLMHeadModel (line 471) | class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 472) | def __init__(self, config):
    method get_output_embeddings (line 479) | def get_output_embeddings(self):
    method forward (line 483) | def forward(
  class OpenAIGPTDoubleHeadsModel (line 567) | class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
    method __init__ (line 568) | def __init__(self, config):
    method get_output_embeddings (line 578) | def get_output_embeddings(self):
    method forward (line 582) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_reformer.py
  function mish (line 45) | def mish(x):
  function _get_least_common_mult_chunk_len (line 70) | def _get_least_common_mult_chunk_len(config):
  class AxialPositionEmbeddings (line 87) | class AxialPositionEmbeddings(nn.Module):
    method __init__ (line 92) | def __init__(self, config):
    method forward (line 117) | def forward(self, position_ids):
  class PositionEmbeddings (line 166) | class PositionEmbeddings(nn.Module):
    method __init__ (line 170) | def __init__(self, config):
    method forward (line 175) | def forward(self, position_ids):
  class ReformerEmbeddings (line 181) | class ReformerEmbeddings(nn.Module):
    method __init__ (line 185) | def __init__(self, config):
    method forward (line 195) | def forward(self, input_ids=None, position_ids=None, inputs_embeds=None):
  class EfficientAttentionMixin (line 226) | class EfficientAttentionMixin:
    method _look_adjacent (line 231) | def _look_adjacent(self, vectors, num_chunks_before, num_chunks_after):
    method _split_hidden_size_dim (line 254) | def _split_hidden_size_dim(self, x, num_attn_heads, attn_head_size):
    method _merge_hidden_size_dims (line 262) | def _merge_hidden_size_dims(self, x, num_attn_heads, attn_head_size):
    method _split_seq_length_dim_to (line 269) | def _split_seq_length_dim_to(self, vectors, dim_factor_1, dim_factor_2...
  class LSHSelfAttention (line 284) | class LSHSelfAttention(nn.Module, EfficientAttentionMixin):
    method __init__ (line 285) | def __init__(self, config):
    method forward (line 315) | def forward(
    method _hash_vectors (line 441) | def _hash_vectors(self, vectors, num_hashes):
    method _get_sorted_bucket_idx_and_undo_sorted_bucket_idx (line 506) | def _get_sorted_bucket_idx_and_undo_sorted_bucket_idx(self, sequence_l...
    method _set_num_buckets (line 537) | def _set_num_buckets(self, sequence_length):
    method _attend (line 556) | def _attend(
    method _compute_attn_mask (line 635) | def _compute_attn_mask(self, query_indices, key_indices, attention_mask):
    method _len_and_dim_norm (line 663) | def _len_and_dim_norm(self, vectors):
    method _len_norm (line 673) | def _len_norm(self, x, epsilon=1e-6):
    method _gather_by_expansion (line 681) | def _gather_by_expansion(self, vectors, idxs, num_hashes):
  class ReverseSort (line 690) | class ReverseSort(Function):
    method forward (line 700) | def forward(ctx, out_vectors, logits, sorted_bucket_idx, undo_sorted_b...
    method backward (line 713) | def backward(ctx, grad_out_vectors, grad_logits):
  class LocalSelfAttention (line 747) | class LocalSelfAttention(nn.Module, EfficientAttentionMixin):
    method __init__ (line 748) | def __init__(self, config):
    method forward (line 773) | def forward(self, hidden_states, attention_mask=None, head_mask=None, ...
    method _compute_attn_mask (line 888) | def _compute_attn_mask(self, query_indices, key_indices, attention_mas...
  class ReformerSelfOutput (line 913) | class ReformerSelfOutput(nn.Module):
    method __init__ (line 914) | def __init__(self, config):
    method forward (line 921) | def forward(self, hidden_states):
  class ReformerAttention (line 927) | class ReformerAttention(nn.Module):
    method __init__ (line 928) | def __init__(self, config, layer_id=0):
    method forward (line 953) | def forward(
  class ReformerFeedForwardDense (line 986) | class ReformerFeedForwardDense(nn.Module):
    method __init__ (line 987) | def __init__(self, config):
    method forward (line 998) | def forward(self, hidden_states):
  class ReformerFeedForwardOutput (line 1005) | class ReformerFeedForwardOutput(nn.Module):
    method __init__ (line 1006) | def __init__(self, config):
    method forward (line 1012) | def forward(self, hidden_states):
  class ChunkReformerFeedForward (line 1018) | class ChunkReformerFeedForward(nn.Module):
    method __init__ (line 1019) | def __init__(self, config):
    method forward (line 1028) | def forward(self, attention_output):
    method forward_chunk (line 1033) | def forward_chunk(self, hidden_states):
  class ReformerLayer (line 1039) | class ReformerLayer(nn.Module):
    method __init__ (line 1040) | def __init__(self, config, layer_id=0):
    method _init_attention_seed (line 1050) | def _init_attention_seed(self):
    method _init_feed_forward_seed (line 1070) | def _init_feed_forward_seed(self):
    method forward (line 1090) | def forward(
    method backward_pass (line 1134) | def backward_pass(
  class _ReversibleFunction (line 1195) | class _ReversibleFunction(Function):
    method forward (line 1205) | def forward(
    method backward (line 1256) | def backward(ctx, grad_hidden_states):
  class ReformerEncoder (line 1302) | class ReformerEncoder(nn.Module):
    method __init__ (line 1303) | def __init__(self, config):
    method forward (line 1312) | def forward(
  class ReformerOnlyLMHead (line 1350) | class ReformerOnlyLMHead(nn.Module):
    method __init__ (line 1351) | def __init__(self, config):
    method forward (line 1363) | def forward(self, hidden_states):
    method forward_chunk (line 1366) | def forward_chunk(self, hidden_states):
  class ReformerPreTrainedModel (line 1371) | class ReformerPreTrainedModel(PreTrainedModel):
    method dummy_inputs (line 1380) | def dummy_inputs(self):
    method _init_weights (line 1389) | def _init_weights(self, module):
  class ReformerModel (line 1470) | class ReformerModel(ReformerPreTrainedModel):
    method __init__ (line 1471) | def __init__(self, config):
    method get_input_embeddings (line 1483) | def get_input_embeddings(self):
    method set_input_embeddings (line 1486) | def set_input_embeddings(self, value):
    method _prune_heads (line 1489) | def _prune_heads(self, heads_to_prune):
    method forward (line 1498) | def forward(
    method _pad_to_mult_of_chunk_length (line 1615) | def _pad_to_mult_of_chunk_length(
  class ReformerModelWithLMHead (line 1674) | class ReformerModelWithLMHead(ReformerPreTrainedModel):
    method __init__ (line 1675) | def __init__(self, config):
    method get_output_embeddings (line 1682) | def get_output_embeddings(self):
    method tie_weights (line 1685) | def tie_weights(self):
    method forward (line 1690) | def forward(
    method prepare_inputs_for_generation (line 1766) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_roberta.py
  class RobertaEmbeddings (line 44) | class RobertaEmbeddings(BertEmbeddings):
    method __init__ (line 49) | def __init__(self, config):
    method forward (line 57) | def forward(self, input_ids=None, token_type_ids=None, position_ids=No...
    method create_position_ids_from_inputs_embeds (line 69) | def create_position_ids_from_inputs_embeds(self, inputs_embeds):
  class RobertaModel (line 139) | class RobertaModel(BertModel):
    method __init__ (line 148) | def __init__(self, config):
    method get_input_embeddings (line 154) | def get_input_embeddings(self):
    method set_input_embeddings (line 157) | def set_input_embeddings(self, value):
  class RobertaForMaskedLM (line 162) | class RobertaForMaskedLM(BertPreTrainedModel):
    method __init__ (line 166) | def __init__(self, config):
    method get_output_embeddings (line 174) | def get_output_embeddings(self):
    method forward (line 178) | def forward(
  class RobertaLMHead (line 246) | class RobertaLMHead(nn.Module):
    method __init__ (line 249) | def __init__(self, config):
    method forward (line 260) | def forward(self, features, **kwargs):
  class RobertaForSequenceClassification (line 276) | class RobertaForSequenceClassification(BertPreTrainedModel):
    method __init__ (line 280) | def __init__(self, config):
    method forward (line 288) | def forward(
  class RobertaForMultipleChoice (line 366) | class RobertaForMultipleChoice(BertPreTrainedModel):
    method __init__ (line 370) | def __init__(self, config):
    method forward (line 380) | def forward(
  class RobertaForTokenClassification (line 464) | class RobertaForTokenClassification(BertPreTrainedModel):
    method __init__ (line 468) | def __init__(self, config):
    method forward (line 479) | def forward(
  class RobertaClassificationHead (line 559) | class RobertaClassificationHead(nn.Module):
    method __init__ (line 562) | def __init__(self, config):
    method forward (line 568) | def forward(self, features, **kwargs):
  class RobertaForQuestionAnswering (line 583) | class RobertaForQuestionAnswering(BertPreTrainedModel):
    method __init__ (line 587) | def __init__(self, config):
    method forward (line 597) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_t5.py
  function load_tf_weights_in_t5 (line 53) | def load_tf_weights_in_t5(model, config, tf_checkpoint_path):
  class T5LayerNorm (line 143) | class T5LayerNorm(nn.Module):
    method __init__ (line 144) | def __init__(self, hidden_size, eps=1e-6):
    method forward (line 152) | def forward(self, x):
  class T5DenseReluDense (line 162) | class T5DenseReluDense(nn.Module):
    method __init__ (line 163) | def __init__(self, config):
    method forward (line 169) | def forward(self, hidden_states):
  class T5LayerFF (line 177) | class T5LayerFF(nn.Module):
    method __init__ (line 178) | def __init__(self, config):
    method forward (line 184) | def forward(self, hidden_states):
  class T5Attention (line 191) | class T5Attention(nn.Module):
    method __init__ (line 192) | def __init__(self, config: T5Config, has_relative_attention_bias=False):
    method prune_heads (line 215) | def prune_heads(self, heads):
    method _relative_position_bucket (line 236) | def _relative_position_bucket(relative_position, bidirectional=True, n...
    method compute_bias (line 283) | def compute_bias(self, qlen, klen):
    method forward (line 298) | def forward(
  class T5LayerSelfAttention (line 401) | class T5LayerSelfAttention(nn.Module):
    method __init__ (line 402) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 408) | def forward(
  class T5LayerCrossAttention (line 432) | class T5LayerCrossAttention(nn.Module):
    method __init__ (line 433) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 439) | def forward(
  class T5Block (line 467) | class T5Block(nn.Module):
    method __init__ (line 468) | def __init__(self, config, has_relative_attention_bias=False):
    method forward (line 478) | def forward(
  class T5PreTrainedModel (line 553) | class T5PreTrainedModel(PreTrainedModel):
    method dummy_inputs (line 563) | def dummy_inputs(self):
    method _init_weights (line 573) | def _init_weights(self, module):
    method _shift_right (line 605) | def _shift_right(self, input_ids):
  class T5Stack (line 627) | class T5Stack(T5PreTrainedModel):
    method __init__ (line 628) | def __init__(self, config, embed_tokens=None):
    method get_input_embeddings (line 644) | def get_input_embeddings(self):
    method get_output_embeddings (line 647) | def get_output_embeddings(self):
    method set_input_embeddings (line 650) | def set_input_embeddings(self, new_embeddings):
    method forward (line 653) | def forward(
  class T5Model (line 846) | class T5Model(T5PreTrainedModel):
    method __init__ (line 847) | def __init__(self, config):
    method get_input_embeddings (line 860) | def get_input_embeddings(self):
    method set_input_embeddings (line 863) | def set_input_embeddings(self, new_embeddings):
    method get_encoder (line 868) | def get_encoder(self):
    method get_decoder (line 871) | def get_decoder(self):
    method _prune_heads (line 874) | def _prune_heads(self, heads_to_prune):
    method forward (line 883) | def forward(
  class T5ForConditionalGeneration (line 966) | class T5ForConditionalGeneration(T5PreTrainedModel):
    method __init__ (line 967) | def __init__(self, config):
    method get_input_embeddings (line 984) | def get_input_embeddings(self):
    method set_input_embeddings (line 987) | def set_input_embeddings(self, new_embeddings):
    method get_output_embeddings (line 992) | def get_output_embeddings(self):
    method get_encoder (line 995) | def get_encoder(self):
    method get_decoder (line 998) | def get_decoder(self):
    method forward (line 1002) | def forward(
    method prepare_inputs_for_generation (line 1114) | def prepare_inputs_for_generation(self, input_ids, past, attention_mas...
    method _reorder_cache (line 1131) | def _reorder_cache(self, past, beam_idx):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_albert.py
  class TFAlbertEmbeddings (line 45) | class TFAlbertEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 49) | def __init__(self, config, **kwargs):
    method build (line 71) | def build(self, input_shape):
    method call (line 83) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 105) | def _embedding(self, inputs, training=False):
    method _linear (line 130) | def _linear(self, inputs):
  class TFAlbertSelfAttention (line 144) | class TFAlbertSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 145) | def __init__(self, config, **kwargs):
    method transpose_for_scores (line 171) | def transpose_for_scores(self, x, batch_size):
    method call (line 175) | def call(self, inputs, training=False):
  class TFAlbertSelfOutput (line 220) | class TFAlbertSelfOutput(tf.keras.layers.Layer):
    method __init__ (line 221) | def __init__(self, config, **kwargs):
    method call (line 229) | def call(self, inputs, training=False):
  class TFAlbertAttention (line 238) | class TFAlbertAttention(TFBertSelfAttention):
    method __init__ (line 239) | def __init__(self, config, **kwargs):
    method prune_heads (line 249) | def prune_heads(self, heads):
    method call (line 252) | def call(self, inputs, training=False):
  class TFAlbertLayer (line 306) | class TFAlbertLayer(tf.keras.layers.Layer):
    method __init__ (line 307) | def __init__(self, config, **kwargs):
    method call (line 328) | def call(self, inputs, training=False):
  class TFAlbertLayerGroup (line 344) | class TFAlbertLayerGroup(tf.keras.layers.Layer):
    method __init__ (line 345) | def __init__(self, config, **kwargs):
    method call (line 354) | def call(self, inputs, training=False):
  class TFAlbertTransformer (line 379) | class TFAlbertTransformer(tf.keras.layers.Layer):
    method __init__ (line 380) | def __init__(self, config, **kwargs):
    method call (line 396) | def call(self, inputs, training=False):
  class TFAlbertPreTrainedModel (line 438) | class TFAlbertPreTrainedModel(TFPreTrainedModel):
  class TFAlbertMLMHead (line 447) | class TFAlbertMLMHead(tf.keras.layers.Layer):
    method __init__ (line 448) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 466) | def build(self, input_shape):
    method call (line 473) | def call(self, hidden_states):
  class TFAlbertMainLayer (line 482) | class TFAlbertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 485) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 498) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 501) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 504) | def _prune_heads(self, heads_to_prune):
    method call (line 511) | def call(
  class TFAlbertModel (line 674) | class TFAlbertModel(TFAlbertPreTrainedModel):
    method __init__ (line 675) | def __init__(self, config, *inputs, **kwargs):
    method call (line 680) | def call(self, inputs, **kwargs):
  class TFAlbertForPreTraining (line 725) | class TFAlbertForPreTraining(TFAlbertPreTrainedModel):
    method __init__ (line 726) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 734) | def get_output_embeddings(self):
    method call (line 738) | def call(self, inputs, **kwargs):
  class TFAlbertSOPHead (line 772) | class TFAlbertSOPHead(tf.keras.layers.Layer):
    method __init__ (line 773) | def __init__(self, config, **kwargs):
    method call (line 781) | def call(self, pooled_output, training: bool):
  class TFAlbertForMaskedLM (line 788) | class TFAlbertForMaskedLM(TFAlbertPreTrainedModel):
    method __init__ (line 789) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 795) | def get_output_embeddings(self):
    method call (line 799) | def call(self, inputs, **kwargs):
  class TFAlbertForSequenceClassification (line 844) | class TFAlbertForSequenceClassification(TFAlbertPreTrainedModel):
    method __init__ (line 845) | def __init__(self, config, *inputs, **kwargs):
    method call (line 856) | def call(self, inputs, **kwargs):
  class TFAlbertForQuestionAnswering (line 901) | class TFAlbertForQuestionAnswering(TFAlbertPreTrainedModel):
    method __init__ (line 902) | def __init__(self, config, *inputs, **kwargs):
    method call (line 912) | def call(self, inputs, **kwargs):
  class TFAlbertForMultipleChoice (line 967) | class TFAlbertForMultipleChoice(TFAlbertPreTrainedModel):
    method __init__ (line 968) | def __init__(self, config, *inputs, **kwargs):
    method dummy_inputs (line 978) | def dummy_inputs(self):
    method call (line 987) | def call(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_auto.py
  class TFAutoModel (line 174) | class TFAutoModel(object):
    method __init__ (line 198) | def __init__(self):
    method from_config (line 206) | def from_config(cls, config):
    method from_pretrained (line 244) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForPreTraining (line 336) | class TFAutoModelForPreTraining(object):
    method __init__ (line 345) | def __init__(self):
    method from_config (line 353) | def from_config(cls, config):
    method from_pretrained (line 392) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelWithLMHead (line 486) | class TFAutoModelWithLMHead(object):
    method __init__ (line 510) | def __init__(self):
    method from_config (line 518) | def from_config(cls, config):
    method from_pretrained (line 556) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForMultipleChoice (line 649) | class TFAutoModelForMultipleChoice:
    method __init__ (line 665) | def __init__(self):
    method from_config (line 673) | def from_config(cls, config):
    method from_pretrained (line 706) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForSequenceClassification (line 796) | class TFAutoModelForSequenceClassification(object):
    method __init__ (line 815) | def __init__(self):
    method from_config (line 823) | def from_config(cls, config):
    method from_pretrained (line 859) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForQuestionAnswering (line 952) | class TFAutoModelForQuestionAnswering(object):
    method __init__ (line 972) | def __init__(self):
    method from_config (line 980) | def from_config(cls, config):
    method from_pretrained (line 1017) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
  class TFAutoModelForTokenClassification (line 1111) | class TFAutoModelForTokenClassification:
    method __init__ (line 1112) | def __init__(self):
    method from_config (line 1120) | def from_config(cls, config):
    method from_pretrained (line 1155) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_bert.py
  function gelu (line 58) | def gelu(x):
  function gelu_new (line 69) | def gelu_new(x):
  function swish (line 82) | def swish(x):
  class TFBertEmbeddings (line 94) | class TFBertEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 98) | def __init__(self, config, **kwargs):
    method build (line 122) | def build(self, input_shape):
    method call (line 134) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 156) | def _embedding(self, inputs, training=False):
    method _linear (line 181) | def _linear(self, inputs):
  class TFBertSelfAttention (line 197) | class TFBertSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 198) | def __init__(self, config, **kwargs):
    method transpose_for_scores (line 224) | def transpose_for_scores(self, x, batch_size):
    method call (line 228) | def call(self, inputs, training=False):
  class TFBertSelfOutput (line 273) | class TFBertSelfOutput(tf.keras.layers.Layer):
    method __init__ (line 274) | def __init__(self, config, **kwargs):
    method call (line 282) | def call(self, inputs, training=False):
  class TFBertAttention (line 291) | class TFBertAttention(tf.keras.layers.Layer):
    method __init__ (line 292) | def __init__(self, config, **kwargs):
    method prune_heads (line 297) | def prune_heads(self, heads):
    method call (line 300) | def call(self, inputs, training=False):
  class TFBertIntermediate (line 309) | class TFBertIntermediate(tf.keras.layers.Layer):
    method __init__ (line 310) | def __init__(self, config, **kwargs):
    method call (line 320) | def call(self, hidden_states):
  class TFBertOutput (line 326) | class TFBertOutput(tf.keras.layers.Layer):
    method __init__ (line 327) | def __init__(self, config, **kwargs):
    method call (line 335) | def call(self, inputs, training=False):
  class TFBertLayer (line 344) | class TFBertLayer(tf.keras.layers.Layer):
    method __init__ (line 345) | def __init__(self, config, **kwargs):
    method call (line 351) | def call(self, inputs, training=False):
  class TFBertEncoder (line 362) | class TFBertEncoder(tf.keras.layers.Layer):
    method __init__ (line 363) | def __init__(self, config, **kwargs):
    method call (line 369) | def call(self, inputs, training=False):
  class TFBertPooler (line 396) | class TFBertPooler(tf.keras.layers.Layer):
    method __init__ (line 397) | def __init__(self, config, **kwargs):
    method call (line 406) | def call(self, hidden_states):
  class TFBertPredictionHeadTransform (line 414) | class TFBertPredictionHeadTransform(tf.keras.layers.Layer):
    method __init__ (line 415) | def __init__(self, config, **kwargs):
    method call (line 426) | def call(self, hidden_states):
  class TFBertLMPredictionHead (line 433) | class TFBertLMPredictionHead(tf.keras.layers.Layer):
    method __init__ (line 434) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 443) | def build(self, input_shape):
    method call (line 447) | def call(self, hidden_states):
  class TFBertMLMHead (line 454) | class TFBertMLMHead(tf.keras.layers.Layer):
    method __init__ (line 455) | def __init__(self, config, input_embeddings, **kwargs):
    method call (line 459) | def call(self, sequence_output):
  class TFBertNSPHead (line 464) | class TFBertNSPHead(tf.keras.layers.Layer):
    method __init__ (line 465) | def __init__(self, config, **kwargs):
    method call (line 471) | def call(self, pooled_output):
  class TFBertMainLayer (line 477) | class TFBertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 480) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 488) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 491) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 494) | def _prune_heads(self, heads_to_prune):
    method call (line 501) | def call(
  class TFBertPreTrainedModel (line 583) | class TFBertPreTrainedModel(TFPreTrainedModel):
  class TFBertModel (line 667) | class TFBertModel(TFBertPreTrainedModel):
    method __init__ (line 668) | def __init__(self, config, *inputs, **kwargs):
    method call (line 673) | def call(self, inputs, **kwargs):
  class TFBertForPreTraining (line 718) | class TFBertForPreTraining(TFBertPreTrainedModel):
    method __init__ (line 719) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 726) | def get_output_embeddings(self):
    method call (line 730) | def call(self, inputs, **kwargs):
  class TFBertForMaskedLM (line 775) | class TFBertForMaskedLM(TFBertPreTrainedModel):
    method __init__ (line 776) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 782) | def get_output_embeddings(self):
    method call (line 786) | def call(self, inputs, **kwargs):
  class TFBertForNextSentencePrediction (line 828) | class TFBertForNextSentencePrediction(TFBertPreTrainedModel):
    method __init__ (line 829) | def __init__(self, config, *inputs, **kwargs):
    method call (line 836) | def call(self, inputs, **kwargs):
  class TFBertForSequenceClassification (line 883) | class TFBertForSequenceClassification(TFBertPreTrainedModel):
    method __init__ (line 884) | def __init__(self, config, *inputs, **kwargs):
    method call (line 895) | def call(self, inputs, **kwargs):
  class TFBertForMultipleChoice (line 941) | class TFBertForMultipleChoice(TFBertPreTrainedModel):
    method __init__ (line 942) | def __init__(self, config, *inputs, **kwargs):
    method dummy_inputs (line 952) | def dummy_inputs(self):
    method call (line 961) | def call(
  class TFBertForTokenClassification (line 1064) | class TFBertForTokenClassification(TFBertPreTrainedModel):
    method __init__ (line 1065) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1076) | def call(self, inputs, **kwargs):
  class TFBertForQuestionAnswering (line 1122) | class TFBertForQuestionAnswering(TFBertPreTrainedModel):
    method __init__ (line 1123) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1133) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_camembert.py
  class TFCamembertModel (line 70) | class TFCamembertModel(TFRobertaModel):
  class TFCamembertForMaskedLM (line 82) | class TFCamembertForMaskedLM(TFRobertaForMaskedLM):
  class TFCamembertForSequenceClassification (line 96) | class TFCamembertForSequenceClassification(TFRobertaForSequenceClassific...
  class TFCamembertForTokenClassification (line 110) | class TFCamembertForTokenClassification(TFRobertaForTokenClassification):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_ctrl.py
  function angle_defn (line 38) | def angle_defn(pos, i, d_model_size):
  function positional_encoding (line 43) | def positional_encoding(position, d_model_size):
  function scaled_dot_product_attention (line 55) | def scaled_dot_product_attention(q, k, v, mask, attention_mask=None, hea...
  class TFMultiHeadAttention (line 80) | class TFMultiHeadAttention(tf.keras.layers.Layer):
    method __init__ (line 81) | def __init__(self, d_model_size, num_heads, output_attentions=False, *...
    method split_into_heads (line 95) | def split_into_heads(self, x, batch_size):
    method call (line 99) | def call(self, inputs, training=False):
  function point_wise_feed_forward_network (line 142) | def point_wise_feed_forward_network(d_model_size, dff, name=""):
  class TFEncoderLayer (line 149) | class TFEncoderLayer(tf.keras.layers.Layer):
    method __init__ (line 150) | def __init__(
    method call (line 166) | def call(self, inputs, training=False):
  class TFCTRLMainLayer (line 186) | class TFCTRLMainLayer(tf.keras.layers.Layer):
    method __init__ (line 189) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 218) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 221) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 224) | def _prune_heads(self, heads_to_prune):
    method call (line 230) | def call(
  class TFCTRLPreTrainedModel (line 379) | class TFCTRLPreTrainedModel(TFPreTrainedModel):
  class TFCTRLModel (line 471) | class TFCTRLModel(TFCTRLPreTrainedModel):
    method __init__ (line 472) | def __init__(self, config, *inputs, **kwargs):
    method call (line 477) | def call(self, inputs, **kwargs):
  class TFCTRLLMHead (line 515) | class TFCTRLLMHead(tf.keras.layers.Layer):
    method __init__ (line 516) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 524) | def build(self, input_shape):
    method call (line 528) | def call(self, hidden_states):
  class TFCTRLLMHeadModel (line 539) | class TFCTRLLMHeadModel(TFCTRLPreTrainedModel):
    method __init__ (line 540) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 546) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 549) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 557) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_distilbert.py
  function gelu (line 46) | def gelu(x):
  function gelu_new (line 57) | def gelu_new(x):
  class TFEmbeddings (line 70) | class TFEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 71) | def __init__(self, config, **kwargs):
    method build (line 89) | def build(self, input_shape):
    method call (line 99) | def call(self, inputs, inputs_embeds=None, mode="embedding", training=...
    method _embedding (line 121) | def _embedding(self, inputs, inputs_embeds=None, training=False):
    method _linear (line 156) | def _linear(self, inputs):
  class TFMultiHeadSelfAttention (line 172) | class TFMultiHeadSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 173) | def __init__(self, config, **kwargs):
    method prune_heads (line 198) | def prune_heads(self, heads):
    method call (line 201) | def call(self, inputs, training=False):
  class TFFFN (line 262) | class TFFFN(tf.keras.layers.Layer):
    method __init__ (line 263) | def __init__(self, config, **kwargs):
    method call (line 279) | def call(self, input, training=False):
  class TFTransformerBlock (line 287) | class TFTransformerBlock(tf.keras.layers.Layer):
    method __init__ (line 288) | def __init__(self, config, **kwargs):
    method call (line 306) | def call(self, inputs, training=False):  # removed: src_enc=None, src_...
  class TFTransformer (line 341) | class TFTransformer(tf.keras.layers.Layer):
    method __init__ (line 342) | def __init__(self, config, **kwargs):
    method call (line 350) | def call(self, inputs, training=False):
  class TFDistilBertMainLayer (line 402) | class TFDistilBertMainLayer(tf.keras.layers.Layer):
    method __init__ (line 403) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 410) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 413) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 416) | def _prune_heads(self, heads_to_prune):
    method call (line 419) | def call(self, inputs, attention_mask=None, head_mask=None, inputs_emb...
  class TFDistilBertPreTrainedModel (line 465) | class TFDistilBertPreTrainedModel(TFPreTrainedModel):
  class TFDistilBertModel (line 539) | class TFDistilBertModel(TFDistilBertPreTrainedModel):
    method __init__ (line 540) | def __init__(self, config, *inputs, **kwargs):
    method call (line 545) | def call(self, inputs, **kwargs):
  class TFDistilBertLMHead (line 577) | class TFDistilBertLMHead(tf.keras.layers.Layer):
    method __init__ (line 578) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 586) | def build(self, input_shape):
    method call (line 590) | def call(self, hidden_states):
  class TFDistilBertForMaskedLM (line 599) | class TFDistilBertForMaskedLM(TFDistilBertPreTrainedModel):
    method __init__ (line 600) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 614) | def get_output_embeddings(self):
    method call (line 618) | def call(self, inputs, **kwargs):
  class TFDistilBertForSequenceClassification (line 665) | class TFDistilBertForSequenceClassification(TFDistilBertPreTrainedModel):
    method __init__ (line 666) | def __init__(self, config, *inputs, **kwargs):
    method call (line 683) | def call(self, inputs, **kwargs):
  class TFDistilBertForTokenClassification (line 729) | class TFDistilBertForTokenClassification(TFDistilBertPreTrainedModel):
    method __init__ (line 730) | def __init__(self, config, *inputs, **kwargs):
    method call (line 741) | def call(self, inputs, **kwargs):
  class TFDistilBertForQuestionAnswering (line 786) | class TFDistilBertForQuestionAnswering(TFDistilBertPreTrainedModel):
    method __init__ (line 787) | def __init__(self, config, *inputs, **kwargs):
    method call (line 798) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_electra.py
  class TFElectraEmbeddings (line 27) | class TFElectraEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 31) | def __init__(self, config, **kwargs):
    method build (line 55) | def build(self, input_shape):
    method call (line 67) | def call(self, inputs, mode="embedding", training=False):
    method _embedding (line 89) | def _embedding(self, inputs, training=False):
    method _linear (line 114) | def _linear(self, inputs):
  class TFElectraDiscriminatorPredictions (line 130) | class TFElectraDiscriminatorPredictions(tf.keras.layers.Layer):
    method __init__ (line 131) | def __init__(self, config, **kwargs):
    method call (line 138) | def call(self, discriminator_hidden_states, training=False):
  class TFElectraGeneratorPredictions (line 146) | class TFElectraGeneratorPredictions(tf.keras.layers.Layer):
    method __init__ (line 147) | def __init__(self, config, **kwargs):
    method call (line 153) | def call(self, generator_hidden_states, training=False):
  class TFElectraPreTrainedModel (line 161) | class TFElectraPreTrainedModel(TFBertPreTrainedModel):
    method get_extended_attention_mask (line 166) | def get_extended_attention_mask(self, attention_mask, input_shape):
    method get_head_mask (line 188) | def get_head_mask(self, head_mask):
  class TFElectraMainLayer (line 197) | class TFElectraMainLayer(TFElectraPreTrainedModel):
    method __init__ (line 201) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 210) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 213) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 216) | def _prune_heads(self, heads_to_prune):
    method call (line 223) | def call(
  class TFElectraModel (line 348) | class TFElectraModel(TFElectraPreTrainedModel):
    method __init__ (line 349) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 353) | def get_input_embeddings(self):
    method call (line 357) | def call(self, inputs, **kwargs):
  class TFElectraForPreTraining (line 398) | class TFElectraForPreTraining(TFElectraPreTrainedModel):
    method __init__ (line 399) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 405) | def get_input_embeddings(self):
    method call (line 409) | def call(
  class TFElectraMaskedLMHead (line 458) | class TFElectraMaskedLMHead(tf.keras.layers.Layer):
    method __init__ (line 459) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 464) | def build(self, input_shape):
    method call (line 468) | def call(self, hidden_states, training=False):
  class TFElectraForMaskedLM (line 482) | class TFElectraForMaskedLM(TFElectraPreTrainedModel):
    method __init__ (line 483) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 495) | def get_input_embeddings(self):
    method get_output_embeddings (line 498) | def get_output_embeddings(self):
    method call (line 502) | def call(
  class TFElectraForTokenClassification (line 560) | class TFElectraForTokenClassification(TFElectraPreTrainedModel):
    method __init__ (line 561) | def __init__(self, config, **kwargs):
    method call (line 569) | def call(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_flaubert.py
  class TFFlaubertModel (line 107) | class TFFlaubertModel(TFXLMModel):
    method __init__ (line 110) | def __init__(self, config, *inputs, **kwargs):
  class TFFlaubertMainLayer (line 115) | class TFFlaubertMainLayer(TFXLMMainLayer):
    method __init__ (line 116) | def __init__(self, config, *inputs, **kwargs):
    method call (line 121) | def call(
  class TFFlaubertWithLMHeadModel (line 311) | class TFFlaubertWithLMHeadModel(TFXLMWithLMHeadModel):
    method __init__ (line 314) | def __init__(self, config, *inputs, **kwargs):
  class TFFlaubertForSequenceClassification (line 324) | class TFFlaubertForSequenceClassification(TFXLMForSequenceClassification):
    method __init__ (line 327) | def __init__(self, config, *inputs, **kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_gpt2.py
  function gelu (line 50) | def gelu(x):
  class TFAttention (line 63) | class TFAttention(tf.keras.layers.Layer):
    method __init__ (line 64) | def __init__(self, nx, n_ctx, config, scale=False, **kwargs):
    method prune_heads (line 82) | def prune_heads(self, heads):
    method causal_attention_mask (line 86) | def causal_attention_mask(nd, ns, dtype):
    method _attn (line 95) | def _attn(self, inputs, training=False):
    method merge_heads (line 125) | def merge_heads(self, x):
    method split_heads (line 131) | def split_heads(self, x):
    method call (line 137) | def call(self, inputs, training=False):
  class TFMLP (line 175) | class TFMLP(tf.keras.layers.Layer):
    method __init__ (line 176) | def __init__(self, n_state, config, **kwargs):
    method call (line 184) | def call(self, x, training=False):
  class TFBlock (line 191) | class TFBlock(tf.keras.layers.Layer):
    method __init__ (line 192) | def __init__(self, n_ctx, config, scale=False, **kwargs):
    method call (line 200) | def call(self, inputs, training=False):
  class TFGPT2MainLayer (line 217) | class TFGPT2MainLayer(tf.keras.layers.Layer):
    method __init__ (line 220) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 241) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 244) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 247) | def _prune_heads(self, heads_to_prune):
    method call (line 253) | def call(
  class TFGPT2PreTrainedModel (line 387) | class TFGPT2PreTrainedModel(TFPreTrainedModel):
  class TFGPT2Model (line 475) | class TFGPT2Model(TFGPT2PreTrainedModel):
    method __init__ (line 476) | def __init__(self, config, *inputs, **kwargs):
    method call (line 481) | def call(self, inputs, **kwargs):
  class TFGPT2LMHeadModel (line 524) | class TFGPT2LMHeadModel(TFGPT2PreTrainedModel):
    method __init__ (line 525) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 529) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 532) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 540) | def call(self, inputs, **kwargs):
  class TFGPT2DoubleHeadsModel (line 593) | class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
    method __init__ (line 594) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 602) | def get_output_embeddings(self):
    method call (line 606) | def call(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_openai.py
  function gelu (line 45) | def gelu(x):
  function swish (line 58) | def swish(x):
  class TFAttention (line 69) | class TFAttention(tf.keras.layers.Layer):
    method __init__ (line 70) | def __init__(self, nx, n_ctx, config, scale=False, **kwargs):
    method prune_heads (line 88) | def prune_heads(self, heads):
    method causal_attention_mask (line 92) | def causal_attention_mask(nd, ns, dtype):
    method _attn (line 101) | def _attn(self, inputs, training=False):
    method merge_heads (line 131) | def merge_heads(self, x):
    method split_heads (line 137) | def split_heads(self, x):
    method call (line 143) | def call(self, inputs, training=False):
  class TFMLP (line 163) | class TFMLP(tf.keras.layers.Layer):
    method __init__ (line 164) | def __init__(self, n_state, config, **kwargs):
    method call (line 172) | def call(self, x, training=False):
  class TFBlock (line 179) | class TFBlock(tf.keras.layers.Layer):
    method __init__ (line 180) | def __init__(self, n_ctx, config, scale=False, **kwargs):
    method call (line 188) | def call(self, inputs, training=False):
  class TFOpenAIGPTMainLayer (line 202) | class TFOpenAIGPTMainLayer(tf.keras.layers.Layer):
    method __init__ (line 203) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 223) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 226) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 229) | def _prune_heads(self, heads_to_prune):
    method call (line 235) | def call(
  class TFOpenAIGPTPreTrainedModel (line 349) | class TFOpenAIGPTPreTrainedModel(TFPreTrainedModel):
  class TFOpenAIGPTModel (line 430) | class TFOpenAIGPTModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 431) | def __init__(self, config, *inputs, **kwargs):
    method call (line 436) | def call(self, inputs, **kwargs):
  class TFOpenAIGPTLMHeadModel (line 475) | class TFOpenAIGPTLMHeadModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 476) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 480) | def get_output_embeddings(self):
    method call (line 484) | def call(self, inputs, **kwargs):
  class TFOpenAIGPTDoubleHeadsModel (line 532) | class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
    method __init__ (line 533) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 541) | def get_output_embeddings(self):
    method call (line 545) | def call(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_pytorch_utils.py
  function convert_tf_weight_name_to_pt_weight_name (line 29) | def convert_tf_weight_name_to_pt_weight_name(tf_name, start_prefix_to_re...
  function load_pytorch_checkpoint_in_tf2_model (line 73) | def load_pytorch_checkpoint_in_tf2_model(tf_model, pytorch_checkpoint_pa...
  function load_pytorch_model_in_tf2_model (line 97) | def load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=None, ...
  function load_pytorch_weights_in_tf2_model (line 107) | def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs...
  function load_tf2_checkpoint_in_pytorch_model (line 205) | def load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path, t...
  function load_tf2_model_in_pytorch_model (line 240) | def load_tf2_model_in_pytorch_model(pt_model, tf_model, allow_missing_ke...
  function load_tf2_weights_in_pytorch_model (line 248) | def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missin...

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_roberta.py
  class TFRobertaEmbeddings (line 40) | class TFRobertaEmbeddings(TFBertEmbeddings):
    method __init__ (line 45) | def __init__(self, config, **kwargs):
    method create_position_ids_from_input_ids (line 49) | def create_position_ids_from_input_ids(self, x):
    method create_position_ids_from_inputs_embeds (line 60) | def create_position_ids_from_inputs_embeds(self, inputs_embeds):
    method _embedding (line 71) | def _embedding(self, inputs, training=False):
  class TFRobertaMainLayer (line 85) | class TFRobertaMainLayer(TFBertMainLayer):
    method __init__ (line 90) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 94) | def get_input_embeddings(self):
  class TFRobertaPreTrainedModel (line 98) | class TFRobertaPreTrainedModel(TFPreTrainedModel):
  class TFRobertaModel (line 182) | class TFRobertaModel(TFRobertaPreTrainedModel):
    method __init__ (line 183) | def __init__(self, config, *inputs, **kwargs):
    method call (line 188) | def call(self, inputs, **kwargs):
  class TFRobertaLMHead (line 228) | class TFRobertaLMHead(tf.keras.layers.Layer):
    method __init__ (line 231) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 244) | def build(self, input_shape):
    method call (line 248) | def call(self, features):
  class TFRobertaForMaskedLM (line 260) | class TFRobertaForMaskedLM(TFRobertaPreTrainedModel):
    method __init__ (line 261) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 267) | def get_output_embeddings(self):
    method call (line 271) | def call(self, inputs, **kwargs):
  class TFRobertaClassificationHead (line 310) | class TFRobertaClassificationHead(tf.keras.layers.Layer):
    method __init__ (line 313) | def __init__(self, config, **kwargs):
    method call (line 326) | def call(self, features, training=False):
  class TFRobertaForSequenceClassification (line 340) | class TFRobertaForSequenceClassification(TFRobertaPreTrainedModel):
    method __init__ (line 341) | def __init__(self, config, *inputs, **kwargs):
    method call (line 349) | def call(self, inputs, **kwargs):
  class TFRobertaForTokenClassification (line 394) | class TFRobertaForTokenClassification(TFRobertaPreTrainedModel):
    method __init__ (line 395) | def __init__(self, config, *inputs, **kwargs):
    method call (line 406) | def call(self, inputs, **kwargs):
  class TFRobertaForQuestionAnswering (line 451) | class TFRobertaForQuestionAnswering(TFRobertaPreTrainedModel):
    method __init__ (line 452) | def __init__(self, config, *inputs, **kwargs):
    method call (line 462) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_t5.py
  class TFT5LayerNorm (line 49) | class TFT5LayerNorm(tf.keras.layers.Layer):
    method __init__ (line 50) | def __init__(self, epsilon=1e-6, **kwargs):
    method build (line 57) | def build(self, input_shape):
    method call (line 62) | def call(self, x):
  class TFT5DenseReluDense (line 68) | class TFT5DenseReluDense(tf.keras.layers.Layer):
    method __init__ (line 69) | def __init__(self, config, **kwargs):
    method call (line 76) | def call(self, hidden_states, training=False):
  class TFT5LayerFF (line 84) | class TFT5LayerFF(tf.keras.layers.Layer):
    method __init__ (line 85) | def __init__(self, config, **kwargs):
    method call (line 91) | def call(self, hidden_states, training=False):
  class TFT5Attention (line 98) | class TFT5Attention(tf.keras.layers.Layer):
    method __init__ (line 101) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method prune_heads (line 127) | def prune_heads(self, heads):
    method _relative_position_bucket (line 131) | def _relative_position_bucket(relative_position, bidirectional=True, n...
    method compute_bias (line 176) | def compute_bias(self, qlen, klen):
    method call (line 188) | def call(
  class TFT5LayerSelfAttention (line 302) | class TFT5LayerSelfAttention(tf.keras.layers.Layer):
    method __init__ (line 303) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 311) | def call(
  class TFT5LayerCrossAttention (line 337) | class TFT5LayerCrossAttention(tf.keras.layers.Layer):
    method __init__ (line 338) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 346) | def call(
  class TFT5Block (line 376) | class TFT5Block(tf.keras.layers.Layer):
    method __init__ (line 377) | def __init__(self, config, has_relative_attention_bias=False, **kwargs):
    method call (line 393) | def call(
  class _NoLayerEmbedTokens (line 471) | class _NoLayerEmbedTokens(object):
    method __init__ (line 478) | def __init__(self, layer, abs_scope_name=None):
    method call (line 482) | def call(self, inputs, mode="embedding"):
    method __call__ (line 491) | def __call__(self, inputs, mode="embedding"):
  class TFT5MainLayer (line 505) | class TFT5MainLayer(tf.keras.layers.Layer):
    method __init__ (line 506) | def __init__(self, config, embed_tokens=None, **kwargs):
    method get_input_embeddings (line 524) | def get_input_embeddings(self):
    method get_output_embeddings (line 527) | def get_output_embeddings(self):
    method set_embed_tokens (line 530) | def set_embed_tokens(self, embed_tokens):
    method _resize_token_embeddings (line 533) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 536) | def _prune_heads(self, heads_to_prune):
    method call (line 539) | def call(
  class TFT5PreTrainedModel (line 718) | class TFT5PreTrainedModel(TFPreTrainedModel):
    method dummy_inputs (line 727) | def dummy_inputs(self):
  class TFT5Model (line 828) | class TFT5Model(TFT5PreTrainedModel):
    method __init__ (line 829) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 846) | def get_input_embeddings(self):
    method get_output_embeddings (line 849) | def get_output_embeddings(self):
    method get_encoder (line 852) | def get_encoder(self):
    method get_decoder (line 855) | def get_decoder(self):
    method call (line 859) | def call(self, inputs, **kwargs):
  class TFT5ForConditionalGeneration (line 947) | class TFT5ForConditionalGeneration(TFT5PreTrainedModel):
    method __init__ (line 948) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 967) | def get_input_embeddings(self):
    method get_output_embeddings (line 970) | def get_output_embeddings(self):
    method get_encoder (line 973) | def get_encoder(self):
    method get_decoder (line 976) | def get_decoder(self):
    method call (line 980) | def call(self, inputs, **kwargs):
    method prepare_inputs_for_generation (line 1079) | def prepare_inputs_for_generation(self, inputs, past, attention_mask, ...
    method _reorder_cache (line 1097) | def _reorder_cache(self, past, beam_idx):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_transfo_xl.py
  class TFPositionalEmbedding (line 39) | class TFPositionalEmbedding(tf.keras.layers.Layer):
    method __init__ (line 40) | def __init__(self, demb, **kwargs):
    method call (line 45) | def call(self, pos_seq, bsz=None):
  class TFPositionwiseFF (line 55) | class TFPositionwiseFF(tf.keras.layers.Layer):
    method __init__ (line 56) | def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, layer_n...
    method call (line 74) | def call(self, inp, training=False):
  class TFRelPartialLearnableMultiHeadAttn (line 98) | class TFRelPartialLearnableMultiHeadAttn(tf.keras.layers.Layer):
    method __init__ (line 99) | def __init__(
    method build (line 152) | def build(self, input_shape):
    method _rel_shift (line 162) | def _rel_shift(self, x):
    method call (line 172) | def call(self, inputs, training=False):
  class TFRelPartialLearnableDecoderLayer (line 252) | class TFRelPartialLearnableDecoderLayer(tf.keras.layers.Layer):
    method __init__ (line 253) | def __init__(
    method call (line 301) | def call(self, inputs, training=False):
  class TFAdaptiveEmbedding (line 311) | class TFAdaptiveEmbedding(tf.keras.layers.Layer):
    method __init__ (line 312) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, init_...
    method build (line 344) | def build(self, input_shape):
    method call (line 357) | def call(self, inp):
  class TFTransfoXLMainLayer (line 384) | class TFTransfoXLMainLayer(tf.keras.layers.Layer):
    method __init__ (line 387) | def __init__(self, config, **kwargs):
    method build (line 455) | def build(self, input_shape):
    method get_input_embeddings (line 465) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 468) | def _resize_token_embeddings(self, new_num_tokens):
    method backward_compatible (line 471) | def backward_compatible(self):
    method reset_length (line 474) | def reset_length(self, tgt_len, ext_len, mem_len):
    method _prune_heads (line 479) | def _prune_heads(self, heads):
    method init_mems (line 482) | def init_mems(self, bsz):
    method _update_mems (line 493) | def _update_mems(self, hids, mems, mlen, qlen):
    method call (line 517) | def call(self, inputs, mems=None, head_mask=None, inputs_embeds=None, ...
  class TFTransfoXLPreTrainedModel (line 628) | class TFTransfoXLPreTrainedModel(TFPreTrainedModel):
  class TFTransfoXLModel (line 693) | class TFTransfoXLModel(TFTransfoXLPreTrainedModel):
    method __init__ (line 694) | def __init__(self, config, *inputs, **kwargs):
    method call (line 699) | def call(self, inputs, **kwargs):
  class TFTransfoXLLMHead (line 737) | class TFTransfoXLLMHead(tf.keras.layers.Layer):
    method __init__ (line 738) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 746) | def build(self, input_shape):
    method call (line 750) | def call(self, hidden_states):
  class TFTransfoXLLMHeadModel (line 761) | class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
    method __init__ (line 762) | def __init__(self, config):
    method get_output_embeddings (line 774) | def get_output_embeddings(self):
    method reset_length (line 781) | def reset_length(self, tgt_len, ext_len, mem_len):
    method init_mems (line 784) | def init_mems(self, bsz):
    method call (line 788) | def call(self, inputs, mems=None, head_mask=None, inputs_embeds=None, ...
    method prepare_inputs_for_generation (line 855) | def prepare_inputs_for_generation(self, inputs, past, **model_kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_transfo_xl_utilities.py
  class TFAdaptiveSoftmaxMask (line 25) | class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
    method __init__ (line 26) | def __init__(self, vocab_size, d_embed, d_proj, cutoffs, div_val=1, ke...
    method build (line 45) | def build(self, input_shape):
    method _logit (line 104) | def _logit(x, W, b, proj=None):
    method _gather_logprob (line 111) | def _gather_logprob(logprob, target):
    method call (line 117) | def call(self, inputs, return_mean=True, training=False):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_utils.py
  class TFModelUtilsMixin (line 34) | class TFModelUtilsMixin:
    method num_parameters (line 39) | def num_parameters(self, only_trainable: bool = False) -> int:
  function keras_serializable (line 49) | def keras_serializable(cls):
  class TFPreTrainedModel (line 107) | class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin):
    method dummy_inputs (line 127) | def dummy_inputs(self):
    method __init__ (line 135) | def __init__(self, config, *inputs, **kwargs):
    method get_input_embeddings (line 148) | def get_input_embeddings(self):
    method get_output_embeddings (line 162) | def get_output_embeddings(self):
    method _get_resized_embeddings (line 172) | def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None):
    method resize_token_embeddings (line 206) | def resize_token_embeddings(self, new_num_tokens=None):
    method prune_heads (line 221) | def prune_heads(self, heads_to_prune):
    method save_pretrained (line 230) | def save_pretrained(self, save_directory):
    method from_pretrained (line 247) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
    method prepare_inputs_for_generation (line 438) | def prepare_inputs_for_generation(self, inputs, **kwargs):
    method _use_cache (line 441) | def _use_cache(self, outputs, use_cache):
    method generate (line 449) | def generate(
    method _generate_no_beam_search (line 810) | def _generate_no_beam_search(
    method _generate_beam_search (line 973) | def _generate_beam_search(
    method _reorder_cache (line 1294) | def _reorder_cache(past, beam_idx):
  function _create_next_token_logits_penalties (line 1298) | def _create_next_token_logits_penalties(input_ids, logits, repetition_pe...
  function calc_banned_ngram_tokens (line 1312) | def calc_banned_ngram_tokens(prev_input_ids, num_hypos, no_repeat_ngram_...
  function calc_banned_bad_words_ids (line 1335) | def calc_banned_bad_words_ids(prev_input_ids, bad_words_ids):
  function tf_top_k_top_p_filtering (line 1371) | def tf_top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-f...
  function scatter_values_on_batch_indices (line 1421) | def scatter_values_on_batch_indices(values, batch_indices):
  function set_tensor_by_indices_to_value (line 1431) | def set_tensor_by_indices_to_value(tensor, indices, value):
  class BeamHypotheses (line 1437) | class BeamHypotheses(object):
    method __init__ (line 1438) | def __init__(self, num_beams, max_length, length_penalty, early_stoppi...
    method __len__ (line 1449) | def __len__(self):
    method add (line 1455) | def add(self, hyp, sum_logprobs):
    method is_done (line 1469) | def is_done(self, best_sum_logprobs, cur_len=None):
  class TFConv1D (line 1487) | class TFConv1D(tf.keras.layers.Layer):
    method __init__ (line 1488) | def __init__(self, nf, nx, initializer_range=0.02, **kwargs):
    method build (line 1497) | def build(self, input_shape):
    method call (line 1503) | def call(self, x):
  class TFSharedEmbeddings (line 1514) | class TFSharedEmbeddings(tf.keras.layers.Layer):
    method __init__ (line 1518) | def __init__(self, vocab_size, hidden_size, initializer_range=None, **...
    method build (line 1524) | def build(self, input_shape):
    method call (line 1534) | def call(self, inputs, mode="embedding"):
    method _embedding (line 1556) | def _embedding(self, input_ids):
    method _linear (line 1560) | def _linear(self, inputs):
  class TFSequenceSummary (line 1575) | class TFSequenceSummary(tf.keras.layers.Layer):
    method __init__ (line 1591) | def __init__(self, config, initializer_range=0.02, **kwargs):
    method call (line 1623) | def call(self, inputs, training=False):
  function shape_list (line 1682) | def shape_list(x):
  function get_initializer (line 1689) | def get_initializer(initializer_range=0.02):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_xlm.py
  function create_sinusoidal_embeddings (line 49) | def create_sinusoidal_embeddings(n_pos, dim, out):
  function gelu (line 55) | def gelu(x):
  function get_masks (line 66) | def get_masks(slen, lengths, causal, padding_mask=None, dtype=tf.float32):
  class TFMultiHeadAttention (line 97) | class TFMultiHeadAttention(tf.keras.layers.Layer):
    method __init__ (line 101) | def __init__(self, n_heads, dim, config, **kwargs):
    method prune_heads (line 116) | def prune_heads(self, heads):
    method call (line 119) | def call(self, inputs, training=False):
  class TFTransformerFFN (line 185) | class TFTransformerFFN(tf.keras.layers.Layer):
    method __init__ (line 186) | def __init__(self, in_dim, dim_hidden, out_dim, config, **kwargs):
    method call (line 193) | def call(self, input, training=False):
  class TFXLMMainLayer (line 201) | class TFXLMMainLayer(tf.keras.layers.Layer):
    method __init__ (line 202) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 292) | def get_input_embeddings(self):
    method _resize_token_embeddings (line 295) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 298) | def _prune_heads(self, heads_to_prune):
    method call (line 305) | def call(
  class TFXLMPreTrainedModel (line 468) | class TFXLMPreTrainedModel(TFPreTrainedModel):
    method dummy_inputs (line 477) | def dummy_inputs(self):
  class TFXLMModel (line 574) | class TFXLMModel(TFXLMPreTrainedModel):
    method __init__ (line 575) | def __init__(self, config, *inputs, **kwargs):
    method call (line 580) | def call(self, inputs, **kwargs):
  class TFXLMPredLayer (line 614) | class TFXLMPredLayer(tf.keras.layers.Layer):
    method __init__ (line 619) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 636) | def build(self, input_shape):
    method call (line 641) | def call(self, hidden_states):
  class TFXLMWithLMHeadModel (line 652) | class TFXLMWithLMHeadModel(TFXLMPreTrainedModel):
    method __init__ (line 653) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 658) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 661) | def prepare_inputs_for_generation(self, inputs, **kwargs):
    method call (line 676) | def call(self, inputs, **kwargs):
  class TFXLMForSequenceClassification (line 720) | class TFXLMForSequenceClassification(TFXLMPreTrainedModel):
    method __init__ (line 721) | def __init__(self, config, *inputs, **kwargs):
    method call (line 729) | def call(self, inputs, **kwargs):
  class TFXLMForQuestionAnsweringSimple (line 774) | class TFXLMForQuestionAnsweringSimple(TFXLMPreTrainedModel):
    method __init__ (line 775) | def __init__(self, config, *inputs, **kwargs):
    method call (line 783) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_xlm_roberta.py
  class TFXLMRobertaModel (line 70) | class TFXLMRobertaModel(TFRobertaModel):
  class TFXLMRobertaForMaskedLM (line 82) | class TFXLMRobertaForMaskedLM(TFRobertaForMaskedLM):
  class TFXLMRobertaForSequenceClassification (line 96) | class TFXLMRobertaForSequenceClassification(TFRobertaForSequenceClassifi...
  class TFXLMRobertaForTokenClassification (line 110) | class TFXLMRobertaForTokenClassification(TFRobertaForTokenClassification):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_tf_xlnet.py
  function gelu (line 47) | def gelu(x):
  function swish (line 56) | def swish(x):
  class TFXLNetRelativeAttention (line 67) | class TFXLNetRelativeAttention(tf.keras.layers.Layer):
    method __init__ (line 68) | def __init__(self, config, **kwargs):
    method build (line 87) | def build(self, input_shape):
    method prune_heads (line 118) | def prune_heads(self, heads):
    method rel_shift (line 121) | def rel_shift(self, x, klen=-1):
    method rel_attn_core (line 133) | def rel_attn_core(self, inputs, training=False):
    method post_attention (line 178) | def post_attention(self, inputs, residual=True, training=False):
    method call (line 193) | def call(self, inputs, training=False):
  class TFXLNetFeedForward (line 290) | class TFXLNetFeedForward(tf.keras.layers.Layer):
    method __init__ (line 291) | def __init__(self, config, **kwargs):
    method call (line 306) | def call(self, inp, training=False):
  class TFXLNetLayer (line 317) | class TFXLNetLayer(tf.keras.layers.Layer):
    method __init__ (line 318) | def __init__(self, config, **kwargs):
    method call (line 324) | def call(self, inputs, training=False):
  class TFXLNetLMHead (line 336) | class TFXLNetLMHead(tf.keras.layers.Layer):
    method __init__ (line 337) | def __init__(self, config, input_embeddings, **kwargs):
    method build (line 344) | def build(self, input_shape):
    method call (line 348) | def call(self, hidden_states):
  class TFXLNetMainLayer (line 355) | class TFXLNetMainLayer(tf.keras.layers.Layer):
    method __init__ (line 358) | def __init__(self, config, **kwargs):
    method get_input_embeddings (line 380) | def get_input_embeddings(self):
    method build (line 383) | def build(self, input_shape):
    method _resize_token_embeddings (line 389) | def _resize_token_embeddings(self, new_num_tokens):
    method _prune_heads (line 392) | def _prune_heads(self, heads_to_prune):
    method create_mask (line 395) | def create_mask(self, qlen, mlen, dtype=tf.float32):
    method cache_mem (line 424) | def cache_mem(self, curr_out, prev_mem):
    method positional_embedding (line 437) | def positional_embedding(pos_seq, inv_freq, bsz=None):
    method relative_positional_encoding (line 447) | def relative_positional_encoding(self, qlen, klen, bsz=None, dtype=None):
    method call (line 495) | def call(
  class TFXLNetPreTrainedModel (line 699) | class TFXLNetPreTrainedModel(TFPreTrainedModel):
  class TFXLNetModel (line 795) | class TFXLNetModel(TFXLNetPreTrainedModel):
    method __init__ (line 796) | def __init__(self, config, *inputs, **kwargs):
    method call (line 801) | def call(self, inputs, **kwargs):
  class TFXLNetLMHeadModel (line 844) | class TFXLNetLMHeadModel(TFXLNetPreTrainedModel):
    method __init__ (line 845) | def __init__(self, config, *inputs, **kwargs):
    method get_output_embeddings (line 850) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 853) | def prepare_inputs_for_generation(self, inputs, past, **kwargs):
    method call (line 885) | def call(self, inputs, **kwargs):
  class TFXLNetForSequenceClassification (line 941) | class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel):
    method __init__ (line 942) | def __init__(self, config, *inputs, **kwargs):
    method call (line 955) | def call(self, inputs, **kwargs):
  class TFXLNetForTokenClassification (line 1005) | class TFXLNetForTokenClassification(TFXLNetPreTrainedModel):
    method __init__ (line 1006) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1015) | def call(self, inputs, **kwargs):
  class TFXLNetForQuestionAnsweringSimple (line 1064) | class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel):
    method __init__ (line 1065) | def __init__(self, config, *inputs, **kwargs):
    method call (line 1073) | def call(self, inputs, **kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_transfo_xl.py
  function build_tf_to_pytorch_map (line 42) | def build_tf_to_pytorch_map(model, config):
  function load_tf_weights_in_transfo_xl (line 109) | def load_tf_weights_in_transfo_xl(model, config, tf_path):
  class PositionalEmbedding (line 167) | class PositionalEmbedding(nn.Module):
    method __init__ (line 168) | def __init__(self, demb):
    method forward (line 176) | def forward(self, pos_seq, bsz=None):
  class PositionwiseFF (line 186) | class PositionwiseFF(nn.Module):
    method __init__ (line 187) | def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, layer_n...
    method forward (line 206) | def forward(self, inp):
  class RelPartialLearnableMultiHeadAttn (line 223) | class RelPartialLearnableMultiHeadAttn(nn.Module):
    method __init__ (line 224) | def __init__(
    method _rel_shift (line 269) | def _rel_shift(self, x):
    method forward (line 281) | def forward(self, w, r, attn_mask=None, mems=None, head_mask=None):
  class RelPartialLearnableDecoderLayer (line 370) | class RelPartialLearnableDecoderLayer(nn.Module):
    method __init__ (line 371) | def __init__(self, n_head, d_model, d_head, d_inner, dropout, layer_no...
    method forward (line 381) | def forward(self, dec_inp, r, dec_attn_mask=None, mems=None, head_mask...
  class AdaptiveEmbedding (line 391) | class AdaptiveEmbedding(nn.Module):
    method __init__ (line 392) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, sampl...
    method forward (line 419) | def forward(self, inp):
  class TransfoXLPreTrainedModel (line 451) | class TransfoXLPreTrainedModel(PreTrainedModel):
    method _init_weight (line 460) | def _init_weight(self, weight):
    method _init_bias (line 466) | def _init_bias(self, bias):
    method _init_weights (line 469) | def _init_weights(self, m):
  class TransfoXLModel (line 552) | class TransfoXLModel(TransfoXLPreTrainedModel):
    method __init__ (line 553) | def __init__(self, config):
    method get_input_embeddings (line 618) | def get_input_embeddings(self):
    method set_input_embeddings (line 621) | def set_input_embeddings(self, new_embeddings):
    method backward_compatible (line 624) | def backward_compatible(self):
    method reset_length (line 627) | def reset_length(self, tgt_len, ext_len, mem_len):
    method _prune_heads (line 632) | def _prune_heads(self, heads):
    method init_mems (line 636) | def init_mems(self, bsz):
    method _update_mems (line 648) | def _update_mems(self, hids, mems, mlen, qlen):
    method forward (line 673) | def forward(self, input_ids=None, mems=None, head_mask=None, inputs_em...
  class TransfoXLLMHeadModel (line 807) | class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
    method __init__ (line 808) | def __init__(self, config):
    method tie_weights (line 823) | def tie_weights(self):
    method reset_length (line 844) | def reset_length(self, tgt_len, ext_len, mem_len):
    method init_mems (line 847) | def init_mems(self, bsz):
    method forward (line 851) | def forward(self, input_ids=None, mems=None, head_mask=None, inputs_em...
    method get_output_embeddings (line 917) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 925) | def prepare_inputs_for_generation(self, input_ids, past, **model_kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_transfo_xl_utilities.py
  class ProjectedAdaptiveLogSoftmax (line 30) | class ProjectedAdaptiveLogSoftmax(nn.Module):
    method __init__ (line 31) | def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1, keep_...
    method _compute_logit (line 72) | def _compute_logit(self, hidden, weight, bias, proj):
    method forward (line 86) | def forward(self, hidden, labels=None, keep_order=False):
    method log_prob (line 193) | def log_prob(self, hidden):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_utils.py
  class Identity (line 47) | class Identity(nn.Module):
    method __init__ (line 51) | def __init__(self, *args, **kwargs):
    method forward (line 54) | def forward(self, input):
  class ModuleUtilsMixin (line 58) | class ModuleUtilsMixin:
    method num_parameters (line 63) | def num_parameters(self, only_trainable: bool = False) -> int:
    method _hook_rss_memory_pre_forward (line 71) | def _hook_rss_memory_pre_forward(module, *args, **kwargs):
    method _hook_rss_memory_post_forward (line 83) | def _hook_rss_memory_post_forward(module, *args, **kwargs):
    method add_memory_hooks (line 96) | def add_memory_hooks(self):
    method reset_memory_hooks_state (line 105) | def reset_memory_hooks_state(self):
    method device (line 112) | def device(self) -> device:
    method dtype (line 130) | def dtype(self) -> dtype:
    method invert_attention_mask (line 147) | def invert_attention_mask(self, encoder_attention_mask: Tensor) -> Ten...
    method get_extended_attention_mask (line 173) | def get_extended_attention_mask(self, attention_mask: Tensor, input_sh...
    method get_head_mask (line 217) | def get_head_mask(self, head_mask: Tensor, num_hidden_layers: int, is_...
    method _convert_head_mask_to_5d (line 238) | def _convert_head_mask_to_5d(self, head_mask, num_hidden_layers):
  class PreTrainedModel (line 250) | class PreTrainedModel(nn.Module, ModuleUtilsMixin):
    method dummy_inputs (line 270) | def dummy_inputs(self):
    method __init__ (line 278) | def __init__(self, config, *inputs, **kwargs):
    method base_model (line 292) | def base_model(self):
    method get_input_embeddings (line 295) | def get_input_embeddings(self):
    method set_input_embeddings (line 309) | def set_input_embeddings(self, value: nn.Module):
    method get_output_embeddings (line 323) | def get_output_embeddings(self):
    method tie_weights (line 333) | def tie_weights(self):
    method _tie_or_clone_weights (line 343) | def _tie_or_clone_weights(self, output_embeddings, input_embeddings):
    method resize_token_embeddings (line 361) | def resize_token_embeddings(self, new_num_tokens: Optional[int] = None):
    method _resize_token_embeddings (line 388) | def _resize_token_embeddings(self, new_num_tokens):
    method _get_resized_embeddings (line 394) | def _get_resized_embeddings(
    method init_weights (line 432) | def init_weights(self):
    method prune_heads (line 444) | def prune_heads(self, heads_to_prune: Dict):
    method save_pretrained (line 459) | def save_pretrained(self, save_directory):
    method from_pretrained (line 494) | def from_pretrained(cls, pretrained_model_name_or_path, *model_args, *...
    method prepare_inputs_for_generation (line 777) | def prepare_inputs_for_generation(self, input_ids, **kwargs):
    method prepare_logits_for_generation (line 780) | def prepare_logits_for_generation(self, logits, **kwargs):
    method _use_cache (line 783) | def _use_cache(self, outputs, use_cache):
    method enforce_repetition_penalty_ (line 791) | def enforce_repetition_penalty_(self, lprobs, batch_size, num_beams, p...
    method generate (line 802) | def generate(
    method _generate_no_beam_search (line 1186) | def _generate_no_beam_search(
    method _generate_beam_search (line 1307) | def _generate_beam_search(
    method _reorder_cache (line 1582) | def _reorder_cache(past: Tuple, beam_idx: Tensor) -> Tuple[Tensor]:
  function calc_banned_ngram_tokens (line 1586) | def calc_banned_ngram_tokens(prev_input_ids: Tensor, num_hypos: int, no_...
  function calc_banned_bad_words_ids (line 1609) | def calc_banned_bad_words_ids(prev_input_ids: Iterable[int], bad_words_i...
  function top_k_top_p_filtering (line 1645) | def top_k_top_p_filtering(
  class BeamHypotheses (line 1686) | class BeamHypotheses(object):
    method __init__ (line 1687) | def __init__(self, num_beams, max_length, length_penalty, early_stoppi...
    method __len__ (line 1698) | def __len__(self):
    method add (line 1704) | def add(self, hyp, sum_logprobs):
    method is_done (line 1718) | def is_done(self, best_sum_logprobs, cur_len=None):
  class Conv1D (line 1736) | class Conv1D(nn.Module):
    method __init__ (line 1737) | def __init__(self, nf, nx):
    method forward (line 1748) | def forward(self, x):
  class PoolerStartLogits (line 1755) | class PoolerStartLogits(nn.Module):
    method __init__ (line 1758) | def __init__(self, config):
    method forward (line 1762) | def forward(self, hidden_states, p_mask=None):
  class PoolerEndLogits (line 1779) | class PoolerEndLogits(nn.Module):
    method __init__ (line 1783) | def __init__(self, config):
    method forward (line 1790) | def forward(self, hidden_states, start_states=None, start_positions=No...
  class PoolerAnswerClass (line 1826) | class PoolerAnswerClass(nn.Module):
    method __init__ (line 1829) | def __init__(self, config):
    method forward (line 1835) | def forward(self, hidden_states, start_states=None, start_positions=No...
  class SQuADHead (line 1873) | class SQuADHead(nn.Module):
    method __init__ (line 1914) | def __init__(self, config):
    method forward (line 1923) | def forward(
  class SequenceSummary (line 1990) | class SequenceSummary(nn.Module):
    method __init__ (line 2006) | def __init__(self, config: PretrainedConfig):
    method forward (line 2035) | def forward(self, hidden_states, cls_index=None):
  function create_position_ids_from_input_ids (line 2067) | def create_position_ids_from_input_ids(input_ids, padding_idx):
  function prune_linear_layer (line 2081) | def prune_linear_layer(layer, index, dim=0):
  function prune_conv1d_layer (line 2106) | def prune_conv1d_layer(layer, index, dim=1):
  function prune_layer (line 2130) | def prune_layer(layer, index, dim=None):
  function apply_chunking_to_forward (line 2143) | def apply_chunking_to_forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_xlm.py
  function create_sinusoidal_embeddings (line 52) | def create_sinusoidal_embeddings(n_pos, dim, out):
  function get_masks (line 60) | def get_masks(slen, lengths, causal, padding_mask=None):
  class MultiHeadAttention (line 85) | class MultiHeadAttention(nn.Module):
    method __init__ (line 89) | def __init__(self, n_heads, dim, config):
    method prune_heads (line 104) | def prune_heads(self, heads):
    method forward (line 125) | def forward(self, input, mask, kv=None, cache=None, head_mask=None):
  class TransformerFFN (line 189) | class TransformerFFN(nn.Module):
    method __init__ (line 190) | def __init__(self, in_dim, dim_hidden, out_dim, config):
    method forward (line 197) | def forward(self, input):
  class XLMPreTrainedModel (line 205) | class XLMPreTrainedModel(PreTrainedModel):
    method __init__ (line 214) | def __init__(self, *inputs, **kwargs):
    method dummy_inputs (line 218) | def dummy_inputs(self):
    method _init_weights (line 227) | def _init_weights(self, module):
  class XLMModel (line 313) | class XLMModel(XLMPreTrainedModel):
    method __init__ (line 314) | def __init__(self, config):  # , dico, is_encoder, with_output):
    method get_input_embeddings (line 384) | def get_input_embeddings(self):
    method set_input_embeddings (line 387) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 390) | def _prune_heads(self, heads_to_prune):
    method forward (line 399) | def forward(
  class XLMPredLayer (line 554) | class XLMPredLayer(nn.Module):
    method __init__ (line 559) | def __init__(self, config):
    method forward (line 577) | def forward(self, x, y=None):
  class XLMWithLMHeadModel (line 602) | class XLMWithLMHeadModel(XLMPreTrainedModel):
    method __init__ (line 603) | def __init__(self, config):
    method get_output_embeddings (line 610) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 613) | def prepare_inputs_for_generation(self, input_ids, **kwargs):
    method forward (line 627) | def forward(
  class XLMForSequenceClassification (line 702) | class XLMForSequenceClassification(XLMPreTrainedModel):
    method __init__ (line 703) | def __init__(self, config):
    method forward (line 713) | def forward(
  class XLMForQuestionAnsweringSimple (line 799) | class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
    method __init__ (line 800) | def __init__(self, config):
    method forward (line 809) | def forward(
  class XLMForQuestionAnswering (line 917) | class XLMForQuestionAnswering(XLMPreTrainedModel):
    method __init__ (line 918) | def __init__(self, config):
    method forward (line 927) | def forward(
  class XLMForTokenClassification (line 1034) | class XLMForTokenClassification(XLMPreTrainedModel):
    method __init__ (line 1035) | def __init__(self, config):
    method forward (line 1046) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_xlm_roberta.py
  class XLMRobertaModel (line 62) | class XLMRobertaModel(RobertaModel):
  class XLMRobertaForMaskedLM (line 74) | class XLMRobertaForMaskedLM(RobertaForMaskedLM):
  class XLMRobertaForSequenceClassification (line 88) | class XLMRobertaForSequenceClassification(RobertaForSequenceClassificati...
  class XLMRobertaForMultipleChoice (line 102) | class XLMRobertaForMultipleChoice(RobertaForMultipleChoice):
  class XLMRobertaForTokenClassification (line 116) | class XLMRobertaForTokenClassification(RobertaForTokenClassification):

FILE: code/nezha-base-count5/pretrain/transformers1/modeling_xlnet.py
  function build_tf_xlnet_to_pytorch_map (line 42) | def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None):
  function load_tf_weights_in_xlnet (line 125) | def load_tf_weights_in_xlnet(model, config, tf_path):
  class XLNetRelativeAttention (line 193) | class XLNetRelativeAttention(nn.Module):
    method __init__ (line 194) | def __init__(self, config):
    method prune_heads (line 223) | def prune_heads(self, heads):
    method rel_shift (line 227) | def rel_shift(x, klen=-1):
    method rel_shift_bnij (line 240) | def rel_shift_bnij(x, klen=-1):
    method rel_attn_core (line 254) | def rel_attn_core(self, q_head, k_head_h, v_head_h, k_head_r, seg_mat=...
    method post_attention (line 296) | def post_attention(self, h, attn_vec, residual=True):
    method forward (line 308) | def forward(self, h, g, attn_mask_h, attn_mask_g, r, seg_mat, mems=Non...
  class XLNetFeedForward (line 403) | class XLNetFeedForward(nn.Module):
    method __init__ (line 404) | def __init__(self, config):
    method forward (line 415) | def forward(self, inp):
  class XLNetLayer (line 426) | class XLNetLayer(nn.Module):
    method __init__ (line 427) | def __init__(self, config):
    method forward (line 433) | def forward(
  class XLNetPreTrainedModel (line 457) | class XLNetPreTrainedModel(PreTrainedModel):
    method _init_weights (line 466) | def _init_weights(self, module):
  class XLNetModel (line 568) | class XLNetModel(XLNetPreTrainedModel):
    method __init__ (line 569) | def __init__(self, config):
    method get_input_embeddings (line 590) | def get_input_embeddings(self):
    method set_input_embeddings (line 593) | def set_input_embeddings(self, new_embeddings):
    method _prune_heads (line 596) | def _prune_heads(self, heads_to_prune):
    method create_mask (line 599) | def create_mask(self, qlen, mlen):
    method cache_mem (line 629) | def cache_mem(self, curr_out, prev_mem):
    method positional_embedding (line 642) | def positional_embedding(pos_seq, inv_freq, bsz=None):
    method relative_positional_encoding (line 652) | def relative_positional_encoding(self, qlen, klen, bsz=None):
    method forward (line 692) | def forward(
  class XLNetLMHeadModel (line 927) | class XLNetLMHeadModel(XLNetPreTrainedModel):
    method __init__ (line 928) | def __init__(self, config):
    method get_output_embeddings (line 938) | def get_output_embeddings(self):
    method prepare_inputs_for_generation (line 941) | def prepare_inputs_for_generation(self, input_ids, past, **kwargs):
    method forward (line 975) | def forward(
  class XLNetForSequenceClassification (line 1083) | class XLNetForSequenceClassification(XLNetPreTrainedModel):
    method __init__ (line 1084) | def __init__(self, config):
    method forward (line 1095) | def forward(
  class XLNetForTokenClassification (line 1189) | class XLNetForTokenClassification(XLNetPreTrainedModel):
    method __init__ (line 1190) | def __init__(self, config):
    method forward (line 1200) | def forward(
  class XLNetForMultipleChoice (line 1298) | class XLNetForMultipleChoice(XLNetPreTrainedModel):
    method __init__ (line 1299) | def __init__(self, config):
    method forward (line 1309) | def forward(
  class XLNetForQuestionAnsweringSimple (line 1411) | class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
    method __init__ (line 1412) | def __init__(self, config):
    method forward (line 1422) | def forward(
  class XLNetForQuestionAnswering (line 1534) | class XLNetForQuestionAnswering(XLNetPreTrainedModel):
    method __init__ (line 1535) | def __init__(self, config):
    method forward (line 1548) | def forward(

FILE: code/nezha-base-count5/pretrain/transformers1/optimization.py
  function get_constant_schedule (line 28) | def get_constant_schedule(optimizer, last_epoch=-1):
  function get_constant_schedule_with_warmup (line 34) | def get_constant_schedule_with_warmup(optimizer, num_warmup_steps, last_...
  function get_linear_schedule_with_warmup (line 47) | def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_tra...
  function get_cosine_schedule_with_warmup (line 62) | def get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_tra...
  function get_cosine_with_hard_restarts_schedule_with_warmup (line 77) | def get_cosine_with_hard_restarts_schedule_with_warmup(
  class AdamW (line 96) | class AdamW(Optimizer):
    method __init__ (line 107) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-6, weig...
    method step (line 119) | def step(self, closure=None):

FILE: code/nezha-base-count5/pretrain/transformers1/optimization_tf.py
  class WarmUp (line 23) | class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
    method __init__ (line 26) | def __init__(
    method __call__ (line 36) | def __call__(self, step):
    method get_config (line 51) | def get_config(self):
  function create_optimizer (line 61) | def create_optimizer(init_lr, num_train_steps, num_warmup_steps, end_lr=...
  class AdamWeightDecay (line 84) | class AdamWeightDecay(tf.keras.optimizers.Adam):
    method __init__ (line 94) | def __init__(
    method from_config (line 113) | def from_config(cls, config):
    method _prepare_local (line 118) | def _prepare_local(self, var_device, var_dtype, apply_state):
    method _decay_weights_op (line 124) | def _decay_weights_op(self, var, learning_rate, apply_state):
    method apply_gradients (line 133) | def apply_gradients(self, grads_and_vars, name=None):
    method _get_lr (line 137) | def _get_lr(self, var_device, var_dtype, apply_state):
    method _resource_apply_dense (line 150) | def _resource_apply_dense(self, grad, var, apply_state=None):
    method _resource_apply_sparse (line 156) | def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
    method get_config (line 162) | def get_config(self):
    method _do_use_weight_decay (line 167) | def _do_use_weight_decay(self, param_name):
  class GradientAccumulator (line 185) | class GradientAccumulator(object):
    method __init__ (line 197) | def __init__(self):
    method step (line 203) | def step(self):
    method gradients (line 216) | def gradients(self):
    method __call__ (line 222) | def __call__(self, gradients):
    method reset (line 248) | def reset(self):

FILE: code/nezha-base-count5/pretrain/transformers1/pipelines.py
  function get_framework (line 69) | def get_framework(model=None):
  class ArgumentHandler (line 89) | class ArgumentHandler(ABC):
    method __call__ (line 95) | def __call__(self, *args, **kwargs):
  class DefaultArgumentHandler (line 99) | class DefaultArgumentHandler(ArgumentHandler):
    method handle_kwargs (line 105) | def handle_kwargs(kwargs: Dict) -> List:
    method handle_args (line 114) | def handle_args(args: Sequence[Any]) -> List[str]:
    method __call__ (line 140) | def __call__(self, *args, **kwargs):
  class PipelineDataFormat (line 150) | class PipelineDataFormat:
    method __init__ (line 164) | def __init__(
    method __iter__ (line 184) | def __iter__(self):
    method save (line 188) | def save(self, data: dict):
    method save_binary (line 196) | def save_binary(self, data: Union[dict, List[dict]]) -> str:
    method from_str (line 211) | def from_str(
  class CsvPipelineDataFormat (line 224) | class CsvPipelineDataFormat(PipelineDataFormat):
    method __init__ (line 225) | def __init__(
    method __iter__ (line 230) | def __iter__(self):
    method save (line 239) | def save(self, data: List[dict]):
  class JsonPipelineDataFormat (line 247) | class JsonPipelineDataFormat(PipelineDataFormat):
    method __init__ (line 248) | def __init__(
    method __iter__ (line 256) | def __iter__(self):
    method save (line 263) | def save(self, data: dict):
  class PipedPipelineDataFormat (line 268) | class PipedPipelineDataFormat(PipelineDataFormat):
    method __iter__ (line 276) | def __iter__(self):
    method save (line 292) | def save(self, data: dict):
    method save_binary (line 295) | def save_binary(self, data: Union[dict, List[dict]]) -> str:
  class _ScikitCompat (line 305) | class _ScikitCompat(ABC):
    method transform (line 311) | def transform(self, X):
    method predict (line 315) | def predict(self, X):
  class Pipeline (line 319) | class Pipeline(_ScikitCompat):
    method __init__ (line 370) | def __init__(
    method save_pretrained (line 402) | def save_pretrained(self, save_directory):
    method transform (line 415) | def transform(self, X):
    method predict (line 421) | def predict(self, X):
    method device_placement (line 428) | def device_placement(self):
    method ensure_tensor_on_device (line 449) | def ensure_tensor_on_device(self, **inputs):
    method _parse_and_tokenize (line 457) | def _parse_and_tokenize(self, *args, pad_to_max_length=True, add_speci...
    method __call__ (line 472) | def __call__(self, *args, **kwargs):
    method _forward (line 476) | def _forward(self, inputs, return_tensors=False):
  class FeatureExtractionPipeline (line 501) | class FeatureExtractionPipeline(Pipeline):
    method __init__ (line 537) | def __init__(
    method __call__ (line 558) | def __call__(self, *args, **kwargs):
  class TextGenerationPipeline (line 562) | class TextGenerationPipeline(Pipeline):
    method __call__ (line 606) | def __call__(
  class TextClassificationPipeline (line 683) | class TextClassificationPipeline(Pipeline):
    method __call__ (line 720) | def __call__(self, *args, **kwargs):
  class FillMaskPipeline (line 726) | class FillMaskPipeline(Pipeline):
    method __init__ (line 764) | def __init__(
    method __call__ (line 788) | def __call__(self, *args, **kwargs):
  class NerPipeline (line 826) | class NerPipeline(Pipeline):
    method __init__ (line 865) | def __init__(
    method __call__ (line 893) | def __call__(self, *args, **kwargs):
    method group_entities (line 973) | def group_entities(self, entities):
  class QuestionAnsweringArgumentHandler (line 993) | class QuestionAnsweringArgumentHandler(ArgumentHandler):
    method __call__ (line 1002) | def __call__(self, *args, **kwargs):
  class QuestionAnsweringPipeline (line 1055) | class QuestionAnsweringPipeline(Pipeline):
    method __init__ (line 1094) | def __init__(
    method create_sample (line 1116) | def create_sample(
    method __call__ (line 1135) | def __call__(self, *args, **kwargs):
    method decode (line 1240) | def decode(self, start: np.ndarray, end: np.ndarray, topk: int, max_an...
    method span_to_answer (line 1280) | def span_to_answer(self, text: str, start: int, end: int):
  class SummarizationPipeline (line 1325) | class SummarizationPipeline(Pipeline):
    method __call__ (line 1373) | def __call__(
  class TranslationPipeline (line 1462) | class TranslationPipeline(Pipeline):
    method __call__ (line 1501) | def __call__(
  function pipeline (line 1677) | def pipeline(

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_albert.py
  class AlbertTokenizer (line 57) | class AlbertTokenizer(PreTrainedTokenizer):
    method __init__ (line 114) | def __init__(
    method vocab_size (line 158) | def vocab_size(self):
    method get_vocab (line 161) | def get_vocab(self):
    method __getstate__ (line 166) | def __getstate__(self):
    method __setstate__ (line 171) | def __setstate__(self, d):
    method preprocess_text (line 184) | def preprocess_text(self, inputs):
    method _tokenize (line 199) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 223) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 227) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 231) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 235) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 261) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 292) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 323) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_auto.py
  class AutoTokenizer (line 94) | class AutoTokenizer:
    method __init__ (line 122) | def __init__(self):
    method from_pretrained (line 129) | def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwa...

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_bart.py
  class BartTokenizer (line 36) | class BartTokenizer(RobertaTokenizer):
  class MBartTokenizer (line 49) | class MBartTokenizer(XLMRobertaTokenizer):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_bert.py
  function load_vocab (line 99) | def load_vocab(vocab_file):
  function whitespace_tokenize (line 110) | def whitespace_tokenize(text):
  class BertTokenizer (line 119) | class BertTokenizer(PreTrainedTokenizer):
    method __init__ (line 163) | def __init__(
    method vocab_size (line 201) | def vocab_size(self):
    method get_vocab (line 204) | def get_vocab(self):
    method _tokenize (line 207) | def _tokenize(self, text):
    method _convert_token_to_id (line 217) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 221) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 225) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 230) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 256) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 287) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 317) | def save_vocabulary(self, vocab_path):
  class BasicTokenizer (line 346) | class BasicTokenizer(object):
    method __init__ (line 349) | def __init__(self, do_lower_case=True, never_split=None, tokenize_chin...
    method tokenize (line 369) | def tokenize(self, text, never_split=None):
    method _run_strip_accents (line 400) | def _run_strip_accents(self, text):
    method _run_split_on_punc (line 411) | def _run_split_on_punc(self, text, never_split=None):
    method _tokenize_chinese_chars (line 433) | def _tokenize_chinese_chars(self, text):
    method _is_chinese_char (line 446) | def _is_chinese_char(self, cp):
    method _clean_text (line 470) | def _clean_text(self, text):
  class WordpieceTokenizer (line 484) | class WordpieceTokenizer(object):
    method __init__ (line 487) | def __init__(self, vocab, unk_token, max_input_chars_per_word=100):
    method tokenize (line 492) | def tokenize(self, text):
  function _is_whitespace (line 544) | def _is_whitespace(char):
  function _is_control (line 556) | def _is_control(char):
  function _is_punctuation (line 568) | def _is_punctuation(char):
  class BertTokenizerFast (line 583) | class BertTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 631) | def __init__(
    method build_inputs_with_special_tokens (line 668) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method create_token_type_ids_from_sequences (line 676) | def create_token_type_ids_from_sequences(

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_bert_japanese.py
  class BertJapaneseTokenizer (line 71) | class BertJapaneseTokenizer(BertTokenizer):
    method __init__ (line 79) | def __init__(
    method _tokenize (line 153) | def _tokenize(self, text):
  class MecabTokenizer (line 167) | class MecabTokenizer:
    method __init__ (line 170) | def __init__(self, do_lower_case=False, never_split=None, normalize_te...
    method tokenize (line 192) | def tokenize(self, text, never_split=None, **kwargs):
  class CharacterTokenizer (line 219) | class CharacterTokenizer(object):
    method __init__ (line 222) | def __init__(self, vocab, unk_token, normalize_text=True):
    method tokenize (line 237) | def tokenize(self, text):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_camembert.py
  class CamembertTokenizer (line 51) | class CamembertTokenizer(PreTrainedTokenizer):
    method __init__ (line 107) | def __init__(
    method build_inputs_with_special_tokens (line 142) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 169) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 199) | def create_token_type_ids_from_sequences(
    method vocab_size (line 224) | def vocab_size(self):
    method _tokenize (line 227) | def _tokenize(self, text):
    method _convert_token_to_id (line 230) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 239) | def _convert_id_to_token(self, index):
    method __getstate__ (line 245) | def __getstate__(self):
    method __setstate__ (line 250) | def __setstate__(self, d):
    method convert_tokens_to_string (line 263) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 268) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_ctrl.py
  function get_pairs (line 102) | def get_pairs(word):
  class CTRLTokenizer (line 117) | class CTRLTokenizer(PreTrainedTokenizer):
    method __init__ (line 141) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
    method vocab_size (line 154) | def vocab_size(self):
    method get_vocab (line 157) | def get_vocab(self):
    method bpe (line 160) | def bpe(self, token):
    method _tokenize (line 204) | def _tokenize(self, text):
    method _convert_token_to_id (line 215) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 219) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 223) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 228) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_distilbert.py
  class DistilBertTokenizer (line 58) | class DistilBertTokenizer(BertTokenizer):
  class DistilBertTokenizerFast (line 76) | class DistilBertTokenizerFast(BertTokenizerFast):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_electra.py
  class ElectraTokenizer (line 52) | class ElectraTokenizer(BertTokenizer):
  class ElectraTokenizerFast (line 68) | class ElectraTokenizerFast(BertTokenizerFast):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_flaubert.py
  function convert_to_unicode (line 63) | def convert_to_unicode(text):
  class FlaubertTokenizer (line 79) | class FlaubertTokenizer(XLMTokenizer):
    method __init__ (line 98) | def __init__(self, do_lowercase=False, **kwargs):
    method preprocess_text (line 103) | def preprocess_text(self, text):
    method _tokenize (line 113) | def _tokenize(self, text, bypass_tokenizer=False):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_gpt2.py
  function bytes_to_unicode (line 63) | def bytes_to_unicode():
  function get_pairs (line 88) | def get_pairs(word):
  class GPT2Tokenizer (line 101) | class GPT2Tokenizer(PreTrainedTokenizer):
    method __init__ (line 139) | def __init__(
    method vocab_size (line 167) | def vocab_size(self):
    method get_vocab (line 170) | def get_vocab(self):
    method bpe (line 173) | def bpe(self, token):
    method _tokenize (line 215) | def _tokenize(self, text):
    method _convert_token_to_id (line 225) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 229) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 233) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 239) | def save_vocabulary(self, save_directory):
    method prepare_for_tokenization (line 274) | def prepare_for_tokenization(self, text, **kwargs):
  class GPT2TokenizerFast (line 280) | class GPT2TokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 326) | def __init__(

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_longformer.py
  class LongformerTokenizer (line 45) | class LongformerTokenizer(RobertaTokenizer):
  class LongformerTokenizerFast (line 54) | class LongformerTokenizerFast(RobertaTokenizerFast):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_marian.py
  class MarianTokenizer (line 28) | class MarianTokenizer(PreTrainedTokenizer):
    method __init__ (line 49) | def __init__(
    method _setup_normalizer (line 91) | def _setup_normalizer(self):
    method normalize (line 100) | def normalize(self, x: str) -> str:
    method _convert_token_to_id (line 104) | def _convert_token_to_id(self, token):
    method remove_language_code (line 107) | def remove_language_code(self, text: str):
    method _tokenize (line 113) | def _tokenize(self, text: str) -> List[str]:
    method _convert_id_to_token (line 118) | def _convert_id_to_token(self, index: int) -> str:
    method convert_tokens_to_string (line 122) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method build_inputs_with_special_tokens (line 126) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method prepare_translation_batch (line 133) | def prepare_translation_batch(
    method vocab_size (line 182) | def vocab_size(self) -> int:
    method save_vocabulary (line 185) | def save_vocabulary(self, save_directory: str) -> Tuple[str]:
    method get_vocab (line 197) | def get_vocab(self) -> Dict:
    method __getstate__ (line 202) | def __getstate__(self) -> Dict:
    method __setstate__ (line 207) | def __setstate__(self, d: Dict) -> None:
    method num_special_tokens_to_add (line 213) | def num_special_tokens_to_add(self, **unused):
    method _special_token_mask (line 217) | def _special_token_mask(self, seq):
    method get_special_tokens_mask (line 222) | def get_special_tokens_mask(
  function load_spm (line 234) | def load_spm(path: str) -> sentencepiece.SentencePieceProcessor:
  function save_json (line 240) | def save_json(data, path: str) -> None:
  function load_json (line 245) | def load_json(path: str) -> Union[Dict, List]:

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_openai.py
  function get_pairs (line 46) | def get_pairs(word):
  function text_standardize (line 59) | def text_standardize(text):
  class OpenAIGPTTokenizer (line 75) | class OpenAIGPTTokenizer(PreTrainedTokenizer):
    method __init__ (line 99) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
    method vocab_size (line 124) | def vocab_size(self):
    method get_vocab (line 127) | def get_vocab(self):
    method bpe (line 130) | def bpe(self, token):
    method _tokenize (line 174) | def _tokenize(self, text):
    method _convert_token_to_id (line 189) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 193) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 197) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 202) | def save_vocabulary(self, save_directory):
  class OpenAIGPTTokenizerFast (line 238) | class OpenAIGPTTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 264) | def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_reformer.py
  class ReformerTokenizer (line 54) | class ReformerTokenizer(PreTrainedTokenizer):
    method __init__ (line 85) | def __init__(
    method vocab_size (line 117) | def vocab_size(self):
    method get_vocab (line 120) | def get_vocab(self):
    method __getstate__ (line 125) | def __getstate__(self):
    method __setstate__ (line 130) | def __setstate__(self, d):
    method _tokenize (line 143) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 152) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 156) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 162) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 167) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_roberta.py
  class RobertaTokenizer (line 64) | class RobertaTokenizer(GPT2Tokenizer):
    method __init__ (line 126) | def __init__(
    method build_inputs_with_special_tokens (line 154) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 180) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 210) | def create_token_type_ids_from_sequences(
    method prepare_for_tokenization (line 234) | def prepare_for_tokenization(self, text, add_special_tokens=False, **k...
  class RobertaTokenizerFast (line 244) | class RobertaTokenizerFast(GPT2TokenizerFast):
    method __init__ (line 291) | def __init__(
    method mask_token (line 333) | def mask_token(self, value):
    method build_inputs_with_special_tokens (line 340) | def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=No...
    method create_token_type_ids_from_sequences (line 347) | def create_token_type_ids_from_sequences(

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_t5.py
  class T5Tokenizer (line 62) | class T5Tokenizer(PreTrainedTokenizer):
    method __init__ (line 98) | def __init__(
    method vocab_size (line 139) | def vocab_size(self):
    method get_vocab (line 142) | def get_vocab(self):
    method __getstate__ (line 147) | def __getstate__(self):
    method __setstate__ (line 152) | def __setstate__(self, d):
    method _tokenize (line 165) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 174) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 182) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 190) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 195) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_transfo_xl.py
  class TransfoXLTokenizer (line 72) | class TransfoXLTokenizer(PreTrainedTokenizer):
    method __init__ (line 85) | def __init__(
    method _compile_space_around_punctuation_pattern (line 141) | def _compile_space_around_punctuation_pattern(self):
    method count_file (line 146) | def count_file(self, path, verbose=False, add_eos=False):
    method count_sents (line 162) | def count_sents(self, sents, verbose=False):
    method _build_from_file (line 173) | def _build_from_file(self, vocab_file):
    method save_vocabulary (line 188) | def save_vocabulary(self, vocab_path):
    method build_vocab (line 212) | def build_vocab(self):
    method encode_file (line 232) | def encode_file(self, path, ordered=False, verbose=False, add_eos=True...
    method encode_sents (line 249) | def encode_sents(self, sents, ordered=False, verbose=False):
    method add_special (line 263) | def add_special(self, sym):
    method add_symbol (line 269) | def add_symbol(self, sym):
    method _convert_id_to_token (line 274) | def _convert_id_to_token(self, idx):
    method _convert_token_to_id (line 279) | def _convert_token_to_id(self, sym):
    method convert_tokens_to_string (line 296) | def convert_tokens_to_string(self, tokens):
    method convert_to_tensor (line 301) | def convert_to_tensor(self, symbols):
    method vocab_size (line 305) | def vocab_size(self):
    method get_vocab (line 308) | def get_vocab(self):
    method _tokenize (line 311) | def _tokenize(self, line, add_eos=False, add_double_eos=False):
    method prepare_for_tokenization (line 330) | def prepare_for_tokenization(self, text, **kwargs):
  class _TransfoXLDelimiterLookupTokenizer (line 344) | class _TransfoXLDelimiterLookupTokenizer(BaseTokenizer):
    method __init__ (line 345) | def __init__(
  class TransfoXLTokenizerFast (line 405) | class TransfoXLTokenizerFast(PreTrainedTokenizerFast):
    method __init__ (line 422) | def __init__(
    method save_pretrained (line 458) | def save_pretrained(self, save_directory):
  class LMOrderedIterator (line 467) | class LMOrderedIterator(object):
    method __init__ (line 468) | def __init__(self, data, bsz, bptt, device="cpu", ext_len=None):
    method get_batch (line 490) | def get_batch(self, i, bptt=None):
    method get_fixlen_iter (line 506) | def get_fixlen_iter(self, start=0):
    method get_varlen_iter (line 510) | def get_varlen_iter(self, start=0, std=5, min_len=5, max_deviation=3):
    method __iter__ (line 522) | def __iter__(self):
  class LMShuffledIterator (line 526) | class LMShuffledIterator(object):
    method __init__ (line 527) | def __init__(self, data, bsz, bptt, device="cpu", ext_len=None, shuffl...
    method get_sent_stream (line 540) | def get_sent_stream(self):
    method stream_iterator (line 548) | def stream_iterator(self, sent_stream):
    method __iter__ (line 595) | def __iter__(self):
  class LMMultiFileIterator (line 603) | class LMMultiFileIterator(LMShuffledIterator):
    method __init__ (line 604) | def __init__(self, paths, vocab, bsz, bptt, device="cpu", ext_len=None...
    method get_sent_stream (line 616) | def get_sent_stream(self, path):
    method __iter__ (line 624) | def __iter__(self):
  class TransfoXLCorpus (line 635) | class TransfoXLCorpus(object):
    method from_pretrained (line 637) | def from_pretrained(cls, pretrained_model_name_or_path, cache_dir=None...
    method __init__ (line 680) | def __init__(self, *args, **kwargs):
    method build_corpus (line 687) | def build_corpus(self, path, dataset):
    method get_iterator (line 721) | def get_iterator(self, split, *args, **kwargs):
  function get_lm_corpus (line 738) | def get_lm_corpus(datadir, dataset):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_utils.py
  class CharSpan (line 61) | class CharSpan(NamedTuple):
  class TokenSpan (line 73) | class TokenSpan(NamedTuple):
  function flatten (line 85) | def flatten(x: Sequence):
  function truncate_and_pad (line 100) | def truncate_and_pad(
  class BatchEncoding (line 164) | class BatchEncoding(UserDict):
    method __init__ (line 177) | def __init__(
    method __getitem__ (line 189) | def __getitem__(self, item: Union[int, str]) -> EncodingFast:
    method __getattr__ (line 203) | def __getattr__(self, item: str):
    method keys (line 206) | def keys(self):
    method values (line 209) | def values(self):
    method items (line 212) | def items(self):
    method encodings (line 220) | def encodings(self) -> Optional[List[EncodingFast]]:
    method tokens (line 228) | def tokens(self, batch_index: int = 0) -> List[int]:
    method words (line 233) | def words(self, batch_index: int = 0) -> List[Optional[int]]:
    method token_to_word (line 238) | def token_to_word(self, batch_or_token_index: int, token_index: Option...
    method word_to_tokens (line 277) | def word_to_tokens(self, batch_or_word_index: int, word_index: Optiona...
    method token_to_chars (line 322) | def token_to_chars(self, batch_or_token_index: int, token_index: Optio...
    method char_to_token (line 359) | def char_to_token(self, batch_or_char_index: int, char_index: Optional...
    method word_to_chars (line 394) | def word_to_chars(self, batch_or_word_index: int, word_index: Optional...
    method char_to_word (line 431) | def char_to_word(self, batch_or_char_index: int, char_index: Optional[...
    method to (line 467) | def to(self, device: str):
  class SpecialTokensMixin (line 473) | class SpecialTokensMixin:
    method __init__ (line 491) | def __init__(self, **kwargs):
    method bos_token (line 517) | def bos_token(self):
    method eos_token (line 524) | def eos_token(self):
    method unk_token (line 531) | def unk_token(self):
    method sep_token (line 538) | def sep_token(self):
    method pad_token (line 545) | def pad_token(self):
    method cls_token (line 552) | def cls_token(self):
    method mask_token (line 559) | def mask_token(self):
    method additional_special_tokens (line 566) | def additional_special_tokens(self):
    method _maybe_update_backend (line 572) | def _maybe_update_backend(self, value):
    method bos_token (line 577) | def bos_token(self, value):
    method eos_token (line 582) | def eos_token(self, value):
    method unk_token (line 587) | def unk_token(self, value):
    method sep_token (line 592) | def sep_token(self, value):
    method pad_token (line 597) | def pad_token(self, value):
    method cls_token (line 602) | def cls_token(self, value):
    method mask_token (line 607) | def mask_token(self, value):
    method additional_special_tokens (line 612) | def additional_special_tokens(self, value):
    method bos_token_id (line 617) | def bos_token_id(self):
    method eos_token_id (line 622) | def eos_token_id(self):
    method unk_token_id (line 627) | def unk_token_id(self):
    method sep_token_id (line 632) | def sep_token_id(self):
    method pad_token_id (line 637) | def pad_token_id(self):
    method pad_token_type_id (line 642) | def pad_token_type_id(self):
    method cls_token_id (line 647) | def cls_token_id(self):
    method mask_token_id (line 652) | def mask_token_id(self):
    method additional_special_tokens_ids (line 657) | def additional_special_tokens_ids(self):
    method special_tokens_map (line 662) | def special_tokens_map(self):
    method all_special_tokens (line 674) | def all_special_tokens(self):
    method all_special_ids (line 686) | def all_special_ids(self):
  class PreTrainedTokenizer (line 695) | class PreTrainedTokenizer(SpecialTokensMixin):
    method vocab_size (line 771) | def vocab_size(self) -> int:
    method is_fast (line 776) | def is_fast(self) -> bool:
    method max_len (line 780) | def max_len(self) -> int:
    method max_len_single_sentence (line 787) | def max_len_single_sentence(self) -> int:
    method max_len_sentences_pair (line 791) | def max_len_sentences_pair(self) -> int:
    method max_len_single_sentence (line 795) | def max_len_single_sentence(self, value) -> int:
    method max_len_sentences_pair (line 807) | def max_len_sentences_pair(self, value) -> int:
    method get_vocab (line 818) | def get_vocab(self):
    method __init__ (line 822) | def __init__(self, model_max_length=None, **kwargs):
    method __len__ (line 854) | def __len__(self):
    method from_pretrained (line 859) | def from_pretrained(cls, *inputs, **kwargs):
    method _from_pretrained (line 914) | def _from_pretrained(cls, pretrained_model_name_or_path, *init_inputs,...
    method save_pretrained (line 1087) | def save_pretrained(self, save_directory):
    method save_vocabulary (line 1128) | def save_vocabulary(self, save_directory) -> Tuple[str]:
    method add_tokens (line 1138) | def add_tokens(self, new_tokens: Union[str, List[str]]) -> int:
    method num_special_tokens_to_add (line 1187) | def num_special_tokens_to_add(self, pair=False):
    method add_special_tokens (line 1206) | def add_special_tokens(self, special_tokens_dict):
    method tokenize (line 1260) | def tokenize(self, text: TextInput, **kwargs):
    method _tokenize (line 1332) | def _tokenize(self, text, **kwargs):
    method convert_tokens_to_ids (line 1341) | def convert_tokens_to_ids(self, tokens):
    method _convert_token_to_id_with_added_voc (line 1356) | def _convert_token_to_id_with_added_voc(self, token):
    method _convert_token_to_id (line 1364) | def _convert_token_to_id(self, token):
    method encode (line 1367) | def encode(
    method encode_plus (line 1439) | def encode_plus(
    method batch_encode_plus (line 1594) | def batch_encode_plus(
    method convert_to_tensors_ (line 1789) | def convert_to_tensors_(self, batch_outputs: dict, return_tensors: str...
    method prepare_for_model (line 1818) | def prepare_for_model(
    method prepare_for_tokenization (line 2018) | def prepare_for_tokenization(self, text: str, **kwargs) -> str:
    method truncate_sequences (line 2022) | def truncate_sequences(
    method create_token_type_ids_from_sequences (line 2082) | def create_token_type_ids_from_sequences(self, token_ids_0: List, toke...
    method build_inputs_with_special_tokens (line 2087) | def build_inputs_with_special_tokens(self, token_ids_0: List, token_id...
    method get_special_tokens_mask (line 2096) | def get_special_tokens_mask(
    method convert_ids_to_tokens (line 2115) | def convert_ids_to_tokens(
    method _convert_id_to_token (line 2140) | def _convert_id_to_token(self, index: int) -> str:
    method convert_tokens_to_string (line 2143) | def convert_tokens_to_string(self, tokens: List[str]) -> str:
    method decode (line 2150) | def decode(
    method batch_decode (line 2190) | def batch_decode(self, sequences: List[List[int]], **kwargs) -> List[s...
    method clean_up_tokenization (line 2194) | def clean_up_tokenization(out_string: str) -> str:
  class PreTrainedTokenizerFast (line 2212) | class PreTrainedTokenizerFast(PreTrainedTokenizer):
    method __init__ (line 2270) | def __init__(self, tokenizer: BaseTokenizerFast, **kwargs):
    method backend_tokenizer (line 2281) | def backend_tokenizer(self) -> BaseTokenizerFast:
    method decoder (line 2285) | def decoder(self) -> DecoderFast:
    method is_fast (line 2289) | def is_fast(self) -> bool:
    method vocab_size (line 2293) | def vocab_size(self) -> int:
    method __len__ (line 2296) | def __len__(self) -> int:
    method _maybe_update_backend (line 2299) | def _maybe_update_backend(self, value):
    method _convert_encoding (line 2304) | def _convert_encoding(
    method _convert_token_to_id_with_added_voc (line 2360) | def _convert_token_to_id_with_added_voc(self, token: int) -> str:
    method _convert_id_to_token (line 2366) | def _convert_id_to_token(self, index: int) -> Optional[str]:
    method get_vocab (line 2369) | def get_vocab(self):
    method convert_tokens_to_string (line 2372) | def convert_tokens_to_string(self, tokens: List[int], skip_special_tok...
    method add_tokens (line 2375) | def add_tokens(self, new_tokens: List[Union[str, AddedTokenFast]]) -> ...
    method add_special_tokens (line 2402) | def add_special_tokens(self, special_tokens_dict: dict) -> int:
    method num_special_tokens_to_add (line 2421) | def num_special_tokens_to_add(self, pair: bool = False) -> int:
    method tokenize (line 2424) | def tokenize(
    method batch_encode_plus (line 2429) | def batch_encode_plus(
    method encode_plus (line 2567) | def encode_plus(
    method decode (line 2659) | def decode(
    method save_vocabulary (line 2670) | def save_vocabulary(self, save_directory: str) -> Tuple[str]:
  function trim_batch (line 2680) | def trim_batch(

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_xlm.py
  function get_pairs (line 430) | def get_pairs(word):
  function lowercase_and_remove_accent (line 443) | def lowercase_and_remove_accent(text):
  function replace_unicode_punct (line 460) | def replace_unicode_punct(text):
  function remove_non_printing_char (line 503) | def remove_non_printing_char(text):
  function romanian_preprocessing (line 516) | def romanian_preprocessing(text):
  class XLMTokenizer (line 530) | class XLMTokenizer(PreTrainedTokenizer):
    method __init__ (line 594) | def __init__(
    method moses_punct_norm (line 656) | def moses_punct_norm(self, text, lang):
    method moses_tokenize (line 664) | def moses_tokenize(self, text, lang):
    method moses_pipeline (line 672) | def moses_pipeline(self, text, lang):
    method ja_tokenize (line 678) | def ja_tokenize(self, text):
    method vocab_size (line 699) | def vocab_size(self):
    method get_vocab (line 702) | def get_vocab(self):
    method bpe (line 705) | def bpe(self, token):
    method _tokenize (line 749) | def _tokenize(self, text, lang="en", bypass_tokenizer=False):
    method _convert_token_to_id (line 839) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 843) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 847) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 852) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 880) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 911) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 941) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_xlm_roberta.py
  class XLMRobertaTokenizer (line 52) | class XLMRobertaTokenizer(PreTrainedTokenizer):
    method __init__ (line 108) | def __init__(
    method __getstate__ (line 159) | def __getstate__(self):
    method __setstate__ (line 164) | def __setstate__(self, d):
    method build_inputs_with_special_tokens (line 177) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 204) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 235) | def create_token_type_ids_from_sequences(
    method vocab_size (line 261) | def vocab_size(self):
    method get_vocab (line 264) | def get_vocab(self):
    method _tokenize (line 269) | def _tokenize(self, text):
    method _convert_token_to_id (line 272) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 281) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 287) | def convert_tokens_to_string(self, tokens):
    method save_vocabulary (line 292) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count5/pretrain/transformers1/tokenization_xlnet.py
  class XLNetTokenizer (line 53) | class XLNetTokenizer(PreTrainedTokenizer):
    method __init__ (line 113) | def __init__(
    method vocab_size (line 161) | def vocab_size(self):
    method get_vocab (line 164) | def get_vocab(self):
    method __getstate__ (line 169) | def __getstate__(self):
    method __setstate__ (line 174) | def __setstate__(self, d):
    method preprocess_text (line 187) | def preprocess_text(self, inputs):
    method _tokenize (line 202) | def _tokenize(self, text, sample=False):
    method _convert_token_to_id (line 226) | def _convert_token_to_id(self, token):
    method _convert_id_to_token (line 230) | def _convert_id_to_token(self, index):
    method convert_tokens_to_string (line 234) | def convert_tokens_to_string(self, tokens):
    method build_inputs_with_special_tokens (line 239) | def build_inputs_with_special_tokens(
    method get_special_tokens_mask (line 265) | def get_special_tokens_mask(
    method create_token_type_ids_from_sequences (line 296) | def create_token_type_ids_from_sequences(
    method save_vocabulary (line 324) | def save_vocabulary(self, save_directory):

FILE: code/nezha-base-count5/pretrain/transformers1/trainer.py
  function is_apex_available (line 38) | def is_apex_available():
  function is_tensorboard_available (line 60) | def is_tensorboard_available():
  function is_wandb_available (line 77) | def is_wandb_available():
  function set_seed (line 84) | def set_seed(seed: int):
  function torch_distributed_zero_first (line 93) | def torch_distributed_zero_first(local_rank: int):
  class SequentialDistributedSampler (line 104) | class SequentialDistributedSampler(Sampler):
    method __init__ (line 116) | def __init__(self, dataset, num_replicas=None, rank=None):
    method __iter__ (line 131) | def __iter__(self):
    method __len__ (line 144) | def __len__(self):
  function get_tpu_sampler (line 148) | def get_tpu_sampler(dataset: Dataset):
  class Trainer (line 154) | class Trainer:
    method __init__ (line 171) | def __init__(
    method get_test_dataloader (line 222) | def get_test_dataloader(self, test_dataset: Dataset) -> DataLoader:
    method get_optimizers (line 242) | def get_optimizers(
    method _setup_wandb (line 273) | def _setup_wandb(self):
    method num_examples (line 297) | def num_examples(self, dataloader: DataLoader) -> int:
    method train (line 303) | def train(self, model_path: Optional[str] = None):
    method _log (line 510) | def _log(self, logs: Dict[str, float], iterator: Optional[tqdm] = None...
    method _training_step (line 524) | def _training_step(
    method is_local_master (line 547) | def is_local_master(self) -> bool:
    method is_world_master (line 553) | def is_world_master(self) -> bool:
    method save_model (line 563) | def save_model(self, output_dir: Optional[str] = None):
    method _save_tpu (line 576) | def _save_tpu(self, output_dir: Optional[str] = None):
    method _save (line 592) | def _save(self, output_dir: Optional[str] = None):
    method _sorted_checkpoints (line 605) | def _sorted_checkpoints(self, checkpoint_prefix=PREFIX_CHECKPOINT_DIR,...
    method _rotate_checkpoints (line 622) | def _rotate_checkpoints(self, use_mtime=False) -> None:
    method evaluate (line 641) | def evaluate(
    method predict (line 670) | def predict(self, test_dataset: Dataset) -> PredictionOutput:
    method _prediction_loop (line 681) | def _prediction_loop(
    method distributed_concat (line 771) | def distributed_concat(self, tensor: torch.Tensor, num_total_examples:...

FILE: code/nezha-base-count5/pretrain/transformers1/trainer_tf.py
  class TFTrainer (line 20) | class TFTrainer:
    method __init__ (line 31) | def __init__(
    method _setup_training (line 50) | def _setup_training(self) -> None:
    method _set_loss_and_metric (line 67) | def _set_loss_and_metric(self) -> None:
    method _create_summary_writer (line 84) | def _create_summary_writer(self) -> None:
    method _prepare_dataset (line 90) | def _prepare_dataset(self) -> None:
    method _create_optimizer (line 122) | def _create_optimizer(self) -> None:
    method _create_checkpoint_manager (line 146) | def _create_checkpoint_manager(self, max_to_keep: int = 5, load_model:...
    method _evaluate_steps (line 162) | def _evaluate_steps(self, per_replica_features, per_replica_labels):
    method _prediction_loop (line 182) | def _prediction_loop(
    method evaluate (line 237) | def evaluate(
    method train (line 250) | def train(self) -> None:
    method _training_steps (line 317) | def _training_steps(self):
    method _apply_gradients (line 327) | def _apply_gradients(self):
    method _step (line 331) | def _step(self):
    method _accumulate_next_gradients (line 342) | def _accumulate_next_gradients(self):
    method _accumulate_gradients (line 358) | def _accumulate_gradients(self, per_replica_features, per_replica_labe...
    method _forward (line 371) | def _forward(self, features, labels):
    method _run_model (line 383) | def _run_model(self, features, labels, training):
    method predict (line 412) | def predict(self, test_dataset: tf.data.Dataset) -> PredictionOutput:
    method save_model (line 426) | def save_model(self) -> None:

FILE: code/nezha-base-count5/pretrain/transformers1/trainer_utils.py
  class EvalPrediction (line 6) | class EvalPrediction(NamedTuple):
  class PredictionOutput (line 16) | class PredictionOutput(NamedTuple):
  class TrainOutput (line 22) | class TrainOutput(NamedTuple):

FILE: code/nezha-base-count5/pretrain/transformers1/training_args.py
  function is_tpu_available (line 23) | def is_tpu_available():
  class TrainingArguments (line 31) | class TrainingArguments:
    method train_batch_size (line 138) | def train_batch_size(self) -> int:
    method eval_batch_size (line 148) | def eval_batch_size(self) -> int:
    method _setup_devices (line 159) | def _setup_devices(self) -> Tuple["torch.device", int]:
    method device (line 182) | def device(self) -> "torch.device":
    method n_gpu (line 187) | def n_gpu(self):
    method to_json_string (line 190) | def to_json_string(self):
    method to_sanitized_dict (line 196) | def to_sanitized_dict(self) -> Dict[str, Any]:

FILE: code/nezha-base-count5/pretrain/transformers1/training_args_tf.py
  class TFTrainingArguments (line 16) | class TFTrainingArguments(TrainingArguments):
    method _setup_strategy (line 46) | def _setup_strategy(self) -> Tuple["tf.distribute.Strategy", int]:
    method strategy (line 80) | def strategy(self) -> "tf.distribute.Strategy":
    method n_gpu (line 85) | def n_gpu(self) -> int:

FILE: code/nezha-base-count5/pretrain/transformers1/utils_encoder_decoder.py
  function prepare_encoder_decoder_model_kwargs (line 18) | def prepare_encoder_decoder_model_kwargs(**kwargs):

FILE: code/serial_main_fusion_thread.py
  function init_model (line 13) | def init_model(model_path, export_model_path, optimized_model_path, leng...
  function infer (line 96) | def infer(session,data_gen,query_A, query_B):
  function softmax (line 106) | def softmax(x, axis=1):
  class Config (line 121) | class Config:
    method __init__ (line 122) | def __init__(self):
  function tccapi (line 152) | def tccapi():

FILE: code/utils.py
  function fastTokenizer (line 12) | def fastTokenizer(a:str,b:str,maxLen,tk):
  class data_generator (line 33) | class data_generator:
    method __init__ (line 34) | def __init__(self, config, shuffle=False):
    method generate (line 43) | def generate(self, data):
  class PGD (line 58) | class PGD():
    method __init__ (line 59) | def __init__(self, model):
    method attack (line 64) | def attack(self, epsilon=1., alpha=0.3, emb_name='word_embeddings', is...
    method restore (line 76) | def restore(self, emb_name='word_embeddings'):
    method project (line 84) | def project(self, param_name, param_data, epsilon):
    method backup_grad (line 90) | def backup_grad(self):
    method restore_grad (line 95) | def restore_grad(self):
  class FGM (line 102) | class FGM():
    method __init__ (line 103) | def __init__(self, model):
    method attack (line 107) | def attack(self, epsilon=0.5, emb_name='word_embeddings'):
    method restore (line 117) | def restore(self, emb_name='word_embeddings'):
  class FocalLoss (line 127) | class FocalLoss(nn.Module):
    method __init__ (line 143) | def __init__(self, num_class, alpha=None, gamma=2,
    method forward (line 164) | def forward(self, input, target):
  function f1_match (line 207) | def f1_match(y_true,y_pred):