SYMBOL INDEX (1224 symbols across 114 files)

FILE: api/codegeex-api-example-java/src/main/java/cn/aminer/codegeex/example/CodeGenerationExample.java
  class CodeGenerationExample (line 15) | public class CodeGenerationExample {
    method main (line 22) | public static void main(String[] args) throws Exception {
    method generateCode (line 35) | public void generateCode(String prompt) throws Exception {
    method performHttpPost (line 50) | public String performHttpPost(String url, String payload) {

FILE: api/codegeex-api-example-java/src/main/java/cn/aminer/codegeex/example/pojo/Payload.java
  class Payload (line 14) | @JsonIgnoreProperties(ignoreUnknown = true)

FILE: api/codegeex-api-example-python/generation_example.py
  function main (line 33) | def main():

FILE: codegeex/__init__.py
  function get_model (line 8) | def get_model(
  function generate (line 15) | def generate(

FILE: codegeex/benchmark/evaluate_humaneval_x.py
  function process_humaneval_test (line 27) | def process_humaneval_test(sample, problems, example_test=False):
  function stream_jsonl_all (line 85) | def stream_jsonl_all(filename: str) -> Iterable[Dict]:
  function evaluate_functional_correctness (line 99) | def evaluate_functional_correctness(
  function main (line 234) | def main():

FILE: codegeex/benchmark/execution.py
  function dicts_to_jsonl (line 15) | def dicts_to_jsonl(data_list: list, filename: str, compress: bool = True...
  function check_correctness (line 44) | def check_correctness(
  function time_limit (line 409) | def time_limit(seconds: float):
  function swallow_io (line 422) | def swallow_io():
  function create_tempdir (line 431) | def create_tempdir():
  class TimeoutException (line 437) | class TimeoutException(Exception):
  class WriteOnlyStringIO (line 441) | class WriteOnlyStringIO(io.StringIO):
    method read (line 444) | def read(self, *args, **kwargs):
    method readline (line 447) | def readline(self, *args, **kwargs):
    method readlines (line 450) | def readlines(self, *args, **kwargs):
    method readable (line 453) | def readable(self, *args, **kwargs):
  class redirect_stdin (line 458) | class redirect_stdin(contextlib._RedirectStream):  # type: ignore
  function chdir (line 463) | def chdir(root):
  function reliability_guard (line 477) | def reliability_guard(maximum_memory_bytes: Optional[int] = None):

FILE: codegeex/benchmark/gather_output.py
  function gather_output (line 7) | def gather_output(
  function main (line 51) | def main():

FILE: codegeex/benchmark/humaneval-x/cpp/evaluation/test.cpp
  function separate_paren_groups (line 13) | vector<string> separate_paren_groups(string paren_string){
  function issame (line 43) | bool issame(vector<string> a,vector<string>b){
  function main (line 51) | int main(){

FILE: codegeex/benchmark/humaneval-x/evaluate_humaneval_x.py
  function process_humaneval_test (line 27) | def process_humaneval_test(sample, problems, example_test=False):
  function stream_jsonl_all (line 85) | def stream_jsonl_all(filename: str) -> Iterable[Dict]:
  function evaluate_functional_correctness (line 99) | def evaluate_functional_correctness(
  function main (line 234) | def main():

FILE: codegeex/benchmark/humaneval-x/generate_humaneval_x.py
  function add_code_generation_args (line 21) | def add_code_generation_args(parser):
  function main (line 202) | def main(node_rank: int, local_rank: int, master_port: int, num_devices:...
  function server (line 250) | def server():

FILE: codegeex/benchmark/humaneval-x/translate_humaneval_x.py
  function add_code_generate_args (line 19) | def add_code_generate_args(parser):
  function main (line 228) | def main(node_rank: int, local_rank: int, master_port: int, num_devices:...
  function server (line 274) | def server():

FILE: codegeex/benchmark/inspect_result.py
  function inspect_result (line 73) | def inspect_result(
  function main (line 281) | def main():

FILE: codegeex/benchmark/metric.py
  function estimate_pass_at_k (line 27) | def estimate_pass_at_k(

FILE: codegeex/benchmark/utils.py
  function read_dataset (line 51) | def read_dataset(
  function read_translation_dataset (line 69) | def read_translation_dataset(
  function process_extra_prompt (line 102) | def process_extra_prompt(prompt: str, language_type: str = None) -> str:
  function is_code_generation_finished (line 115) | def is_code_generation_finished(
  function cleanup_code (line 151) | def cleanup_code(

FILE: codegeex/data/data_utils.py
  function stream_jsonl (line 67) | def stream_jsonl(filename: str) -> Iterable[Dict]:
  function write_jsonl (line 84) | def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False):
  function sliding_window (line 104) | def sliding_window(

FILE: codegeex/data/process_pretrain_dataset.py
  function try_format_code (line 19) | def try_format_code(code: str):
  function load_pretrain_dataset (line 32) | def load_pretrain_dataset(dataset_path: Union[str, List[str]]) -> Dict:
  function process_sample (line 49) | def process_sample(
  function generate_prompt_samples (line 72) | def generate_prompt_samples(
  function main (line 81) | def main(

FILE: codegeex/data/processor.py
  class PromptDatasetProcessor (line 8) | class PromptDatasetProcessor(object):
    method __init__ (line 9) | def __init__(
    method pad_seq (line 34) | def pad_seq(self, prompt_tokens: List[int], code_tokens: List[int], ex...
    method process_sample (line 48) | def process_sample(self, sample: PromptSample) -> Iterable[Dict[str, L...
    method process_sample_strict (line 66) | def process_sample_strict(self, sample: PromptSample) -> List[Dict[str...
    method process_sample_ (line 75) | def process_sample_(self, sample) -> List[Dict[str, List[int]]]:
    method report (line 79) | def report(self):
  class LabelDatasetProcessor (line 88) | class LabelDatasetProcessor(object):
    method __init__ (line 89) | def __init__(
    method pad_seq (line 114) | def pad_seq(self, prompt_tokens: List[int], label: int, extra: dict = ...
    method process_sample (line 128) | def process_sample(self, sample: LabelSample) -> Iterable[Dict[str, Li...
    method process_sample_strict (line 143) | def process_sample_strict(self, sample: LabelSample) -> List[Dict[str,...
    method process_sample_ (line 152) | def process_sample_(self, sample) -> List[Dict[str, List[int]]]:
    method report (line 156) | def report(self):

FILE: codegeex/data/types.py
  class PromptSample (line 6) | class PromptSample:
  class LabelSample (line 15) | class LabelSample:

FILE: codegeex/kernels/__init__.py
  class Kernel (line 11) | class Kernel:
    method __init__ (line 12) | def __init__(self, filename: str, function_names: List[str]):
  function compress_int4_weight (line 37) | def compress_int4_weight(weight: torch.Tensor):  # (n, m)
  function extract_weight_to_half (line 58) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso...

FILE: codegeex/megatron/__init__.py
  function print_rank_0 (line 28) | def print_rank_0(message):
  function is_last_rank (line 37) | def is_last_rank():
  function print_rank_last (line 41) | def print_rank_last(message):

FILE: codegeex/megatron/arguments.py
  function parse_args (line 25) | def parse_args(extra_args_provider=None, defaults={}, ignore_unknown_arg...
  function _print_args (line 302) | def _print_args(args):
  function _check_arg_is_not_none (line 315) | def _check_arg_is_not_none(args, arg):
  function _add_network_size_args (line 319) | def _add_network_size_args(parser):
  function _add_logging_args (line 426) | def _add_logging_args(parser):
  function _add_regularization_args (line 495) | def _add_regularization_args(parser):
  function _add_training_args (line 589) | def _add_training_args(parser):
  function _add_initialization_args (line 748) | def _add_initialization_args(parser):
  function _add_inference_args (line 773) | def _add_inference_args(parser):
  function _add_learning_rate_args (line 806) | def _add_learning_rate_args(parser):
  function _add_checkpointing_args (line 898) | def _add_checkpointing_args(parser):
  function _add_mixed_precision_args (line 973) | def _add_mixed_precision_args(parser):
  function _add_distributed_args (line 1040) | def _add_distributed_args(parser):
  function _add_validation_args (line 1128) | def _add_validation_args(parser):
  function _add_data_args (line 1152) | def _add_data_args(parser):
  function _add_autoresume_args (line 1298) | def _add_autoresume_args(parser):
  function _add_biencoder_args (line 1316) | def _add_biencoder_args(parser):
  function _add_vit_args (line 1425) | def _add_vit_args(parser):
  function _add_zero_args (line 1453) | def _add_zero_args(parser):
  function _add_memoryopt_args (line 1485) | def _add_memoryopt_args(parser):
  function _add_activation_checkpoint_args (line 1520) | def _add_activation_checkpoint_args(parser):

FILE: codegeex/megatron/checkpointing.py
  function set_checkpoint_version (line 31) | def set_checkpoint_version(value):
  function get_checkpoint_version (line 38) | def get_checkpoint_version():
  function check_checkpoint_args (line 43) | def check_checkpoint_args(checkpoint_args):
  function ensure_directory_exists (line 75) | def ensure_directory_exists(filename):
  function get_checkpoint_name (line 82) | def get_checkpoint_name(checkpoints_path, iteration, release=False):
  function get_checkpoint_tracker_filename (line 104) | def get_checkpoint_tracker_filename(checkpoints_path):
  function save_checkpoint (line 110) | def save_checkpoint(iteration, model, optimizer, lr_scheduler):
  function _transpose_first_dim (line 205) | def _transpose_first_dim(t, num_splits, num_splits_first, model):
  function fix_query_key_value_ordering (line 249) | def fix_query_key_value_ordering(model, checkpoint_version):
  function load_deepspeed_state (line 282) | def load_deepspeed_state(model):
  function load_checkpoint (line 301) | def load_checkpoint(
  function load_biencoder_checkpoint (line 508) | def load_biencoder_checkpoint(

FILE: codegeex/megatron/code_generation_utils.py
  function get_batch (line 35) | def get_batch(context_tokens, micro_batch_size=None):
  function get_batch_ (line 56) | def get_batch_(context_tokens):
  function top_k_logits (line 75) | def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")):
  function generate_samples_input_from_file (line 105) | def generate_samples_input_from_file(model):
  function generate_samples_eval (line 225) | def generate_samples_eval(model, context, max_gen_length, eos_token_id):
  function generate_samples_interactive_code_contest (line 252) | def generate_samples_interactive_code_contest(model, print_frequency=10):
  function generate_samples_interactive (line 377) | def generate_samples_interactive(model, print_frequency=24):
  function generate_samples_unconditional (line 487) | def generate_samples_unconditional(model):
  function generate_and_write_samples_unconditional (line 528) | def generate_and_write_samples_unconditional(model):
  function pad_batch (line 540) | def pad_batch(batch, pad_id, args):
  function topk_sampling (line 550) | def topk_sampling(logits: torch.FloatTensor, num_samples: int):
  function nuclear_sampling (line 566) | def nuclear_sampling(logits: torch.FloatTensor, temperature: float, top_...
  function sample_topk_tokens (line 579) | def sample_topk_tokens(model,
  function nuclear_sample_tokens (line 600) | def nuclear_sample_tokens(model,
  class Beam (line 621) | class Beam:
    method __repr__ (line 625) | def __repr__(self):
    method get_code (line 628) | def get_code(self):
  function expand_beams (line 632) | def expand_beams(beams: List[Beam], num_beams: int, model) -> List[Beam]:
  function beam_search (line 661) | def beam_search(model, context_tokens, num_beams: int):
  class Handle (line 727) | class Handle:
    method __repr__ (line 731) | def __repr__(self):
    method is_finished (line 734) | def is_finished(self):
    method derived (line 737) | def derived(self, new_token: int, log_prob: float):
  function expand_handles (line 742) | def expand_handles(handles: List[Handle], temperature: float, top_p: flo...
  function generate_nuclear_sampling (line 771) | def generate_nuclear_sampling(model, context_tokens, num_samples: int, t...
  function forward_step (line 801) | def forward_step(
  function get_token_stream (line 841) | def get_token_stream(
  function switch (line 900) | def switch(val1, val2, boolean):
  function sample_sequence_batch (line 905) | def sample_sequence_batch(

FILE: codegeex/megatron/convert_ckpt_parallel.py
  function get_change_ckpt_args (line 8) | def get_change_ckpt_args(parser):
  function get_element_from_dict_by_path (line 33) | def get_element_from_dict_by_path(d, path):
  function main (line 48) | def main():

FILE: codegeex/megatron/data/blendable_dataset.py
  class BlendableDataset (line 25) | class BlendableDataset(torch.utils.data.Dataset):
    method __init__ (line 26) | def __init__(self, datasets, weights):
    method __len__ (line 63) | def __len__(self):
    method __getitem__ (line 66) | def __getitem__(self, idx):

FILE: codegeex/megatron/data/data_samplers.py
  function build_pretraining_data_loader (line 24) | def build_pretraining_data_loader(dataset, consumed_samples):
  class MegatronPretrainingSampler (line 62) | class MegatronPretrainingSampler:
    method __init__ (line 63) | def __init__(
    method __len__ (line 99) | def __len__(self):
    method get_start_end_idx (line 102) | def get_start_end_idx(self):
    method __iter__ (line 107) | def __iter__(self):
  class MegatronPretrainingRandomSampler (line 123) | class MegatronPretrainingRandomSampler:
    method __init__ (line 124) | def __init__(
    method __len__ (line 157) | def __len__(self):
    method __iter__ (line 160) | def __iter__(self):

FILE: codegeex/megatron/data/dataset_utils.py
  function get_datasets_weights_and_num_samples (line 33) | def get_datasets_weights_and_num_samples(data_prefix, train_valid_test_n...
  function compile_helper (line 66) | def compile_helper():
  function get_a_and_b_segments (line 81) | def get_a_and_b_segments(sample, np_rng):
  function truncate_segments (line 113) | def truncate_segments(tokens_a, tokens_b, len_a, len_b, max_num_tokens, ...
  function create_tokens_and_tokentypes (line 133) | def create_tokens_and_tokentypes(tokens_a, tokens_b, cls_id, sep_id):
  function is_start_piece (line 163) | def is_start_piece(piece):
  function create_masked_lm_predictions (line 172) | def create_masked_lm_predictions(
  function pad_and_convert_to_numpy (line 396) | def pad_and_convert_to_numpy(
  function get_indexed_dataset_ (line 429) | def get_indexed_dataset_(data_prefix, data_impl, skip_warmup):
  function get_train_valid_test_split_ (line 450) | def get_train_valid_test_split_(splits_string, size):
  function get_samples_mapping (line 477) | def get_samples_mapping(

FILE: codegeex/megatron/data/helpers.cpp
  function build_blending_indices (line 36) | void build_blending_indices(py::array_t<uint8_t>& dataset_index,
  function build_sample_idx (line 99) | py::array build_sample_idx(const py::array_t<int32_t>& sizes_,
  function get_target_sample_len (line 188) | inline int32_t get_target_sample_len(const int32_t short_seq_ratio,
  function build_mapping_impl (line 204) | py::array build_mapping_impl(const py::array_t<int64_t>& docs_,
  function build_mapping (line 440) | py::array build_mapping(const py::array_t<int64_t>& docs_,
  function build_blocks_mapping_impl (line 470) | py::array build_blocks_mapping_impl(const py::array_t<int64_t>& docs_,
  function build_blocks_mapping (line 687) | py::array build_blocks_mapping(const py::array_t<int64_t>& docs_,
  function PYBIND11_MODULE (line 712) | PYBIND11_MODULE(helpers, m) {

FILE: codegeex/megatron/data/indexed_dataset.py
  function __best_fitting_dtype (line 26) | def __best_fitting_dtype(vocab_size=None):
  function get_available_dataset_impl (line 33) | def get_available_dataset_impl():
  function infer_dataset_impl (line 37) | def infer_dataset_impl(path):
  function make_builder (line 55) | def make_builder(out_file, impl, vocab_size=None):
  function make_dataset (line 64) | def make_dataset(path, impl, skip_warmup=False):
  function dataset_exists (line 83) | def dataset_exists(path, impl):
  function read_longs (line 90) | def read_longs(f, n):
  function write_longs (line 96) | def write_longs(f, a):
  function __best_fitting_dtype (line 112) | def __best_fitting_dtype(vocab_size=None):
  function make_mmap_builder (line 119) | def make_mmap_builder(out_file, vocab_size=None):
  function code (line 125) | def code(dtype):
  function index_file_path (line 132) | def index_file_path(prefix_path):
  function data_file_path (line 136) | def data_file_path(prefix_path):
  function create_doc_idx (line 140) | def create_doc_idx(sizes):
  class IndexedDataset (line 148) | class IndexedDataset(torch.utils.data.Dataset):
    method __init__ (line 153) | def __init__(self, path):
    method read_index (line 159) | def read_index(self, path):
    method read_data (line 177) | def read_data(self, path):
    method check_index (line 180) | def check_index(self, i):
    method __del__ (line 184) | def __del__(self):
    method __getitem__ (line 189) | def __getitem__(self, idx):
    method __len__ (line 213) | def __len__(self):
    method num_tokens (line 216) | def num_tokens(self, index):
    method size (line 219) | def size(self, index):
    method exists (line 223) | def exists(path):
    method supports_prefetch (line 229) | def supports_prefetch(self):
  class IndexedCachedDataset (line 233) | class IndexedCachedDataset(IndexedDataset):
    method __init__ (line 234) | def __init__(self, path):
    method supports_prefetch (line 240) | def supports_prefetch(self):
    method prefetch (line 243) | def prefetch(self, indices):
    method __getitem__ (line 268) | def __getitem__(self, idx):
  class IndexedDatasetBuilder (line 285) | class IndexedDatasetBuilder(object):
    method __init__ (line 296) | def __init__(self, out_file, dtype=np.int32):
    method add_item (line 305) | def add_item(self, tensor):
    method end_document (line 312) | def end_document(self):
    method merge_file_ (line 315) | def merge_file_(self, another_file):
    method finalize (line 335) | def finalize(self, index_file):
  function _warmup_mmap_file (line 350) | def _warmup_mmap_file(path):
  class MMapIndexedDataset (line 356) | class MMapIndexedDataset(torch.utils.data.Dataset):
    class Index (line 357) | class Index(object):
      method writer (line 361) | def writer(cls, path, dtype):
      method __init__ (line 406) | def __init__(self, path, skip_warmup=False):
      method __del__ (line 442) | def __del__(self):
      method dtype (line 447) | def dtype(self):
      method sizes (line 451) | def sizes(self):
      method doc_idx (line 455) | def doc_idx(self):
      method __getitem__ (line 459) | def __getitem__(self, i):
      method __len__ (line 462) | def __len__(self):
    method __init__ (line 465) | def __init__(self, path, skip_warmup=False):
    method __getstate__ (line 474) | def __getstate__(self):
    method __setstate__ (line 477) | def __setstate__(self, state):
    method _do_init (line 480) | def _do_init(self, path, skip_warmup):
    method __del__ (line 494) | def __del__(self):
    method __len__ (line 499) | def __len__(self):
    method __getitem__ (line 503) | def __getitem__(self, idx):
    method get (line 524) | def get(self, idx, offset=0, length=None):
    method sizes (line 540) | def sizes(self):
    method doc_idx (line 544) | def doc_idx(self):
    method get_doc_idx (line 547) | def get_doc_idx(self):
    method set_doc_idx (line 550) | def set_doc_idx(self, doc_idx_):
    method supports_prefetch (line 554) | def supports_prefetch(self):
    method exists (line 558) | def exists(path):
  class MMapIndexedDatasetBuilder (line 564) | class MMapIndexedDatasetBuilder(object):
    method __init__ (line 565) | def __init__(self, out_file, dtype=np.int64):
    method add_item (line 571) | def add_item(self, tensor):
    method end_document (line 576) | def end_document(self):
    method merge_file_ (line 579) | def merge_file_(self, another_file):
    method finalize (line 591) | def finalize(self, index_file):

FILE: codegeex/megatron/data/prompt_dataset.py
  function build_train_valid_test_datasets (line 31) | def build_train_valid_test_datasets(
  function _build_train_valid_test_datasets (line 96) | def _build_train_valid_test_datasets(
  function get_indexed_dataset_ (line 164) | def get_indexed_dataset_(data_prefix, data_impl, skip_warmup):
  class PromptDataset (line 179) | class PromptDataset(torch.utils.data.Dataset):
    method __init__ (line 180) | def __init__(
    method __len__ (line 229) | def __len__(self):
    method __getitem__ (line 234) | def __getitem__(self, idx):
  function _build_index_mappings (line 253) | def _build_index_mappings(
  function _num_epochs (line 314) | def _num_epochs(samples_per_epoch, num_samples):
  function _build_doc_idx (line 319) | def _build_doc_idx(documents, num_epochs, np_rng, separate_last_epoch):

FILE: codegeex/megatron/enums.py
  class LayerType (line 19) | class LayerType(enum.Enum):
  class AttnType (line 24) | class AttnType(enum.Enum):
  class AttnMaskType (line 29) | class AttnMaskType(enum.Enum):

FILE: codegeex/megatron/global_vars.py
  function get_args (line 36) | def get_args():
  function get_num_microbatches (line 42) | def get_num_microbatches():
  function get_current_global_batch_size (line 46) | def get_current_global_batch_size():
  function update_num_microbatches (line 50) | def update_num_microbatches(consumed_samples, consistency_check=True):
  function get_tokenizer (line 54) | def get_tokenizer():
  function get_tensorboard_writer (line 60) | def get_tensorboard_writer():
  function get_adlr_autoresume (line 66) | def get_adlr_autoresume():
  function get_timers (line 72) | def get_timers():
  function set_global_variables (line 78) | def set_global_variables(
  function _parse_args (line 95) | def _parse_args(extra_args_provider=None, defaults={}, ignore_unknown_ar...
  function _build_num_microbatches_calculator (line 107) | def _build_num_microbatches_calculator(args):
  function _build_tokenizer (line 117) | def _build_tokenizer(args):
  function rebuild_tokenizer (line 125) | def rebuild_tokenizer(args):
  function _set_tensorboard_writer (line 131) | def _set_tensorboard_writer(args):
  function _set_adlr_autoresume (line 157) | def _set_adlr_autoresume(args):
  function _set_timers (line 175) | def _set_timers():
  function _ensure_var_is_initialized (line 182) | def _ensure_var_is_initialized(var, name):
  function _ensure_var_is_not_initialized (line 187) | def _ensure_var_is_not_initialized(var, name):
  class _Timer (line 192) | class _Timer:
    method __init__ (line 195) | def __init__(self, name):
    method start (line 201) | def start(self):
    method stop (line 208) | def stop(self):
    method reset (line 215) | def reset(self):
    method elapsed (line 220) | def elapsed(self, reset=True):
  class Timers (line 237) | class Timers:
    method __init__ (line 240) | def __init__(self):
    method __call__ (line 243) | def __call__(self, name):
    method write (line 248) | def write(self, names, writer, iteration, normalizer=1.0, reset=False):
    method log (line 258) | def log(self, names, normalizer=1.0, reset=True):

FILE: codegeex/megatron/inference.py
  function model_provider (line 18) | def model_provider():
  function set_random_seed (line 27) | def set_random_seed(seed):
  function run_generation_distributed (line 35) | def run_generation_distributed(model):

FILE: codegeex/megatron/initialize.py
  function initialize_megatron (line 44) | def initialize_megatron(
  function _compile_dependencies (line 105) | def _compile_dependencies():
  function setup_deepspeed_random_and_activation_checkpointing (line 171) | def setup_deepspeed_random_and_activation_checkpointing(args):
  function _initialize_distributed (line 209) | def _initialize_distributed():
  function _init_autoresume (line 282) | def _init_autoresume():
  function _set_random_seed (line 291) | def _set_random_seed(seed_):
  function write_args_to_tensorboard (line 305) | def write_args_to_tensorboard():
  function initialize_wandb_experiment (line 314) | def initialize_wandb_experiment():
  function _initialize_mem_buffs (line 331) | def _initialize_mem_buffs():

FILE: codegeex/megatron/learning_rates.py
  class AnnealingLR (line 23) | class AnnealingLR(object):
    method __init__ (line 26) | def __init__(
    method get_lr (line 70) | def get_lr(self):
    method step (line 116) | def step(self, increment, token_num=None):
    method state_dict (line 127) | def state_dict(self):
    method _check_and_set (line 140) | def _check_and_set(self, cls_value, sd_value, name):
    method load_state_dict (line 155) | def load_state_dict(self, sd):

FILE: codegeex/megatron/memory.py
  function allocate_mem_buff (line 24) | def allocate_mem_buff(name, numel, dtype, track_usage):
  function get_mem_buff (line 31) | def get_mem_buff(name):
  class MemoryBuffer (line 36) | class MemoryBuffer:
    method __init__ (line 49) | def __init__(self, name, numel, dtype, track_usage):
    method reset (line 78) | def reset(self):
    method is_in_use (line 82) | def is_in_use(self):
    method numel_in_use (line 86) | def numel_in_use(self):
    method add (line 90) | def add(self, tensor):
    method get_data (line 114) | def get_data(self):
    method print_average_usage (line 121) | def print_average_usage(self):
  class RingMemBuffer (line 134) | class RingMemBuffer:
    method __init__ (line 137) | def __init__(self, name, num_buffers, numel, dtype, track_usage):
    method get_next_buffer (line 145) | def get_next_buffer(self):

FILE: codegeex/megatron/merge_ckpt_parallel.py
  function get_change_ckpt_args (line 13) | def get_change_ckpt_args(parser):
  function main (line 43) | def main():

FILE: codegeex/megatron/microbatches.py
  function build_num_microbatches_calculator (line 22) | def build_num_microbatches_calculator(args):
  class NumMicroBatchesCalculator (line 70) | class NumMicroBatchesCalculator(ABC):
    method __init__ (line 71) | def __init__(self):
    method get (line 75) | def get(self):
    method get_current_global_batch_size (line 78) | def get_current_global_batch_size(self):
    method update (line 82) | def update(self, consumed_samples, consistency_check):
  class ConstantNumMicroBatches (line 86) | class ConstantNumMicroBatches(NumMicroBatchesCalculator):
    method __init__ (line 87) | def __init__(self, global_batch_size, micro_batch_size, data_parallel_...
    method update (line 99) | def update(self, consumed_samples, consistency_check):
  class RampupBatchsizeNumMicroBatches (line 103) | class RampupBatchsizeNumMicroBatches(NumMicroBatchesCalculator):
    method __init__ (line 104) | def __init__(
    method update (line 159) | def update(self, consumed_samples, consistency_check):

FILE: codegeex/megatron/mindspore_to_megatron.py
  function get_change_ckpt_args (line 34) | def get_change_ckpt_args(parser):
  function loadModelFromNp (line 53) | def loadModelFromNp(sd, args):
  function loadEmbeddingFromNp (line 73) | def loadEmbeddingFromNp(npCkptPath, languageModel, vocabSize=52224):
  function loadAttentionLayerFromNp (line 90) | def loadAttentionLayerFromNp(npCkptPath, transformer, layerID):
  function loadQueryLayerFromNp (line 199) | def loadQueryLayerFromNp(npCkptPath, transformer):
  function main (line 290) | def main():

FILE: codegeex/megatron/model/codegeex_model.py
  class CodeGeeXModel (line 27) | class CodeGeeXModel(MegatronModule):
    method __init__ (line 30) | def __init__(self, num_tokentypes=0, parallel_output=False):
    method set_input_tensor (line 44) | def set_input_tensor(self, input_tensor):
    method forward (line 48) | def forward(
    method state_dict_for_save_checkpoint (line 99) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 108) | def load_state_dict(self, state_dict, strict=True):
  function CrossEntropy (line 116) | def CrossEntropy(output, labels):
  class CodeGeeXModelPipe (line 127) | class CodeGeeXModelPipe(PipelineModule, MegatronModule):
    method __init__ (line 130) | def __init__(self, num_tokentypes=0, parallel_output=True):

FILE: codegeex/megatron/model/distributed.py
  class MemoryBuffer (line 26) | class MemoryBuffer:
    method __init__ (line 27) | def __init__(self, numel, dtype):
    method zero (line 37) | def zero(self):
    method get (line 41) | def get(self, shape, start_index):
  class DistributedDataParallelBase (line 51) | class DistributedDataParallelBase(MegatronModule, ABC):
    method __init__ (line 54) | def __init__(self, module):
    method allreduce_gradients (line 60) | def allreduce_gradients(self):
    method forward (line 63) | def forward(self, *inputs, **kwargs):
    method state_dict (line 66) | def state_dict(self, destination=None, prefix="", keep_vars=False):
    method state_dict_for_save_checkpoint (line 69) | def state_dict_for_save_checkpoint(
    method load_state_dict (line 76) | def load_state_dict(self, state_dict, strict=True):
  class DistributedDataParallel (line 80) | class DistributedDataParallel(DistributedDataParallelBase):
    method __init__ (line 96) | def __init__(
    method _make_param_hook (line 162) | def _make_param_hook(self, param):
    method zero_grad_buffer (line 175) | def zero_grad_buffer(self):
    method allreduce_gradients (line 182) | def allreduce_gradients(self):

FILE: codegeex/megatron/model/language_model.py
  function get_shrink_embedding_gradient_alpha (line 29) | def get_shrink_embedding_gradient_alpha(iteration):
  function parallel_lm_logits (line 46) | def parallel_lm_logits(input_, word_embeddings_weight, parallel_output, ...
  function get_language_model (line 73) | def get_language_model(
  class Embedding (line 100) | class Embedding(MegatronModule):
    method __init__ (line 114) | def __init__(
    method add_tokentype_embeddings (line 164) | def add_tokentype_embeddings(self, num_tokentypes):
    method forward (line 180) | def forward(self, input_ids, position_ids, tokentype_ids=None):
    method state_dict_for_save_checkpoint (line 196) | def state_dict_for_save_checkpoint(
    method load_state_dict (line 214) | def load_state_dict(self, state_dict, strict=True):
  class EmbeddingPipe (line 273) | class EmbeddingPipe(Embedding):
    method forward (line 274) | def forward(self, inputs, **kwargs):
    method word_embeddings_weight (line 302) | def word_embeddings_weight(self):
  class QueryEmbedding (line 307) | class QueryEmbedding(MegatronModule):
    method __init__ (line 321) | def __init__(self,
    method add_tokentype_embeddings (line 360) | def add_tokentype_embeddings(self, num_tokentypes):
    method forward (line 376) | def forward(self, position_ids, tokentype_ids=None):
    method state_dict_for_save_checkpoint (line 391) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 406) | def load_state_dict(self, state_dict, strict=True):
  class QueryEmbeddingPipe (line 448) | class QueryEmbeddingPipe(QueryEmbedding):
    method forward (line 449) | def forward(self, inputs, **kwargs):
    method word_embeddings_weight (line 476) | def word_embeddings_weight(self):
  class TransformerLanguageModel (line 481) | class TransformerLanguageModel(MegatronModule):
    method __init__ (line 500) | def __init__(self,
    method set_input_tensor (line 537) | def set_input_tensor(self, input_tensor):
    method forward (line 541) | def forward(
    method state_dict_for_save_checkpoint (line 572) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 593) | def load_state_dict(self, state_dict, strict=True):

FILE: codegeex/megatron/model/module.py
  function param_is_not_shared (line 31) | def param_is_not_shared(param):
  class MegatronModule (line 35) | class MegatronModule(torch.nn.Module):
    method __init__ (line 39) | def __init__(self, share_word_embeddings=True):
    method state_dict_for_save_checkpoint (line 43) | def state_dict_for_save_checkpoint(
    method word_embeddings_weight (line 50) | def word_embeddings_weight(self):
    method initialize_word_embeddings (line 64) | def initialize_word_embeddings(self, init_method_normal):
  function conversion_helper (line 120) | def conversion_helper(val, conversion):
  function fp32_to_float16 (line 131) | def fp32_to_float16(val, float16_convertor):
  function float16_to_fp32 (line 145) | def float16_to_fp32(val):
  class Float16Module (line 159) | class Float16Module(MegatronModule):
    method __init__ (line 160) | def __init__(self, module, args):
    method forward (line 180) | def forward(self, *inputs, **kwargs):
    method state_dict (line 188) | def state_dict(self, destination=None, prefix="", keep_vars=False):
    method state_dict_for_save_checkpoint (line 191) | def state_dict_for_save_checkpoint(
    method load_state_dict (line 198) | def load_state_dict(self, state_dict, strict=True):

FILE: codegeex/megatron/model/transformer.py
  class ParallelMLP (line 55) | class ParallelMLP(MegatronModule):
    method __init__ (line 64) | def __init__(
    method forward (line 93) | def forward(self, hidden_states):
  class ParallelSelfAttention (line 103) | class ParallelSelfAttention(MegatronModule):
    method __init__ (line 110) | def __init__(self, init_method,
    method forward (line 164) | def forward(
  class ParallelTopQuerySelfAttention (line 325) | class ParallelTopQuerySelfAttention(MegatronModule):
    method __init__ (line 332) | def __init__(self, init_method,
    method forward (line 387) | def forward(
  function bias_dropout_add (line 547) | def bias_dropout_add(x, bias, residual, prob, training):
  function get_bias_dropout_add (line 554) | def get_bias_dropout_add(training):
  function bias_dropout_add_fused_train (line 562) | def bias_dropout_add_fused_train(x, bias, residual, prob):
  function bias_dropout_add_fused_inference (line 568) | def bias_dropout_add_fused_inference(x, bias, residual, prob):
  class ParallelTransformerLayer (line 573) | class ParallelTransformerLayer(MegatronModule):
    method __init__ (line 580) | def __init__(self, init_method,
    method forward (line 619) | def forward(
  class ParallelTransformerLayerPipe (line 695) | class ParallelTransformerLayerPipe(ParallelTransformerLayer):
    method forward (line 716) | def forward(self, inputs, **kwargs):
  class ParallelTopQueryLayer (line 732) | class ParallelTopQueryLayer(MegatronModule):
    method __init__ (line 739) | def __init__(self, init_method,
    method forward (line 776) | def forward(
  class ParallelTopQueryLayerPipe (line 857) | class ParallelTopQueryLayerPipe(ParallelTopQueryLayer):
    method forward (line 878) | def forward(self, inputs, **kwargs):
  class ParallelTransformer (line 895) | class ParallelTransformer(MegatronModule):
    method __init__ (line 898) | def __init__(self, init_method, output_layer_init_method):
    method _get_layer_index (line 943) | def _get_layer_index(self, layer_number):
    method _get_layer (line 950) | def _get_layer(self, layer_number):
    method _checkpointed_forward (line 953) | def _checkpointed_forward(self, hidden_states, attention_mask):
    method set_input_tensor (line 977) | def set_input_tensor(self, input_tensor):
    method forward (line 987) | def forward(

FILE: codegeex/megatron/model/utils.py
  function init_method_normal (line 22) | def init_method_normal(sigma):
  function scaled_init_method_normal (line 31) | def scaled_init_method_normal(sigma, num_layers):
  function attention_mask_func (line 41) | def attention_mask_func(attention_scores, attention_mask):
  function get_linear_layer (line 47) | def get_linear_layer(rows, columns, init_method):
  function fast_gelu (line 56) | def fast_gelu(x):
  function gelu_impl (line 62) | def gelu_impl(x):
  function openai_gelu (line 69) | def openai_gelu(x):
  function erf_gelu (line 75) | def erf_gelu(x):

FILE: codegeex/megatron/mpu/cross_entropy.py
  class _VocabParallelCrossEntropy (line 25) | class _VocabParallelCrossEntropy(torch.autograd.Function):
    method forward (line 27) | def forward(ctx, vocab_parallel_logits, target):
    method backward (line 92) | def backward(ctx, grad_output):
  function vocab_parallel_cross_entropy (line 113) | def vocab_parallel_cross_entropy(vocab_parallel_logits, target):

FILE: codegeex/megatron/mpu/data.py
  function _check_data_types (line 26) | def _check_data_types(keys, data, target_dtype):
  function _build_key_size_numel_dictionaries (line 36) | def _build_key_size_numel_dictionaries(keys, data):
  function broadcast_data (line 82) | def broadcast_data(keys, data, datatype):

FILE: codegeex/megatron/mpu/initialize.py
  function is_unitialized (line 49) | def is_unitialized():
  function initialize_model_parallel (line 54) | def initialize_model_parallel(
  function model_parallel_is_initialized (line 184) | def model_parallel_is_initialized():
  function get_model_parallel_group (line 195) | def get_model_parallel_group():
  function get_tensor_model_parallel_group (line 201) | def get_tensor_model_parallel_group():
  function get_pipeline_model_parallel_group (line 209) | def get_pipeline_model_parallel_group():
  function get_data_parallel_group (line 217) | def get_data_parallel_group():
  function get_embedding_group (line 223) | def get_embedding_group():
  function set_tensor_model_parallel_world_size (line 229) | def set_tensor_model_parallel_world_size(world_size):
  function set_pipeline_model_parallel_world_size (line 235) | def set_pipeline_model_parallel_world_size(world_size):
  function get_tensor_model_parallel_world_size (line 241) | def get_tensor_model_parallel_world_size():
  function get_model_parallel_world_size (line 249) | def get_model_parallel_world_size():
  function get_pipeline_model_parallel_world_size (line 256) | def get_pipeline_model_parallel_world_size():
  function set_tensor_model_parallel_rank (line 264) | def set_tensor_model_parallel_rank(rank):
  function set_pipeline_model_parallel_rank (line 270) | def set_pipeline_model_parallel_rank(rank):
  function get_tensor_model_parallel_rank (line 276) | def get_tensor_model_parallel_rank():
  function get_model_parallel_rank (line 284) | def get_model_parallel_rank():
  function get_pipeline_model_parallel_rank (line 291) | def get_pipeline_model_parallel_rank():
  function is_pipeline_first_stage (line 299) | def is_pipeline_first_stage(ignore_virtual=False):
  function is_pipeline_last_stage (line 310) | def is_pipeline_last_stage(ignore_virtual=False):
  function get_virtual_pipeline_model_parallel_rank (line 327) | def get_virtual_pipeline_model_parallel_rank():
  function set_virtual_pipeline_model_parallel_rank (line 333) | def set_virtual_pipeline_model_parallel_rank(rank):
  function get_virtual_pipeline_model_parallel_world_size (line 339) | def get_virtual_pipeline_model_parallel_world_size():
  function get_tensor_model_parallel_src_rank (line 345) | def get_tensor_model_parallel_src_rank():
  function get_pipeline_model_parallel_first_rank (line 353) | def get_pipeline_model_parallel_first_rank():
  function get_pipeline_model_parallel_last_rank (line 360) | def get_pipeline_model_parallel_last_rank():
  function get_pipeline_model_parallel_next_rank (line 368) | def get_pipeline_model_parallel_next_rank():
  function get_pipeline_model_parallel_prev_rank (line 377) | def get_pipeline_model_parallel_prev_rank():
  function get_data_parallel_world_size (line 386) | def get_data_parallel_world_size():
  function get_data_parallel_rank (line 391) | def get_data_parallel_rank():
  function destroy_model_parallel (line 396) | def destroy_model_parallel():

FILE: codegeex/megatron/mpu/layers.py
  function param_is_not_tensor_parallel_duplicate (line 49) | def param_is_not_tensor_parallel_duplicate(param):
  function set_tensor_model_parallel_attributes (line 55) | def set_tensor_model_parallel_attributes(tensor, is_parallel, dim, stride):
  function set_defaults_if_not_set_tensor_model_parallel_attributes (line 65) | def set_defaults_if_not_set_tensor_model_parallel_attributes(tensor):
  function copy_tensor_model_parallel_attributes (line 74) | def copy_tensor_model_parallel_attributes(destination_tensor, source_ten...
  function _initialize_affine_weight_gpu (line 83) | def _initialize_affine_weight_gpu(weight, init_method, partition_dim, st...
  function _initialize_affine_weight_cpu (line 98) | def _initialize_affine_weight_cpu(
  class VocabParallelEmbedding (line 141) | class VocabParallelEmbedding(torch.nn.Module):
    method __init__ (line 152) | def __init__(self, num_embeddings, embedding_dim, init_method=init.xav...
    method forward (line 211) | def forward(self, input_):
  class ColumnParallelLinear (line 240) | class ColumnParallelLinear(torch.nn.Module):
    method __init__ (line 264) | def __init__(
    method forward (line 351) | def forward(self, input_):
  class RowParallelLinear (line 367) | class RowParallelLinear(torch.nn.Module):
    method __init__ (line 397) | def __init__(
    method forward (line 483) | def forward(self, input_):

FILE: codegeex/megatron/mpu/mappings.py
  function _reduce (line 26) | def _reduce(input_):
  function _split (line 39) | def _split(input_):
  function _gather (line 58) | def _gather(input_):
  class _CopyToModelParallelRegion (line 82) | class _CopyToModelParallelRegion(torch.autograd.Function):
    method symbolic (line 86) | def symbolic(graph, input_):
    method forward (line 90) | def forward(ctx, input_):
    method backward (line 94) | def backward(ctx, grad_output):
  class _ReduceFromModelParallelRegion (line 98) | class _ReduceFromModelParallelRegion(torch.autograd.Function):
    method symbolic (line 102) | def symbolic(graph, input_):
    method forward (line 106) | def forward(ctx, input_):
    method backward (line 110) | def backward(ctx, grad_output):
  class _ScatterToModelParallelRegion (line 114) | class _ScatterToModelParallelRegion(torch.autograd.Function):
    method symbolic (line 118) | def symbolic(graph, input_):
    method forward (line 122) | def forward(ctx, input_):
    method backward (line 126) | def backward(ctx, grad_output):
  class _GatherFromModelParallelRegion (line 130) | class _GatherFromModelParallelRegion(torch.autograd.Function):
    method symbolic (line 134) | def symbolic(graph, input_):
    method forward (line 138) | def forward(ctx, input_):
    method backward (line 142) | def backward(ctx, grad_output):
  function copy_to_tensor_model_parallel_region (line 151) | def copy_to_tensor_model_parallel_region(input_):
  function reduce_from_tensor_model_parallel_region (line 155) | def reduce_from_tensor_model_parallel_region(input_):
  function scatter_to_tensor_model_parallel_region (line 159) | def scatter_to_tensor_model_parallel_region(input_):
  function gather_from_tensor_model_parallel_region (line 163) | def gather_from_tensor_model_parallel_region(input_):

FILE: codegeex/megatron/mpu/random.py
  function init_checkpointed_activations_memory_buffer (line 44) | def init_checkpointed_activations_memory_buffer():
  function reset_checkpointed_activations_memory_buffer (line 72) | def reset_checkpointed_activations_memory_buffer():
  function _set_cuda_rng_state (line 78) | def _set_cuda_rng_state(new_state, device=-1):
  function split_tensor_into_1d_equal_chunks (line 112) | def split_tensor_into_1d_equal_chunks(tensor):
  function gather_split_1d_tensor (line 121) | def gather_split_1d_tensor(tensor):
  class CudaRNGStatesTracker (line 139) | class CudaRNGStatesTracker:
    method __init__ (line 148) | def __init__(self):
    method reset (line 154) | def reset(self):
    method get_states (line 159) | def get_states(self):
    method set_states (line 167) | def set_states(self, states):
    method add (line 172) | def add(self, name, seed):
    method fork (line 190) | def fork(self, name=_MODEL_PARALLEL_RNG_TRACKER_NAME):
  function get_cuda_rng_tracker (line 215) | def get_cuda_rng_tracker():
  function model_parallel_cuda_manual_seed (line 220) | def model_parallel_cuda_manual_seed(seed):
  class CheckpointFunction (line 265) | class CheckpointFunction(torch.autograd.Function):
    method forward (line 274) | def forward(ctx, run_function, *args):
    method backward (line 298) | def backward(ctx, *args):
  function checkpoint (line 339) | def checkpoint(function, *args):

FILE: codegeex/megatron/mpu/utils.py
  function ensure_divisibility (line 20) | def ensure_divisibility(numerator, denominator):
  function divide (line 27) | def divide(numerator, denominator):
  function split_tensor_along_last_dim (line 34) | def split_tensor_along_last_dim(tensor, num_partitions, contiguous_split...
  class VocabUtility (line 54) | class VocabUtility:
    method vocab_range_from_per_partition_vocab_size (line 60) | def vocab_range_from_per_partition_vocab_size(
    method vocab_range_from_global_vocab_size (line 68) | def vocab_range_from_global_vocab_size(global_vocab_size, rank, world_...

FILE: codegeex/megatron/optimizer/__init__.py
  function _get_params_for_weight_decay_optimization (line 26) | def _get_params_for_weight_decay_optimization(modules):
  function get_megatron_optimizer (line 58) | def get_megatron_optimizer(model):

FILE: codegeex/megatron/optimizer/clip_grads.py
  function clip_grad_norm_fp32 (line 29) | def clip_grad_norm_fp32(parameters, max_norm, norm_type=2):
  function count_zeros_fp32 (line 126) | def count_zeros_fp32(parameters):

FILE: codegeex/megatron/optimizer/grad_scaler.py
  class MegatronGradScaler (line 24) | class MegatronGradScaler(ABC):
    method __init__ (line 25) | def __init__(self, initial_scale):
    method scale (line 31) | def scale(self):
    method inv_scale (line 35) | def inv_scale(self):
    method update (line 39) | def update(self, found_inf):
    method state_dict (line 43) | def state_dict(self):
    method load_state_dict (line 47) | def load_state_dict(self, state_dict):
  class ConstantGradScaler (line 51) | class ConstantGradScaler(MegatronGradScaler):
    method update (line 52) | def update(self, found_inf):
    method state_dict (line 55) | def state_dict(self):
    method load_state_dict (line 58) | def load_state_dict(self, state_dict):
  class DynamicGradScaler (line 62) | class DynamicGradScaler(MegatronGradScaler):
    method __init__ (line 63) | def __init__(
    method update (line 99) | def update(self, found_inf):
    method state_dict (line 122) | def state_dict(self):
    method load_state_dict (line 129) | def load_state_dict(self, state_dict):

FILE: codegeex/megatron/optimizer/optimizer.py
  function _zero_grad_group_helper (line 33) | def _zero_grad_group_helper(group, set_to_none):
  function _multi_tensor_copy_this_to_that (line 48) | def _multi_tensor_copy_this_to_that(this, that, overflow_buf=None):
  class MegatronOptimizer (line 62) | class MegatronOptimizer(ABC):
    method __init__ (line 63) | def __init__(
    method get_parameters (line 74) | def get_parameters(self):
    method clip_grad_norm (line 81) | def clip_grad_norm(self, clip_grad):
    method count_zeros (line 85) | def count_zeros(self):
    method zero_grad (line 90) | def zero_grad(self, set_to_none=True):
    method get_loss_scale (line 94) | def get_loss_scale(self):
    method scale_loss (line 98) | def scale_loss(self, loss):
    method step (line 103) | def step(self):
    method reload_model_params (line 107) | def reload_model_params(self):
    method state_dict (line 116) | def state_dict(self):
    method load_state_dict (line 120) | def load_state_dict(self, state_dict):
    method _get_state (line 125) | def _get_state(self):
    method _set_state (line 128) | def _set_state(self, value):
    method _get_param_groups (line 136) | def _get_param_groups(self):
    method _set_param_groups (line 139) | def _set_param_groups(self, value):
  class Float16OptimizerWithFloat16Params (line 145) | class Float16OptimizerWithFloat16Params(MegatronOptimizer):
    method __init__ (line 170) | def __init__(
    method zero_grad (line 273) | def zero_grad(self, set_to_none=True):
    method get_loss_scale (line 281) | def get_loss_scale(self):
    method _copy_model_grads_to_main_grads (line 286) | def _copy_model_grads_to_main_grads(self):
    method _unscale_main_grads_and_check_for_nan (line 303) | def _unscale_main_grads_and_check_for_nan(self):
    method _get_model_and_main_params_data_float16 (line 332) | def _get_model_and_main_params_data_float16(self):
    method _copy_main_params_to_model_params (line 343) | def _copy_main_params_to_model_params(self):
    method _copy_model_params_to_main_params (line 350) | def _copy_model_params_to_main_params(self):
    method reload_model_params (line 357) | def reload_model_params(self):
    method step (line 361) | def step(self):
    method state_dict (line 408) | def state_dict(self):
    method load_state_dict (line 416) | def load_state_dict(self, state_dict):
  class FP32Optimizer (line 453) | class FP32Optimizer(MegatronOptimizer):
    method __init__ (line 454) | def __init__(
    method zero_grad (line 464) | def zero_grad(self, set_to_none=True):
    method get_loss_scale (line 469) | def get_loss_scale(self):
    method step (line 474) | def step(self):
    method reload_model_params (line 498) | def reload_model_params(self):
    method state_dict (line 501) | def state_dict(self):
    method load_state_dict (line 504) | def load_state_dict(self, state_dict):

FILE: codegeex/megatron/p2p_communication.py
  function _communicate (line 24) | def _communicate(
  function recv_forward (line 150) | def recv_forward(timers=None):
  function recv_backward (line 168) | def recv_backward(timers=None):
  function send_forward (line 186) | def send_forward(output_tensor, timers=None):
  function send_backward (line 201) | def send_backward(input_tensor_grad, timers=None):
  function send_forward_recv_backward (line 216) | def send_forward_recv_backward(output_tensor, timers=None):
  function send_backward_recv_forward (line 234) | def send_backward_recv_forward(input_tensor_grad, timers=None):
  function send_forward_recv_forward (line 252) | def send_forward_recv_forward(output_tensor, recv_prev, timers=None):
  function send_backward_recv_backward (line 267) | def send_backward_recv_backward(input_tensor_grad, recv_next, timers=None):
  function send_forward_backward_recv_forward_backward (line 282) | def send_forward_backward_recv_forward_backward(

FILE: codegeex/megatron/schedules.py
  function get_forward_backward_func (line 31) | def get_forward_backward_func():
  function forward_step (line 43) | def forward_step(forward_step_func, data_iterator, model, input_tensor, ...
  function backward_step (line 72) | def backward_step(
  function dummy_handler (line 113) | def dummy_handler():
  function forward_backward_no_pipelining (line 120) | def forward_backward_no_pipelining(
  function forward_backward_pipelining_with_interleaving (line 173) | def forward_backward_pipelining_with_interleaving(
  function forward_backward_pipelining_without_interleaving (line 416) | def forward_backward_pipelining_without_interleaving(

FILE: codegeex/megatron/tokenizer/gpt2_tokenization.py
  function lru_cache (line 33) | def lru_cache():
  function bytes_to_unicode (line 54) | def bytes_to_unicode():
  function get_pairs (line 81) | def get_pairs(word):
  class GPT2Tokenizer (line 94) | class GPT2Tokenizer(object):
    method from_pretrained (line 101) | def from_pretrained(
    method __init__ (line 183) | def __init__(
    method __len__ (line 212) | def __len__(self):
    method set_special_tokens (line 215) | def set_special_tokens(self, special_tokens):
    method bpe (line 230) | def bpe(self, token):
    method tokenize (line 271) | def tokenize(self, text):
    method convert_tokens_to_ids (line 282) | def convert_tokens_to_ids(self, tokens):
    method convert_ids_to_tokens (line 307) | def convert_ids_to_tokens(self, ids, skip_special_tokens=False):
    method encode (line 318) | def encode(self, text):
    method decode (line 321) | def decode(self, tokens):
    method save_vocabulary (line 328) | def save_vocabulary(self, vocab_path):

FILE: codegeex/megatron/tokenizer/tokenizer.py
  function encode_whitespaces (line 26) | def encode_whitespaces(text: str, start_extra_id: int=10, max_len: int=10):
  function decode_whitespaces (line 37) | def decode_whitespaces(text: str, start_extra_id: int=10, max_len: int=10):
  function build_hgf_tokenizer (line 52) | def build_hgf_tokenizer(args):
  function build_tokenizer (line 71) | def build_tokenizer(args):
  function _vocab_size_with_padding (line 98) | def _vocab_size_with_padding(orig_vocab_size, args):
  class AbstractTokenizer (line 118) | class AbstractTokenizer(ABC):
    method __init__ (line 121) | def __init__(self, name):
    method vocab_size (line 127) | def vocab_size(self):
    method vocab (line 132) | def vocab(self):
    method inv_vocab (line 138) | def inv_vocab(self):
    method tokenize (line 143) | def tokenize(self, text):
    method detokenize (line 146) | def detokenize(self, token_ids):
    method cls (line 152) | def cls(self):
    method sep (line 158) | def sep(self):
    method pad (line 164) | def pad(self):
    method eod (line 170) | def eod(self):
    method mask (line 176) | def mask(self):
  class _GPT2BPETokenizer (line 182) | class _GPT2BPETokenizer(AbstractTokenizer):
    method __init__ (line 185) | def __init__(self, vocab_file, merge_file):
    method vocab_size (line 195) | def vocab_size(self):
    method vocab (line 199) | def vocab(self):
    method inv_vocab (line 203) | def inv_vocab(self):
    method tokenize (line 206) | def tokenize(self, text):
    method detokenize (line 209) | def detokenize(self, token_ids):
    method eod (line 213) | def eod(self):
  class HgfTokenizerWrapper (line 217) | class HgfTokenizerWrapper(AbstractTokenizer):
    method __init__ (line 220) | def __init__(
    method tokenize (line 231) | def tokenize(self, text):
    method detokenize (line 238) | def detokenize(self, token_ids):
    method eod (line 245) | def eod(self):
    method inv_vocab (line 249) | def inv_vocab(self):
    method vocab (line 253) | def vocab(self):
    method vocab_size (line 257) | def vocab_size(self):

FILE: codegeex/megatron/tools/collect_env.py
  function main (line 7) | def main():

FILE: codegeex/megatron/tools/finetune_codegeex.py
  function model_provider (line 19) | def model_provider(pre_process=True, post_process=True):
  function get_batch (line 88) | def get_batch(data_iterator):
  function get_batch_pipe (line 128) | def get_batch_pipe(data):
  function loss_func (line 157) | def loss_func(loss_mask, output_tensor):
  function valid_loss_func (line 182) | def valid_loss_func(loss_mask, output_tensor):
  function forward_step (line 200) | def forward_step(data_iterator, model):
  function valid_forward_step (line 215) | def valid_forward_step(data_iterator, model):
  function train_valid_test_datasets_provider (line 230) | def train_valid_test_datasets_provider(train_val_test_num_samples):

FILE: codegeex/megatron/tools/pretrain_codegeex.py
  function model_provider (line 20) | def model_provider(pre_process=True, post_process=True):
  function get_batch (line 89) | def get_batch(data_iterator):
  function get_batch_pipe (line 123) | def get_batch_pipe(data):
  function loss_func (line 152) | def loss_func(loss_mask, output_tensor):
  function forward_step (line 163) | def forward_step(data_iterator, model):
  function train_valid_test_datasets_provider (line 178) | def train_valid_test_datasets_provider(train_val_test_num_samples):
  function command_exists (line 197) | def command_exists(cmd):

FILE: codegeex/megatron/training.py
  function print_datetime (line 70) | def print_datetime(string):
  function pretrain (line 77) | def pretrain(
  function update_train_iters (line 221) | def update_train_iters(args):
  function get_model (line 250) | def get_model(model_provider_func):
  function get_learning_rate_scheduler (line 353) | def get_learning_rate_scheduler(optimizer):
  function setup_model_and_optimizer (line 396) | def setup_model_and_optimizer(model_provider_func):
  function train_step (line 477) | def train_step(forward_step_func, data_iterator, model, optimizer, lr_sc...
  function training_log (line 600) | def training_log(
  function save_checkpoint_and_time (line 870) | def save_checkpoint_and_time(iteration, model, optimizer, lr_scheduler):
  function train (line 882) | def train(
  function evaluate (line 1022) | def evaluate(forward_step_func, data_iterator, model, verbose=False):
  function evaluate_and_print_results (line 1090) | def evaluate_and_print_results(
  function evaluate_and_print_results_gold (line 1157) | def evaluate_and_print_results_gold(
  function cyclic_iter (line 1214) | def cyclic_iter(iter):
  function build_train_valid_test_data_iterators (line 1220) | def build_train_valid_test_data_iterators(build_train_valid_test_dataset...

FILE: codegeex/megatron/utils.py
  function unwrap_model (line 34) | def unwrap_model(model, module_instances=(torchDDP)):
  function calc_params_l2_norm (line 49) | def calc_params_l2_norm(model):
  function average_losses_across_data_parallel_group (line 81) | def average_losses_across_data_parallel_group(losses):
  function report_memory (line 92) | def report_memory(name):
  function print_params_min_max_norm (line 108) | def print_params_min_max_norm(optimizer, iteration):
  function check_adlr_autoresume_termination (line 127) | def check_adlr_autoresume_termination(iteration, model, optimizer, lr_sc...
  function get_ltor_masks_and_position_ids (line 145) | def get_ltor_masks_and_position_ids(
  function get_parameters_in_billions (line 202) | def get_parameters_in_billions(model):
  function flops_calculator (line 222) | def flops_calculator(model, args, iteration_time):

FILE: codegeex/mindspore/convertion_1p.py
  function load_model (line 40) | def load_model(args_opt):
  function export_mindir (line 188) | def export_mindir(model_predict, config):
  function run_predict (line 206) | def run_predict(model_predict, config, args_opt, rank):
  function main (line 246) | def main():

FILE: codegeex/mindspore/finetune.py
  function set_weight_decay (line 57) | def set_weight_decay(params):
  function add_checkpoint_callback_policy (line 72) | def add_checkpoint_callback_policy(args_param, callback, rank_id):
  function set_parallel_context (line 102) | def set_parallel_context(args_opt):
  function run_train (line 121) | def run_train(args_opt):

FILE: codegeex/mindspore/generation.py
  function load_model (line 40) | def load_model(args_opt):
  function export_mindir (line 177) | def export_mindir(model_predict, config):
  function run_predict (line 195) | def run_predict(model_predict, config, args_opt, rank):
  function main (line 249) | def main():

FILE: codegeex/mindspore/generation_1p.py
  function load_model (line 40) | def load_model(args_opt):
  function export_mindir (line 183) | def export_mindir(model_predict, config):
  function run_predict (line 201) | def run_predict(model_predict, config, args_opt, rank):
  function main (line 242) | def main():

FILE: codegeex/mindspore/generation_batch.py
  function load_model (line 40) | def load_model(args_opt):
  function export_mindir (line 175) | def export_mindir(model_predict, config):
  function run_predict (line 193) | def run_predict(model_predict, config, args_opt, rank):
  function main (line 298) | def main():

FILE: codegeex/mindspore/generation_finetune.py
  function load_model (line 41) | def load_model(args_opt):
  function export_mindir (line 178) | def export_mindir(model_predict, config):
  function run_predict (line 196) | def run_predict(model_predict, config, args_opt, rank):
  function main (line 285) | def main():

FILE: codegeex/mindspore/generation_humaneval.py
  function load_model (line 42) | def load_model(args_opt):
  function export_mindir (line 177) | def export_mindir(model_predict, config):
  function run_predict (line 195) | def run_predict(model_predict, config, args_opt, rank):
  function main (line 245) | def main():

FILE: codegeex/mindspore/generation_values.py
  function load_model (line 39) | def load_model(args_opt):
  function run_predict (line 174) | def run_predict(model_predict, config, args_opt, rank):
  function main (line 208) | def main():

FILE: codegeex/mindspore/generation_values_1p.py
  function load_model (line 40) | def load_model(args_opt):
  function export_mindir (line 183) | def export_mindir(model_predict, config):
  function run_predict (line 201) | def run_predict(model_predict, config, args_opt, rank):
  function main (line 243) | def main():

FILE: codegeex/mindspore/save_1p_ckpt_from_8p_ckpt.py
  function set_weight_decay (line 66) | def set_weight_decay(params):
  function add_checkpoint_callback_policy (line 81) | def add_checkpoint_callback_policy(args_param, callback, rank_id):
  function set_parallel_context (line 111) | def set_parallel_context(args_opt):
  function download_ckpt (line 128) | def download_ckpt(args_opt, file_num, rank_num, rank_id):
  function get_needed_model_parallel_list (line 147) | def get_needed_model_parallel_list(train_strategy_file, self_rank):
  function transform_model_parallel (line 160) | def transform_model_parallel(restore_local_ckpt_file_list, train_strateg...
  function run_transform_model_parallel_ckpt (line 213) | def run_transform_model_parallel_ckpt(args_opt):

FILE: codegeex/mindspore/save_8p_ckpt.py
  function set_parallel_context (line 61) | def set_parallel_context(args_opt):
  function download_ckpt (line 78) | def download_ckpt(args_opt, file_num, rank_num, rank_id):
  function get_needed_opt_shard_list (line 95) | def get_needed_opt_shard_list(train_strategy_file, self_rank):
  function transform_opt_shard (line 111) | def transform_opt_shard(restore_local_ckpt_file_list, train_strategy_fil...
  function run_transform_opt_shard_ckpt (line 154) | def run_transform_opt_shard_ckpt(args_opt):

FILE: codegeex/mindspore/scripts/layer_norm.py
  function get_op_support_info (line 43) | def get_op_support_info(input_x, input_gamma, input_beta,
  function _division_sixteen (line 97) | def _division_sixteen(shape, begin_norm_axis):
  function op_select_format (line 121) | def op_select_format(input_x, input_gamma, input_beta,
  function to_frac_z_axis (line 273) | def to_frac_z_axis(ori_shape, ori_axis):
  function _broadcast_nz (line 312) | def _broadcast_nz(tensor, shape):
  function _check_vector_to_cube (line 330) | def _check_vector_to_cube(dtype, ori_shape_x, shape_x, begin_norm_axis, ...
  function nz_non_aligned (line 354) | def nz_non_aligned(input_x, input_gamma, input_beta,
  function layer_norm_compute_nz (line 433) | def layer_norm_compute_nz(input_x, input_gamma, input_beta,
  function layer_norm_compute (line 571) | def layer_norm_compute(input_x, input_gamma, input_beta,
  function is_support_nz_non_aligned (line 709) | def is_support_nz_non_aligned(ori_shape_x, begin_params_axis, impl_mode):
  function layer_norm (line 726) | def layer_norm(input_x, input_gamma, input_beta,
  function __dynamic_template_api (line 917) | def __dynamic_template_api(input_x, input_gamma, input_beta, output_y, o...

FILE: codegeex/mindspore/scripts/layer_norm_x_backprop_v2.py
  function get_op_support_info (line 36) | def get_op_support_info(input_dy,
  function _check_dynamic_format (line 87) | def _check_dynamic_format(shape_dy, shape_gamma, c_0):
  function op_select_format (line 100) | def op_select_format(input_dy,
  function _check_params (line 224) | def _check_params(params_map):
  function _check_shape (line 247) | def _check_shape(params_map):
  function _check_shape_mean (line 285) | def _check_shape_mean(shape_x, shape_mean):
  function _check_shape_gamma (line 325) | def _check_shape_gamma(shape_x, shape_gamma):
  function _broadcast_nz (line 351) | def _broadcast_nz(tensor, shape):
  function _update_gamma_shape (line 366) | def _update_gamma_shape(shape_x, shape_gamma):
  function _get_data_gm (line 398) | def _get_data_gm(shapes, dtype):
  function _get_params (line 426) | def _get_params(shape_x, shape_mean, shape_gamma):
  function _get_pd_xl (line 469) | def _get_pd_xl(data, shape_x):
  function _get_pd_var_front (line 491) | def _get_pd_var_front(data, cast_dtype):
  function _get_pd_var (line 519) | def _get_pd_var(data, params, shape_x, pd_xl, cast_dtype):
  function _get_pd_mean (line 561) | def _get_pd_mean(params, pd_xl, pd_var, var_elta_2, sub_x_mean, cast_dty...
  function _get_pd_x (line 600) | def _get_pd_x(data, params, shape_x, dtype, cast_dtype):
  function _get_res (line 656) | def _get_res(data, params, shape_x, dtype, cast_dtype):
  function _get_pds (line 688) | def _get_pds(data_dy, data_x, data_variance, data_mean, data_gamma, shap...
  function layer_norm_x_backprop_v2_compute (line 748) | def layer_norm_x_backprop_v2_compute(input_dy,
  function update_shape_nz (line 787) | def update_shape_nz(shape_x, shape_var, shape_gamma):
  function _get_data_nz (line 847) | def _get_data_nz(param_nz, dtype):
  function _get_pd_xl_nz (line 863) | def _get_pd_xl_nz(data, param_nz):
  function _get_pd_var_front_nz (line 874) | def _get_pd_var_front_nz(data, cast_dtype):
  function _get_pd_var_nz (line 889) | def _get_pd_var_nz(data, param_nz, pd_xl, cast_dtype):
  function _get_pd_mean_nz (line 907) | def _get_pd_mean_nz(param_nz, pd_xl, pd_var, var_elta_2, sub_x_mean, cas...
  function _get_pd_x_nz (line 920) | def _get_pd_x_nz(data, param_nz, dtype, cast_dtype):
  function _get_res_nz (line 951) | def _get_res_nz(data, param_nz, dtype, cast_dtype):
  function _get_pds_nz (line 961) | def _get_pds_nz(data_dy, data_x, data_variance, data_mean, data_gamma, p...
  function layer_norm_x_back_nz_compute (line 994) | def layer_norm_x_back_nz_compute(data_dy, data_x, data_variance, data_me...
  function layer_norm_x_backprop_v2 (line 1027) | def layer_norm_x_backprop_v2(input_dy,

FILE: codegeex/mindspore/src/adam.py
  function _update_run_kernel (line 36) | def _update_run_kernel(opt, clip_value, beta1, beta2, eps, lr, weight_de...
  function _check_param_value (line 53) | def _check_param_value(beta1, beta2, eps, prim_name):
  class AdamWeightDecayOp (line 63) | class AdamWeightDecayOp(Optimizer):
    method __init__ (line 136) | def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999,...
    method construct (line 156) | def construct(self, gradients, clip_value):
    method clone_param32 (line 182) | def clone_param32(self, prefix, init=None):

FILE: codegeex/mindspore/src/callbacks.py
  class LossCallBack (line 31) | class LossCallBack(Callback):
    method __init__ (line 37) | def __init__(
    method step_end (line 61) | def step_end(self, run_context):
  class EvalCallBack (line 92) | class EvalCallBack(Callback):
    method __init__ (line 102) | def __init__(self, model, eval_dataset, ppl_metric, validation_loss, p...
    method step_end (line 123) | def step_end(self, run_context):
  class SaveCheckpointCallback (line 161) | class SaveCheckpointCallback(Callback):
    method __init__ (line 162) | def __init__(self, cache_dir, bucket, local_rank=0, has_trained_epoch=...
    method step_end (line 175) | def step_end(self, run_context):
    method syn_files (line 183) | def syn_files(self):

FILE: codegeex/mindspore/src/code_tokenizer.py
  function encode_whitespaces (line 8) | def encode_whitespaces(text, start_extra_id: int, max_len: int):
  function decode_whitespaces (line 43) | def decode_whitespaces(text: str, start_extra_id: int, max_len: int):
  class Code13BDictionary (line 58) | class Code13BDictionary(object):
    method __init__ (line 59) | def __init__(
    method _pad_to_vocab_size (line 87) | def _pad_to_vocab_size(self, vocab_size: int):
    method _load_dict (line 94) | def _load_dict(self, dict_file: str):
    method _add_symbol (line 103) | def _add_symbol(self, sym: str, count: int):
    method __len__ (line 109) | def __len__(self):
    method index (line 112) | def index(self, sym: str):
    method string (line 115) | def string(self, idx: int):
    method map_token (line 118) | def map_token(self, token: Union[int, str]):
    method map_tokens (line 123) | def map_tokens(self, tokens):
    method decode_tokens (line 126) | def decode_tokens(self, tokens):
  class CodeTokenizer (line 131) | class CodeTokenizer(object):
    method __init__ (line 132) | def __init__(
    method encode_code (line 149) | def encode_code(self, code: str):
    method decode_code (line 161) | def decode_code(self, input_ids):

FILE: codegeex/mindspore/src/dataset.py
  function get_input_data_batch_slice_map (line 35) | def get_input_data_batch_slice_map(input_ids, eod_id, rank, dis, eod_res...
  function create_dataset (line 80) | def create_dataset(batch_size, data_path, args_opt, device_num=1, rank=0...
  function get_code_data_train (line 173) | def get_code_data_train(code_data_path, args_opt, process_fn=None, scale...
  function get_code_data_eval (line 201) | def get_code_data_eval(code_data_path, args_opt, process_fn=None, scale=1):

FILE: codegeex/mindspore/src/dataset_finetune.py
  function get_input_data_batch_slice_map (line 33) | def get_input_data_batch_slice_map(input_ids, loss_mask, eod_id, rank, d...
  function create_dataset (line 82) | def create_dataset(batch_size, data_path, args_opt, device_num=1, rank=0...
  function get_code_data (line 165) | def get_code_data(code_data_path, split, args_opt, process_fn=None, skip...

FILE: codegeex/mindspore/src/generate.py
  function topk_fun (line 26) | def topk_fun(logits, topk=5):
  function sampler (line 38) | def sampler(log_probs_revised, top_p, top_k_num, use_pynative=False):
  function generate (line 84) | def generate(model, origin_inputs, config, verbose=False):
  function generate_increment (line 170) | def generate_increment(model, origin_inputs, config, verbose=False):

FILE: codegeex/mindspore/src/generate_finetune.py
  function topk_fun (line 28) | def topk_fun(logits, topk=5):
  function sampler (line 35) | def sampler(log_probs_revised, top_p, top_k_num, use_pynative=False, bad...
  function generate_increment (line 78) | def generate_increment(model, origin_inputs, origin_length, config, toke...

FILE: codegeex/mindspore/src/generate_greedy.py
  function topk_fun (line 27) | def topk_fun(logits, topk=5):
  function sampler (line 34) | def sampler(log_probs_revised, top_p, top_k_num, use_pynative=False, bad...
  function generate_increment (line 45) | def generate_increment(model, origin_inputs, origin_length, config, toke...

FILE: codegeex/mindspore/src/generate_humaneval.py
  function is_code_generation_finished (line 25) | def is_code_generation_finished(text: str):
  function cleanup_text (line 37) | def cleanup_text(text: str):
  function truncate_text (line 49) | def truncate_text(text: str):
  function topk_fun (line 63) | def topk_fun(logits, topk=5):
  function sampler (line 77) | def sampler(log_probs_revised, top_p, top_k_num, use_pynative=False, bad...
  function generate_increment (line 130) | def generate_increment(model, origin_inputs, config, tokenizer, verbose=...

FILE: codegeex/mindspore/src/metrics.py
  class PPLMetric (line 26) | class PPLMetric(Metric):
    method __init__ (line 31) | def __init__(self, data_length):
    method clear (line 40) | def clear(self):
    method update (line 45) | def update(self, *inputs):  # inputs
    method eval (line 53) | def eval(self):
  class ValidationLoss (line 67) | class ValidationLoss(Metric):
    method __init__ (line 68) | def __init__(self, data_length):
    method clear (line 77) | def clear(self):
    method update (line 82) | def update(self, *inputs):  # inputs
    method eval (line 94) | def eval(self):

FILE: codegeex/mindspore/src/pangu_alpha.py
  class EmbeddingLayer (line 38) | class EmbeddingLayer(nn.Cell):
    method __init__ (line 41) | def __init__(self, config):
    method construct (line 92) | def construct(
    method get_word_embedding_weight (line 105) | def get_word_embedding_weight(self):
  class QueryLayer (line 109) | class QueryLayer(TransformerEncoderLayer):
    method __init__ (line 112) | def __init__(
    method construct (line 144) | def construct(
  class PanGuHead (line 220) | class PanGuHead(Cell):
    method __init__ (line 232) | def __init__(
    method construct (line 252) | def construct(self, state, embed):
  function set_parallel_configure_for_layer (line 261) | def set_parallel_configure_for_layer(
  class PanguAlpha_Model (line 296) | class PanguAlpha_Model(Cell):
    method __init__ (line 299) | def __init__(self, config):
    method construct (line 388) | def construct(self, input_ids,
    method reshape_to_2d (line 419) | def reshape_to_2d(self, x):
    method load_embedding_from_ckpt (line 427) | def load_embedding_from_ckpt(self, load_ckpt_path):
  class PanguAlphaModel (line 461) | class PanguAlphaModel(nn.Cell):
    method __init__ (line 474) | def __init__(self, config):
    method construct (line 488) | def construct(self, input_ids, input_position, attention_mask,
  class PanGUAlphaWithLoss (line 496) | class PanGUAlphaWithLoss(Cell):
    method __init__ (line 508) | def __init__(self, config, network, loss):
    method construct (line 527) | def construct(self, input_ids, input_position=None, attention_mask=None):
  class EvalNet (line 547) | class EvalNet(nn.Cell):
    method __init__ (line 561) | def __init__(self, backbone, generate=False, pad_token=6, seq_length=2...
    method construct (line 575) | def construct(self, input_ids, current_index, init_reset=True, batch_v...
  class LogitsNet (line 597) | class LogitsNet(nn.Cell):
    method __init__ (line 610) | def __init__(self, backbone, generate=False, pad_token=6, seq_length=2...
    method construct (line 624) | def construct(self, input_ids, init_reset=True, batch_valid_length=Non...
  class PanGUAlphaWithFinetuneLoss (line 642) | class PanGUAlphaWithFinetuneLoss(Cell):
    method __init__ (line 654) | def __init__(self, config, network, loss):
    method construct (line 673) | def construct(self, input_ids, loss_mask, input_position, attention_ma...

FILE: codegeex/mindspore/src/pangu_alpha_config.py
  class PanguAlphaConfig (line 21) | class PanguAlphaConfig:
    method __init__ (line 26) | def __init__(self,
    method __str__ (line 71) | def __str__(self):
  function set_parse (line 80) | def set_parse(args_opt):

FILE: codegeex/mindspore/src/pangu_alpha_fp16_predict.py
  class EmbeddingLayer (line 37) | class EmbeddingLayer(nn.Cell):
    method __init__ (line 40) | def __init__(self, config):
    method construct (line 89) | def construct(
    method get_word_embedding_weight (line 101) | def get_word_embedding_weight(self):
  class QueryLayer (line 105) | class QueryLayer(TransformerEncoderLayer):
    method __init__ (line 108) | def __init__(
    method construct (line 140) | def construct(
  class PanGuHead (line 216) | class PanGuHead(Cell):
    method __init__ (line 228) | def __init__(
    method construct (line 248) | def construct(self, state, embed):
  function set_parallel_configure_for_layer (line 257) | def set_parallel_configure_for_layer(
  class PanguAlpha_Model (line 292) | class PanguAlpha_Model(Cell):
    method __init__ (line 295) | def __init__(self, config):
    method construct (line 383) | def construct(self, input_ids,
    method reshape_to_2d (line 422) | def reshape_to_2d(self, x):
    method load_embedding_from_ckpt (line 430) | def load_embedding_from_ckpt(self, load_ckpt_path):
  class PanguAlphaModel (line 471) | class PanguAlphaModel(nn.Cell):
    method __init__ (line 484) | def __init__(self, config):
    method construct (line 499) | def construct(self, input_ids, input_position, attention_mask,
  class PanGUAlphaWithLoss (line 511) | class PanGUAlphaWithLoss(Cell):
    method __init__ (line 523) | def __init__(self, config, network, loss):
    method construct (line 542) | def construct(self, input_ids, input_position=None, attention_mask=None):
  class EvalNet (line 562) | class EvalNet(nn.Cell):
    method __init__ (line 576) | def __init__(self, backbone, generate=False, pad_token=6, seq_length=2...
    method construct (line 590) | def construct(self, input_ids, current_index, init_reset=True, batch_v...

FILE: codegeex/mindspore/src/pangu_alpha_wrapcell.py
  function _clip_grad (line 37) | def _clip_grad(clip_type, clip_value, grad):
  function tensor_grad_scale (line 72) | def tensor_grad_scale(scale, grad):
  function tensor_grad_scale_pipeline (line 77) | def tensor_grad_scale_pipeline(scale, grad, accu_grad):
  function tensor_shard_grad_scale_pipeline (line 87) | def tensor_shard_grad_scale_pipeline(scale, grad, accu_grad):
  class PanguAlphaTrainOneStepWithLossScaleCell (line 94) | class PanguAlphaTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleC...
    method __init__ (line 107) | def __init__(
    method construct (line 130) | def construct(self, input_ids, input_position, attention_mask, layer_p...
  class PanguAlphaTrainPipelineWithLossScaleCell (line 168) | class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell):
    method __init__ (line 181) | def __init__(self, network, optimizer, config, scale_update_cell=None,...
    method construct (line 221) | def construct(

FILE: codegeex/mindspore/src/pangu_alpha_wrapcell_finetune.py
  function _clip_grad (line 37) | def _clip_grad(clip_type, clip_value, grad):
  function tensor_grad_scale (line 72) | def tensor_grad_scale(scale, grad):
  function tensor_grad_scale_pipeline (line 77) | def tensor_grad_scale_pipeline(scale, grad, accu_grad):
  function tensor_shard_grad_scale_pipeline (line 87) | def tensor_shard_grad_scale_pipeline(scale, grad, accu_grad):
  class PanguAlphaTrainOneStepWithLossScaleCell (line 94) | class PanguAlphaTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleC...
    method __init__ (line 107) | def __init__(
    method construct (line 130) | def construct(self, input_ids, loss_mask, input_position, attention_ma...
  class PanguAlphaTrainPipelineWithLossScaleCell (line 168) | class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell):
    method __init__ (line 181) | def __init__(self, network, optimizer, config, scale_update_cell=None,...
    method construct (line 221) | def construct(

FILE: codegeex/mindspore/src/preprocess.py
  function chunks (line 32) | def chunks(lst, n):
  function package_file (line 38) | def package_file(it, n):
  function clean_wikitext (line 53) | def clean_wikitext(string):
  function tokenize_openwebtext (line 87) | def tokenize_openwebtext(tokenizer, iterator, seq_length, eot):
  function tokenize_wiki (line 107) | def tokenize_wiki(tokenizer, file_path, seq_length, eot):
  function tokenize_lambada (line 124) | def tokenize_lambada(tokenizer, file_path, seq_length, eot):
  function task_unit (line 145) | def task_unit(iterator, tokenizer, seq_length, eot, parallel_writer=True):

FILE: codegeex/mindspore/src/sat_dataset.py
  class Dataset (line 8) | class Dataset(ABC):
    method __len__ (line 10) | def __len__(self):
    method __getitem__ (line 14) | def __getitem__(self, idx):
  class LMDBDataset (line 18) | class LMDBDataset(Dataset):
    method __init__ (line 19) | def __init__(self, path, process_fn=None):
    method __len__ (line 38) | def __len__(self):
    method __getitem__ (line 41) | def __getitem__(self, idx):
  class PadDataset (line 56) | class PadDataset(Dataset):
    method __init__ (line 57) | def __init__(self, dataset, seq_len, eod_id):
    method __len__ (line 62) | def __len__(self):
    method __getitem__ (line 65) | def __getitem__(self, idx):
  class BinaryDataset (line 72) | class BinaryDataset(Dataset):
    method __init__ (line 73) | def __init__(
    method __len__ (line 100) | def __len__(self):
    method __getitem__ (line 103) | def __getitem__(self, index):
  class TSVDataset (line 108) | class TSVDataset(Dataset):
    method __init__ (line 109) | def __init__(self, path, process_fn, with_heads=True, **kwargs):
    method __len__ (line 118) | def __len__(self):
    method __getitem__ (line 121) | def __getitem__(self, index):
  class ConcatDataset (line 125) | class ConcatDataset(Dataset):
    method cumsum (line 136) | def cumsum(sequence, weights):
    method __init__ (line 144) | def __init__(self, datasets, weights=None, skip_num=0, **kwargs):
    method __len__ (line 158) | def __len__(self):
    method __getitem__ (line 161) | def __getitem__(self, idx):
  class RandomMappingDataset (line 175) | class RandomMappingDataset(Dataset):
    method __init__ (line 181) | def __init__(self, ds):
    method __len__ (line 185) | def __len__(self):
    method __getitem__ (line 188) | def __getitem__(self, index):
  class BlockedSplitDataset (line 197) | class BlockedSplitDataset(Dataset):
    method __init__ (line 204) | def __init__(self, ds, indices, block_size):
    method __len__ (line 214) | def __len__(self):
    method __getitem__ (line 217) | def __getitem__(self, index):
  class SubsetDataset (line 223) | class SubsetDataset(Dataset):
    method __init__ (line 224) | def __init__(self, ds, start, length):
    method __len__ (line 230) | def __len__(self):
    method __getitem__ (line 233) | def __getitem__(self, idx):
  function split_train_val_test (line 239) | def split_train_val_test(ds, split=[0.99, 0.01, 0.0], seed=None):

FILE: codegeex/mindspore/src/tokenization_jieba.py
  class JIEBATokenizer (line 25) | class JIEBATokenizer():
    method __init__ (line 30) | def __init__(self, vocab_file, model_file, max_len=None):
    method vocab_size (line 49) | def vocab_size(self):
    method __len__ (line 52) | def __len__(self):
    method eod (line 56) | def eod(self):
    method tokenize (line 59) | def tokenize(self, text):
    method convert_tokens_to_ids (line 65) | def convert_tokens_to_ids(self, tokens):
    method convert_ids_to_tokens (line 68) | def convert_ids_to_tokens(self, ids):
    method encode (line 71) | def encode(self, text):
    method decode (line 75) | def decode(self, tokens):

FILE: codegeex/mindspore/src/utils.py
  class FP32StateAdamWeightDecay (line 40) | class FP32StateAdamWeightDecay(AdamWeightDecay):
    method __init__ (line 49) | def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999,...
    method clone_state (line 59) | def clone_state(self, parameter_tuple, prefix, init):
  function _get_square_sum (line 80) | def _get_square_sum(grad, value):
  function _apply_global_norm (line 90) | def _apply_global_norm(enable_grad_fp16, clip_norm, global_norm, grad):
  function _get_model_parallel_group (line 98) | def _get_model_parallel_group(mp):
  function _get_pipeline_group (line 118) | def _get_pipeline_group():
  class GlobalNorm (line 136) | class GlobalNorm(nn.Cell):
    method __init__ (line 141) | def __init__(self, params, config):
    method construct (line 193) | def construct(self, grads):
  class ClipByGlobalNorm (line 206) | class ClipByGlobalNorm(nn.Cell):
    method __init__ (line 213) | def __init__(self, params, config, clip_norm=1.0):
    method construct (line 223) | def construct(self, grads):
  class LearningRate (line 232) | class LearningRate(LearningRateSchedule):
    method __init__ (line 237) | def __init__(self,
    method construct (line 261) | def construct(self, global_step):
  function add_inference_params (line 278) | def add_inference_params(opt):
  function add_training_params (line 315) | def add_training_params(opt):
  function add_retrain_params (line 419) | def add_retrain_params(opt):
  function get_args (line 461) | def get_args(inference=False):
  function download_data (line 587) | def download_data(src_data_url, tgt_data_path, rank):

FILE: codegeex/mindspore/train.py
  function set_weight_decay (line 59) | def set_weight_decay(params):
  function add_checkpoint_callback_policy (line 74) | def add_checkpoint_callback_policy(args_param, callback, rank_id):
  function set_parallel_context (line 104) | def set_parallel_context(args_opt):
  function run_train (line 121) | def run_train(args_opt):
  function restore_checkpoint (line 333) | def restore_checkpoint(args_param, sink_size, dataset, model, network, e...
  function get_exception_checkpoints (line 376) | def get_exception_checkpoints(args_param):
  function check_exception_checkpoints (line 408) | def check_exception_checkpoints(ckpt_file_list):
  function restore_exception_checkpoint (line 425) | def restore_exception_checkpoint(args_param, sink_size, dataset, model, ...
  function set_pipeline_parallel_context (line 487) | def set_pipeline_parallel_context(args_opt):
  function run_train_pipeline (line 506) | def run_train_pipeline(args_opt):

FILE: codegeex/oneflow/codegeex_model.py
  function fast_gelu (line 7) | def fast_gelu(x):
  class MLP (line 14) | class MLP(torch.nn.Module):
    method __init__ (line 22) | def __init__(
    method forward (line 42) | def forward(self, hidden_states):
  class SelfAttention (line 52) | class SelfAttention(torch.nn.Module):
    method __init__ (line 58) | def __init__(
    method forward (line 85) | def forward(
  class TopQuerySelfAttention (line 279) | class TopQuerySelfAttention(torch.nn.Module):
    method __init__ (line 285) | def __init__(
    method forward (line 312) | def forward(
  class TransformerLayer (line 494) | class TransformerLayer(torch.nn.Module):
    method __init__ (line 500) | def __init__(
    method forward (line 530) | def forward(
  class TopQueryLayer (line 573) | class TopQueryLayer(torch.nn.Module):
    method __init__ (line 579) | def __init__(
    method forward (line 607) | def forward(
  class Transformer (line 657) | class Transformer(torch.nn.Module):
    method __init__ (line 660) | def __init__(
    method _get_layer_index (line 698) | def _get_layer_index(self, layer_number):
    method _get_layer (line 701) | def _get_layer(self, layer_number):
    method forward (line 704) | def forward(
    method state_dict_for_save_checkpoint (line 767) | def state_dict_for_save_checkpoint(
  class Embedding (line 773) | class Embedding(torch.nn.Module):
    method __init__ (line 782) | def __init__(
    method forward (line 802) | def forward(self, input_ids, position_ids):
    method state_dict_for_save_checkpoint (line 810) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 823) | def load_state_dict(self, state_dict, strict=True):
  class QueryEmbedding (line 852) | class QueryEmbedding(torch.nn.Module):
    method __init__ (line 861) | def __init__(
    method forward (line 878) | def forward(self, position_ids):
    method state_dict_for_save_checkpoint (line 884) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 895) | def load_state_dict(self, state_dict, strict=True):
  class TransformerLanguageModel (line 911) | class TransformerLanguageModel(torch.nn.Module):
    method __init__ (line 926) | def __init__(
    method forward (line 959) | def forward(
    method state_dict_for_save_checkpoint (line 986) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 1003) | def load_state_dict(self, state_dict, strict=True):
  class CodeGeeXModel (line 1039) | class CodeGeeXModel(torch.nn.Module):
    method __init__ (line 1042) | def __init__(
    method forward (line 1059) | def forward(
    method state_dict_for_save_checkpoint (line 1088) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 1097) | def load_state_dict(self, state_dict, strict=True):

FILE: codegeex/oneflow/inference.py
  function get_ltor_masks_and_position_ids (line 12) | def get_ltor_masks_and_position_ids(
  function get_batch (line 67) | def get_batch(
  function top_k_logits (line 87) | def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")):
  function pad_batch (line 117) | def pad_batch(batch, pad_id, seq_length):
  function forward_step (line 127) | def forward_step(
  function get_token_stream (line 158) | def get_token_stream(
  function switch (line 212) | def switch(val1, val2, boolean):
  function sample_sequence_batch (line 217) | def sample_sequence_batch(

FILE: codegeex/paddle/codegeex_model.py
  function fast_gelu (line 6) | def fast_gelu(x):
  class MLP (line 11) | class MLP(paddle.nn.Layer):
    method __init__ (line 20) | def __init__(
    method forward (line 40) | def forward(self, hidden_states):
  class SelfAttention (line 50) | class SelfAttention(paddle.nn.Layer):
    method __init__ (line 57) | def __init__(
    method forward (line 84) | def forward(
  class TopQuerySelfAttention (line 226) | class TopQuerySelfAttention(paddle.nn.Layer):
    method __init__ (line 233) | def __init__(
    method forward (line 260) | def forward(
  class TransformerLayer (line 400) | class TransformerLayer(paddle.nn.Layer):
    method __init__ (line 407) | def __init__(
    method forward (line 437) | def forward(
  class TopQueryLayer (line 478) | class TopQueryLayer(paddle.nn.Layer):
    method __init__ (line 485) | def __init__(
    method forward (line 513) | def forward(
  class Transformer (line 563) | class Transformer(paddle.nn.Layer):
    method __init__ (line 566) | def __init__(
    method _get_layer_index (line 604) | def _get_layer_index(self, layer_number):
    method _get_layer (line 607) | def _get_layer(self, layer_number):
    method forward (line 610) | def forward(
    method state_dict_for_save_checkpoint (line 672) | def state_dict_for_save_checkpoint(
  class Embedding (line 678) | class Embedding(paddle.nn.Layer):
    method __init__ (line 688) | def __init__(
    method forward (line 708) | def forward(self, input_ids, position_ids):
    method state_dict_for_save_checkpoint (line 716) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method set_state_dict (line 729) | def set_state_dict(self, state_dict, use_structured_name=True):
  class QueryEmbedding (line 758) | class QueryEmbedding(paddle.nn.Layer):
    method __init__ (line 768) | def __init__(
    method forward (line 785) | def forward(self, position_ids):
    method state_dict_for_save_checkpoint (line 791) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method set_state_dict (line 802) | def set_state_dict(self, state_dict, use_structured_name=True):
  class TransformerLanguageModel (line 818) | class TransformerLanguageModel(paddle.nn.Layer):
    method __init__ (line 834) | def __init__(
    method forward (line 867) | def forward(
    method state_dict_for_save_checkpoint (line 894) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method set_state_dict (line 911) | def set_state_dict(self, state_dict, use_structured_name=True):
  class CodeGeeXModel (line 947) | class CodeGeeXModel(paddle.nn.Layer):
    method __init__ (line 950) | def __init__(
    method forward (line 967) | def forward(
    method state_dict_for_save_checkpoint (line 996) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method set_state_dict (line 1005) | def set_state_dict(self, state_dict, use_structured_name=True):

FILE: codegeex/paddle/inference.py
  function get_ltor_masks_and_position_ids (line 12) | def get_ltor_masks_and_position_ids(
  function get_batch (line 67) | def get_batch(
  function top_k_logits (line 87) | def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")):
  function pad_batch (line 117) | def pad_batch(batch, pad_id, seq_length):
  function forward_step (line 127) | def forward_step(
  function get_token_stream (line 158) | def get_token_stream(
  function switch (line 212) | def switch(val1, val2, boolean):
  function sample_sequence_batch (line 217) | def sample_sequence_batch(

FILE: codegeex/paddle/pt_to_pdparams.py
  function WalkDict (line 15) | def WalkDict(x):
  function parse_opt (line 29) | def parse_opt():
  function main (line 47) | def main(opt):

FILE: codegeex/quantization/quantize.py
  class W8A16Linear (line 9) | class W8A16Linear(torch.autograd.Function):
    method forward (line 11) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to...
    method backward (line 23) | def backward(ctx, grad_output: torch.Tensor):
  class QuantizedLinear (line 32) | class QuantizedLinear(torch.nn.Module):
    method __init__ (line 33) | def __init__(
    method forward (line 68) | def forward(self, input_):
  class QuantizedColumnParallelLinear (line 77) | class QuantizedColumnParallelLinear(ColumnParallelLinear):
    method __init__ (line 78) | def __init__(
    method forward (line 118) | def forward(self, input_):
  class QuantizedRowParallelLinear (line 136) | class QuantizedRowParallelLinear(RowParallelLinear):
    method __init__ (line 137) | def __init__(
    method forward (line 177) | def forward(self, input_):
  function quantize (line 196) | def quantize(model, weight_bit_width, backend="torch"):

FILE: codegeex/quantization/quantize_oneflow.py
  function _pack_int8_to_int4 (line 5) | def _pack_int8_to_int4(x):
  function _quantize (line 16) | def _quantize(num_bits, symmetric, x, group_dim, group_size, quant_type):
  class QuantizedLinear (line 53) | class QuantizedLinear(torch.nn.Module):
    method __init__ (line 54) | def __init__(
    method forward (line 89) | def forward(self, input_):
  function quantize_oneflow (line 103) | def quantize_oneflow(model, weight_bit_width):

FILE: codegeex/tokenizer/tokenizer.py
  function encode_whitespaces (line 7) | def encode_whitespaces(text: str, start_extra_id: int, max_len: int):
  function decode_whitespaces (line 18) | def decode_whitespaces(text: str, start_extra_id: int, max_len: int):
  class CodeGeeXTokenizer (line 33) | class CodeGeeXTokenizer(object):
    method __init__ (line 34) | def __init__(
    method encode_code (line 51) | def encode_code(self, code: str):
    method decode_code (line 58) | def decode_code(self, input_ids):

FILE: codegeex/torch/codegeex_model.py
  function fast_gelu (line 7) | def fast_gelu(x):
  class MLP (line 12) | class MLP(torch.nn.Module):
    method __init__ (line 21) | def __init__(
    method forward (line 41) | def forward(self, hidden_states):
  class SelfAttention (line 51) | class SelfAttention(torch.nn.Module):
    method __init__ (line 58) | def __init__(
    method forward (line 85) | def forward(
  class TopQuerySelfAttention (line 227) | class TopQuerySelfAttention(torch.nn.Module):
    method __init__ (line 234) | def __init__(
    method forward (line 261) | def forward(
  class TransformerLayer (line 401) | class TransformerLayer(torch.nn.Module):
    method __init__ (line 408) | def __init__(
    method forward (line 438) | def forward(
  class TopQueryLayer (line 479) | class TopQueryLayer(torch.nn.Module):
    method __init__ (line 486) | def __init__(
    method forward (line 514) | def forward(
  class Transformer (line 564) | class Transformer(torch.nn.Module):
    method __init__ (line 567) | def __init__(
    method _get_layer_index (line 605) | def _get_layer_index(self, layer_number):
    method _get_layer (line 608) | def _get_layer(self, layer_number):
    method forward (line 611) | def forward(
    method state_dict_for_save_checkpoint (line 673) | def state_dict_for_save_checkpoint(
  class Embedding (line 679) | class Embedding(torch.nn.Module):
    method __init__ (line 689) | def __init__(
    method forward (line 709) | def forward(self, input_ids, position_ids):
    method state_dict_for_save_checkpoint (line 717) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 730) | def load_state_dict(self, state_dict, strict=True):
  class QueryEmbedding (line 759) | class QueryEmbedding(torch.nn.Module):
    method __init__ (line 769) | def __init__(
    method forward (line 786) | def forward(self, position_ids):
    method state_dict_for_save_checkpoint (line 792) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 803) | def load_state_dict(self, state_dict, strict=True):
  class TransformerLanguageModel (line 819) | class TransformerLanguageModel(torch.nn.Module):
    method __init__ (line 835) | def __init__(
    method forward (line 868) | def forward(
    method state_dict_for_save_checkpoint (line 895) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 912) | def load_state_dict(self, state_dict, strict=True):
  class CodeGeeXModel (line 948) | class CodeGeeXModel(torch.nn.Module):
    method __init__ (line 951) | def __init__(
    method forward (line 968) | def forward(
    method state_dict_for_save_checkpoint (line 997) | def state_dict_for_save_checkpoint(self, destination=None, prefix='',
    method load_state_dict (line 1006) | def load_state_dict(self, state_dict, strict=True):

FILE: codegeex/torch/get_ckpt_qkv.py
  function main (line 9) | def main():

FILE: codegeex/torch/inference.py
  function get_ltor_masks_and_position_ids (line 12) | def get_ltor_masks_and_position_ids(
  function get_batch (line 67) | def get_batch(
  function top_k_logits (line 87) | def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")):
  function pad_batch (line 117) | def pad_batch(batch, pad_id, seq_length):
  function forward_step (line 127) | def forward_step(
  function get_token_stream (line 158) | def get_token_stream(
  function switch (line 212) | def switch(val1, val2, boolean):
  function sample_sequence_batch (line 217) | def sample_sequence_batch(

FILE: deployment/server_gradio.py
  function model_provider (line 14) | def model_provider(args):
  function add_code_generation_args (line 27) | def add_code_generation_args(parser):
  function main (line 85) | def main():

FILE: tests/test_inference.py
  function model_provider (line 12) | def model_provider(args):
  function add_code_generation_args (line 26) | def add_code_generation_args(parser):
  function main (line 118) | def main():

FILE: tests/test_inference_megatron.py
  function set_random_seed (line 17) | def set_random_seed(seed):
  function model_provider (line 26) | def model_provider(pre_process=True, post_process=True):
  function add_code_generation_args (line 36) | def add_code_generation_args(parser):
  function main (line 136) | def main():

FILE: tests/test_inference_oneflow.py
  function model_provider (line 17) | def model_provider(args):
  function add_code_generation_args (line 31) | def add_code_generation_args(parser):
  function main (line 119) | def main():

FILE: tests/test_inference_paddle.py
  function model_provider (line 15) | def model_provider(args):
  function add_code_generation_args (line 41) | def add_code_generation_args(parser):
  function main (line 129) | def main():