SYMBOL INDEX (1224 symbols across 114 files) FILE: api/codegeex-api-example-java/src/main/java/cn/aminer/codegeex/example/CodeGenerationExample.java class CodeGenerationExample (line 15) | public class CodeGenerationExample { method main (line 22) | public static void main(String[] args) throws Exception { method generateCode (line 35) | public void generateCode(String prompt) throws Exception { method performHttpPost (line 50) | public String performHttpPost(String url, String payload) { FILE: api/codegeex-api-example-java/src/main/java/cn/aminer/codegeex/example/pojo/Payload.java class Payload (line 14) | @JsonIgnoreProperties(ignoreUnknown = true) FILE: api/codegeex-api-example-python/generation_example.py function main (line 33) | def main(): FILE: codegeex/__init__.py function get_model (line 8) | def get_model( function generate (line 15) | def generate( FILE: codegeex/benchmark/evaluate_humaneval_x.py function process_humaneval_test (line 27) | def process_humaneval_test(sample, problems, example_test=False): function stream_jsonl_all (line 85) | def stream_jsonl_all(filename: str) -> Iterable[Dict]: function evaluate_functional_correctness (line 99) | def evaluate_functional_correctness( function main (line 234) | def main(): FILE: codegeex/benchmark/execution.py function dicts_to_jsonl (line 15) | def dicts_to_jsonl(data_list: list, filename: str, compress: bool = True... function check_correctness (line 44) | def check_correctness( function time_limit (line 409) | def time_limit(seconds: float): function swallow_io (line 422) | def swallow_io(): function create_tempdir (line 431) | def create_tempdir(): class TimeoutException (line 437) | class TimeoutException(Exception): class WriteOnlyStringIO (line 441) | class WriteOnlyStringIO(io.StringIO): method read (line 444) | def read(self, *args, **kwargs): method readline (line 447) | def readline(self, *args, **kwargs): method readlines (line 450) | def readlines(self, *args, **kwargs): method readable (line 453) | def readable(self, *args, **kwargs): class redirect_stdin (line 458) | class redirect_stdin(contextlib._RedirectStream): # type: ignore function chdir (line 463) | def chdir(root): function reliability_guard (line 477) | def reliability_guard(maximum_memory_bytes: Optional[int] = None): FILE: codegeex/benchmark/gather_output.py function gather_output (line 7) | def gather_output( function main (line 51) | def main(): FILE: codegeex/benchmark/humaneval-x/cpp/evaluation/test.cpp function separate_paren_groups (line 13) | vector separate_paren_groups(string paren_string){ function issame (line 43) | bool issame(vector a,vectorb){ function main (line 51) | int main(){ FILE: codegeex/benchmark/humaneval-x/evaluate_humaneval_x.py function process_humaneval_test (line 27) | def process_humaneval_test(sample, problems, example_test=False): function stream_jsonl_all (line 85) | def stream_jsonl_all(filename: str) -> Iterable[Dict]: function evaluate_functional_correctness (line 99) | def evaluate_functional_correctness( function main (line 234) | def main(): FILE: codegeex/benchmark/humaneval-x/generate_humaneval_x.py function add_code_generation_args (line 21) | def add_code_generation_args(parser): function main (line 202) | def main(node_rank: int, local_rank: int, master_port: int, num_devices:... function server (line 250) | def server(): FILE: codegeex/benchmark/humaneval-x/translate_humaneval_x.py function add_code_generate_args (line 19) | def add_code_generate_args(parser): function main (line 228) | def main(node_rank: int, local_rank: int, master_port: int, num_devices:... function server (line 274) | def server(): FILE: codegeex/benchmark/inspect_result.py function inspect_result (line 73) | def inspect_result( function main (line 281) | def main(): FILE: codegeex/benchmark/metric.py function estimate_pass_at_k (line 27) | def estimate_pass_at_k( FILE: codegeex/benchmark/utils.py function read_dataset (line 51) | def read_dataset( function read_translation_dataset (line 69) | def read_translation_dataset( function process_extra_prompt (line 102) | def process_extra_prompt(prompt: str, language_type: str = None) -> str: function is_code_generation_finished (line 115) | def is_code_generation_finished( function cleanup_code (line 151) | def cleanup_code( FILE: codegeex/data/data_utils.py function stream_jsonl (line 67) | def stream_jsonl(filename: str) -> Iterable[Dict]: function write_jsonl (line 84) | def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False): function sliding_window (line 104) | def sliding_window( FILE: codegeex/data/process_pretrain_dataset.py function try_format_code (line 19) | def try_format_code(code: str): function load_pretrain_dataset (line 32) | def load_pretrain_dataset(dataset_path: Union[str, List[str]]) -> Dict: function process_sample (line 49) | def process_sample( function generate_prompt_samples (line 72) | def generate_prompt_samples( function main (line 81) | def main( FILE: codegeex/data/processor.py class PromptDatasetProcessor (line 8) | class PromptDatasetProcessor(object): method __init__ (line 9) | def __init__( method pad_seq (line 34) | def pad_seq(self, prompt_tokens: List[int], code_tokens: List[int], ex... method process_sample (line 48) | def process_sample(self, sample: PromptSample) -> Iterable[Dict[str, L... method process_sample_strict (line 66) | def process_sample_strict(self, sample: PromptSample) -> List[Dict[str... method process_sample_ (line 75) | def process_sample_(self, sample) -> List[Dict[str, List[int]]]: method report (line 79) | def report(self): class LabelDatasetProcessor (line 88) | class LabelDatasetProcessor(object): method __init__ (line 89) | def __init__( method pad_seq (line 114) | def pad_seq(self, prompt_tokens: List[int], label: int, extra: dict = ... method process_sample (line 128) | def process_sample(self, sample: LabelSample) -> Iterable[Dict[str, Li... method process_sample_strict (line 143) | def process_sample_strict(self, sample: LabelSample) -> List[Dict[str,... method process_sample_ (line 152) | def process_sample_(self, sample) -> List[Dict[str, List[int]]]: method report (line 156) | def report(self): FILE: codegeex/data/types.py class PromptSample (line 6) | class PromptSample: class LabelSample (line 15) | class LabelSample: FILE: codegeex/kernels/__init__.py class Kernel (line 11) | class Kernel: method __init__ (line 12) | def __init__(self, filename: str, function_names: List[str]): function compress_int4_weight (line 37) | def compress_int4_weight(weight: torch.Tensor): # (n, m) function extract_weight_to_half (line 58) | def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tenso... FILE: codegeex/megatron/__init__.py function print_rank_0 (line 28) | def print_rank_0(message): function is_last_rank (line 37) | def is_last_rank(): function print_rank_last (line 41) | def print_rank_last(message): FILE: codegeex/megatron/arguments.py function parse_args (line 25) | def parse_args(extra_args_provider=None, defaults={}, ignore_unknown_arg... function _print_args (line 302) | def _print_args(args): function _check_arg_is_not_none (line 315) | def _check_arg_is_not_none(args, arg): function _add_network_size_args (line 319) | def _add_network_size_args(parser): function _add_logging_args (line 426) | def _add_logging_args(parser): function _add_regularization_args (line 495) | def _add_regularization_args(parser): function _add_training_args (line 589) | def _add_training_args(parser): function _add_initialization_args (line 748) | def _add_initialization_args(parser): function _add_inference_args (line 773) | def _add_inference_args(parser): function _add_learning_rate_args (line 806) | def _add_learning_rate_args(parser): function _add_checkpointing_args (line 898) | def _add_checkpointing_args(parser): function _add_mixed_precision_args (line 973) | def _add_mixed_precision_args(parser): function _add_distributed_args (line 1040) | def _add_distributed_args(parser): function _add_validation_args (line 1128) | def _add_validation_args(parser): function _add_data_args (line 1152) | def _add_data_args(parser): function _add_autoresume_args (line 1298) | def _add_autoresume_args(parser): function _add_biencoder_args (line 1316) | def _add_biencoder_args(parser): function _add_vit_args (line 1425) | def _add_vit_args(parser): function _add_zero_args (line 1453) | def _add_zero_args(parser): function _add_memoryopt_args (line 1485) | def _add_memoryopt_args(parser): function _add_activation_checkpoint_args (line 1520) | def _add_activation_checkpoint_args(parser): FILE: codegeex/megatron/checkpointing.py function set_checkpoint_version (line 31) | def set_checkpoint_version(value): function get_checkpoint_version (line 38) | def get_checkpoint_version(): function check_checkpoint_args (line 43) | def check_checkpoint_args(checkpoint_args): function ensure_directory_exists (line 75) | def ensure_directory_exists(filename): function get_checkpoint_name (line 82) | def get_checkpoint_name(checkpoints_path, iteration, release=False): function get_checkpoint_tracker_filename (line 104) | def get_checkpoint_tracker_filename(checkpoints_path): function save_checkpoint (line 110) | def save_checkpoint(iteration, model, optimizer, lr_scheduler): function _transpose_first_dim (line 205) | def _transpose_first_dim(t, num_splits, num_splits_first, model): function fix_query_key_value_ordering (line 249) | def fix_query_key_value_ordering(model, checkpoint_version): function load_deepspeed_state (line 282) | def load_deepspeed_state(model): function load_checkpoint (line 301) | def load_checkpoint( function load_biencoder_checkpoint (line 508) | def load_biencoder_checkpoint( FILE: codegeex/megatron/code_generation_utils.py function get_batch (line 35) | def get_batch(context_tokens, micro_batch_size=None): function get_batch_ (line 56) | def get_batch_(context_tokens): function top_k_logits (line 75) | def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")): function generate_samples_input_from_file (line 105) | def generate_samples_input_from_file(model): function generate_samples_eval (line 225) | def generate_samples_eval(model, context, max_gen_length, eos_token_id): function generate_samples_interactive_code_contest (line 252) | def generate_samples_interactive_code_contest(model, print_frequency=10): function generate_samples_interactive (line 377) | def generate_samples_interactive(model, print_frequency=24): function generate_samples_unconditional (line 487) | def generate_samples_unconditional(model): function generate_and_write_samples_unconditional (line 528) | def generate_and_write_samples_unconditional(model): function pad_batch (line 540) | def pad_batch(batch, pad_id, args): function topk_sampling (line 550) | def topk_sampling(logits: torch.FloatTensor, num_samples: int): function nuclear_sampling (line 566) | def nuclear_sampling(logits: torch.FloatTensor, temperature: float, top_... function sample_topk_tokens (line 579) | def sample_topk_tokens(model, function nuclear_sample_tokens (line 600) | def nuclear_sample_tokens(model, class Beam (line 621) | class Beam: method __repr__ (line 625) | def __repr__(self): method get_code (line 628) | def get_code(self): function expand_beams (line 632) | def expand_beams(beams: List[Beam], num_beams: int, model) -> List[Beam]: function beam_search (line 661) | def beam_search(model, context_tokens, num_beams: int): class Handle (line 727) | class Handle: method __repr__ (line 731) | def __repr__(self): method is_finished (line 734) | def is_finished(self): method derived (line 737) | def derived(self, new_token: int, log_prob: float): function expand_handles (line 742) | def expand_handles(handles: List[Handle], temperature: float, top_p: flo... function generate_nuclear_sampling (line 771) | def generate_nuclear_sampling(model, context_tokens, num_samples: int, t... function forward_step (line 801) | def forward_step( function get_token_stream (line 841) | def get_token_stream( function switch (line 900) | def switch(val1, val2, boolean): function sample_sequence_batch (line 905) | def sample_sequence_batch( FILE: codegeex/megatron/convert_ckpt_parallel.py function get_change_ckpt_args (line 8) | def get_change_ckpt_args(parser): function get_element_from_dict_by_path (line 33) | def get_element_from_dict_by_path(d, path): function main (line 48) | def main(): FILE: codegeex/megatron/data/blendable_dataset.py class BlendableDataset (line 25) | class BlendableDataset(torch.utils.data.Dataset): method __init__ (line 26) | def __init__(self, datasets, weights): method __len__ (line 63) | def __len__(self): method __getitem__ (line 66) | def __getitem__(self, idx): FILE: codegeex/megatron/data/data_samplers.py function build_pretraining_data_loader (line 24) | def build_pretraining_data_loader(dataset, consumed_samples): class MegatronPretrainingSampler (line 62) | class MegatronPretrainingSampler: method __init__ (line 63) | def __init__( method __len__ (line 99) | def __len__(self): method get_start_end_idx (line 102) | def get_start_end_idx(self): method __iter__ (line 107) | def __iter__(self): class MegatronPretrainingRandomSampler (line 123) | class MegatronPretrainingRandomSampler: method __init__ (line 124) | def __init__( method __len__ (line 157) | def __len__(self): method __iter__ (line 160) | def __iter__(self): FILE: codegeex/megatron/data/dataset_utils.py function get_datasets_weights_and_num_samples (line 33) | def get_datasets_weights_and_num_samples(data_prefix, train_valid_test_n... function compile_helper (line 66) | def compile_helper(): function get_a_and_b_segments (line 81) | def get_a_and_b_segments(sample, np_rng): function truncate_segments (line 113) | def truncate_segments(tokens_a, tokens_b, len_a, len_b, max_num_tokens, ... function create_tokens_and_tokentypes (line 133) | def create_tokens_and_tokentypes(tokens_a, tokens_b, cls_id, sep_id): function is_start_piece (line 163) | def is_start_piece(piece): function create_masked_lm_predictions (line 172) | def create_masked_lm_predictions( function pad_and_convert_to_numpy (line 396) | def pad_and_convert_to_numpy( function get_indexed_dataset_ (line 429) | def get_indexed_dataset_(data_prefix, data_impl, skip_warmup): function get_train_valid_test_split_ (line 450) | def get_train_valid_test_split_(splits_string, size): function get_samples_mapping (line 477) | def get_samples_mapping( FILE: codegeex/megatron/data/helpers.cpp function build_blending_indices (line 36) | void build_blending_indices(py::array_t& dataset_index, function build_sample_idx (line 99) | py::array build_sample_idx(const py::array_t& sizes_, function get_target_sample_len (line 188) | inline int32_t get_target_sample_len(const int32_t short_seq_ratio, function build_mapping_impl (line 204) | py::array build_mapping_impl(const py::array_t& docs_, function build_mapping (line 440) | py::array build_mapping(const py::array_t& docs_, function build_blocks_mapping_impl (line 470) | py::array build_blocks_mapping_impl(const py::array_t& docs_, function build_blocks_mapping (line 687) | py::array build_blocks_mapping(const py::array_t& docs_, function PYBIND11_MODULE (line 712) | PYBIND11_MODULE(helpers, m) { FILE: codegeex/megatron/data/indexed_dataset.py function __best_fitting_dtype (line 26) | def __best_fitting_dtype(vocab_size=None): function get_available_dataset_impl (line 33) | def get_available_dataset_impl(): function infer_dataset_impl (line 37) | def infer_dataset_impl(path): function make_builder (line 55) | def make_builder(out_file, impl, vocab_size=None): function make_dataset (line 64) | def make_dataset(path, impl, skip_warmup=False): function dataset_exists (line 83) | def dataset_exists(path, impl): function read_longs (line 90) | def read_longs(f, n): function write_longs (line 96) | def write_longs(f, a): function __best_fitting_dtype (line 112) | def __best_fitting_dtype(vocab_size=None): function make_mmap_builder (line 119) | def make_mmap_builder(out_file, vocab_size=None): function code (line 125) | def code(dtype): function index_file_path (line 132) | def index_file_path(prefix_path): function data_file_path (line 136) | def data_file_path(prefix_path): function create_doc_idx (line 140) | def create_doc_idx(sizes): class IndexedDataset (line 148) | class IndexedDataset(torch.utils.data.Dataset): method __init__ (line 153) | def __init__(self, path): method read_index (line 159) | def read_index(self, path): method read_data (line 177) | def read_data(self, path): method check_index (line 180) | def check_index(self, i): method __del__ (line 184) | def __del__(self): method __getitem__ (line 189) | def __getitem__(self, idx): method __len__ (line 213) | def __len__(self): method num_tokens (line 216) | def num_tokens(self, index): method size (line 219) | def size(self, index): method exists (line 223) | def exists(path): method supports_prefetch (line 229) | def supports_prefetch(self): class IndexedCachedDataset (line 233) | class IndexedCachedDataset(IndexedDataset): method __init__ (line 234) | def __init__(self, path): method supports_prefetch (line 240) | def supports_prefetch(self): method prefetch (line 243) | def prefetch(self, indices): method __getitem__ (line 268) | def __getitem__(self, idx): class IndexedDatasetBuilder (line 285) | class IndexedDatasetBuilder(object): method __init__ (line 296) | def __init__(self, out_file, dtype=np.int32): method add_item (line 305) | def add_item(self, tensor): method end_document (line 312) | def end_document(self): method merge_file_ (line 315) | def merge_file_(self, another_file): method finalize (line 335) | def finalize(self, index_file): function _warmup_mmap_file (line 350) | def _warmup_mmap_file(path): class MMapIndexedDataset (line 356) | class MMapIndexedDataset(torch.utils.data.Dataset): class Index (line 357) | class Index(object): method writer (line 361) | def writer(cls, path, dtype): method __init__ (line 406) | def __init__(self, path, skip_warmup=False): method __del__ (line 442) | def __del__(self): method dtype (line 447) | def dtype(self): method sizes (line 451) | def sizes(self): method doc_idx (line 455) | def doc_idx(self): method __getitem__ (line 459) | def __getitem__(self, i): method __len__ (line 462) | def __len__(self): method __init__ (line 465) | def __init__(self, path, skip_warmup=False): method __getstate__ (line 474) | def __getstate__(self): method __setstate__ (line 477) | def __setstate__(self, state): method _do_init (line 480) | def _do_init(self, path, skip_warmup): method __del__ (line 494) | def __del__(self): method __len__ (line 499) | def __len__(self): method __getitem__ (line 503) | def __getitem__(self, idx): method get (line 524) | def get(self, idx, offset=0, length=None): method sizes (line 540) | def sizes(self): method doc_idx (line 544) | def doc_idx(self): method get_doc_idx (line 547) | def get_doc_idx(self): method set_doc_idx (line 550) | def set_doc_idx(self, doc_idx_): method supports_prefetch (line 554) | def supports_prefetch(self): method exists (line 558) | def exists(path): class MMapIndexedDatasetBuilder (line 564) | class MMapIndexedDatasetBuilder(object): method __init__ (line 565) | def __init__(self, out_file, dtype=np.int64): method add_item (line 571) | def add_item(self, tensor): method end_document (line 576) | def end_document(self): method merge_file_ (line 579) | def merge_file_(self, another_file): method finalize (line 591) | def finalize(self, index_file): FILE: codegeex/megatron/data/prompt_dataset.py function build_train_valid_test_datasets (line 31) | def build_train_valid_test_datasets( function _build_train_valid_test_datasets (line 96) | def _build_train_valid_test_datasets( function get_indexed_dataset_ (line 164) | def get_indexed_dataset_(data_prefix, data_impl, skip_warmup): class PromptDataset (line 179) | class PromptDataset(torch.utils.data.Dataset): method __init__ (line 180) | def __init__( method __len__ (line 229) | def __len__(self): method __getitem__ (line 234) | def __getitem__(self, idx): function _build_index_mappings (line 253) | def _build_index_mappings( function _num_epochs (line 314) | def _num_epochs(samples_per_epoch, num_samples): function _build_doc_idx (line 319) | def _build_doc_idx(documents, num_epochs, np_rng, separate_last_epoch): FILE: codegeex/megatron/enums.py class LayerType (line 19) | class LayerType(enum.Enum): class AttnType (line 24) | class AttnType(enum.Enum): class AttnMaskType (line 29) | class AttnMaskType(enum.Enum): FILE: codegeex/megatron/global_vars.py function get_args (line 36) | def get_args(): function get_num_microbatches (line 42) | def get_num_microbatches(): function get_current_global_batch_size (line 46) | def get_current_global_batch_size(): function update_num_microbatches (line 50) | def update_num_microbatches(consumed_samples, consistency_check=True): function get_tokenizer (line 54) | def get_tokenizer(): function get_tensorboard_writer (line 60) | def get_tensorboard_writer(): function get_adlr_autoresume (line 66) | def get_adlr_autoresume(): function get_timers (line 72) | def get_timers(): function set_global_variables (line 78) | def set_global_variables( function _parse_args (line 95) | def _parse_args(extra_args_provider=None, defaults={}, ignore_unknown_ar... function _build_num_microbatches_calculator (line 107) | def _build_num_microbatches_calculator(args): function _build_tokenizer (line 117) | def _build_tokenizer(args): function rebuild_tokenizer (line 125) | def rebuild_tokenizer(args): function _set_tensorboard_writer (line 131) | def _set_tensorboard_writer(args): function _set_adlr_autoresume (line 157) | def _set_adlr_autoresume(args): function _set_timers (line 175) | def _set_timers(): function _ensure_var_is_initialized (line 182) | def _ensure_var_is_initialized(var, name): function _ensure_var_is_not_initialized (line 187) | def _ensure_var_is_not_initialized(var, name): class _Timer (line 192) | class _Timer: method __init__ (line 195) | def __init__(self, name): method start (line 201) | def start(self): method stop (line 208) | def stop(self): method reset (line 215) | def reset(self): method elapsed (line 220) | def elapsed(self, reset=True): class Timers (line 237) | class Timers: method __init__ (line 240) | def __init__(self): method __call__ (line 243) | def __call__(self, name): method write (line 248) | def write(self, names, writer, iteration, normalizer=1.0, reset=False): method log (line 258) | def log(self, names, normalizer=1.0, reset=True): FILE: codegeex/megatron/inference.py function model_provider (line 18) | def model_provider(): function set_random_seed (line 27) | def set_random_seed(seed): function run_generation_distributed (line 35) | def run_generation_distributed(model): FILE: codegeex/megatron/initialize.py function initialize_megatron (line 44) | def initialize_megatron( function _compile_dependencies (line 105) | def _compile_dependencies(): function setup_deepspeed_random_and_activation_checkpointing (line 171) | def setup_deepspeed_random_and_activation_checkpointing(args): function _initialize_distributed (line 209) | def _initialize_distributed(): function _init_autoresume (line 282) | def _init_autoresume(): function _set_random_seed (line 291) | def _set_random_seed(seed_): function write_args_to_tensorboard (line 305) | def write_args_to_tensorboard(): function initialize_wandb_experiment (line 314) | def initialize_wandb_experiment(): function _initialize_mem_buffs (line 331) | def _initialize_mem_buffs(): FILE: codegeex/megatron/learning_rates.py class AnnealingLR (line 23) | class AnnealingLR(object): method __init__ (line 26) | def __init__( method get_lr (line 70) | def get_lr(self): method step (line 116) | def step(self, increment, token_num=None): method state_dict (line 127) | def state_dict(self): method _check_and_set (line 140) | def _check_and_set(self, cls_value, sd_value, name): method load_state_dict (line 155) | def load_state_dict(self, sd): FILE: codegeex/megatron/memory.py function allocate_mem_buff (line 24) | def allocate_mem_buff(name, numel, dtype, track_usage): function get_mem_buff (line 31) | def get_mem_buff(name): class MemoryBuffer (line 36) | class MemoryBuffer: method __init__ (line 49) | def __init__(self, name, numel, dtype, track_usage): method reset (line 78) | def reset(self): method is_in_use (line 82) | def is_in_use(self): method numel_in_use (line 86) | def numel_in_use(self): method add (line 90) | def add(self, tensor): method get_data (line 114) | def get_data(self): method print_average_usage (line 121) | def print_average_usage(self): class RingMemBuffer (line 134) | class RingMemBuffer: method __init__ (line 137) | def __init__(self, name, num_buffers, numel, dtype, track_usage): method get_next_buffer (line 145) | def get_next_buffer(self): FILE: codegeex/megatron/merge_ckpt_parallel.py function get_change_ckpt_args (line 13) | def get_change_ckpt_args(parser): function main (line 43) | def main(): FILE: codegeex/megatron/microbatches.py function build_num_microbatches_calculator (line 22) | def build_num_microbatches_calculator(args): class NumMicroBatchesCalculator (line 70) | class NumMicroBatchesCalculator(ABC): method __init__ (line 71) | def __init__(self): method get (line 75) | def get(self): method get_current_global_batch_size (line 78) | def get_current_global_batch_size(self): method update (line 82) | def update(self, consumed_samples, consistency_check): class ConstantNumMicroBatches (line 86) | class ConstantNumMicroBatches(NumMicroBatchesCalculator): method __init__ (line 87) | def __init__(self, global_batch_size, micro_batch_size, data_parallel_... method update (line 99) | def update(self, consumed_samples, consistency_check): class RampupBatchsizeNumMicroBatches (line 103) | class RampupBatchsizeNumMicroBatches(NumMicroBatchesCalculator): method __init__ (line 104) | def __init__( method update (line 159) | def update(self, consumed_samples, consistency_check): FILE: codegeex/megatron/mindspore_to_megatron.py function get_change_ckpt_args (line 34) | def get_change_ckpt_args(parser): function loadModelFromNp (line 53) | def loadModelFromNp(sd, args): function loadEmbeddingFromNp (line 73) | def loadEmbeddingFromNp(npCkptPath, languageModel, vocabSize=52224): function loadAttentionLayerFromNp (line 90) | def loadAttentionLayerFromNp(npCkptPath, transformer, layerID): function loadQueryLayerFromNp (line 199) | def loadQueryLayerFromNp(npCkptPath, transformer): function main (line 290) | def main(): FILE: codegeex/megatron/model/codegeex_model.py class CodeGeeXModel (line 27) | class CodeGeeXModel(MegatronModule): method __init__ (line 30) | def __init__(self, num_tokentypes=0, parallel_output=False): method set_input_tensor (line 44) | def set_input_tensor(self, input_tensor): method forward (line 48) | def forward( method state_dict_for_save_checkpoint (line 99) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 108) | def load_state_dict(self, state_dict, strict=True): function CrossEntropy (line 116) | def CrossEntropy(output, labels): class CodeGeeXModelPipe (line 127) | class CodeGeeXModelPipe(PipelineModule, MegatronModule): method __init__ (line 130) | def __init__(self, num_tokentypes=0, parallel_output=True): FILE: codegeex/megatron/model/distributed.py class MemoryBuffer (line 26) | class MemoryBuffer: method __init__ (line 27) | def __init__(self, numel, dtype): method zero (line 37) | def zero(self): method get (line 41) | def get(self, shape, start_index): class DistributedDataParallelBase (line 51) | class DistributedDataParallelBase(MegatronModule, ABC): method __init__ (line 54) | def __init__(self, module): method allreduce_gradients (line 60) | def allreduce_gradients(self): method forward (line 63) | def forward(self, *inputs, **kwargs): method state_dict (line 66) | def state_dict(self, destination=None, prefix="", keep_vars=False): method state_dict_for_save_checkpoint (line 69) | def state_dict_for_save_checkpoint( method load_state_dict (line 76) | def load_state_dict(self, state_dict, strict=True): class DistributedDataParallel (line 80) | class DistributedDataParallel(DistributedDataParallelBase): method __init__ (line 96) | def __init__( method _make_param_hook (line 162) | def _make_param_hook(self, param): method zero_grad_buffer (line 175) | def zero_grad_buffer(self): method allreduce_gradients (line 182) | def allreduce_gradients(self): FILE: codegeex/megatron/model/language_model.py function get_shrink_embedding_gradient_alpha (line 29) | def get_shrink_embedding_gradient_alpha(iteration): function parallel_lm_logits (line 46) | def parallel_lm_logits(input_, word_embeddings_weight, parallel_output, ... function get_language_model (line 73) | def get_language_model( class Embedding (line 100) | class Embedding(MegatronModule): method __init__ (line 114) | def __init__( method add_tokentype_embeddings (line 164) | def add_tokentype_embeddings(self, num_tokentypes): method forward (line 180) | def forward(self, input_ids, position_ids, tokentype_ids=None): method state_dict_for_save_checkpoint (line 196) | def state_dict_for_save_checkpoint( method load_state_dict (line 214) | def load_state_dict(self, state_dict, strict=True): class EmbeddingPipe (line 273) | class EmbeddingPipe(Embedding): method forward (line 274) | def forward(self, inputs, **kwargs): method word_embeddings_weight (line 302) | def word_embeddings_weight(self): class QueryEmbedding (line 307) | class QueryEmbedding(MegatronModule): method __init__ (line 321) | def __init__(self, method add_tokentype_embeddings (line 360) | def add_tokentype_embeddings(self, num_tokentypes): method forward (line 376) | def forward(self, position_ids, tokentype_ids=None): method state_dict_for_save_checkpoint (line 391) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 406) | def load_state_dict(self, state_dict, strict=True): class QueryEmbeddingPipe (line 448) | class QueryEmbeddingPipe(QueryEmbedding): method forward (line 449) | def forward(self, inputs, **kwargs): method word_embeddings_weight (line 476) | def word_embeddings_weight(self): class TransformerLanguageModel (line 481) | class TransformerLanguageModel(MegatronModule): method __init__ (line 500) | def __init__(self, method set_input_tensor (line 537) | def set_input_tensor(self, input_tensor): method forward (line 541) | def forward( method state_dict_for_save_checkpoint (line 572) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 593) | def load_state_dict(self, state_dict, strict=True): FILE: codegeex/megatron/model/module.py function param_is_not_shared (line 31) | def param_is_not_shared(param): class MegatronModule (line 35) | class MegatronModule(torch.nn.Module): method __init__ (line 39) | def __init__(self, share_word_embeddings=True): method state_dict_for_save_checkpoint (line 43) | def state_dict_for_save_checkpoint( method word_embeddings_weight (line 50) | def word_embeddings_weight(self): method initialize_word_embeddings (line 64) | def initialize_word_embeddings(self, init_method_normal): function conversion_helper (line 120) | def conversion_helper(val, conversion): function fp32_to_float16 (line 131) | def fp32_to_float16(val, float16_convertor): function float16_to_fp32 (line 145) | def float16_to_fp32(val): class Float16Module (line 159) | class Float16Module(MegatronModule): method __init__ (line 160) | def __init__(self, module, args): method forward (line 180) | def forward(self, *inputs, **kwargs): method state_dict (line 188) | def state_dict(self, destination=None, prefix="", keep_vars=False): method state_dict_for_save_checkpoint (line 191) | def state_dict_for_save_checkpoint( method load_state_dict (line 198) | def load_state_dict(self, state_dict, strict=True): FILE: codegeex/megatron/model/transformer.py class ParallelMLP (line 55) | class ParallelMLP(MegatronModule): method __init__ (line 64) | def __init__( method forward (line 93) | def forward(self, hidden_states): class ParallelSelfAttention (line 103) | class ParallelSelfAttention(MegatronModule): method __init__ (line 110) | def __init__(self, init_method, method forward (line 164) | def forward( class ParallelTopQuerySelfAttention (line 325) | class ParallelTopQuerySelfAttention(MegatronModule): method __init__ (line 332) | def __init__(self, init_method, method forward (line 387) | def forward( function bias_dropout_add (line 547) | def bias_dropout_add(x, bias, residual, prob, training): function get_bias_dropout_add (line 554) | def get_bias_dropout_add(training): function bias_dropout_add_fused_train (line 562) | def bias_dropout_add_fused_train(x, bias, residual, prob): function bias_dropout_add_fused_inference (line 568) | def bias_dropout_add_fused_inference(x, bias, residual, prob): class ParallelTransformerLayer (line 573) | class ParallelTransformerLayer(MegatronModule): method __init__ (line 580) | def __init__(self, init_method, method forward (line 619) | def forward( class ParallelTransformerLayerPipe (line 695) | class ParallelTransformerLayerPipe(ParallelTransformerLayer): method forward (line 716) | def forward(self, inputs, **kwargs): class ParallelTopQueryLayer (line 732) | class ParallelTopQueryLayer(MegatronModule): method __init__ (line 739) | def __init__(self, init_method, method forward (line 776) | def forward( class ParallelTopQueryLayerPipe (line 857) | class ParallelTopQueryLayerPipe(ParallelTopQueryLayer): method forward (line 878) | def forward(self, inputs, **kwargs): class ParallelTransformer (line 895) | class ParallelTransformer(MegatronModule): method __init__ (line 898) | def __init__(self, init_method, output_layer_init_method): method _get_layer_index (line 943) | def _get_layer_index(self, layer_number): method _get_layer (line 950) | def _get_layer(self, layer_number): method _checkpointed_forward (line 953) | def _checkpointed_forward(self, hidden_states, attention_mask): method set_input_tensor (line 977) | def set_input_tensor(self, input_tensor): method forward (line 987) | def forward( FILE: codegeex/megatron/model/utils.py function init_method_normal (line 22) | def init_method_normal(sigma): function scaled_init_method_normal (line 31) | def scaled_init_method_normal(sigma, num_layers): function attention_mask_func (line 41) | def attention_mask_func(attention_scores, attention_mask): function get_linear_layer (line 47) | def get_linear_layer(rows, columns, init_method): function fast_gelu (line 56) | def fast_gelu(x): function gelu_impl (line 62) | def gelu_impl(x): function openai_gelu (line 69) | def openai_gelu(x): function erf_gelu (line 75) | def erf_gelu(x): FILE: codegeex/megatron/mpu/cross_entropy.py class _VocabParallelCrossEntropy (line 25) | class _VocabParallelCrossEntropy(torch.autograd.Function): method forward (line 27) | def forward(ctx, vocab_parallel_logits, target): method backward (line 92) | def backward(ctx, grad_output): function vocab_parallel_cross_entropy (line 113) | def vocab_parallel_cross_entropy(vocab_parallel_logits, target): FILE: codegeex/megatron/mpu/data.py function _check_data_types (line 26) | def _check_data_types(keys, data, target_dtype): function _build_key_size_numel_dictionaries (line 36) | def _build_key_size_numel_dictionaries(keys, data): function broadcast_data (line 82) | def broadcast_data(keys, data, datatype): FILE: codegeex/megatron/mpu/initialize.py function is_unitialized (line 49) | def is_unitialized(): function initialize_model_parallel (line 54) | def initialize_model_parallel( function model_parallel_is_initialized (line 184) | def model_parallel_is_initialized(): function get_model_parallel_group (line 195) | def get_model_parallel_group(): function get_tensor_model_parallel_group (line 201) | def get_tensor_model_parallel_group(): function get_pipeline_model_parallel_group (line 209) | def get_pipeline_model_parallel_group(): function get_data_parallel_group (line 217) | def get_data_parallel_group(): function get_embedding_group (line 223) | def get_embedding_group(): function set_tensor_model_parallel_world_size (line 229) | def set_tensor_model_parallel_world_size(world_size): function set_pipeline_model_parallel_world_size (line 235) | def set_pipeline_model_parallel_world_size(world_size): function get_tensor_model_parallel_world_size (line 241) | def get_tensor_model_parallel_world_size(): function get_model_parallel_world_size (line 249) | def get_model_parallel_world_size(): function get_pipeline_model_parallel_world_size (line 256) | def get_pipeline_model_parallel_world_size(): function set_tensor_model_parallel_rank (line 264) | def set_tensor_model_parallel_rank(rank): function set_pipeline_model_parallel_rank (line 270) | def set_pipeline_model_parallel_rank(rank): function get_tensor_model_parallel_rank (line 276) | def get_tensor_model_parallel_rank(): function get_model_parallel_rank (line 284) | def get_model_parallel_rank(): function get_pipeline_model_parallel_rank (line 291) | def get_pipeline_model_parallel_rank(): function is_pipeline_first_stage (line 299) | def is_pipeline_first_stage(ignore_virtual=False): function is_pipeline_last_stage (line 310) | def is_pipeline_last_stage(ignore_virtual=False): function get_virtual_pipeline_model_parallel_rank (line 327) | def get_virtual_pipeline_model_parallel_rank(): function set_virtual_pipeline_model_parallel_rank (line 333) | def set_virtual_pipeline_model_parallel_rank(rank): function get_virtual_pipeline_model_parallel_world_size (line 339) | def get_virtual_pipeline_model_parallel_world_size(): function get_tensor_model_parallel_src_rank (line 345) | def get_tensor_model_parallel_src_rank(): function get_pipeline_model_parallel_first_rank (line 353) | def get_pipeline_model_parallel_first_rank(): function get_pipeline_model_parallel_last_rank (line 360) | def get_pipeline_model_parallel_last_rank(): function get_pipeline_model_parallel_next_rank (line 368) | def get_pipeline_model_parallel_next_rank(): function get_pipeline_model_parallel_prev_rank (line 377) | def get_pipeline_model_parallel_prev_rank(): function get_data_parallel_world_size (line 386) | def get_data_parallel_world_size(): function get_data_parallel_rank (line 391) | def get_data_parallel_rank(): function destroy_model_parallel (line 396) | def destroy_model_parallel(): FILE: codegeex/megatron/mpu/layers.py function param_is_not_tensor_parallel_duplicate (line 49) | def param_is_not_tensor_parallel_duplicate(param): function set_tensor_model_parallel_attributes (line 55) | def set_tensor_model_parallel_attributes(tensor, is_parallel, dim, stride): function set_defaults_if_not_set_tensor_model_parallel_attributes (line 65) | def set_defaults_if_not_set_tensor_model_parallel_attributes(tensor): function copy_tensor_model_parallel_attributes (line 74) | def copy_tensor_model_parallel_attributes(destination_tensor, source_ten... function _initialize_affine_weight_gpu (line 83) | def _initialize_affine_weight_gpu(weight, init_method, partition_dim, st... function _initialize_affine_weight_cpu (line 98) | def _initialize_affine_weight_cpu( class VocabParallelEmbedding (line 141) | class VocabParallelEmbedding(torch.nn.Module): method __init__ (line 152) | def __init__(self, num_embeddings, embedding_dim, init_method=init.xav... method forward (line 211) | def forward(self, input_): class ColumnParallelLinear (line 240) | class ColumnParallelLinear(torch.nn.Module): method __init__ (line 264) | def __init__( method forward (line 351) | def forward(self, input_): class RowParallelLinear (line 367) | class RowParallelLinear(torch.nn.Module): method __init__ (line 397) | def __init__( method forward (line 483) | def forward(self, input_): FILE: codegeex/megatron/mpu/mappings.py function _reduce (line 26) | def _reduce(input_): function _split (line 39) | def _split(input_): function _gather (line 58) | def _gather(input_): class _CopyToModelParallelRegion (line 82) | class _CopyToModelParallelRegion(torch.autograd.Function): method symbolic (line 86) | def symbolic(graph, input_): method forward (line 90) | def forward(ctx, input_): method backward (line 94) | def backward(ctx, grad_output): class _ReduceFromModelParallelRegion (line 98) | class _ReduceFromModelParallelRegion(torch.autograd.Function): method symbolic (line 102) | def symbolic(graph, input_): method forward (line 106) | def forward(ctx, input_): method backward (line 110) | def backward(ctx, grad_output): class _ScatterToModelParallelRegion (line 114) | class _ScatterToModelParallelRegion(torch.autograd.Function): method symbolic (line 118) | def symbolic(graph, input_): method forward (line 122) | def forward(ctx, input_): method backward (line 126) | def backward(ctx, grad_output): class _GatherFromModelParallelRegion (line 130) | class _GatherFromModelParallelRegion(torch.autograd.Function): method symbolic (line 134) | def symbolic(graph, input_): method forward (line 138) | def forward(ctx, input_): method backward (line 142) | def backward(ctx, grad_output): function copy_to_tensor_model_parallel_region (line 151) | def copy_to_tensor_model_parallel_region(input_): function reduce_from_tensor_model_parallel_region (line 155) | def reduce_from_tensor_model_parallel_region(input_): function scatter_to_tensor_model_parallel_region (line 159) | def scatter_to_tensor_model_parallel_region(input_): function gather_from_tensor_model_parallel_region (line 163) | def gather_from_tensor_model_parallel_region(input_): FILE: codegeex/megatron/mpu/random.py function init_checkpointed_activations_memory_buffer (line 44) | def init_checkpointed_activations_memory_buffer(): function reset_checkpointed_activations_memory_buffer (line 72) | def reset_checkpointed_activations_memory_buffer(): function _set_cuda_rng_state (line 78) | def _set_cuda_rng_state(new_state, device=-1): function split_tensor_into_1d_equal_chunks (line 112) | def split_tensor_into_1d_equal_chunks(tensor): function gather_split_1d_tensor (line 121) | def gather_split_1d_tensor(tensor): class CudaRNGStatesTracker (line 139) | class CudaRNGStatesTracker: method __init__ (line 148) | def __init__(self): method reset (line 154) | def reset(self): method get_states (line 159) | def get_states(self): method set_states (line 167) | def set_states(self, states): method add (line 172) | def add(self, name, seed): method fork (line 190) | def fork(self, name=_MODEL_PARALLEL_RNG_TRACKER_NAME): function get_cuda_rng_tracker (line 215) | def get_cuda_rng_tracker(): function model_parallel_cuda_manual_seed (line 220) | def model_parallel_cuda_manual_seed(seed): class CheckpointFunction (line 265) | class CheckpointFunction(torch.autograd.Function): method forward (line 274) | def forward(ctx, run_function, *args): method backward (line 298) | def backward(ctx, *args): function checkpoint (line 339) | def checkpoint(function, *args): FILE: codegeex/megatron/mpu/utils.py function ensure_divisibility (line 20) | def ensure_divisibility(numerator, denominator): function divide (line 27) | def divide(numerator, denominator): function split_tensor_along_last_dim (line 34) | def split_tensor_along_last_dim(tensor, num_partitions, contiguous_split... class VocabUtility (line 54) | class VocabUtility: method vocab_range_from_per_partition_vocab_size (line 60) | def vocab_range_from_per_partition_vocab_size( method vocab_range_from_global_vocab_size (line 68) | def vocab_range_from_global_vocab_size(global_vocab_size, rank, world_... FILE: codegeex/megatron/optimizer/__init__.py function _get_params_for_weight_decay_optimization (line 26) | def _get_params_for_weight_decay_optimization(modules): function get_megatron_optimizer (line 58) | def get_megatron_optimizer(model): FILE: codegeex/megatron/optimizer/clip_grads.py function clip_grad_norm_fp32 (line 29) | def clip_grad_norm_fp32(parameters, max_norm, norm_type=2): function count_zeros_fp32 (line 126) | def count_zeros_fp32(parameters): FILE: codegeex/megatron/optimizer/grad_scaler.py class MegatronGradScaler (line 24) | class MegatronGradScaler(ABC): method __init__ (line 25) | def __init__(self, initial_scale): method scale (line 31) | def scale(self): method inv_scale (line 35) | def inv_scale(self): method update (line 39) | def update(self, found_inf): method state_dict (line 43) | def state_dict(self): method load_state_dict (line 47) | def load_state_dict(self, state_dict): class ConstantGradScaler (line 51) | class ConstantGradScaler(MegatronGradScaler): method update (line 52) | def update(self, found_inf): method state_dict (line 55) | def state_dict(self): method load_state_dict (line 58) | def load_state_dict(self, state_dict): class DynamicGradScaler (line 62) | class DynamicGradScaler(MegatronGradScaler): method __init__ (line 63) | def __init__( method update (line 99) | def update(self, found_inf): method state_dict (line 122) | def state_dict(self): method load_state_dict (line 129) | def load_state_dict(self, state_dict): FILE: codegeex/megatron/optimizer/optimizer.py function _zero_grad_group_helper (line 33) | def _zero_grad_group_helper(group, set_to_none): function _multi_tensor_copy_this_to_that (line 48) | def _multi_tensor_copy_this_to_that(this, that, overflow_buf=None): class MegatronOptimizer (line 62) | class MegatronOptimizer(ABC): method __init__ (line 63) | def __init__( method get_parameters (line 74) | def get_parameters(self): method clip_grad_norm (line 81) | def clip_grad_norm(self, clip_grad): method count_zeros (line 85) | def count_zeros(self): method zero_grad (line 90) | def zero_grad(self, set_to_none=True): method get_loss_scale (line 94) | def get_loss_scale(self): method scale_loss (line 98) | def scale_loss(self, loss): method step (line 103) | def step(self): method reload_model_params (line 107) | def reload_model_params(self): method state_dict (line 116) | def state_dict(self): method load_state_dict (line 120) | def load_state_dict(self, state_dict): method _get_state (line 125) | def _get_state(self): method _set_state (line 128) | def _set_state(self, value): method _get_param_groups (line 136) | def _get_param_groups(self): method _set_param_groups (line 139) | def _set_param_groups(self, value): class Float16OptimizerWithFloat16Params (line 145) | class Float16OptimizerWithFloat16Params(MegatronOptimizer): method __init__ (line 170) | def __init__( method zero_grad (line 273) | def zero_grad(self, set_to_none=True): method get_loss_scale (line 281) | def get_loss_scale(self): method _copy_model_grads_to_main_grads (line 286) | def _copy_model_grads_to_main_grads(self): method _unscale_main_grads_and_check_for_nan (line 303) | def _unscale_main_grads_and_check_for_nan(self): method _get_model_and_main_params_data_float16 (line 332) | def _get_model_and_main_params_data_float16(self): method _copy_main_params_to_model_params (line 343) | def _copy_main_params_to_model_params(self): method _copy_model_params_to_main_params (line 350) | def _copy_model_params_to_main_params(self): method reload_model_params (line 357) | def reload_model_params(self): method step (line 361) | def step(self): method state_dict (line 408) | def state_dict(self): method load_state_dict (line 416) | def load_state_dict(self, state_dict): class FP32Optimizer (line 453) | class FP32Optimizer(MegatronOptimizer): method __init__ (line 454) | def __init__( method zero_grad (line 464) | def zero_grad(self, set_to_none=True): method get_loss_scale (line 469) | def get_loss_scale(self): method step (line 474) | def step(self): method reload_model_params (line 498) | def reload_model_params(self): method state_dict (line 501) | def state_dict(self): method load_state_dict (line 504) | def load_state_dict(self, state_dict): FILE: codegeex/megatron/p2p_communication.py function _communicate (line 24) | def _communicate( function recv_forward (line 150) | def recv_forward(timers=None): function recv_backward (line 168) | def recv_backward(timers=None): function send_forward (line 186) | def send_forward(output_tensor, timers=None): function send_backward (line 201) | def send_backward(input_tensor_grad, timers=None): function send_forward_recv_backward (line 216) | def send_forward_recv_backward(output_tensor, timers=None): function send_backward_recv_forward (line 234) | def send_backward_recv_forward(input_tensor_grad, timers=None): function send_forward_recv_forward (line 252) | def send_forward_recv_forward(output_tensor, recv_prev, timers=None): function send_backward_recv_backward (line 267) | def send_backward_recv_backward(input_tensor_grad, recv_next, timers=None): function send_forward_backward_recv_forward_backward (line 282) | def send_forward_backward_recv_forward_backward( FILE: codegeex/megatron/schedules.py function get_forward_backward_func (line 31) | def get_forward_backward_func(): function forward_step (line 43) | def forward_step(forward_step_func, data_iterator, model, input_tensor, ... function backward_step (line 72) | def backward_step( function dummy_handler (line 113) | def dummy_handler(): function forward_backward_no_pipelining (line 120) | def forward_backward_no_pipelining( function forward_backward_pipelining_with_interleaving (line 173) | def forward_backward_pipelining_with_interleaving( function forward_backward_pipelining_without_interleaving (line 416) | def forward_backward_pipelining_without_interleaving( FILE: codegeex/megatron/tokenizer/gpt2_tokenization.py function lru_cache (line 33) | def lru_cache(): function bytes_to_unicode (line 54) | def bytes_to_unicode(): function get_pairs (line 81) | def get_pairs(word): class GPT2Tokenizer (line 94) | class GPT2Tokenizer(object): method from_pretrained (line 101) | def from_pretrained( method __init__ (line 183) | def __init__( method __len__ (line 212) | def __len__(self): method set_special_tokens (line 215) | def set_special_tokens(self, special_tokens): method bpe (line 230) | def bpe(self, token): method tokenize (line 271) | def tokenize(self, text): method convert_tokens_to_ids (line 282) | def convert_tokens_to_ids(self, tokens): method convert_ids_to_tokens (line 307) | def convert_ids_to_tokens(self, ids, skip_special_tokens=False): method encode (line 318) | def encode(self, text): method decode (line 321) | def decode(self, tokens): method save_vocabulary (line 328) | def save_vocabulary(self, vocab_path): FILE: codegeex/megatron/tokenizer/tokenizer.py function encode_whitespaces (line 26) | def encode_whitespaces(text: str, start_extra_id: int=10, max_len: int=10): function decode_whitespaces (line 37) | def decode_whitespaces(text: str, start_extra_id: int=10, max_len: int=10): function build_hgf_tokenizer (line 52) | def build_hgf_tokenizer(args): function build_tokenizer (line 71) | def build_tokenizer(args): function _vocab_size_with_padding (line 98) | def _vocab_size_with_padding(orig_vocab_size, args): class AbstractTokenizer (line 118) | class AbstractTokenizer(ABC): method __init__ (line 121) | def __init__(self, name): method vocab_size (line 127) | def vocab_size(self): method vocab (line 132) | def vocab(self): method inv_vocab (line 138) | def inv_vocab(self): method tokenize (line 143) | def tokenize(self, text): method detokenize (line 146) | def detokenize(self, token_ids): method cls (line 152) | def cls(self): method sep (line 158) | def sep(self): method pad (line 164) | def pad(self): method eod (line 170) | def eod(self): method mask (line 176) | def mask(self): class _GPT2BPETokenizer (line 182) | class _GPT2BPETokenizer(AbstractTokenizer): method __init__ (line 185) | def __init__(self, vocab_file, merge_file): method vocab_size (line 195) | def vocab_size(self): method vocab (line 199) | def vocab(self): method inv_vocab (line 203) | def inv_vocab(self): method tokenize (line 206) | def tokenize(self, text): method detokenize (line 209) | def detokenize(self, token_ids): method eod (line 213) | def eod(self): class HgfTokenizerWrapper (line 217) | class HgfTokenizerWrapper(AbstractTokenizer): method __init__ (line 220) | def __init__( method tokenize (line 231) | def tokenize(self, text): method detokenize (line 238) | def detokenize(self, token_ids): method eod (line 245) | def eod(self): method inv_vocab (line 249) | def inv_vocab(self): method vocab (line 253) | def vocab(self): method vocab_size (line 257) | def vocab_size(self): FILE: codegeex/megatron/tools/collect_env.py function main (line 7) | def main(): FILE: codegeex/megatron/tools/finetune_codegeex.py function model_provider (line 19) | def model_provider(pre_process=True, post_process=True): function get_batch (line 88) | def get_batch(data_iterator): function get_batch_pipe (line 128) | def get_batch_pipe(data): function loss_func (line 157) | def loss_func(loss_mask, output_tensor): function valid_loss_func (line 182) | def valid_loss_func(loss_mask, output_tensor): function forward_step (line 200) | def forward_step(data_iterator, model): function valid_forward_step (line 215) | def valid_forward_step(data_iterator, model): function train_valid_test_datasets_provider (line 230) | def train_valid_test_datasets_provider(train_val_test_num_samples): FILE: codegeex/megatron/tools/pretrain_codegeex.py function model_provider (line 20) | def model_provider(pre_process=True, post_process=True): function get_batch (line 89) | def get_batch(data_iterator): function get_batch_pipe (line 123) | def get_batch_pipe(data): function loss_func (line 152) | def loss_func(loss_mask, output_tensor): function forward_step (line 163) | def forward_step(data_iterator, model): function train_valid_test_datasets_provider (line 178) | def train_valid_test_datasets_provider(train_val_test_num_samples): function command_exists (line 197) | def command_exists(cmd): FILE: codegeex/megatron/training.py function print_datetime (line 70) | def print_datetime(string): function pretrain (line 77) | def pretrain( function update_train_iters (line 221) | def update_train_iters(args): function get_model (line 250) | def get_model(model_provider_func): function get_learning_rate_scheduler (line 353) | def get_learning_rate_scheduler(optimizer): function setup_model_and_optimizer (line 396) | def setup_model_and_optimizer(model_provider_func): function train_step (line 477) | def train_step(forward_step_func, data_iterator, model, optimizer, lr_sc... function training_log (line 600) | def training_log( function save_checkpoint_and_time (line 870) | def save_checkpoint_and_time(iteration, model, optimizer, lr_scheduler): function train (line 882) | def train( function evaluate (line 1022) | def evaluate(forward_step_func, data_iterator, model, verbose=False): function evaluate_and_print_results (line 1090) | def evaluate_and_print_results( function evaluate_and_print_results_gold (line 1157) | def evaluate_and_print_results_gold( function cyclic_iter (line 1214) | def cyclic_iter(iter): function build_train_valid_test_data_iterators (line 1220) | def build_train_valid_test_data_iterators(build_train_valid_test_dataset... FILE: codegeex/megatron/utils.py function unwrap_model (line 34) | def unwrap_model(model, module_instances=(torchDDP)): function calc_params_l2_norm (line 49) | def calc_params_l2_norm(model): function average_losses_across_data_parallel_group (line 81) | def average_losses_across_data_parallel_group(losses): function report_memory (line 92) | def report_memory(name): function print_params_min_max_norm (line 108) | def print_params_min_max_norm(optimizer, iteration): function check_adlr_autoresume_termination (line 127) | def check_adlr_autoresume_termination(iteration, model, optimizer, lr_sc... function get_ltor_masks_and_position_ids (line 145) | def get_ltor_masks_and_position_ids( function get_parameters_in_billions (line 202) | def get_parameters_in_billions(model): function flops_calculator (line 222) | def flops_calculator(model, args, iteration_time): FILE: codegeex/mindspore/convertion_1p.py function load_model (line 40) | def load_model(args_opt): function export_mindir (line 188) | def export_mindir(model_predict, config): function run_predict (line 206) | def run_predict(model_predict, config, args_opt, rank): function main (line 246) | def main(): FILE: codegeex/mindspore/finetune.py function set_weight_decay (line 57) | def set_weight_decay(params): function add_checkpoint_callback_policy (line 72) | def add_checkpoint_callback_policy(args_param, callback, rank_id): function set_parallel_context (line 102) | def set_parallel_context(args_opt): function run_train (line 121) | def run_train(args_opt): FILE: codegeex/mindspore/generation.py function load_model (line 40) | def load_model(args_opt): function export_mindir (line 177) | def export_mindir(model_predict, config): function run_predict (line 195) | def run_predict(model_predict, config, args_opt, rank): function main (line 249) | def main(): FILE: codegeex/mindspore/generation_1p.py function load_model (line 40) | def load_model(args_opt): function export_mindir (line 183) | def export_mindir(model_predict, config): function run_predict (line 201) | def run_predict(model_predict, config, args_opt, rank): function main (line 242) | def main(): FILE: codegeex/mindspore/generation_batch.py function load_model (line 40) | def load_model(args_opt): function export_mindir (line 175) | def export_mindir(model_predict, config): function run_predict (line 193) | def run_predict(model_predict, config, args_opt, rank): function main (line 298) | def main(): FILE: codegeex/mindspore/generation_finetune.py function load_model (line 41) | def load_model(args_opt): function export_mindir (line 178) | def export_mindir(model_predict, config): function run_predict (line 196) | def run_predict(model_predict, config, args_opt, rank): function main (line 285) | def main(): FILE: codegeex/mindspore/generation_humaneval.py function load_model (line 42) | def load_model(args_opt): function export_mindir (line 177) | def export_mindir(model_predict, config): function run_predict (line 195) | def run_predict(model_predict, config, args_opt, rank): function main (line 245) | def main(): FILE: codegeex/mindspore/generation_values.py function load_model (line 39) | def load_model(args_opt): function run_predict (line 174) | def run_predict(model_predict, config, args_opt, rank): function main (line 208) | def main(): FILE: codegeex/mindspore/generation_values_1p.py function load_model (line 40) | def load_model(args_opt): function export_mindir (line 183) | def export_mindir(model_predict, config): function run_predict (line 201) | def run_predict(model_predict, config, args_opt, rank): function main (line 243) | def main(): FILE: codegeex/mindspore/save_1p_ckpt_from_8p_ckpt.py function set_weight_decay (line 66) | def set_weight_decay(params): function add_checkpoint_callback_policy (line 81) | def add_checkpoint_callback_policy(args_param, callback, rank_id): function set_parallel_context (line 111) | def set_parallel_context(args_opt): function download_ckpt (line 128) | def download_ckpt(args_opt, file_num, rank_num, rank_id): function get_needed_model_parallel_list (line 147) | def get_needed_model_parallel_list(train_strategy_file, self_rank): function transform_model_parallel (line 160) | def transform_model_parallel(restore_local_ckpt_file_list, train_strateg... function run_transform_model_parallel_ckpt (line 213) | def run_transform_model_parallel_ckpt(args_opt): FILE: codegeex/mindspore/save_8p_ckpt.py function set_parallel_context (line 61) | def set_parallel_context(args_opt): function download_ckpt (line 78) | def download_ckpt(args_opt, file_num, rank_num, rank_id): function get_needed_opt_shard_list (line 95) | def get_needed_opt_shard_list(train_strategy_file, self_rank): function transform_opt_shard (line 111) | def transform_opt_shard(restore_local_ckpt_file_list, train_strategy_fil... function run_transform_opt_shard_ckpt (line 154) | def run_transform_opt_shard_ckpt(args_opt): FILE: codegeex/mindspore/scripts/layer_norm.py function get_op_support_info (line 43) | def get_op_support_info(input_x, input_gamma, input_beta, function _division_sixteen (line 97) | def _division_sixteen(shape, begin_norm_axis): function op_select_format (line 121) | def op_select_format(input_x, input_gamma, input_beta, function to_frac_z_axis (line 273) | def to_frac_z_axis(ori_shape, ori_axis): function _broadcast_nz (line 312) | def _broadcast_nz(tensor, shape): function _check_vector_to_cube (line 330) | def _check_vector_to_cube(dtype, ori_shape_x, shape_x, begin_norm_axis, ... function nz_non_aligned (line 354) | def nz_non_aligned(input_x, input_gamma, input_beta, function layer_norm_compute_nz (line 433) | def layer_norm_compute_nz(input_x, input_gamma, input_beta, function layer_norm_compute (line 571) | def layer_norm_compute(input_x, input_gamma, input_beta, function is_support_nz_non_aligned (line 709) | def is_support_nz_non_aligned(ori_shape_x, begin_params_axis, impl_mode): function layer_norm (line 726) | def layer_norm(input_x, input_gamma, input_beta, function __dynamic_template_api (line 917) | def __dynamic_template_api(input_x, input_gamma, input_beta, output_y, o... FILE: codegeex/mindspore/scripts/layer_norm_x_backprop_v2.py function get_op_support_info (line 36) | def get_op_support_info(input_dy, function _check_dynamic_format (line 87) | def _check_dynamic_format(shape_dy, shape_gamma, c_0): function op_select_format (line 100) | def op_select_format(input_dy, function _check_params (line 224) | def _check_params(params_map): function _check_shape (line 247) | def _check_shape(params_map): function _check_shape_mean (line 285) | def _check_shape_mean(shape_x, shape_mean): function _check_shape_gamma (line 325) | def _check_shape_gamma(shape_x, shape_gamma): function _broadcast_nz (line 351) | def _broadcast_nz(tensor, shape): function _update_gamma_shape (line 366) | def _update_gamma_shape(shape_x, shape_gamma): function _get_data_gm (line 398) | def _get_data_gm(shapes, dtype): function _get_params (line 426) | def _get_params(shape_x, shape_mean, shape_gamma): function _get_pd_xl (line 469) | def _get_pd_xl(data, shape_x): function _get_pd_var_front (line 491) | def _get_pd_var_front(data, cast_dtype): function _get_pd_var (line 519) | def _get_pd_var(data, params, shape_x, pd_xl, cast_dtype): function _get_pd_mean (line 561) | def _get_pd_mean(params, pd_xl, pd_var, var_elta_2, sub_x_mean, cast_dty... function _get_pd_x (line 600) | def _get_pd_x(data, params, shape_x, dtype, cast_dtype): function _get_res (line 656) | def _get_res(data, params, shape_x, dtype, cast_dtype): function _get_pds (line 688) | def _get_pds(data_dy, data_x, data_variance, data_mean, data_gamma, shap... function layer_norm_x_backprop_v2_compute (line 748) | def layer_norm_x_backprop_v2_compute(input_dy, function update_shape_nz (line 787) | def update_shape_nz(shape_x, shape_var, shape_gamma): function _get_data_nz (line 847) | def _get_data_nz(param_nz, dtype): function _get_pd_xl_nz (line 863) | def _get_pd_xl_nz(data, param_nz): function _get_pd_var_front_nz (line 874) | def _get_pd_var_front_nz(data, cast_dtype): function _get_pd_var_nz (line 889) | def _get_pd_var_nz(data, param_nz, pd_xl, cast_dtype): function _get_pd_mean_nz (line 907) | def _get_pd_mean_nz(param_nz, pd_xl, pd_var, var_elta_2, sub_x_mean, cas... function _get_pd_x_nz (line 920) | def _get_pd_x_nz(data, param_nz, dtype, cast_dtype): function _get_res_nz (line 951) | def _get_res_nz(data, param_nz, dtype, cast_dtype): function _get_pds_nz (line 961) | def _get_pds_nz(data_dy, data_x, data_variance, data_mean, data_gamma, p... function layer_norm_x_back_nz_compute (line 994) | def layer_norm_x_back_nz_compute(data_dy, data_x, data_variance, data_me... function layer_norm_x_backprop_v2 (line 1027) | def layer_norm_x_backprop_v2(input_dy, FILE: codegeex/mindspore/src/adam.py function _update_run_kernel (line 36) | def _update_run_kernel(opt, clip_value, beta1, beta2, eps, lr, weight_de... function _check_param_value (line 53) | def _check_param_value(beta1, beta2, eps, prim_name): class AdamWeightDecayOp (line 63) | class AdamWeightDecayOp(Optimizer): method __init__ (line 136) | def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999,... method construct (line 156) | def construct(self, gradients, clip_value): method clone_param32 (line 182) | def clone_param32(self, prefix, init=None): FILE: codegeex/mindspore/src/callbacks.py class LossCallBack (line 31) | class LossCallBack(Callback): method __init__ (line 37) | def __init__( method step_end (line 61) | def step_end(self, run_context): class EvalCallBack (line 92) | class EvalCallBack(Callback): method __init__ (line 102) | def __init__(self, model, eval_dataset, ppl_metric, validation_loss, p... method step_end (line 123) | def step_end(self, run_context): class SaveCheckpointCallback (line 161) | class SaveCheckpointCallback(Callback): method __init__ (line 162) | def __init__(self, cache_dir, bucket, local_rank=0, has_trained_epoch=... method step_end (line 175) | def step_end(self, run_context): method syn_files (line 183) | def syn_files(self): FILE: codegeex/mindspore/src/code_tokenizer.py function encode_whitespaces (line 8) | def encode_whitespaces(text, start_extra_id: int, max_len: int): function decode_whitespaces (line 43) | def decode_whitespaces(text: str, start_extra_id: int, max_len: int): class Code13BDictionary (line 58) | class Code13BDictionary(object): method __init__ (line 59) | def __init__( method _pad_to_vocab_size (line 87) | def _pad_to_vocab_size(self, vocab_size: int): method _load_dict (line 94) | def _load_dict(self, dict_file: str): method _add_symbol (line 103) | def _add_symbol(self, sym: str, count: int): method __len__ (line 109) | def __len__(self): method index (line 112) | def index(self, sym: str): method string (line 115) | def string(self, idx: int): method map_token (line 118) | def map_token(self, token: Union[int, str]): method map_tokens (line 123) | def map_tokens(self, tokens): method decode_tokens (line 126) | def decode_tokens(self, tokens): class CodeTokenizer (line 131) | class CodeTokenizer(object): method __init__ (line 132) | def __init__( method encode_code (line 149) | def encode_code(self, code: str): method decode_code (line 161) | def decode_code(self, input_ids): FILE: codegeex/mindspore/src/dataset.py function get_input_data_batch_slice_map (line 35) | def get_input_data_batch_slice_map(input_ids, eod_id, rank, dis, eod_res... function create_dataset (line 80) | def create_dataset(batch_size, data_path, args_opt, device_num=1, rank=0... function get_code_data_train (line 173) | def get_code_data_train(code_data_path, args_opt, process_fn=None, scale... function get_code_data_eval (line 201) | def get_code_data_eval(code_data_path, args_opt, process_fn=None, scale=1): FILE: codegeex/mindspore/src/dataset_finetune.py function get_input_data_batch_slice_map (line 33) | def get_input_data_batch_slice_map(input_ids, loss_mask, eod_id, rank, d... function create_dataset (line 82) | def create_dataset(batch_size, data_path, args_opt, device_num=1, rank=0... function get_code_data (line 165) | def get_code_data(code_data_path, split, args_opt, process_fn=None, skip... FILE: codegeex/mindspore/src/generate.py function topk_fun (line 26) | def topk_fun(logits, topk=5): function sampler (line 38) | def sampler(log_probs_revised, top_p, top_k_num, use_pynative=False): function generate (line 84) | def generate(model, origin_inputs, config, verbose=False): function generate_increment (line 170) | def generate_increment(model, origin_inputs, config, verbose=False): FILE: codegeex/mindspore/src/generate_finetune.py function topk_fun (line 28) | def topk_fun(logits, topk=5): function sampler (line 35) | def sampler(log_probs_revised, top_p, top_k_num, use_pynative=False, bad... function generate_increment (line 78) | def generate_increment(model, origin_inputs, origin_length, config, toke... FILE: codegeex/mindspore/src/generate_greedy.py function topk_fun (line 27) | def topk_fun(logits, topk=5): function sampler (line 34) | def sampler(log_probs_revised, top_p, top_k_num, use_pynative=False, bad... function generate_increment (line 45) | def generate_increment(model, origin_inputs, origin_length, config, toke... FILE: codegeex/mindspore/src/generate_humaneval.py function is_code_generation_finished (line 25) | def is_code_generation_finished(text: str): function cleanup_text (line 37) | def cleanup_text(text: str): function truncate_text (line 49) | def truncate_text(text: str): function topk_fun (line 63) | def topk_fun(logits, topk=5): function sampler (line 77) | def sampler(log_probs_revised, top_p, top_k_num, use_pynative=False, bad... function generate_increment (line 130) | def generate_increment(model, origin_inputs, config, tokenizer, verbose=... FILE: codegeex/mindspore/src/metrics.py class PPLMetric (line 26) | class PPLMetric(Metric): method __init__ (line 31) | def __init__(self, data_length): method clear (line 40) | def clear(self): method update (line 45) | def update(self, *inputs): # inputs method eval (line 53) | def eval(self): class ValidationLoss (line 67) | class ValidationLoss(Metric): method __init__ (line 68) | def __init__(self, data_length): method clear (line 77) | def clear(self): method update (line 82) | def update(self, *inputs): # inputs method eval (line 94) | def eval(self): FILE: codegeex/mindspore/src/pangu_alpha.py class EmbeddingLayer (line 38) | class EmbeddingLayer(nn.Cell): method __init__ (line 41) | def __init__(self, config): method construct (line 92) | def construct( method get_word_embedding_weight (line 105) | def get_word_embedding_weight(self): class QueryLayer (line 109) | class QueryLayer(TransformerEncoderLayer): method __init__ (line 112) | def __init__( method construct (line 144) | def construct( class PanGuHead (line 220) | class PanGuHead(Cell): method __init__ (line 232) | def __init__( method construct (line 252) | def construct(self, state, embed): function set_parallel_configure_for_layer (line 261) | def set_parallel_configure_for_layer( class PanguAlpha_Model (line 296) | class PanguAlpha_Model(Cell): method __init__ (line 299) | def __init__(self, config): method construct (line 388) | def construct(self, input_ids, method reshape_to_2d (line 419) | def reshape_to_2d(self, x): method load_embedding_from_ckpt (line 427) | def load_embedding_from_ckpt(self, load_ckpt_path): class PanguAlphaModel (line 461) | class PanguAlphaModel(nn.Cell): method __init__ (line 474) | def __init__(self, config): method construct (line 488) | def construct(self, input_ids, input_position, attention_mask, class PanGUAlphaWithLoss (line 496) | class PanGUAlphaWithLoss(Cell): method __init__ (line 508) | def __init__(self, config, network, loss): method construct (line 527) | def construct(self, input_ids, input_position=None, attention_mask=None): class EvalNet (line 547) | class EvalNet(nn.Cell): method __init__ (line 561) | def __init__(self, backbone, generate=False, pad_token=6, seq_length=2... method construct (line 575) | def construct(self, input_ids, current_index, init_reset=True, batch_v... class LogitsNet (line 597) | class LogitsNet(nn.Cell): method __init__ (line 610) | def __init__(self, backbone, generate=False, pad_token=6, seq_length=2... method construct (line 624) | def construct(self, input_ids, init_reset=True, batch_valid_length=Non... class PanGUAlphaWithFinetuneLoss (line 642) | class PanGUAlphaWithFinetuneLoss(Cell): method __init__ (line 654) | def __init__(self, config, network, loss): method construct (line 673) | def construct(self, input_ids, loss_mask, input_position, attention_ma... FILE: codegeex/mindspore/src/pangu_alpha_config.py class PanguAlphaConfig (line 21) | class PanguAlphaConfig: method __init__ (line 26) | def __init__(self, method __str__ (line 71) | def __str__(self): function set_parse (line 80) | def set_parse(args_opt): FILE: codegeex/mindspore/src/pangu_alpha_fp16_predict.py class EmbeddingLayer (line 37) | class EmbeddingLayer(nn.Cell): method __init__ (line 40) | def __init__(self, config): method construct (line 89) | def construct( method get_word_embedding_weight (line 101) | def get_word_embedding_weight(self): class QueryLayer (line 105) | class QueryLayer(TransformerEncoderLayer): method __init__ (line 108) | def __init__( method construct (line 140) | def construct( class PanGuHead (line 216) | class PanGuHead(Cell): method __init__ (line 228) | def __init__( method construct (line 248) | def construct(self, state, embed): function set_parallel_configure_for_layer (line 257) | def set_parallel_configure_for_layer( class PanguAlpha_Model (line 292) | class PanguAlpha_Model(Cell): method __init__ (line 295) | def __init__(self, config): method construct (line 383) | def construct(self, input_ids, method reshape_to_2d (line 422) | def reshape_to_2d(self, x): method load_embedding_from_ckpt (line 430) | def load_embedding_from_ckpt(self, load_ckpt_path): class PanguAlphaModel (line 471) | class PanguAlphaModel(nn.Cell): method __init__ (line 484) | def __init__(self, config): method construct (line 499) | def construct(self, input_ids, input_position, attention_mask, class PanGUAlphaWithLoss (line 511) | class PanGUAlphaWithLoss(Cell): method __init__ (line 523) | def __init__(self, config, network, loss): method construct (line 542) | def construct(self, input_ids, input_position=None, attention_mask=None): class EvalNet (line 562) | class EvalNet(nn.Cell): method __init__ (line 576) | def __init__(self, backbone, generate=False, pad_token=6, seq_length=2... method construct (line 590) | def construct(self, input_ids, current_index, init_reset=True, batch_v... FILE: codegeex/mindspore/src/pangu_alpha_wrapcell.py function _clip_grad (line 37) | def _clip_grad(clip_type, clip_value, grad): function tensor_grad_scale (line 72) | def tensor_grad_scale(scale, grad): function tensor_grad_scale_pipeline (line 77) | def tensor_grad_scale_pipeline(scale, grad, accu_grad): function tensor_shard_grad_scale_pipeline (line 87) | def tensor_shard_grad_scale_pipeline(scale, grad, accu_grad): class PanguAlphaTrainOneStepWithLossScaleCell (line 94) | class PanguAlphaTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleC... method __init__ (line 107) | def __init__( method construct (line 130) | def construct(self, input_ids, input_position, attention_mask, layer_p... class PanguAlphaTrainPipelineWithLossScaleCell (line 168) | class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell): method __init__ (line 181) | def __init__(self, network, optimizer, config, scale_update_cell=None,... method construct (line 221) | def construct( FILE: codegeex/mindspore/src/pangu_alpha_wrapcell_finetune.py function _clip_grad (line 37) | def _clip_grad(clip_type, clip_value, grad): function tensor_grad_scale (line 72) | def tensor_grad_scale(scale, grad): function tensor_grad_scale_pipeline (line 77) | def tensor_grad_scale_pipeline(scale, grad, accu_grad): function tensor_shard_grad_scale_pipeline (line 87) | def tensor_shard_grad_scale_pipeline(scale, grad, accu_grad): class PanguAlphaTrainOneStepWithLossScaleCell (line 94) | class PanguAlphaTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleC... method __init__ (line 107) | def __init__( method construct (line 130) | def construct(self, input_ids, loss_mask, input_position, attention_ma... class PanguAlphaTrainPipelineWithLossScaleCell (line 168) | class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell): method __init__ (line 181) | def __init__(self, network, optimizer, config, scale_update_cell=None,... method construct (line 221) | def construct( FILE: codegeex/mindspore/src/preprocess.py function chunks (line 32) | def chunks(lst, n): function package_file (line 38) | def package_file(it, n): function clean_wikitext (line 53) | def clean_wikitext(string): function tokenize_openwebtext (line 87) | def tokenize_openwebtext(tokenizer, iterator, seq_length, eot): function tokenize_wiki (line 107) | def tokenize_wiki(tokenizer, file_path, seq_length, eot): function tokenize_lambada (line 124) | def tokenize_lambada(tokenizer, file_path, seq_length, eot): function task_unit (line 145) | def task_unit(iterator, tokenizer, seq_length, eot, parallel_writer=True): FILE: codegeex/mindspore/src/sat_dataset.py class Dataset (line 8) | class Dataset(ABC): method __len__ (line 10) | def __len__(self): method __getitem__ (line 14) | def __getitem__(self, idx): class LMDBDataset (line 18) | class LMDBDataset(Dataset): method __init__ (line 19) | def __init__(self, path, process_fn=None): method __len__ (line 38) | def __len__(self): method __getitem__ (line 41) | def __getitem__(self, idx): class PadDataset (line 56) | class PadDataset(Dataset): method __init__ (line 57) | def __init__(self, dataset, seq_len, eod_id): method __len__ (line 62) | def __len__(self): method __getitem__ (line 65) | def __getitem__(self, idx): class BinaryDataset (line 72) | class BinaryDataset(Dataset): method __init__ (line 73) | def __init__( method __len__ (line 100) | def __len__(self): method __getitem__ (line 103) | def __getitem__(self, index): class TSVDataset (line 108) | class TSVDataset(Dataset): method __init__ (line 109) | def __init__(self, path, process_fn, with_heads=True, **kwargs): method __len__ (line 118) | def __len__(self): method __getitem__ (line 121) | def __getitem__(self, index): class ConcatDataset (line 125) | class ConcatDataset(Dataset): method cumsum (line 136) | def cumsum(sequence, weights): method __init__ (line 144) | def __init__(self, datasets, weights=None, skip_num=0, **kwargs): method __len__ (line 158) | def __len__(self): method __getitem__ (line 161) | def __getitem__(self, idx): class RandomMappingDataset (line 175) | class RandomMappingDataset(Dataset): method __init__ (line 181) | def __init__(self, ds): method __len__ (line 185) | def __len__(self): method __getitem__ (line 188) | def __getitem__(self, index): class BlockedSplitDataset (line 197) | class BlockedSplitDataset(Dataset): method __init__ (line 204) | def __init__(self, ds, indices, block_size): method __len__ (line 214) | def __len__(self): method __getitem__ (line 217) | def __getitem__(self, index): class SubsetDataset (line 223) | class SubsetDataset(Dataset): method __init__ (line 224) | def __init__(self, ds, start, length): method __len__ (line 230) | def __len__(self): method __getitem__ (line 233) | def __getitem__(self, idx): function split_train_val_test (line 239) | def split_train_val_test(ds, split=[0.99, 0.01, 0.0], seed=None): FILE: codegeex/mindspore/src/tokenization_jieba.py class JIEBATokenizer (line 25) | class JIEBATokenizer(): method __init__ (line 30) | def __init__(self, vocab_file, model_file, max_len=None): method vocab_size (line 49) | def vocab_size(self): method __len__ (line 52) | def __len__(self): method eod (line 56) | def eod(self): method tokenize (line 59) | def tokenize(self, text): method convert_tokens_to_ids (line 65) | def convert_tokens_to_ids(self, tokens): method convert_ids_to_tokens (line 68) | def convert_ids_to_tokens(self, ids): method encode (line 71) | def encode(self, text): method decode (line 75) | def decode(self, tokens): FILE: codegeex/mindspore/src/utils.py class FP32StateAdamWeightDecay (line 40) | class FP32StateAdamWeightDecay(AdamWeightDecay): method __init__ (line 49) | def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999,... method clone_state (line 59) | def clone_state(self, parameter_tuple, prefix, init): function _get_square_sum (line 80) | def _get_square_sum(grad, value): function _apply_global_norm (line 90) | def _apply_global_norm(enable_grad_fp16, clip_norm, global_norm, grad): function _get_model_parallel_group (line 98) | def _get_model_parallel_group(mp): function _get_pipeline_group (line 118) | def _get_pipeline_group(): class GlobalNorm (line 136) | class GlobalNorm(nn.Cell): method __init__ (line 141) | def __init__(self, params, config): method construct (line 193) | def construct(self, grads): class ClipByGlobalNorm (line 206) | class ClipByGlobalNorm(nn.Cell): method __init__ (line 213) | def __init__(self, params, config, clip_norm=1.0): method construct (line 223) | def construct(self, grads): class LearningRate (line 232) | class LearningRate(LearningRateSchedule): method __init__ (line 237) | def __init__(self, method construct (line 261) | def construct(self, global_step): function add_inference_params (line 278) | def add_inference_params(opt): function add_training_params (line 315) | def add_training_params(opt): function add_retrain_params (line 419) | def add_retrain_params(opt): function get_args (line 461) | def get_args(inference=False): function download_data (line 587) | def download_data(src_data_url, tgt_data_path, rank): FILE: codegeex/mindspore/train.py function set_weight_decay (line 59) | def set_weight_decay(params): function add_checkpoint_callback_policy (line 74) | def add_checkpoint_callback_policy(args_param, callback, rank_id): function set_parallel_context (line 104) | def set_parallel_context(args_opt): function run_train (line 121) | def run_train(args_opt): function restore_checkpoint (line 333) | def restore_checkpoint(args_param, sink_size, dataset, model, network, e... function get_exception_checkpoints (line 376) | def get_exception_checkpoints(args_param): function check_exception_checkpoints (line 408) | def check_exception_checkpoints(ckpt_file_list): function restore_exception_checkpoint (line 425) | def restore_exception_checkpoint(args_param, sink_size, dataset, model, ... function set_pipeline_parallel_context (line 487) | def set_pipeline_parallel_context(args_opt): function run_train_pipeline (line 506) | def run_train_pipeline(args_opt): FILE: codegeex/oneflow/codegeex_model.py function fast_gelu (line 7) | def fast_gelu(x): class MLP (line 14) | class MLP(torch.nn.Module): method __init__ (line 22) | def __init__( method forward (line 42) | def forward(self, hidden_states): class SelfAttention (line 52) | class SelfAttention(torch.nn.Module): method __init__ (line 58) | def __init__( method forward (line 85) | def forward( class TopQuerySelfAttention (line 279) | class TopQuerySelfAttention(torch.nn.Module): method __init__ (line 285) | def __init__( method forward (line 312) | def forward( class TransformerLayer (line 494) | class TransformerLayer(torch.nn.Module): method __init__ (line 500) | def __init__( method forward (line 530) | def forward( class TopQueryLayer (line 573) | class TopQueryLayer(torch.nn.Module): method __init__ (line 579) | def __init__( method forward (line 607) | def forward( class Transformer (line 657) | class Transformer(torch.nn.Module): method __init__ (line 660) | def __init__( method _get_layer_index (line 698) | def _get_layer_index(self, layer_number): method _get_layer (line 701) | def _get_layer(self, layer_number): method forward (line 704) | def forward( method state_dict_for_save_checkpoint (line 767) | def state_dict_for_save_checkpoint( class Embedding (line 773) | class Embedding(torch.nn.Module): method __init__ (line 782) | def __init__( method forward (line 802) | def forward(self, input_ids, position_ids): method state_dict_for_save_checkpoint (line 810) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 823) | def load_state_dict(self, state_dict, strict=True): class QueryEmbedding (line 852) | class QueryEmbedding(torch.nn.Module): method __init__ (line 861) | def __init__( method forward (line 878) | def forward(self, position_ids): method state_dict_for_save_checkpoint (line 884) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 895) | def load_state_dict(self, state_dict, strict=True): class TransformerLanguageModel (line 911) | class TransformerLanguageModel(torch.nn.Module): method __init__ (line 926) | def __init__( method forward (line 959) | def forward( method state_dict_for_save_checkpoint (line 986) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 1003) | def load_state_dict(self, state_dict, strict=True): class CodeGeeXModel (line 1039) | class CodeGeeXModel(torch.nn.Module): method __init__ (line 1042) | def __init__( method forward (line 1059) | def forward( method state_dict_for_save_checkpoint (line 1088) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 1097) | def load_state_dict(self, state_dict, strict=True): FILE: codegeex/oneflow/inference.py function get_ltor_masks_and_position_ids (line 12) | def get_ltor_masks_and_position_ids( function get_batch (line 67) | def get_batch( function top_k_logits (line 87) | def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")): function pad_batch (line 117) | def pad_batch(batch, pad_id, seq_length): function forward_step (line 127) | def forward_step( function get_token_stream (line 158) | def get_token_stream( function switch (line 212) | def switch(val1, val2, boolean): function sample_sequence_batch (line 217) | def sample_sequence_batch( FILE: codegeex/paddle/codegeex_model.py function fast_gelu (line 6) | def fast_gelu(x): class MLP (line 11) | class MLP(paddle.nn.Layer): method __init__ (line 20) | def __init__( method forward (line 40) | def forward(self, hidden_states): class SelfAttention (line 50) | class SelfAttention(paddle.nn.Layer): method __init__ (line 57) | def __init__( method forward (line 84) | def forward( class TopQuerySelfAttention (line 226) | class TopQuerySelfAttention(paddle.nn.Layer): method __init__ (line 233) | def __init__( method forward (line 260) | def forward( class TransformerLayer (line 400) | class TransformerLayer(paddle.nn.Layer): method __init__ (line 407) | def __init__( method forward (line 437) | def forward( class TopQueryLayer (line 478) | class TopQueryLayer(paddle.nn.Layer): method __init__ (line 485) | def __init__( method forward (line 513) | def forward( class Transformer (line 563) | class Transformer(paddle.nn.Layer): method __init__ (line 566) | def __init__( method _get_layer_index (line 604) | def _get_layer_index(self, layer_number): method _get_layer (line 607) | def _get_layer(self, layer_number): method forward (line 610) | def forward( method state_dict_for_save_checkpoint (line 672) | def state_dict_for_save_checkpoint( class Embedding (line 678) | class Embedding(paddle.nn.Layer): method __init__ (line 688) | def __init__( method forward (line 708) | def forward(self, input_ids, position_ids): method state_dict_for_save_checkpoint (line 716) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method set_state_dict (line 729) | def set_state_dict(self, state_dict, use_structured_name=True): class QueryEmbedding (line 758) | class QueryEmbedding(paddle.nn.Layer): method __init__ (line 768) | def __init__( method forward (line 785) | def forward(self, position_ids): method state_dict_for_save_checkpoint (line 791) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method set_state_dict (line 802) | def set_state_dict(self, state_dict, use_structured_name=True): class TransformerLanguageModel (line 818) | class TransformerLanguageModel(paddle.nn.Layer): method __init__ (line 834) | def __init__( method forward (line 867) | def forward( method state_dict_for_save_checkpoint (line 894) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method set_state_dict (line 911) | def set_state_dict(self, state_dict, use_structured_name=True): class CodeGeeXModel (line 947) | class CodeGeeXModel(paddle.nn.Layer): method __init__ (line 950) | def __init__( method forward (line 967) | def forward( method state_dict_for_save_checkpoint (line 996) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method set_state_dict (line 1005) | def set_state_dict(self, state_dict, use_structured_name=True): FILE: codegeex/paddle/inference.py function get_ltor_masks_and_position_ids (line 12) | def get_ltor_masks_and_position_ids( function get_batch (line 67) | def get_batch( function top_k_logits (line 87) | def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")): function pad_batch (line 117) | def pad_batch(batch, pad_id, seq_length): function forward_step (line 127) | def forward_step( function get_token_stream (line 158) | def get_token_stream( function switch (line 212) | def switch(val1, val2, boolean): function sample_sequence_batch (line 217) | def sample_sequence_batch( FILE: codegeex/paddle/pt_to_pdparams.py function WalkDict (line 15) | def WalkDict(x): function parse_opt (line 29) | def parse_opt(): function main (line 47) | def main(opt): FILE: codegeex/quantization/quantize.py class W8A16Linear (line 9) | class W8A16Linear(torch.autograd.Function): method forward (line 11) | def forward(ctx, inp: torch.Tensor, quant_w: torch.Tensor, scale_w: to... method backward (line 23) | def backward(ctx, grad_output: torch.Tensor): class QuantizedLinear (line 32) | class QuantizedLinear(torch.nn.Module): method __init__ (line 33) | def __init__( method forward (line 68) | def forward(self, input_): class QuantizedColumnParallelLinear (line 77) | class QuantizedColumnParallelLinear(ColumnParallelLinear): method __init__ (line 78) | def __init__( method forward (line 118) | def forward(self, input_): class QuantizedRowParallelLinear (line 136) | class QuantizedRowParallelLinear(RowParallelLinear): method __init__ (line 137) | def __init__( method forward (line 177) | def forward(self, input_): function quantize (line 196) | def quantize(model, weight_bit_width, backend="torch"): FILE: codegeex/quantization/quantize_oneflow.py function _pack_int8_to_int4 (line 5) | def _pack_int8_to_int4(x): function _quantize (line 16) | def _quantize(num_bits, symmetric, x, group_dim, group_size, quant_type): class QuantizedLinear (line 53) | class QuantizedLinear(torch.nn.Module): method __init__ (line 54) | def __init__( method forward (line 89) | def forward(self, input_): function quantize_oneflow (line 103) | def quantize_oneflow(model, weight_bit_width): FILE: codegeex/tokenizer/tokenizer.py function encode_whitespaces (line 7) | def encode_whitespaces(text: str, start_extra_id: int, max_len: int): function decode_whitespaces (line 18) | def decode_whitespaces(text: str, start_extra_id: int, max_len: int): class CodeGeeXTokenizer (line 33) | class CodeGeeXTokenizer(object): method __init__ (line 34) | def __init__( method encode_code (line 51) | def encode_code(self, code: str): method decode_code (line 58) | def decode_code(self, input_ids): FILE: codegeex/torch/codegeex_model.py function fast_gelu (line 7) | def fast_gelu(x): class MLP (line 12) | class MLP(torch.nn.Module): method __init__ (line 21) | def __init__( method forward (line 41) | def forward(self, hidden_states): class SelfAttention (line 51) | class SelfAttention(torch.nn.Module): method __init__ (line 58) | def __init__( method forward (line 85) | def forward( class TopQuerySelfAttention (line 227) | class TopQuerySelfAttention(torch.nn.Module): method __init__ (line 234) | def __init__( method forward (line 261) | def forward( class TransformerLayer (line 401) | class TransformerLayer(torch.nn.Module): method __init__ (line 408) | def __init__( method forward (line 438) | def forward( class TopQueryLayer (line 479) | class TopQueryLayer(torch.nn.Module): method __init__ (line 486) | def __init__( method forward (line 514) | def forward( class Transformer (line 564) | class Transformer(torch.nn.Module): method __init__ (line 567) | def __init__( method _get_layer_index (line 605) | def _get_layer_index(self, layer_number): method _get_layer (line 608) | def _get_layer(self, layer_number): method forward (line 611) | def forward( method state_dict_for_save_checkpoint (line 673) | def state_dict_for_save_checkpoint( class Embedding (line 679) | class Embedding(torch.nn.Module): method __init__ (line 689) | def __init__( method forward (line 709) | def forward(self, input_ids, position_ids): method state_dict_for_save_checkpoint (line 717) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 730) | def load_state_dict(self, state_dict, strict=True): class QueryEmbedding (line 759) | class QueryEmbedding(torch.nn.Module): method __init__ (line 769) | def __init__( method forward (line 786) | def forward(self, position_ids): method state_dict_for_save_checkpoint (line 792) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 803) | def load_state_dict(self, state_dict, strict=True): class TransformerLanguageModel (line 819) | class TransformerLanguageModel(torch.nn.Module): method __init__ (line 835) | def __init__( method forward (line 868) | def forward( method state_dict_for_save_checkpoint (line 895) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 912) | def load_state_dict(self, state_dict, strict=True): class CodeGeeXModel (line 948) | class CodeGeeXModel(torch.nn.Module): method __init__ (line 951) | def __init__( method forward (line 968) | def forward( method state_dict_for_save_checkpoint (line 997) | def state_dict_for_save_checkpoint(self, destination=None, prefix='', method load_state_dict (line 1006) | def load_state_dict(self, state_dict, strict=True): FILE: codegeex/torch/get_ckpt_qkv.py function main (line 9) | def main(): FILE: codegeex/torch/inference.py function get_ltor_masks_and_position_ids (line 12) | def get_ltor_masks_and_position_ids( function get_batch (line 67) | def get_batch( function top_k_logits (line 87) | def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")): function pad_batch (line 117) | def pad_batch(batch, pad_id, seq_length): function forward_step (line 127) | def forward_step( function get_token_stream (line 158) | def get_token_stream( function switch (line 212) | def switch(val1, val2, boolean): function sample_sequence_batch (line 217) | def sample_sequence_batch( FILE: deployment/server_gradio.py function model_provider (line 14) | def model_provider(args): function add_code_generation_args (line 27) | def add_code_generation_args(parser): function main (line 85) | def main(): FILE: tests/test_inference.py function model_provider (line 12) | def model_provider(args): function add_code_generation_args (line 26) | def add_code_generation_args(parser): function main (line 118) | def main(): FILE: tests/test_inference_megatron.py function set_random_seed (line 17) | def set_random_seed(seed): function model_provider (line 26) | def model_provider(pre_process=True, post_process=True): function add_code_generation_args (line 36) | def add_code_generation_args(parser): function main (line 136) | def main(): FILE: tests/test_inference_oneflow.py function model_provider (line 17) | def model_provider(args): function add_code_generation_args (line 31) | def add_code_generation_args(parser): function main (line 119) | def main(): FILE: tests/test_inference_paddle.py function model_provider (line 15) | def model_provider(args): function add_code_generation_args (line 41) | def add_code_generation_args(parser): function main (line 129) | def main():