SYMBOL INDEX (275 symbols across 14 files) FILE: examples/gpt/gpt_piped.py class GPTModelPiped (line 19) | class GPTModelPiped(VeGiantModule): method __init__ (line 20) | def __init__(self): method _get_batch (line 75) | def _get_batch(self, data): method loss_fn (line 92) | def loss_fn(self, inputs, data): method batch_fn (line 115) | def batch_fn(self, batch, is_train:bool): class LMLogitsPiped (line 137) | class LMLogitsPiped(MegatronModule): method __init__ (line 138) | def __init__(self, hidden_size, vocab_size, init_method): method forward (line 144) | def forward(self, lm_output): class EmbeddingPiped (line 148) | class EmbeddingPiped(Embedding): method __init__ (line 149) | def __init__(self, method forward (line 164) | def forward(self, inputs): class ParallelTransformerLayerPiped (line 168) | class ParallelTransformerLayerPiped(ParallelTransformerLayer): method __init__ (line 169) | def __init__(self, method forward (line 179) | def forward(self, inputs): FILE: examples/gpt/initialize.py function get_learning_rate_scheduler (line 15) | def get_learning_rate_scheduler(optimizer, lr_scheduler_builder): function get_model (line 45) | def get_model(model_provider_func): function get_optimizer (line 63) | def get_optimizer(model): function setup_model_and_optimizer (line 107) | def setup_model_and_optimizer(model, optimizer, train_dataset_provider, ... function initialize_pipeline (line 155) | def initialize_pipeline(model, optimizer, train_dataset_provider, lr_sch... function initialize_distributed (line 159) | def initialize_distributed(num_stages, mp_size, distributed_backend='ncc... function initialize_megatron (line 162) | def initialize_megatron(extra_args_provider=None, args_defaults={}): FILE: examples/gpt/pretrain_gpt2.py function _build_index_mappings (line 26) | def _build_index_mappings(name, data_prefix, documents, sizes, class GPT2DatasetFixed (line 130) | class GPT2DatasetFixed(torch.utils.data.Dataset): method __init__ (line 131) | def __init__(self, name, data_prefix, documents, indexed_dataset, method __len__ (line 146) | def __len__(self): method __getitem__ (line 151) | def __getitem__(self, idx): function build_train_valid_test_datasets (line 181) | def build_train_valid_test_datasets(data_prefix, data_impl, splits_string, function model_provider (line 223) | def model_provider(): function lr_scheduler_builder (line 230) | def lr_scheduler_builder(optimizer): function pretrain (line 257) | def pretrain(model_provider, args_defaults={}): function traing_log (line 273) | def traing_log(loss_dict, iteration): function train_valid_test_dataset_provider (line 301) | def train_valid_test_dataset_provider(train_val_test_num_samples): function train (line 319) | def train(engine, optimizer, lr_scheduler): FILE: src/veGiantModel/__init__.py function initialize (line 17) | def initialize(args, FILE: src/veGiantModel/distributed/__init__.py function get_model_parallel_world_size (line 4) | def get_model_parallel_world_size(): function get_model_parallel_rank (line 7) | def get_model_parallel_rank(): function get_data_parallel_world_size (line 10) | def get_data_parallel_world_size(): function get_model_parallel_group (line 13) | def get_model_parallel_group(): function get_grid (line 16) | def get_grid(): function copy_to_model_parallel_region (line 19) | def copy_to_model_parallel_region(input_): function reduce_from_model_parallel_region (line 22) | def reduce_from_model_parallel_region(input_): function gather_from_model_parallel_region (line 25) | def gather_from_model_parallel_region(input_): FILE: src/veGiantModel/engine/engine.py function is_even (line 44) | def is_even(number): function _tensor_bytes (line 57) | def _tensor_bytes(tensor): function _dtype_to_code (line 60) | def _dtype_to_code(dtype): function _code_to_dtype (line 76) | def _code_to_dtype(code): class VeGiantModelEngine (line 92) | class VeGiantModelEngine(PipelineEngine): method overwrite (line 98) | def overwrite(self, config_params, args): method __init__ (line 117) | def __init__(self, args, method _profiling_func_exit (line 315) | def _profiling_func_exit(self): method _profiling_func_enter (line 318) | def _profiling_func_enter(self, func): method _build_data_iter (line 321) | def _build_data_iter(self, dataset): method _exec_reduce_tied_grads (line 335) | def _exec_reduce_tied_grads(self): method _exec_reduce_grads (line 340) | def _exec_reduce_grads(self): method _reserve_pipe_buffers (line 350) | def _reserve_pipe_buffers(self, num_buffers): method train_batch (line 366) | def train_batch(self, data_iter=None): method eval_batch (line 459) | def eval_batch(self, data_iter): method is_first_stage (line 538) | def is_first_stage(self): method is_last_stage (line 542) | def is_last_stage(self): method _aggregate_metric (line 546) | def _aggregate_metric(self): method _aggregate_total_loss (line 574) | def _aggregate_total_loss(self): method set_dataloader (line 613) | def set_dataloader(self, loader): method set_dataiterator (line 619) | def set_dataiterator(self, iterator): method set_batch_fn (line 625) | def set_batch_fn(self, fn): method is_gradient_accumulation_boundary (line 630) | def is_gradient_accumulation_boundary(self): method tput_log (line 642) | def tput_log(self, *msg): method _next_batch (line 646) | def _next_batch(self): method _exec_bps_forward_pass (line 679) | def _exec_bps_forward_pass(self, buffer_id): method _exec_bps_backward_pass (line 730) | def _exec_bps_backward_pass(self, buffer_id): method _exec_load_micro_batch (line 787) | def _exec_load_micro_batch(self, buffer_id): method _send_tensor_meta (line 838) | def _send_tensor_meta(self, buffer, recv_stage): method _recv_tensor_meta (line 908) | def _recv_tensor_meta(self, send_stage): method _mp_slice (line 971) | def _mp_slice(self, x): method _mp_view (line 975) | def _mp_view(self, x, rank): method _exec_bps_send_partitioned_activations (line 979) | def _exec_bps_send_partitioned_activations(self, buffer_id): method _exec_bps_send_activations (line 1010) | def _exec_bps_send_activations(self, buffer_id): method _exec_bps_send_grads (line 1042) | def _exec_bps_send_grads(self, buffer_id): method _exec_bps_send_partitioned_grads (line 1084) | def _exec_bps_send_partitioned_grads(self, buffer_id): method _exec_bps_sync_all (line 1126) | def _exec_bps_sync_all(self): method _exec_bps_sync_partitioned_grads (line 1129) | def _exec_bps_sync_partitioned_grads(self, buffer_id): method _exec_bps_sync_grads (line 1154) | def _exec_bps_sync_grads(self, buffer_id): method _exec_bps_sync_partitioned_activations (line 1175) | def _exec_bps_sync_partitioned_activations(self, buffer_id): method _exec_bps_sync_activations (line 1208) | def _exec_bps_sync_activations(self, buffer_id): method _exec_bps_recv_partitioned_activations (line 1238) | def _exec_bps_recv_partitioned_activations(self, buffer_id): method _exec_bps_recv_activations (line 1273) | def _exec_bps_recv_activations(self, buffer_id): method _exec_bps_recv_partitioned_grads (line 1307) | def _exec_bps_recv_partitioned_grads(self, buffer_id): method _exec_bps_recv_grads (line 1344) | def _exec_bps_recv_grads(self, buffer_id): method _exec_optimizer_step (line 1380) | def _exec_optimizer_step(self, lr_kwargs=None): method _zero_grads (line 1446) | def _zero_grads(self, inputs): method _allocate_zeros (line 1455) | def _allocate_zeros(self, shape, fp16=None, **kwargs): method _allocate_zeros2 (line 1475) | def _allocate_zeros2(self, shape, dtype, **kwargs): method _allocate_buffer (line 1478) | def _allocate_buffer(self, shape, num_buffers=-1, **kwargs): method _allocate_buffer2 (line 1486) | def _allocate_buffer2(self, shape, dtype, num_buffers=-1, **kwargs): method _allocate_buffers (line 1494) | def _allocate_buffers(self, shapes, requires_grad=False, num_buffers=-1): method _allocate_buffers2 (line 1505) | def _allocate_buffers2(self, shapes, dtypes, requires_grad=False, num_... method forward (line 1516) | def forward(self, *args, **kwargs): method backward (line 1520) | def backward(self, *args, **kwargs): method step (line 1524) | def step(self, *args, **kwargs): method _exec_schedule (line 1546) | def _exec_schedule(self, pipe_schedule): FILE: src/veGiantModel/engine/module.py class VeGiantModule (line 21) | class VeGiantModule(PipelineModule): method __init__ (line 22) | def __init__(self, method _build (line 134) | def _build(self): method _count_layer_params (line 188) | def _count_layer_params(self): method _find_layer_type (line 207) | def _find_layer_type(self, layername): method forward (line 229) | def forward(self, forward_input): method _partition_uniform (line 285) | def _partition_uniform(self, num_items, num_parts): method _partition_balanced (line 298) | def _partition_balanced(self, weights, num_parts, eps=1e-3): method _partition_layers (line 315) | def _partition_layers(self, method='uniform'): method allreduce_tied_weight_gradients (line 379) | def allreduce_tied_weight_gradients(self): method _synchronize_tied_weights (line 385) | def _synchronize_tied_weights(self): method _index_tied_modules (line 394) | def _index_tied_modules(self): method partitions (line 450) | def partitions(self): method stage_owner (line 453) | def stage_owner(self, layer_idx): method _set_bounds (line 460) | def _set_bounds(self, start=None, stop=None): method set_checkpoint_interval (line 470) | def set_checkpoint_interval(self, interval): method topology (line 474) | def topology(self): method mpu (line 478) | def mpu(self): method num_pipeline_stages (line 481) | def num_pipeline_stages(self): method ckpt_prefix (line 484) | def ckpt_prefix(self, checkpoints_path, tag): method ckpt_layer_path (line 500) | def ckpt_layer_path(self, ckpt_dir, local_layer_idx): method save_state_dict (line 510) | def save_state_dict(self, save_dir): method load_state_dir (line 522) | def load_state_dir(self, load_dir, strict=True): method _is_checkpointable (line 543) | def _is_checkpointable(self, funcs): FILE: src/veGiantModel/engine/p2p.py function init_process_groups (line 40) | def init_process_groups(grid): function _is_valid_send_recv (line 49) | def _is_valid_send_recv(src_stage, dest_stage): function send (line 58) | def send(tensor, dest_stage, async_op=False): function _bps_get_name (line 73) | def _bps_get_name(src, dest, name, suffix): function bps_send (line 76) | def bps_send(tensor, dest_stage, name, index, async_op=True): function bps_sync (line 99) | def bps_sync(src_stage, name, index=0): function bps_sync_all (line 110) | def bps_sync_all(): function bps_recv (line 121) | def bps_recv(tensor, src_stage, name, index=0, async_op=True): function _send (line 144) | def _send(tensor, src_rank, group, async_op): function send_grads (line 148) | def send_grads(tensor, grid, async_op=False): function _recv (line 158) | def _recv(tensor, src_rank, group, async_op): function recv_grads (line 164) | def recv_grads(tensor, grid, async_op=False): function send_activations (line 171) | def send_activations(tensor, grid, async_op=False): function recv_activations (line 181) | def recv_activations(tensor, grid, async_op=False): function recv (line 187) | def recv(tensor, src_stage, async_op=False): function barrier (line 200) | def barrier(stage_id): function _get_send_recv_group (line 211) | def _get_send_recv_group(src_stage, dest_stage): FILE: src/veGiantModel/engine/schedule.py class BytePSInferenceSchedule (line 12) | class BytePSInferenceSchedule(PipeSchedule): method __init__ (line 15) | def __init__(self, micro_batches, stages, stage_id, prefetch=True): method steps (line 19) | def steps(self): method num_pipe_buffers (line 69) | def num_pipe_buffers(self): class BytePSTrainSchedule (line 81) | class BytePSTrainSchedule(TrainSchedule): method __init__ (line 88) | def __init__(self, micro_batches, stages, stage_id, prefetch=True): method steps (line 94) | def steps(self): method _steps (line 100) | def _steps(self): method _steps_no_prefetch (line 184) | def _steps_no_prefetch(self): method num_pipe_buffers (line 231) | def num_pipe_buffers(self): class BytePSSendActivation (line 240) | class BytePSSendActivation(BufferOpInstruction): class BytePSRecvActivation (line 243) | class BytePSRecvActivation(BufferOpInstruction): class BytePSSyncActivation (line 246) | class BytePSSyncActivation(BufferOpInstruction): class BytePSSyncGrad (line 249) | class BytePSSyncGrad(BufferOpInstruction): class BytePSSendGrad (line 252) | class BytePSSendGrad(BufferOpInstruction): class BytePSRecvGrad (line 255) | class BytePSRecvGrad(BufferOpInstruction): class BytePSForwardPass (line 258) | class BytePSForwardPass(BufferOpInstruction): class BytePSBackwardPass (line 261) | class BytePSBackwardPass(BufferOpInstruction): class BytePSSyncAll (line 264) | class BytePSSyncAll(PipeInstruction): FILE: src/veGiantModel/engine/topology.py class ProcessTopology (line 14) | class ProcessTopology: method __init__ (line 27) | def __init__(self, axes, dims): method get_rank (line 50) | def get_rank(self, **coord_kwargs): method get_axis_names (line 66) | def get_axis_names(self): method get_rank_repr (line 70) | def get_rank_repr(self, method get_dim (line 104) | def get_dim(self, axis): method get_coord (line 116) | def get_coord(self, rank): method get_axis_comm_lists (line 133) | def get_axis_comm_lists(self, axis): method filter_match (line 173) | def filter_match(self, **filter_kwargs): method get_axis_list (line 198) | def get_axis_list(self, axis, idx): method world_size (line 215) | def world_size(self): method __str__ (line 218) | def __str__(self): function _prime_factors (line 222) | def _prime_factors(N): class PipeDataParallelTopology (line 237) | class PipeDataParallelTopology(ProcessTopology): method __init__ (line 244) | def __init__(self, num_pp, num_dp): class PipeModelDataParallelTopology (line 248) | class PipeModelDataParallelTopology(ProcessTopology): method __init__ (line 250) | def __init__(self, num_dp, num_pp, num_mp): class PipelineParallelGrid (line 255) | class PipelineParallelGrid: method __init__ (line 277) | def __init__(self, topology=None, process_group=None): method get_stage_id (line 388) | def get_stage_id(self): method get_data_parallel_id (line 391) | def get_data_parallel_id(self): method get_model_parallel_id (line 394) | def get_model_parallel_id(self): method get_src_parallel_src_id (line 399) | def get_src_parallel_src_id(self): method _build_p2p_groups (line 406) | def _build_p2p_groups(self): method _build_grads_groups (line 430) | def _build_grads_groups(self): method _build_activation_groups (line 471) | def _build_activation_groups(self): method _is_grid_valid (line 510) | def _is_grid_valid(self): method stage_to_global (line 518) | def stage_to_global(self, stage_id, **kwargs): method stage_to_byteps (line 524) | def stage_to_byteps(self, stage_id): method topology (line 527) | def topology(self): method get_global_rank (line 531) | def get_global_rank(self): method get_pipe_parallel_rank (line 534) | def get_pipe_parallel_rank(self): method get_pipe_parallel_world_size (line 538) | def get_pipe_parallel_world_size(self): method get_pipe_parallel_group (line 542) | def get_pipe_parallel_group(self): method get_data_parallel_rank (line 546) | def get_data_parallel_rank(self): method get_data_parallel_world_size (line 550) | def get_data_parallel_world_size(self): method get_data_parallel_group (line 554) | def get_data_parallel_group(self): method get_model_parallel_rank (line 560) | def get_model_parallel_rank(self): method get_model_parallel_world_size (line 563) | def get_model_parallel_world_size(self): method get_model_parallel_group (line 566) | def get_model_parallel_group(self): method get_slice_parallel_rank (line 570) | def get_slice_parallel_rank(self): method get_slice_parallel_world_size (line 573) | def get_slice_parallel_world_size(self): method get_slice_parallel_group (line 576) | def get_slice_parallel_group(self): method get_slice_parallel_src_rank (line 579) | def get_slice_parallel_src_rank(self): FILE: src/veGiantModel/initialize.py function add_byte_giant_model_customize_args (line 17) | def add_byte_giant_model_customize_args(parser): function initialize_megatron (line 53) | def initialize_megatron(extra_args_provider=None, args_defaults={}): function _init_topology (line 59) | def _init_topology(num_stages, mp_size): function _set_random_seed (line 70) | def _set_random_seed(seed): function init_distribute (line 81) | def init_distribute(num_stages, mp_size, FILE: src/veGiantModel/launcher/launch.py class PropagatingThread (line 13) | class PropagatingThread(threading.Thread): method run (line 18) | def run(self): method join (line 31) | def join(self): function launch_scheduler (line 37) | def launch_scheduler(local_rank): function get_worker0_host (line 65) | def get_worker0_host(): function get_worker0_port (line 69) | def get_worker0_port(): function setup_env (line 73) | def setup_env(local_rank): function launch_bps (line 114) | def launch_bps(local_rank): FILE: src/veGiantModel/module/dense.py class MockModule (line 25) | class MockModule(nn.Module): class LinearFunction (line 32) | class LinearFunction(autograd.Function): method forward (line 35) | def forward(ctx, input_tensor, weight, bias, act_gelu=False, dropout_r... method backward (line 48) | def backward(ctx, grad_out): class FTLinear (line 60) | class FTLinear(nn.Module): method __init__ (line 61) | def __init__(self, in_features, out_features, initializer_range=0.02, ... method forward (line 74) | def forward(self, input_tensor): method extra_repr (line 77) | def extra_repr(self): class LinearTransposeFunction (line 86) | class LinearTransposeFunction(autograd.Function): method forward (line 88) | def forward(ctx, input_tensor, weight, bias, head_num, transpose_type): method backward (line 96) | def backward(ctx, grad_out): class FTLinearTranspose (line 101) | class FTLinearTranspose(nn.Module): method __init__ (line 102) | def __init__(self, in_features, out_features, head_num, transpose_type... method forward (line 115) | def forward(self, input_tensor): method extra_repr (line 118) | def extra_repr(self): function column_parallel_load_hook (line 125) | def column_parallel_load_hook(module, log_fn): function column_serial_load_hook (line 165) | def column_serial_load_hook(module, log_fn): class ColumnSerialLinear (line 211) | class ColumnSerialLinear(MockModule): method __init__ (line 212) | def __init__(self, in_features, out_features, initializer_range=0.02, method forward (line 243) | def forward(self, input_tensor): method extra_repr (line 255) | def extra_repr(self): class ColumnParallelLinear (line 258) | class ColumnParallelLinear(nn.Module): method __init__ (line 259) | def __init__(self, in_features, out_features, initializer_range=0.02, method forward (line 309) | def forward(self, input_tensor): method extra_repr (line 321) | def extra_repr(self): class RowSerialLinear (line 324) | class RowSerialLinear(MockModule): method __init__ (line 325) | def __init__(self, in_features, out_features, initializer_range=0.02, ... method forward (line 365) | def forward(self, input_tensor): method extra_repr (line 381) | def extra_repr(self): class RowParallelLinear (line 384) | class RowParallelLinear(nn.Module): method __init__ (line 385) | def __init__(self, in_features, out_features, initializer_range=0.02, ... method forward (line 438) | def forward(self, input_tensor): method extra_repr (line 450) | def extra_repr(self): class ColumnParallelLinearTranspose (line 454) | class ColumnParallelLinearTranspose(nn.Module): method __init__ (line 455) | def __init__(self, in_features, out_features, head_num, transpose_type... method forward (line 497) | def forward(self, input_tensor): method extra_repr (line 511) | def extra_repr(self): class ColumnSerialLinearTranspose (line 514) | class ColumnSerialLinearTranspose(MockModule): method __init__ (line 515) | def __init__(self, in_features, out_features, head_num, transpose_type... method forward (line 547) | def forward(self, input_tensor): method extra_repr (line 562) | def extra_repr(self): FILE: src/veGiantModel/patcher.py function is_unitialized (line 9) | def is_unitialized(): function initialize_model_parallel (line 14) | def initialize_model_parallel(grid): function model_parallel_is_initialized (line 21) | def model_parallel_is_initialized(): function get_model_parallel_group (line 28) | def get_model_parallel_group(): function get_data_parallel_group (line 35) | def get_data_parallel_group(): function set_model_parallel_world_size (line 42) | def set_model_parallel_world_size(world_size): function get_model_parallel_world_size (line 46) | def get_model_parallel_world_size(): function set_model_parallel_rank (line 51) | def set_model_parallel_rank(rank): function get_model_parallel_rank (line 55) | def get_model_parallel_rank(): function get_model_parallel_src_rank (line 60) | def get_model_parallel_src_rank(): function get_data_parallel_world_size (line 64) | def get_data_parallel_world_size(): function get_data_parallel_rank (line 69) | def get_data_parallel_rank(): function get_pipe_parallel_rank (line 73) | def get_pipe_parallel_rank(): function destroy_model_parallel (line 76) | def destroy_model_parallel(): function get_grid (line 81) | def get_grid(): function get_topo (line 84) | def get_topo(): function _gather (line 113) | def _gather(input_): function build_tokenizer (line 140) | def build_tokenizer(args):