SYMBOL INDEX (255 symbols across 21 files) FILE: calculon/command_line.py class CommandLine (line 20) | class CommandLine: method create_parser (line 26) | def create_parser(subparser): method run_command (line 34) | def run_command(logger, args): method register (line 45) | def register(cls): method command_lines (line 64) | def command_lines(): method all_names (line 68) | def all_names(): FILE: calculon/io.py class NpEncoder (line 22) | class NpEncoder(json.JSONEncoder): method default (line 23) | def default(self, obj): function is_json_extension (line 34) | def is_json_extension(filename): function write_json_file (line 38) | def write_json_file(jdata, filename): function read_json_file (line 46) | def read_json_file(filename): FILE: calculon/llm/all_executions.py class AllExecutions (line 34) | class AllExecutions(calculon.CommandLine): method create_parser (line 39) | def create_parser(subparser): method execution_fields (line 67) | def execution_fields(): method get_batch_size (line 77) | def get_batch_size(data_par, max_batch_size): method all_executions (line 88) | def all_executions(app, syst, num_procs, max_batch_size, datatype, fus... method run_command (line 134) | def run_command(logger, args): method search (line 190) | def search(app, syst, executions): method update_list (line 205) | def update_list(current, candidate, quantity): FILE: calculon/llm/layers.py class Layer (line 21) | class Layer: method __init__ (line 28) | def __init__(self, name, sys, fw_flops=0, agrad_flops=0, wgrad_flops=0, method get_stats_json (line 62) | def get_stats_json(self): method get_stats_str (line 120) | def get_stats_str(self): method set_bytes_per_element (line 149) | def set_bytes_per_element(self, bytes_per_element): method shard_optimizer (line 153) | def shard_optimizer(self, num_procs): method get_fw_flops (line 157) | def get_fw_flops(self): method get_fw_mem_accessed (line 160) | def get_fw_mem_accessed(self): method get_fw_arithmetic_intensity (line 165) | def get_fw_arithmetic_intensity(self): method get_recompute_flag (line 172) | def get_recompute_flag(self): method get_recomm_flag (line 175) | def get_recomm_flag(self): method reuses_activation (line 178) | def reuses_activation(self): method stores_activation (line 181) | def stores_activation(self): method stores_output (line 184) | def stores_output(self): method get_agrad_flops (line 187) | def get_agrad_flops(self): method get_agrad_mem_accessed (line 190) | def get_agrad_mem_accessed(self): method get_agrad_arithmetic_intensity (line 198) | def get_agrad_arithmetic_intensity(self): method get_wgrad_flops (line 205) | def get_wgrad_flops(self): method get_wgrad_mem_accessed (line 208) | def get_wgrad_mem_accessed(self): method get_wgrad_arithmetic_intensity (line 220) | def get_wgrad_arithmetic_intensity(self): method get_optim_step_flops (line 230) | def get_optim_step_flops(self): method get_optim_step_mem_accessed (line 234) | def get_optim_step_mem_accessed(self): method get_optim_step_arithmetic_intensity (line 237) | def get_optim_step_arithmetic_intensity(self): method get_weight (line 244) | def get_weight(self): method get_activation (line 247) | def get_activation(self): method get_output (line 250) | def get_output(self): method get_weight_grad (line 253) | def get_weight_grad(self, sharded=True): method get_activation_grad (line 265) | def get_activation_grad(self): method get_optimizer (line 268) | def get_optimizer(self): method set_processing_time (line 278) | def set_processing_time(self, processing_time): method get_processing_time (line 281) | def get_processing_time(self): method use_matrix_engine (line 284) | def use_matrix_engine(self): method get_comm_bytes (line 287) | def get_comm_bytes(self, stage, baseblock=True): method get_comm_tile (line 290) | def get_comm_tile(self, stage, baseblock=True): method compute_flops_time (line 293) | def compute_flops_time(self, stage): method compute_mem_time (line 310) | def compute_mem_time(self, stage): method compute_net_time (line 323) | def compute_net_time(self, stage, baseblock=True): method get_exposed_net_time (line 326) | def get_exposed_net_time(self, stage, baseblock=True): method get_required_bandwidth (line 329) | def get_required_bandwidth(self, stage, baseblock=True): method compute_processing_time (line 332) | def compute_processing_time(self, stage): class Linear (line 341) | class Linear(Layer): method __init__ (line 342) | def __init__(self, name, sys, batch_seq, c_in, c_out, method use_matrix_engine (line 363) | def use_matrix_engine(self): class LinearOverlapped (line 366) | class LinearOverlapped(Layer): method __init__ (line 367) | def __init__(self, name, sys, batch_seq, c_in, c_out, tensor_par_comm_... method use_matrix_engine (line 438) | def use_matrix_engine(self): method get_comm_bytes (line 441) | def get_comm_bytes(self, stage, baseblock=True): method get_comm_flops (line 479) | def get_comm_flops(self, stage, baseblock=True): method get_num_tiles (line 482) | def get_num_tiles(self): method get_comm_tile (line 485) | def get_comm_tile(self, stage, baseblock=True): method compute_net_time (line 488) | def compute_net_time(self, stage, baseblock=True): method compute_processing_time (line 549) | def compute_processing_time(self, stage): method get_exposed_net_time (line 594) | def get_exposed_net_time(self, stage, baseblock=True): method get_required_bandwidth (line 599) | def get_required_bandwidth(self, stage, baseblock=True): class BatchMatMul (line 607) | class BatchMatMul(Layer): method __init__ (line 608) | def __init__(self, name, sys, batch, size_a, contraction_size, size_b, method use_matrix_engine (line 625) | def use_matrix_engine(self): class LayerNorm (line 630) | class LayerNorm(Layer): method __init__ (line 631) | def __init__(self, name, sys, act_size, hidden, class DropOut (line 652) | class DropOut(Layer): method __init__ (line 653) | def __init__(self, name, sys, act_size, method get_activation (line 672) | def get_activation(self): method get_activation_grad (line 675) | def get_activation_grad(self): method get_fw_mem_accessed (line 678) | def get_fw_mem_accessed(self): method get_agrad_mem_accessed (line 685) | def get_agrad_mem_accessed(self): class GeLU (line 690) | class GeLU(Layer): method __init__ (line 691) | def __init__(self, name, sys, act_size, method get_agrad_mem_accessed (line 713) | def get_agrad_mem_accessed(self): class SoftMax (line 718) | class SoftMax(Layer): method __init__ (line 719) | def __init__(self, name, sys, act_size, method get_agrad_mem_accessed (line 735) | def get_agrad_mem_accessed(self): class ElementWise (line 740) | class ElementWise(Layer): method __init__ (line 741) | def __init__(self, name, sys, operand1, operand2, class Fork (line 760) | class Fork(Layer): method __init__ (line 761) | def __init__(self, name, sys, act_size, num_users, method get_fw_mem_accessed (line 779) | def get_fw_mem_accessed(self): method get_agrad_mem_accessed (line 782) | def get_agrad_mem_accessed(self): class TPComm (line 787) | class TPComm(Layer): method __init__ (line 789) | def __init__(self, name, sys, act_size, net_id, num_peers, tensor_par_... method get_activation (line 835) | def get_activation(self): method get_fw_mem_accessed (line 845) | def get_fw_mem_accessed(self): method get_activation_grad (line 852) | def get_activation_grad(self): method get_agrad_mem_accessed (line 862) | def get_agrad_mem_accessed(self): method get_comm_bytes (line 869) | def get_comm_bytes(self, stage, baseblock=True): method compute_net_time (line 890) | def compute_net_time(self, stage, baseblock=True): method get_exposed_net_time (line 935) | def get_exposed_net_time(self, stage, baseblock=True): method compute_processing_time (line 939) | def compute_processing_time(self, stage): FILE: calculon/llm/llm.py class Llm (line 22) | class Llm: class Application (line 31) | class Application: method __init__ (line 33) | def __init__(self, cfg): method num_parameters (line 42) | def num_parameters(self): class Execution (line 54) | class Execution: method fields (line 58) | def fields(): method from_json (line 68) | def from_json(cfg): method __init__ (line 73) | def __init__(self, num_procs, tensor_par, pipeline_par, data_par, method get_json (line 147) | def get_json(self): method get_peers_json (line 160) | def get_peers_json(self): class Error (line 202) | class Error(Exception): method _factors (line 206) | def _factors(x): method get_all_tensor_parallelisms (line 212) | def get_all_tensor_parallelisms(num_procs, hidden, attn_heads): method get_all_pipeline_parallelisms (line 218) | def get_all_pipeline_parallelisms(num_procs, tensor_par, num_blocks): method get_data_parallelism (line 227) | def get_data_parallelism(num_procs, tensor_par, pipeline_par): method get_valid_pipeline_interleavings (line 233) | def get_valid_pipeline_interleavings(num_blocks, pipeline_par): method get_valid_microbatch_sizes (line 242) | def get_valid_microbatch_sizes( method can_redo_ag (line 252) | def can_redo_ag(tensor_par_comm_type, activation_recompute): method __init__ (line 255) | def __init__(self, app, log): method get_stats_fields (line 417) | def get_stats_fields(): method get_stats_values (line 521) | def get_stats_values(self): method get_stats_json (line 626) | def get_stats_json(self, include_layers): method _build_attn_block (line 638) | def _build_attn_block(self): method _build_mlp_block (line 901) | def _build_mlp_block(self): method compile (line 1027) | def compile(self, sys, exe): method _check_network_assignments (line 1095) | def _check_network_assignments(self): method _compute_block_stats (line 1127) | def _compute_block_stats(self): method _compute_batch_stats (line 1448) | def _compute_batch_stats(self): method _check_mem_caps (line 1930) | def _check_mem_caps(self): method _misc_sanity_checks (line 1942) | def _misc_sanity_checks(self): method run (line 2011) | def run(self, sys): method _get_fw_offload_size (line 2021) | def _get_fw_offload_size(self): method _get_bw_offload_size (line 2035) | def _get_bw_offload_size(self): method get_fw_time (line 2049) | def get_fw_time(self): method get_fw_offload_time (line 2052) | def get_fw_offload_time(self): method get_fw_offload_overhead (line 2055) | def get_fw_offload_overhead(self): method get_bw_time (line 2061) | def get_bw_time(self): method get_optim_step_time (line 2064) | def get_optim_step_time(self): method get_bw_offload_time (line 2067) | def get_bw_offload_time(self): method get_bw_offload_overhead (line 2073) | def get_bw_offload_overhead(self): method get_recompute_time (line 2082) | def get_recompute_time(self): method get_recomm_exposed_time (line 2085) | def get_recomm_exposed_time(self): method get_recomm_link_time (line 2091) | def get_recomm_link_time(self): method get_bubble_time (line 2097) | def get_bubble_time(self): method get_tp_comm_exposed_time (line 2100) | def get_tp_comm_exposed_time(self): method get_pp_comm_exposed_time (line 2103) | def get_pp_comm_exposed_time(self): method get_dp_comm_exposed_time (line 2106) | def get_dp_comm_exposed_time(self): method get_tp_comm_link_time (line 2112) | def get_tp_comm_link_time(self): method get_pp_comm_link_time (line 2115) | def get_pp_comm_link_time(self): method get_dp_comm_link_time (line 2118) | def get_dp_comm_link_time(self): method get_dp_comm_net_time (line 2124) | def get_dp_comm_net_time(self): method get_total_time (line 2130) | def get_total_time(self): method get_useful_flops (line 2144) | def get_useful_flops(self): method get_compute_efficiency (line 2153) | def get_compute_efficiency(self): method get_system_efficiency (line 2161) | def get_system_efficiency(self): method get_total_efficiency (line 2166) | def get_total_efficiency(self): method get_weight_space_min (line 2172) | def get_weight_space_min(self): method get_weight_space (line 2175) | def get_weight_space(self): method get_act_space_min (line 2178) | def get_act_space_min(self): method get_act_space (line 2184) | def get_act_space(self): method get_act_checkpoint_size_min (line 2187) | def get_act_checkpoint_size_min(self): method get_act_checkpoint_size (line 2194) | def get_act_checkpoint_size(self): method get_weight_grad_space_min (line 2203) | def get_weight_grad_space_min(self): method get_weight_grad_space (line 2212) | def get_weight_grad_space(self): method get_act_grad_space_min (line 2218) | def get_act_grad_space_min(self): method get_act_grad_space (line 2221) | def get_act_grad_space(self): method get_optimizer_space_min (line 2229) | def get_optimizer_space_min(self): method get_optimizer_space (line 2235) | def get_optimizer_space(self): method _get_mem_cap_reqs (line 2241) | def _get_mem_cap_reqs(self): method get_mem_tier1_cap_req (line 2273) | def get_mem_tier1_cap_req(self): method get_mem_tier2_cap_req (line 2276) | def get_mem_tier2_cap_req(self): method get_act_offload_bw_req (line 2279) | def get_act_offload_bw_req(self): method get_weight_offload_bw_req (line 2292) | def get_weight_offload_bw_req(self): method get_optim_offload_bw_req (line 2301) | def get_optim_offload_bw_req(self): method get_offload_mem_bw_req (line 2316) | def get_offload_mem_bw_req(self): method get_sample_rate (line 2332) | def get_sample_rate(self): method display_stats (line 2335) | def display_stats(self): FILE: calculon/llm/optimal_execution.py class OptimalExecution (line 30) | class OptimalExecution(calculon.CommandLine): method create_parser (line 35) | def create_parser(subparser): method run_command (line 73) | def run_command(logger, args): method get_batch_size (line 165) | def get_batch_size(data_par, max_batch_size): method search (line 176) | def search(debug, top_n, layers, num_procs, max_batch_size, datatype, method update_list (line 260) | def update_list(current, candidate, quantity): FILE: calculon/llm/parameter_calculator.py class ParameterCalculator (line 23) | class ParameterCalculator(calculon.CommandLine): method create_parser (line 28) | def create_parser(subparser): method run_command (line 39) | def run_command(logger, args): FILE: calculon/llm/runner.py class Runner (line 21) | class Runner(calculon.CommandLine): method create_parser (line 26) | def create_parser(subparser): method run_command (line 44) | def run_command(logger, args): FILE: calculon/llm/validation.py class Validation (line 27) | class Validation(calculon.CommandLine): method create_parser (line 32) | def create_parser(subparser): method run_command (line 43) | def run_command(logger, args): method seqsel_fig1 (line 56) | def seqsel_fig1(logger, args): method seqsel_fig7 (line 184) | def seqsel_fig7(logger, args): method seqsel_tab5 (line 281) | def seqsel_tab5(logger, args): FILE: calculon/memory.py class Memory (line 18) | class Memory: method __init__ (line 21) | def __init__(self, cfg): method capacity (line 31) | def capacity(self): method bandwidth (line 35) | def bandwidth(self): method efficiency (line 38) | def efficiency(self, op_bytes): method throughput (line 44) | def throughput(self, op_bytes): FILE: calculon/network.py class Network (line 19) | class Network: class Op (line 27) | class Op: method __init__ (line 28) | def __init__(self, scalar, offset): method _parse_op (line 33) | def _parse_op(op, scalar, offset): method __init__ (line 43) | def __init__(self, cfg): method size (line 62) | def size(self): method must_be_filled (line 66) | def must_be_filled(self): method processor_usage (line 70) | def processor_usage(self): method time (line 73) | def time(self, op, op_size, comm_size): FILE: calculon/processor.py class Processor (line 18) | class Processor: method __init__ (line 21) | def __init__(self, cfg): method flops (line 37) | def flops(self, datatype): method efficiency (line 40) | def efficiency(self, datatype, op_flops): method throughput (line 46) | def throughput(self, datatype, op_flops): FILE: calculon/system.py class System (line 22) | class System: method supported_datatypes (line 33) | def supported_datatypes(): method __init__ (line 36) | def __init__(self, cfg): method num_networks (line 51) | def num_networks(self): method get_network (line 54) | def get_network(self, tier): method set_datatype (line 58) | def set_datatype(self, datatype): method get_matrix_throughput (line 62) | def get_matrix_throughput(self, flops): method get_vector_throughput (line 65) | def get_vector_throughput(self, flops): method get_mem1_throughput (line 68) | def get_mem1_throughput(self, size): method get_mem2_throughput (line 71) | def get_mem2_throughput(self, size): method compute_offload_time (line 74) | def compute_offload_time(self, size): method get_processing_time (line 77) | def get_processing_time(self, flops_time, mem_time): FILE: calculon/util.py function human_format (line 21) | def human_format(value, v_type='base10', precision=3): function pick (line 66) | def pick(en, a, b): function arg_true_false_all (line 72) | def arg_true_false_all(arg): FILE: calculon/version.py class Version (line 20) | class Version(calculon.CommandLine): method create_parser (line 25) | def create_parser(subparser): method run_command (line 31) | def run_command(logger, args): FILE: scripts/3dplot.py function main (line 12) | def main(args): FILE: scripts/find_huge.py function transformer_attn_size (line 10) | def transformer_attn_size(hidden, layers, attn_size_step=32): function transformer_num_parameters (line 13) | def transformer_num_parameters(hidden, layers, attn_size_step=32): function transformer_t_params (line 21) | def transformer_t_params(hidden, layers): function step_rounder (line 24) | def step_rounder(layer, step=1): function model_ratio (line 27) | def model_ratio(hidden, layers): function human_format (line 30) | def human_format(value, v_type='base10', precision=3): function ratio_layer_scale (line 75) | def ratio_layer_scale(hidden, ratio=128, step=4): function ratio_hidden_scale (line 77) | def ratio_hidden_scale(layers, ratio=128, step=4096): function ratio_param_layer_scale (line 79) | def ratio_param_layer_scale(layers, ratio=128, step=4096): function ratio_param_hidden_scale (line 82) | def ratio_param_hidden_scale(hidden, ratio=128, step=4): FILE: scripts/heatmap.py function main (line 13) | def main(args): FILE: scripts/json_to_csv.py function main (line 10) | def main(args): FILE: setup.py function find_version (line 30) | def find_version(*file_paths): FILE: test/test_json_write_read.py class JsonWriteReadTestCase (line 23) | class JsonWriteReadTestCase(unittest.TestCase): method test_json_read_write (line 24) | def test_json_read_write(self):