SYMBOL INDEX (182 symbols across 28 files) FILE: ae/figure10/plot_latency.py function get_total_decoding_latency (line 19) | def get_total_decoding_latency(df: pd.DataFrame, start, end): function get_intensity (line 69) | def get_intensity(color): FILE: ae/figure10/test_latency.py function simulate_decoding_latency (line 35) | def simulate_decoding_latency(system, bs, seq_len, name, lock): function simulate_prefill_latency (line 54) | def simulate_prefill_latency(system, bs, seq_len, name, lock): FILE: ae/figure11/test_decoding.py function simulate_latency (line 27) | def simulate_latency(system, bs, seq_len, name, lock): FILE: ae/figure12/plot_throughput.py function get_total_decoding_latency (line 33) | def get_total_decoding_latency(df: pd.DataFrame, start, end): function get_intensity (line 125) | def get_intensity(color): FILE: ae/figure12/test_throughput.py function simulate_decoding_latency (line 34) | def simulate_decoding_latency(system, bs, seq_len, name, lock, heuristics): function simulate_prefill_latency (line 55) | def simulate_prefill_latency(system, bs, seq_len, name, lock, heuristics): FILE: ae/figure5/ijkl/plot_transformer.py function read_csv (line 6) | def read_csv(filename: str): FILE: ae/figure7/change_core_size.py function test_core_size (line 38) | def test_core_size(core_configs, lock): FILE: ae/figure8/change_memory_bw.py function test_memory_bandwidth (line 54) | def test_memory_bandwidth(memory_bandwidth, lock): FILE: ae/figure9/change_l1_cache.py function test_SRAM_KB (line 38) | def test_SRAM_KB(SRAM_KB, lock): FILE: cost_model/cost_model.py function calc_systolic_array_area_mm2 (line 91) | def calc_systolic_array_area_mm2(dimension_x, dimension_y, bitwidth, tra... function calc_vector_area_mm2 (line 102) | def calc_vector_area_mm2(int32_count, fp16_count, fp32_count, fp64_count... function calc_cache_sram_area_mm2 (line 114) | def calc_cache_sram_area_mm2(capacity_bytes, sram_bitcell_area_mm2, max_... function calc_reg_file_area (line 137) | def calc_reg_file_area(num_reg_files, D, W, P, transistor_density_mil_mm2): function calc_mem_controller_area_mm2 (line 150) | def calc_mem_controller_area_mm2(mem_tech, width, transistor_density_mil... function calc_mem_phy_area_mm2 (line 170) | def calc_mem_phy_area_mm2(mem_tech, width): function find_logic_sram_transistor_density (line 188) | def find_logic_sram_transistor_density(process_node): function calc_compute_chiplet_area_mm2 (line 205) | def calc_compute_chiplet_area_mm2(configs_dict, verbose=False): function calc_io_die_area_mm2 (line 272) | def calc_io_die_area_mm2(config_dict, verbose=False): FILE: cost_model/regfile_area.py function calculate_regfile_area (line 1) | def calculate_regfile_area(D, W, P): FILE: design_space_exploration/dse.py function read_architecture_template (line 22) | def read_architecture_template(file_path): function template_to_system (line 28) | def template_to_system(arch_specs): function test_template_to_system (line 110) | def test_template_to_system(): function find_cheapest_design (line 125) | def find_cheapest_design( FILE: hardware_model/arch_template.py class ArchitectureTemplate (line 1) | class ArchitectureTemplate: method __init__ (line 2) | def __init__(self, FILE: hardware_model/compute_module.py class VectorUnit (line 5) | class VectorUnit: method __init__ (line 6) | def __init__( class SystolicArray (line 34) | class SystolicArray: method __init__ (line 35) | def __init__( class Core (line 59) | class Core: method __init__ (line 60) | def __init__( class Overhead (line 103) | class Overhead: method __init__ (line 104) | def __init__(self, matmul, softmax, layernorm, gelu): class ComputeModule (line 118) | class ComputeModule: method __init__ (line 119) | def __init__( FILE: hardware_model/device.py class Device (line 6) | class Device: method __init__ (line 7) | def __init__( FILE: hardware_model/interconnect.py class TopologyType (line 5) | class TopologyType(Enum): class LinkModule (line 10) | class LinkModule: method __init__ (line 11) | def __init__( class InterConnectModule (line 35) | class InterConnectModule: method __init__ (line 36) | def __init__( FILE: hardware_model/io_module.py class IOModule (line 1) | class IOModule: method __init__ (line 2) | def __init__(self, bandwidth, latency): FILE: hardware_model/memory_module.py class MemoryModule (line 1) | class MemoryModule: method __init__ (line 2) | def __init__(self, memory_capacity): FILE: hardware_model/system.py class System (line 6) | class System: method __init__ (line 7) | def __init__(self, pcb_module: Device, interconnect: InterConnectModul... FILE: software_model/communication_primitives.py class CommunicationPrimitive (line 14) | class CommunicationPrimitive: method __init__ (line 15) | def __init__(self, data_type: DataType) -> None: class AllReduceMultiPCB (line 21) | class AllReduceMultiPCB(CommunicationPrimitive): method __init__ (line 22) | def __init__(self, data_type: DataType) -> None: method __call__ (line 25) | def __call__(self, tensor: Tensor) -> Any: method simulate (line 30) | def simulate(self, interconnect_module: InterConnectModule) -> None: class Broadcast (line 111) | class Broadcast: method __init__ (line 112) | def __init__(self): method __call__ (line 116) | def __call__(self, src: int, tensor: Tensor): FILE: software_model/gelu.py function gelu_gpu (line 14) | def gelu_gpu(input: torch.Tensor) -> torch.Tensor: class GeLU (line 19) | class GeLU(Operator): method __init__ (line 20) | def __init__(self, data_type: DataType): method __call__ (line 24) | def __call__(self, input: Tensor) -> Tensor: method roofline_model (line 31) | def roofline_model(self, pcb_module: Device): method print_latency (line 55) | def print_latency(self): class ComputationalGraph (line 58) | class ComputationalGraph: method __init__ (line 59) | def __init__(self, M: int, data_type: DataType): method compile_and_simulate (line 63) | def compile_and_simulate(self, pcb_module: Device, compile_mode: str): method run_on_gpu (line 93) | def run_on_gpu(self): method gpu_kernel_launch_overhead (line 114) | def gpu_kernel_launch_overhead(): FILE: software_model/layernorm.py function layernorm_gpu (line 14) | def layernorm_gpu(input: torch.Tensor) -> torch.Tensor: class LayerNorm (line 18) | class LayerNorm(Operator): method __init__ (line 19) | def __init__(self, data_type: DataType): method __call__ (line 23) | def __call__(self, input: Tensor) -> Tensor: method roofline_model (line 33) | def roofline_model(self, pcb_module: Device): method print_latency (line 47) | def print_latency(self): class ComputationalGraph (line 50) | class ComputationalGraph: method __init__ (line 51) | def __init__(self, M: int, N: int, data_type: DataType): class Mapping (line 56) | class Mapping: method __init__ (line 57) | def __init__( method display (line 69) | def display(self): method compile_and_simulate (line 75) | def compile_and_simulate(self, pcb_module: Device, compile_mode: str): method simulate (line 128) | def simulate( class L2TileSimulator (line 169) | class L2TileSimulator: method __init__ (line 170) | def __init__( method simulate_l2_tile_io_cycle_count (line 190) | def simulate_l2_tile_io_cycle_count( method simulate_l2_tile_compute_cycle_count (line 203) | def simulate_l2_tile_compute_cycle_count( class L1TileSimulator (line 235) | class L1TileSimulator: method __init__ (line 236) | def __init__( method simulate_l1_tile_io_cycle_count (line 269) | def simulate_l1_tile_io_cycle_count( method simulate_l1_tile_compute_cycle_count (line 279) | def simulate_l1_tile_compute_cycle_count( method run_on_gpu (line 332) | def run_on_gpu(self): method gpu_kernel_launch_overhead (line 357) | def gpu_kernel_launch_overhead(): FILE: software_model/matmul.py class BatchedMatmul (line 17) | class BatchedMatmul(Operator): method __init__ (line 18) | def __init__(self, data_type: DataType): method __call__ (line 24) | def __call__(self, input1: Tensor, input2: Tensor) -> Tensor: method roofline_model (line 40) | def roofline_model(self, pcb_module: Device): method compile_and_simulate (line 57) | def compile_and_simulate(self, pcb_module: Device, compile_mode: str): method run_on_gpu (line 79) | def run_on_gpu( method gpu_kernel_launch_overhead (line 105) | def gpu_kernel_launch_overhead(): class Matmul (line 122) | class Matmul(Operator): method __init__ (line 123) | def __init__(self, data_type: DataType): method __call__ (line 131) | def __call__(self, input1: Tensor, input2: Tensor) -> Tensor: method roofline_model (line 154) | def roofline_model(self, pcb_module: Device): method print_latency (line 166) | def print_latency(self): method generate_tile_loops (line 173) | def generate_tile_loops(loop_M: int, loop_N: int, loop_K: int, loop_or... class ComputationalGraph (line 206) | class ComputationalGraph: method __init__ (line 207) | def __init__(self, M: int, N: int, K: int, data_type: DataType): method display (line 213) | def display(self): class Mapping (line 219) | class Mapping: method __init__ (line 220) | def __init__( method display (line 250) | def display(self): method find_permutations (line 263) | def find_permutations(n): method compile_and_simulate (line 275) | def compile_and_simulate( method simulate (line 742) | def simulate( class L2TileSimulator (line 972) | class L2TileSimulator: method __init__ (line 973) | def __init__( method simulate_l2_tile_io_cycle_count (line 1009) | def simulate_l2_tile_io_cycle_count( method simulate_l2_tile_compute_cycle_count (line 1022) | def simulate_l2_tile_compute_cycle_count( class L1TileSimulator (line 1293) | class L1TileSimulator: method __init__ (line 1294) | def __init__( method simulate_l1_tile_compute_cycle_count (line 1312) | def simulate_l1_tile_compute_cycle_count( method simulate_systolic_array_cycle_count (line 1357) | def simulate_systolic_array_cycle_count( method run_on_gpu (line 1479) | def run_on_gpu( method gpu_kernel_launch_overhead (line 1528) | def gpu_kernel_launch_overhead(): FILE: software_model/operators.py class Operator (line 7) | class Operator: method __init__ (line 8) | def __init__( class mapping (line 35) | class mapping: class Reshape (line 42) | class Reshape(Operator): method __init__ (line 43) | def __init__(self, data_type: DataType): method __call__ (line 48) | def __call__(self, input: Tensor, output_shape: List[int]) -> Tensor: class Concat (line 61) | class Concat(Operator): method __init__ (line 62) | def __init__(self, data_type: DataType): method __call__ (line 69) | def __call__(self, input1: Tensor, input2: Tensor, concat_dim: int) ->... class Transpose (line 91) | class Transpose(Operator): method __init__ (line 92) | def __init__(self, data_type: DataType): method __call__ (line 97) | def __call__(self, input: Tensor, permute: List[int]) -> Tensor: FILE: software_model/softmax.py class Softmax (line 13) | class Softmax(Operator): method __init__ (line 14) | def __init__(self, data_type: DataType): method __call__ (line 18) | def __call__(self, input: Tensor) -> Tensor: method print_latency (line 28) | def print_latency(self): class ComputationalGraph (line 31) | class ComputationalGraph: method __init__ (line 32) | def __init__(self, M: int, N: int, data_type: DataType): class Mapping (line 37) | class Mapping: method __init__ (line 38) | def __init__( method display (line 54) | def display(self): method roofline_model (line 60) | def roofline_model(self, pcb_module: Device): method compile_and_simulate (line 66) | def compile_and_simulate(self, pcb_module: Device, compile_mode=None): method simulate (line 116) | def simulate( class L2TileSimulator (line 167) | class L2TileSimulator: method __init__ (line 168) | def __init__( method simulate_l2_tile_io_cycle_count (line 188) | def simulate_l2_tile_io_cycle_count( method simulate_l2_tile_compute_cycle_count (line 201) | def simulate_l2_tile_compute_cycle_count( class L1TileSimulator (line 234) | class L1TileSimulator: method __init__ (line 235) | def __init__( method simulate_l1_tile_io_cycle_count (line 269) | def simulate_l1_tile_io_cycle_count( method simulate_l1_tile_compute_cycle_count (line 279) | def simulate_l1_tile_compute_cycle_count( method run_on_gpu (line 294) | def run_on_gpu(self): method gpu_kernel_launch_overhead (line 313) | def gpu_kernel_launch_overhead(): FILE: software_model/transformer.py class TransformerBlockInitComputationTP (line 20) | class TransformerBlockInitComputationTP(Operator): method __init__ (line 21) | def __init__(self, d_model, n_heads, device_count, data_type: DataType): method __call__ (line 60) | def __call__(self, X: Tensor) -> Tensor: method roofline_model (line 114) | def roofline_model(self, system: System): method compile_and_simulate (line 194) | def compile_and_simulate(self, system: System, compile_mode: str): method run_on_gpu (line 286) | def run_on_gpu(self): class TransformerBlockAutoRegressionTP (line 355) | class TransformerBlockAutoRegressionTP(Operator): method __init__ (line 356) | def __init__(self, d_model, n_heads, device_count, data_type: DataType): method __call__ (line 397) | def __call__(self, x: Tensor, seq_len: int) -> Tensor: method roofline_model (line 470) | def roofline_model(self, system: System): method compile_and_simulate (line 551) | def compile_and_simulate(self, system: System, compile_mode: str): method run_on_gpu (line 642) | def run_on_gpu(self): class LLMInitComputationTP (line 712) | class LLMInitComputationTP: method __init__ (line 713) | def __init__( FILE: software_model/utils.py class DataType (line 5) | class DataType: method __init__ (line 6) | def __init__(self, name: str, word_size: int) -> None: class Tensor (line 12) | class Tensor: method __init__ (line 13) | def __init__( FILE: utils.py function size_of_list (line 3) | def size_of_list(list: List): function size (line 9) | def size(list): function closest_factors (line 15) | def closest_factors(n):