SYMBOL INDEX (182 symbols across 28 files)

FILE: ae/figure10/plot_latency.py
  function get_total_decoding_latency (line 19) | def get_total_decoding_latency(df: pd.DataFrame, start, end):
  function get_intensity (line 69) | def get_intensity(color):

FILE: ae/figure10/test_latency.py
  function simulate_decoding_latency (line 35) | def simulate_decoding_latency(system, bs, seq_len, name, lock):
  function simulate_prefill_latency (line 54) | def simulate_prefill_latency(system, bs, seq_len, name, lock):

FILE: ae/figure11/test_decoding.py
  function simulate_latency (line 27) | def simulate_latency(system, bs, seq_len, name, lock):

FILE: ae/figure12/plot_throughput.py
  function get_total_decoding_latency (line 33) | def get_total_decoding_latency(df: pd.DataFrame, start, end):
  function get_intensity (line 125) | def get_intensity(color):

FILE: ae/figure12/test_throughput.py
  function simulate_decoding_latency (line 34) | def simulate_decoding_latency(system, bs, seq_len, name, lock, heuristics):
  function simulate_prefill_latency (line 55) | def simulate_prefill_latency(system, bs, seq_len, name, lock, heuristics):

FILE: ae/figure5/ijkl/plot_transformer.py
  function read_csv (line 6) | def read_csv(filename: str):

FILE: ae/figure7/change_core_size.py
  function test_core_size (line 38) | def test_core_size(core_configs, lock):

FILE: ae/figure8/change_memory_bw.py
  function test_memory_bandwidth (line 54) | def test_memory_bandwidth(memory_bandwidth, lock):

FILE: ae/figure9/change_l1_cache.py
  function test_SRAM_KB (line 38) | def test_SRAM_KB(SRAM_KB, lock):

FILE: cost_model/cost_model.py
  function calc_systolic_array_area_mm2 (line 91) | def calc_systolic_array_area_mm2(dimension_x, dimension_y, bitwidth, tra...
  function calc_vector_area_mm2 (line 102) | def calc_vector_area_mm2(int32_count, fp16_count, fp32_count, fp64_count...
  function calc_cache_sram_area_mm2 (line 114) | def calc_cache_sram_area_mm2(capacity_bytes, sram_bitcell_area_mm2, max_...
  function calc_reg_file_area (line 137) | def calc_reg_file_area(num_reg_files, D, W, P, transistor_density_mil_mm2):
  function calc_mem_controller_area_mm2 (line 150) | def calc_mem_controller_area_mm2(mem_tech, width, transistor_density_mil...
  function calc_mem_phy_area_mm2 (line 170) | def calc_mem_phy_area_mm2(mem_tech, width):
  function find_logic_sram_transistor_density (line 188) | def find_logic_sram_transistor_density(process_node):
  function calc_compute_chiplet_area_mm2 (line 205) | def calc_compute_chiplet_area_mm2(configs_dict, verbose=False):
  function calc_io_die_area_mm2 (line 272) | def calc_io_die_area_mm2(config_dict, verbose=False):

FILE: cost_model/regfile_area.py
  function calculate_regfile_area (line 1) | def calculate_regfile_area(D, W, P):

FILE: design_space_exploration/dse.py
  function read_architecture_template (line 22) | def read_architecture_template(file_path):
  function template_to_system (line 28) | def template_to_system(arch_specs):
  function test_template_to_system (line 110) | def test_template_to_system():
  function find_cheapest_design (line 125) | def find_cheapest_design(

FILE: hardware_model/arch_template.py
  class ArchitectureTemplate (line 1) | class ArchitectureTemplate:
    method __init__ (line 2) | def __init__(self,

FILE: hardware_model/compute_module.py
  class VectorUnit (line 5) | class VectorUnit:
    method __init__ (line 6) | def __init__(
  class SystolicArray (line 34) | class SystolicArray:
    method __init__ (line 35) | def __init__(
  class Core (line 59) | class Core:
    method __init__ (line 60) | def __init__(
  class Overhead (line 103) | class Overhead:
    method __init__ (line 104) | def __init__(self, matmul, softmax, layernorm, gelu):
  class ComputeModule (line 118) | class ComputeModule:
    method __init__ (line 119) | def __init__(

FILE: hardware_model/device.py
  class Device (line 6) | class Device:
    method __init__ (line 7) | def __init__(

FILE: hardware_model/interconnect.py
  class TopologyType (line 5) | class TopologyType(Enum):
  class LinkModule (line 10) | class LinkModule:
    method __init__ (line 11) | def __init__(
  class InterConnectModule (line 35) | class InterConnectModule:
    method __init__ (line 36) | def __init__(

FILE: hardware_model/io_module.py
  class IOModule (line 1) | class IOModule:
    method __init__ (line 2) | def __init__(self, bandwidth, latency):

FILE: hardware_model/memory_module.py
  class MemoryModule (line 1) | class MemoryModule:
    method __init__ (line 2) | def __init__(self, memory_capacity):

FILE: hardware_model/system.py
  class System (line 6) | class System:
    method __init__ (line 7) | def __init__(self, pcb_module: Device, interconnect: InterConnectModul...

FILE: software_model/communication_primitives.py
  class CommunicationPrimitive (line 14) | class CommunicationPrimitive:
    method __init__ (line 15) | def __init__(self, data_type: DataType) -> None:
  class AllReduceMultiPCB (line 21) | class AllReduceMultiPCB(CommunicationPrimitive):
    method __init__ (line 22) | def __init__(self, data_type: DataType) -> None:
    method __call__ (line 25) | def __call__(self, tensor: Tensor) -> Any:
    method simulate (line 30) | def simulate(self, interconnect_module: InterConnectModule) -> None:
  class Broadcast (line 111) | class Broadcast:
    method __init__ (line 112) | def __init__(self):
    method __call__ (line 116) | def __call__(self, src: int, tensor: Tensor):

FILE: software_model/gelu.py
  function gelu_gpu (line 14) | def gelu_gpu(input: torch.Tensor) -> torch.Tensor:
  class GeLU (line 19) | class GeLU(Operator):
    method __init__ (line 20) | def __init__(self, data_type: DataType):
    method __call__ (line 24) | def __call__(self, input: Tensor) -> Tensor:
    method roofline_model (line 31) | def roofline_model(self, pcb_module: Device):
    method print_latency (line 55) | def print_latency(self):
    class ComputationalGraph (line 58) | class ComputationalGraph:
      method __init__ (line 59) | def __init__(self, M: int, data_type: DataType):
    method compile_and_simulate (line 63) | def compile_and_simulate(self, pcb_module: Device, compile_mode: str):
    method run_on_gpu (line 93) | def run_on_gpu(self):
    method gpu_kernel_launch_overhead (line 114) | def gpu_kernel_launch_overhead():

FILE: software_model/layernorm.py
  function layernorm_gpu (line 14) | def layernorm_gpu(input: torch.Tensor) -> torch.Tensor:
  class LayerNorm (line 18) | class LayerNorm(Operator):
    method __init__ (line 19) | def __init__(self, data_type: DataType):
    method __call__ (line 23) | def __call__(self, input: Tensor) -> Tensor:
    method roofline_model (line 33) | def roofline_model(self, pcb_module: Device):
    method print_latency (line 47) | def print_latency(self):
    class ComputationalGraph (line 50) | class ComputationalGraph:
      method __init__ (line 51) | def __init__(self, M: int, N: int, data_type: DataType):
    class Mapping (line 56) | class Mapping:
      method __init__ (line 57) | def __init__(
      method display (line 69) | def display(self):
    method compile_and_simulate (line 75) | def compile_and_simulate(self, pcb_module: Device, compile_mode: str):
    method simulate (line 128) | def simulate(
    class L2TileSimulator (line 169) | class L2TileSimulator:
      method __init__ (line 170) | def __init__(
      method simulate_l2_tile_io_cycle_count (line 190) | def simulate_l2_tile_io_cycle_count(
      method simulate_l2_tile_compute_cycle_count (line 203) | def simulate_l2_tile_compute_cycle_count(
    class L1TileSimulator (line 235) | class L1TileSimulator:
      method __init__ (line 236) | def __init__(
      method simulate_l1_tile_io_cycle_count (line 269) | def simulate_l1_tile_io_cycle_count(
      method simulate_l1_tile_compute_cycle_count (line 279) | def simulate_l1_tile_compute_cycle_count(
    method run_on_gpu (line 332) | def run_on_gpu(self):
    method gpu_kernel_launch_overhead (line 357) | def gpu_kernel_launch_overhead():

FILE: software_model/matmul.py
  class BatchedMatmul (line 17) | class BatchedMatmul(Operator):
    method __init__ (line 18) | def __init__(self, data_type: DataType):
    method __call__ (line 24) | def __call__(self, input1: Tensor, input2: Tensor) -> Tensor:
    method roofline_model (line 40) | def roofline_model(self, pcb_module: Device):
    method compile_and_simulate (line 57) | def compile_and_simulate(self, pcb_module: Device, compile_mode: str):
    method run_on_gpu (line 79) | def run_on_gpu(
    method gpu_kernel_launch_overhead (line 105) | def gpu_kernel_launch_overhead():
  class Matmul (line 122) | class Matmul(Operator):
    method __init__ (line 123) | def __init__(self, data_type: DataType):
    method __call__ (line 131) | def __call__(self, input1: Tensor, input2: Tensor) -> Tensor:
    method roofline_model (line 154) | def roofline_model(self, pcb_module: Device):
    method print_latency (line 166) | def print_latency(self):
    method generate_tile_loops (line 173) | def generate_tile_loops(loop_M: int, loop_N: int, loop_K: int, loop_or...
    class ComputationalGraph (line 206) | class ComputationalGraph:
      method __init__ (line 207) | def __init__(self, M: int, N: int, K: int, data_type: DataType):
      method display (line 213) | def display(self):
    class Mapping (line 219) | class Mapping:
      method __init__ (line 220) | def __init__(
      method display (line 250) | def display(self):
    method find_permutations (line 263) | def find_permutations(n):
    method compile_and_simulate (line 275) | def compile_and_simulate(
    method simulate (line 742) | def simulate(
    class L2TileSimulator (line 972) | class L2TileSimulator:
      method __init__ (line 973) | def __init__(
      method simulate_l2_tile_io_cycle_count (line 1009) | def simulate_l2_tile_io_cycle_count(
      method simulate_l2_tile_compute_cycle_count (line 1022) | def simulate_l2_tile_compute_cycle_count(
    class L1TileSimulator (line 1293) | class L1TileSimulator:
      method __init__ (line 1294) | def __init__(
      method simulate_l1_tile_compute_cycle_count (line 1312) | def simulate_l1_tile_compute_cycle_count(
    method simulate_systolic_array_cycle_count (line 1357) | def simulate_systolic_array_cycle_count(
    method run_on_gpu (line 1479) | def run_on_gpu(
    method gpu_kernel_launch_overhead (line 1528) | def gpu_kernel_launch_overhead():

FILE: software_model/operators.py
  class Operator (line 7) | class Operator:
    method __init__ (line 8) | def __init__(
    class mapping (line 35) | class mapping:
  class Reshape (line 42) | class Reshape(Operator):
    method __init__ (line 43) | def __init__(self, data_type: DataType):
    method __call__ (line 48) | def __call__(self, input: Tensor, output_shape: List[int]) -> Tensor:
  class Concat (line 61) | class Concat(Operator):
    method __init__ (line 62) | def __init__(self, data_type: DataType):
    method __call__ (line 69) | def __call__(self, input1: Tensor, input2: Tensor, concat_dim: int) ->...
  class Transpose (line 91) | class Transpose(Operator):
    method __init__ (line 92) | def __init__(self, data_type: DataType):
    method __call__ (line 97) | def __call__(self, input: Tensor, permute: List[int]) -> Tensor:

FILE: software_model/softmax.py
  class Softmax (line 13) | class Softmax(Operator):
    method __init__ (line 14) | def __init__(self, data_type: DataType):
    method __call__ (line 18) | def __call__(self, input: Tensor) -> Tensor:
    method print_latency (line 28) | def print_latency(self):
    class ComputationalGraph (line 31) | class ComputationalGraph:
      method __init__ (line 32) | def __init__(self, M: int, N: int, data_type: DataType):
    class Mapping (line 37) | class Mapping:
      method __init__ (line 38) | def __init__(
      method display (line 54) | def display(self):
    method roofline_model (line 60) | def roofline_model(self, pcb_module: Device):
    method compile_and_simulate (line 66) | def compile_and_simulate(self, pcb_module: Device, compile_mode=None):
    method simulate (line 116) | def simulate(
    class L2TileSimulator (line 167) | class L2TileSimulator:
      method __init__ (line 168) | def __init__(
      method simulate_l2_tile_io_cycle_count (line 188) | def simulate_l2_tile_io_cycle_count(
      method simulate_l2_tile_compute_cycle_count (line 201) | def simulate_l2_tile_compute_cycle_count(
    class L1TileSimulator (line 234) | class L1TileSimulator:
      method __init__ (line 235) | def __init__(
      method simulate_l1_tile_io_cycle_count (line 269) | def simulate_l1_tile_io_cycle_count(
      method simulate_l1_tile_compute_cycle_count (line 279) | def simulate_l1_tile_compute_cycle_count(
    method run_on_gpu (line 294) | def run_on_gpu(self):
    method gpu_kernel_launch_overhead (line 313) | def gpu_kernel_launch_overhead():

FILE: software_model/transformer.py
  class TransformerBlockInitComputationTP (line 20) | class TransformerBlockInitComputationTP(Operator):
    method __init__ (line 21) | def __init__(self, d_model, n_heads, device_count, data_type: DataType):
    method __call__ (line 60) | def __call__(self, X: Tensor) -> Tensor:
    method roofline_model (line 114) | def roofline_model(self, system: System):
    method compile_and_simulate (line 194) | def compile_and_simulate(self, system: System, compile_mode: str):
    method run_on_gpu (line 286) | def run_on_gpu(self):
  class TransformerBlockAutoRegressionTP (line 355) | class TransformerBlockAutoRegressionTP(Operator):
    method __init__ (line 356) | def __init__(self, d_model, n_heads, device_count, data_type: DataType):
    method __call__ (line 397) | def __call__(self, x: Tensor, seq_len: int) -> Tensor:
    method roofline_model (line 470) | def roofline_model(self, system: System):
    method compile_and_simulate (line 551) | def compile_and_simulate(self, system: System, compile_mode: str):
    method run_on_gpu (line 642) | def run_on_gpu(self):
  class LLMInitComputationTP (line 712) | class LLMInitComputationTP:
    method __init__ (line 713) | def __init__(

FILE: software_model/utils.py
  class DataType (line 5) | class DataType:
    method __init__ (line 6) | def __init__(self, name: str, word_size: int) -> None:
  class Tensor (line 12) | class Tensor:
    method __init__ (line 13) | def __init__(

FILE: utils.py
  function size_of_list (line 3) | def size_of_list(list: List):
  function size (line 9) | def size(list):
  function closest_factors (line 15) | def closest_factors(n):