SYMBOL INDEX (89 symbols across 5 files)

FILE: benchmark.py
  function get_profiler_context (line 50) | def get_profiler_context():
  function debug_print_outputs (line 67) | def debug_print_outputs(sequences, tokenizer, slice=slice(None), referen...

FILE: benchmark_vllm.py
  class BenchmarkRequest (line 26) | class BenchmarkRequest:
  function generate_benchmark_data (line 39) | def generate_benchmark_data(tokenizer, n_requests=512, max_input_length=...
  function print_step_stats (line 75) | def print_step_stats(steps, name):
  function generate_with_timing (line 87) | def generate_with_timing(llm, sequences, sampling_params, collect_detail...

FILE: flex_nano_vllm/inference.py
  class SamplingParams (line 19) | class SamplingParams:
  function sample (line 23) | def sample(logits_BV, greedy=True, to_cpu=False):
  class Sequence (line 43) | class Sequence:
    method __init__ (line 44) | def __init__(self, text: str):
    method add_next_token (line 56) | def add_next_token(self, token_id: torch.Tensor, logits: torch.Tensor,...
    method copy (line 63) | def copy(self):
    method output_ids (line 67) | def output_ids(self):
    method output_logits (line 71) | def output_logits(self):
    method output_probs (line 75) | def output_probs(self):
    method output_length (line 79) | def output_length(self):
    method total_length (line 83) | def total_length(self):
    method total_token_ids (line 87) | def total_token_ids(self):
    method last_token_id (line 93) | def last_token_id(self):
  function process_sampling_params (line 97) | def process_sampling_params(sequences: list[Sequence], sampling_params: ...
  class Inference (line 109) | class Inference:
    method __init__ (line 110) | def __init__(self, model, tokenizer, max_batch_size, max_seq_length, n...
    method _prefill_sequences (line 136) | def _prefill_sequences(
    method prefill_sequences (line 184) | def prefill_sequences(self, sequences: list[Sequence]) -> torch.Tensor:
    method get_decoding_block_mask (line 206) | def get_decoding_block_mask(self, batch_idx: torch.Tensor):
    method _decode_step (line 252) | def _decode_step(self, batch_idx: torch.Tensor, input_ids: torch.Tensor):
    method decode_step (line 266) | def decode_step(self, batch_idx: torch.Tensor, input_ids: torch.Tensor...
    method _check_done (line 287) | def _check_done(self, sequences: list[Sequence]):
    method run_one_step (line 298) | def run_one_step(self):
    method tokenize (line 362) | def tokenize(self, sequences: list[Sequence]):
    method generate (line 369) | def generate(
    method capture_decode_cudagraph (line 430) | def capture_decode_cudagraph(self):
    method print_time_stats (line 464) | def print_time_stats(self, times):

FILE: flex_nano_vllm/modeling_gemma2.py
  class Gemma2RMSNorm (line 54) | class Gemma2RMSNorm(nn.Module):
    method __init__ (line 55) | def __init__(self, dim: int, eps: float = 1e-6):
    method _norm (line 60) | def _norm(self, x):
    method forward (line 63) | def forward(self, x):
    method extra_repr (line 70) | def extra_repr(self):
  class Gemma2MLP (line 73) | class Gemma2MLP(nn.Module):
    method __init__ (line 74) | def __init__(self, config):
    method forward (line 84) | def forward(self, x):
  function rotate_half (line 89) | def rotate_half(x):
  function apply_rotary_pos_emb (line 96) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di...
  function repeat_kv (line 123) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  function eager_attention_forward (line 135) | def eager_attention_forward(
  class Gemma2Attention (line 170) | class Gemma2Attention(nn.Module):
    method __init__ (line 173) | def __init__(self, config: Gemma2Config, layer_idx: int):
    method forward (line 199) | def forward(
  class Gemma2DecoderLayer (line 252) | class Gemma2DecoderLayer(GradientCheckpointingLayer):
    method __init__ (line 253) | def __init__(self, config: Gemma2Config, layer_idx: int):
    method forward (line 267) | def forward(
  class Gemma2RotaryEmbedding (line 312) | class Gemma2RotaryEmbedding(nn.Module):
    method __init__ (line 313) | def __init__(self, config: Gemma2Config, device=None):
    method forward (line 332) | def forward(self, x, position_ids):
  class Gemma2PreTrainedModel (line 347) | class Gemma2PreTrainedModel(PreTrainedModel):
    method _init_weights (line 364) | def _init_weights(self, module):
  class Gemma2Model (line 379) | class Gemma2Model(Gemma2PreTrainedModel):
    method __init__ (line 380) | def __init__(self, config: Gemma2Config):
    method get_input_embeddings (line 396) | def get_input_embeddings(self):
    method set_input_embeddings (line 399) | def set_input_embeddings(self, value):
    method forward (line 404) | def forward(
  class Gemma2ForCausalLM (line 498) | class Gemma2ForCausalLM(Gemma2PreTrainedModel, GenerationMixin):
    method __init__ (line 503) | def __init__(self, config):
    method get_input_embeddings (line 512) | def get_input_embeddings(self):
    method set_input_embeddings (line 515) | def set_input_embeddings(self, value):
    method get_output_embeddings (line 518) | def get_output_embeddings(self):
    method set_output_embeddings (line 521) | def set_output_embeddings(self, new_embeddings):
    method set_decoder (line 524) | def set_decoder(self, decoder):
    method get_decoder (line 527) | def get_decoder(self):
    method forward (line 532) | def forward(

FILE: flex_nano_vllm/paged_attention.py
  function _cdiv (line 23) | def _cdiv(x: int | float | torch.Tensor, multiple: int | float | torch.T...
  class PagedKVCache (line 27) | class PagedKVCache(torch.nn.Module):
    method __init__ (line 28) | def __init__(self, page_table, n_heads, head_dim, dtype):
    method update (line 36) | def update(self, input_pos, k_val, v_val, batch_idx=None):
  class PageTable (line 48) | class PageTable:
    method __init__ (line 59) | def __init__(
    method can_reserve (line 83) | def can_reserve(self, size: int, batch_idx_int: int | None = None) -> ...
    method allocate (line 92) | def allocate(self) -> int:
    method pages_available (line 102) | def pages_available(self) -> int:
    method reserve (line 105) | def reserve(self, batch_idx_int: int, batch_idx: torch.Tensor, seq_len...
    method erase (line 156) | def erase(self, batch_idx: int) -> None:
    method assign (line 169) | def assign(
    method convert_logical_block_mask (line 228) | def convert_logical_block_mask(
    method get_logical_kv_idx (line 296) | def get_logical_kv_idx(self, physical_batch_idx: torch.Tensor, physica...
    method get_mask_mod (line 306) | def get_mask_mod(self, mask_mod: Optional[_mask_mod_signature], batch_...
    method get_score_mod (line 329) | def get_score_mod(self, score_mod: Optional[_score_mod_signature], bat...
    method create_causal_blockmask (line 356) | def create_causal_blockmask(self, B, L):
    method create_prefill_blockmask_no_paging (line 364) | def create_prefill_blockmask_no_paging(self, batch_idx: Tensor, BLOCK_...
    method assign_prefill_no_paging (line 381) | def assign_prefill_no_paging(