SYMBOL INDEX (89 symbols across 5 files) FILE: benchmark.py function get_profiler_context (line 50) | def get_profiler_context(): function debug_print_outputs (line 67) | def debug_print_outputs(sequences, tokenizer, slice=slice(None), referen... FILE: benchmark_vllm.py class BenchmarkRequest (line 26) | class BenchmarkRequest: function generate_benchmark_data (line 39) | def generate_benchmark_data(tokenizer, n_requests=512, max_input_length=... function print_step_stats (line 75) | def print_step_stats(steps, name): function generate_with_timing (line 87) | def generate_with_timing(llm, sequences, sampling_params, collect_detail... FILE: flex_nano_vllm/inference.py class SamplingParams (line 19) | class SamplingParams: function sample (line 23) | def sample(logits_BV, greedy=True, to_cpu=False): class Sequence (line 43) | class Sequence: method __init__ (line 44) | def __init__(self, text: str): method add_next_token (line 56) | def add_next_token(self, token_id: torch.Tensor, logits: torch.Tensor,... method copy (line 63) | def copy(self): method output_ids (line 67) | def output_ids(self): method output_logits (line 71) | def output_logits(self): method output_probs (line 75) | def output_probs(self): method output_length (line 79) | def output_length(self): method total_length (line 83) | def total_length(self): method total_token_ids (line 87) | def total_token_ids(self): method last_token_id (line 93) | def last_token_id(self): function process_sampling_params (line 97) | def process_sampling_params(sequences: list[Sequence], sampling_params: ... class Inference (line 109) | class Inference: method __init__ (line 110) | def __init__(self, model, tokenizer, max_batch_size, max_seq_length, n... method _prefill_sequences (line 136) | def _prefill_sequences( method prefill_sequences (line 184) | def prefill_sequences(self, sequences: list[Sequence]) -> torch.Tensor: method get_decoding_block_mask (line 206) | def get_decoding_block_mask(self, batch_idx: torch.Tensor): method _decode_step (line 252) | def _decode_step(self, batch_idx: torch.Tensor, input_ids: torch.Tensor): method decode_step (line 266) | def decode_step(self, batch_idx: torch.Tensor, input_ids: torch.Tensor... method _check_done (line 287) | def _check_done(self, sequences: list[Sequence]): method run_one_step (line 298) | def run_one_step(self): method tokenize (line 362) | def tokenize(self, sequences: list[Sequence]): method generate (line 369) | def generate( method capture_decode_cudagraph (line 430) | def capture_decode_cudagraph(self): method print_time_stats (line 464) | def print_time_stats(self, times): FILE: flex_nano_vllm/modeling_gemma2.py class Gemma2RMSNorm (line 54) | class Gemma2RMSNorm(nn.Module): method __init__ (line 55) | def __init__(self, dim: int, eps: float = 1e-6): method _norm (line 60) | def _norm(self, x): method forward (line 63) | def forward(self, x): method extra_repr (line 70) | def extra_repr(self): class Gemma2MLP (line 73) | class Gemma2MLP(nn.Module): method __init__ (line 74) | def __init__(self, config): method forward (line 84) | def forward(self, x): function rotate_half (line 89) | def rotate_half(x): function apply_rotary_pos_emb (line 96) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_di... function repeat_kv (line 123) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: function eager_attention_forward (line 135) | def eager_attention_forward( class Gemma2Attention (line 170) | class Gemma2Attention(nn.Module): method __init__ (line 173) | def __init__(self, config: Gemma2Config, layer_idx: int): method forward (line 199) | def forward( class Gemma2DecoderLayer (line 252) | class Gemma2DecoderLayer(GradientCheckpointingLayer): method __init__ (line 253) | def __init__(self, config: Gemma2Config, layer_idx: int): method forward (line 267) | def forward( class Gemma2RotaryEmbedding (line 312) | class Gemma2RotaryEmbedding(nn.Module): method __init__ (line 313) | def __init__(self, config: Gemma2Config, device=None): method forward (line 332) | def forward(self, x, position_ids): class Gemma2PreTrainedModel (line 347) | class Gemma2PreTrainedModel(PreTrainedModel): method _init_weights (line 364) | def _init_weights(self, module): class Gemma2Model (line 379) | class Gemma2Model(Gemma2PreTrainedModel): method __init__ (line 380) | def __init__(self, config: Gemma2Config): method get_input_embeddings (line 396) | def get_input_embeddings(self): method set_input_embeddings (line 399) | def set_input_embeddings(self, value): method forward (line 404) | def forward( class Gemma2ForCausalLM (line 498) | class Gemma2ForCausalLM(Gemma2PreTrainedModel, GenerationMixin): method __init__ (line 503) | def __init__(self, config): method get_input_embeddings (line 512) | def get_input_embeddings(self): method set_input_embeddings (line 515) | def set_input_embeddings(self, value): method get_output_embeddings (line 518) | def get_output_embeddings(self): method set_output_embeddings (line 521) | def set_output_embeddings(self, new_embeddings): method set_decoder (line 524) | def set_decoder(self, decoder): method get_decoder (line 527) | def get_decoder(self): method forward (line 532) | def forward( FILE: flex_nano_vllm/paged_attention.py function _cdiv (line 23) | def _cdiv(x: int | float | torch.Tensor, multiple: int | float | torch.T... class PagedKVCache (line 27) | class PagedKVCache(torch.nn.Module): method __init__ (line 28) | def __init__(self, page_table, n_heads, head_dim, dtype): method update (line 36) | def update(self, input_pos, k_val, v_val, batch_idx=None): class PageTable (line 48) | class PageTable: method __init__ (line 59) | def __init__( method can_reserve (line 83) | def can_reserve(self, size: int, batch_idx_int: int | None = None) -> ... method allocate (line 92) | def allocate(self) -> int: method pages_available (line 102) | def pages_available(self) -> int: method reserve (line 105) | def reserve(self, batch_idx_int: int, batch_idx: torch.Tensor, seq_len... method erase (line 156) | def erase(self, batch_idx: int) -> None: method assign (line 169) | def assign( method convert_logical_block_mask (line 228) | def convert_logical_block_mask( method get_logical_kv_idx (line 296) | def get_logical_kv_idx(self, physical_batch_idx: torch.Tensor, physica... method get_mask_mod (line 306) | def get_mask_mod(self, mask_mod: Optional[_mask_mod_signature], batch_... method get_score_mod (line 329) | def get_score_mod(self, score_mod: Optional[_score_mod_signature], bat... method create_causal_blockmask (line 356) | def create_causal_blockmask(self, B, L): method create_prefill_blockmask_no_paging (line 364) | def create_prefill_blockmask_no_paging(self, batch_idx: Tensor, BLOCK_... method assign_prefill_no_paging (line 381) | def assign_prefill_no_paging(