SYMBOL INDEX (139 symbols across 20 files) FILE: bench.py function main (line 8) | def main(): FILE: example.py function main (line 6) | def main(): FILE: nanovllm/config.py class Config (line 7) | class Config: method __post_init__ (line 20) | def __post_init__(self): FILE: nanovllm/engine/block_manager.py class Block (line 8) | class Block: method __init__ (line 10) | def __init__(self, block_id): method update (line 16) | def update(self, hash: int, token_ids: list[int]): method reset (line 20) | def reset(self): class BlockManager (line 26) | class BlockManager: method __init__ (line 28) | def __init__(self, num_blocks: int, block_size: int): method compute_hash (line 36) | def compute_hash(cls, token_ids: list[int], prefix: int = -1): method _allocate_block (line 43) | def _allocate_block(self, block_id: int) -> Block: method _deallocate_block (line 51) | def _deallocate_block(self, block_id: int) -> Block: method can_allocate (line 56) | def can_allocate(self, seq: Sequence) -> bool: method allocate (line 59) | def allocate(self, seq: Sequence): method deallocate (line 84) | def deallocate(self, seq: Sequence): method can_append (line 93) | def can_append(self, seq: Sequence) -> bool: method may_append (line 96) | def may_append(self, seq: Sequence): FILE: nanovllm/engine/llm_engine.py class LLMEngine (line 15) | class LLMEngine: method __init__ (line 17) | def __init__(self, model, **kwargs): method exit (line 36) | def exit(self): method add_request (line 42) | def add_request(self, prompt: str | list[int], sampling_params: Sampli... method step (line 48) | def step(self): method is_finished (line 56) | def is_finished(self): method generate (line 59) | def generate( FILE: nanovllm/engine/model_runner.py class ModelRunner (line 15) | class ModelRunner: method __init__ (line 17) | def __init__(self, config: Config, rank: int, event: Event | list[Even... method exit (line 50) | def exit(self): method loop (line 61) | def loop(self): method read_shm (line 68) | def read_shm(self): method write_shm (line 76) | def write_shm(self, method_name, *args): method call (line 85) | def call(self, method_name, *args): method warmup_model (line 91) | def warmup_model(self): method allocate_kv_cache (line 100) | def allocate_kv_cache(self): method prepare_block_tables (line 120) | def prepare_block_tables(self, seqs: list[Sequence]): method prepare_prefill (line 126) | def prepare_prefill(self, seqs: list[Sequence]): method prepare_decode (line 164) | def prepare_decode(self, seqs: list[Sequence]): method prepare_sample (line 182) | def prepare_sample(self, seqs: list[Sequence]): method run_model (line 190) | def run_model(self, input_ids: torch.Tensor, positions: torch.Tensor, ... method run (line 208) | def run(self, seqs: list[Sequence], is_prefill: bool) -> list[int]: method capture_cudagraph (line 217) | def capture_cudagraph(self): FILE: nanovllm/engine/scheduler.py class Scheduler (line 8) | class Scheduler: method __init__ (line 10) | def __init__(self, config: Config): method is_finished (line 18) | def is_finished(self): method add (line 21) | def add(self, seq: Sequence): method schedule (line 24) | def schedule(self) -> tuple[list[Sequence], bool]: method preempt (line 60) | def preempt(self, seq: Sequence): method postprocess (line 65) | def postprocess(self, seqs: list[Sequence], token_ids: list[int]) -> l... FILE: nanovllm/engine/sequence.py class SequenceStatus (line 8) | class SequenceStatus(Enum): class Sequence (line 14) | class Sequence: method __init__ (line 18) | def __init__(self, token_ids: list[int], sampling_params = SamplingPar... method __len__ (line 31) | def __len__(self): method __getitem__ (line 34) | def __getitem__(self, key): method is_finished (line 38) | def is_finished(self): method num_completion_tokens (line 42) | def num_completion_tokens(self): method prompt_token_ids (line 46) | def prompt_token_ids(self): method completion_token_ids (line 50) | def completion_token_ids(self): method num_cached_blocks (line 54) | def num_cached_blocks(self): method num_blocks (line 58) | def num_blocks(self): method last_block_num_tokens (line 62) | def last_block_num_tokens(self): method block (line 65) | def block(self, i): method append_token (line 69) | def append_token(self, token_id: int): method __getstate__ (line 74) | def __getstate__(self): method __setstate__ (line 78) | def __setstate__(self, state): FILE: nanovllm/layers/activation.py class SiluAndMul (line 6) | class SiluAndMul(nn.Module): method __init__ (line 8) | def __init__(self): method forward (line 12) | def forward(self, x: torch.Tensor) -> torch.Tensor: FILE: nanovllm/layers/attention.py function store_kvcache_kernel (line 11) | def store_kvcache_kernel( function store_kvcache (line 33) | def store_kvcache(key: torch.Tensor, value: torch.Tensor, k_cache: torch... class Attention (line 43) | class Attention(nn.Module): method __init__ (line 45) | def __init__( method forward (line 59) | def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor): FILE: nanovllm/layers/embed_head.py class VocabParallelEmbedding (line 9) | class VocabParallelEmbedding(nn.Module): method __init__ (line 11) | def __init__( method weight_loader (line 27) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens... method forward (line 34) | def forward(self, x: torch.Tensor): class ParallelLMHead (line 45) | class ParallelLMHead(VocabParallelEmbedding): method __init__ (line 47) | def __init__( method forward (line 56) | def forward(self, x: torch.Tensor): FILE: nanovllm/layers/layernorm.py class RMSNorm (line 5) | class RMSNorm(nn.Module): method __init__ (line 7) | def __init__( method rms_forward (line 17) | def rms_forward( method add_rms_forward (line 29) | def add_rms_forward( method forward (line 42) | def forward( FILE: nanovllm/layers/linear.py function divide (line 7) | def divide(numerator, denominator): class LinearBase (line 12) | class LinearBase(nn.Module): method __init__ (line 14) | def __init__( method forward (line 33) | def forward(self, x: torch.Tensor) -> torch.Tensor: class ReplicatedLinear (line 37) | class ReplicatedLinear(LinearBase): method __init__ (line 39) | def __init__( method weight_loader (line 47) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens... method forward (line 50) | def forward(self, x: torch.Tensor) -> torch.Tensor: class ColumnParallelLinear (line 54) | class ColumnParallelLinear(LinearBase): method __init__ (line 56) | def __init__( method weight_loader (line 65) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens... method forward (line 72) | def forward(self, x: torch.Tensor) -> torch.Tensor: class MergedColumnParallelLinear (line 76) | class MergedColumnParallelLinear(ColumnParallelLinear): method __init__ (line 78) | def __init__( method weight_loader (line 87) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens... class QKVParallelLinear (line 96) | class QKVParallelLinear(ColumnParallelLinear): method __init__ (line 98) | def __init__( method weight_loader (line 114) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens... class RowParallelLinear (line 131) | class RowParallelLinear(LinearBase): method __init__ (line 133) | def __init__( method weight_loader (line 142) | def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tens... method forward (line 149) | def forward(self, x: torch.Tensor) -> torch.Tensor: FILE: nanovllm/layers/rotary_embedding.py function apply_rotary_emb (line 6) | def apply_rotary_emb( class RotaryEmbedding (line 17) | class RotaryEmbedding(nn.Module): method __init__ (line 19) | def __init__( method forward (line 38) | def forward( function get_rope (line 52) | def get_rope( FILE: nanovllm/layers/sampler.py class Sampler (line 5) | class Sampler(nn.Module): method __init__ (line 7) | def __init__(self): method forward (line 11) | def forward(self, logits: torch.Tensor, temperatures: torch.Tensor): FILE: nanovllm/llm.py class LLM (line 4) | class LLM(LLMEngine): FILE: nanovllm/models/qwen3.py class Qwen3Attention (line 14) | class Qwen3Attention(nn.Module): method __init__ (line 16) | def __init__( method forward (line 71) | def forward( class Qwen3MLP (line 90) | class Qwen3MLP(nn.Module): method __init__ (line 92) | def __init__( method forward (line 112) | def forward(self, x): class Qwen3DecoderLayer (line 119) | class Qwen3DecoderLayer(nn.Module): method __init__ (line 121) | def __init__( method forward (line 145) | def forward( class Qwen3Model (line 161) | class Qwen3Model(nn.Module): method __init__ (line 163) | def __init__( method forward (line 172) | def forward( class Qwen3ForCausalLM (line 185) | class Qwen3ForCausalLM(nn.Module): method __init__ (line 194) | def __init__( method forward (line 204) | def forward( method compute_logits (line 211) | def compute_logits( FILE: nanovllm/sampling_params.py class SamplingParams (line 5) | class SamplingParams: method __post_init__ (line 10) | def __post_init__(self): FILE: nanovllm/utils/context.py class Context (line 6) | class Context: function get_context (line 18) | def get_context(): function set_context (line 21) | def set_context(is_prefill, cu_seqlens_q=None, cu_seqlens_k=None, max_se... function reset_context (line 25) | def reset_context(): FILE: nanovllm/utils/loader.py function default_weight_loader (line 8) | def default_weight_loader(param: nn.Parameter, loaded_weight: torch.Tens... function load_model (line 12) | def load_model(model: nn.Module, path: str):