SYMBOL INDEX (511 symbols across 73 files)

FILE: benchmarks/benchmark_forward.py
  function main (line 17) | def main():
  function benchmark_forward (line 46) | def benchmark_forward(process_idx, args, result_pipe):

FILE: benchmarks/benchmark_inference.py
  function main (line 18) | def main():
  function benchmark_inference (line 45) | def benchmark_inference(process_idx, args, result_pipe):

FILE: benchmarks/benchmark_training.py
  function main (line 17) | def main():
  function benchmark_training (line 50) | def benchmark_training(process_idx, args, result_pipe):

FILE: src/petals/__init__.py
  function _override_bfloat16_mode_default (line 29) | def _override_bfloat16_mode_default():

FILE: src/petals/cli/run_dht.py
  function report_status (line 24) | async def report_status(dht: DHT, node: DHTNode):
  function main (line 37) | def main():

FILE: src/petals/cli/run_server.py
  function main (line 19) | def main():

FILE: src/petals/client/config.py
  class ClientConfig (line 14) | class ClientConfig:

FILE: src/petals/client/from_pretrained.py
  class FromPretrainedMixin (line 17) | class FromPretrainedMixin:
    method from_pretrained (line 19) | def from_pretrained(
  function ignore_keys (line 46) | def ignore_keys(patterns: List[str]):
  function patched_get_checkpoint_shard_files (line 54) | def patched_get_checkpoint_shard_files(

FILE: src/petals/client/inference_session.py
  class _ServerInferenceSession (line 26) | class _ServerInferenceSession:
    method __init__ (line 33) | def __init__(
    method create (line 60) | async def create(
    method _read_inputs_from_queue (line 79) | async def _read_inputs_from_queue(queue: asyncio.Queue, input_timeout:...
    method position (line 87) | def position(self):
    method position (line 91) | def position(self, start_from_position: int):
    method step (line 97) | def step(
    method _collect_next_servers (line 174) | def _collect_next_servers(self) -> List[Tuple[str, str, int, int]]:
    method _step (line 184) | async def _step(self, inputs_serialized: runtime_pb2.ExpertRequest) ->...
    method close (line 190) | def close(self):
    method _aclose_stream (line 198) | async def _aclose_stream(self):
    method __del__ (line 209) | def __del__(self):
    method __enter__ (line 212) | def __enter__(self):
    method __exit__ (line 216) | def __exit__(self, *exc_details):
  class InferenceSession (line 220) | class InferenceSession:
    method __init__ (line 225) | def __init__(self, sequence_manager: RemoteSequenceManager, max_length...
    method num_blocks (line 235) | def num_blocks(self) -> int:
    method position (line 239) | def position(self) -> int:
    method position (line 243) | def position(self, start_from_position: int) -> None:
    method _enter_server_sessions (line 249) | def _enter_server_sessions(self, chosen_spans: List[RemoteSpanInfo]) -...
    method _exit_server_sessions (line 273) | def _exit_server_sessions(self, server_sessions: List[_ServerInference...
    method __enter__ (line 280) | def __enter__(self) -> "InferenceSession":
    method step (line 284) | def step(
    method _update_sequence (line 364) | def _update_sequence(self, server_idx: int, block_idx: int, attempt_no...
    method close (line 393) | def close(self, *exc_details):
    method __exit__ (line 400) | def __exit__(self, *exc_details):
    method __del__ (line 403) | def __del__(self):
    method last_token_id (line 407) | def last_token_id(self) -> Optional[torch.Tensor]:  # Backward compati...
    method last_token_id (line 411) | def last_token_id(self, value: torch.Tensor):  # Backward compatibilit...

FILE: src/petals/client/lm_head.py
  class LMHeadConfig (line 16) | class LMHeadConfig:
  class LMHead (line 23) | class LMHead(nn.Module):
    method __init__ (line 24) | def __init__(self, config: PretrainedConfig):
    method forward (line 50) | def forward(self, hidden_states):
    method chunked_forward (line 63) | def chunked_forward(self, hidden_states):

FILE: src/petals/client/ptune.py
  class PTuneConfig (line 16) | class PTuneConfig:
  class PTuneMixin (line 21) | class PTuneMixin:
    method init_prompts (line 24) | def init_prompts(self, config: PretrainedConfig) -> None:
    method get_prompt (line 43) | def get_prompt(self, batch_size):
  function force_non_empty_weights (line 69) | def force_non_empty_weights():

FILE: src/petals/client/remote_forward_backward.py
  function _forward_unary (line 21) | async def _forward_unary(
  function _backward_unary (line 31) | async def _backward_unary(
  function _forward_stream (line 41) | async def _forward_stream(
  function _backward_stream (line 54) | async def _backward_stream(
  function run_remote_forward (line 67) | async def run_remote_forward(
  function run_remote_backward (line 113) | async def run_remote_backward(

FILE: src/petals/client/remote_generation.py
  class RemotePastKeyValues (line 20) | class RemotePastKeyValues(Cache):
    method __init__ (line 23) | def __init__(self) -> None:
    method __getitem__ (line 28) | def __getitem__(self, _index: int) -> List[torch.Tensor]:
    method get_seq_length (line 31) | def get_seq_length(self, layer_idx: Optional[int] = 0) -> int:
    method get_max_length (line 34) | def get_max_length(self) -> Optional[int]:
    method update_seen (line 37) | def update_seen(self, new_seen: int) -> None:
    method reorder_cache (line 40) | def reorder_cache(self, beam_idx):
  class _SkipTokensMixin (line 47) | class _SkipTokensMixin:
    method prepare_inputs_for_generation (line 50) | def prepare_inputs_for_generation(self, input_ids: torch.LongTensor, *...
  class RemoteGenerationMixin (line 56) | class RemoteGenerationMixin(_SkipTokensMixin):
    method active_session (line 72) | def active_session(self) -> Optional[InferenceSession]:
    method use_session (line 76) | def use_session(self, session: Optional[InferenceSession]) -> ContextM...
    method inference_session (line 80) | def inference_session(self, **kwargs) -> ContextManager[InferenceSessi...
    method generate (line 84) | def generate(
    method _fix_generate_kwargs (line 152) | def _fix_generate_kwargs(kwargs: dict):
    method _reorder_cache (line 163) | def _reorder_cache(past_key_values: RemotePastKeyValues, beam_idx: tor...

FILE: src/petals/client/remote_sequential.py
  class RemoteSequential (line 20) | class RemoteSequential(nn.Module):
    method __init__ (line 25) | def __init__(
    method forward (line 52) | def forward(self, inputs: torch.Tensor, prompts: Optional[torch.Tensor...
    method active_session (line 61) | def active_session(self) -> Optional[InferenceSession]:
    method position (line 70) | def position(self) -> int:
    method use_session (line 76) | def use_session(self, session: Optional[InferenceSession]) -> Inferenc...
    method inference_session (line 86) | def inference_session(self, **kwargs) -> InferenceSession:
    method __getitem__ (line 97) | def __getitem__(self, ix: Union[int, slice]) -> RemoteSequential:
    method __iter__ (line 103) | def __iter__(self):
    method __len__ (line 107) | def __len__(self):
    method extra_repr (line 110) | def extra_repr(self) -> str:

FILE: src/petals/client/routing/sequence_info.py
  class RemoteSequenceInfo (line 14) | class RemoteSequenceInfo:
    method make_empty (line 31) | def make_empty(cls, block_uids: Iterable[ModuleUID]) -> "RemoteSequenc...
    method __getitem__ (line 37) | def __getitem__(self, ix: slice):
    method __len__ (line 45) | def __len__(self):
    method update_ (line 48) | def update_(self, new_block_infos: List[RemoteModuleInfo]):
    method _sort_spans (line 58) | def _sort_spans(block_infos: List[RemoteModuleInfo]):

FILE: src/petals/client/routing/sequence_manager.py
  class SequenceManagerConfig (line 34) | class SequenceManagerConfig(ClientConfig):
    method __init__ (line 35) | def __init__(self, *args, **kwargs):
  class SequenceManagerState (line 46) | class SequenceManagerState:
    method __getitem__ (line 52) | def __getitem__(self, ix: Union[int, slice]) -> SequenceManagerState:
    method __len__ (line 55) | def __len__(self) -> int:
  class RemoteSequenceManager (line 59) | class RemoteSequenceManager:
    method __init__ (line 71) | def __init__(
    method _peer_ids_to_set (line 122) | def _peer_ids_to_set(peer_ids: Optional[Sequence[Union[PeerID, str]]])...
    method make_sequence (line 138) | def make_sequence(
    method _make_sequence_with_min_latency (line 177) | def _make_sequence_with_min_latency(
    method _build_inference_graph (line 217) | def _build_inference_graph(
    method _rtt_to_delay (line 281) | def _rtt_to_delay(
    method _has_cache_for (line 292) | def _has_cache_for(span: RemoteSpanInfo, cache_tokens_needed: Optional...
    method _make_sequence_with_max_throughput (line 302) | def _make_sequence_with_max_throughput(self, start_index: int, end_ind...
    method __getitem__ (line 326) | def __getitem__(self, ix: Union[int, slice]) -> RemoteSequenceManager:
    method update (line 333) | def update(self, *, wait: bool):
    method _update (line 340) | def _update(self):
    method on_request_failure (line 388) | def on_request_failure(self, peer_id: Optional[PeerID]):
    method on_request_success (line 403) | def on_request_success(self, peer_id: PeerID):
    method __len__ (line 407) | def __len__(self):
    method is_alive (line 411) | def is_alive(self):
    method ready (line 415) | def ready(self) -> threading.Event:
    method block_uids (line 419) | def block_uids(self):
    method rpc_info (line 423) | def rpc_info(self):
    method get_retry_delay (line 468) | def get_retry_delay(self, attempt_no: int) -> float:
    method get_request_metadata (line 473) | def get_request_metadata(
    method shutdown (line 489) | def shutdown(self):
  class _SequenceManagerUpdateThread (line 493) | class _SequenceManagerUpdateThread(threading.Thread):
    method __init__ (line 494) | def __init__(self, update_period: float, ref_update_manager: WeakMethod):
    method run (line 502) | def run(self) -> None:
    method shutdown (line 521) | def shutdown(self, timeout: Optional[float] = None):
    method __del__ (line 527) | def __del__(self):
  function maybe_log_traceback (line 531) | def maybe_log_traceback(exc: Exception):
  class MissingBlocksError (line 536) | class MissingBlocksError(RuntimeError):
    method __init__ (line 537) | def __init__(self, block_indices: Union[int, Sequence[int]]):

FILE: src/petals/client/routing/spending_policy.py
  class SpendingPolicyBase (line 9) | class SpendingPolicyBase(ABC):
    method get_points (line 11) | def get_points(self, protocol: str, *args, **kwargs) -> float:
  class NoSpendingPolicy (line 15) | class NoSpendingPolicy(SpendingPolicyBase):
    method get_points (line 16) | def get_points(self, protocol: str, *args, **kwargs) -> float:

FILE: src/petals/client/sequential_autograd.py
  function sequential_forward (line 26) | async def sequential_forward(
  function sequential_backward (line 113) | async def sequential_backward(
  function _gather_forward (line 199) | async def _gather_forward(input_batches, prompt_batches, sequence_manager):
  function _gather_backward (line 209) | async def _gather_backward(
  class _RemoteSequentialAutogradFunction (line 223) | class _RemoteSequentialAutogradFunction(torch.autograd.Function):
    method forward (line 230) | def forward(ctx, inputs: torch.Tensor, prompts: torch.Tensor, sequence...
    method backward (line 253) | def backward(ctx, grad_outputs: torch.Tensor):

FILE: src/petals/data_structures.py
  function parse_uid (line 14) | def parse_uid(uid: ModuleUID) -> Tuple[str, int]:
  class ModelInfo (line 21) | class ModelInfo:
    method to_dict (line 25) | def to_dict(self) -> dict:
    method from_dict (line 29) | def from_dict(cls, source: dict):
  class ServerState (line 33) | class ServerState(Enum):
  class ServerInfo (line 43) | class ServerInfo:
    method to_tuple (line 64) | def to_tuple(self) -> Tuple[int, float, dict]:
    method from_tuple (line 70) | def from_tuple(cls, source: tuple):
  class RemoteModuleInfo (line 78) | class RemoteModuleInfo:
  class RemoteSpanInfo (line 86) | class RemoteSpanInfo:
    method length (line 95) | def length(self) -> int:
    method state (line 99) | def state(self) -> ServerState:
    method throughput (line 103) | def throughput(self) -> float:
  class InferenceMetadata (line 113) | class InferenceMetadata:

FILE: src/petals/models/bloom/block.py
  class WrappedBloomBlock (line 15) | class WrappedBloomBlock(BloomBlock):
    method forward (line 16) | def forward(

FILE: src/petals/models/bloom/config.py
  class DistributedBloomConfig (line 16) | class DistributedBloomConfig(BloomConfig, ClientConfig, PTuneConfig, LMH...
    method from_pretrained (line 24) | def from_pretrained(

FILE: src/petals/models/bloom/model.py
  class DistributedBloomModel (line 21) | class DistributedBloomModel(FromPretrainedMixin, PTuneMixin, BloomModel):
    method __init__ (line 29) | def __init__(self, config: DistributedBloomConfig, *, dht: Optional[hi...
    method forward (line 40) | def forward(
  class DistributedBloomForCausalLM (line 111) | class DistributedBloomForCausalLM(FromPretrainedMixin, RemoteGenerationM...
    method __init__ (line 119) | def __init__(self, config: DistributedBloomConfig):
    method prepare_inputs_for_generation (line 127) | def prepare_inputs_for_generation(
    method _temporary_reorder_cache (line 176) | def _temporary_reorder_cache(self, past_key_values, beam_idx):
    method get_output_embeddings (line 179) | def get_output_embeddings(self):
  class DistributedBloomForSequenceClassification (line 183) | class DistributedBloomForSequenceClassification(FromPretrainedMixin, Blo...
    method __init__ (line 189) | def __init__(self, config: DistributedBloomConfig):

FILE: src/petals/models/falcon/block.py
  function apply_rotary (line 30) | def apply_rotary(query, key, cos, sin):
  class OptimizedFalconRotaryEmbedding (line 34) | class OptimizedFalconRotaryEmbedding(nn.Module):
    method __init__ (line 35) | def __init__(self, head_dim: int, base=10000):
    method _optimized_apply_rotary (line 46) | def _optimized_apply_rotary(self, query, key, cos, sin):
    method cos_sin (line 67) | def cos_sin(self, seq_len: int, past_key_values_length: int, device="c...
    method forward (line 91) | def forward(self, query, key, past_key_values_length=0):
  function split_heads (line 100) | def split_heads(
  class OptimizedFalconAttention (line 113) | class OptimizedFalconAttention(FalconAttention):
    method __init__ (line 114) | def __init__(self, config: FalconConfig):
    method _optimized_split_heads (line 155) | def _optimized_split_heads(self, fused_qkv):
    method forward (line 174) | def forward(
  class OptimizedFalconDecoderLayer (line 286) | class OptimizedFalconDecoderLayer(FalconDecoderLayer):
    method __init__ (line 287) | def __init__(self, config: FalconConfig):
    method _optimized_apply_ln (line 317) | def _optimized_apply_ln(self, hidden_states):
    method forward (line 339) | def forward(
  class WrappedFalconBlock (line 398) | class WrappedFalconBlock(OptimizedFalconDecoderLayer):
    method forward (line 399) | def forward(
    method _reorder_cache_from_bloom_to_falcon (line 440) | def _reorder_cache_from_bloom_to_falcon(self, key_value: KVCache) -> K...
    method _reorder_cache_from_falcon_to_bloom (line 452) | def _reorder_cache_from_falcon_to_bloom(self, key_value: KVCache) -> K...
    method _expand_states (line 464) | def _expand_states(self, state: torch.Tensor) -> torch.Tensor:
    method _collapse_states (line 473) | def _collapse_states(self, state: torch.Tensor) -> torch.Tensor:

FILE: src/petals/models/falcon/config.py
  class DistributedFalconConfig (line 17) | class DistributedFalconConfig(DefaultRevisionMixin, FalconConfig, Client...
    method num_key_value_groups (line 23) | def num_key_value_groups(self) -> int:
    method from_pretrained (line 31) | def from_pretrained(

FILE: src/petals/models/falcon/model.py
  class DistributedFalconModel (line 26) | class DistributedFalconModel(DefaultRevisionMixin, FromPretrainedMixin, ...
    method __init__ (line 34) | def __init__(self, config: DistributedFalconConfig, *, dht: Optional[h...
    method forward (line 45) | def forward(
    method word_embeddings_layernorm (line 116) | def word_embeddings_layernorm(self) -> nn.Module:  # For compatibility...
  class DistributedFalconForCausalLM (line 120) | class DistributedFalconForCausalLM(DefaultRevisionMixin, FromPretrainedM...
    method __init__ (line 126) | def __init__(self, config: DistributedFalconConfig):
    method get_output_embeddings (line 134) | def get_output_embeddings(self):
  class DistributedFalconForSequenceClassification (line 138) | class DistributedFalconForSequenceClassification(
    method __init__ (line 146) | def __init__(self, config: DistributedFalconConfig):

FILE: src/petals/models/llama/block.py
  function apply_rotary_pos_emb (line 26) | def apply_rotary_pos_emb(q, k, cos, sin):
  class OptimizedLlamaAttention (line 32) | class OptimizedLlamaAttention(LlamaAttention):
    method __init__ (line 33) | def __init__(self, *args, **kwargs):
    method _optimized_apply_rotary (line 37) | def _optimized_apply_rotary(self, query_states, key_states, cos, sin):
    method forward (line 44) | def forward(
  class OptimizedLlamaDecoderLayer (line 130) | class OptimizedLlamaDecoderLayer(LlamaDecoderLayer):
    method __init__ (line 131) | def __init__(self, config: LlamaConfig):
    method _optimized_input_layernorm (line 143) | def _optimized_input_layernorm(self, hidden_states):
    method _optimized_output_layernorm (line 150) | def _optimized_output_layernorm(self, hidden_states):
    method forward (line 157) | def forward(
  class WrappedLlamaBlock (line 225) | class WrappedLlamaBlock(OptimizedLlamaDecoderLayer):
    method forward (line 226) | def forward(
    method _reorder_cache_from_bloom_to_llama (line 280) | def _reorder_cache_from_bloom_to_llama(
    method _reorder_cache_from_llama_to_bloom (line 291) | def _reorder_cache_from_llama_to_bloom(

FILE: src/petals/models/llama/config.py
  class DistributedLlamaConfig (line 16) | class DistributedLlamaConfig(LlamaConfig, ClientConfig, PTuneConfig, LMH...
    method num_key_value_groups (line 22) | def num_key_value_groups(self):
    method from_pretrained (line 26) | def from_pretrained(

FILE: src/petals/models/llama/model.py
  class DistributedLlamaModel (line 20) | class DistributedLlamaModel(FromPretrainedMixin, PTuneMixin, LlamaModel):
    method __init__ (line 28) | def __init__(self, config: DistributedLlamaConfig, *, dht: Optional[hi...
    method forward (line 39) | def forward(
    method word_embeddings (line 116) | def word_embeddings(self) -> nn.Embedding:  # For compatibility with R...
    method word_embeddings_layernorm (line 120) | def word_embeddings_layernorm(self) -> nn.Module:  # For compatibility...
    method h (line 124) | def h(self) -> RemoteSequential:  # For compatibility with RemoteGener...
    method ln_f (line 128) | def ln_f(self) -> nn.Module:  # For compatibility with RemoteGeneratio...
  class DistributedLlamaForCausalLM (line 132) | class DistributedLlamaForCausalLM(FromPretrainedMixin, RemoteGenerationM...
    method __init__ (line 138) | def __init__(self, config: DistributedLlamaConfig):
    method get_output_embeddings (line 148) | def get_output_embeddings(self):
    method transformer (line 152) | def transformer(self) -> DistributedLlamaModel:  # For compatibility w...
  class DistributedLlamaForSequenceClassification (line 156) | class DistributedLlamaForSequenceClassification(FromPretrainedMixin, Lla...
    method __init__ (line 162) | def __init__(self, config):
    method transformer (line 173) | def transformer(self) -> DistributedLlamaModel:  # For compatibility w...

FILE: src/petals/models/llama/speculative_model.py
  class DistributedLlamaForSpeculativeGeneration (line 13) | class DistributedLlamaForSpeculativeGeneration(DistributedLlamaForCausal...
    method __init__ (line 14) | def __init__(self, config: DistributedLlamaConfig, small_model: LlamaF...
    method _sample (line 18) | def _sample(

FILE: src/petals/models/mixtral/block.py
  class WrappedMixtralBlock (line 13) | class WrappedMixtralBlock(MixtralDecoderLayer):
    method __init__ (line 14) | def __init__(self, config: MixtralConfig, layer_idx: int):
    method forward (line 21) | def forward(
    method _reorder_cache_from_bloom (line 91) | def _reorder_cache_from_bloom(
    method _reorder_cache_to_bloom (line 103) | def _reorder_cache_to_bloom(

FILE: src/petals/models/mixtral/config.py
  class DistributedMixtralConfig (line 16) | class DistributedMixtralConfig(MixtralConfig, ClientConfig, PTuneConfig,...
    method from_pretrained (line 24) | def from_pretrained(

FILE: src/petals/models/mixtral/model.py
  class DistributedMixtralModel (line 26) | class DistributedMixtralModel(DefaultRevisionMixin, FromPretrainedMixin,...
    method __init__ (line 34) | def __init__(self, config: DistributedMixtralConfig, *, dht: Optional[...
    method forward (line 45) | def forward(
    method word_embeddings (line 125) | def word_embeddings(self) -> nn.Embedding:  # For compatibility with R...
    method word_embeddings_layernorm (line 129) | def word_embeddings_layernorm(self) -> nn.Module:  # For compatibility...
    method h (line 133) | def h(self) -> RemoteSequential:  # For compatibility with RemoteGener...
    method ln_f (line 137) | def ln_f(self) -> nn.Module:  # For compatibility with RemoteGeneratio...
  class DistributedMixtralForCausalLM (line 141) | class DistributedMixtralForCausalLM(FromPretrainedMixin, RemoteGeneratio...
    method __init__ (line 147) | def __init__(self, config: DistributedMixtralConfig):
    method get_output_embeddings (line 155) | def get_output_embeddings(self):
    method transformer (line 159) | def transformer(self) -> DistributedMixtralModel:  # For compatibility...
  class DistributedMixtralForSequenceClassification (line 163) | class DistributedMixtralForSequenceClassification(FromPretrainedMixin, M...
    method __init__ (line 169) | def __init__(self, config: DistributedMixtralConfig):
    method transformer (line 180) | def transformer(self) -> DistributedMixtralModel:  # For compatibility...

FILE: src/petals/server/backend.py
  class TransformerBackend (line 24) | class TransformerBackend(ModuleBackend):
    method __init__ (line 29) | def __init__(
    method get_inference_cache_descriptors (line 88) | def get_inference_cache_descriptors(self, batch_size: int, max_length:...
    method forward (line 101) | def forward(self, *inputs: Union[torch.Tensor, str]) -> Tuple[torch.Te...
    method backward (line 106) | def backward(self, *inputs: Union[torch.Tensor, str]) -> Tuple[torch.T...
    method inference_step (line 112) | def inference_step(
    method _estimate_max_chunk_length (line 146) | def _estimate_max_chunk_length(self, hidden_states: torch.Tensor, infe...
    method _reorder_cache_inplace (line 154) | def _reorder_cache_inplace(self, cache_tensors: torch.Tensor, hypo_ids...
    method _select_layer_past (line 160) | def _select_layer_past(self, cache_tensors: Sequence[torch.Tensor], pr...
    method _update_cache_inplace (line 171) | def _update_cache_inplace(
    method get_pools (line 183) | def get_pools(self) -> Sequence[PrioritizedTaskPool]:
    method get_info (line 186) | def get_info(self) -> Dict[str, Any]:
    method shutdown (line 190) | def shutdown(self):
  function merge_inference_pools_inplace (line 201) | def merge_inference_pools_inplace(backends: Dict[ExpertUID, TransformerB...
  class _MergedInferenceStep (line 216) | class _MergedInferenceStep:
    method __init__ (line 217) | def __init__(self, backends: Dict[ExpertUID, TransformerBackend]):
    method __call__ (line 221) | def __call__(

FILE: src/petals/server/block_functions.py
  function run_rpc_forward (line 32) | async def run_rpc_forward(
  function run_rpc_backward (line 84) | async def run_rpc_backward(
  function iterate_rpc_inference (line 144) | async def iterate_rpc_inference(

FILE: src/petals/server/block_selection.py
  function compute_throughputs (line 12) | def compute_throughputs(spans: Dict[PeerID, RemoteSpanInfo], *, total_bl...
  function _choose_best_start (line 23) | def _choose_best_start(throughputs: np.ndarray, num_blocks: int) -> int:
  function choose_best_blocks (line 28) | def choose_best_blocks(num_blocks: int, module_infos: List[RemoteModuleI...
  function _move_span (line 36) | def _move_span(span: RemoteSpanInfo, new_start: int):
  function should_choose_other_blocks (line 40) | def should_choose_other_blocks(

FILE: src/petals/server/block_utils.py
  function resolve_block_dtype (line 12) | def resolve_block_dtype(config: PretrainedConfig, dtype: Union[str, torc...
  function get_block_size (line 22) | def get_block_size(
  function get_model_block (line 56) | def get_model_block(config, layer_idx: int = 0):

FILE: src/petals/server/from_pretrained.py
  function load_pretrained_block (line 35) | def load_pretrained_block(
  function _load_state_dict_from_repo (line 81) | def _load_state_dict_from_repo(
  function _find_index_file (line 134) | def _find_index_file(
  function _load_state_dict_from_repo_file (line 162) | def _load_state_dict_from_repo_file(
  function _load_state_dict_from_local_file (line 216) | def _load_state_dict_from_local_file(path: str, *, block_prefix: Optiona...

FILE: src/petals/server/handler.py
  class Event (line 48) | class Event(Enum):
  class TransformerConnectionHandler (line 55) | class TransformerConnectionHandler(ConnectionHandler):
    method __init__ (line 60) | def __init__(
    method add_p2p_handlers (line 94) | async def add_p2p_handlers(self, *args, **kwargs) -> None:
    method shutdown (line 100) | def shutdown(self):
    method _gather_inputs (line 109) | async def _gather_inputs(
    method rpc_inference (line 132) | async def rpc_inference(
    method _managed_session (line 198) | def _managed_session(self, session_id: str):
    method _put_into_session_queue (line 214) | def _put_into_session_queue(self, session_id: str, request: runtime_pb...
    method _get_from_session_queue (line 223) | async def _get_from_session_queue(self, session_id: str) -> Optional[r...
    method _listen_to_event_queue (line 227) | async def _listen_to_event_queue(self):
    method _iterate_inference_steps (line 247) | async def _iterate_inference_steps(
    method rpc_push (line 310) | async def rpc_push(self, request: runtime_pb2.ExpertRequest, context: ...
    method _push_outputs (line 320) | async def _push_outputs(
    method rpc_forward (line 352) | async def rpc_forward(self, request: runtime_pb2.ExpertRequest, contex...
    method rpc_forward_stream (line 380) | async def rpc_forward_stream(
    method _serialize_outputs (line 411) | def _serialize_outputs(
    method rpc_backward (line 434) | async def rpc_backward(self, request: runtime_pb2.ExpertRequest, conte...
    method rpc_backward_stream (line 461) | async def rpc_backward_stream(
    method _get_active_adapter (line 490) | def _get_active_adapter(self, metadata: dict) -> str:
    method _serialize_grads (line 496) | def _serialize_grads(
    method _check_uids (line 522) | def _check_uids(self, uids: str) -> Tuple[ModuleUID, ...]:
    method _allocate_cache (line 533) | async def _allocate_cache(
    method _log_request (line 549) | def _log_request(
    method rpc_info (line 575) | async def rpc_info(self, request: runtime_pb2.ExpertUID, context: P2PC...

FILE: src/petals/server/memory_cache.py
  class MemoryCache (line 26) | class MemoryCache:
    method __init__ (line 29) | def __init__(self, max_size_bytes: Optional[int], max_alloc_timeout: O...
    method current_size_bytes (line 44) | def current_size_bytes(self) -> int:
    method current_size_bytes (line 48) | def current_size_bytes(self, value: int):
    method enqueued_size_bytes (line 52) | def enqueued_size_bytes(self) -> int:
    method enqueued_size_bytes (line 56) | def enqueued_size_bytes(self, value: int):
    method bytes_left (line 60) | def bytes_left(self) -> int:
    method handle_counter (line 64) | def handle_counter(self) -> int:
    method handle_counter (line 68) | def handle_counter(self, value: int):
    method allocate_cache (line 72) | async def allocate_cache(
    method get_allocation_size (line 110) | def get_allocation_size(*descriptors: TensorDescriptor) -> int:
    method _schedule_alloc (line 118) | async def _schedule_alloc(
    method _wait_for_free_memory (line 137) | async def _wait_for_free_memory(self, alloc_size: int, timeout: Option...
    method _free (line 169) | def _free(self, alloc_size: int, alloc_task: asyncio.Task):
    method _wait_until_available (line 179) | def _wait_until_available(self, allocated_size: int, timeout: Optional...
    method use_cache (line 196) | def use_cache(self, *handles: Handle) -> Sequence[torch.Tensor]:
  class AllocationFailed (line 224) | class AllocationFailed(Exception):

FILE: src/petals/server/reachability.py
  function validate_reachability (line 22) | def validate_reachability(peer_id, wait_time: float = 7 * 60, retry_dela...
  function check_direct_reachability (line 55) | def check_direct_reachability(max_peers: int = 5, threshold: float = 0.5...
  class ReachabilityProtocol (line 86) | class ReachabilityProtocol(ServicerBase):
    method __init__ (line 89) | def __init__(self, *, probe: Optional[P2P] = None, wait_timeout: float...
    method call_check (line 94) | async def call_check(self, remote_peer: PeerID, *, check_peer: PeerID)...
    method rpc_check (line 106) | async def rpc_check(self, request: dht_pb2.PingRequest, context: P2PCo...
    method serve (line 119) | async def serve(self, p2p: P2P):
    method attach_to_dht (line 127) | def attach_to_dht(cls, dht: DHT, await_ready: bool = False, **kwargs) ...
    method shutdown (line 162) | def shutdown(self):

FILE: src/petals/server/server.py
  class Server (line 46) | class Server:
    method __init__ (line 52) | def __init__(
    method _choose_num_blocks (line 275) | def _choose_num_blocks(self) -> int:
    method run (line 328) | def run(self):
    method _clean_memory_and_fds (line 386) | def _clean_memory_and_fds(self):
    method _choose_blocks (line 403) | def _choose_blocks(self) -> List[int]:
    method _should_choose_other_blocks (line 413) | def _should_choose_other_blocks(self) -> bool:
    method shutdown (line 420) | def shutdown(self, timeout: Optional[float] = 5):
  class ModuleContainer (line 431) | class ModuleContainer(threading.Thread):
    method create (line 436) | def create(
    method __init__ (line 557) | def __init__(
    method run (line 607) | def run(self):
    method run_in_background (line 617) | def run_in_background(self, await_ready=True, timeout=None):
    method ready (line 627) | def ready(self) -> mp.synchronize.Event:
    method is_healthy (line 639) | def is_healthy(self) -> bool:
    method shutdown (line 644) | def shutdown(self):
  class ModuleAnnouncerThread (line 674) | class ModuleAnnouncerThread(threading.Thread):
    method __init__ (line 677) | def __init__(
    method run (line 717) | def run(self) -> None:
    method announce (line 754) | def announce(self, state: ServerState) -> None:
    method _ping_next_servers (line 760) | def _ping_next_servers(self) -> Dict[hivemind.PeerID, float]:
  class RuntimeWithDeduplicatedPools (line 770) | class RuntimeWithDeduplicatedPools(Runtime):
    method __init__ (line 773) | def __init__(self, *args, **kwargs):

FILE: src/petals/server/task_pool.py
  class Task (line 18) | class Task:
    method uid (line 25) | def uid(self) -> int:
  class PrioritizedTaskPool (line 29) | class PrioritizedTaskPool(threading.Thread):
    method __init__ (line 49) | def __init__(
    method run (line 78) | def run(self):
    method terminate (line 88) | def terminate(self):
    method shutdown (line 92) | def shutdown(self):
    method submit_task (line 95) | def submit_task(self, *args: Any, priority: float = 0.0) -> MPFuture:
    method get_task_size (line 113) | def get_task_size(self, task: Task) -> int:
    method load_batch_to_runtime (line 119) | def load_batch_to_runtime(
    method send_outputs_from_runtime (line 133) | def send_outputs_from_runtime(self, uid: int, batch_outputs: List[torc...
    method send_exception_from_runtime (line 144) | def send_exception_from_runtime(self, uid: int, exception: BaseExcepti...
    method empty (line 155) | def empty(self):
    method priority (line 159) | def priority(self) -> Tuple[float, float]:
    method priority (line 164) | def priority(self, item: Tuple[float, float]):
  function _move_to_device_if_tensor (line 170) | def _move_to_device_if_tensor(arg: Any, device: Union[torch.device, str]...

FILE: src/petals/server/task_prioritizer.py
  class TaskPrioritizerBase (line 6) | class TaskPrioritizerBase(ABC):
    method prioritize (line 10) | def prioritize(self, *input: torch.Tensor, points: float = 0.0, **kwar...
  class DummyTaskPrioritizer (line 15) | class DummyTaskPrioritizer(TaskPrioritizerBase):
    method prioritize (line 16) | def prioritize(self, *input: torch.Tensor, points: float = 0.0, **kwar...

FILE: src/petals/server/throughput.py
  function get_server_throughput (line 37) | def get_server_throughput(
  function measure_throughput_info (line 111) | def measure_throughput_info(
  function measure_network_rps (line 147) | def measure_network_rps(
  function _measure_bits_per_second (line 178) | def _measure_bits_per_second(pipe_send: mp.Pipe):
  function measure_compute_rps (line 190) | def measure_compute_rps(
  function synchronize (line 240) | def synchronize(device: torch.device):
  function get_device_name (line 247) | def get_device_name(device: torch.device) -> str:
  function get_dtype_name (line 251) | def get_dtype_name(dtype: torch.dtype, quant_type: QuantType) -> str:

FILE: src/petals/utils/asyncio.py
  function shield_and_wait (line 4) | async def shield_and_wait(task):

FILE: src/petals/utils/auto_config.py
  class _ModelClasses (line 14) | class _ModelClasses:
  function register_model_classes (line 25) | def register_model_classes(*, config: Type[PretrainedConfig], **kwargs):
  class _AutoDistributedBase (line 32) | class _AutoDistributedBase:
    method from_pretrained (line 36) | def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike, N...
  class DefaultRevisionMixin (line 55) | class DefaultRevisionMixin:
    method from_pretrained (line 73) | def from_pretrained(
  class AutoDistributedConfig (line 82) | class AutoDistributedConfig(DefaultRevisionMixin, _AutoDistributedBase):
  class AutoDistributedModel (line 86) | class AutoDistributedModel(DefaultRevisionMixin, _AutoDistributedBase):
  class AutoDistributedModelForCausalLM (line 90) | class AutoDistributedModelForCausalLM(DefaultRevisionMixin, _AutoDistrib...
  class AutoDistributedSpeculativeModel (line 94) | class AutoDistributedSpeculativeModel(DefaultRevisionMixin, _AutoDistrib...
  class AutoDistributedModelForSequenceClassification (line 98) | class AutoDistributedModelForSequenceClassification(DefaultRevisionMixin...

FILE: src/petals/utils/convert_block.py
  class QuantType (line 19) | class QuantType(Enum):
  function convert_block (line 25) | def convert_block(
  function quantize_module (line 76) | def quantize_module(model: nn.Module, *, quant_type: QuantType) -> nn.Mo...
  function make_tensor_parallel (line 118) | def make_tensor_parallel(
  function check_device_balance (line 138) | def check_device_balance(devices: Sequence[torch.device]):

FILE: src/petals/utils/cuda_graphs.py
  function make_inference_graphed_callable (line 5) | def make_inference_graphed_callable(callable: callable, sample_args, num...

FILE: src/petals/utils/dht.py
  function declare_active_modules (line 28) | def declare_active_modules(
  function _declare_active_modules (line 57) | async def _declare_active_modules(
  function get_remote_module_infos (line 74) | def get_remote_module_infos(
  function _get_remote_module_infos (line 95) | async def _get_remote_module_infos(
  function compute_spans (line 134) | def compute_spans(module_infos: List[RemoteModuleInfo], *, min_state: Se...

FILE: src/petals/utils/disk_cache.py
  function _blocks_lock (line 19) | def _blocks_lock(cache_dir: Optional[str], mode: int):
  function allow_cache_reads (line 31) | def allow_cache_reads(cache_dir: Optional[str]):
  function allow_cache_writes (line 36) | def allow_cache_writes(cache_dir: Optional[str]):
  function free_disk_space_for (line 41) | def free_disk_space_for(

FILE: src/petals/utils/hf_auth.py
  function always_needs_auth (line 5) | def always_needs_auth(model_name: Union[str, os.PathLike, None]) -> bool:

FILE: src/petals/utils/logging.py
  function initialize_logs (line 6) | def initialize_logs():

FILE: src/petals/utils/misc.py
  function is_dummy (line 10) | def is_dummy(tensor: torch.Tensor) -> bool:
  function get_size_in_bytes (line 17) | def get_size_in_bytes(dtype: torch.dtype) -> int:
  function docstring_from (line 24) | def docstring_from(source):

FILE: src/petals/utils/packaging.py
  function _mark_masked_tensor (line 9) | def _mark_masked_tensor(index: int) -> bytes:
  function _is_masked_tensor (line 13) | def _is_masked_tensor(item: Any) -> bool:
  function _get_tensor_index (line 17) | def _get_tensor_index(item: bytes) -> int:
  function pack_args_kwargs (line 21) | def pack_args_kwargs(*args, **kwargs) -> Tuple[List[torch.Tensor], Any]:
  function unpack_args_kwargs (line 38) | def unpack_args_kwargs(flat_tensors: List[torch.Tensor], args_structure:...

FILE: src/petals/utils/peft.py
  function check_peft_repository (line 31) | def check_peft_repository(repo_id: str) -> bool:
  function load_specific_module (line 35) | def load_specific_module(block_idx: int, filepath: str, framework: str =...
  function get_adapter_from_repo (line 51) | def get_adapter_from_repo(
  function load_peft (line 72) | def load_peft(
  class AdapterContextMixin (line 132) | class AdapterContextMixin:
    method using_adapter (line 140) | def using_adapter(active_adapter: Optional[str]):
    method active_adapter (line 148) | def active_adapter(self):
    method active_adapter (line 154) | def active_adapter(self, value: Optional[str]):
    method active_adapters (line 158) | def active_adapters(self):
    method set_adapter (line 161) | def set_adapter(self, adapter_names) -> None:
  class LoraLinear (line 173) | class LoraLinear(AdapterContextMixin, lora.Linear):
    method __init__ (line 176) | def __init__(self, base_layer, adapter_name: str):
  class LoraLinear8bitLt (line 184) | class LoraLinear8bitLt(LoraLinear, lora.Linear8bitLt):
  class LoraLinear4bit (line 188) | class LoraLinear4bit(LoraLinear, lora.Linear4bit):
  function create_lora_adapter (line 192) | def create_lora_adapter(block):
  function add_adapter_to_block (line 212) | def add_adapter_to_block(block, block_index, adapter_name, peft_config, ...
  function estimate_adapter_memory_per_block (line 263) | def estimate_adapter_memory_per_block(

FILE: src/petals/utils/ping.py
  function ping (line 15) | async def ping(
  function ping_parallel (line 35) | async def ping_parallel(peer_ids: Sequence[hivemind.PeerID], *args, **kw...
  class PingAggregator (line 40) | class PingAggregator:
    method __init__ (line 41) | def __init__(self, dht: hivemind.DHT, *, ema_alpha: float = 0.2, expir...
    method ping (line 48) | def ping(self, peer_ids: Sequence[hivemind.PeerID], **kwargs) -> None:
    method to_dict (line 60) | def to_dict(self) -> Dict[hivemind.PeerID, float]:

FILE: src/petals/utils/random.py
  function sample_up_to (line 7) | def sample_up_to(population: Collection[T], k: int) -> T:

FILE: src/petals/utils/version.py
  function validate_version (line 14) | def validate_version() -> None:
  function get_compatible_model_repo (line 33) | def get_compatible_model_repo(model_name_or_path: Union[str, os.PathLike...

FILE: tests/conftest.py
  function event_loop (line 15) | def event_loop():
  function cleanup_children (line 31) | def cleanup_children():

FILE: tests/test_aux_functions.py
  function test_bnb_not_imported_when_unnecessary (line 16) | def test_bnb_not_imported_when_unnecessary():
  function test_compute_throughput (line 33) | def test_compute_throughput(inference: bool, n_tokens: int, tensor_paral...
  function test_pack_inputs (line 53) | def test_pack_inputs():

FILE: tests/test_block_exact_match.py
  function test_remote_block_exact_match (line 13) | def test_remote_block_exact_match(atol_forward=1e-4, atol_inference=1e-3):

FILE: tests/test_cache.py
  function _make_tensor_descriptor (line 16) | def _make_tensor_descriptor(num_bytes: int, dtype: Optional[torch.dtype]...
  function test_cache_timeout (line 25) | async def test_cache_timeout():
  function test_unlimited_timeout (line 76) | async def test_unlimited_timeout():
  function test_cache_usage (line 93) | async def test_cache_usage():

FILE: tests/test_chained_calls.py
  function test_forward_backward_exact_match (line 18) | def test_forward_backward_exact_match(atol_forward=1e-4, atol_backward=1...
  function test_chained_inference_exact_match (line 46) | def test_chained_inference_exact_match(atol_inference=1e-4):

FILE: tests/test_dtype.py
  function test_block_dtype (line 12) | def test_block_dtype(torch_dtype):

FILE: tests/test_full_model.py
  function tokenizer (line 14) | def tokenizer():
  function model (line 20) | def model():
  function ref_model (line 27) | def ref_model():
  function test_full_model_exact_match (line 36) | def test_full_model_exact_match(tokenizer, model, ref_model, use_peft, p...
  function make_generate_calls (line 80) | def make_generate_calls(model, inputs, *, max_new_tokens, multiple_calls...
  function test_greedy_generation (line 97) | def test_greedy_generation(tokenizer, model, ref_model, max_new_tokens=4):
  function test_sampling (line 117) | def test_sampling(tokenizer, model, ref_model, max_new_tokens=10):
  function test_beam_search_generation (line 149) | def test_beam_search_generation(tokenizer, model, ref_model, max_new_tok...
  function test_input_ids (line 159) | def test_input_ids(tokenizer, model, ref_model, max_new_tokens=4):

FILE: tests/test_optimized_layers.py
  class UnoptimizedWrappedFalconBlock (line 18) | class UnoptimizedWrappedFalconBlock(FalconDecoderLayer):
    method forward (line 19) | def forward(
    method _reorder_cache_from_bloom_to_falcon (line 58) | def _reorder_cache_from_bloom_to_falcon(self, key_value: KVCache) -> K...
    method _reorder_cache_from_falcon_to_bloom (line 70) | def _reorder_cache_from_falcon_to_bloom(self, key_value: KVCache) -> K...
    method _expand_states (line 82) | def _expand_states(self, state: torch.Tensor) -> torch.Tensor:
    method _collapse_states (line 91) | def _collapse_states(self, state: torch.Tensor) -> torch.Tensor:
  class UnoptimizedWrappedLlamaBlock (line 101) | class UnoptimizedWrappedLlamaBlock(LlamaDecoderLayer):
    method forward (line 102) | def forward(
    method _reorder_cache_from_bloom_to_llama (line 163) | def _reorder_cache_from_bloom_to_llama(
    method _reorder_cache_from_llama_to_bloom (line 175) | def _reorder_cache_from_llama_to_bloom(
  function test_optimized_block (line 189) | def test_optimized_block(device):

FILE: tests/test_peft.py
  function clear_dir (line 14) | def clear_dir(path_to_dir):
  function dir_empty (line 19) | def dir_empty(path_to_dir):
  function test_check_peft (line 25) | def test_check_peft():
  function test_load_noncached (line 31) | def test_load_noncached(tmpdir):
  function test_load_cached (line 44) | def test_load_cached(tmpdir):
  function test_load_layer_exists (line 52) | def test_load_layer_exists(tmpdir):
  function test_load_layer_nonexists (line 59) | def test_load_layer_nonexists(tmpdir):

FILE: tests/test_priority_pool.py
  function _submit_tasks (line 12) | def _submit_tasks(runtime_ready, pools, results_valid):
  function test_priority_pools (line 33) | def test_priority_pools():

FILE: tests/test_remote_sequential.py
  function test_remote_sequential (line 17) | def test_remote_sequential():
  class DummyCustomSequenceManager (line 65) | class DummyCustomSequenceManager(RemoteSequenceManager):
    method rpc_info (line 69) | def rpc_info(self):
    method get_request_metadata (line 76) | def get_request_metadata(self, protocol: str, *args, **kwargs):
  function test_remote_sequential_prompts (line 89) | def test_remote_sequential_prompts(batch_size=2, seq_len=5, pre_seq_len=3):

FILE: tests/test_sequence_manager.py
  function test_sequence_manager_basics (line 18) | def test_sequence_manager_basics(mode: str):
  class RemoteSequenceManagerWithChecks (line 46) | class RemoteSequenceManagerWithChecks(RemoteSequenceManager):
    method __init__ (line 49) | def __init__(self, *args, _was_shut_down: threading.Event, **kwargs):
    method shutdown (line 53) | def shutdown(self):

FILE: tests/test_server_stats.py
  function test_server_info (line 13) | def test_server_info(block_from: int = 2, block_to: int = 5, max_length:...

FILE: tests/test_speculative_generation.py
  function test_remote_block_with_cache_invalidation_exact_match (line 19) | def test_remote_block_with_cache_invalidation_exact_match(atol_forward=1...
  function noisy_model (line 46) | def noisy_model():
  function model (line 58) | def model():
  function tokenizer (line 65) | def tokenizer():
  function test_remote_speculative_generation (line 75) | def test_remote_speculative_generation(tokenizer, model, noisy_model, at...

FILE: tests/test_tensor_parallel.py
  function test_tp_block (line 16) | def test_tp_block(devices, custom_config):