SYMBOL INDEX (511 symbols across 73 files) FILE: benchmarks/benchmark_forward.py function main (line 17) | def main(): function benchmark_forward (line 46) | def benchmark_forward(process_idx, args, result_pipe): FILE: benchmarks/benchmark_inference.py function main (line 18) | def main(): function benchmark_inference (line 45) | def benchmark_inference(process_idx, args, result_pipe): FILE: benchmarks/benchmark_training.py function main (line 17) | def main(): function benchmark_training (line 50) | def benchmark_training(process_idx, args, result_pipe): FILE: src/petals/__init__.py function _override_bfloat16_mode_default (line 29) | def _override_bfloat16_mode_default(): FILE: src/petals/cli/run_dht.py function report_status (line 24) | async def report_status(dht: DHT, node: DHTNode): function main (line 37) | def main(): FILE: src/petals/cli/run_server.py function main (line 19) | def main(): FILE: src/petals/client/config.py class ClientConfig (line 14) | class ClientConfig: FILE: src/petals/client/from_pretrained.py class FromPretrainedMixin (line 17) | class FromPretrainedMixin: method from_pretrained (line 19) | def from_pretrained( function ignore_keys (line 46) | def ignore_keys(patterns: List[str]): function patched_get_checkpoint_shard_files (line 54) | def patched_get_checkpoint_shard_files( FILE: src/petals/client/inference_session.py class _ServerInferenceSession (line 26) | class _ServerInferenceSession: method __init__ (line 33) | def __init__( method create (line 60) | async def create( method _read_inputs_from_queue (line 79) | async def _read_inputs_from_queue(queue: asyncio.Queue, input_timeout:... method position (line 87) | def position(self): method position (line 91) | def position(self, start_from_position: int): method step (line 97) | def step( method _collect_next_servers (line 174) | def _collect_next_servers(self) -> List[Tuple[str, str, int, int]]: method _step (line 184) | async def _step(self, inputs_serialized: runtime_pb2.ExpertRequest) ->... method close (line 190) | def close(self): method _aclose_stream (line 198) | async def _aclose_stream(self): method __del__ (line 209) | def __del__(self): method __enter__ (line 212) | def __enter__(self): method __exit__ (line 216) | def __exit__(self, *exc_details): class InferenceSession (line 220) | class InferenceSession: method __init__ (line 225) | def __init__(self, sequence_manager: RemoteSequenceManager, max_length... method num_blocks (line 235) | def num_blocks(self) -> int: method position (line 239) | def position(self) -> int: method position (line 243) | def position(self, start_from_position: int) -> None: method _enter_server_sessions (line 249) | def _enter_server_sessions(self, chosen_spans: List[RemoteSpanInfo]) -... method _exit_server_sessions (line 273) | def _exit_server_sessions(self, server_sessions: List[_ServerInference... method __enter__ (line 280) | def __enter__(self) -> "InferenceSession": method step (line 284) | def step( method _update_sequence (line 364) | def _update_sequence(self, server_idx: int, block_idx: int, attempt_no... method close (line 393) | def close(self, *exc_details): method __exit__ (line 400) | def __exit__(self, *exc_details): method __del__ (line 403) | def __del__(self): method last_token_id (line 407) | def last_token_id(self) -> Optional[torch.Tensor]: # Backward compati... method last_token_id (line 411) | def last_token_id(self, value: torch.Tensor): # Backward compatibilit... FILE: src/petals/client/lm_head.py class LMHeadConfig (line 16) | class LMHeadConfig: class LMHead (line 23) | class LMHead(nn.Module): method __init__ (line 24) | def __init__(self, config: PretrainedConfig): method forward (line 50) | def forward(self, hidden_states): method chunked_forward (line 63) | def chunked_forward(self, hidden_states): FILE: src/petals/client/ptune.py class PTuneConfig (line 16) | class PTuneConfig: class PTuneMixin (line 21) | class PTuneMixin: method init_prompts (line 24) | def init_prompts(self, config: PretrainedConfig) -> None: method get_prompt (line 43) | def get_prompt(self, batch_size): function force_non_empty_weights (line 69) | def force_non_empty_weights(): FILE: src/petals/client/remote_forward_backward.py function _forward_unary (line 21) | async def _forward_unary( function _backward_unary (line 31) | async def _backward_unary( function _forward_stream (line 41) | async def _forward_stream( function _backward_stream (line 54) | async def _backward_stream( function run_remote_forward (line 67) | async def run_remote_forward( function run_remote_backward (line 113) | async def run_remote_backward( FILE: src/petals/client/remote_generation.py class RemotePastKeyValues (line 20) | class RemotePastKeyValues(Cache): method __init__ (line 23) | def __init__(self) -> None: method __getitem__ (line 28) | def __getitem__(self, _index: int) -> List[torch.Tensor]: method get_seq_length (line 31) | def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: method get_max_length (line 34) | def get_max_length(self) -> Optional[int]: method update_seen (line 37) | def update_seen(self, new_seen: int) -> None: method reorder_cache (line 40) | def reorder_cache(self, beam_idx): class _SkipTokensMixin (line 47) | class _SkipTokensMixin: method prepare_inputs_for_generation (line 50) | def prepare_inputs_for_generation(self, input_ids: torch.LongTensor, *... class RemoteGenerationMixin (line 56) | class RemoteGenerationMixin(_SkipTokensMixin): method active_session (line 72) | def active_session(self) -> Optional[InferenceSession]: method use_session (line 76) | def use_session(self, session: Optional[InferenceSession]) -> ContextM... method inference_session (line 80) | def inference_session(self, **kwargs) -> ContextManager[InferenceSessi... method generate (line 84) | def generate( method _fix_generate_kwargs (line 152) | def _fix_generate_kwargs(kwargs: dict): method _reorder_cache (line 163) | def _reorder_cache(past_key_values: RemotePastKeyValues, beam_idx: tor... FILE: src/petals/client/remote_sequential.py class RemoteSequential (line 20) | class RemoteSequential(nn.Module): method __init__ (line 25) | def __init__( method forward (line 52) | def forward(self, inputs: torch.Tensor, prompts: Optional[torch.Tensor... method active_session (line 61) | def active_session(self) -> Optional[InferenceSession]: method position (line 70) | def position(self) -> int: method use_session (line 76) | def use_session(self, session: Optional[InferenceSession]) -> Inferenc... method inference_session (line 86) | def inference_session(self, **kwargs) -> InferenceSession: method __getitem__ (line 97) | def __getitem__(self, ix: Union[int, slice]) -> RemoteSequential: method __iter__ (line 103) | def __iter__(self): method __len__ (line 107) | def __len__(self): method extra_repr (line 110) | def extra_repr(self) -> str: FILE: src/petals/client/routing/sequence_info.py class RemoteSequenceInfo (line 14) | class RemoteSequenceInfo: method make_empty (line 31) | def make_empty(cls, block_uids: Iterable[ModuleUID]) -> "RemoteSequenc... method __getitem__ (line 37) | def __getitem__(self, ix: slice): method __len__ (line 45) | def __len__(self): method update_ (line 48) | def update_(self, new_block_infos: List[RemoteModuleInfo]): method _sort_spans (line 58) | def _sort_spans(block_infos: List[RemoteModuleInfo]): FILE: src/petals/client/routing/sequence_manager.py class SequenceManagerConfig (line 34) | class SequenceManagerConfig(ClientConfig): method __init__ (line 35) | def __init__(self, *args, **kwargs): class SequenceManagerState (line 46) | class SequenceManagerState: method __getitem__ (line 52) | def __getitem__(self, ix: Union[int, slice]) -> SequenceManagerState: method __len__ (line 55) | def __len__(self) -> int: class RemoteSequenceManager (line 59) | class RemoteSequenceManager: method __init__ (line 71) | def __init__( method _peer_ids_to_set (line 122) | def _peer_ids_to_set(peer_ids: Optional[Sequence[Union[PeerID, str]]])... method make_sequence (line 138) | def make_sequence( method _make_sequence_with_min_latency (line 177) | def _make_sequence_with_min_latency( method _build_inference_graph (line 217) | def _build_inference_graph( method _rtt_to_delay (line 281) | def _rtt_to_delay( method _has_cache_for (line 292) | def _has_cache_for(span: RemoteSpanInfo, cache_tokens_needed: Optional... method _make_sequence_with_max_throughput (line 302) | def _make_sequence_with_max_throughput(self, start_index: int, end_ind... method __getitem__ (line 326) | def __getitem__(self, ix: Union[int, slice]) -> RemoteSequenceManager: method update (line 333) | def update(self, *, wait: bool): method _update (line 340) | def _update(self): method on_request_failure (line 388) | def on_request_failure(self, peer_id: Optional[PeerID]): method on_request_success (line 403) | def on_request_success(self, peer_id: PeerID): method __len__ (line 407) | def __len__(self): method is_alive (line 411) | def is_alive(self): method ready (line 415) | def ready(self) -> threading.Event: method block_uids (line 419) | def block_uids(self): method rpc_info (line 423) | def rpc_info(self): method get_retry_delay (line 468) | def get_retry_delay(self, attempt_no: int) -> float: method get_request_metadata (line 473) | def get_request_metadata( method shutdown (line 489) | def shutdown(self): class _SequenceManagerUpdateThread (line 493) | class _SequenceManagerUpdateThread(threading.Thread): method __init__ (line 494) | def __init__(self, update_period: float, ref_update_manager: WeakMethod): method run (line 502) | def run(self) -> None: method shutdown (line 521) | def shutdown(self, timeout: Optional[float] = None): method __del__ (line 527) | def __del__(self): function maybe_log_traceback (line 531) | def maybe_log_traceback(exc: Exception): class MissingBlocksError (line 536) | class MissingBlocksError(RuntimeError): method __init__ (line 537) | def __init__(self, block_indices: Union[int, Sequence[int]]): FILE: src/petals/client/routing/spending_policy.py class SpendingPolicyBase (line 9) | class SpendingPolicyBase(ABC): method get_points (line 11) | def get_points(self, protocol: str, *args, **kwargs) -> float: class NoSpendingPolicy (line 15) | class NoSpendingPolicy(SpendingPolicyBase): method get_points (line 16) | def get_points(self, protocol: str, *args, **kwargs) -> float: FILE: src/petals/client/sequential_autograd.py function sequential_forward (line 26) | async def sequential_forward( function sequential_backward (line 113) | async def sequential_backward( function _gather_forward (line 199) | async def _gather_forward(input_batches, prompt_batches, sequence_manager): function _gather_backward (line 209) | async def _gather_backward( class _RemoteSequentialAutogradFunction (line 223) | class _RemoteSequentialAutogradFunction(torch.autograd.Function): method forward (line 230) | def forward(ctx, inputs: torch.Tensor, prompts: torch.Tensor, sequence... method backward (line 253) | def backward(ctx, grad_outputs: torch.Tensor): FILE: src/petals/data_structures.py function parse_uid (line 14) | def parse_uid(uid: ModuleUID) -> Tuple[str, int]: class ModelInfo (line 21) | class ModelInfo: method to_dict (line 25) | def to_dict(self) -> dict: method from_dict (line 29) | def from_dict(cls, source: dict): class ServerState (line 33) | class ServerState(Enum): class ServerInfo (line 43) | class ServerInfo: method to_tuple (line 64) | def to_tuple(self) -> Tuple[int, float, dict]: method from_tuple (line 70) | def from_tuple(cls, source: tuple): class RemoteModuleInfo (line 78) | class RemoteModuleInfo: class RemoteSpanInfo (line 86) | class RemoteSpanInfo: method length (line 95) | def length(self) -> int: method state (line 99) | def state(self) -> ServerState: method throughput (line 103) | def throughput(self) -> float: class InferenceMetadata (line 113) | class InferenceMetadata: FILE: src/petals/models/bloom/block.py class WrappedBloomBlock (line 15) | class WrappedBloomBlock(BloomBlock): method forward (line 16) | def forward( FILE: src/petals/models/bloom/config.py class DistributedBloomConfig (line 16) | class DistributedBloomConfig(BloomConfig, ClientConfig, PTuneConfig, LMH... method from_pretrained (line 24) | def from_pretrained( FILE: src/petals/models/bloom/model.py class DistributedBloomModel (line 21) | class DistributedBloomModel(FromPretrainedMixin, PTuneMixin, BloomModel): method __init__ (line 29) | def __init__(self, config: DistributedBloomConfig, *, dht: Optional[hi... method forward (line 40) | def forward( class DistributedBloomForCausalLM (line 111) | class DistributedBloomForCausalLM(FromPretrainedMixin, RemoteGenerationM... method __init__ (line 119) | def __init__(self, config: DistributedBloomConfig): method prepare_inputs_for_generation (line 127) | def prepare_inputs_for_generation( method _temporary_reorder_cache (line 176) | def _temporary_reorder_cache(self, past_key_values, beam_idx): method get_output_embeddings (line 179) | def get_output_embeddings(self): class DistributedBloomForSequenceClassification (line 183) | class DistributedBloomForSequenceClassification(FromPretrainedMixin, Blo... method __init__ (line 189) | def __init__(self, config: DistributedBloomConfig): FILE: src/petals/models/falcon/block.py function apply_rotary (line 30) | def apply_rotary(query, key, cos, sin): class OptimizedFalconRotaryEmbedding (line 34) | class OptimizedFalconRotaryEmbedding(nn.Module): method __init__ (line 35) | def __init__(self, head_dim: int, base=10000): method _optimized_apply_rotary (line 46) | def _optimized_apply_rotary(self, query, key, cos, sin): method cos_sin (line 67) | def cos_sin(self, seq_len: int, past_key_values_length: int, device="c... method forward (line 91) | def forward(self, query, key, past_key_values_length=0): function split_heads (line 100) | def split_heads( class OptimizedFalconAttention (line 113) | class OptimizedFalconAttention(FalconAttention): method __init__ (line 114) | def __init__(self, config: FalconConfig): method _optimized_split_heads (line 155) | def _optimized_split_heads(self, fused_qkv): method forward (line 174) | def forward( class OptimizedFalconDecoderLayer (line 286) | class OptimizedFalconDecoderLayer(FalconDecoderLayer): method __init__ (line 287) | def __init__(self, config: FalconConfig): method _optimized_apply_ln (line 317) | def _optimized_apply_ln(self, hidden_states): method forward (line 339) | def forward( class WrappedFalconBlock (line 398) | class WrappedFalconBlock(OptimizedFalconDecoderLayer): method forward (line 399) | def forward( method _reorder_cache_from_bloom_to_falcon (line 440) | def _reorder_cache_from_bloom_to_falcon(self, key_value: KVCache) -> K... method _reorder_cache_from_falcon_to_bloom (line 452) | def _reorder_cache_from_falcon_to_bloom(self, key_value: KVCache) -> K... method _expand_states (line 464) | def _expand_states(self, state: torch.Tensor) -> torch.Tensor: method _collapse_states (line 473) | def _collapse_states(self, state: torch.Tensor) -> torch.Tensor: FILE: src/petals/models/falcon/config.py class DistributedFalconConfig (line 17) | class DistributedFalconConfig(DefaultRevisionMixin, FalconConfig, Client... method num_key_value_groups (line 23) | def num_key_value_groups(self) -> int: method from_pretrained (line 31) | def from_pretrained( FILE: src/petals/models/falcon/model.py class DistributedFalconModel (line 26) | class DistributedFalconModel(DefaultRevisionMixin, FromPretrainedMixin, ... method __init__ (line 34) | def __init__(self, config: DistributedFalconConfig, *, dht: Optional[h... method forward (line 45) | def forward( method word_embeddings_layernorm (line 116) | def word_embeddings_layernorm(self) -> nn.Module: # For compatibility... class DistributedFalconForCausalLM (line 120) | class DistributedFalconForCausalLM(DefaultRevisionMixin, FromPretrainedM... method __init__ (line 126) | def __init__(self, config: DistributedFalconConfig): method get_output_embeddings (line 134) | def get_output_embeddings(self): class DistributedFalconForSequenceClassification (line 138) | class DistributedFalconForSequenceClassification( method __init__ (line 146) | def __init__(self, config: DistributedFalconConfig): FILE: src/petals/models/llama/block.py function apply_rotary_pos_emb (line 26) | def apply_rotary_pos_emb(q, k, cos, sin): class OptimizedLlamaAttention (line 32) | class OptimizedLlamaAttention(LlamaAttention): method __init__ (line 33) | def __init__(self, *args, **kwargs): method _optimized_apply_rotary (line 37) | def _optimized_apply_rotary(self, query_states, key_states, cos, sin): method forward (line 44) | def forward( class OptimizedLlamaDecoderLayer (line 130) | class OptimizedLlamaDecoderLayer(LlamaDecoderLayer): method __init__ (line 131) | def __init__(self, config: LlamaConfig): method _optimized_input_layernorm (line 143) | def _optimized_input_layernorm(self, hidden_states): method _optimized_output_layernorm (line 150) | def _optimized_output_layernorm(self, hidden_states): method forward (line 157) | def forward( class WrappedLlamaBlock (line 225) | class WrappedLlamaBlock(OptimizedLlamaDecoderLayer): method forward (line 226) | def forward( method _reorder_cache_from_bloom_to_llama (line 280) | def _reorder_cache_from_bloom_to_llama( method _reorder_cache_from_llama_to_bloom (line 291) | def _reorder_cache_from_llama_to_bloom( FILE: src/petals/models/llama/config.py class DistributedLlamaConfig (line 16) | class DistributedLlamaConfig(LlamaConfig, ClientConfig, PTuneConfig, LMH... method num_key_value_groups (line 22) | def num_key_value_groups(self): method from_pretrained (line 26) | def from_pretrained( FILE: src/petals/models/llama/model.py class DistributedLlamaModel (line 20) | class DistributedLlamaModel(FromPretrainedMixin, PTuneMixin, LlamaModel): method __init__ (line 28) | def __init__(self, config: DistributedLlamaConfig, *, dht: Optional[hi... method forward (line 39) | def forward( method word_embeddings (line 116) | def word_embeddings(self) -> nn.Embedding: # For compatibility with R... method word_embeddings_layernorm (line 120) | def word_embeddings_layernorm(self) -> nn.Module: # For compatibility... method h (line 124) | def h(self) -> RemoteSequential: # For compatibility with RemoteGener... method ln_f (line 128) | def ln_f(self) -> nn.Module: # For compatibility with RemoteGeneratio... class DistributedLlamaForCausalLM (line 132) | class DistributedLlamaForCausalLM(FromPretrainedMixin, RemoteGenerationM... method __init__ (line 138) | def __init__(self, config: DistributedLlamaConfig): method get_output_embeddings (line 148) | def get_output_embeddings(self): method transformer (line 152) | def transformer(self) -> DistributedLlamaModel: # For compatibility w... class DistributedLlamaForSequenceClassification (line 156) | class DistributedLlamaForSequenceClassification(FromPretrainedMixin, Lla... method __init__ (line 162) | def __init__(self, config): method transformer (line 173) | def transformer(self) -> DistributedLlamaModel: # For compatibility w... FILE: src/petals/models/llama/speculative_model.py class DistributedLlamaForSpeculativeGeneration (line 13) | class DistributedLlamaForSpeculativeGeneration(DistributedLlamaForCausal... method __init__ (line 14) | def __init__(self, config: DistributedLlamaConfig, small_model: LlamaF... method _sample (line 18) | def _sample( FILE: src/petals/models/mixtral/block.py class WrappedMixtralBlock (line 13) | class WrappedMixtralBlock(MixtralDecoderLayer): method __init__ (line 14) | def __init__(self, config: MixtralConfig, layer_idx: int): method forward (line 21) | def forward( method _reorder_cache_from_bloom (line 91) | def _reorder_cache_from_bloom( method _reorder_cache_to_bloom (line 103) | def _reorder_cache_to_bloom( FILE: src/petals/models/mixtral/config.py class DistributedMixtralConfig (line 16) | class DistributedMixtralConfig(MixtralConfig, ClientConfig, PTuneConfig,... method from_pretrained (line 24) | def from_pretrained( FILE: src/petals/models/mixtral/model.py class DistributedMixtralModel (line 26) | class DistributedMixtralModel(DefaultRevisionMixin, FromPretrainedMixin,... method __init__ (line 34) | def __init__(self, config: DistributedMixtralConfig, *, dht: Optional[... method forward (line 45) | def forward( method word_embeddings (line 125) | def word_embeddings(self) -> nn.Embedding: # For compatibility with R... method word_embeddings_layernorm (line 129) | def word_embeddings_layernorm(self) -> nn.Module: # For compatibility... method h (line 133) | def h(self) -> RemoteSequential: # For compatibility with RemoteGener... method ln_f (line 137) | def ln_f(self) -> nn.Module: # For compatibility with RemoteGeneratio... class DistributedMixtralForCausalLM (line 141) | class DistributedMixtralForCausalLM(FromPretrainedMixin, RemoteGeneratio... method __init__ (line 147) | def __init__(self, config: DistributedMixtralConfig): method get_output_embeddings (line 155) | def get_output_embeddings(self): method transformer (line 159) | def transformer(self) -> DistributedMixtralModel: # For compatibility... class DistributedMixtralForSequenceClassification (line 163) | class DistributedMixtralForSequenceClassification(FromPretrainedMixin, M... method __init__ (line 169) | def __init__(self, config: DistributedMixtralConfig): method transformer (line 180) | def transformer(self) -> DistributedMixtralModel: # For compatibility... FILE: src/petals/server/backend.py class TransformerBackend (line 24) | class TransformerBackend(ModuleBackend): method __init__ (line 29) | def __init__( method get_inference_cache_descriptors (line 88) | def get_inference_cache_descriptors(self, batch_size: int, max_length:... method forward (line 101) | def forward(self, *inputs: Union[torch.Tensor, str]) -> Tuple[torch.Te... method backward (line 106) | def backward(self, *inputs: Union[torch.Tensor, str]) -> Tuple[torch.T... method inference_step (line 112) | def inference_step( method _estimate_max_chunk_length (line 146) | def _estimate_max_chunk_length(self, hidden_states: torch.Tensor, infe... method _reorder_cache_inplace (line 154) | def _reorder_cache_inplace(self, cache_tensors: torch.Tensor, hypo_ids... method _select_layer_past (line 160) | def _select_layer_past(self, cache_tensors: Sequence[torch.Tensor], pr... method _update_cache_inplace (line 171) | def _update_cache_inplace( method get_pools (line 183) | def get_pools(self) -> Sequence[PrioritizedTaskPool]: method get_info (line 186) | def get_info(self) -> Dict[str, Any]: method shutdown (line 190) | def shutdown(self): function merge_inference_pools_inplace (line 201) | def merge_inference_pools_inplace(backends: Dict[ExpertUID, TransformerB... class _MergedInferenceStep (line 216) | class _MergedInferenceStep: method __init__ (line 217) | def __init__(self, backends: Dict[ExpertUID, TransformerBackend]): method __call__ (line 221) | def __call__( FILE: src/petals/server/block_functions.py function run_rpc_forward (line 32) | async def run_rpc_forward( function run_rpc_backward (line 84) | async def run_rpc_backward( function iterate_rpc_inference (line 144) | async def iterate_rpc_inference( FILE: src/petals/server/block_selection.py function compute_throughputs (line 12) | def compute_throughputs(spans: Dict[PeerID, RemoteSpanInfo], *, total_bl... function _choose_best_start (line 23) | def _choose_best_start(throughputs: np.ndarray, num_blocks: int) -> int: function choose_best_blocks (line 28) | def choose_best_blocks(num_blocks: int, module_infos: List[RemoteModuleI... function _move_span (line 36) | def _move_span(span: RemoteSpanInfo, new_start: int): function should_choose_other_blocks (line 40) | def should_choose_other_blocks( FILE: src/petals/server/block_utils.py function resolve_block_dtype (line 12) | def resolve_block_dtype(config: PretrainedConfig, dtype: Union[str, torc... function get_block_size (line 22) | def get_block_size( function get_model_block (line 56) | def get_model_block(config, layer_idx: int = 0): FILE: src/petals/server/from_pretrained.py function load_pretrained_block (line 35) | def load_pretrained_block( function _load_state_dict_from_repo (line 81) | def _load_state_dict_from_repo( function _find_index_file (line 134) | def _find_index_file( function _load_state_dict_from_repo_file (line 162) | def _load_state_dict_from_repo_file( function _load_state_dict_from_local_file (line 216) | def _load_state_dict_from_local_file(path: str, *, block_prefix: Optiona... FILE: src/petals/server/handler.py class Event (line 48) | class Event(Enum): class TransformerConnectionHandler (line 55) | class TransformerConnectionHandler(ConnectionHandler): method __init__ (line 60) | def __init__( method add_p2p_handlers (line 94) | async def add_p2p_handlers(self, *args, **kwargs) -> None: method shutdown (line 100) | def shutdown(self): method _gather_inputs (line 109) | async def _gather_inputs( method rpc_inference (line 132) | async def rpc_inference( method _managed_session (line 198) | def _managed_session(self, session_id: str): method _put_into_session_queue (line 214) | def _put_into_session_queue(self, session_id: str, request: runtime_pb... method _get_from_session_queue (line 223) | async def _get_from_session_queue(self, session_id: str) -> Optional[r... method _listen_to_event_queue (line 227) | async def _listen_to_event_queue(self): method _iterate_inference_steps (line 247) | async def _iterate_inference_steps( method rpc_push (line 310) | async def rpc_push(self, request: runtime_pb2.ExpertRequest, context: ... method _push_outputs (line 320) | async def _push_outputs( method rpc_forward (line 352) | async def rpc_forward(self, request: runtime_pb2.ExpertRequest, contex... method rpc_forward_stream (line 380) | async def rpc_forward_stream( method _serialize_outputs (line 411) | def _serialize_outputs( method rpc_backward (line 434) | async def rpc_backward(self, request: runtime_pb2.ExpertRequest, conte... method rpc_backward_stream (line 461) | async def rpc_backward_stream( method _get_active_adapter (line 490) | def _get_active_adapter(self, metadata: dict) -> str: method _serialize_grads (line 496) | def _serialize_grads( method _check_uids (line 522) | def _check_uids(self, uids: str) -> Tuple[ModuleUID, ...]: method _allocate_cache (line 533) | async def _allocate_cache( method _log_request (line 549) | def _log_request( method rpc_info (line 575) | async def rpc_info(self, request: runtime_pb2.ExpertUID, context: P2PC... FILE: src/petals/server/memory_cache.py class MemoryCache (line 26) | class MemoryCache: method __init__ (line 29) | def __init__(self, max_size_bytes: Optional[int], max_alloc_timeout: O... method current_size_bytes (line 44) | def current_size_bytes(self) -> int: method current_size_bytes (line 48) | def current_size_bytes(self, value: int): method enqueued_size_bytes (line 52) | def enqueued_size_bytes(self) -> int: method enqueued_size_bytes (line 56) | def enqueued_size_bytes(self, value: int): method bytes_left (line 60) | def bytes_left(self) -> int: method handle_counter (line 64) | def handle_counter(self) -> int: method handle_counter (line 68) | def handle_counter(self, value: int): method allocate_cache (line 72) | async def allocate_cache( method get_allocation_size (line 110) | def get_allocation_size(*descriptors: TensorDescriptor) -> int: method _schedule_alloc (line 118) | async def _schedule_alloc( method _wait_for_free_memory (line 137) | async def _wait_for_free_memory(self, alloc_size: int, timeout: Option... method _free (line 169) | def _free(self, alloc_size: int, alloc_task: asyncio.Task): method _wait_until_available (line 179) | def _wait_until_available(self, allocated_size: int, timeout: Optional... method use_cache (line 196) | def use_cache(self, *handles: Handle) -> Sequence[torch.Tensor]: class AllocationFailed (line 224) | class AllocationFailed(Exception): FILE: src/petals/server/reachability.py function validate_reachability (line 22) | def validate_reachability(peer_id, wait_time: float = 7 * 60, retry_dela... function check_direct_reachability (line 55) | def check_direct_reachability(max_peers: int = 5, threshold: float = 0.5... class ReachabilityProtocol (line 86) | class ReachabilityProtocol(ServicerBase): method __init__ (line 89) | def __init__(self, *, probe: Optional[P2P] = None, wait_timeout: float... method call_check (line 94) | async def call_check(self, remote_peer: PeerID, *, check_peer: PeerID)... method rpc_check (line 106) | async def rpc_check(self, request: dht_pb2.PingRequest, context: P2PCo... method serve (line 119) | async def serve(self, p2p: P2P): method attach_to_dht (line 127) | def attach_to_dht(cls, dht: DHT, await_ready: bool = False, **kwargs) ... method shutdown (line 162) | def shutdown(self): FILE: src/petals/server/server.py class Server (line 46) | class Server: method __init__ (line 52) | def __init__( method _choose_num_blocks (line 275) | def _choose_num_blocks(self) -> int: method run (line 328) | def run(self): method _clean_memory_and_fds (line 386) | def _clean_memory_and_fds(self): method _choose_blocks (line 403) | def _choose_blocks(self) -> List[int]: method _should_choose_other_blocks (line 413) | def _should_choose_other_blocks(self) -> bool: method shutdown (line 420) | def shutdown(self, timeout: Optional[float] = 5): class ModuleContainer (line 431) | class ModuleContainer(threading.Thread): method create (line 436) | def create( method __init__ (line 557) | def __init__( method run (line 607) | def run(self): method run_in_background (line 617) | def run_in_background(self, await_ready=True, timeout=None): method ready (line 627) | def ready(self) -> mp.synchronize.Event: method is_healthy (line 639) | def is_healthy(self) -> bool: method shutdown (line 644) | def shutdown(self): class ModuleAnnouncerThread (line 674) | class ModuleAnnouncerThread(threading.Thread): method __init__ (line 677) | def __init__( method run (line 717) | def run(self) -> None: method announce (line 754) | def announce(self, state: ServerState) -> None: method _ping_next_servers (line 760) | def _ping_next_servers(self) -> Dict[hivemind.PeerID, float]: class RuntimeWithDeduplicatedPools (line 770) | class RuntimeWithDeduplicatedPools(Runtime): method __init__ (line 773) | def __init__(self, *args, **kwargs): FILE: src/petals/server/task_pool.py class Task (line 18) | class Task: method uid (line 25) | def uid(self) -> int: class PrioritizedTaskPool (line 29) | class PrioritizedTaskPool(threading.Thread): method __init__ (line 49) | def __init__( method run (line 78) | def run(self): method terminate (line 88) | def terminate(self): method shutdown (line 92) | def shutdown(self): method submit_task (line 95) | def submit_task(self, *args: Any, priority: float = 0.0) -> MPFuture: method get_task_size (line 113) | def get_task_size(self, task: Task) -> int: method load_batch_to_runtime (line 119) | def load_batch_to_runtime( method send_outputs_from_runtime (line 133) | def send_outputs_from_runtime(self, uid: int, batch_outputs: List[torc... method send_exception_from_runtime (line 144) | def send_exception_from_runtime(self, uid: int, exception: BaseExcepti... method empty (line 155) | def empty(self): method priority (line 159) | def priority(self) -> Tuple[float, float]: method priority (line 164) | def priority(self, item: Tuple[float, float]): function _move_to_device_if_tensor (line 170) | def _move_to_device_if_tensor(arg: Any, device: Union[torch.device, str]... FILE: src/petals/server/task_prioritizer.py class TaskPrioritizerBase (line 6) | class TaskPrioritizerBase(ABC): method prioritize (line 10) | def prioritize(self, *input: torch.Tensor, points: float = 0.0, **kwar... class DummyTaskPrioritizer (line 15) | class DummyTaskPrioritizer(TaskPrioritizerBase): method prioritize (line 16) | def prioritize(self, *input: torch.Tensor, points: float = 0.0, **kwar... FILE: src/petals/server/throughput.py function get_server_throughput (line 37) | def get_server_throughput( function measure_throughput_info (line 111) | def measure_throughput_info( function measure_network_rps (line 147) | def measure_network_rps( function _measure_bits_per_second (line 178) | def _measure_bits_per_second(pipe_send: mp.Pipe): function measure_compute_rps (line 190) | def measure_compute_rps( function synchronize (line 240) | def synchronize(device: torch.device): function get_device_name (line 247) | def get_device_name(device: torch.device) -> str: function get_dtype_name (line 251) | def get_dtype_name(dtype: torch.dtype, quant_type: QuantType) -> str: FILE: src/petals/utils/asyncio.py function shield_and_wait (line 4) | async def shield_and_wait(task): FILE: src/petals/utils/auto_config.py class _ModelClasses (line 14) | class _ModelClasses: function register_model_classes (line 25) | def register_model_classes(*, config: Type[PretrainedConfig], **kwargs): class _AutoDistributedBase (line 32) | class _AutoDistributedBase: method from_pretrained (line 36) | def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike, N... class DefaultRevisionMixin (line 55) | class DefaultRevisionMixin: method from_pretrained (line 73) | def from_pretrained( class AutoDistributedConfig (line 82) | class AutoDistributedConfig(DefaultRevisionMixin, _AutoDistributedBase): class AutoDistributedModel (line 86) | class AutoDistributedModel(DefaultRevisionMixin, _AutoDistributedBase): class AutoDistributedModelForCausalLM (line 90) | class AutoDistributedModelForCausalLM(DefaultRevisionMixin, _AutoDistrib... class AutoDistributedSpeculativeModel (line 94) | class AutoDistributedSpeculativeModel(DefaultRevisionMixin, _AutoDistrib... class AutoDistributedModelForSequenceClassification (line 98) | class AutoDistributedModelForSequenceClassification(DefaultRevisionMixin... FILE: src/petals/utils/convert_block.py class QuantType (line 19) | class QuantType(Enum): function convert_block (line 25) | def convert_block( function quantize_module (line 76) | def quantize_module(model: nn.Module, *, quant_type: QuantType) -> nn.Mo... function make_tensor_parallel (line 118) | def make_tensor_parallel( function check_device_balance (line 138) | def check_device_balance(devices: Sequence[torch.device]): FILE: src/petals/utils/cuda_graphs.py function make_inference_graphed_callable (line 5) | def make_inference_graphed_callable(callable: callable, sample_args, num... FILE: src/petals/utils/dht.py function declare_active_modules (line 28) | def declare_active_modules( function _declare_active_modules (line 57) | async def _declare_active_modules( function get_remote_module_infos (line 74) | def get_remote_module_infos( function _get_remote_module_infos (line 95) | async def _get_remote_module_infos( function compute_spans (line 134) | def compute_spans(module_infos: List[RemoteModuleInfo], *, min_state: Se... FILE: src/petals/utils/disk_cache.py function _blocks_lock (line 19) | def _blocks_lock(cache_dir: Optional[str], mode: int): function allow_cache_reads (line 31) | def allow_cache_reads(cache_dir: Optional[str]): function allow_cache_writes (line 36) | def allow_cache_writes(cache_dir: Optional[str]): function free_disk_space_for (line 41) | def free_disk_space_for( FILE: src/petals/utils/hf_auth.py function always_needs_auth (line 5) | def always_needs_auth(model_name: Union[str, os.PathLike, None]) -> bool: FILE: src/petals/utils/logging.py function initialize_logs (line 6) | def initialize_logs(): FILE: src/petals/utils/misc.py function is_dummy (line 10) | def is_dummy(tensor: torch.Tensor) -> bool: function get_size_in_bytes (line 17) | def get_size_in_bytes(dtype: torch.dtype) -> int: function docstring_from (line 24) | def docstring_from(source): FILE: src/petals/utils/packaging.py function _mark_masked_tensor (line 9) | def _mark_masked_tensor(index: int) -> bytes: function _is_masked_tensor (line 13) | def _is_masked_tensor(item: Any) -> bool: function _get_tensor_index (line 17) | def _get_tensor_index(item: bytes) -> int: function pack_args_kwargs (line 21) | def pack_args_kwargs(*args, **kwargs) -> Tuple[List[torch.Tensor], Any]: function unpack_args_kwargs (line 38) | def unpack_args_kwargs(flat_tensors: List[torch.Tensor], args_structure:... FILE: src/petals/utils/peft.py function check_peft_repository (line 31) | def check_peft_repository(repo_id: str) -> bool: function load_specific_module (line 35) | def load_specific_module(block_idx: int, filepath: str, framework: str =... function get_adapter_from_repo (line 51) | def get_adapter_from_repo( function load_peft (line 72) | def load_peft( class AdapterContextMixin (line 132) | class AdapterContextMixin: method using_adapter (line 140) | def using_adapter(active_adapter: Optional[str]): method active_adapter (line 148) | def active_adapter(self): method active_adapter (line 154) | def active_adapter(self, value: Optional[str]): method active_adapters (line 158) | def active_adapters(self): method set_adapter (line 161) | def set_adapter(self, adapter_names) -> None: class LoraLinear (line 173) | class LoraLinear(AdapterContextMixin, lora.Linear): method __init__ (line 176) | def __init__(self, base_layer, adapter_name: str): class LoraLinear8bitLt (line 184) | class LoraLinear8bitLt(LoraLinear, lora.Linear8bitLt): class LoraLinear4bit (line 188) | class LoraLinear4bit(LoraLinear, lora.Linear4bit): function create_lora_adapter (line 192) | def create_lora_adapter(block): function add_adapter_to_block (line 212) | def add_adapter_to_block(block, block_index, adapter_name, peft_config, ... function estimate_adapter_memory_per_block (line 263) | def estimate_adapter_memory_per_block( FILE: src/petals/utils/ping.py function ping (line 15) | async def ping( function ping_parallel (line 35) | async def ping_parallel(peer_ids: Sequence[hivemind.PeerID], *args, **kw... class PingAggregator (line 40) | class PingAggregator: method __init__ (line 41) | def __init__(self, dht: hivemind.DHT, *, ema_alpha: float = 0.2, expir... method ping (line 48) | def ping(self, peer_ids: Sequence[hivemind.PeerID], **kwargs) -> None: method to_dict (line 60) | def to_dict(self) -> Dict[hivemind.PeerID, float]: FILE: src/petals/utils/random.py function sample_up_to (line 7) | def sample_up_to(population: Collection[T], k: int) -> T: FILE: src/petals/utils/version.py function validate_version (line 14) | def validate_version() -> None: function get_compatible_model_repo (line 33) | def get_compatible_model_repo(model_name_or_path: Union[str, os.PathLike... FILE: tests/conftest.py function event_loop (line 15) | def event_loop(): function cleanup_children (line 31) | def cleanup_children(): FILE: tests/test_aux_functions.py function test_bnb_not_imported_when_unnecessary (line 16) | def test_bnb_not_imported_when_unnecessary(): function test_compute_throughput (line 33) | def test_compute_throughput(inference: bool, n_tokens: int, tensor_paral... function test_pack_inputs (line 53) | def test_pack_inputs(): FILE: tests/test_block_exact_match.py function test_remote_block_exact_match (line 13) | def test_remote_block_exact_match(atol_forward=1e-4, atol_inference=1e-3): FILE: tests/test_cache.py function _make_tensor_descriptor (line 16) | def _make_tensor_descriptor(num_bytes: int, dtype: Optional[torch.dtype]... function test_cache_timeout (line 25) | async def test_cache_timeout(): function test_unlimited_timeout (line 76) | async def test_unlimited_timeout(): function test_cache_usage (line 93) | async def test_cache_usage(): FILE: tests/test_chained_calls.py function test_forward_backward_exact_match (line 18) | def test_forward_backward_exact_match(atol_forward=1e-4, atol_backward=1... function test_chained_inference_exact_match (line 46) | def test_chained_inference_exact_match(atol_inference=1e-4): FILE: tests/test_dtype.py function test_block_dtype (line 12) | def test_block_dtype(torch_dtype): FILE: tests/test_full_model.py function tokenizer (line 14) | def tokenizer(): function model (line 20) | def model(): function ref_model (line 27) | def ref_model(): function test_full_model_exact_match (line 36) | def test_full_model_exact_match(tokenizer, model, ref_model, use_peft, p... function make_generate_calls (line 80) | def make_generate_calls(model, inputs, *, max_new_tokens, multiple_calls... function test_greedy_generation (line 97) | def test_greedy_generation(tokenizer, model, ref_model, max_new_tokens=4): function test_sampling (line 117) | def test_sampling(tokenizer, model, ref_model, max_new_tokens=10): function test_beam_search_generation (line 149) | def test_beam_search_generation(tokenizer, model, ref_model, max_new_tok... function test_input_ids (line 159) | def test_input_ids(tokenizer, model, ref_model, max_new_tokens=4): FILE: tests/test_optimized_layers.py class UnoptimizedWrappedFalconBlock (line 18) | class UnoptimizedWrappedFalconBlock(FalconDecoderLayer): method forward (line 19) | def forward( method _reorder_cache_from_bloom_to_falcon (line 58) | def _reorder_cache_from_bloom_to_falcon(self, key_value: KVCache) -> K... method _reorder_cache_from_falcon_to_bloom (line 70) | def _reorder_cache_from_falcon_to_bloom(self, key_value: KVCache) -> K... method _expand_states (line 82) | def _expand_states(self, state: torch.Tensor) -> torch.Tensor: method _collapse_states (line 91) | def _collapse_states(self, state: torch.Tensor) -> torch.Tensor: class UnoptimizedWrappedLlamaBlock (line 101) | class UnoptimizedWrappedLlamaBlock(LlamaDecoderLayer): method forward (line 102) | def forward( method _reorder_cache_from_bloom_to_llama (line 163) | def _reorder_cache_from_bloom_to_llama( method _reorder_cache_from_llama_to_bloom (line 175) | def _reorder_cache_from_llama_to_bloom( function test_optimized_block (line 189) | def test_optimized_block(device): FILE: tests/test_peft.py function clear_dir (line 14) | def clear_dir(path_to_dir): function dir_empty (line 19) | def dir_empty(path_to_dir): function test_check_peft (line 25) | def test_check_peft(): function test_load_noncached (line 31) | def test_load_noncached(tmpdir): function test_load_cached (line 44) | def test_load_cached(tmpdir): function test_load_layer_exists (line 52) | def test_load_layer_exists(tmpdir): function test_load_layer_nonexists (line 59) | def test_load_layer_nonexists(tmpdir): FILE: tests/test_priority_pool.py function _submit_tasks (line 12) | def _submit_tasks(runtime_ready, pools, results_valid): function test_priority_pools (line 33) | def test_priority_pools(): FILE: tests/test_remote_sequential.py function test_remote_sequential (line 17) | def test_remote_sequential(): class DummyCustomSequenceManager (line 65) | class DummyCustomSequenceManager(RemoteSequenceManager): method rpc_info (line 69) | def rpc_info(self): method get_request_metadata (line 76) | def get_request_metadata(self, protocol: str, *args, **kwargs): function test_remote_sequential_prompts (line 89) | def test_remote_sequential_prompts(batch_size=2, seq_len=5, pre_seq_len=3): FILE: tests/test_sequence_manager.py function test_sequence_manager_basics (line 18) | def test_sequence_manager_basics(mode: str): class RemoteSequenceManagerWithChecks (line 46) | class RemoteSequenceManagerWithChecks(RemoteSequenceManager): method __init__ (line 49) | def __init__(self, *args, _was_shut_down: threading.Event, **kwargs): method shutdown (line 53) | def shutdown(self): FILE: tests/test_server_stats.py function test_server_info (line 13) | def test_server_info(block_from: int = 2, block_to: int = 5, max_length:... FILE: tests/test_speculative_generation.py function test_remote_block_with_cache_invalidation_exact_match (line 19) | def test_remote_block_with_cache_invalidation_exact_match(atol_forward=1... function noisy_model (line 46) | def noisy_model(): function model (line 58) | def model(): function tokenizer (line 65) | def tokenizer(): function test_remote_speculative_generation (line 75) | def test_remote_speculative_generation(tokenizer, model, noisy_model, at... FILE: tests/test_tensor_parallel.py function test_tp_block (line 16) | def test_tp_block(devices, custom_config):