SYMBOL INDEX (131 symbols across 22 files) FILE: bloom-inference-scripts/bloom-accelerate-inference.py function get_args (line 13) | def get_args(): function print_rank0 (line 39) | def print_rank0(*msg): function get_world_size (line 65) | def get_world_size() -> int: function generate (line 118) | def generate(): FILE: bloom-inference-scripts/bloom-ds-inference.py function print_rank0 (line 60) | def print_rank0(*msg): function get_repo_root (line 69) | def get_repo_root(model_name_or_path): function get_checkpoint_files (line 93) | def get_checkpoint_files(model_name_or_path): function write_checkpoints_json (line 154) | def write_checkpoints_json(): function generate (line 230) | def generate(): FILE: bloom-inference-scripts/bloom-ds-zero-inference.py function print_rank0 (line 54) | def print_rank0(*msg): function generate (line 162) | def generate(): FILE: inference_server/benchmark.py function benchmark_generation (line 22) | def benchmark_generation(model: ModelDeployment, request: GenerateReques... function get_benchmark_results (line 31) | def get_benchmark_results( function benchmark_end_to_end (line 47) | def benchmark_end_to_end(args: argparse.Namespace) -> None: function get_args (line 88) | def get_args() -> argparse.Namespace: function main (line 112) | def main() -> None: FILE: inference_server/cli.py function get_args (line 9) | def get_args() -> argparse.Namespace: function main (line 15) | def main() -> None: FILE: inference_server/download_model.py function get_args (line 7) | def get_args() -> argparse.Namespace: function main (line 28) | def main() -> None: FILE: inference_server/model_handler/deployment.py class ModelDeployment (line 29) | class ModelDeployment: method __init__ (line 30) | def __init__(self, args: argparse.Namespace, grpc_allowed: bool = False): method should_use_grpc (line 58) | def should_use_grpc(self, deployment_framework: str, grpc_allowed: boo... method initialize_ports (line 63) | def initialize_ports(self): method _is_socket_open (line 68) | def _is_socket_open(self, port): method _is_server_process_alive (line 76) | def _is_server_process_alive(self): method _wait_until_server_is_live (line 89) | def _wait_until_server_is_live(self): method dict_to_proto (line 100) | def dict_to_proto(self, generate_kwargs: dict) -> dict: method _initialize_service (line 110) | def _initialize_service(self, args: argparse.Namespace): method _initialize_grpc_client (line 137) | def _initialize_grpc_client(self): method generate_in_tensor_parallel (line 145) | async def generate_in_tensor_parallel(self, text: List[str], generate_... method generate_async (line 153) | async def generate_async(self, stub_id: int, text: List[str], generate... method forward_in_tensor_parallel (line 159) | async def forward_in_tensor_parallel(self, conditioning_text: List[str... method forward_async (line 167) | async def forward_async(self, stub_id: int, conditioning_text: List[st... method generate (line 172) | def generate(self, **kwargs) -> GenerateResponse: method forward (line 206) | def forward(self, request: ForwardRequest) -> ForwardResponse: method tokenize (line 224) | def tokenize(self, request: TokenizeRequest) -> TokenizeResponse: FILE: inference_server/model_handler/grpc_utils/generation_server.py class GenerationServer (line 14) | class GenerationServer(generation_pb2_grpc.GenerationServiceServicer): method __init__ (line 15) | def __init__(self, model: Model) -> None: method _unpack_proto_query_kwargs (line 18) | def _unpack_proto_query_kwargs(self, query_kwargs): method Generate (line 22) | def Generate(self, request, context): method Forward (line 48) | def Forward(self, request, context): function serve (line 73) | def serve(inference_pipeline, port): FILE: inference_server/model_handler/grpc_utils/pb/generation_pb2_grpc.py class GenerationServiceStub (line 8) | class GenerationServiceStub(object): method __init__ (line 11) | def __init__(self, channel): class GenerationServiceServicer (line 29) | class GenerationServiceServicer(object): method Generate (line 32) | def Generate(self, request, context): method Forward (line 38) | def Forward(self, request, context): function add_GenerationServiceServicer_to_server (line 45) | def add_GenerationServiceServicer_to_server(servicer, server): class GenerationService (line 63) | class GenerationService(object): method Generate (line 67) | def Generate( method Forward (line 96) | def Forward( FILE: inference_server/model_handler/launch.py function get_args (line 13) | def get_args() -> argparse.Namespace: function main (line 26) | def main(): FILE: inference_server/models/__init__.py function get_model_class (line 5) | def get_model_class(deployment_framework: str): function start_inference_engine (line 26) | def start_inference_engine(deployment_framework: str) -> None: FILE: inference_server/models/ds_inference.py class DSInferenceModel (line 19) | class DSInferenceModel(Model): method __init__ (line 20) | def __init__(self, args: Namespace) -> None: class TemporaryCheckpointsJSON (line 71) | class TemporaryCheckpointsJSON: method __init__ (line 72) | def __init__(self, model_path: str): method write_checkpoints_json (line 77) | def write_checkpoints_json(self) -> None: method __enter__ (line 83) | def __enter__(self): method __exit__ (line 88) | def __exit__(self, type, value, traceback): function get_model_path (line 92) | def get_model_path(model_name: str): FILE: inference_server/models/ds_zero.py class DSZeROModel (line 13) | class DSZeROModel(Model): method __init__ (line 14) | def __init__(self, args: Namespace) -> None: FILE: inference_server/models/hf_accelerate.py class HFAccelerateModel (line 9) | class HFAccelerateModel(Model): method __init__ (line 10) | def __init__(self, args: Namespace) -> None: FILE: inference_server/models/hf_cpu.py class HFCPUModel (line 6) | class HFCPUModel(HFAccelerateModel): method __init__ (line 7) | def __init__(self, args: Namespace) -> None: FILE: inference_server/models/model.py class Model (line 20) | class Model: method __init__ (line 21) | def __init__(self, args: argparse.Namespace) -> None: method post_init (line 27) | def post_init(self, model_name: str) -> None: method get_generation_config (line 34) | def get_generation_config(self, request: GenerateRequest) -> Generatio... method generate (line 47) | def generate(self, request: GenerateRequest) -> Union[GenerateResponse... method forward (line 96) | def forward(self, request: ForwardRequest) -> Union[ForwardResponse, E... method tokenize (line 144) | def tokenize(self, request: TokenizeRequest) -> TokenizeResponse: function check_max_input_length (line 151) | def check_max_input_length(input_token_length: int, max_input_length: in... function check_batch_size (line 159) | def check_batch_size(batch_size: int, max_batch_size: int) -> None: function get_hf_model_class (line 168) | def get_hf_model_class(model_class: str) -> Union[AutoModelForCausalLM, ... function load_tokenizer (line 172) | def load_tokenizer(model_name: str) -> AutoTokenizer: FILE: inference_server/server.py class QueryID (line 22) | class QueryID(BaseModel): class Args (line 30) | class Args: method __init__ (line 31) | def __init__(self) -> None: function query_id (line 51) | def query_id(): function tokenize (line 56) | def tokenize(): function generate (line 75) | def generate(): function forward (line 96) | def forward(): FILE: inference_server/utils/requests.py class BaseResponse (line 6) | class BaseResponse(BaseModel): class GenerateRequest (line 11) | class GenerateRequest(BaseModel): method get_generate_kwargs (line 36) | def get_generate_kwargs(self) -> dict: class GenerateResponse (line 44) | class GenerateResponse(BaseResponse): class TokenizeRequest (line 50) | class TokenizeRequest(BaseModel): class TokenizeResponse (line 54) | class TokenizeResponse(BaseResponse): class ForwardRequest (line 59) | class ForwardRequest(BaseModel): class ForwardResponse (line 64) | class ForwardResponse(BaseResponse): function parse_bool (line 69) | def parse_bool(value: str) -> bool: function parse_field (line 78) | def parse_field(kwargs: dict, field: str, dtype: type, default_value: An... function create_generate_request (line 90) | def create_generate_request(text: List[str], generate_kwargs: dict) -> G... function get_filter_dict (line 119) | def get_filter_dict(d: BaseModel) -> dict: FILE: inference_server/utils/utils.py function get_argument_parser (line 31) | def get_argument_parser() -> argparse.ArgumentParser: function parse_args (line 68) | def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: function run_rank_n (line 77) | def run_rank_n(func: Callable, rank: int = 0, barrier: bool = False) -> ... function print_rank_0 (line 99) | def print_rank_0(*args, **kwargs) -> None: function get_torch_dtype (line 103) | def get_torch_dtype(dtype_str: str) -> torch.dtype: function get_str_dtype (line 114) | def get_str_dtype(dtype_str: torch.dtype) -> str: function get_dummy_batch (line 125) | def get_dummy_batch(batch_size: int, input_sentences: List[str] = None) ... function get_num_tokens_to_generate (line 136) | def get_num_tokens_to_generate(max_new_tokens: int, allowed_max_new_toke... function run_and_log_time (line 143) | def run_and_log_time(execs: Union[List[partial], partial]) -> Tuple[Unio... function pad_ids (line 158) | def pad_ids(arrays, padding, max_length=-1): function get_exception_response (line 168) | def get_exception_response(query_id: int, debug: bool = False): function get_world_size (line 187) | def get_world_size() -> int: function get_cuda_visible_devices (line 197) | def get_cuda_visible_devices() -> List[int]: FILE: server_request.py function get_args (line 6) | def get_args() -> argparse.Namespace: function generate (line 16) | def generate(url: str) -> None: function tokenize (line 32) | def tokenize(url: str) -> None: function forward (line 40) | def forward(url: str) -> None: function query_id (line 61) | def query_id(url: str) -> None: function main (line 68) | def main(): FILE: static/js/index.js function get_temperature (line 20) | function get_temperature() { function get_top_p (line 28) | function get_top_p() { function get_top_k (line 36) | function get_top_k() { function get_repetition_penalty (line 44) | function get_repetition_penalty() { function get_max_new_tokens (line 52) | function get_max_new_tokens() { FILE: ui.py function get_args (line 14) | def get_args() -> argparse.Namespace: class Server (line 28) | class Server: method __init__ (line 29) | def __init__(self, args: argparse.Namespace): method homepage (line 50) | def homepage(self, request: Request) -> HTMLResponse: method generate (line 53) | def generate(self, request: dict) -> JSONResponse: method run (line 61) | def run(self): function main (line 74) | def main() -> None: