Repository: r2d4/openlm Branch: main Commit: c2ac78b6e677 Files: 15 Total size: 34.3 KB Directory structure: gitextract_ijisd4bg/ ├── .gitignore ├── LICENSE ├── README.md ├── examples/ │ ├── api_keys.py │ ├── as_openai.py │ ├── custom_provider.py │ └── multiplex.py ├── openlm/ │ ├── __init__.py │ ├── llm/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── cohere.py │ │ ├── huggingface.py │ │ └── openai.py │ └── openlm.py └── pyproject.toml ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ env .venv .ruff_cache dist *.egg-info **/__pycache__ ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2023 Matt Rickard Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================
FILE: README.md
================================================
# OpenLM

Drop-in OpenAI-compatible library that can call LLMs from other providers (e.g., HuggingFace, Cohere, and more).

```diff
1c1
< import openai
---
> import openlm as openai

completion = openai.Completion.create(
    model=["bloom-560m", "cohere.ai/command"],
    prompt=["Hello world!", "A second prompt!"]
)
print(completion)
```

### Features
* Takes in the same parameters as OpenAI's Completion API and returns a similarly structured response.
* Call models from HuggingFace's inference endpoint API, Cohere.ai, OpenAI, or your custom implementation.
* Complete multiple prompts on multiple models in the same request.
* Very small footprint: OpenLM calls the inference APIs directly rather than using multiple SDKs.

### Installation
```bash
pip install openlm
```

### Examples
- [Import as OpenAI](examples/as_openai.py)
- [Set up API keys via environment variables or pass a dict](examples/api_keys.py)
- [Add a custom model or provider](examples/custom_provider.py)
- [Complete multiple prompts on multiple models](examples/multiplex.py)

OpenLM currently supports the Completion endpoint, but over time will support more standardized endpoints that make sense.
### [Example with Response](examples/multiplex.py) ```python import sys from pathlib import Path sys.path.append(str(Path(__file__).resolve().parent.parent)) import openlm import json completion = openlm.Completion.create( model=["ada", "huggingface.co/gpt2", "cohere.ai/command"], prompt=["The quick brown fox", "Who jumped over the lazy dog?"], max_tokens=15 ) print(json.dumps(completion, indent=4)) ``` ```json { "id": "504cc502-dc27-43e7-bcc3-b62e178c247e", "object": "text_completion", "created": 1683583267, "choices": [ { "id": "c0487ba2-935d-4dec-b191-f7eff962f117", "model_idx": 0, "model_name": "openai.com/ada", "index": 0, "created": 1683583233, "text": " jumps into the much bigger brown bush.\" \"Alright, people like you can", "usage": { "prompt_tokens": 4, "completion_tokens": 15, "total_tokens": 19 }, "extra": { "id": "cmpl-7E3CCSpJHXfx5yB0TaJU9ON7rNYPT" } }, { "id": "bab92d11-5ba6-4da2-acca-1f3398a78c3e", "model_idx": 0, "model_name": "openai.com/ada", "index": 1, "created": 1683583233, "text": "\n\nIt turns out that saying one's name \"Joe\" is the", "usage": { "prompt_tokens": 7, "completion_tokens": 15, "total_tokens": 22 }, "extra": { "id": "cmpl-7E3CDBbqFy92I2ZbSGoDT5ickAiPD" } }, { "id": "be870636-9d9e-4f74-b8bd-d04766072a7b", "model_idx": 1, "model_name": "huggingface.co/gpt2", "index": 0, "created": 1683583234, "text": "The quick brown foxes, and the short, snuggly fox-scented, soft foxes we have in our household\u2026 all come in two distinct flavours: yellow and orange; and red and white. This mixture is often confused with" }, { "id": "c1abf535-54a9-4b72-8681-d3b4a601da88", "model_idx": 1, "model_name": "huggingface.co/gpt2", "index": 1, "created": 1683583266, "text": "Who jumped over the lazy dog? He probably got it, but there's only so much you do when you lose one.\n\nBut I will say for a moment that there's no way this guy might have picked a fight with Donald Trump." 
}, { "id": "08e8c351-236a-4497-98f3-488cdc0b6b6a", "model_idx": 2, "model_name": "cohere.ai/command", "index": 0, "created": 1683583267, "text": "\njumps over the lazy dog.", "extra": { "request_id": "0bbb28c0-eb3d-4614-b4d9-1eca88c361ca", "generation_id": "5288dd6f-3ecf-475b-b909-0b226be6a193" } }, { "id": "49ce51e6-9a18-4093-957f-54a1557c8829", "model_idx": 2, "model_name": "cohere.ai/command", "index": 1, "created": 1683583267, "text": "\nThe quick brown fox.", "extra": { "request_id": "ab5d5e03-22a1-42cd-85b2-9b9704c79304", "generation_id": "60493966-abf6-483c-9c47-2ea5c5eeb855" } } ], "usage": { "prompt_tokens": 11, "completion_tokens": 30, "total_tokens": 41 } } ``` ### Other Languages [r2d4/llm.ts](https://github.com/r2d4/llm.ts) is a TypeScript library that has a similar API that sits on top of multiple language models. ### Roadmap - [ ] Streaming API - [ ] Embeddings API ### Contributing Contributions are welcome! Please open an issue or submit a PR. ### License [MIT](LICENSE) ================================================ FILE: examples/api_keys.py ================================================ import sys from pathlib import Path sys.path.append(str(Path(__file__).resolve().parent.parent)) import openlm import json completion = openlm.Completion.create( model=["ada", "distilgpt2", "huggingface.co/"], prompt="Hello world", api_keys={ 'huggingface.co': 'YOUR_API_KEY', # or os.environ["HF_API_TOKEN"] 'cohere.ai': 'YOUR_API_KEY', # or os.environ["COHERE_API_KEY"] 'openai.com': 'YOUR_API_KEY' # or os.environ["OPENAI_API_KEY"] }, ) print(json.dumps(completion, indent=4)) ================================================ FILE: examples/as_openai.py ================================================ import sys from pathlib import Path sys.path.append(str(Path(__file__).resolve().parent.parent)) import openlm as openai import json completion = openai.Completion.create( model="ada", prompt="Hello world" ) print(json.dumps(completion, indent=4)) ''' { "id": 
"3890d5c3-e6c4-4222-b77d-40a65f1d032b", "object": "text_completion", "created": 1683583320, "choices": [ { "id": "660d576d-8c04-420f-b410-146729c8fc8a", "model_idx": 0, "model_name": "openai.com/ada", "index": 0, "created": 1683583320, "text": ". Details?), website modding, something that makes no sense, and just design", "usage": { "prompt_tokens": 2, "completion_tokens": 16, "total_tokens": 18 }, "extra": { "id": "cmpl-7E3DcN1o6Axv1JIXxHNt11Q0wA1Is" } } ], "usage": { "prompt_tokens": 2, "completion_tokens": 16, "total_tokens": 18 } } ''' ================================================ FILE: examples/custom_provider.py ================================================ import sys from pathlib import Path sys.path.append(str(Path(__file__).resolve().parent.parent)) import openlm import json from typing import Any, Dict, List, Optional, Union class CustomModel(openlm.BaseModel): def create_completion(self, model: Union[str, List[str]], prompt: Union[str, List[str]], suffix: Optional[str] = None, max_tokens: Optional[int] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, n: Optional[int] = None, stream: Optional[bool] = None, logprobs: Optional[int] = None, echo: Optional[bool] = None, stop: Optional[Union[str, List[str]]] = None, presence_penalty: Optional[float] = None, frequency_penalty: Optional[float] = None, best_of: Optional[int] = None, logit_bias: Optional[Dict[str, float]] = None, user: Optional[str] = None) -> Dict[str, Any]: # completions should return a dictionary with the following keys: return { # Required keys: "text": "Hello world!" ## Optional keys: # ,'extra': { # 'key': 'value' # }, # 'usage': { # 'prompt_tokens': 0, # 'completion_tokens': 0, # 'total_tokens': 0, # } } def list_models(self) -> Dict[str, Any]: # list of model names that can be used with this provider return ["your_model_name"] def namespace(self) -> str: # A namespace prevents name collisions between models from different providers. 
# You will be able to reference your model both as: # your_namespace/your_model_name or your_model_name return "your_namespace" openlm.Completion.register(CustomModel()) # Now you can use your custom model in the same way as the built-in models: completion = openlm.Completion.create( model="your_model_name", prompt="Hello world" ) print(json.dumps(completion, indent=4)) ''' { "id": "12bf5515-e2cc-463d-b120-c21c911364f9", "object": "text_completion", "created": 1683583298, "choices": [ { "id": "2dde9e4e-17c3-4d92-be6f-285fb9a96935", "model_idx": 0, "model_name": "your_namespace/your_model_name", "index": 0, "created": 1683583298, "text": "Hello world!" } ], "usage": { "prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0 } } ''' ================================================ FILE: examples/multiplex.py ================================================ import sys from pathlib import Path sys.path.append(str(Path(__file__).resolve().parent.parent)) import openlm import json completion = openlm.Completion.create( model=["ada", "huggingface.co/gpt2", "cohere.ai/command"], prompt=["The quick brown fox", "Who jumped over the lazy dog?"], max_tokens=15 ) print(json.dumps(completion, indent=4)) ''' { "id": "504cc502-dc27-43e7-bcc3-b62e178c247e", "object": "text_completion", "created": 1683583267, "choices": [ { "id": "c0487ba2-935d-4dec-b191-f7eff962f117", "model_idx": 0, "model_name": "openai.com/ada", "index": 0, "created": 1683583233, "text": " jumps into the much bigger brown bush.\" \"Alright, people like you can", "usage": { "prompt_tokens": 4, "completion_tokens": 15, "total_tokens": 19 }, "extra": { "id": "cmpl-7E3CCSpJHXfx5yB0TaJU9ON7rNYPT" } }, { "id": "bab92d11-5ba6-4da2-acca-1f3398a78c3e", "model_idx": 0, "model_name": "openai.com/ada", "index": 1, "created": 1683583233, "text": "\n\nIt turns out that saying one's name \"Joe\" is the", "usage": { "prompt_tokens": 7, "completion_tokens": 15, "total_tokens": 22 }, "extra": { "id": 
"cmpl-7E3CDBbqFy92I2ZbSGoDT5ickAiPD" } }, { "id": "be870636-9d9e-4f74-b8bd-d04766072a7b", "model_idx": 1, "model_name": "huggingface.co/gpt2", "index": 0, "created": 1683583234, "text": "The quick brown foxes, and the short, snuggly fox-scented, soft foxes we have in our household\u2026 all come in two distinct flavours: yellow and orange; and red and white. This mixture is often confused with" }, { "id": "c1abf535-54a9-4b72-8681-d3b4a601da88", "model_idx": 1, "model_name": "huggingface.co/gpt2", "index": 1, "created": 1683583266, "text": "Who jumped over the lazy dog? He probably got it, but there's only so much you do when you lose one.\n\nBut I will say for a moment that there's no way this guy might have picked a fight with Donald Trump." }, { "id": "08e8c351-236a-4497-98f3-488cdc0b6b6a", "model_idx": 2, "model_name": "cohere.ai/command", "index": 0, "created": 1683583267, "text": "\njumps over the lazy dog.", "extra": { "request_id": "0bbb28c0-eb3d-4614-b4d9-1eca88c361ca", "generation_id": "5288dd6f-3ecf-475b-b909-0b226be6a193" } }, { "id": "49ce51e6-9a18-4093-957f-54a1557c8829", "model_idx": 2, "model_name": "cohere.ai/command", "index": 1, "created": 1683583267, "text": "\nThe quick brown fox.", "extra": { "request_id": "ab5d5e03-22a1-42cd-85b2-9b9704c79304", "generation_id": "60493966-abf6-483c-9c47-2ea5c5eeb855" } } ], "usage": { "prompt_tokens": 11, "completion_tokens": 30, "total_tokens": 41 } } ''' ================================================ FILE: openlm/__init__.py ================================================ from openlm.openlm import Completion from openlm.llm.base import BaseModel # For backwards compatibility with OpenAI api_key = None ================================================ FILE: openlm/llm/__init__.py ================================================ from .base import BaseModel from .openai import OpenAI from .huggingface import Huggingface from .cohere import Cohere ================================================ FILE: 
openlm/llm/base.py ================================================ import abc from typing import Any, Dict, List, Optional, Union class BaseCompletion(metaclass=abc.ABCMeta): @abc.abstractmethod def create_completion(self, model: Union[str, List[str]], prompt: Union[str, List[str]], suffix: Optional[str] = None, max_tokens: Optional[int] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, n: Optional[int] = None, stream: Optional[bool] = None, logprobs: Optional[int] = None, echo: Optional[bool] = None, stop: Optional[Union[str, List[str]]] = None, presence_penalty: Optional[float] = None, frequency_penalty: Optional[float] = None, best_of: Optional[int] = None, logit_bias: Optional[Dict[str, float]] = None, user: Optional[str] = None) -> Dict[str, Any]: raise NotImplementedError class BaseModel(BaseCompletion, metaclass=abc.ABCMeta): @abc.abstractmethod def list_models(self) -> Dict[str, Any]: raise NotImplementedError @abc.abstractmethod def namespace(self) -> str: raise NotImplementedError ================================================ FILE: openlm/llm/cohere.py ================================================ from openlm.llm.base import BaseModel import os from typing import Any, Dict, List, Optional, Union import json import requests cohere_models = [ 'command', 'command-nightly', 'command-light', 'command-light-nightly', ] class Cohere(BaseModel): def __init__(self, api_key = os.environ.get("COHERE_API_KEY"), model_list = cohere_models, namespace = 'cohere.ai', base_url = 'https://api.cohere.ai/v1/generate'): self.api_key = api_key self.model_list = model_list self._namespace = namespace self.base_url = base_url def list_models(self): return self.model_list def namespace(self): return self._namespace def create_completion(self, model: Union[str, List[str]], prompt: Union[str, List[str]], suffix: Optional[str] = None, max_tokens: Optional[int] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, n: 
Optional[int] = None, stream: Optional[bool] = None, logprobs: Optional[int] = None, echo: Optional[bool] = None, stop: Optional[Union[str, List[str]]] = None, presence_penalty: Optional[float] = None, frequency_penalty: Optional[float] = None, best_of: Optional[int] = None, logit_bias: Optional[Dict[str, float]] = None, user: Optional[str] = None) -> Dict[str, Any]: headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {self.api_key}'} payload = { 'prompt': prompt, 'model': model, 'max_tokens': max_tokens, 'temperature': temperature, 'p': top_p, 'frequency_penalty': frequency_penalty, 'presence_penalty': presence_penalty, 'stop_sequences': stop, } payload_str = json.dumps({k: v for k, v in payload.items() if v is not None}) resp = requests.post(self.base_url, headers=headers, data=payload_str) if resp.status_code != 200: raise ValueError(resp.status_code, resp.text) return self._convert_response(resp.json()) def _convert_request(req): return { 'prompt': req.prompt, 'top_p': req.top_p, 'temperature': req.temperature, 'max_new_tokens': req.max_tokens, } def _convert_response(self, resp): return { 'text': resp['generations'][0]['text'], 'extra': { 'request_id': resp['id'], 'generation_id': resp['generations'][0]['id'], } } ================================================ FILE: openlm/llm/huggingface.py ================================================ from openlm.llm.base import BaseModel import os from typing import Any, Dict, List, Optional, Union import json import requests hf_models = [ 'gpt2', 'distilgpt2', 'gpt2-large', 'gpt2-medium', 'gpt2-xl', 'bigscience/bloom-560m', 'bigscience/bloom-1b', 'bigscience/bloom-3b', 'bigscience/bloom-7b1', 'decapoda-research/llama-7b-hf', 'decapoda-research/llama-13b-hf', 'decapoda-research/llama-30b-hf', 'decapoda-research/llama-65b-hf', 'EleutherAI/gpt-j-6B', 'EleutherAI/gpt-j-2.7B', 'EleutherAI/gpt-neo-125M', 'EleutherAI/gpt-neo-1.3B', 'EleutherAI/gpt-neox-20B', 'EleutherAI/pythia-160m', 
'EleutherAI/pythia-70m', 'EleutherAI/pythia-12b', 'cerebras/Cerebras-GPT-111M', 'cerebras/Cerebras-GPT-1.3B', 'cerebras/Cerebras-GPT-2.7B', 'bigcode/santacoder', 'Salesforce/codegen-350M-multi', 'Salesforce/codegen-2b-multi', 'stabilityai/stablelm-tuned-alpha-3b', 'stabilityai/stablelm-tuned-alpha-7b', 'facebook/opt-125m', 'facebook/opt-350m', 'facebook/opt-1.3b', 'facebook/opt-2.7b', 'facebook/opt-6.7b', 'facebook/opt-13b', 'facebook/opt-30b', 'mosaicml/mpt-7b', 'mosaicml/mpt-7b-instruct', 'databricks/dolly-v2-7b', 'databricks/dolly-v2-12b', ] class Huggingface(BaseModel): def __init__(self, api_key = os.environ.get("HF_API_TOKEN"), model_list = hf_models, namespace = 'huggingface.co', base_url = 'https://api-inference.huggingface.co/models'): self.api_key = api_key self.model_list = model_list self._namespace = namespace self.base_url = base_url def list_models(self): return self.model_list def namespace(self): return self._namespace def create_completion(self, model: Union[str, List[str]], prompt: Union[str, List[str]], suffix: Optional[str] = None, max_tokens: Optional[int] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, n: Optional[int] = None, stream: Optional[bool] = None, logprobs: Optional[int] = None, echo: Optional[bool] = None, stop: Optional[Union[str, List[str]]] = None, presence_penalty: Optional[float] = None, frequency_penalty: Optional[float] = None, best_of: Optional[int] = None, logit_bias: Optional[Dict[str, float]] = None, user: Optional[str] = None) -> Dict[str, Any]: headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {self.api_key}'} payload = { 'inputs': prompt, 'top_p': top_p, 'temperature': temperature, 'max_new_tokens': max_tokens, } payload_str = json.dumps({k: v for k, v in payload.items() if v is not None}) resp = requests.post(self.base_url + '/' + model, headers=headers, data=payload_str) if resp.status_code != 200: raise ValueError(resp.status_code, resp.text) return 
self._convert_response(resp.json()) def _convert_request(req): return { 'prompt': req.prompt, 'top_p': req.top_p, 'temperature': req.temperature, 'max_new_tokens': req.max_tokens, } def _convert_response(self, resp): return { 'text': resp[0]['generated_text'], } ================================================ FILE: openlm/llm/openai.py ================================================ import json import os from typing import Any, Dict, List, Optional, Union import requests from openlm.llm.base import BaseModel openai_models = [ 'text-davinci-003', 'text-davinci-002', 'text-curie-001', 'text-babbage-001', 'text-ada-001', # aliases 'ada', 'babbage', 'curie', 'davinci', ] class OpenAI(BaseModel): def __init__(self, api_key = os.environ.get("OPENAI_API_KEY"), model_list = openai_models, namespace = 'openai.com', base_url = 'https://api.openai.com/v1/completions'): if api_key is None: raise ValueError("OPENAI_API_KEY is not set or passed as an argument") self.api_key = api_key self.model_list = model_list self._namespace = namespace self.base_url = base_url def create_completion(self, model: Union[str, List[str]], prompt: Union[str, List[str]], suffix: Optional[str] = None, max_tokens: Optional[int] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, n: Optional[int] = None, stream: Optional[bool] = None, logprobs: Optional[int] = None, echo: Optional[bool] = None, stop: Optional[Union[str, List[str]]] = None, presence_penalty: Optional[float] = None, frequency_penalty: Optional[float] = None, best_of: Optional[int] = None, logit_bias: Optional[Dict[str, float]] = None, user: Optional[str] = None) -> Dict[str, Any]: headers = {'Content-Type': 'application/json', 'Authorization': f'Bearer {self.api_key}'} payload = { 'model': model, 'prompt': prompt, 'suffix': suffix, 'max_tokens': max_tokens, 'temperature': temperature, 'top_p': top_p, 'n': n, 'stream': stream, 'logprobs': logprobs, 'echo': echo, 'stop': stop, 'presence_penalty': 
presence_penalty, 'frequency_penalty': frequency_penalty, 'best_of': best_of, 'logit_bias': logit_bias, 'user': user } payload_str = json.dumps({k: v for k, v in payload.items() if v is not None}) resp = requests.post(self.base_url, headers=headers, data=payload_str).json() if 'error' in resp: raise ValueError(resp['error']) return self._convert_response(resp) def _convert_response(self, response: Dict[str, Any]) -> Dict[str, Any]: return { 'text': response['choices'][0]['text'], 'extra': { 'id': response['id'], }, 'usage': response['usage'], } def list_models(self): return self.model_list def namespace(self): return self._namespace ================================================ FILE: openlm/openlm.py ================================================ from typing import Any, Dict, List, Optional, Union import uuid from openlm.llm import BaseModel, OpenAI, Huggingface, Cohere import time import openlm from concurrent.futures import ThreadPoolExecutor class Completion(): """ OpenAI-compatible completion API """ models = {} aliases = {} @classmethod def create(cls, model: Union[str, List[str]], prompt: Union[str, List[str]], suffix: Optional[str] = None, max_tokens: Optional[int] = None, temperature: Optional[float] = None, top_p: Optional[float] = None, n: Optional[int] = None, stream: Optional[bool] = None, logprobs: Optional[int] = None, echo: Optional[bool] = None, stop: Optional[Union[str, List[str]]] = None, presence_penalty: Optional[float] = None, frequency_penalty: Optional[float] = None, best_of: Optional[int] = None, logit_bias: Optional[Dict[str, float]] = None, user: Optional[str] = None, api_keys: Optional[Dict[str, str]] = None, request_timeout=0) -> Dict[str, Any]: """ Creates a completion request for the OpenAI API. :param model: The ID(s) of the model to use. :param prompt: The prompt(s) to generate completions for. :param suffix: A string to append to the completion(s). 
:param max_tokens: The maximum number of tokens to generate in the completion(s). :param temperature: The sampling temperature to use. :param top_p: The nucleus sampling probability to use. :param n: The number of completions to generate. :param stream: Whether to stream back partial progress updates. :param logprobs: The number of log probabilities to generate per token. :param echo: Whether to include the prompt(s) in the completion(s). :param stop: The stop sequence(s) to use. :param presence_penalty: The presence penalty to use. :param frequency_penalty: The frequency penalty to use. :param best_of: The number of completions to generate and return the best of. :param logit_bias: A dictionary of token IDs and bias values to use. :param user: The ID of the user making the request. :return: A dictionary containing the completion response. """ cls.register_default() if isinstance(model, str): model = [model] if isinstance(prompt, str): prompt = [prompt] # Create a list of tuples, each containing all the parameters for a call to _generate_completion args = [(m, p, suffix, max_tokens, temperature, top_p, n, stream, logprobs, echo, stop, presence_penalty, frequency_penalty, best_of, logit_bias, user) for m in model for p in prompt] total_usage = { 'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0 } # Use a ThreadPoolExecutor to run _generate_completion in parallel for each set of parameters with ThreadPoolExecutor() as executor: choices = list(executor.map(lambda params: cls._generate_completion(*params), args)) # Sum up the usage from all choices for choice in choices: if 'usage' in choice: total_usage['prompt_tokens'] += choice['usage']['prompt_tokens'] total_usage['completion_tokens'] += choice['usage']['completion_tokens'] total_usage['total_tokens'] += choice['usage']['total_tokens'] return { "id": str(uuid.uuid4()), "object": "text_completion", "created": int(time.time()), "choices": choices, "usage": total_usage, } @classmethod def 
_generate_completion(cls, model, prompt, suffix, max_tokens, temperature, top_p, n, stream, logprobs, echo, stop, presence_penalty, frequency_penalty, best_of, logit_bias, user): """ Function to generate a single completion. This will be used in parallel execution. """ if model not in cls.aliases: raise ValueError(f"Model {model} not found. OpenLM currently supports the following models:\n{cls._pretty_list_models()}") fqn = cls.aliases[model] try: ret = cls.models[fqn].create_completion( model=fqn[len(cls.models[fqn].namespace())+1:], prompt=prompt, suffix=suffix, max_tokens=max_tokens, temperature=temperature, top_p=top_p, n=n, stream=stream, logprobs=logprobs, echo=echo, stop=stop, presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, best_of=best_of, logit_bias=logit_bias, user=user) except Exception as e: ret = { 'error': f"Error: {e}" } choice = { "id": str(uuid.uuid4()), "model_name": fqn, 'created': int(time.time()), } if 'error' in ret: choice['error'] = ret['error'] if 'text' in ret: choice['text'] = ret['text'] if 'usage' in ret: choice['usage'] = ret['usage'] if 'extra' in ret: choice['extra'] = ret['extra'] return choice @classmethod def register(cls, providers: BaseModel | List[BaseModel]): if not isinstance(providers, list): providers = [providers] for provider in providers: for model in provider.list_models(): fqn = provider.namespace() + '/' + model cls.models[fqn] = provider cls.aliases[model] = fqn cls.aliases[fqn] = fqn if '/' in model: cls.aliases[model.split('/')[1]] = fqn @classmethod def register_default(cls, api_keys: Optional[Dict[str, str]] = None): if openlm.api_key: cls.register(OpenAI(api_key=openlm.api_key)) else: if api_keys and api_keys['openai.com'] is not None: cls.register(OpenAI(api_key=api_keys['openai.com'])) else: cls.register(OpenAI()) if api_keys and api_keys['huggingface.co'] is not None: cls.register(Huggingface(api_key=api_keys['huggingface.co'])) else: cls.register(Huggingface()) if api_keys and 
api_keys['cohere.ai'] is not None: cls.register(Cohere(api_key=api_keys['cohere.ai'])) else: cls.register(Cohere()) @classmethod def list_models(cls) -> List[str]: reverse_alias = {} for key, value in cls.aliases.items(): # If the value is not in the reverse dictionary, create an empty array for it if value not in reverse_alias: reverse_alias[value] = [] # Append the key to the array for the value in the reverse dictionary reverse_alias[value].append(key) return reverse_alias @classmethod def _pretty_list_models(cls): ret = "" for key, value in cls.list_models().items(): ret += f"-> {value} \n" return ret ================================================ FILE: pyproject.toml ================================================ [tool.poetry] name = "openlm" version = "0.0.4" description = "Drop-in OpenAI-compatible that can call LLMs from other providers" authors = ["Matt Rickard "] maintainers = ["Matt Rickard "] readme = "README.md" license = "MIT" keywords = [ "llm", "ai", "prompt", "large language models", "gpt-3", "chatgpt", ] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3 :: Only", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic", ] urls = { repository = "https://github.com/r2d4/openlm" } [tool.poetry.dependencies] python = ">=3.8.1,<4.0" requests = "^2" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api"